For the following DataFrame (df):
# Per-school counts; the first field is the school name, the rest line up
# with the topic columns named below.
data = [
    ['TAMU', 54, 0, 0, 6, 5, 0],
    ['UIUC', 33, 43, 5, 0, 76, 81],
    ['USC', 4, 1, 0, 7, 21, 4],
    ['Austin', 22, 31, 0, 0, 55, 0],
    ['UCLA', 55, 6, 7, 9, 11, 12],
]
# Bind the frame to `df` so later code can refer to it (the result was
# previously discarded).
df = pd.DataFrame(
    data,
    columns=['Name', 'Research', 'Thesis', 'Proposal', 'AI', 'Analytics', 'Data'],
)
I want to create contingency tables for all possible pairs of columns (e.g. AI and Analytics, Data and AI) for two specified rows (say USC and UCLA), and feed each table to my chi-square function:
def overflow(school1, school2, alpha):
    """Run a chi-square test on every pair of columns for two schools.

    The rows for ``school1`` and ``school2`` are selected from the built-in
    table, columns in which either school has a zero count are dropped, and
    for every remaining pair of columns the 2x2 table (schools x columns) is
    passed to ``scipy.stats.chi2_contingency``.

    Args:
        school1: Name of the first school (a value of the ``Name`` column).
        school2: Name of the second school.
        alpha: Threshold; only p-values strictly greater than ``alpha`` are
            kept.

    Returns:
        A list of the p-values greater than ``alpha``, in the order produced
        by ``itertools.combinations`` over the remaining columns.

    Raises:
        ValueError: If either school name is not present in the data.
    """
    # Local import: the surrounding file shows no import section to extend.
    from itertools import combinations

    data = [
        ['TAMU', 54, 0, 0, 6, 5, 0],
        ['UIUC', 33, 43, 5, 0, 76, 81],
        ['USC', 4, 1, 0, 7, 21, 4],
        ['Austin', 22, 31, 0, 0, 55, 0],
        ['UCLA', 55, 6, 7, 9, 11, 12],
    ]
    df = pd.DataFrame(
        data,
        columns=['Name', 'Research', 'Thesis', 'Proposal', 'AI', 'Analytics', 'Data'],
    )

    known = set(df['Name'])
    missing = [name for name in (school1, school2) if name not in known]
    if missing:
        raise ValueError(f"unknown school name(s): {missing}")

    # Keep only the two requested schools, indexed by name.
    df = df[df['Name'].isin([school1, school2])].set_index('Name')
    # Drop columns containing a zero: chi2_contingency rejects tables whose
    # expected frequencies include zero.
    df = df.loc[:, df.ne(0).all()]

    pvals_list = []
    for col_a, col_b in combinations(df.columns, 2):
        # Two schools x two columns of counts is already the contingency
        # table, so no pd.crosstab call is needed.
        table = df[[col_a, col_b]].to_numpy()
        # chi2_contingency returns (statistic, p-value, dof, expected).
        p_val = stats.chi2_contingency(table)[1]
        if p_val > alpha:
            pvals_list.append(p_val)
    return pvals_list


overflow('USC', 'UCLA', 0.05)