##Homogeneity test using Russo et al paper, Hodge and Olkin method, fisher transformation for meta-analysis of correlation across samples ##read the Spearman correlations from file of defective sequence types filePath<-"MulitplicitiesDefectsUsersCorrelationsDefectiveST.csv" tmp <- read.csv(filePath, header=FALSE, sep=",") m<-ncol(tmp) n<-nrow(tmp) mat<-matrix(ncol=3,nrow=3) #often data in data.frame are not correctly formatted. Need to chose the comma or point format for(i in 3:m){ tmp[,i]<-as.numeric(gsub(",", ".", tmp[,i])) } ##Variable definition sorted by p-values #Correlation between multiplicity and failure number corr_mu_rho<-tmp[,4] length(corr_mu_rho) tmp_omit1<-tmp[row.names(na.omit(tmp[,5:6])),] #correlation between multiplicity and distribution corr_mu_nu<-tmp_omit1[,6] length(corr_mu_nu) tmp_omit2<-tmp[row.names(na.omit(tmp[,7:8])),] #correlation between distribution and failure number corr_rho_nu<-tmp_omit2[,8] length(corr_rho_nu) tmp_minus<-tmp[-12,] tmp_minus1<-tmp_omit1[-8,] tmp_minus2<-tmp_omit2[-8,] ##Variable definition sorted by p-values #Correlation between multiplicity and failure number corr_mu_rho1<-tmp_minus[,4] length(corr_mu_rho1) #correlation between multiplicity and distribution corr_mu_nu1<-tmp_minus1[,6] length(corr_mu_nu1) #correlation between distribution and failure number corr_rho_nu1<-tmp_minus2[,8] length(corr_rho_nu1) tmp_minus3<-tmp_minus[-24,] tmp_minus4<-tmp_minus1[-15,] tmp_minus5<-tmp_minus2[-15,] ##Variable definition sorted by p-values #Correlation between multiplicity and failure number corr_mu_rho3<-tmp_minus3[,4] length(corr_mu_rho3) #correlation between multiplicity and distribution corr_mu_nu4<-tmp_minus4[,6] length(corr_mu_nu4) #correlation between distribution and failure number corr_rho_nu5<-tmp_minus5[,8] length(corr_rho_nu5) Q_Test<- function(correlation, temp){ #transform the spearman corelations to an approximation of Pearson's corr_temp<- 2*sin((pi/6)*correlation) #Fisher transformation z<- (1/2)*log((1+corr_temp)/(1-corr_temp),exp(1)) #95% confidence interval sum<-sum(temp[,13]-3) mean<-sum((temp[,13]-3)*z)/sum z_minus<- mean-1.96/sqrt(sum) z_plus<-mean+1.96/sqrt(sum) #transform back from Fisher corr_mean<-(exp(2*mean) - 1)/(exp(2*mean) +1) corr_minus<- (exp(2*z_minus) - 1)/(exp(2*z_minus) +1) corr_plus<- (exp(2*z_plus) - 1)/(exp(2*z_plus) +1) Q<- sum((temp[,13]-3)*(z - mean)^2) return(c(corr_minus,corr_plus,Q,corr_mean)) } vector_mu_rho<-Q_Test(corr_mu_rho,tmp) Boolean_mu_rho<-vector_mu_rho[3]>qchisq(0.05,24,lower.tail=FALSE) vector_mu_nu<-Q_Test(corr_mu_nu,tmp_omit1) Boolean_mu_nu<-vector_mu_nu[3]>qchisq(0.05,15,lower.tail=FALSE) vector_rho_nu<-Q_Test(corr_rho_nu,tmp_omit2) Boolean_rho_nu<-vector_rho_nu[3]>qchisq(0.05,15,lower.tail=FALSE) vector_mu_rho1<-Q_Test(corr_mu_rho1,tmp_minus) Boolean_mu_rho1<-vector_mu_rho1[3]>qchisq(0.05,23, lower.tail=FALSE) vector_mu_nu1<-Q_Test(corr_mu_nu1,tmp_minus1) Boolean_mu_nu1<-vector_mu_nu1[3]>qchisq(0.05,14, lower.tail=FALSE) vector_rho_nu1<-Q_Test(corr_rho_nu1,tmp_minus2) Boolean_rho_nu1<-vector_rho_nu1[3]>qchisq(0.05,14, lower.tail=FALSE) vector_mu_rho2<-Q_Test(corr_mu_rho3,tmp_minus3) Boolean_mu_rho2<-vector_mu_rho2[3]>qchisq(0.05,22,lower.tail=FALSE) vector_mu_nu2<-Q_Test(corr_mu_nu4,tmp_minus4) Boolean_mu_nu2<-vector_mu_nu2[3]>qchisq(0.05,13, lower.tail=FALSE) vector_rho_nu2<-Q_Test(corr_rho_nu5,tmp_minus5) Boolean_rho_nu2<-vector_rho_nu2[3]>qchisq(0.05,13,lower.tail=FALSE) vector_Q<-c(vector_mu_rho[3],vector_mu_nu[3],vector_rho_nu[3]) vector1_Q<-c(vector_mu_rho1[3],vector_mu_nu1[3],vector_rho_nu1[3]) vector2_Q<-c(vector_mu_rho2[3],vector_mu_nu2[3],vector_rho_nu2[3]) vector_z_minus<-c(vector_mu_rho[1],vector_mu_nu[1],vector_rho_nu[1]) vector1_z_minus<-c(vector_mu_rho1[1],vector_mu_nu1[1],vector_rho_nu1[1]) vector2_z_minus<-c(vector_mu_rho2[1],vector_mu_nu2[1],vector_rho_nu2[1]) vector_z_plus<-c(vector_mu_rho[2],vector_mu_nu[2],vector_rho_nu[2]) vector1_z_plus<-c(vector_mu_rho1[2],vector_mu_nu1[2],vector_rho_nu1[2]) vector2_z_plus<-c(vector_mu_rho2[2],vector_mu_nu2[2],vector_rho_nu2[2]) mean<-c(vector_mu_rho[4],vector_mu_nu[4],vector_rho_nu[4]) mean1<-c(vector_mu_rho1[4],vector_mu_nu1[4],vector_rho_nu1[4]) mean2<-c(vector_mu_rho2[4],vector_mu_nu2[4],vector_rho_nu2[4]) vector_Q vector1_Q vector2_Q vector_z_minus vector1_z_minus vector2_z_minus vector_z_plus vector1_z_plus vector2_z_plus mean mean1 mean2 BooleanVector<-c(Boolean_mu_rho,Boolean_mu_nu,Boolean_rho_nu) BooleanVector1<-c(Boolean_mu_rho1,Boolean_mu_nu1,Boolean_rho_nu1) BooleanVector2<-c(Boolean_mu_rho2,Boolean_mu_nu2,Boolean_rho_nu2)