## Feature reduction with information gain (see Menzies et al. 2007).
## Replace "file.csv" below with the path of the CSV file that should receive the results.
## Replace "data/c" below (wherever it appears) with the path prefix of the directories holding the parametrized data for each value of c. For example, keep data/c if your samples are in data/c1, data/c2, ...

library(FSelector)
library(RWeka)

## Driver script.
## For every parameter directory data/c1 .. data/c4 and every split factor
## k = 2 .. 5, up to 25 sample files are read. Each sample with exactly two
## distinct FailureNumber values is reduced with InformationGain(), split
## randomly into a validation part (n/k rows) and a fit part (the rest), and
## evaluated through TopologySelectionMP() and comparison_1(). One row of 18
## values per sample is appended to filePathFin_1; samples that cannot be
## processed produce a row of 18 NAs.
##
## The helpers called here (InformationGain, TopologySelectionMP, comparison_1)
## must already be defined when this loop runs: try() turns any error raised by
## the InformationGain() call -- including "could not find function" -- into a
## row of NAs instead of stopping the script.
## comparison_1 is not defined in the visible part of this file.
filePathFin_1 <- paste("file.csv")

## Appends one CSV line (no header, no row names) to the result file.
append_row <- function(values) {
  write.table(t(values), file = filePathFin_1, row.names = FALSE,
              col.names = FALSE, append = TRUE, sep = ",")
}

## Two header lines: classifier group names, then the per-group measure names.
## NOTE(review): the header announces MR, fpr, tpr while the classifier
## wrappers in this file return c(MR, tpr, fpr) -- verify that comparison_1
## emits the columns in the announced order.
colnames <- c("MP", "", "", "", "", "", "RBF", "", "", "", "", "", "L", "", "", "", "", "")
append_row(colnames)
colnames <- c("MR", "fpr", "tpr", "MR_Validation", "fpr_Validating", "tpr_Validating", "MR", "fpr", "tpr", "MR_Validating", "fpr_Validating", "tpr_Validating", "MR", "fpr", "tpr", "MR_Validating", "fpr_Validating", "tpr_Validating")
append_row(colnames)

for (i in 1:4) {
  separator <- paste("c", i, sep = "")
  append_row(separator)
  directoryInner <- paste("data/c", i, "/", sep = "")
  DInner <- dir(directoryInner, full.names = TRUE)
  HInner <- dir(directoryInner, full.names = FALSE)
  ## InformationGain() appends its log to <directoryInner>InformationGain.csv,
  ## i.e. into the directory listed above. Drop it so a re-run does not treat
  ## the log as a sample and shift the sample indices.
  isSample <- HInner != "InformationGain.csv"
  DInner <- DInner[isSample]
  HInner <- HInner[isSample]
  s <- length(DInner)
  for (k in 2:5) {
    separator <- paste("k", k, sep = "")
    append_row(separator)
    ## Not read anywhere in the visible code; kept in case helpers defined
    ## elsewhere rely on these globals.
    c <- c(0, 0, 0)
    d1 <- c(NA, NA, NA)
    d2 <- c(NA, NA, NA)
    d3 <- c(NA, NA, NA)
    Top <- matrix(nrow = 25, ncol = 18)

    ## Never index past the files that actually exist: read.csv(NA) would
    ## abort the whole run.
    for (j in seq_len(min(25L, s))) {
      OriginalSet <- read.csv(DInner[j], header = TRUE, sep = ",")
      resultRow <- rep(NA, 18)
      if (length(unique(OriginalSet$FailureNumber)) == 2) {
        ReducedSet <- try(InformationGain(OriginalSet, j))
        if (!inherits(ReducedSet, "try-error")) {
          ## Split ReducedSet into FitSet and ValidationSet
          n <- nrow(ReducedSet)
          ## Select randomly n/k rows of ReducedSet for validation
          rowIDx <- sample(n, size = n / k)
          ValidationSet <- ReducedSet[rowIDx, ]
          ## FitSet holds the remaining rows, in shuffled order
          rowTotal <- sample(n, size = n)
          tmpIndex <- rowTotal[!rowTotal %in% rowIDx]
          FitSet <- ReducedSet[tmpIndex, ]
          ## recall best weights
          BestValues <- TopologySelectionMP(FitSet)
          ## produce a row of 18 values for MR, fpr, and tpr respectively
          ## NOTE(review): the fourth argument is passed as j - 5 exactly as
          ## before, because comparison_1 is not visible here -- confirm what
          ## index it expects now that j starts at 1.
          resultRow <- comparison_1(FitSet, ValidationSet, BestValues, j - 5)
        }
      }
      ## Row j of Top belongs to sample j. The former Top[j - 5, ] used a
      ## negative or zero row index for j = 1..5, which overwrote other rows
      ## and wrote malformed lines to the result file.
      Top[j, ] <- resultRow
      append_row(Top[j, ])
    }
  }
}
  
## Select features in an original sample by information gain.
##
## Arguments:
##   OriginalSet  data frame with a FailureNumber column plus candidate
##                attributes.
##   j            index into the global HInner (file names); used only to label
##                the log line.
## Returns:
##   OriginalSet restricted to the attributes with non-zero information gain
##   (all attributes when none has zero gain), with FailureNumber appended as
##   the last column.
## Side effects:
##   Prints the gains and the log line, and appends the log line (file name,
##   number of attributes, number kept, kept column names) to
##   data/c<i>/InformationGain.csv.
## Reads the globals HInner and i, which the calling script must have set.
InformationGain <- function(OriginalSet, j) {
  weights <- information.gain(FailureNumber ~ ., OriginalSet)
  print(weights)
  ranked <- sort(weights$attr_importance, decreasing = TRUE)
  ## In a decreasing sort the first minimum marks where the smallest gain
  ## starts; if that gain is 0, everything before it has positive gain.
  firstMin <- which.min(ranked)
  if (ranked[firstMin] == 0) {
    numberOfAttributes <- firstMin - 1
  } else {
    ## weights is a one-column data frame: length(weights) is always 1, so the
    ## number of attributes has to come from its rows.
    numberOfAttributes <- nrow(weights)
  }
  selected <- cutoff.k(weights, numberOfAttributes)
  ReducedSet <- OriginalSet[, selected, drop = FALSE]
  columns <- sort(colnames(ReducedSet), decreasing = TRUE)
  file <- c(HInner[j], nrow(weights), numberOfAttributes, columns)
  print(file)
  filePath <- paste("data/c", i, "/InformationGain.csv", sep = "")
  write.table(t(file), file = filePath, row.names = FALSE, col.names = FALSE, append = TRUE, sep = ",")
  FailureNumber <- OriginalSet$FailureNumber
  ReducedSetAugmented <- cbind(ReducedSet, FailureNumber)
  return(ReducedSetAugmented)
}


######################################
## Topology Selection
##
## Grid search for the multilayer-perceptron settings (l, m, h) that give the
## smallest first value returned by MultilayerPerceptronTopologySelection().
## The grid covers l and m in 0.1, 0.2, ..., 1 and h in 1, 2, ..., 10.
##
## Set must carry a FailureNumber column. When it does not hold exactly two
## distinct FailureNumber values no search is run and three NAs are returned.
## The selected triple (or the NAs) is also appended as one line to file1.csv,
## the file in which the topology parameters are saved.
TopologySelectionMP <- function(Set) {
  outputPath <- paste("file1.csv")
  best <- matrix(nrow = 1, ncol = 3)
  hasTwoClasses <- length(unique(Set$FailureNumber)) == 2
  if (!hasTwoClasses) {
    best[1, ] <- as.matrix(c(NA, NA, NA))
  } else {
    candidates <- expand.grid(l = seq(0.1, 1, 0.1), m = seq(0.1, 1, 0.1), h = seq(1, 10, 1))
    ## score one grid row: first element of the topology-selection result
    scoreRow <- function(p) {
      MultilayerPerceptronTopologySelection(Set, p[1], p[2], p[3])[1]
    }
    scores <- apply(candidates, 1, scoreRow)
    winner <- which.min(scores)
    best[1, ] <- as.matrix(candidates[winner, ])
  }
  write.table(t(best[1, ]), file = outputPath, row.names = FALSE, col.names = FALSE, append = TRUE, sep = ",")
  best[1, ]
}


## Fits a Weka MultilayerPerceptron on Set (FailureNumber against all other
## columns) and returns two entries of the fit summary's details vector.
##
## Arguments forwarded as Weka options: l -> L (learning rate), m -> M
## (momentum), h -> H (hidden-layer specification; per the Weka documentation a
## single number asks for one hidden layer with that many nodes).
## Fixed options: S=1, R=FALSE, N=500, V=30 (percentage of validation set),
## D=TRUE (boolean flag).
## Data is normalized by default.
##
## Returns c(MR, MAS):
##   MR   second details entry, the percentage of incorrectly classified
##        instances
##   MAS  fifth details entry, the mean absolute error
## summary() also provides the confusion matrix, which is not used here.
MultilayerPerceptronTopologySelection <- function(Set, l, m, h) {
  buildMLP <- make_Weka_classifier("weka/classifiers/functions/MultilayerPerceptron")
  ## WOW(buildMLP) lists the available options, see
  ## http://weka.sourceforge.net/doc/weka/classifiers/functions/MultilayerPerceptron
  options <- Weka_control(S = 1, R = FALSE, L = l, M = m, N = 500, V = 30, H = h, D = TRUE)
  fitted <- buildMLP(FailureNumber ~ ., data = Set, control = options)
  ## summary describes the quality of fit of the network
  fitDetails <- summary(fitted)$details
  fitDetails[c(2, 5)]
}
   
######################################
## RBF
## Fits a Weka RBFNetwork on FitSet (FailureNumber against all other columns)
## with option B = 2 and summarises the fit.
##
## Returns c(MR, tpr, fpr) -- note the order:
##   MR   second entry of the summary's details vector
##   tpr  confusion-matrix cell [1,1] divided by the sum of cells [1,1], [1,2]
##   fpr  confusion-matrix cell [2,1] divided by the sum of cells [2,1], [2,2]
## NOTE(review): this presumes rows are the actual classes and the first class
## is the positive one -- confirm against the RWeka summary output.
RadialBasisFunctionNetwork <- function(FitSet) {
  ## the RBFNetwork classifier is loaded through the Weka package manager
  WPM("load-package", "RBFNetwork")
  buildRBF <- make_Weka_classifier("weka/classifiers/functions/RBFNetwork")
  ## WOW(buildRBF) lists the available options, see
  ## http://weka.sourceforge.net/doc/weka/classifiers/functions/RBFNetwork.html
  fitted <- buildRBF(FailureNumber ~ ., data = FitSet, control = Weka_control(B = 2))
  ## summary describes the quality of fit of the network
  quality <- summary(fitted)
  cm <- quality$confusionMatrix
  firstRowRate <- cm[1, 1] / (cm[1, 1] + cm[1, 2])
  secondRowRate <- cm[2, 1] / (cm[2, 1] + cm[2, 2])
  c(quality$details[2], firstRowRate, secondRowRate)
}

######################################
## L
## To classify non-numeric instances with linear regression, Weka's
## ClassificationViaRegression is used with LinearRegression as base learner.
## The model is fitted on FitSet (FailureNumber against all other columns).
##
## Returns c(MR, tpr, fpr) -- note the order:
##   MR   second entry of the summary's details vector
##   tpr  confusion-matrix cell [1,1] divided by the sum of cells [1,1], [1,2]
##   fpr  confusion-matrix cell [2,1] divided by the sum of cells [2,1], [2,2]
## NOTE(review): S=0 is handed to the meta classifier, not to the base learner;
## if it is meant as a LinearRegression option, confirm that it takes effect.
LinearRegression <- function(FitSet) {
  buildLR <- make_Weka_classifier("weka/classifiers/meta/ClassificationViaRegression")
  ## WOW(buildLR) lists the available options, see
  ## http://weka.sourceforge.net/doc/weka/classifiers/functions/LinearRegression
  options <- Weka_control(D = TRUE, W = "weka.classifiers.functions.LinearRegression", S = 0)
  fitted <- buildLR(FailureNumber ~ ., data = FitSet, control = options)
  ## summary describes the quality of fit of the regression-based classifier
  quality <- summary(fitted)
  cm <- quality$confusionMatrix
  firstRowRate <- cm[1, 1] / (cm[1, 1] + cm[1, 2])
  secondRowRate <- cm[2, 1] / (cm[2, 1] + cm[2, 2])
  c(quality$details[2], firstRowRate, secondRowRate)
}

## MP
## Fits a Weka MultilayerPerceptron on FitSet (FailureNumber against all other
## columns) with the given learning rate l (option L), momentum m (option M)
## and hidden-layer specification h (option H).
## Fixed options: S=1, R=FALSE, N=500, V=30 (percentage of validation set),
## D=TRUE (boolean flag). Data is normalized by default (option I left off).
## Original author's caution: sometimes the nrow of TestSet is too small to
## replicate with 10 folds.
##
## Returns c(MR, tpr, fpr) -- note the order:
##   MR   second entry of the summary's details vector
##   tpr  confusion-matrix cell [1,1] divided by the sum of cells [1,1], [1,2]
##   fpr  confusion-matrix cell [2,1] divided by the sum of cells [2,1], [2,2]
MultilayerPerceptronNN <- function(FitSet, l, m, h) {
  buildMLP <- make_Weka_classifier("weka/classifiers/functions/MultilayerPerceptron")
  ## WOW(buildMLP) lists the available options, see
  ## http://weka.sourceforge.net/doc/weka/classifiers/functions/MultilayerPerceptron
  options <- Weka_control(S = 1, R = FALSE, L = l, M = m, N = 500, V = 30, H = h, D = TRUE)
  fitted <- buildMLP(FailureNumber ~ ., data = FitSet, control = options)
  ## summary describes the quality of fit of the network
  quality <- summary(fitted)
  cm <- quality$confusionMatrix
  firstRowRate <- cm[1, 1] / (cm[1, 1] + cm[1, 2])
  secondRowRate <- cm[2, 1] / (cm[2, 1] + cm[2, 2])
  c(quality$details[2], firstRowRate, secondRowRate)
}