##
##define Training set and Test set before using this script
##load the package RWeka before using this script
## Due to the lack of weka software on windows 7, I had to change computer and the files' path has changed

library(RWeka)

DefectiveSamples<-read.csv("C:\\Documents and Settings\\brusso\\My Documents\\Pedrycz\\Result\\DescriptiveAnalysisDefectiveST.csv", sep=";")
ApplicationNames<-DefectiveSamples$Application
a<-length(ApplicationNames)
MR<-c()
for(i in 1:4){
    directory<-paste("C:\\Documents and Settings\\brusso\\My Documents\\Pedrycz\\data\\c",i,"\\", sep="")
    H<- dir(directory, full.names=FALSE)
    D<- dir(directory, full.names=TRUE)
    s<-length(D)          

##vary the topology of Multilayer Perceptron
    min<-c()
    Lfin<-c()
    Mfin<-c()
    h<-c()
    for (j in 1:(s-4)){ 
        L<-c()
        M<-c()   
        minVector<-c()
        for(h in 1:10){
            MRMatrix<-matrix(ncol=10,nrow=10) 
            for(l in 1:10){
                lrat<-1/(l+1)
                for(m in 1:10){         
                    mrat<-1/(m+1)
                    FitSet <- read.csv(D[j+4], header=TRUE, sep=",")
                    ##MRMatrix is the matrix of incorrectly classified instances percentages
                    MRMatrix[l,m]<-MultilayerPerceptronTopologySelection(FitSet,lrat,mrat,h)[1]
                } 
            }        
            ##min values of l and m and MR according to the min value of MR=incorrect classified instances percentage
            L[h]<-which(MRMatrix==min(MRMatrix), arr.ind=TRUE)[1,1]
            M[h] <-which(MRMatrix==min(MRMatrix), arr.ind=TRUE)[1,2]
            minVector[h]<- min(MRMatrix)
        }
        ## temp_h is the value of h corresponding the min value of incorrect classified instance percentage in minVector
        min[j]<-min(minVector)
        temp_h<-which.min(minVector)
        h[j]<-temp_h
        Lfin[j]<-L[temp_h]	
        Mfin[j]<-M[temp_h] 
    }
    lfin<-1/(Lfin+1)
    mfin<-1/(Mfin+1)
    NNTop<-cbind(min,h,lfin,mfin)
    filePath=paste("C:\\Documents and Settings\\brusso\\My Documents\\Pedrycz\\Result\\NNTopologies.csv")
    write.table(NNTop, file=filePath, row.names=FALSE, col.names=colnames(NNTop), append=TRUE, sep=",")
    separator<-paste("c",i, sep="")
    write.table(separator, file=filePath, row.names=FALSE, col.names=FALSE, append=TRUE, sep=",")
}

## D is boolean (TRUE/FALSE)H is the number of hidden layers, L learning rate, V percentage of validation set; M momentum
##MR is the percentage of Incorrectly Classified Instances 
##MAS is the mean absolute value
## summary gives the confusion matrix
MultilayerPerceptronTopologySelection<-function(FitSet,lrat,mrat,h){
    NN<-make_Weka_classifier("weka/classifiers/functions/MultilayerPerceptron")
    ##WOW(NN)http://weka.sourceforge.net/doc/weka/classifiers/functions/MultilayerPerceptron
    ## data is normalized by default 
    ResultNN<-NN(FailureNumber ~ .,data=FitSet,  control=Weka_control(S=1,L=lrat,M=mrat,N=500,V=40,H=h,D=TRUE))
    ##summary describe the quality of fit of the NN
    Summary<-summary(ResultNN)
    MR<-Summary$details[2]
    MAS<-Summary$details[5]
    c<-c(MR,MAS)
    return(c)
}
##1/k is the percentage of splitting Fit set - Test set
for(i in 1:4){
    for(k in 2:5){
        directoryInner<-paste("C:\\Documents and Settings\\brusso\\My Documents\\Pedrycz\\data\\c",i,"\\",k,"\\", sep="")
        DInner<- dir(directoryInner, full.names=TRUE)
        HInner<- dir(directoryInner, full.names=FALSE)
        
        filePath=paste("C:\\Documents and Settings\\brusso\\My Documents\\Pedrycz\\Result\\NNTopologies.csv")
        a<-length(DInner) 
        for(j in 1:25){
            FitSet <- read.csv(DInner[j+25], header=TRUE, sep=",")
            TestSet <- read.csv(DInner[j], header=TRUE, sep=",")
            BestValues<-read.csv(filePath,header=TRUE, sep=",")
            ## to scroll down the fine for different values of c        
            lrat<-BestValues$lrat[j+a*(i-1)+i-1]
            mrat<-BestValues$mrat[j+a*(i-1)+i-1]
            h<-BestValues$h[j+a*(i-1)+i-1]
            MultilayerPerceptronNN(FitSet,TestSet,lrat,mrat,h)
            RadialBasisFunctionNetwork(FitSet,TestSet)
            LinearRegression(FitSet,TestSet)
        }
    }
}
     
RadialBasisFunctionNetwork<-function(FitSet,TestSet){
RBFN<-make_Weka_classifier("weka/classifiers/functions/RBFNetwork")
##WOW(RBFN)
## http://weka.sourceforge.net/doc/weka/classifiers/functions/RBFNetwork.html
ResultRBFN<-RBFN(FailureNumber ~ .,  data=TrainingSet, control=Weka_control(B= 2)) 
##summary describe the quality of fit of the NN
Summary<-summary(ResultRBFN)
MR<-Summary$details[2]
##I introduced the min between nrow(TestSet) and 10 to run the evaluation
nf<-min(10,nrow(TestSet))
##The cost matrix tells whether is more dangerous to have FP or FN. 
##In particular, we decided to have balanceed miscalssification rate and so the cost matrix is antidiagonal. 
##when testing we determine the prediction. ResultList will tell what will happen in percentsges to a new future dataset 
## numFolds is the option of Weka for which a number of folds n is specified and hte dataset is randomly reordered and then slpit into
## n folds of equal size. In each iteration one fold is used for testing and the other n-1 is used for trianing the classifier. The test results are avaraged 
## over all folds to give the accuracy.
TestRBFN<-evaluate_Weka_classifier(ResultRBFN, newdata= TestSet, normalize= TRUE, cost=matrix(c(0,1,1,0), ncol=2),complexity=TRUE,class=TRUE, numFolds=nf)
DetailsVector<-TestRBFN$details
##Misclassification rate as in Khoshgoftaar
TestMR<-DetailsVector[2]
Details<-c(MR,TestMR)
names(Details)<-c("MR training", "MR testing")
return(Details)
}

##to classify with linear regression non numeric instances we use ClassificationViaRegression with linearRegression
LinearRegression<-function(TrainingSet,TestSet,v,h){
LR<-make_Weka_classifier("weka/classifiers/meta/ClassificationViaRegression")
##WOW(LR), http://weka.sourceforge.net/doc/weka/classifiers/functions/LinearRegression
ResultLR<-LR(FailureNumber ~ .,data=TrainingSet, control=Weka_control(D=TRUE, W="weka.classifiers.functions.LinearRegression", S=0 ))
##summary describe the quality of fit of the LR
summary(ResultLR)
## the cost matrix tells whether is more dangerous to have FP or FN. 
##In particular, it has to have 0 on the diagonal. 
##Balanced misclassification rates means equal values on the anti-diagonal
##the matrix is created with a vector of values: the first two describe the first column
##I introduced the min between nrow(TestSet) and 10 to run the evaluation
nf<-min(10,nrow(TestSet))
##when testing we determine the prediction. ResultList will tell what will happen in percentsges to a new future dataset 
## numFolds is the option of Weka for which a number of folds n is specified and hte dataset is randomly reordered and then slpit into
## n folds of equal size. In each iteration one fold is used for testing and the other n-1 is used for trianing the classifier. The test results are avaraged 
## over all folds to give the accuracy.
TestLR<-evaluate_Weka_classifier(ResultLR, newdata= TestSet, normalize= TRUE, cost=matrix(c(0,1,1,0), ncol=2),complexity=TRUE,class=TRUE, numFolds=nf)
DetailsVector<-TestRBFN$details
##Misclassification rate as in Khoshgoftaar
TestMR<-DetailsVector[2]
Details<-c(MR,TestMR)
names(Details)<-c("MR training", "MR testing")
return(Details)
}



## d is boolean (TRUE/FALSE)H is the number of hidden layers, L learning rate, V percentage of validation set; M momentum
## pay attention sometimes the nrow of TestSet is too small to replicate with 10 folds. 
MultilayerPerceptronNN<-function(TrainingSet,TestSet,l,m,h){
NN<-make_Weka_classifier("weka/classifiers/functions/MultilayerPerceptron")
##WOW(NN)http://weka.sourceforge.net/doc/weka/classifiers/functions/MultilayerPerceptron
## data is normalized by default = I=FALSE
ResultNN<-NN(FailureNumber ~ .,data=TrainingSet,  control=Weka_control(S= 1, E= 20,L=l,M=m,N=500,V=30,H=h, D=TRUE ))
summary(ResultNN)
##summary describe the quality of fit of the NN

## the cost matrix tells whether is more dangerous to have FP or FN. 
##In particular, it has to have 0 on the diagonal. 
##Balanced misclassification rates means equal values on the anti-diagonal
##the matrix is created with a vector of values: the first two describe the first column
##I introduced the min between nrow(TestSet) and 10 to run the evaluation
nf<-min(10,nrow(TestSet))
##when testing we determine the prediction. ResultList will tell what will happen in percentsges to a new future dataset 
## numFolds is the option of Weka for which a number of folds n is specified and hte dataset is randomly reordered and then slpit into
## n folds of equal size. In each iteration one fold is used for testing and the other n-1 is used for trianing the classifier. The test results are avaraged 
## over all folds to give the accuracy.
TestNN<-evaluate_Weka_classifier(ResultNN, newdata= TestSet, normalize= TRUE, cost=matrix(c(0,1,1,0), ncol=2),complexity=TRUE,class=TRUE, numFolds=nf)
DetailsVector<-TestRBFN$details
##Misclassification rate as in Khoshgoftaar
TestMR<-DetailsVector[2]
Details<-c(MR,TestMR)
names(Details)<-c("MR training", "MR testing")
return(Details)
}