#NIRSMultR - Code to fit multiple end-member spectra to sets of individual sample 
#spectra using multiple regression. Results are plotted and saved.
#Fiona Erica Russell & John Boyle

#INPUT FILES - all tab delimited text files comprising columns of NIR spectra
#These should be located in the folder specified in the code
#Any number of rows permitted, but X and Y files must have the same number of these

#Sets of dependent variables (Ys) with sample name or sample in the top row,
#and spectra below

#Independent variables (Xs) with end-member name in the top row, CIF value in the 
#second row, and spectra below
#Edit to select a subset of Xs in the code below for inclusion in a particular run

#OUTPUT FILE - tab delimited text file, written to the specifed input folder
#If a file of the same name is open (repeat run), the output will fail
#Output comprises columns of coefficients
#Col 1  = Sample name or number read from the input file
#Col 2  = R2 of regression for the corresponding  
#Col 3  = The regression intercept
#Col 4 + = The regression coefficients, headed by the end-member name
#Subsequent columns = 95% CIs for each end-member included

#SPECIFY THE INPUT/OUTPUT FOLDER
#setwd("C:/Users/jfb/Dropbox/NIRS RAW/17pt")  #FOR EXAMPLE
#setwd("C:/Users/johnb/Dropbox/NIRS RAW/Paper 1 Raw")
setwd("C:/Users/Fiona/Dropbox/NIRS RAW/Paper3_Fives/Fiona5vs/Gran")

#INITIALISE A NUMBER OF VECTORS/MATRICES
CIrow<-c(rep(0, 4))#Need row vector to store CIs for regression coefficients
outMatrix <- matrix(nrow = 1, ncol = 6)# create an output matrix
outr2 <- matrix(nrow = 1, ncol = 1) #Create object to be added to output matrix


#PASTE HERE THE NAME OF THE Ys INPUT FILE
fName<-"LGran_sc_8000.txt"

Y <- read.table(fName, header = TRUE, sep = "") #OBTAINS THE Y VALUES
Ynames <- read.table(fName, header = FALSE, sep = "") #STORES THE Y NAMES SEPARATELY

#PASTE HERE THE NAME OF THE Xs INPUT FILE
X <- read.table("Xs_fives_Min&Organics_Fiona.txt", header = TRUE, sep = "")

warnings() #

#FOR THE Xs, STORE THE CIF VALUE SEPARATELY AND THEN REMOVE FROM THE MATRIX
#Read the CIF values from X, then  delete that row
CIFvalues<-X[c(1),]
#Now, delete the CIF row from X
X<-X[-c(1),]
 
#FIND AND STORE THE NUMBER OF Xs AND Ys
samples <- ncol(Y) #find number of samples from Y input columns
depths<-c(rep(0,samples)) # find place to store depth values from input file
masses<-c(rep(0, samples)) #find somewhere to store sample mass
Standards <- ncol(X) # ditto

#loop through Y data columns applying the lm function in multiple regression mode to each column
for(a in 1:samples) {
#EDIT THE LINE BELOW TO INCLUDE END-MEMBERS FROM THE Xs FILE
#THE CODE CAN TAKE ANY NUMBER, BUT 4 -6 IS LIKELY MAXIMUM FOR MEANINGFUL INTERPRETATION
    out1 <- lm(formula = Y[[a]] ~  X$SGoo +X$AvLor +X$SAlgCMere +X$OMMHumin +X$HAMig) 

  #Then store the coefficients in out
  out <- coef(out1) 
#Finally store the confidence interval for each coefficient, and the model r2 value
  ci<-confint(out1, level = 0.95) 
  r2 <- summary.lm(out1)   
rows<-nrow(ci) 

#Get the CIs into a single row
for(b in 1:rows){
CIrow[b*2-1] <-ci[b,1]
CIrow[b*2] <- ci[b,2]
}


#this createS (firstsample) or appendS (subsequent samples) data to matrices for output
if (a==1) {
  outMatrix <- rbind(out)
  outr2 <- rbind(r2$r.squared)
  outCI <- rbind(CIrow)
} else {   
  outMatrix <- rbind(outMatrix, out)
  outr2 <- rbind(outr2, r2$r.squared)
  outCI <- rbind(outCI, CIrow)
}
#Place depth in vector FOR THE LATER PLOTS
depths[a]<-Ynames[1,a]


} #end of loop through Y Data columns



#combine output matrices to single matrix
Fulloutmatrix <-cbind(outMatrix, outCI) 

#find number of variables used 
Xvars<-length(out)-1

#Create data frame for output
outData<-data.frame(Fulloutmatrix, row.names = NULL) 

#TIDY UP COLUMN NAMES
#Now remove X. from each component current name
names(outData)<-substring(names(outData),3,12)
names(outData)[1]<-"Int" #make exception for this one - name too long

#Now add in new names
names(outr2)<-"R2"
for (a in 1:(Xvars+1)){
  names(outData)[Xvars+1+a*2-1]<-paste(names(outData)[a], "-CI")
  names(outData)[Xvars+1+a*2]<-paste(names(outData)[a], "+CI")
}


#NOW USE CIF VALUES TO CONVERT FROM COEFFICIENTS (CHROMATIC INTENSITIES) TO MASS PROPORTIONS


#First collect the CIF values of the Xs that were used, and place in simple array
cif<-c(rep(1, Xvars)) #somewhere to put the CIF values
for (a in 1:Xvars){
 cif[a]<-CIFvalues[[names(outData)[a+1]]] 
}

#Use CIF values to find total mass Weighted intensities, then convert these to mass %
#Find "chromatic mass", i.e., the sum across components of MR-coeef * CIF
for (a in 1:samples){
  masses[a]<-0    
  for (b in 1:Xvars+1){
    masses[a]<-masses[a] + outData[a,b]*cif[b-1] 
  }
}

#next normalise to sample "chromatic mass"
for (a in 1:samples){
  for (b in 1:Xvars+1){
    outData[a,b]<-( outData[a,b] * cif[b-1]/(masses[[a]]))   
  }
  for (b in 1:(Xvars)){
    outData[a,Xvars+3+b*2-1]<-( outData[a,Xvars+3+b*2-1] * cif[b]/(masses[[a]]))   
    outData[a,Xvars+3+b*2]<-( outData[a,Xvars+3+b*2] * cif[b]/(masses[[a]]))   
      }
}

#ADD DEPTH AND R2 COLUMNS TO THE MATRIX
outData<-cbind(depths, outr2, outData)

#WRITE THIS TO A FILE IN THE CURRENT FOLDER
write.table(outData, file = "coefs.txt", col.names = TRUE, row.names = FALSE, sep = "\t")

#SPECIFY THE PLOT LAYOUT
mat<-rbind(1:2,3:4, 5:6)
layout(mat)
par(mfrow=c(3,3))

plot(depths, 1-(outData[,(2)]), #PLOT UNEXPLAINED VARIANCE (1-R2)
     xlab = "Samples", 
     ylab = "1-R2", 
     type = "l",
     pch = 5,
     cex = 0.1
)

for (b in 1:(Xvars)){         #CYCLE THROUGH Xs, PLOTTING THE MASS PROPORTIONS, AND ADDING ci LINES
  plot(depths, outData[,b+3], 

        xlab = "Samples", 
        ylab = names(outData)[b+3],
        ylim = c(min(outData[,Xvars+5+2*b], outData[,Xvars+5+2*b-1]), max(outData[,Xvars+5+2*b], outData[,Xvars+5+2*b-1])),
        type = "o",                    #SYMBOLS
        pch = 5,
        cex = 0.2
  )  
  
  lines(depths, outData[,Xvars+5+2*b],    
        type = "l",
        col = "red",
        pch = 5,
        cex = 0.2
    )
  lines(depths, outData[,Xvars+5+2*b-1], 
        type = "l",
        col = "red",
        pch = 5,
        cex = 0.2
  )  
  lines(depths, outData[,b+3], 
        type = "l",                   #NOW LINES
        col = "black",
        pch = 5,
        cex = 0.2
  )
}

100*(1-mean(outData[,(2)])) #READ OUT THE MEAN R2 VALUE