#NIRSMultR - Code to fit multiple end-member spectra to sets of individual sample #spectra using multiple regression. Results are plotted and saved. #Fiona Erica Russell & John Boyle #INPUT FILES - all tab delimited text files comprising columns of NIR spectra #These should be located in the folder specified in the code #Any number of rows permitted, but X and Y files must have the same number of these #Sets of dependent variables (Ys) with sample name or sample in the top row, #and spectra below #Independent variables (Xs) with end-member name in the top row, CIF value in the #second row, and spectra below #Edit to select a subset of Xs in the code below for inclusion in a particular run #OUTPUT FILE - tab delimited text file, written to the specifed input folder #If a file of the same name is open (repeat run), the output will fail #Output comprises columns of coefficients #Col 1 = Sample name or number read from the input file #Col 2 = R2 of regression for the corresponding #Col 3 = The regression intercept #Col 4 + = The regression coefficients, headed by the end-member name #Subsequent columns = 95% CIs for each end-member included #SPECIFY THE INPUT/OUTPUT FOLDER #setwd("C:/Users/jfb/Dropbox/NIRS RAW/17pt") #FOR EXAMPLE #setwd("C:/Users/johnb/Dropbox/NIRS RAW/Paper 1 Raw") setwd("C:/Users/Fiona/Dropbox/NIRS RAW/Paper3_Fives/Fiona5vs/Gran") #INITIALISE A NUMBER OF VECTORS/MATRICES CIrow<-c(rep(0, 4))#Need row vector to store CIs for regression coefficients outMatrix <- matrix(nrow = 1, ncol = 6)# create an output matrix outr2 <- matrix(nrow = 1, ncol = 1) #Create object to be added to output matrix #PASTE HERE THE NAME OF THE Ys INPUT FILE fName<-"LGran_sc_8000.txt" Y <- read.table(fName, header = TRUE, sep = "") #OBTAINS THE Y VALUES Ynames <- read.table(fName, header = FALSE, sep = "") #STORES THE Y NAMES SEPARATELY #PASTE HERE THE NAME OF THE Xs INPUT FILE X <- read.table("Xs_fives_Min&Organics_Fiona.txt", header = TRUE, sep = "") warnings() # #FOR THE Xs, STORE THE CIF VALUE SEPARATELY AND THEN REMOVE FROM THE MATRIX #Read the CIF values from X, then delete that row CIFvalues<-X[c(1),] #Now, delete the CIF row from X X<-X[-c(1),] #FIND AND STORE THE NUMBER OF Xs AND Ys samples <- ncol(Y) #find number of samples from Y input columns depths<-c(rep(0,samples)) # find place to store depth values from input file masses<-c(rep(0, samples)) #find somewhere to store sample mass Standards <- ncol(X) # ditto #loop through Y data columns applying the lm function in multiple regression mode to each column for(a in 1:samples) { #EDIT THE LINE BELOW TO INCLUDE END-MEMBERS FROM THE Xs FILE #THE CODE CAN TAKE ANY NUMBER, BUT 4 -6 IS LIKELY MAXIMUM FOR MEANINGFUL INTERPRETATION out1 <- lm(formula = Y[[a]] ~ X$SGoo +X$AvLor +X$SAlgCMere +X$OMMHumin +X$HAMig) #Then store the coefficients in out out <- coef(out1) #Finally store the confidence interval for each coefficient, and the model r2 value ci<-confint(out1, level = 0.95) r2 <- summary.lm(out1) rows<-nrow(ci) #Get the CIs into a single row for(b in 1:rows){ CIrow[b*2-1] <-ci[b,1] CIrow[b*2] <- ci[b,2] } #this createS (firstsample) or appendS (subsequent samples) data to matrices for output if (a==1) { outMatrix <- rbind(out) outr2 <- rbind(r2$r.squared) outCI <- rbind(CIrow) } else { outMatrix <- rbind(outMatrix, out) outr2 <- rbind(outr2, r2$r.squared) outCI <- rbind(outCI, CIrow) } #Place depth in vector FOR THE LATER PLOTS depths[a]<-Ynames[1,a] } #end of loop through Y Data columns #combine output matrices to single matrix Fulloutmatrix <-cbind(outMatrix, outCI) #find number of variables used Xvars<-length(out)-1 #Create data frame for output outData<-data.frame(Fulloutmatrix, row.names = NULL) #TIDY UP COLUMN NAMES #Now remove X. from each component current name names(outData)<-substring(names(outData),3,12) names(outData)[1]<-"Int" #make exception for this one - name too long #Now add in new names names(outr2)<-"R2" for (a in 1:(Xvars+1)){ names(outData)[Xvars+1+a*2-1]<-paste(names(outData)[a], "-CI") names(outData)[Xvars+1+a*2]<-paste(names(outData)[a], "+CI") } #NOW USE CIF VALUES TO CONVERT FROM COEFFICIENTS (CHROMATIC INTENSITIES) TO MASS PROPORTIONS #First collect the CIF values of the Xs that were used, and place in simple array cif<-c(rep(1, Xvars)) #somewhere to put the CIF values for (a in 1:Xvars){ cif[a]<-CIFvalues[[names(outData)[a+1]]] } #Use CIF values to find total mass Weighted intensities, then convert these to mass % #Find "chromatic mass", i.e., the sum across components of MR-coeef * CIF for (a in 1:samples){ masses[a]<-0 for (b in 1:Xvars+1){ masses[a]<-masses[a] + outData[a,b]*cif[b-1] } } #next normalise to sample "chromatic mass" for (a in 1:samples){ for (b in 1:Xvars+1){ outData[a,b]<-( outData[a,b] * cif[b-1]/(masses[[a]])) } for (b in 1:(Xvars)){ outData[a,Xvars+3+b*2-1]<-( outData[a,Xvars+3+b*2-1] * cif[b]/(masses[[a]])) outData[a,Xvars+3+b*2]<-( outData[a,Xvars+3+b*2] * cif[b]/(masses[[a]])) } } #ADD DEPTH AND R2 COLUMNS TO THE MATRIX outData<-cbind(depths, outr2, outData) #WRITE THIS TO A FILE IN THE CURRENT FOLDER write.table(outData, file = "coefs.txt", col.names = TRUE, row.names = FALSE, sep = "\t") #SPECIFY THE PLOT LAYOUT mat<-rbind(1:2,3:4, 5:6) layout(mat) par(mfrow=c(3,3)) plot(depths, 1-(outData[,(2)]), #PLOT UNEXPLAINED VARIANCE (1-R2) xlab = "Samples", ylab = "1-R2", type = "l", pch = 5, cex = 0.1 ) for (b in 1:(Xvars)){ #CYCLE THROUGH Xs, PLOTTING THE MASS PROPORTIONS, AND ADDING ci LINES plot(depths, outData[,b+3], xlab = "Samples", ylab = names(outData)[b+3], ylim = c(min(outData[,Xvars+5+2*b], outData[,Xvars+5+2*b-1]), max(outData[,Xvars+5+2*b], outData[,Xvars+5+2*b-1])), type = "o", #SYMBOLS pch = 5, cex = 0.2 ) lines(depths, outData[,Xvars+5+2*b], type = "l", col = "red", pch = 5, cex = 0.2 ) lines(depths, outData[,Xvars+5+2*b-1], type = "l", col = "red", pch = 5, cex = 0.2 ) lines(depths, outData[,b+3], type = "l", #NOW LINES col = "black", pch = 5, cex = 0.2 ) } 100*(1-mean(outData[,(2)])) #READ OUT THE MEAN R2 VALUE