#This was created by Gavin Brown #if you have any questions feel free to contact me at browng@reed.edu, or Brown.Gavin.D@gmail.com ########################################################### #load in the data #set the working dir to where the data is located #setwd( "C:/Documents and Settings/Heather de Glanville/My Documents/RennLab/Annas Arrays") setwd( "C:\\Documents and Settings\\Heather de Glanville\\My Documents\\RennLab\\Mollys_data\\Marlieri") #load limma package library(limma) #read in the targets file, see ?readTargets for help #targets <- readTargets("targetsM.txt") targets <- readTargets("targetsmarlieri.csv", sep=",") #read in the R and G values RG <- read.maimages(targets$FILE,source="genepix.median",other.columns=list("Flags","B635 SD","B532 SD","F635 % Sat.","F532 % Sat."),wt.fun=function(x) as.numeric(x$Flags==0)) #RG <- read.maimages(targets$FileName,source="genepix.median",other.columns=list("Flags","B635 SD","B532 SD","F635 % Sat.","F532 % Sat."),wt.fun=function(x) as.numeric(x$Flags==0)) BC <- ((RG$R < (RG$Rb + 2*RG$other$"B635 SD")) & (RG$G < (RG$Gb + 2*RG$other$"B532 SD"))) RG$R[BC]<-NA RG$G[BC]<-NA ########## #Replace spots that are flagged bad, with NA !!!!!!!! Changed by HEM to only exclude bad spots flag<- RG$Flag < 0 RG$R[flag]=NA RG$G[flag]=NA #read in the Gal File #TC's should be listed under a column in the GAL file titled "DFCI_TC_2009" RG$genes <- readGAL("Fishchip4.03_annotatedGAL_20090730HEM.gal") RG$printer <- getLayout(RG$genes) #read in the spot types types <- readSpotTypes("SpotTypesCichlidLibrary.txt") #make sure that the correct number of spots are identified for each type. If no spots are identified and you think some should be examine the spotTypes file. Try putting a * before and after the spot type in the file (R uses regular pattern matching). #spot types are used for batch normalization RG$genes$Status <-controlStatus(types,RG) #design matrix #design=modelMatrix(targets,ref="Mt105") design=modelMatrix(targets,ref="JMK14m") #if you are using batch normalization you need to tell the function which features are part of the batch batch<-grep("old", RG$genes$Status) #All TC's with at least 4 TCtab<-table(RG$genes$DFCI_TC_2009) TCtab2<- sort(TCtab) TCtab4<- TCtab2[TCtab2>3] #190 TCreg<-names(TCtab4) TCreg<-TCreg[1:188] # the last 2 were "NO_2009_TC" and "none" # the 4+ TCs sdevs=compareNormalizations(RG=RG,datrm="yes",design=design,TC=TCreg,batch=batch, AllTCOption=TRUE, background="minimum", within=c("loess","printtiploess","batch","batchprinttiploess","none"),between=c("none")) sdevsbigect=compareNormalizations(RG=RG,datrm="yes",design=design,TC=TCreg,batch=batch, background=c("none","minimum"), within=c("none","loess","printtiploess","batch","batchprinttiploess"),between=c("none", "quantile")) sdevsbigmar=compareNormalizations(RG=RG,datrm="yes",design=design,TC=TCreg,batch=batch, background=c("none","minimum"), within=c("none","loess","printtiploess","batch","batchprinttiploess"),between=c("none", "quantile")) #^^^^^^^^^^^^Value^^^^^^^^^^^^^^^^ #A list with summarys for each TC chosen as well as a random subset of the data of equal size to all of the TC's combined. #Each summary is a list with components "coef", "sd", "TC" and "score". #"coef" is a list where each element is a matrix of the coeficients for the chosen TC and each item in the list corresponds to the different normalization methods selected. #"sd" is a matrix giving the standard deviations of "coef" #"TC" is which TC the analysis was for. #"score" the median of the standard deviations of all TC's within that method. #"IQR" the IQR of the standard deviations of all TC's within that method. #examine boxplots of the chosen coeff across normalization methods in a chosen tc. plot.normcomp(sdevs,coeff=1,TC="TC1884",box=TRUE) #examine the same but for a random set of genes instead of a TC plot.normcomp(sdevs4,coeff=1,TC="Randset",box=TRUE) #see the mean of the standard deviations of coefficents over all TC's and a randomset of genes of equal size to all the TC's combined. Also shows the IQR (Inter Quartile Range) of the standard deviations. report(sdevs) #Copy and paste from here to the other marker like the one below at the end of the file to define the functions ######################################################### #<><><><><><><><><><><><><><><><><><><><><><><><><><><><> #******************************************************** #<><><><><><><><><><><><><><><><><><><><><><><><><><><><> ######################################################### compareNormalizations<-function(RG=NULL, datrm=NULL, background=NULL , within=NULL, between=NULL , design=NULL,TC=NULL,batch=NULL){ require("limma") if(is.null(RG)){ stop("RG needs to be specifed") } if(is.null(design)){ stop("design needs to be specified") } if(is.null(TC)){ stop("TC needs to be specified") } if (is.null(datrm)){ cat("Enter options for data removal, press enter between options: Option 1: no Option 2: yes Press enter again when you are finished. \n") datrm=scan(what=character()) } datrm.fin=match.arg(datrm,c("yes","no"),several.ok=TRUE) if (length(datrm)!=length(datrm.fin)){ cat(datrm,":: in datrm does not match options of:", c("yes","no")," \n") } datrm=datrm.fin cat("*For datrm using : ",datrm,"\n \n") if (is.null(background)){ cat("Enter options for background correction, press enter between options: Option 1: none Option 2: minimum Option 3: subtract Option 4: normexp Press enter again when you are finished. \n") background=scan(what=character()) } background.fin=match.arg(background,c("none","minimum","subtract","normexp"),several.ok=TRUE) if (length(background)!=length(background.fin)){ cat(background,":: in background does not match options of:",c("none","minimum","subtract","normexp"),"\n") } background=background.fin cat("*For background using : ",background,"\n \n") if (is.null(within)){ cat("Enter options for within Array Normalization, press enter between options: Option 1: none Option 2: loess Option 3: printtiploess / ptl Option 4: batch Option 5: batchprinttiploess / bptl Press enter again when you are finished. \n") within=scan(what=character()) } within.abr=match.arg(within,c("none","loess","printtiploess","batch","batchprinttiploess","ptl","bptl"),several.ok=TRUE) within.abr[which(within.abr=="ptl")]<-"printtiploess" within.abr[which(within.abr=="bptl")]<-"batchprinttiploess" within.fin=match.arg(within.abr,c("none","loess","printtiploess","batch","batchprinttiploess"),several.ok=TRUE) if (length(within)!=length(within.fin)){ cat(within,":: in within does not match options of:",c("none","loess","printtiploess","batch","batchprinttiploess"),"\n") } within=within.fin cat("*For within using : ",within,"\n \n") if (is.null(between)){ cat("Enter options for between Array Normalization, press enter between options: Option 1: none Option 2: quantile Option 3: vsn Press enter again when you are finished. \n") between=scan(what=character()) } between.fin=match.arg(between,c("none","quantile","vsn"),several.ok=TRUE) if (length(between)!=length(between.fin)){ cat(between,":: in between does not match options of:",c("none","quantile","vsn"),"\n") } between=between.fin cat("*For between using : ",between,"\n \n") datarm=any(datrm=="yes") datarm.n=any(datrm=="no") if (!(datarm | datarm.n)){ stop(paste('Valid options for data removal are "yes" and "no" instead of:',datrm)) } bc.n=any(background=="none") bc.min=any(background=="minimum") bc.sub=any(background=="subtract") bc.normexp=any(background=="normexp") if (!(bc.n | bc.min | bc.sub |bc.normexp)){ stop(paste('Valid options for background are "none", "minimum", "subtract", and "normexp" instead of:', background)) } within.n=any(within=="none") within.loess=any(within=="loess") within.ptl=any(within=="printtiploess") within.batch=any(within=="batch") within.bptl=any(within=="batchprinttiploess") if (!(within.n | within.loess | within.ptl | within.batch | within.bptl)){ stop('Valid options for within are "none", "loess", "ptl", "batch", and "batchptl" instead of:', within) } between.n=any(between=="none") between.quan=any(between=="quantile") between.vsn=any(between=="vsn") if (!(between.n | between.quan | between.vsn)){ stop(paste('Valid options for between are "none", "quantile", and "vsn" instead of:', between)) } if ((within.batch | within.bptl) & is.null(batch)){ stop("batch needs to be specified.") } if (between.vsn){ require("vsn") } ########################################################### #Data removal #Note there are 163486 (35.5%) spots that will be removed if all three are run. if (datarm){ RG.datarm=RG ######### #Replace spots that are within 2 SD's of the background with NA. BC <- ((RG.datarm$R < (RG.datarm$Rb + 2*RG.datarm$other$"B635 SD")) & (RG.datarm$G < (RG.datarm$Gb + 2*RG.datarm$other$"B532 SD"))) RG.datarm$R[BC]<-NA RG.datarm$G[BC]<-NA ########## #Replace spots that are flagged bad, with NA !!!!!!!! Changed by HEM to only exclude bad spots flag<- RG.datarm$other$Flag < 0 RG.datarm$R[flag]=NA RG.datarm$G[flag]=NA } ############################################################ #BackGround Correction if (datarm){ if (bc.normexp) { RG.datarm.normexp<-backgroundCorrect(RG.datarm, method="normexp", offset=30,verbose=FALSE) } if (bc.sub) { RG.datarm.sub=backgroundCorrect(RG.datarm,method="subtract") } if (bc.min) { RG.datarm.min=backgroundCorrect(RG.datarm,method="minimum") } } if (datarm.n){ if (bc.normexp) { RG.normexp<-backgroundCorrect(RG, method="normexp", offset=30,verbose=FALSE) } if (bc.sub) { RG.sub=backgroundCorrect(RG,method="subtract") } if (bc.min) { RG.min=backgroundCorrect(RG,method="minimum") } } ############################################################ #Within array normalization if (datarm){ if (bc.n){ if (within.loess){ RG.datarm.loess=normalizeWithinArrays(RG.datarm,method="loess",bc.method="none") } if (within.bptl){ RG.datarm.bptl=normalizeWithinArraysBatch(RG.datarm,method="bPrinttip",bc.method="none",batch=batch) } if (within.batch) { RG.datarm.batch=normalizeWithinArraysBatch(RG.datarm,method="batch",bc.method="none",batch=batch) } if (within.ptl){ RG.datarm.ptl=normalizeWithinArrays(RG.datarm,method="printtiploess",bc.method="none") } } if (bc.min) { if (within.ptl){ RG.datarm.min.ptl=normalizeWithinArrays(RG.datarm.min,method="printtiploess",bc.method="none") } if (within.bptl){ RG.datarm.min.bptl=normalizeWithinArraysBatch(RG.datarm.min,method="bPrinttip",bc.method="none",batch=batch) } if (within.batch){ RG.datarm.min.batch=normalizeWithinArraysBatch(RG.datarm.min,method="batch",bc.method="none",batch=batch) } if (within.loess){ RG.datarm.min.loess=normalizeWithinArrays(RG.datarm.min,method="loess",bc.method="none") } } if (bc.sub){ if (within.ptl){ RG.datarm.sub.ptl=normalizeWithinArrays(RG.datarm.sub,method="printtiploess",bc.method="none") } if (within.batch){ RG.datarm.sub.batch=normalizeWithinArraysBatch(RG.datarm.sub,method="batch",bc.method="none",batch=batch) } if (within.loess){ RG.datarm.sub.loess=normalizeWithinArrays(RG.datarm.sub,method="loess",bc.method="none") } if (within.bptl){ RG.datarm.sub.bptl=normalizeWithinArraysBatch(RG.datarm.sub,method="bPrinttip",bc.method="none",batch=batch) } } if (bc.normexp){ if (within.ptl){ RG.datarm.normexp.ptl<-normalizeWithinArrays(RG.datarm.normexp,method="printtiploess",bc.method="none") } if (within.batch){ RG.datarm.normexp.batch<-normalizeWithinArraysBatch(RG.datarm.normexp,method="batch",bc.method="none",batch=batch) } if (within.loess){ RG.datarm.normexp.loess<-normalizeWithinArrays(RG.datarm.normexp,method="loess",bc.method="none") } if (within.bptl){ RG.datarm.normexp.bptl<-normalizeWithinArraysBatch(RG.datarm.normexp,method="bPrinttip",bc.method="none",batch=batch) } } } if (datarm.n){ if(bc.n){ if (within.loess){ RG.loess=normalizeWithinArrays(RG,method="loess",bc.method="none") } if (within.batch){ RG.batch=normalizeWithinArraysBatch(RG,method="batch",bc.method="none",batch=batch) } if (within.ptl){ RG.ptl=normalizeWithinArrays(RG,method="printtiploess",bc.method="none") } if (within.bptl){ RG.bptl=normalizeWithinArraysBatch(RG,method="bPrinttip",bc.method="none",batch=batch) } } if(bc.min){ if (within.batch){ RG.min.batch=normalizeWithinArraysBatch(RG.min,method="batch",bc.method="none",batch=batch) } if (within.ptl){ RG.min.ptl=normalizeWithinArrays(RG.min,method="printtiploess",bc.method="none") } if (within.loess){ RG.min.loess=normalizeWithinArrays(RG.min,method="loess",bc.method="none") } if (within.bptl){ RG.min.bptl=normalizeWithinArraysBatch(RG.min,method="bPrinttip",bc.method="none",batch=batch) } } if(bc.sub){ if (within.batch){ RG.sub.batch=normalizeWithinArraysBatch(RG.sub,method="batch",bc.method="none",batch=batch) } if (within.bptl){ RG.sub.bptl=normalizeWithinArraysBatch(RG.sub,method="bPrinttip",bc.method="none",batch=batch) } if (within.ptl){ RG.sub.ptl=normalizeWithinArrays(RG.sub,method="printtiploess",bc.method="none") } if (within.loess){ RG.sub.loess=normalizeWithinArrays(RG.sub,method="loess",bc.method="none") } } if(bc.normexp){ if (within.loess){ RG.normexp.loess<-normalizeWithinArrays(RG.normexp,method="loess",bc.method="none") } if (within.bptl){ RG.normexp.bptl<-normalizeWithinArraysBatch(RG.normexp,method="bPrinttip",bc.method="none",batch=batch) } if (within.ptl){ RG.normexp.ptl<-normalizeWithinArrays(RG.normexp,method="printtiploess",bc.method="none") } if (within.batch){ RG.normexp.batch<-normalizeWithinArraysBatch(RG.normexp,method="batch",bc.method="none",batch=batch) } } } ############################################################ #Between array normalzation #Also determines which methods were used and names them in meth #Builds a selections matrix #builds a list m of the MA objects count=1 ld=length(datarm) lbc=length(background) lw=length(within) if (between.vsn){ lbe=length(between)-1 ns=ld*lbc*lw*lbe + 1#number of overall selections }else{ lbe=length(between) ns=ld*lbc*lw*lbe #number of overall selections } m=vector("list",ns) meth=vector(mode="character",length=ns) if (between.quan){ if (datarm){ if (bc.n){ if(within.n){ RG.datarm.quan=normalizeBetweenArrays(MA.RG(RG.datarm,bc.method="none"), method="quantile") meth[count]="drm.quan" m[[count]]=RG.datarm.quan count=count+1 } if(within.ptl){ RG.datarm.ptl.quan=normalizeBetweenArrays(RG.datarm.ptl, method="quantile") meth[count]="drm.ptl.quan" m[[count]]=RG.datarm.ptl.quan count=count+1 } if(within.bptl){ RG.datarm.bptl.quan=normalizeBetweenArrays(RG.datarm.bptl, method="quantile") meth[count]="drm.bptl.quan" m[[count]]=RG.datarm.bptl.quan count=count+1 } if(within.loess){ RG.datarm.loess.quan=normalizeBetweenArrays(RG.datarm.loess, method="quantile") meth[count]="drm.loess.quan" m[[count]]=RG.datarm.loess.quan count=count+1 } if(within.batch){ RG.datarm.batch.quan=normalizeBetweenArrays(RG.datarm.batch, method="quantile") meth[count]="drm.batch.quan" m[[count]]=RG.datarm.batch.quan count=count+1 } } if (bc.min){ if(within.ptl){ RG.datarm.min.ptl.quan=normalizeBetweenArrays(RG.datarm.min.ptl, method="quantile") meth[count]="drm.min.ptl.quan" m[[count]]=RG.datarm.min.ptl.quan count=count+1 } if(within.bptl){ RG.datarm.min.bptl.quan=normalizeBetweenArrays(RG.datarm.min.bptl, method="quantile") meth[count]="drm.min.bptl.quan" m[[count]]=RG.datarm.min.bptl.quan count=count+1 } if(within.n){ RG.datarm.min.quan=normalizeBetweenArrays(RG.datarm.min, method="quantile") meth[count]="drm.min.quan" m[[count]]=RG.datarm.min.quan count=count+1 } if(within.loess){ RG.datarm.min.loess.quan=normalizeBetweenArrays(RG.datarm.min.loess, method="quantile") meth[count]="drm.min.loess.quan" m[[count]]=RG.datarm.min.loess.quan count=count+1 } if(within.batch){ RG.datarm.min.batch.quan=normalizeBetweenArrays(RG.datarm.min.batch, method="quantile") meth[count]="drm.min.batch.quan" m[[count]]=RG.datarm.min.batch.quan count=count+1 } } if(bc.sub){ if(within.ptl){ RG.datarm.sub.ptl.quan=normalizeBetweenArrays(RG.datarm.sub.ptl, method="quantile") meth[count]="drm.sub.ptl.quan" m[[count]]=RG.datarm.sub.ptl.quan count=count+1 } if(within.batch){ RG.datarm.sub.batch.quan=normalizeBetweenArrays(RG.datarm.sub.batch, method="quantile") meth[count]="drm.sub.batch.quan" m[[count]]=RG.datarm.sub.batch.quan count=count+1 } if(within.loess){ RG.datarm.sub.loess.quan=normalizeBetweenArrays(RG.datarm.sub.loess, method="quantile") meth[count]="drm.sub.loess.quan" m[[count]]=RG.datarm.sub.loess.quan count=count+1 } if(within.bptl){ RG.datarm.sub.bptl.quan=normalizeBetweenArrays(RG.datarm.sub.bptl, method="quantile") meth[count]="drm.sub.bptl.quan" m[[count]]=RG.datarm.sub.bptl.quan count=count+1 } if(within.n){ RG.datarm.sub.quan=normalizeBetweenArrays(RG.datarm.sub, method="quantile") meth[count]="drm.sub.quan" m[[count]]=RG.datarm.sub.quan count=count+1 } } if(bc.normexp){ if(within.ptl){ RG.datarm.normexp.ptl.quan=normalizeBetweenArrays(RG.datarm.normexp.ptl, method="quantile") meth[count]="drm.normexp.ptl.quan" m[[count]]=RG.datarm.normexp.ptl.quan count=count+1 } if(within.loess){ RG.datarm.normexp.loess.quan=normalizeBetweenArrays(RG.datarm.normexp.loess, method="quantile") meth[count]="drm.normexp.loess.quan" m[[count]]=RG.datarm.normexp.loess.quan count=count+1 } if(within.n){ RG.datarm.normexp.quan=normalizeBetweenArrays(RG.datarm.normexp, method="quantile") meth[count]="drm.normexp.quan" m[[count]]=RG.datarm.normexp.quan count=count+1 } if(within.batch){ RG.datarm.normexp.batch.quan=normalizeBetweenArrays(RG.datarm.normexp.batch, method="quantile") meth[count]="drm.normexp.batch.quan" m[[count]]=RG.datarm.normexp.batch.quan count=count+1 } if(within.bptl){ RG.datarm.normexp.bptl.quan=normalizeBetweenArrays(RG.datarm.normexp.bptl, method="quantile") meth[count]="drm.normexp.bptl.quan" m[[count]]=RG.datarm.normexp.bptl.quan count=count+1 } } } if (datarm.n){ if (bc.n){ if(within.n){ RG.quan=normalizeBetweenArrays(MA.RG(RG,bc.method="none"), method="quantile") meth[count]="quan" m[[count]]=RG.quan count=count+1 } if(within.ptl){ RG.ptl.quan=normalizeBetweenArrays(RG.ptl, method="quantile") meth[count]="ptl.quan" m[[count]]=RG.ptl.quan count=count+1 } if(within.bptl){ RG.bptl.quan=normalizeBetweenArrays(RG.bptl, method="quantile") meth[count]="bptl.quan" m[[count]]=RG.bptl.quan count=count+1 } if(within.loess){ RG.loess.quan=normalizeBetweenArrays(RG.loess, method="quantile") meth[count]="loess.quan" m[[count]]=RG.loess.quan count=count+1 } if(within.batch){ RG.batch.quan=normalizeBetweenArrays(RG.batch, method="quantile") meth[count]="batch.quan" m[[count]]=RG.batch.quan count=count+1 } } if (bc.min){ if(within.ptl){ RG.min.ptl.quan=normalizeBetweenArrays(RG.min.ptl, method="quantile") meth[count]="min.ptl.quan" m[[count]]=RG.min.ptl.quan count=count+1 } if(within.bptl){ RG.min.bptl.quan=normalizeBetweenArrays(RG.min.bptl, method="quantile") meth[count]="min.bptl.quan" m[[count]]=RG.min.bptl.quan count=count+1 } if(within.n){ RG.min.quan=normalizeBetweenArrays(RG.min, method="quantile") meth[count]="min.quan" m[[count]]=RG.min.quan count=count+1 } if(within.loess){ RG.min.loess.quan=normalizeBetweenArrays(RG.min.loess, method="quantile") meth[count]="min.loess.quan" m[[count]]=RG.min.loess.quan count=count+1 } if(within.batch){ RG.min.batch.quan=normalizeBetweenArrays(RG.min.batch, method="quantile") meth[count]="min.batch.quan" m[[count]]=RG.min.batch.quan count=count+1 } } if(bc.sub){ if(within.ptl){ RG.sub.ptl.quan=normalizeBetweenArrays(RG.sub.ptl, method="quantile") meth[count]="sub.ptl.quan" m[[count]]=RG.sub.ptl.quan count=count+1 } if(within.batch){ RG.sub.batch.quan=normalizeBetweenArrays(RG.sub.batch, method="quantile") meth[count]="sub.batch.quan" m[[count]]=RG.sub.batch.quan count=count+1 } if(within.loess){ RG.sub.loess.quan=normalizeBetweenArrays(RG.sub.loess, method="quantile") meth[count]="sub.loess.quan" m[[count]]=RG.sub.loess.quan count=count+1 } if(within.bptl){ RG.sub.bptl.quan=normalizeBetweenArrays(RG.sub.bptl, method="quantile") meth[count]="sub.bptl.quan" m[[count]]=RG.sub.bptl.quan count=count+1 } if(within.n){ RG.sub.quan=normalizeBetweenArrays(RG.sub, method="quantile") meth[count]="sub.quan" m[[count]]=RG.sub.quan count=count+1 } } if(bc.normexp){ if(within.ptl){ RG.normexp.ptl.quan=normalizeBetweenArrays(RG.normexp.ptl, method="quantile") meth[count]="normexp.ptl.quan" m[[count]]=RG.normexp.ptl.quan count=count+1 } if(within.loess){ RG.normexp.loess.quan=normalizeBetweenArrays(RG.normexp.loess, method="quantile") meth[count]="normexp.loess.quan" m[[count]]=RG.normexp.loess.quan count=count+1 } if(within.n){ RG.normexp.quan=normalizeBetweenArrays(RG.normexp, method="quantile") meth[count]="normexp.quan" m[[count]]=RG.normexp.quan count=count+1 } if(within.batch){ RG.normexp.batch.quan=normalizeBetweenArrays(RG.normexp.batch, method="quantile") meth[count]="normexp.batch.quan" m[[count]]=RG.normexp.batch.quan count=count+1 } if(within.bptl){ RG.normexp.bptl.quan=normalizeBetweenArrays(RG.normexp.bptl, method="quantile") meth[count]="normexp.bptl.quan" m[[count]]=RG.normexp.bptl.quan count=count+1 } } } } if (between.n){ if (datarm){ if (bc.n){ if(within.n){ meth[count]="drm" m[[count]]=MA.RG(RG.datarm,bc.method="none") count=count+1 } if(within.ptl){ meth[count]="drm.ptl" m[[count]]=RG.datarm.ptl count=count+1 } if(within.bptl){ meth[count]="drm.bptl" m[[count]]=RG.datarm.bptl count=count+1 } if(within.loess){ meth[count]="drm.loess" m[[count]]=RG.datarm.loess count=count+1 } if(within.batch){ meth[count]="drm.batch" m[[count]]=RG.datarm.batch count=count+1 } } if (bc.min){ if(within.ptl){ meth[count]="drm.min.ptl" m[[count]]=RG.datarm.min.ptl count=count+1 } if(within.bptl){ meth[count]="drm.min.bptl" m[[count]]=RG.datarm.min.bptl count=count+1 } if(within.n){ meth[count]="drm.min" m[[count]]=MA.RG(RG.datarm.min,bc.method="none") count=count+1 } if(within.loess){ meth[count]="drm.min.loess" m[[count]]=RG.datarm.min.loess count=count+1 } if(within.batch){ meth[count]="drm.min.batch" m[[count]]=RG.datarm.min.batch count=count+1 } } if(bc.sub){ if(within.ptl){ meth[count]="drm.sub.ptl" m[[count]]=RG.datarm.sub.ptl count=count+1 } if(within.batch){ meth[count]="drm.sub.batch" m[[count]]=RG.datarm.sub.batch count=count+1 } if(within.loess){ meth[count]="drm.sub.loess" m[[count]]=RG.datarm.sub.loess count=count+1 } if(within.bptl){ meth[count]="drm.sub.bptl" m[[count]]=RG.datarm.sub.bptl count=count+1 } if(within.n){ meth[count]="drm.sub" m[[count]]=MA.RG(RG.datarm.sub,bc.method="none") count=count+1 } } if(bc.normexp){ if(within.ptl){ meth[count]="drm.normexp.ptl" m[[count]]=RG.datarm.normexp.ptl count=count+1 } if(within.loess){ meth[count]="drm.normexp.loess" m[[count]]=RG.datarm.normexp.loess count=count+1 } if(within.n){ meth[count]="drm.normexp" m[[count]]=MA.RG(RG.datarm.normexp,bc.method="none") count=count+1 } if(within.batch){ meth[count]="drm.normexp.batch" m[[count]]=RG.datarm.normexp.batch count=count+1 } if(within.bptl){ meth[count]="drm.normexp.bptl" m[[count]]=RG.datarm.normexp.bptl count=count+1 } } } if (datarm.n){ if (bc.n){ if(within.n){ meth[count]="none" m[[count]]=MA.RG(RG,bc.method="none") count=count+1 } if(within.ptl){ meth[count]="ptl" m[[count]]=RG.ptl count=count+1 } if(within.bptl){ meth[count]="bptl" m[[count]]=RG.bptl count=count+1 } if(within.loess){ meth[count]="loess" m[[count]]=RG.loess count=count+1 } if(within.batch){ meth[count]="batch" m[[count]]=RG.batch count=count+1 } } if (bc.min){ if(within.ptl){ meth[count]="min.ptl" m[[count]]=RG.min.ptl count=count+1 } if(within.bptl){ meth[count]="min.bptl" m[[count]]=RG.min.bptl count=count+1 } if(within.n){ meth[count]="min" m[[count]]=MA.RG(RG.min,bc.method="none") count=count+1 } if(within.loess){ meth[count]="min.loess" m[[count]]=RG.min.loess count=count+1 } if(within.batch){ meth[count]="min.batch" m[[count]]=RG.min.batch count=count+1 } } if(bc.sub){ if(within.ptl){ meth[count]="sub.ptl" m[[count]]=RG.sub.ptl count=count+1 } if(within.batch){ meth[count]="sub.batch" m[[count]]=RG.sub.batch count=count+1 } if(within.loess){ meth[count]="sub.loess" m[[count]]=RG.sub.loess count=count+1 } if(within.bptl){ meth[count]="sub.bptl" m[[count]]=RG.sub.bptl count=count+1 } if(within.n){ meth[count]="sub" m[[count]]=MA.RG(RG.sub,bc.method="none") count=count+1 } } if(bc.normexp){ if(within.ptl){ meth[count]="normexp.ptl" m[[count]]=RG.normexp.ptl count=count+1 } if(within.loess){ meth[count]="normexp.loess" m[[count]]=RG.normexp.loess count=count+1 } if(within.n){ meth[count]="normexp" m[[count]]=MA.RG(RG.normexp,bc.method="none") count=count+1 } if(within.batch){ meth[count]="normexp.batch" m[[count]]=RG.normexp.batch count=count+1 } if(within.bptl){ meth[count]="normexp.bptl" m[[count]]=RG.normexp.bptl count=count+1 } } } } #VSN if (between.vsn){ RG.vsn<-normalizeBetweenArrays(RG,method="vsn") meth[count]="vsn" m[[count]]=RG.vsn count=count+1 } ############################################################ #look at coefficients of genes in the same TC group across different methods #DFCI_TC_2009 is where the TC information is located TCs=RG$genes$DFCI_TC_2009 for (i in 1:ns){ m[[i]]<- lmFit(m[[i]],design) } ntc=length(TC) sdevs.l=vector("list",ntc) for (j in 1:ntc){ sdevs=vector("list",ns) sdevs.l[[j]]$coef=vector("list",ns) sdevs.l[[j]]$TC=TC[j] for (i in 1:ns){ ## The coeffiecients from lmFit sdevs.l[[j]]$coef[[i]]=m[[i]]$coefficients[(TCs==TC[j] & !is.na(TCs)),] } ## The method names(sdevs.l[[j]]$coef)=meth } names(sdevs.l)=TC #all genes on array ind=which(!is.na(RG$R[,1]) ) # perform functions on the random or AllTC sets sdevs.l$Randset$coef=vector("list",ns) sdevs.l$Randset$TC="RandSet" #sdevs.m=NULL sdevs=vector("list",ns) for (i in 1:ns){ sdevs.l$Randset$coef[[i]]=m[[i]]$coefficients[ind,] } names(sdevs.l$Randset$coef)=meth #The scores for the report function #The commented out method treats all TC's the same scores=matrix(NA,ncol=ns,nrow=4) for (i in 1:ns){ sds=NA sds.r=NA for (k in 1:dim(design)[2]){ #coefs=NA sds.tc=NA for (j in 1:ntc){ #coefs=c(coefs,sdevs.l[[j]]$coef[[i]][,k]) #compute a sd for the coefs in each TC sds.tc[k]=sd(sdevs.l[[j]]$coef[[i]][,k],na.rm=TRUE) } #sds[k]=sd(coefs,na.rm=TRUE) sds=c(sds,sds.tc) sds.r[k]=sd(sdevs.l$Randset$coef[[i]][,k],na.rm=TRUE) } scores[1,i]=mean(sds,na.rm=TRUE) scores[2,i]=abs(diff(quantile(sds,c(.75,.25),na.rm=TRUE))) scores[3,i]=mean(sds.r,na.rm=TRUE) scores[4,i]=abs(diff(quantile(sds.r,c(.75,.25),na.rm=TRUE))) } colnames(scores)=meth rownames(scores)=c("BigTC MeanSD: ","BigTC IQR: ","AllGenes SD: ", "AllGenes IQR: ") sdevs.l$scores=scores sdevs.l } ########################################################## # A function to summarize the normalization methods form compareNormalizations report<-function(sdevs=NULL){ rp=as.table(sdevs$scores) rp=rp[,order(rp[1,])] rp } ########################################################## # A function to plot a specified coefficient across methods for a specified TC plot.normcomp=function(sdevs,TC=NULL,coeff=1,box=TRUE,...){ if (is.null(TC)){ stop("Please specify the TC") } n=which(names(sdevs)==TC) if (length(n)>1){ stop(paste("Found more than one entry in sdevs corresponding to",TC)) } if (length(n)<1){ stop(paste("In sdevs could not find",TC,sep=" ")) } ns=length(sdevs[[1]]$coef) #number of normalization methods if (is.character(coeff)){ #check that coeff is in the colnames of the coefficients if (!any(colnames(sdevs[[n]]$coef[[1]])==coeff)){ stop(paste(coeff,"is not a column in the coefficients.")) } } if (is.numeric(coeff)){ #check that coeff is in range of the columns of the coefficients if (coeff<1 | coeff>dim(sdevs[[n]]$coef[[1]])[2]){ stop(paste(coeff,"is not a column in the coefficients.")) } } m=matrix(NA,ncol=ns,nrow=length(sdevs[[n]]$coef[[1]][,coeff])) for (i in 1:ns){ m[,i]=sdevs[[n]]$coef[[i]][,coeff] } colnames(m)=names(sdevs[[n]]$coef) par(mar=c(10, 4, 2, 0.5),cex=.7) if(box){ boxplot(m,...,ylab=paste("Coefficent:",coeff,sep=" "), main=TC, las=2) }else{ plot(m~col(m),...,ylab=paste("Coefficent:",coeff,sep=" "), main=TC,xlab="",axes = FALSE) Axis(at=1:dim(m)[2],side=1,labels=colnames(m),las=2) Axis(side=2) box() } } ########################################################### #If you use any of the batch methods make sure to have normalizeWithinArraysBatch defined. normalizeWithinArraysBatch <- function(object,layout=object$printer,method="bPrinttip",weights=object$weights,span=0.3,iterations=4,controlspots=NULL,df=5,robust="M",bc.method="subtract",offset=0, batch=batch) # Within array normalization with batch mode # { if(!is(object,"MAList")) object <- MA.RG(object,bc.method=bc.method,offset=offset) choices <- c("none","median","loess","printtiploess","composite","control","robustspline", "batch", "bPrinttip") method <- match.arg(method,choices) if(method=="none") return(object) if(is.vector(object$M)) object$M <- as.matrix(object$M) if(is.vector(object$A)) object$A <- as.matrix(object$A) if(is.vector(weights)) weights <- as.matrix(weights) narrays <- ncol(object$M) if(method=="median") { if(is.null(weights)) for (j in 1:narrays) object$M[,j] <- object$M[,j] - median(object$M[,j],na.rm=TRUE) else for (j in 1:narrays) object$M[,j] <- object$M[,j] - weighted.median(object$M[,j],weights[,j],na.rm=TRUE) return(object) } # All remaining methods use regression of M-values on A-values switch(method, batch ={ for (j in 1:narrays) { y <- object$M[batch,j] x <- object$A[batch,j] w <- weights[batch,j] object$M[batch,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals cbatch<-setdiff(1:nrow(object$M), batch) y <- object$M[cbatch,j] x <- object$A[cbatch,j] w <- weights[cbatch,j] object$M[cbatch,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals } }, bPrinttip ={ if(is.null(layout)) stop("Layout argument not specified") ngr <- layout$ngrid.r ngc <- layout$ngrid.c nspots <- layout$nspot.r * layout$nspot.c nprobes <- ngr*ngc*nspots if(nprobes != NROW(object$M)) stop("printer layout information does not match M row dimension") if(nprobes != NROW(object$A)) stop("printer layout information does not match A row dimension") for (j in 1:narrays) { spots <- 1:nspots for (gridr in 1:ngr) for (gridc in 1:ngc) { spots1<-intersect(batch, spots) cbatch<-setdiff(1:nrow(object$M), batch) spots2<-intersect(cbatch, spots) y <- object$M[spots1,j] x <- object$A[spots1,j] w <- weights[spots1,j] object$M[spots1,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals y <- object$M[spots2,j] x <- object$A[spots2,j] w <- weights[spots2,j] object$M[spots2,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals spots <- spots + nspots } } }, loess = { for (j in 1:narrays) { y <- object$M[,j] x <- object$A[,j] w <- weights[,j] object$M[,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals } }, printtiploess = { if(is.null(layout)) stop("Layout argument not specified") ngr <- layout$ngrid.r ngc <- layout$ngrid.c nspots <- layout$nspot.r * layout$nspot.c nprobes <- ngr*ngc*nspots if(nprobes != NROW(object$M)) stop("printer layout information does not match M row dimension") if(nprobes != NROW(object$A)) stop("printer layout information does not match A row dimension") for (j in 1:narrays) { spots <- 1:nspots for (gridr in 1:ngr) for (gridc in 1:ngc) { y <- object$M[spots,j] x <- object$A[spots,j] w <- weights[spots,j] object$M[spots,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals spots <- spots + nspots } } }, composite = { if(is.null(layout)) stop("Layout argument not specified") if(is.null(controlspots)) stop("controlspots argument not specified") ntips <- layout$ngrid.r * layout$ngrid.c nspots <- layout$nspot.r * layout$nspot.c for (j in 1:narrays) { y <- object$M[,j] x <- object$A[,j] w <- weights[,j] f <- is.finite(y) & is.finite(x) & is.finite(w) y[!f] <- NA fit <- loess(y~x,weights=w,span=span,subset=controlspots,na.action=na.exclude,degree=0,surface="direct",family="symmetric",trace.hat="approximate",iterations=iterations) alpha <- global <- y global[f] <- predict(fit,newdata=x[f]) alpha[f] <- (rank(x[f])-1) / sum(f) spots <- 1:nspots for (tip in 1:ntips) { y <- object$M[spots,j] x <- object$A[spots,j] w <- weights[spots,j] local <- loessFit(y,x,w,span=span,iterations=iterations)$fitted object$M[spots,j] <- object$M[spots,j] - alpha[spots]*global[spots]-(1-alpha[spots])*local spots <- spots + nspots } } }, control = { if(is.null(layout)) stop("Layout argument not specified") if(is.null(controlspots)) stop("controlspots argument not specified") ntips <- layout$ngrid.r * layout$ngrid.c nspots <- layout$nspot.r * layout$nspot.c for (j in 1:narrays) { y <- object$M[,j] x <- object$A[,j] f <- is.finite(y) & is.finite(x) if(!is.null(weights)) { w <- weights[,j] f <- f & is.finite(w) } y[!f] <- NA fit <- loess(y~x,weights=w,span=span,subset=controlspots,na.action=na.exclude,degree=1,surface="direct",family="symmetric",trace.hat="approximate",iterations=iterations) y[f] <- y[f]-predict(fit,newdata=x[f]) object$M[,j] <- y } }, robustspline = { if(is.null(layout)) stop("Layout argument not specified") for (j in 1:narrays) object$M[,j] <- normalizeRobustSpline(object$M[,j],object$A[,j],layout,df=df,method=robust) } ) object } ######################################################### #<><><><><><><><><><><><><><><><><><><><><><><><><><><><> #******************************************************** #<><><><><><><><><><><><><><><><><><><><><><><><><><><><> #########################################################