QMadj.GaussianDLY<-function(InSeries,InCs,output,MissingValueCode,GUI=FALSE, Iadj=10000,Mq=10){ itmp<-ReadDLY.g(InSeries,MissingValueCode) if(itmp<0){ ErrorMSG<<-paste("QMadj.GaussianDLY: Error in read data from",InSeries,"\n", get("ErrorMSG",env=.GlobalEnv),"\n") if(!GUI) cat(ErrorMSG) return(-1) } N<-length(Y0) itmp<-readLines(InCs) if(length(itmp)>=2){ Ns<-length(itmp)-1 Ips<-c(rep(0,Ns),N) for(i in 1:Ns){ # using YYYYMMDD as index, searching for the largest # date less or equal to given YYYYMMDD ymdtmp<-as.real(substr(itmp[i+1],5,14)) if(ymdtmp==as.integer(ymdtmp/100)*100) ymdtmp<-ymdtmp+15 # set date as 15 if input break point is 00 for date it<-match(ymdtmp,IY0) if(!is.na(it)) Ips[i]<-it else Ips[i]<-max(c(1:N)[IY0<=ymdtmp]) } if(sum(is.na(Ips))>0|!identical(Ips,sort(Ips))){ ErrorMSG<<-paste("QMadj.GaussianDLY: Ips read in from ",InCs,"error:") for(i in 1:Ns) ErrorMSG<<-paste(get("ErrorMSG",env=.GlobalEnv),Ips[i]) ErrorMSG<<-paste(get("ErrorMSG",env=.GlobalEnv),"\n\n") if(!GUI) cat(ErrorMSG) return(-1) } } ofileSout<-paste(output,"_QMadjDLY_stat.txt",sep="") file.create(ofileSout) cat(paste("Input data filename:", InSeries,"; N=",N,"\n"),file=ofileSout) if(Ns>0) { Nsegs<-Ips-c(0,Ips[1:Ns]) Iseg.longest<-sort(Nsegs,index=T,decreasing=T)$ix[1] } else Iseg.longest<-0 if(Iadj>(Ns+1)|Iseg.longest==0) Iseg.adj<-Ns+1 else if(Iadj==0)Iseg.adj<-Iseg.longest else Iseg.adj<-Iadj oout<-rmCycle(itable) Y1<-oout$Base EB<-oout$EB assign("EB",EB,envir=.GlobalEnv) otmp<-LSmultiRedCycle(Y1,Ti,Ips,Iseg.adj) Y1<-otmp$Y0 cor<-otmp$cor corl<-otmp$corl corh<-otmp$corh pcor<-pt(abs(cor)*sqrt((N-2)/(1-cor^2)),N-2) Rf<-otmp$resi W<-otmp$W WL<-otmp$WL WU<-otmp$WU EB1<-otmp$EB itmp1<-cbind(EB1,Icy) itmp2<-cbind(1:N,Imd) colnames(itmp2)<-c("idx","Icy") itmp<-merge(itmp1,itmp2,by="Icy") EBfull<-itmp[order(itmp[,"idx"]),"EB1"] EEB<-mean(EB1,na.rm=T) Rb<-Y1-otmp$trend*Ti+EBfull QMout<-QMadjDLY(Rb,Ips,Mq,Iseg.adj) B<-QMout$PA adj<-otmp$Y0+EBfull B<-B+otmp$trend*Ti if(Ns>0){ cat(paste("Nseg_shortest =",QMout$Nseg.mn,"; Mq = ",QMout$Mq,"\n"), file=ofileSout,append=T) cat(paste("\n Adjust to segment", Iseg.adj,": from", if(Iseg.adj==1) 1 else Ips[Iseg.adj-1]+1, "to",Ips[Iseg.adj],"\n"),file=ofileSout,append=T) cat("#Fcat, DP (CDF and Differnces in category mean)\n",file=ofileSout, append=T) write.table(round(QMout$osmean,4),file=ofileSout,append=T, row.names=F,col.names=F) for(i in 1:(Ns+1)){ I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] if(i!=Iseg.adj) cat(paste("Seg. ",i,": mean of QM-adjustments =",round(mean(QMout$W[I1:I2]),4), "\n",sep=""),file=ofileSout,append=T) } } cat(paste("#steps= ",Ns,"; trend=",round(otmp$trend,6),"(", round(otmp$betaL,6),",",round(otmp$betaU,6),") (p=", round(otmp$p.tr,4),"); cor=", round(cor,4),"(",round(corl,4),",",round(corh,4),")", round(pcor,4),"\n"), file=ofileSout,append=T) if(Ns>0) for(i in 1:Ns){ I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] Delta<-otmp$mu[Ns+1]-otmp$mu[i] adj[I1:I2]<-adj[I1:I2]+Delta stepsize<-otmp$mu[i+1]-otmp$mu[i] cat(paste(Ips[i],IY0[Ips[i]],"stepsize=",round(stepsize,4),"\n"), file=ofileSout,append=T) } # oR<-Y1-otmp$meanhat # oR[2:N]<-oR[2:N]-oR[1:(N-1)]*cor # Ehat<-mean(otmp$meanhat) if(Ns>0){ odata<-matrix(NA,dim(ori.itable)[1],8) odata[,1]<-ori.itable[,1] odata[,2]<-ori.itable[,2] odata[,3]<-ori.itable[,3] odata[is.na(ori.itable[,4])==F,4]<-round(otmp$Y0+EBfull,4) odata[is.na(ori.itable[,4])==F,5]<-round(B,4) odata[is.na(ori.itable[,4])==F,6]<-round(adj,4) odata[is.na(ori.itable[,4])==F,7]<-round(B-otmp$Y0-EBfull,4) odata[is.na(ori.itable[,4])==F,8]<-round(adj-otmp$Y0-EBfull,4) } else odata<-cbind(ori.itable[,c(1,2,3,4,4,4)],0,0) ofileAout<-paste(output,"_QMadjDLY_data.dat",sep="") # ofileAout<-output # suggested by Lucie, user can choose whatever filename write.table(odata,file=ofileAout,na=MissingValueCode,col.names=F,row.names=F) ofilePdf<-paste(output,"_QMadjDLY_plot.pdf",sep="") pdf(file=ofilePdf,onefile=T) op <- par(no.readonly = TRUE) # the whole list of settable par's par(mfrow=c(2,1)) uyrs<-unique(floor(ori.itable[,1]/10))*10 labels<-NULL ats<-NULL for(i in 1:length(uyrs)){ if(!is.na(match(uyrs[i],ori.itable[,1]))){ labels<-c(labels,uyrs[i]) ats<-c(ats,match(uyrs[i],ori.itable[,1])) } } par(mar=c(3,4,3,2)+.1) pdata<-rep(NA,dim(ori.itable)[1]) pdata[is.na(ori.itable[,4])==F]<-otmp$Y0 plot(1:dim(ori.itable)[1],pdata,type="l",xlab="",ylab="", ylim=c(min(otmp$Y0,otmp$meanhat),max(otmp$Y0,otmp$meanhat)), xaxt="n",col="red", main="Base anomaly series and regression fit") axis(side=1,at=ats,labels=labels) pdata[is.na(ori.itable[,4])==F]<-otmp$meanhat lines(1:dim(ori.itable)[1],pdata,col="blue") pdata[is.na(ori.itable[,4])==F]<-otmp$Y0+EBfull plot(1:dim(ori.itable)[1],pdata,type="l",xlab="",ylab="", ylim=c(min(otmp$Y0+EBfull,otmp$meanhat+EBfull), max(otmp$Y0+EBfull,otmp$meanhat+EBfull)), xaxt="n",col="red", main="Base series and regression fit") axis(side=1,at=ats,labels=labels) pdata[is.na(ori.itable[,4])==F]<-otmp$meanhat+EEB lines(1:dim(ori.itable)[1],pdata,col="blue") pdata[is.na(ori.itable[,4])==F]<-adj plot(1:dim(ori.itable)[1],pdata,type="l",xlab="",ylab="", ylim=c(min(adj),max(adj)), xaxt="n",col="red", main="Mean-adjusted base series") axis(side=1,at=ats,labels=labels) pdata[is.na(ori.itable[,4])==F]<-B plot(1:dim(ori.itable)[1],pdata,type="l",xlab="",ylab="", ylim=c(min(B),max(B)), xaxt="n",col="red", main="QM-adjusted base series") axis(side=1,at=ats,labels=labels) # test plot par(mar=c(4,5,3,2)+.1,cex=.8,mfrow=c(1,1)) col=0 np<-0 osp<-QMout$osp osmean<-QMout$osmean for(i in 1:(Ns+1)){ Fd<-.5/QMout$Mq I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] ymax<-max(osp[,2:3],na.rm=T); ymin<-min(osp[,2:3],na.rm=T) if(i!=Iseg.adj){ np<-np+1 if(col==0) { col<-2 plot(osp[I1:I2,2],osp[I1:I2,3],xlim=c(0,1),ylim=c(ymin,ymax), type="l",lwd=2,col=col,xlab="Cumulative Frequency", ylab="QM Adjustment", main=paste("distribution of QM adjustments with Mq=",QMout$Mq)) icol<-2*np for(j in 1:QMout$Mq){ lines(c(osmean[(j+1),icol]-Fd,osmean[(j+1),icol]+Fd), c(rep(osmean[(j+1),(icol+1)],2)),col=col) if(j>1&j1&j=imdbegin] # first year # if(iyrend>(iyrbegin+1)) for(i in (iyrbegin+1):(iyrend-1)) # Ind2<-c(Ind2,i*10000+Icy) # Ind2<-c(Ind2,iyrend*10000+Icy[Icy<=imdend]) Nt<-length(Icy) Nall<-dim(itable)[1] ind<-itable[,1]*10000+itable[,2]*100+itable[,3] # for(i in 1:length(Ind2)) # if(Ind2[i]!=ind[i]) { # ErrorMSG<<-paste("Input data series not continuous at:",Ind2[i],ind[i],"\n") # return(-1) # } IY0<-ind[is.na(itable[,4])==F] IY0flg<-rep(0,length(IY0)) Y0<-itable[is.na(itable[,4])==F,4] Iyr<-floor(IY0/10000) Imd<-IY0-Iyr*10000 Ti<-IY0 for(i in 1:length(IY0)){ ith<-match(Imd[i],Icy) Ti[i]<-(Iyr[i]-iyrbegin)*Nt+ith } for(i in 1:(length(IY0)-1)){ if(Ti[i+1]-Ti[i]==1) IY0flg[i]<-1 } if(sum(IY0flg)<10){ # too few available data for autocorlh ErrorMSG<<-paste("Too many missing values in ", idata, "to estimate autocorrelation\n") return(-1) } itable<-itable[is.na(itable[,4])==F,] assign("ori.itable",ori.itable,envir=.GlobalEnv) assign("itable",itable,envir=.GlobalEnv) assign("Ti",Ti,envir=.GlobalEnv) # Time index for LS fitting assign("Y0",Y0,envir=.GlobalEnv) # Data series for Base assign("IY0",IY0,envir=.GlobalEnv) # Cycle index for Base assign("Imd",Imd,envir=.GlobalEnv) # Cycle index for Base assign("IY0flg",IY0flg,envir=.GlobalEnv) # continuous flag for Base assign("Icy",Icy,envir=.GlobalEnv) # Cycle index assign("Nt",Nt,envir=.GlobalEnv) # Cycle length return(0) } rmCycle<-function(idata){ tdata<-cbind(idata[,2]*100+idata[,3],idata[,4]) inds<-sort(unique(tdata[,1])) nx<-length(inds) mu<-rep(0,nx) for(i in 1:nx){ mu[i]<-mean(tdata[tdata[,1]==inds[i],2],na.rm=T) tdata[tdata[,1]==inds[i],2]<-tdata[tdata[,1]==inds[i],2]-mu[i] } oout<-list() oout$EB<-mu oout$Base<-tdata[,2] return(oout) } LSmultiple<-function(Y,T,Ips){ Nx<-length(Y) Ns<-length(Ips)-1 X<-t(t(Y)) D<-rep(1,Nx) D<-cbind(D,T) if(Ns>=1){ for(i in 1:Ns){ tmp<-rep(0,Nx) tmp[(Ips[i]+1):Ips[i+1]]<-1 D<-cbind(D,tmp) } } sig<-solve(t(D)%*%D)%*%t(D)%*%X fitted<-D%*%sig resi<-X-fitted SSE<-sum(resi^2) oout<-list() oout$SSE<-SSE oout$fitted<-as.vector(fitted) oout$resi<-as.vector(resi) oout$sig<-as.vector(sig) return(oout) } LSmultipleRed<-function(Y0,Ti,Ips){ Ns<-length(Ips)-1 N<-length(Y0) otmp<-LSmultiple(Y0,Ti,Ips) sig<-otmp$sig beta<-otmp$sig[2] resi<-otmp$resi otmp<-autocorlh(resi,IY0flg) cor<-otmp$cor corl<-otmp$corl corh<-otmp$corh resi<-resi+beta*Ti W1<-resi/(1-cor) W2<-c(W1[1],(resi[2:N]-cor*resi[1:(N-1)])/(1-cor)) W<-c(1,IY0flg[1:(N-1)])*W2+(!c(1,IY0flg[1:(N-1)]))*W1 otmp<-LSmatrix(W,Ti,NA) beta<-otmp$sig[2] St0<-sum((Ti-mean(Ti))^2) df<-(N-2-Ns-Nt) sigmaE2<-otmp$SSE/df t.stat<-abs(beta)/sqrt(sigmaE2/St0) p.tr<-pt(t.stat,df) betaL<-beta-qt(.975,df)*sqrt(sigmaE2/St0) betaU<-beta+qt(.975,df)*sqrt(sigmaE2/St0) itmp<-Y0-beta*Ti mu<-rep(0,Ns+1) meanhat<-mu for(i in 0:Ns){ I0<- if(i==0) 1 else Ips[i]+1 I2<-Ips[i+1] mu[i+1]<-mean(itmp[I0:I2]) meanhat[I0:I2]<-mu[i+1]+beta*Ti[I0:I2] resi[I0:I2]<-Y0[I0:I2]-meanhat[I0:I2] } W1<-resi W2<-c(resi[1],resi[2:N]-cor*resi[1:(N-1)]) W3<-c(resi[1],resi[2:N]-corl*resi[1:(N-1)]) W4<-c(resi[1],resi[2:N]-corh*resi[1:(N-1)]) W<-c(1,IY0flg[1:(N-1)])*W2+(!c(1,IY0flg[1:(N-1)]))*W1 WL<-c(1,IY0flg[1:(N-1)])*W3+(!c(1,IY0flg[1:(N-1)]))*W1 WU<-c(1,IY0flg[1:(N-1)])*W4+(!c(1,IY0flg[1:(N-1)]))*W1 for(i in 0:Ns){ I0<- if(i==0) 1 else Ips[i]+1 I2<-Ips[i+1] W[I0:I2]<-W[I0:I2]+mean(itmp[I0:I2])+beta*Ti[I0:I2] WL[I0:I2]<-WL[I0:I2]+mean(itmp[I0:I2])+beta*Ti[I0:I2] WU[I0:I2]<-WU[I0:I2]+mean(itmp[I0:I2])+beta*Ti[I0:I2] } oout<-list() oout$W<-W oout$WL<-WL oout$WU<-WU oout$sig<-sig oout$cor<-cor oout$corl<-corl oout$corh<-corh oout$resi<-resi oout$mu<-mu oout$meanhat<-meanhat oout$trend<-beta oout$betaL<-betaL oout$betaU<-betaU oout$p.tr<-p.tr return(oout) } autocorlh<-function(Y,IY){ # calculate autocorrelation of given data vector, using given time vector to # judge continuouse N<-length(Y) cnt<-sum(IY) m0<-mean(Y,na.rm=T) xsd0<-0 xsd1<-0 S1<-sum(((Y-m0)^2*IY)[1:(N-1)]) S2<-sum(((Y-m0)*(c(Y[2:N],0)-m0)*IY)[1:(N-1)]) cor<-S2/S1 # else stop("too few available data in autocor") z975<-1.96 z<-.5*log((1+cor)/(1-cor)) df<-sum(IY[1:(N-1)]) zl<-z-z975/sqrt(df-3) zh<-z+z975/sqrt(df-3) cl<-tanh(zl) ch<-tanh(zh) corl<-min(c(cl,ch)) corh<-max(c(cl,ch)) oout<-list() oout$cor<-cor oout$corl<-corl oout$corh<-corh return(oout) } LSmultiRedCycle<-function(Y0,Ti,Ips,Iseg.adj){ N<-length(Y0) Ns<-length(Ips)-1 Niter<-0 tt<-TRUE EB1<-EB while(tt){ tt<-FALSE Niter<-Niter+1 EB0<-EB1 otmp<-LSmultipleRed(Y0,Ti,Ips) trend<-otmp$trend betaL<-otmp$betaL betaU<-otmp$betaU resi<-otmp$resi cor<-otmp$cor corl<-otmp$corl corh<-otmp$corh p.tr<-otmp$p.tr meanhat<-otmp$meanhat mu<-otmp$mu W<-otmp$W WL<-otmp$WL WU<-otmp$WU if(Nt>1){ itmp1<-cbind(EB0,Icy) itmp2<-cbind(1:N,Imd) colnames(itmp2)<-c("idx","Icy") itmp<-merge(itmp1,itmp2,by="Icy") EBfull<-itmp[order(itmp[,"idx"]),"EB0"] for(i in 1:(Ns+1)){ I0<- if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] # delta<-if(i==(Ns+1)) 0 else mu[i]-mu[Iseg.adj] delta<-mu[i]-mu[Iseg.adj] Y0[I0:I2]<-Y0[I0:I2]+EBfull[I0:I2]-delta } for(i in 1:Nt) EB1[i]<-mean(Y0[Imd==Icy[i]]) VEB<-sqrt(var(EB1)) if(is.na(VEB)) tt<-FALSE else{ itmp1<-cbind(EB1,Icy) itmp2<-cbind(1:N,Imd) colnames(itmp2)<-c("idx","Icy") itmp<-merge(itmp1,itmp2,by="Icy") EBfull<-itmp[order(itmp[,"idx"]),"EB1"] for(i in 1:(Ns+1)){ I0<- if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] # delta<-if(i==(Ns+1)) 0 else mu[i]-mu[Iseg.adj] delta<-mu[i]-mu[Iseg.adj] Y0[I0:I2]<-Y0[I0:I2]-EBfull[I0:I2]+delta } DEBmx<-max(abs(EB1-EB0)) if(DEBmx>VEB/1000&Niter<20) tt<-TRUE } } } oout<-list() oout$trend<-trend oout$betaL<-betaL oout$betaU<-betaU oout$EB<-EB1 oout$mu<-mu oout$cor<-cor oout$corl<-corl oout$corh<-corh oout$W<-W oout$WL<-WL oout$WU<-WU oout$resi<-resi oout$Y0<-as.vector(Y0) oout$meanhat<-as.vector(meanhat) oout$p.tr<-p.tr return(oout) } is.csv<-function(names){ nlen<-nchar(names) if(substr(names,nlen-2,nlen)=="csv"|substr(names,nlen-2,nlen)=="CSV") return(T) else return(F) } LSmatrix<-function(Y,T,Ic){ Nx<-length(Y) D<-rep(1,Nx) X<-t(t(Y)) D<-cbind(D,T) if(!is.na(Ic)) D<-cbind(D,c(rep(0,Ic),rep(1,Nx-Ic))) sig<-solve(t(D)%*%D)%*%t(D)%*%X fitted<-D%*%sig resi<-X-fitted SSE<-sum(resi^2) oout<-list() oout$sig<-as.vector(sig) oout$fitted<-as.vector(fitted) oout$resi<-as.vector(resi) oout$SSE<-SSE return(oout) } QMadjDLY<-function(P,Ips,Mq,Iseg.adj){ Ns<-length(Ips)-1 N<-length(P) Nseg.mn<-N for(i in 1:(Ns+1)){ I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] Nseg<-I2-I1+1 if(Nseg20) Mq<-20 if(Mq<=0) Mq<-1 Fd<-.5/Mq Fcat<-matrix(NA,(Ns+1),(Mq+2)) F<-matrix(NA,(Ns+1),N) EP<-matrix(NA,(Ns+1),(Mq+2)) for(i in 1:(Ns+1)) Fcat[i,]<-seq(0,by=1/Mq,length=(Mq+2)) for(i in 1:(Ns+1)){ I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] Nseg<-I2-I1+1 Y<-P[I1:I2] iindx<-sort(Y,index=T)$ix irank<-sort(iindx,index=T)$ix F[i,1:Nseg]<-irank/Nseg EP[i,]<-0 for(k in 2:(Mq+1)){ Mp1<-floor(Nseg*Fcat[i,(k-1)]) Mp2<-floor(Nseg*Fcat[i,k]) EP[i,k]<-mean(Y[iindx[(Mp1+1):Mp2]]) } EP[i,(Mq+2)]<-EP[i,(Mq+1)] } EP[,1]<-EP[,2] osmean<-c(1:(Mq+2)) # for plot purpose for(Iseg in c(1:(Ns+1))) osmean<-cbind(osmean,Fcat[Iseg,]-Fd,EP[Iseg.adj,]-EP[Iseg,]) # output osmean is a 2*(Ns+1)+1 by Mq+2 matrix PA<-P W<-rep(NA,N) osp<-NULL for(i in 1:(Ns+1)){ I1<-if(i==1) 1 else Ips[i-1]+1 I2<-Ips[i] Nseg<-I2-I1+1 if(i==Iseg.adj) PA[I1:I2]<-P[I1:I2] else{ dx<-Fcat[i,]-Fd fdx<-EP[Iseg.adj,]-EP[i,] if(Mq==1) fdx[1]<-fdx[2] fdx2<-splineN(dx,fdx,2E30,2E30) for(j in I1:I2) W[j]<-splintN(dx,fdx,fdx2,F[i,(j-I1+1)]) PA[I1:I2]<-P[I1:I2]+W[I1:I2] } Rs<-F[i,1:Nseg] ors<-sort(Rs,index=T)$ix osp<-rbind(osp,cbind(I1:I2,Rs[ors],W[I1:I2][ors])) } oout<-list() oout$PA<-PA oout$W<-W oout$Mq<-Mq oout$Nseg.mn<-Nseg.mn oout$osmean<-osmean oout$osp<-osp return(oout) } splintN<-function(xa,ya,y2a,x){ n<-length(xa) if(length(ya)!=n|length(y2a)!=n) stop("input vector length differ") klo<-1 khi<-n while((khi-klo)>1){ k<-ceiling((khi+klo)/2) if(xa[k]>x) khi<-k else klo<-k } h<-xa[khi]-xa[klo] if(h==0) stop("bad xa input in splintN") a<-(xa[khi]-x)/h b<-(x-xa[klo])/h y<-a*ya[klo]+b*ya[khi]+((a**3-a)*y2a[klo]+(b**3-b)*y2a[khi])*(h**2)/6 return(y) } splineN<-function(x,y,yp1,yp2){ n<-length(x) if(length(y)!=n) stop("input vector length differ") y2<-rep(NA,0) u<-rep(NA,0) if(yp1>1E30){ y2[1]<-0 u[1]<-0 } else{ y2[1]<-(-0.5) u[1]<-(3/(x[2]-x[1]))*((y[2]-y[1])/(x[2]-x[1])-yp1) } if(n>2) for(i in c(2:(n-1))){ sig<-(x[i]-x[i-1])/(x[i+1]-x[i-1]) p<-sig*y2[i-1]+2 y2[i]<-(sig-1)/p u[i]<-(6*((y[i+1]-y[i])/(x[i+1]-x[i])-(y[i]-y[i-1])/(x[i]-x[i-1]))/ (x[i+1]-x[i-1])-sig*u[i-1])/p } if(yp2>1E30){ qn<-0 un<-0 } else{ qn<-0.5 un<-(3/(x[n]-x[n-1]))*(yp2-(y[n]-y[n-1])/(x[n]-x[n-1])) } y2[n]<-(un-qn*u[n-1])/(qn*y2[n-1]+1) for(i in c((n-1):1)) y2[i]<-y2[i]*y2[i+1]+u[i] return(y2) }