options(error = quote({ dump.frames(to.file=T, dumpto='last.dump') load('last.dump.rda') print(last.dump) q() })) ###### ### Script written by ZTS: 27 Aug 2023 reorganized and automated. ### following a discussion and presentation by Chris Funk ### The script produces forecast based on NMME SST forecast ### using Pacific indices: Western V gradient, Nino3.4,... ### ZTS: 11 Aug 2022 -- The code only works for nmodel > 1 otherwise the ### indexing [,im] will give error. Modify code to work for a single model ###### #library(foreach) #library(doParallel) library(RNetCDF) #library(qmap) ## Set the number of core to be used # ncores <- detectCores() zproctime0 <- proc.time() #ncores <- 2 #klust <- makeCluster(ncores) #registerDoParallel(klust) iMonth <- OMONS # Change GCM Initial Month (1==Jan) SznLen <- SZNLEN # Number of months is a season yrFCST <- CURRENTYR SEASON <- SEASONNAME fileprecnc <- OBSPRECIP #"precOND_1982-2020.nc" FirstYr <- FIRSTYR #1982 LastYr <- LASTYR #2020 # This is for observations; iLead <- ILEAD # lead month for the initial month of season plev = 0.1 # Statistical Significance plevel gcmStart <- 1 # first index for regression computation gcmLast <- 5 #length(gcmtype) # last indx of available model forecastNames <- c("CanSIP","NASA","GFDLS","CCSM4", "CFSv2") LastHnd <- 2016 # This is the last year of training period slctYrs <- seq(1993,LastHnd,1) slctYrs <- seq(FirstYr,LastYr,1) #FirstYr <- 1982 #LastYr <- 2020 #slctYrs <- seq(1991,LastYr,1) #climYrs <- seq(1991,LastYr,1) DETREND <- TRUE stdizeObs <- FALSE ####### PRDCTRS <- c("Nino34","WVG") hnames <- c("n34hZ","wvGhZ") fnames <- c("n34fZ","wvGfZ") nprdct <- length(fnames) VARNMTHD <- "_PRCP_Logit" varunits = "C" nmodels <- length(gcmStart:gcmLast) nmiss <- 15 rthr <- 1 # rainfall threshold if (SznLen == 4) { FTYPES <- "_4monthSeasonal.nc" } else { FTYPES <- "_3monthSeasonal.nc"} ######## Initialize file names and direcotories dirdat0 <- "./analdat/" fcstdir <- "./sstdat/" hinddiro <- "./" hinddir <- fcstdir hindcastNames <- c( paste0(paste0("sst",SEASON), "WPGCanSIP.nc"), paste0(paste0("sst",SEASON), "WPGNASA.nc"), paste0(paste0("sst",SEASON), "WPGGFDLS.nc"), paste0(paste0("sst",SEASON), "WPGCCSM4.nc"), paste0(paste0("sst",SEASON), "WPGCFSv2.nc")) #print(hindcastNames) month.2abb <- c(month.abb, month.abb) #5######### Get Observational data and its dates MonthlyPrec <- FALSE nco <- open.nc(paste0(hinddiro, fileprecnc)) timeo <- var.get.nc(nco, "time") #zodate <- utcal.nc("months since 1901-01-15 23:00:00", timeo, type = "n") htimeatt <- att.get.nc(nco, "time", "units") zodate <- utcal.nc(htimeatt, timeo, type='n') preco <- var.get.nc(nco, "prec") #(lon,lat, time) lon <- as.character(var.get.nc(nco, "lon")) lat <- as.character(var.get.nc(nco, "lat")) #print(lon) ######### Use dates to form variable names for indexing if (MonthlyPrec) { #The below work for time series containing 12 months y <- zodate[, 2] y1 <- month.abb[y] oDateNames <- paste0(zodate[, 1], y1) names(oDateNames) <- oDateNames oSyrmmi <- paste0(FirstYr,month.2abb[iMonth+iLead]) #init mon of szn oLyrmmf <- paste0(LastYr, month.2abb[iMonth+iLead+SznLen-1]) ##### dimnames(preco) <- list(lon, lat, oDateNames) preco[preco < 0] <- NA ########## #### Get seasonal observational data #print(slctd) validT <- paste0(seq(FirstYr,LastYr,1),SEASON) strtO <- which(oDateNames == oSyrmmi,arr.ind =TRUE) #it should exist lastO <- which(oDateNames == oLyrmmf,arr.ind =TRUE) #it should exist StartMon <- iLead + iMonth # of Season if (SznLen == 3 ) { x <- oDateNames[strtO:lastO] if (StartMon < 11) {xn <- length(x) } else {xn <- length(x) - 1} # Verifying SZN & Yr 'x' are: 1982Mar,1982Apr,1982May,1982Jun # ...1982Dec,1983Jan,....2020Jan,...,2020Mar,2020Apr,2020May yrsmonv <- x[ sort(c(seq(1,xn,12),seq(2,xn,12),seq(3,xn,12))) ] yobs <- preco[, , yrsmonv] # predictand y1 <- array(yobs,c(length(lon),length(lat),3,length(yrsmonv)/3)) yobs <- apply(y1, c(1, 2, 4), sum, na.rm = FALSE) #note in above any missing in one of the 3 vals will yield NA } else { # Verifying precip are as above except upto 2020Jun if iMonth=2 x <- oDateNames[strtO:lastO] if (StartMon < 11) {xn <- length(x) } else {xn <- length(x) - 1} yrsmonv<- x[sort(c(seq(1,xn,12),seq(2,xn,12), seq(3,xn,12),seq(4,xn,12))) ] yobs <- preco[, , yrsmonv] # predictand y1 <- array(yobs,c(length(lon),length(lat),4,length(yrsmonv)/4)) yobs <- apply(y1, c(1, 2, 4), sum, na.rm = FALSE) #note in above any missing in one of the 3 vals will yield NA } } else { ### For data downloaded using get... oDateNames <- paste0(zodate[, 1], SEASON) names(oDateNames) <- oDateNames dimnames(preco) <- list(lon, lat, oDateNames) preco[preco < 0] <- NA validT <- paste0(seq(FirstYr,LastYr,1),SEASON) yobs <- preco[,,validT] print(validT) } fcstyrs <- paste0(yrFCST[length(yrFCST)],SEASON) # only one season slctd <- paste0(slctYrs,SEASON) # Training period dimnames(yobs) <- list(lon,lat,validT) yobs <- yobs[,,slctd] # Note: model and obs will have slctd time yobs[yobs < rthr ] <- NA yobs <- round(yobs, 1) ntot <- length(yobs[1,1,]) hblw <- apply(yobs,c(1, 2), quantile, probs=1/3, na.rm = TRUE) habv <- apply(yobs,c(1, 2), quantile, probs=2/3, na.rm = TRUE) hblw <- round(array(rep(hblw,ntot),c(dim(hblw),ntot)),1) habv <- round(array(rep(habv,ntot),c(dim(habv),ntot)),1) oblw <- 1 * (yobs < hblw) oabv <- 1 * (yobs > habv) onrm <- 1 * (yobs >= hblw & yobs <= habv) #; 3-8N obsx <- apply(yobs[3:10,3:4,],3,mean,na.rm=T) blwx <- quantile(obsx,probs=1/3,na.rm=T) dryx <- 1*(obsx < blwx) if (stdizeObs) { # To be consistent with oSdev, need to do mannualy y <- yobs ysum <- apply(y, c(1,2),sum, na.rm=T) ysum[ysum < rthr ] <- NA ycnt <- 1* (!is.na(y)) ycnt <- apply(ycnt,c(1,2),sum) # available data ycnt[ycnt <= 5 ] <- NA oMean <- ysum/ycnt oMean <- array(rep(oMean,ntot),c(dim(oMean),ntot)) oAnom <- yobs - oMean ycnt <- ycnt - 1 anomsum <- apply(oAnom*oAnom,c(1,2),sum,na.rm=T) oSdev <- sqrt(anomsum/ycnt) oSdev[oSdev < 0.1] <- NA oSdev <- array(rep(oSdev,ntot),c(dim(oSdev),ntot)) oZscore <- oAnom/oSdev hblw <- apply(oZscore,c(1, 2), quantile, probs=1/3, na.rm = TRUE) habv <- apply(oZscore,c(1, 2), quantile, probs=2/3, na.rm = TRUE) hblw <- round(array(rep(hblw,ntot),c(dim(hblw),ntot)),1) habv <- round(array(rep(habv,ntot),c(dim(habv),ntot)),1) oblw <- 1 * (yobs < hblw) oabv <- 1 * (yobs > habv) onrm <- 1 * (yobs >= hblw & yobs <= habv) #43-47E; 3-8N obsx <- apply(oZscore[47:53,35:44,],3,mean,na.rm=F) blwx <- quantile(obsx,probs=1/3) dryx <- 1*(obsx < blwx) } ######################################### #### Get all model data wnpAll <- matrix(NA, nrow = length(FirstYr:yrFCST), ncol = length(forecastNames)) dimnames(wnpAll) <- list(paste0(FirstYr:yrFCST,SEASON), forecastNames) wepAll <- wnpAll ni4All <- wnpAll n34All <- wnpAll wvgAll <- wnpAll for (imodel in gcmStart:gcmLast) { ncf <- open.nc(paste0(fcstdir, hindcastNames[imodel])) nch <- open.nc(paste0(hinddir, hindcastNames[imodel])) timeh <- var.get.nc(nch, "time") zhdate <- utcal.nc("months since 1960-01-15 23:00:00", timeh, type = "n") yrs <- paste0(zhdate[ ,1],SEASON) wnp <- var.get.nc(nch, "wnp") # (lon,lat,time) wep <- var.get.nc(nch, "wep") # (lon,lat,time) ni4 <- var.get.nc(nch, "ni4") # (lon,lat,time) n34 <- var.get.nc(nch, "n34") wvg <- var.get.nc(nch, "wvg") wnpAll[yrs,imodel] <- wnp wepAll[yrs,imodel] <- wep ni4All[yrs,imodel] <- ni4 n34All[yrs,imodel] <- n34 wvgAll[yrs,imodel] <- wvg } traceback() ##### Done Get models #get selectd yrs which can be different from clim or hindyrs wnph <- wnpAll[slctd,gcmStart:gcmLast] weph <- wepAll[slctd,gcmStart:gcmLast] ni4h <- ni4All[slctd,gcmStart:gcmLast] n34h <- n34All[slctd,gcmStart:gcmLast] wvgh <- wvgAll[slctd,gcmStart:gcmLast] wnpf <- wnpAll[fcstyrs,gcmStart:gcmLast] wepf <- wepAll[fcstyrs,gcmStart:gcmLast] ni4f <- ni4All[fcstyrs,gcmStart:gcmLast] n34f <- n34All[fcstyrs,gcmStart:gcmLast] wvgf <- wvgAll[fcstyrs,gcmStart:gcmLast] if (DETREND) { ones <- seq(1,length(slctd),1) wnphx <- wnph wephx <- weph ni4hx <- ni4h n34hx <- n34h wvghx <- wvgh wnpfx <- wnpf wepfx <- wepf ni4fx <- ni4f n34fx <- n34f wvgfx <- wvgf wnpRgr <- vector("list",length=nmodels) wepRgr <- wnpRgr ni4Rgr <- wnpRgr n34Rgr <- wnpRgr wvgRgr <- wnpRgr tyr <- yrFCST[length(yrFCST)] for (im in 1:nmodels) { #### Western North Pacific trnd <- lm(wnphx[,im] ~ ones) vtrnd <- trnd$fitted.values xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE) xtmp2 <- which(!is.na(wnphx[,im]), arr.ind = TRUE) rtmp <- intersect(xtmp1, xtmp2) wnphx[rtmp,im] <- wnphx[rtmp,im] - vtrnd[rtmp] wnpfx[im] <- wnpfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr- slctYrs[1] + 1)) wnpRgr[[im]] <- trnd #### Western Equatorial Pacific trnd <- lm(wephx[,im] ~ ones) vtrnd <- trnd$fitted.values xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE) xtmp2 <- which(!is.na(wephx[,im]), arr.ind = TRUE) rtmp <- intersect(xtmp1, xtmp2) wephx[rtmp,im] <- wephx[rtmp,im] - vtrnd[rtmp] wepfx[im] <- wepfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1)) wepRgr[[im]] <- trnd #### Nino4 trnd <- lm(ni4hx[,im] ~ ones) vtrnd <- trnd$fitted.values xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE) xtmp2 <- which(!is.na(ni4hx[,im]), arr.ind = TRUE) rtmp <- intersect(xtmp1, xtmp2) ni4hx[rtmp,im] <- ni4hx[rtmp,im] - vtrnd[rtmp] ni4fx[im] <- ni4fx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1)) ni4Rgr[[im]] <- trnd #### Nino34 trnd <- lm(n34hx[,im] ~ ones) vtrnd <- trnd$fitted.values xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE) xtmp2 <- which(!is.na(n34hx[,im]), arr.ind = TRUE) rtmp <- intersect(xtmp1, xtmp2) n34hx[rtmp,im] <- n34hx[rtmp,im] - vtrnd[rtmp] n34fx[im] <- n34fx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1)) n34Rgr[[im]] <- trnd #### Western V Gradient trnd <- lm(wvghx[,im] ~ ones) vtrnd <- trnd$fitted.values xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE) xtmp2 <- which(!is.na(wvghx[,im]), arr.ind = TRUE) rtmp <- intersect(xtmp1, xtmp2) wvghx[rtmp,im] <- wvghx[rtmp,im] - vtrnd[rtmp] wvgfx[im] <- wvgfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1)) wvgRgr[[im]] <- trnd } # im wnph <- wnphx weph <- wephx ni4h <- ni4hx n34h <- n34hx wvgh <- wvghx wnpf <- wnpfx wepf <- wepfx ni4f <- ni4fx n34f <- n34fx wvgf <- wvgfx } #DETREND ########################## wnphZ <- wnph wephZ <- weph ni4hZ <- ni4h n34hZ <- n34h wvGhZ <- wvgh wnpfZ <- wnpf wepfZ <- wepf ni4fZ <- ni4f n34fZ <- n34f wvGfZ <- wvgf #### Can we do for different indexes at once? hindcastNames <- hindcastNames[gcmStart:gcmLast] forecastNames <- forecastNames[gcmStart:gcmLast] for (iprd in 1:nprdct) { ####### varaibles to store below <- array(NA,c(length(lon),length(lat))) above <- below normal <- below bSig <- below aSig <- below nSig <- below ssth <- get(hnames[iprd]) sstf <- get(fnames[iprd]) ########################################### ### Do logit ####################################### for (imodel in 1:nmodels) { zproctime1 <- proc.time() print(paste0("Starting model ",hindcastNames[imodel])) ## This was meant only to show what it does for a sample box (dryx) above dataZ <- data.frame(Dry=dryx, EqWestPac=wephZ[,imodel], Nino34=n34hZ[,imodel],NWPac=wnphZ[,imodel], Nino4=ni4hZ[,imodel], WVG=wvGhZ[,imodel]) glm.EAm <- glm(formula = Dry ~ EqWestPac + NWPac + Nino4 + Nino34 + WVG, family="binomial",data=dataZ) glm.EAs <- glm(formula = Dry ~ Nino34, family="binomial",data=dataZ) #print("Multimodel ") #print(summary(glm.EAm)) print("Nino34 ") print(summary(glm.EAs)) for (ilon in 1:length(lon)) { for (jlat in 1:length(lat)) { dry <- oblw[ilon,jlat,] prdr <- ssth[,imodel] ytmp <- cbind(dry, prdr) xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE) xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE) len1 <- length(xtmp1) len2 <- length(xtmp2) if( len1 >= nmiss & len2 >= nmiss) { xtmp <- intersect(xtmp1, xtmp2) nlen <- length(xtmp) if (nlen > 10) { dry <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ] dataH <- data.frame(Dry=dry,VAR=prdr) dataF <- data.frame(VAR=sstf[imodel]) dglm <- glm(formula = Dry ~ VAR, family="binomial", data=dataH) below[ilon,jlat] <- 100. * predict(dglm, dataF,type="response") #bSig[ilon,jlat] <- below[ilon,jlat] bSig[ilon,jlat] <- 1 xsig <- summary(dglm)$coef[2,][4] if(xsig > plev ) { bSig[ilon,jlat] <- NA} } } nrm <- onrm[ilon,jlat,] prdr <- ssth[,imodel] ytmp <- cbind(nrm, prdr) xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE) xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE) len1 <- length(xtmp1) len2 <- length(xtmp2) if( len1 >= nmiss & len2 >= nmiss) { xtmp <- intersect(xtmp1, xtmp2) nlen <- length(xtmp) if (nlen > 10) { nrm <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ] dataH <- data.frame(Nrm=nrm,VAR=prdr) dataF <- data.frame(VAR=sstf[imodel]) dglm <- glm(formula = Nrm ~ VAR, family="binomial", data=dataH) normal[ilon,jlat] <- 100. * predict(dglm, dataF,type="response") #nSig[ilon,jlat] <- normal[ilon,jlat] nSig[ilon,jlat] <- 1 xsig <- summary(dglm)$coef[2,][4] if(xsig > plev ) { nSig[ilon,jlat] <- NA} } } wet <- oabv[ilon,jlat,] prdr <- ssth[,imodel] ytmp <- cbind(wet, prdr) xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE) xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE) len1 <- length(xtmp1) len2 <- length(xtmp2) if( len1 >= nmiss & len2 >= nmiss) { xtmp <- intersect(xtmp1, xtmp2) nlen <- length(xtmp) if (nlen > 10) { wet <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ] dataH <- data.frame(Wet=wet,VAR=prdr) dataF <- data.frame(VAR=sstf[imodel]) dglm <- glm(formula = Wet ~ VAR, family="binomial", data=dataH) above[ilon,jlat] <- 100. * predict(dglm, dataF,type="response") #aSig[ilon,jlat] <- above[ilon,jlat] aSig[ilon,jlat] <- 1 xsig <- summary(dglm)$coef[2,][4] if(xsig > plev ) { aSig[ilon,jlat] <- NA} } } xb <- below[ilon,jlat] xn <- normal[ilon,jlat] xa <- above[ilon,jlat] xt <- xb+xn+xa if (is.finite(xt) && xt > 100.01) { above[ilon,jlat] <- 100. - (below[ilon,jlat] + normal[ilon,jlat]) } } # ilat } # ilon ################### dirdat <- paste0(paste0(dirdat0,forecastNames[imodel]),VARNMTHD) fileName = paste0(PRDCTRS[iprd],FTYPES) source("write2ncLogitRgr.R") print("Done NetCDF for Logit Regression") print(paste("Completed fcst for imodel = ",imodel)) print(" ") print(date()) zproctime2 <- proc.time() show(zproctime2 - zproctime1) } # imodel } #stopCluster(klust) print(date()) zproctime2 <- proc.time() show(zproctime2 - zproctime0)