options(error = quote({
  dump.frames(to.file=T, dumpto='last.dump')
  load('last.dump.rda')
  print(last.dump)
  q()
}))
######
### Script written by ZTS: 27 Aug 2023 reorganized and automated. 
### following a discussion and presentation by Chris Funk
### The script produces forecast based on NMME SST forecast
### using Pacific indices: Western V gradient, Nino3.4,...
### ZTS: 11 Aug 2022  -- The code only works for nmodel > 1 otherwise the 
###   indexing [,im] will give error. Modify code to work for a single model
######

#library(foreach)
#library(doParallel)
library(RNetCDF)
#library(qmap)

## Set the number of core to be used
# ncores <- detectCores()
zproctime0 <- proc.time()

#ncores <- 2
#klust <- makeCluster(ncores)
#registerDoParallel(klust)

iMonth <- OMONS  # Change GCM  Initial Month (1==Jan)
SznLen <- SZNLEN   # Number of months is a season
yrFCST <- CURRENTYR 
SEASON <- SEASONNAME 
fileprecnc <- OBSPRECIP  #"precOND_1982-2020.nc"
FirstYr <- FIRSTYR   #1982
LastYr  <- LASTYR    #2020    # This is for observations; 
iLead  <- ILEAD   # lead month for the initial month of season

plev = 0.1  # Statistical Significance plevel

gcmStart <- 1    # first index  for regression computation   
gcmLast  <- 5    #length(gcmtype) # last indx of available model

forecastNames <- c("CanSIP","NASA","GFDLS","CCSM4", "CFSv2")


LastHnd <- 2016    # This is the last year of training period 
slctYrs <- seq(1993,LastHnd,1)
slctYrs <- seq(FirstYr,LastYr,1)

#FirstYr <- 1982
#LastYr  <- 2020
#slctYrs <- seq(1991,LastYr,1)
#climYrs <- seq(1991,LastYr,1)

                   
DETREND <- TRUE 
stdizeObs <- FALSE

#######
PRDCTRS  <- c("Nino34","WVG")
hnames   <- c("n34hZ","wvGhZ")
fnames   <- c("n34fZ","wvGfZ")
nprdct   <- length(fnames)
VARNMTHD <- "_PRCP_Logit"

varunits = "C" 

nmodels <- length(gcmStart:gcmLast)
nmiss    <- 15
rthr     <- 1    # rainfall threshold

if (SznLen == 4) { FTYPES <- "_4monthSeasonal.nc" } else { FTYPES <- "_3monthSeasonal.nc"}

######## Initialize file names and direcotories
dirdat0 <- "./analdat/"
fcstdir <- "./sstdat/"
hinddiro <- "./"
hinddir <- fcstdir

hindcastNames <- c(
           paste0(paste0("sst",SEASON), "WPGCanSIP.nc"),
           paste0(paste0("sst",SEASON), "WPGNASA.nc"),
           paste0(paste0("sst",SEASON), "WPGGFDLS.nc"),
           paste0(paste0("sst",SEASON), "WPGCCSM4.nc"),
           paste0(paste0("sst",SEASON), "WPGCFSv2.nc"))

#print(hindcastNames)
month.2abb <- c(month.abb, month.abb)

#5######### Get Observational data and its dates

MonthlyPrec <- FALSE

nco <- open.nc(paste0(hinddiro, fileprecnc))
timeo <- var.get.nc(nco, "time")
#zodate <- utcal.nc("months since 1901-01-15 23:00:00", timeo, type = "n")
htimeatt <- att.get.nc(nco, "time", "units")
 zodate <- utcal.nc(htimeatt, timeo, type='n')

preco <- var.get.nc(nco, "prec")  #(lon,lat, time) 
lon <- as.character(var.get.nc(nco, "lon"))
lat <- as.character(var.get.nc(nco, "lat"))
#print(lon)
######### Use dates to form variable names for indexing
if (MonthlyPrec) {
#The below work for time series containing 12 months 
  y <- zodate[, 2]
  y1 <- month.abb[y]
  oDateNames <- paste0(zodate[, 1], y1)
  names(oDateNames) <- oDateNames

  oSyrmmi <- paste0(FirstYr,month.2abb[iMonth+iLead]) #init mon of szn
  oLyrmmf <- paste0(LastYr, month.2abb[iMonth+iLead+SznLen-1])
#####

dimnames(preco) <- list(lon, lat, oDateNames)
preco[preco < 0] <- NA

##########
#### Get seasonal observational data
#print(slctd)

validT  <- paste0(seq(FirstYr,LastYr,1),SEASON)

strtO <- which(oDateNames == oSyrmmi,arr.ind =TRUE) #it should exist  
lastO <- which(oDateNames == oLyrmmf,arr.ind =TRUE) #it should exist  

StartMon <- iLead + iMonth  # of Season

  if (SznLen == 3 )
  {
     x <- oDateNames[strtO:lastO]
     if (StartMon < 11) {xn <- length(x) } else {xn <- length(x) - 1}

     # Verifying SZN & Yr 'x' are: 1982Mar,1982Apr,1982May,1982Jun
     # ...1982Dec,1983Jan,....2020Jan,...,2020Mar,2020Apr,2020May 
     yrsmonv <- x[ sort(c(seq(1,xn,12),seq(2,xn,12),seq(3,xn,12))) ]
     yobs <- preco[, , yrsmonv]  # predictand 
     y1 <- array(yobs,c(length(lon),length(lat),3,length(yrsmonv)/3))
     yobs <- apply(y1, c(1, 2, 4), sum, na.rm = FALSE)
      #note in above any missing in one of the 3 vals will yield NA
  } else
  {
    # Verifying precip are as above except upto 2020Jun if iMonth=2
    x <- oDateNames[strtO:lastO]
    if (StartMon < 11) {xn <- length(x) } else {xn <- length(x) - 1}
    yrsmonv<- x[sort(c(seq(1,xn,12),seq(2,xn,12),
                       seq(3,xn,12),seq(4,xn,12))) ]
    yobs <- preco[, , yrsmonv]  # predictand 
    y1 <- array(yobs,c(length(lon),length(lat),4,length(yrsmonv)/4))
    yobs <- apply(y1, c(1, 2, 4), sum, na.rm = FALSE)
    #note in above any missing in one of the 3 vals will yield NA
  }

} else

{
### For data downloaded using get...
 oDateNames <- paste0(zodate[, 1], SEASON) 
 names(oDateNames) <- oDateNames
 
 dimnames(preco) <- list(lon, lat, oDateNames)
 preco[preco < 0] <- NA
 validT  <- paste0(seq(FirstYr,LastYr,1),SEASON)
 yobs <- preco[,,validT] 
 print(validT)
}

fcstyrs <- paste0(yrFCST[length(yrFCST)],SEASON) # only one season
slctd <- paste0(slctYrs,SEASON)  # Training period
  dimnames(yobs) <- list(lon,lat,validT)
  yobs <- yobs[,,slctd]     # Note: model and obs will have slctd time
  yobs[yobs < rthr ] <- NA
  yobs               <- round(yobs, 1)
  ntot               <- length(yobs[1,1,])

  hblw <- apply(yobs,c(1, 2), quantile, probs=1/3, na.rm = TRUE)
  habv <- apply(yobs,c(1, 2), quantile, probs=2/3, na.rm = TRUE)
  hblw  <- round(array(rep(hblw,ntot),c(dim(hblw),ntot)),1)
  habv  <- round(array(rep(habv,ntot),c(dim(habv),ntot)),1)

  oblw <- 1 * (yobs < hblw)
  oabv <- 1 * (yobs > habv)
  onrm <- 1 * (yobs >= hblw & yobs <= habv)
                                            #; 3-8N   
  obsx                <- apply(yobs[3:10,3:4,],3,mean,na.rm=T)
  blwx                 <- quantile(obsx,probs=1/3,na.rm=T)
  dryx                <- 1*(obsx < blwx)

  if (stdizeObs)
  {
       # To be consistent with oSdev, need to do mannualy
     y                  <- yobs
     ysum               <- apply(y, c(1,2),sum, na.rm=T)
     ysum[ysum < rthr ] <- NA
     ycnt               <- 1* (!is.na(y))
     ycnt               <- apply(ycnt,c(1,2),sum)  # available data
     ycnt[ycnt <= 5 ]   <- NA
     oMean              <- ysum/ycnt
     oMean              <- array(rep(oMean,ntot),c(dim(oMean),ntot))
     oAnom              <- yobs - oMean

     ycnt               <- ycnt - 1
     anomsum            <- apply(oAnom*oAnom,c(1,2),sum,na.rm=T)
     oSdev              <- sqrt(anomsum/ycnt)
     oSdev[oSdev < 0.1] <- NA
     oSdev              <- array(rep(oSdev,ntot),c(dim(oSdev),ntot))

     oZscore            <- oAnom/oSdev

     hblw <- apply(oZscore,c(1, 2), quantile, probs=1/3, na.rm = TRUE)
     habv <- apply(oZscore,c(1, 2), quantile, probs=2/3, na.rm = TRUE)
     hblw  <- round(array(rep(hblw,ntot),c(dim(hblw),ntot)),1)
     habv  <- round(array(rep(habv,ntot),c(dim(habv),ntot)),1)

     oblw <- 1 * (yobs < hblw)
     oabv <- 1 * (yobs > habv)
     onrm <- 1 * (yobs >= hblw & yobs <= habv)
                                            #43-47E; 3-8N   
     obsx                <- apply(oZscore[47:53,35:44,],3,mean,na.rm=F)
     blwx                 <- quantile(obsx,probs=1/3)
     dryx                <- 1*(obsx < blwx)
  }

#########################################
#### Get all model data
  wnpAll <- matrix(NA, nrow = length(FirstYr:yrFCST),
                     ncol = length(forecastNames))  
  dimnames(wnpAll) <- list(paste0(FirstYr:yrFCST,SEASON), forecastNames)

  wepAll <- wnpAll
  ni4All <- wnpAll
  n34All <- wnpAll
  wvgAll <- wnpAll

  for (imodel in gcmStart:gcmLast)
  {
     ncf <- open.nc(paste0(fcstdir, hindcastNames[imodel]))
     nch <- open.nc(paste0(hinddir, hindcastNames[imodel]))

     timeh  <- var.get.nc(nch, "time")
     zhdate <- utcal.nc("months since 1960-01-15 23:00:00", timeh, type = "n")
     yrs    <- paste0(zhdate[ ,1],SEASON)

     wnp <- var.get.nc(nch, "wnp")  # (lon,lat,time)
     wep <- var.get.nc(nch, "wep")  # (lon,lat,time)
     ni4 <- var.get.nc(nch, "ni4")  # (lon,lat,time)
     n34 <- var.get.nc(nch, "n34")
     wvg <- var.get.nc(nch, "wvg")

     wnpAll[yrs,imodel] <- wnp
     wepAll[yrs,imodel] <- wep
     ni4All[yrs,imodel] <- ni4
     n34All[yrs,imodel] <- n34
     wvgAll[yrs,imodel] <- wvg
  }

traceback()

##### Done Get models 
 #get selectd yrs which can be different from clim or hindyrs
  wnph <- wnpAll[slctd,gcmStart:gcmLast]
  weph <- wepAll[slctd,gcmStart:gcmLast]
  ni4h <- ni4All[slctd,gcmStart:gcmLast]
  n34h <- n34All[slctd,gcmStart:gcmLast]
  wvgh <- wvgAll[slctd,gcmStart:gcmLast]

  wnpf <- wnpAll[fcstyrs,gcmStart:gcmLast]
  wepf <- wepAll[fcstyrs,gcmStart:gcmLast]
  ni4f <- ni4All[fcstyrs,gcmStart:gcmLast]
  n34f <- n34All[fcstyrs,gcmStart:gcmLast]
  wvgf <- wvgAll[fcstyrs,gcmStart:gcmLast]

  if (DETREND)
  {
     ones <- seq(1,length(slctd),1)
     wnphx <- wnph
     wephx <- weph
     ni4hx <- ni4h
     n34hx <- n34h
     wvghx <- wvgh

     wnpfx <- wnpf
     wepfx <- wepf
     ni4fx <- ni4f
     n34fx <- n34f
     wvgfx <- wvgf

     wnpRgr <- vector("list",length=nmodels)
     wepRgr <- wnpRgr
     ni4Rgr <- wnpRgr
     n34Rgr <- wnpRgr
     wvgRgr <- wnpRgr

     tyr       <- yrFCST[length(yrFCST)]  
     for (im in 1:nmodels)
     {
#### Western North Pacific
       trnd  <- lm(wnphx[,im] ~ ones)
       vtrnd <- trnd$fitted.values
       xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE)
       xtmp2 <- which(!is.na(wnphx[,im]), arr.ind = TRUE)
       rtmp  <- intersect(xtmp1, xtmp2)

       wnphx[rtmp,im] <- wnphx[rtmp,im] - vtrnd[rtmp]
       wnpfx[im]      <- wnpfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr- slctYrs[1] + 1))
       wnpRgr[[im]] <- trnd

#### Western Equatorial Pacific
       trnd  <- lm(wephx[,im] ~ ones)
       vtrnd <- trnd$fitted.values
       xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE)
       xtmp2 <- which(!is.na(wephx[,im]), arr.ind = TRUE)
       rtmp  <- intersect(xtmp1, xtmp2)

       wephx[rtmp,im] <- wephx[rtmp,im] - vtrnd[rtmp]
       wepfx[im]  <- wepfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1))
       wepRgr[[im]] <- trnd

#### Nino4
       trnd  <- lm(ni4hx[,im] ~ ones)
       vtrnd <- trnd$fitted.values
       xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE)
       xtmp2 <- which(!is.na(ni4hx[,im]), arr.ind = TRUE)
       rtmp  <- intersect(xtmp1, xtmp2)

       ni4hx[rtmp,im] <- ni4hx[rtmp,im] - vtrnd[rtmp]
       ni4fx[im]  <- ni4fx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1))
       ni4Rgr[[im]] <- trnd

#### Nino34
       trnd  <- lm(n34hx[,im] ~ ones)
       vtrnd <- trnd$fitted.values
       xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE)
       xtmp2 <- which(!is.na(n34hx[,im]), arr.ind = TRUE)
       rtmp  <- intersect(xtmp1, xtmp2)

       n34hx[rtmp,im] <- n34hx[rtmp,im] - vtrnd[rtmp]
       n34fx[im]  <- n34fx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1))
       n34Rgr[[im]] <- trnd

#### Western V Gradient
       trnd  <- lm(wvghx[,im] ~ ones)
       vtrnd <- trnd$fitted.values
       xtmp1 <- which(!is.na(vtrnd), arr.ind = TRUE)
       xtmp2 <- which(!is.na(wvghx[,im]), arr.ind = TRUE)
       rtmp  <- intersect(xtmp1, xtmp2)

       wvghx[rtmp,im] <- wvghx[rtmp,im] - vtrnd[rtmp]
       wvgfx[im]  <- wvgfx[im] - (trnd$coef[1] + trnd$coef[2] * (tyr - slctYrs[1] + 1))
       wvgRgr[[im]] <- trnd


     } # im

     wnph <- wnphx
     weph <- wephx
     ni4h <- ni4hx
     n34h <- n34hx
     wvgh <- wvghx

     wnpf <- wnpfx
     wepf <- wepfx
     ni4f <- ni4fx
     n34f <- n34fx
     wvgf <- wvgfx

  } #DETREND

##########################

  wnphZ  <- wnph 
  wephZ  <- weph 
  ni4hZ  <- ni4h
  n34hZ  <- n34h
  wvGhZ  <- wvgh

  wnpfZ  <- wnpf 
  wepfZ  <- wepf 
  ni4fZ  <- ni4f 
  n34fZ  <- n34f
  wvGfZ  <- wvgf

#### Can we do for different indexes at once?

 hindcastNames <- hindcastNames[gcmStart:gcmLast]
 forecastNames   <- forecastNames[gcmStart:gcmLast] 
 for (iprd in 1:nprdct) 
  {
####### varaibles to store
    below <- array(NA,c(length(lon),length(lat)))
    above <- below
    normal <- below
    bSig   <- below
    aSig   <- below
    nSig   <- below
    
    ssth  <- get(hnames[iprd])
    sstf  <- get(fnames[iprd])
###########################################
### Do logit 
####################################### 
    for (imodel in 1:nmodels)
    { 
      zproctime1 <- proc.time()
      print(paste0("Starting model ",hindcastNames[imodel]))  

      ## This was meant only to show what it does for a sample box (dryx) above
      dataZ    <- data.frame(Dry=dryx, EqWestPac=wephZ[,imodel], 
                             Nino34=n34hZ[,imodel],NWPac=wnphZ[,imodel],
                             Nino4=ni4hZ[,imodel], WVG=wvGhZ[,imodel])
    
      glm.EAm <- glm(formula = Dry ~ EqWestPac + NWPac + Nino4 + Nino34 + WVG, family="binomial",data=dataZ)
      glm.EAs <- glm(formula = Dry ~ Nino34, family="binomial",data=dataZ)     
      #print("Multimodel ")
      #print(summary(glm.EAm))
      print("Nino34 ")
      print(summary(glm.EAs))
     
      for (ilon in 1:length(lon)) {
        for (jlat in 1:length(lat)) {
           dry   <- oblw[ilon,jlat,]
           prdr   <- ssth[,imodel]
           ytmp <- cbind(dry, prdr)
           xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE)
           xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE)
           len1 <- length(xtmp1)
           len2 <- length(xtmp2)

           if( len1 >= nmiss & len2 >= nmiss) {
              xtmp <- intersect(xtmp1, xtmp2)
              nlen <- length(xtmp)
              if (nlen > 10) {
                 dry <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ]
                 dataH <- data.frame(Dry=dry,VAR=prdr) 
                 dataF <- data.frame(VAR=sstf[imodel]) 
                 dglm  <- glm(formula = Dry ~ VAR, family="binomial",
                                data=dataH)
                 below[ilon,jlat]  <- 100. * predict(dglm,
                                        dataF,type="response")
                 #bSig[ilon,jlat] <- below[ilon,jlat]     
                 bSig[ilon,jlat] <- 1     
                 xsig <- summary(dglm)$coef[2,][4]
                 if(xsig > plev ) { bSig[ilon,jlat] <- NA} 
              }
           }

           nrm   <- onrm[ilon,jlat,]
           prdr   <- ssth[,imodel]
           ytmp <- cbind(nrm, prdr)
           xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE)
           xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE)
           len1 <- length(xtmp1)
           len2 <- length(xtmp2)

           if( len1 >= nmiss & len2 >= nmiss) {
              xtmp <- intersect(xtmp1, xtmp2)
              nlen <- length(xtmp)
              if (nlen > 10) {
                  nrm <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ]

                  dataH <- data.frame(Nrm=nrm,VAR=prdr)
                  dataF <- data.frame(VAR=sstf[imodel])
                  dglm  <- glm(formula = Nrm ~ VAR, family="binomial",
                               data=dataH)
                  normal[ilon,jlat]  <- 100. * predict(dglm,
                                        dataF,type="response")
                  #nSig[ilon,jlat] <- normal[ilon,jlat]
                  nSig[ilon,jlat] <- 1
                  xsig <- summary(dglm)$coef[2,][4]
                  if(xsig > plev ) { nSig[ilon,jlat] <- NA}
              }
           }

           wet   <- oabv[ilon,jlat,]
           prdr   <- ssth[,imodel]
           ytmp <- cbind(wet, prdr)
           xtmp1 <- which(!is.na(ytmp[, 1]), arr.ind = TRUE)
           xtmp2 <- which(!is.na(ytmp[, 2]), arr.ind = TRUE)
           len1 <- length(xtmp1)
           len2 <- length(xtmp2)

           if( len1 >= nmiss & len2 >= nmiss) {
              xtmp <- intersect(xtmp1, xtmp2)
              nlen <- length(xtmp)
              if (nlen > 10) {
                 wet <- ytmp[xtmp,1]; prdr <- ytmp[xtmp,2 ]

                 dataH <- data.frame(Wet=wet,VAR=prdr)
                 dataF <- data.frame(VAR=sstf[imodel])
                 dglm  <- glm(formula = Wet ~ VAR, family="binomial",
                              data=dataH)
                 above[ilon,jlat]  <- 100. * predict(dglm,
                                      dataF,type="response")
                 #aSig[ilon,jlat] <- above[ilon,jlat]     
                 aSig[ilon,jlat] <- 1     
                 xsig <- summary(dglm)$coef[2,][4]
                 if(xsig > plev ) { aSig[ilon,jlat] <- NA}
              }
           }

           xb <- below[ilon,jlat]
           xn <- normal[ilon,jlat]
           xa <- above[ilon,jlat]
           xt <- xb+xn+xa

           if (is.finite(xt) && xt > 100.01) 
           { 
              above[ilon,jlat] <- 100. - 
                           (below[ilon,jlat] + normal[ilon,jlat])
           }
       } # ilat
     } # ilon

###################

     dirdat <- paste0(paste0(dirdat0,forecastNames[imodel]),VARNMTHD)
    
     fileName = paste0(PRDCTRS[iprd],FTYPES)
     source("write2ncLogitRgr.R")
     print("Done NetCDF for Logit Regression")


     print(paste("Completed fcst for imodel = ",imodel))
     print(" ")

     print(date())

     zproctime2 <- proc.time()

     show(zproctime2 - zproctime1)

    }  # imodel

  }
#stopCluster(klust)

print(date())

zproctime2 <- proc.time()

show(zproctime2 - zproctime0)