# -*- coding: utf-8 -*-
"""
Convert daily PRISM tmin .bil rasters into a single multi-year NetCDF.

Created on Wed Dec 30 20:52:02 2020
@author: 53575

The input rasters are processed in two halves to accommodate RAM limits
(laptops with 32 GB or less): each half is written to an intermediate
NetCDF (T_Min_PartOne.nc / T_Min_PartTwo.nc), then both are merged into
T_Min_<InitYear>_<FinalYear>.nc.

Replace "tmin" with "tmax" in the folder pattern (and the output names)
to build NetCDFs for tmax data instead.
"""
import sys
import os
import glob
import datetime

import numpy as np
import pandas as pd
import rasterio
import xarray as xr
from osgeo import gdal_array

print("Made it past imports")

# THIS IS WHERE YOU WOULD CHANGE THE YEARS
InitYear = 1991
FinalYear = 2020

# Directory for the data, relative to the location of this script.
MainDirectory = os.path.dirname(os.path.abspath(__file__)) + '/PRISM_Data/'

# Recursive pattern matching every daily .bil raster for the year range.
folder = MainDirectory + str(InitYear) + '_' + str(FinalYear) + '/tmin/**/*.bil'


def _dataset_from_bil(path):
    """Read one daily PRISM .bil raster into a single-timestep xr.Dataset.

    The observation date is encoded in the filename as YYYYMMDD in the
    8 characters just before the trailing "_bil.bil" (i.e. path[-16:-8]).
    Returns a Dataset with one variable, tmpmin(lat, lon), and a
    length-1 time coordinate.
    """
    date_str = path[-16:-8]
    timestamp = pd.date_range(date_str, periods=1)
    # Context managers close the raster handles promptly; thousands of
    # files are processed, so leaked handles would accumulate.
    with rasterio.open(path) as src:
        temperature = src.read(1)
    # NOTE(review): xr.open_rasterio is deprecated in newer xarray in
    # favor of rioxarray; kept here to match the environment this script
    # was written for.
    with xr.open_rasterio(path) as grid:
        lat = grid.y
        lon = grid.x
    return xr.Dataset(
        data_vars=dict(
            tmpmin=(['lat', 'lon'], temperature),
        ),
        coords=dict(
            lat=(["lat"], lat),
            lon=(["lon"], lon),
            time=(["time"], timestamp),
        ),
    )


def _write_part(files, out_path):
    """Concatenate the given .bil files along time and write one NetCDF.

    Releases the in-memory datasets after writing to free RAM before the
    next part is processed.
    """
    datasets = []
    for num, path in enumerate(files):
        print(num)
        datasets.append(_dataset_from_bil(path))
    combined = xr.concat(datasets, dim="time")
    combined.to_netcdf(path=out_path, mode="w", format="NETCDF4")
    # Clear references so the per-day arrays can be garbage collected.
    datasets.clear()
    del combined


def main():
    """Build the two intermediate NetCDFs, then merge them into the final file."""
    print('MainDirectory:' + MainDirectory)
    print('Input folder:' + folder)

    # Glob once and sort so the time axis is assembled in chronological
    # order regardless of filesystem listing order (glob.glob gives no
    # ordering guarantee, and the original re-globbed for each half).
    files = sorted(glob.glob(folder, recursive=True))
    folder_len = len(files)
    print(folder_len)
    if folder_len == 0:
        raise FileNotFoundError('No .bil files matched pattern: ' + folder)

    # Split the work in two halves to bound peak memory use.  The first
    # half covers indices 0..set_one inclusive (matching the original
    # "num <= set_one" predicate), the second covers the rest.
    set_one = folder_len // 2
    set_two = folder_len - set_one
    print(set_one, set_two)

    _write_part(files[:set_one + 1], MainDirectory + '/T_Min_PartOne.nc')
    print("Going to Part 2")
    _write_part(files[set_one + 1:], MainDirectory + '/T_Min_PartTwo.nc')
    print(MainDirectory)

    # Merge both halves lazily (dask-chunked) and write the final NetCDF.
    final = xr.open_mfdataset(
        [MainDirectory + '/T_Min_PartOne.nc', MainDirectory + '/T_Min_PartTwo.nc'],
        parallel=True,
        chunks={"lat": 621, "lon": 1405, "time": 365},
    )
    final.to_netcdf(
        path=MainDirectory + '/T_Min_' + str(InitYear) + '_' + str(FinalYear) + '.nc',
        mode="w",
        format="NETCDF4",
    )


if __name__ == "__main__":
    main()