"""Tally, per year, the region in which each Atlantic storm is first seen.

The script reads the NHC track file ``atl_hurdat.txt`` from the current
directory, keeps fixes from 1950 onward whose status is not one of
``EXCLUDED_STATUSES``, classifies every storm by the latitude/longitude of
its first remaining fix, prints descriptive statistics of the yearly counts
and writes the yearly table (plus a mean row and a median row) to
``./ATL_1950_Present_FormationRegionStats.csv``.

Run it as a script; importing the module only defines the helpers.
"""
import os
import sys
from datetime import datetime

import pandas as pd
from pandas import Series

# The full option key is required: the bare pattern 'precision' is ambiguous
# on pandas versions that also register 'styler.format.precision'.
pd.set_option('display.precision', 1)
pd.set_option('display.max_rows', None)

HURDAT_PATH = 'atl_hurdat.txt'
OUTPUT_CSV = "./ATL_1950_Present_FormationRegionStats.csv"
FIRST_YEAR = 1950
# Fixes carrying one of these status codes are ignored entirely.
EXCLUDED_STATUSES = ('TD', 'EX', 'LO', 'DB', 'SD', 'WV')

TRACK_COLUMNS = ['Basin', 'StormNumber', 'StormYear', 'StormName',
                 'StormStatus', 'Lat', 'Lon', 'LandFall', 'Date_Time', 'Day',
                 'Hour', 'Minutes', 'MaxWind']
ANNUAL_HEADER = ['YEAR', 'NAME', 'Lat', 'Lon', 'Reg']
SummDFCols = ['YEAR', 'MDR', 'GMX', 'EXT', 'Total',
              'MDRFrac', 'GMXFrac', 'EXTFrac']


def lat_lon_to_float(v):
    """Convert an NHC coordinate string such as ``'28.0N'`` to a float.

    The last character is the hemisphere letter; ``S`` and ``W`` yield a
    negative value, anything else a positive one.

    Raises:
        IndexError: if *v* is empty.
        ValueError: if the part before the hemisphere letter is not a number.
    """
    multiplier = -1 if v[-1] in ('S', 'W') else 1
    return float(v[:-1]) * multiplier


def parse_hurdat_lines(lines):
    """Parse track-file lines into one row per fix, ordered as TRACK_COLUMNS.

    Lines starting with ``AL`` are storm headers (storm code, then name);
    every other non-blank line is a fix belonging to the most recent header.

    Args:
        lines: iterable of text lines, e.g. an open file.

    Returns:
        A list of 13-element lists matching ``TRACK_COLUMNS``.

    Raises:
        ValueError: if a fix line appears before any header line, or a field
            cannot be parsed.
    """
    rows = []
    header = None
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(',')
        if line.startswith('AL'):
            storm_code = fields[0].strip()
            # Storm code layout: 2-char basin, 2-digit number, then the year.
            header = (storm_code[0:2], int(storm_code[2:4]),
                      int(storm_code[4:]), fields[1].strip())
            continue
        if header is None:
            raise ValueError('track record found before any storm header line')
        basin, storm_number, storm_year, storm_name = header
        date_field, time_field = fields[0], fields[1]
        # The time field keeps its leading blank, which matches the space in
        # the format string.
        fix_time = datetime.strptime(date_field + time_field, '%Y%m%d %H%M')
        fix_day = datetime.strptime(date_field, '%Y%m%d')
        rows.append([
            basin, storm_number, storm_year, storm_name,
            fields[3].strip(),
            lat_lon_to_float(fields[4].strip()),
            lat_lon_to_float(fields[5].strip()),
            fields[2].strip(),
            fix_time, fix_day, fix_time.hour, fix_time.minute,
            int(fields[6].strip()),
        ])
    return rows


def classify_formation_region(lat, lon):
    """Return the region label ('MDR', 'GMX' or 'EXT') for a position.

    NOTE(review): the labels presumably stand for Main Development Region,
    Gulf of Mexico and everything else -- confirm with the author.
    NOTE(review): a fix at exactly lon == -90 with 18 <= lat < 21 matches none
    of the first three tests and is labelled 'EXT'; confirm this is intended.
    """
    if 0 < lat < 21 and -90 < lon < -20:
        return "MDR"
    if lat >= 21 and lon < -81:
        return "GMX"
    if lat >= 18 and -100 < lon < -90:
        return "GMX"
    return "EXT"


def build_track_frame(rows):
    """Build the fix table, add ``MaxWSqd`` and apply year/status filters."""
    df = pd.DataFrame(rows, columns=TRACK_COLUMNS)
    df['MaxWSqd'] = df['MaxWind'] * df['MaxWind']
    df = df[df['StormYear'] >= FIRST_YEAR]
    return df[~df['StormStatus'].isin(EXCLUDED_STATUSES)]


def summarize_formation_regions(df):
    """Classify each storm by its first fix in *df* and count per year.

    Returns:
        ``(annual, summary_rows)`` where *annual* is a list starting with a
        header row followed by ``[year, name, lat, lon, region]`` per storm,
        and *summary_rows* holds one row per year ordered as ``SummDFCols``.
    """
    annual = [list(ANNUAL_HEADER)]
    summary_rows = []
    # The first remaining fix of every (year, storm number) pair, in file order.
    first_fixes = df.drop_duplicates(subset=['StormYear', 'StormNumber'],
                                     keep='first')
    for year in first_fixes['StormYear'].unique():
        storms = first_fixes[first_fixes['StormYear'] == year]
        counts = {"MDR": 0, "GMX": 0, "EXT": 0}
        for name, lat, lon in zip(storms['StormName'], storms['Lat'],
                                  storms['Lon']):
            region = classify_formation_region(lat, lon)
            counts[region] += 1
            annual.append([year, name, lat, lon, region])
        # total >= 1 because every year listed has at least one storm.
        total = counts["MDR"] + counts["GMX"] + counts["EXT"]
        summary_rows.append([
            year, counts["MDR"], counts["GMX"], counts["EXT"], total,
            (counts["MDR"] / total) * 100,
            (counts["GMX"] / total) * 100,
            (counts["EXT"] / total) * 100,
        ])
    return annual, summary_rows


def build_summary_frame(summary_rows):
    """Return the yearly table with 'mean' and 'median' rows appended.

    Both statistics are computed from the yearly rows only, so the median is
    not influenced by the appended mean row.
    """
    per_year = pd.DataFrame(summary_rows, columns=SummDFCols)
    stats = pd.DataFrame([per_year.mean(), per_year.median()],
                         index=['mean', 'median'])
    return pd.concat([per_year, stats])


def main():
    """Run the analysis: read the track file, print stats, write the CSV."""
    with open(HURDAT_PATH, 'r') as f:
        rows = parse_hurdat_lines(f)
    tracks = build_track_frame(rows)
    _annual, summary_rows = summarize_formation_regions(tracks)
    if not summary_rows:
        sys.exit('No qualifying storms found in ' + HURDAT_PATH)
    # Describe the yearly rows only; the appended mean/median rows would
    # otherwise be counted as two extra years.
    print(pd.DataFrame(summary_rows, columns=SummDFCols).describe())
    # NOTE: index=False drops the 'mean'/'median' labels, so those are simply
    # the last two rows of the file and their YEAR cell holds the mean/median
    # of the year values.
    build_summary_frame(summary_rows).to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()