# Calculate climatologies of observed daily sea ice area in the Arctic (interpolated)
# 12 periods: Jan-Jun, Feb-Jul, Mar-Aug, Apr-Sep, May-Oct, Jun-Nov,
#             Jul-Dec, Aug-Jan, Sep-Feb, Oct-Mar, Nov-Apr, Dec-May
# 20 years: 2003-2022
# Convert climatologies into CSV file
#
# Script version 2.1.0
#
# Script works with:
# Python version 3.10.13
#
# Package   version
# numpy:    1.26.3
# pandas:   2.2.0
#
# Ver1.0.0: Created by Mariko Koseki, 13.12.2023
# Ver1.1.0: Updated by Mariko, 06.02.2024
# Ver2.0.0: Updated by Mariko, 13.08.2024
# Ver2.1.0: Updated by Mariko, 15.08.2024

import datetime
import glob
import os
import platform  # Ver1.1.0
import sys

import numpy as np
import pandas as pd


##--- Select variable -----------------------------------## #ver2.0.0
# NOTE(review): 'var' only selects the output sub-directory; the input column
# ('AREA_Interpolated') and the output file names ('obs_area_...') stay tied
# to "area" whatever is chosen here -- confirm before switching to 'extent'.
var = 'area'
#var = 'extent'

##--- Create list of year -------------------------------## #ver2.0.0
yyyy1 = 2003
yyyy2 = 2022
yyyy_int_list = list(range(yyyy1, yyyy2 + 1))

##--- Set path ---------------------------------------------##
## Set path to input files ## # Ver1.1.0
# 'out_obs_post': directory holding the yearly input CSV files
seaice_dir = '/nird/projects/NS9873K/norcpm/validation/seaicepre/'  #ver2.0.0
out_obs_post = seaice_dir + 'obs_post/'

## Set path to output files ## # Ver1.1.0
# 'out_clm_obs': directory receiving the climatology CSV files
out_clm_obs = seaice_dir + 'clm/obs/' + var + '/'  #ver2.0.0

##--- Column names and period length -----------------------##
DATE_COLUMN = 'DATE'
VALUE_COLUMN = 'AREA_Interpolated'
PERIOD_MONTHS = 6


def period_end_month(start_month: int, n_months: int = PERIOD_MONTHS) -> int:
    """Return the last calendar month of a period, wrapping past December.

    Args:
        start_month: First month of the period (1-12).
        n_months: Number of months in the period (1-12).

    Returns:
        The calendar month (1-12) in which the period ends.

    Raises:
        ValueError: If either argument is outside 1-12.
    """
    if not 1 <= start_month <= 12:
        raise ValueError(f'start_month must be in 1..12, got {start_month}')
    if not 1 <= n_months <= 12:
        raise ValueError(f'n_months must be in 1..12, got {n_months}')
    return (start_month + n_months - 2) % 12 + 1


def period_label(start_month: int, n_months: int = PERIOD_MONTHS) -> str:
    """Return the 'MM_MM' tag used in output file names, e.g. '08_01'."""
    end_month = period_end_month(start_month, n_months)
    return f'{start_month:02d}_{end_month:02d}'


def build_climatology(frames_by_year: dict[int, pd.DataFrame]) -> pd.DataFrame:
    """Combine yearly tables into one climatology table.

    The 'DATE' column is taken from the first year only; every year
    contributes its 'AREA_Interpolated' column, renamed to the year as a
    string. Frames are joined column-wise on their index.
    NOTE(review): this assumes all yearly files share the same index
    (same number of rows per year) -- confirm against the obs_post files.

    Args:
        frames_by_year: Mapping year -> DataFrame, iterated in insertion
            order. Each frame needs 'AREA_Interpolated'; the first one also
            needs 'DATE'.

    Returns:
        DataFrame with columns 'mmdd' (month-day string), one column per
        year, and 'mean' (row-wise mean over the year columns), with a fresh
        0-based index.

    Raises:
        ValueError: If ``frames_by_year`` is empty.
    """
    if not frames_by_year:
        raise ValueError('at least one yearly DataFrame is required')

    years = list(frames_by_year)
    year_cols = [str(year) for year in years]

    # Collect every column first and concatenate once, instead of growing the
    # frame inside a loop.
    pieces = [frames_by_year[years[0]].loc[:, [DATE_COLUMN]]]
    for year, col in zip(years, year_cols):
        yearly = frames_by_year[year].loc[:, [VALUE_COLUMN]]
        pieces.append(yearly.rename({VALUE_COLUMN: col}, axis='columns'))
    seaice_area = pd.concat(pieces, axis=1)

    ## Compute climatology over all configured years ##
    clm = seaice_area[year_cols].copy()
    clm['mean'] = seaice_area[year_cols].mean(axis=1)

    ## Insert 'mmdd' column in the first column ##
    mmdd = pd.to_datetime(seaice_area[DATE_COLUMN]).dt.strftime('%m-%d')
    clm.insert(0, 'mmdd', mmdd)
    return clm.reset_index(drop=True)


def extract_period(clm: pd.DataFrame, start_month: int,
                   n_months: int = PERIOD_MONTHS) -> pd.DataFrame:
    """Return the rows of ``clm`` that fall in a multi-month period.

    Rows are selected by the month number at the start of the 'mmdd' column,
    so no dummy year is needed and a '02-29' row is handled like any other
    February day. For periods that wrap around the year end (e.g. Aug-Jan)
    the late-year rows come first, followed by the early-year rows.

    Args:
        clm: Table with an 'mmdd' column of 'MM-DD' strings.
        start_month: First month of the period (1-12).
        n_months: Length of the period in months (1-12).

    Returns:
        The selected rows with a fresh 0-based index.

    Raises:
        ValueError: If ``start_month`` or ``n_months`` is outside 1-12.
    """
    end_month = period_end_month(start_month, n_months)
    month = clm['mmdd'].str.slice(0, 2).astype(int)

    if start_month <= end_month:
        selected = clm[(month >= start_month) & (month <= end_month)]
    else:
        # Period crosses the year end: late-year part, then early-year part.
        selected = pd.concat(
            [clm[month >= start_month], clm[month <= end_month]], axis=0
        )
    return selected.reset_index(drop=True)


def main() -> None:
    """Read the yearly CSV files and write all climatology CSV files."""
    print('--- Python version ---')
    print(platform.python_version())
    print('--- Package version ---')
    print('numpy: ', np.__version__)
    print('pandas: ', pd.__version__)

    os.makedirs(out_clm_obs, exist_ok=True)

    ##--- Read CSV files ---------------------------------------## #ver2.0.0
    frames_by_year = {}
    for year in yyyy_int_list:
        file_obs_post = out_obs_post + 'sea_ice_obs_post_' + str(year) + '.csv'
        frames_by_year[year] = pd.read_csv(file_obs_post, index_col=0)

    clm = build_climatology(frames_by_year)

    ##--- Save DataFrame as CSV --------------------------------## #ver2.0.0
    # The full-year table is written with its index column, as before.
    clm.to_csv(out_clm_obs + 'obs_area_clm_all.csv')

    ##--- Extract and save climatology for each period ---------## #ver2.0.0
    for start_month in range(1, 13):
        mm = period_label(start_month)
        df_clm = extract_period(clm, start_month)
        df_clm.to_csv(out_clm_obs + 'obs_area_' + mm + '_clm.csv', index=False)  #ver2.1.0

    print('')
    print('Saved climatology')
    print('')
    print('Complated!!')


if __name__ == '__main__':
    main()