# Merge sea ice area/extent in the Arctic # - Fill missing data in OISST ver2.1 with ver2 # Treat outliers # Interpolate NaN # Convert sea ice data into CSV file # # # Script version 2.2 # # Python version: 3.10.13 # # Package version # numpy: 1.26.4 # pandas: 2.2.0 # matplotlib: 3.7.3 # # Ver1.0.0: Created by Mariko Koseki, 17.06.2024 # Ver1.0.1: Updated by Mariko, 21.06.2024 # Ver2.0: updated by Mariko, 26.05.2025 # ver2.1: updated by Mariko, 12.06.2025 # ver2.2: updated by Mariko, 08.12.2025 import numpy as np import pandas as pd import datetime from datetime import timedelta from dateutil.relativedelta import relativedelta import statistics import math import collections import matplotlib import matplotlib.pyplot as plt import matplotlib.dates as mdates import matplotlib.ticker as ptick import os import glob import sys import platform import calendar print('--- Python version ---') print(platform.python_version()) print('--- Package version ---') print('numpy: ', np.__version__) print('pandas: ', pd.__version__) print('matplotlib: ', matplotlib.__version__) ##--- Input -----------------------------------------## args = sys.argv if len(args) == 2: year_start = args[1] if len(year_start) == 4: yyyy1 = int(year_start[0:4]) yyyy2 = yyyy1 else: print('---How to use this script---') print('python merge_oisst_ice.py ') print('or') print('python merge_oisst_ice.py ') print(' = start year; = end year') print('') print('Example: python merge_oisst_ice.py 2023') print('Example: python merge_oisst_ice.py 1995 2023') print('') sys.exit() elif len(args) == 3: year_start = args[1] year_end = args[2] if (len(year_start) == 4) & (len(year_end) == 4): yyyy1 = int(year_start[0:4]) yyyy2 = int(year_end[0:4]) else: print('') print('---How to use this script---') print('python merge_oisst_ice.py ') print('or') print('python merge_oisst_ice.py ') print(' = start year; = end year') print('') print('Example: python merge_oisst_ice.py 2023') print('Example: python merge_oisst_ice.py 1995 2023') print('') sys.exit() else: print('') print('---How to use this script---') print('python merge_oisst_ice.py ') print('or') print('python merge_oisst_ice.py ') print(' = start year; = end year') print('') print('Example: python merge_oisst_ice.py 2023') print('Example: python merge_oisst_ice.py 1995 2023') print('') sys.exit() ##--- Functions: Data treatment --------------------------## ### Remove leap day ### ''' Remove 29th of February ''' def rm_leap(input_df1): df1 = input_df1.copy() date = df1.iloc[0,0] yyyy = date.year #print(calendar.isleap(yyyy)) if calendar.isleap(yyyy): print(yyyy, ' is a leap year') leap_day = datetime.datetime(yyyy, 2, 29) drop_ind = df1.index[df1['dtime'] == leap_day] df_drop = df1.drop(drop_ind) df_drop = df_drop.reset_index(drop=True) else: df_drop = df1.copy() #print(df_drop) return df_drop ### Fill missing data in ver2.1 with ver2 ### def fill_oisst(input_df21, input_df2): ''' Fill missing data in OISST ver2.1 with ver2 If there is no ver2 data, set NaN ''' df21 = input_df21.copy() #ver2.1 df2 = input_df2.copy() #ver2 ### AREA ### df21['sea_ice_area_merge'] = df21['sea_ice_area'].where(df21['sea_ice_area'] > 10, df2['sea_ice_area']) #if "False" (area <= 10), set ver2. if "True", set ver2.1 ### EXTENT ### df21['sea_ice_extent_merge'] = df21['sea_ice_extent'].where(df21['sea_ice_extent'] > 10, df2['sea_ice_extent']) print(df21.head(120)) #print(df21.tail(60)) return df21 ### Remove outliers ### #ver2.0 def remove_outlier(input_df4): df4 = input_df4.copy() date = df4.iloc[0,0] yyyy = date.year if yyyy == 2009: print('year is 2009') ''' ## OLD version ### #df4 = df4[(df4['dtime'] <= datetime.datetime(yyyy, 4, 10)) | (df4['dtime'] >= datetime.datetime(yyyy, 5, 20))] #print(df4.head(60)) #start_date = datetime.datetime(yyyy, 1, 1) #end_date = datetime.datetime(yyyy, 12, 31) #all_date = pd.DataFrame(pd.date_range(start = start_date, end = end_date, freq='D'), columns=['dtime']) #df_fill = pd.merge(all_date, df4, on = 'dtime', how = 'left') #print(df_fill.head(60)) ''' ### if "False" (date: between 4/11 and 5/19), set NaN. if "True", set "sea_ice_area_merge" df4['sea_ice_area_merge'] = df4['sea_ice_area_merge'].where((df4['dtime'] <= datetime.datetime(yyyy, 4, 10)) | (df4['dtime'] >= datetime.datetime(yyyy, 5, 20)), np.nan) print(df4.head(120)) #print(df4.tail(60)) #sys.exit() elif yyyy == 2025: print('year is 2025') ### if "False" (date: between 3/13 and 4/30), set NaN. if "True", set "sea_ice_area_merge" df4['sea_ice_area_merge'] = df4['sea_ice_area_merge'].where((df4['dtime'] <= datetime.datetime(yyyy, 3, 12)) | (df4['dtime'] >= datetime.datetime(yyyy, 5, 1)), np.nan) df4['sea_ice_area_merge'] = df4['sea_ice_area_merge'].where((df4['dtime'] <= datetime.datetime(yyyy, 9, 25)) | (df4['dtime'] >= datetime.datetime(yyyy, 10, 15)), np.nan) #ver2.2 print(df4.head(120)) #sys.exit() else: df4 = df4.copy() #print(df_fill) #sys.exit() return df4 ### Fill missing value with NaN ### def fill_nan(input_df5): df5 = input_df5.copy() ### AREA ### df5['sea_ice_area_merge'] = df5['sea_ice_area_merge'].where(df5['sea_ice_area_merge'] > 10, np.nan) #if "False" (area <= 10), set NaN. if "True", set sea ice area merge ### EXTENT ### df5['sea_ice_extent_merge'] = df5['sea_ice_extent_merge'].where(df5['sea_ice_extent_merge'] > 10, np.nan) #print(df5.head(60)) #print(df5.tail(60)) return df5 ### Interpolate missing values ### def inter_df(input_df6): ''' Linear interpolation The same value is repeated if there is NaN at the top/bottom of DataFrame ''' df6 = input_df6.copy() date = df6.iloc[0,0] yyyy = date.year if yyyy == 2025: #ver2.1 print('year is 2025') ### AREA ### df6['sea_ice_area_interpolated'] = df6['sea_ice_area_merge'] ### EXTENT ### df6['sea_ice_extent_interpolated'] = df6['sea_ice_extent_merge'] else: ### AREA ### df6['sea_ice_area_interpolated'] = df6['sea_ice_area_merge'].interpolate(limit_direction='both', method='linear') ### EXTENT ### df6['sea_ice_extent_interpolated'] = df6['sea_ice_extent_merge'].interpolate(limit_direction='both', method='linear') print(df6.head(120)) #print(df6.tail(60)) #sys.exit() return df6 ### Plot interpolated sea ice area/extent ### def plot_seaice(input_df7): df7 = input_df7.copy() date = df7.iloc[0,0] yyyy = date.year time_obs = df7['dtime'] ### AREA ### area = df7['sea_ice_area'] area_inter = df7['sea_ice_area_interpolated'] ### EXTENT ### extent = df7['sea_ice_extent'] extent_inter = df7['sea_ice_extent_interpolated'] fig3 = plt.figure(figsize=(8,5)) ax3 = fig3.add_subplot(111) ### AREA ### ax3.plot(time_obs, area_inter, linewidth = 4.7, label= 'Sea ice area (interpolated/ver2.1&ver2)', alpha=0.7) ax3.plot(time_obs, area, linewidth = 1.3, label = 'Sea ice area (ver2.1)', color='k') ### EXTENT ### #ax3.plot(time_obs, extent_inter, linewidth = 4.7, label= 'Sea ice extent (interpolated)', color='magenta', alpha=0.7) #ax3.plot(time_obs, extent, linewidth = 1.3, label = 'Sea ice extent', color='r') ax3.legend() ## Save figures ## fig_name3 = '/nird/datapeak/NS9873K/www/users/mariko/fig/seaice/OISST/sea_ice_area_interpolated_' + str(yyyy) + '.png' #ver2.2 fig3.savefig(fig_name3, dpi=200, format='png') plt.show() ##--- Set version ---------------------------------------## ver21 = 'v2.1' ver2 = 'v2' ##--- Create list of year ------------------------------------------------------## yyyy_int_list = [] for yyyy in range(yyyy1, yyyy2+1): yyyy_int_list.append(yyyy) print(yyyy_int_list) ##--- Set path ------------------------------------------## ## Set path to input files ## ''' Set path to input directory ### ''' seaice_dir = '/nird/datapeak/NS9873K/norcpm/validation/seaicepre/' #ver2.2 ### OISST ### out_ice_oisst21 = seaice_dir + 'oisst/' + ver21 + '/' out_ice_oisst2 = seaice_dir + 'oisst/' + ver2 + '/' ## Output: Set path to output file ## ''' Set path to output file ''' out_dir = seaice_dir + 'oisst/merge/' if not os.path.exists(out_dir): os.makedirs(out_dir) for YEAR in yyyy_int_list: print('') print('Year: ', YEAR) print('') ##--- Read CSV files ------------------------------## ## Paths to CSV files ## file_oisst_list21 = sorted(glob.glob(out_ice_oisst21 + 'seaice_oisst_' + str(YEAR) + '*')) file_oisst_list2 = sorted(glob.glob(out_ice_oisst2 + 'seaice_oisst_' + str(YEAR) + '*')) print('') print('file of OISST ver2.1: ', file_oisst_list21) print('') print('file of OISST ver2: ', file_oisst_list2) print('') ## CSV files -> pandas.DataFrame ## if len(file_oisst_list21) > 0: ice_oisst_all21 = [] for file_oisst21 in file_oisst_list21: ice_oisst21 = pd.read_csv(file_oisst21) #print(ice_oisst21) ice_oisst_all21.append(ice_oisst21) if len(file_oisst_list2) > 0: ice_oisst_all2 = [] for file_oisst2 in file_oisst_list2: ice_oisst2 = pd.read_csv(file_oisst2) #print(ice_oisst2) ice_oisst_all2.append(ice_oisst2) ## Combine DataFrame objects ## if len(file_oisst_list21) > 0: ice_oisst21_mer= pd.concat(ice_oisst_all21, ignore_index=True, axis=0) #print('OISST ver2.1: ') #print(ice_oisst21_mer.head(60)) #print(ice_oisst21_mer.tail(60)) if len(file_oisst_list2) > 0: ice_oisst2_mer= pd.concat(ice_oisst_all2, ignore_index=True, axis=0) #print('OISST ver2: ') #print(ice_oisst2_mer.head(60)) #print(ice_oisst2_mer.tail(60)) ## Change 'object' to 'datetime' ## if len(file_oisst_list21) > 0: ice_oisst21_mer['dtime'] = pd.to_datetime(ice_oisst21_mer['dtime']) if len(file_oisst_list2) > 0: ice_oisst2_mer['dtime'] = pd.to_datetime(ice_oisst2_mer['dtime']) ##--- Data treatment ----------------------------------## ## Call Functions ## ### Fill missing data in ver2.1 with ver2 ### if len(file_oisst_list2) > 0: # OISST ver2 is available 1981/09/01 - 2020/04/26 df_fill = fill_oisst(ice_oisst21_mer, ice_oisst2_mer) else: # if OISST ver2 is not available df_fill = fill_oisst(ice_oisst21_mer, ice_oisst21_mer) ### Remove leap day ### df_drop_leap = rm_leap(df_fill) ### Fill missing value with NaN ### df_fill_nan = fill_nan(df_drop_leap) ### Remove outliers ### df_post = remove_outlier(df_fill_nan) ### Interpolate missing values ### df_inter = inter_df(df_post) ### Plot interpolated sea ice area ### plot_seaice(df_inter) ##--- Save DataFrame as CSV --------------------------------------## df_inter.to_csv(out_dir + 'seaice_oisst_merge_' + str(YEAR) + '.csv') print('') print('') print('Saved sea ice area/extent!') print('') print('-----------------------') print('') print('Completed!')