# Interpolate Arctic sea ice area/extent (OSTIA)
# Treat outliers
# Interpolate NaN
# Save results as a CSV file
#
#
# Script version 1.1
#
# Python version: 3.10
#
# Package version
#   numpy:
#   pandas: 
#   matplotlib:
#
# Ver1.0: Created by Mariko Koseki, 18.06.2025
# ver1.1: updated by Mariko, 09.01.2026


import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import statistics
import math
import collections
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ptick
import os
import glob
import sys
import platform
import calendar


print('--- Python version ---')
print(platform.python_version())

print('--- Package version ---')
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('matplotlib: ', matplotlib.__version__)


##--- Input -----------------------------------------##
args = sys.argv
if len(args) == 2:
    year_start = args[1]
    if len(year_start) == 4:
        yyyy1 = int(year_start[0:4])
        yyyy2 = yyyy1

    else:
        print('---How to use this script---')
        print('python', args[0] ,'<yyyy>')
        print('or')
        print('python', args[0] ,'<yyyy1> <yyyy2>')
        print('<yyyy1> = start year; <yyyy2> = end year')
        print('')
        print('Example: python', args[0] ,'2023')
        print('Example: python', args[0] ,'1995 2023')
        print('')
        sys.exit()



elif len(args) == 3:
    year_start = args[1]
    year_end = args[2]
    if (len(year_start) == 4) & (len(year_end) == 4):
        yyyy1 = int(year_start[0:4])
        yyyy2 = int(year_end[0:4])
    else:
        print('')
        print('---How to use this script---')
        print('python', args[0] ,'<yyyy>')
        print('or')
        print('python', args[0] ,'<yyyy1> <yyyy2>')
        print('<yyyy1> = start year; <yyyy2> = end year')
        print('')
        print('Example: python', args[0] ,'2023')
        print('Example: python', args[0] ,'1995 2023')
        print('')
        sys.exit()


else:
    print('')
    print('---How to use this script---')
    print('python', args[0] ,'<yyyy>')
    print('or')
    print('python', args[0] ,'<yyyy1> <yyyy2>')
    print('<yyyy1> = start year; <yyyy2> = end year')
    print('')
    print('Example: python', args[0] ,'2023')
    print('Example: python', args[0] ,'1995 2023')
    print('')
    sys.exit()




##--- Functions: Data treatment --------------------------##
### Remove leap day ###
'''
Remove 29th of February
'''
def rm_leap(input_df1):
    df1 = input_df1.copy()
    date = df1.iloc[0,0]
    yyyy = date.year
    #print(calendar.isleap(yyyy))
    if calendar.isleap(yyyy):
        print(yyyy, ' is a leap year')
        leap_day = datetime.datetime(yyyy, 2, 29)
        drop_ind = df1.index[df1['dtime'] == leap_day]
        df_drop = df1.drop(drop_ind)
        df_drop = df_drop.reset_index(drop=True)

    else:
        df_drop = df1.copy()

    #print(df_drop)
    return df_drop



### Remove outliers ###
def remove_outlier(input_df2):
    df2 = input_df2.copy()
    date = df2.iloc[0,0]
    yyyy = date.year
    if yyyy == 2023:
        print('year is 2023')
        print('')

        ### if "False" (date: between 10/03 and 10/04), set NaN. if "True", set "sea_ice"
        df2['sea_ice_area_post'] = df2['sea_ice_area'].where((df2['dtime'] <= datetime.datetime(yyyy, 10, 2)) | (df2['dtime'] >= datetime.datetime(yyyy, 10, 5)), np.nan)

        df2['sea_ice_extent_post'] = df2['sea_ice_extent'].where((df2['dtime'] <= datetime.datetime(yyyy, 10, 2)) | (df2['dtime'] >= datetime.datetime(yyyy, 10, 5)), np.nan)



    else:
        ### AREA ###
        df2['sea_ice_area_post'] = df2['sea_ice_area']

        ### EXTENT ###
        df2['sea_ice_extent_post'] = df2['sea_ice_extent']


    return df2



### Interpolate missing values ###
def inter_df(input_df6):
    '''
    Linear interpolation
    The same value is repeated if there is NaN at the top/bottom of DataFrame
    '''
    df6 = input_df6.copy()
    date = df6.iloc[0,0]
    yyyy = date.year

    if yyyy == 2023:
        print('year is 2023')
        print('')

        ### AREA ###
        df6['sea_ice_area_interpolated'] = df6['sea_ice_area_post'].interpolate(limit_direction='both', method='linear')

        ### EXTENT ###
        df6['sea_ice_extent_interpolated'] = df6['sea_ice_extent_post'].interpolate(limit_direction='both', method='linear')


    else:

        ### AREA ###
        df6['sea_ice_area_interpolated'] = df6['sea_ice_area_post']

        ### EXTENT ###
        df6['sea_ice_extent_interpolated'] = df6['sea_ice_extent_post']

    #print(df6.head(120))
    #print(df6.tail(60))
    #sys.exit()
    return df6




### Plot interpolated sea ice area/extent ###
def plot_seaice(input_df7):
    df7 = input_df7.copy()
    date = df7.iloc[0,0]
    yyyy = date.year

    time_obs = df7['dtime']


    ### AREA ###
    area = df7['sea_ice_area']
    area_inter = df7['sea_ice_area_interpolated']

    ### EXTENT ###
    extent = df7['sea_ice_extent']
    extent_inter = df7['sea_ice_extent_interpolated']

    fig3 = plt.figure(figsize=(8,5))
    ax3 = fig3.add_subplot(111)

    ### AREA ###
    ax3.plot(time_obs, area_inter, linewidth = 4.7, label= 'Sea ice area (interpolated)', alpha=0.7)
    ax3.plot(time_obs, area, linewidth = 1.3, label = 'Sea ice area (original)', color='k')

    ### EXTENT ###
    #ax3.plot(time_obs, extent_inter, linewidth = 4.7, label= 'Sea ice extent (interpolated)', color='magenta', alpha=0.7)
    #ax3.plot(time_obs, extent, linewidth = 1.3, label = 'Sea ice extent', color='r')


    ax3.legend()

    ## Save figures ##
    out_dir = '/nird/datapeak/NS9873K/www/users/mariko/fig/seaice/OSTIA/' #ver1.1
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    fig_name3 = out_dir + 'sea_ice_area_interpolated_ostia_' + str(yyyy) + '.png'
    fig3.savefig(fig_name3, dpi=200, format='png')
    plt.show()


        


##--- Create list of year ------------------------------------------------------##
yyyy_int_list = []
for yyyy in range(yyyy1, yyyy2+1):
    yyyy_int_list.append(yyyy)

print(yyyy_int_list)



##--- Set path ------------------------------------------##
## Set path to input files ##
'''
Set path to input directory ###
'''
out_ice = '/nird/datapeak/NS9873K/norcpm/validation/seaicepre/ostia/monthly/' #ver1.1


## Output: Set path to output file ##
'''
Set path to output file
'''
out_dir = '/nird/datapeak/NS9873K/norcpm/validation/seaicepre/ostia/yearly/' #ver1.1

if not os.path.exists(out_dir):
    os.makedirs(out_dir)




for YEAR in yyyy_int_list:
    print('')
    print('Year: ', YEAR)
    print('')



    ##--- Read CSV files ------------------------------##
    ## Paths to CSV files ##
    file_list = sorted(glob.glob(out_ice + 'seaice_ostia_' + str(YEAR) + '*'))

    print('')
    print('file of OSTIA: ', file_list)
    print('')




    ## CSV files -> pandas.DataFrame ##
    if len(file_list) > 0:
        ice_all = []
        for file_ice in file_list:
            ice_ostia = pd.read_csv(file_ice)
            ice_all.append(ice_ostia)


    ## Combine DataFrame objects ##
    if len(file_list) > 0:
        df= pd.concat(ice_all, ignore_index=True, axis=0)
        #print(df.head(60))
        #print(df.tail(60))


    ## Change 'object' to 'datetime' ##
    if len(file_list) > 0:
        df['dtime'] = pd.to_datetime(df['dtime'])



    ##--- Data treatment ----------------------------------##
    ## Call Functions ##
    ### Remove leap day ###
    df_drop_leap = rm_leap(df)


    ### Remove outliers ###
    df_post = remove_outlier(df_drop_leap)


    ### Interpolate missing values ###
    df_inter = inter_df(df_post)


    ### Plot interpolated sea ice area ###
    plot_seaice(df_inter)



    ##--- Save DataFrame as CSV --------------------------------------##
    df_inter.to_csv(out_dir + 'seaice_ostia_merge_' + str(YEAR) + '.csv')
    print('')
    print('')
    print('Saved sea ice area/extent!')
    print('')
    print('-----------------------')

print('')
print('Completed!')
