# Calculate climatology of 2m temperature
# Period: 1993-2020 -> Copernicus: 1993-2016 #ver1.3.0

# Script version 1.4

# Script works with:
#   Python version: 3.10.13
#
#   Package version
#     numpy:  1.26.3
#     pandas:  2.2.0
#     netCDF4:  1.6.4
#
# Ver1.0.0: Created by Mariko Koseki, 06.03.2024
# Ver1.2.0: updated by Mariko, 19.03.2024
# Ver1.2.1: updated by Mariko, 22.03.2024
# Ver1.3.0: updated by Mariko, 15.05.2024
# ver1.4: updated by Mariko, 20.10.2025


import netCDF4
import numpy as np
from numpy import dtype
import pandas as pd
import datetime
import os
import sys
import platform


print('--- Python version ---')
print(platform.python_version())

print('--- Package version ---')
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('netCDF4: ', netCDF4.__version__)




##--- Set path ------------------------------------------##
## Set path to input files ##
'''
Set '' (path to input NetCDF file)
'''
norcpm_monthly = '/nird/projects/NS9873K/www/norcpm/forecast/monthly/'
#norcpm_monthly = '/nird/projects/NS9873K/norcpm/processed/monthly/'

## Set path to output files ##
'''
Set '' (path to output file)
'''
out_dir = '/nird/projects/NS9873K/norcpm/validation/2m_temperature/median/clim/' #Ver1.2.0
if not os.path.exists(out_dir):
    os.makedirs(out_dir)


out_dir_mean = '/nird/projects/NS9873K/norcpm/validation/2m_temperature/mean/clim/' #Ver1.4
if not os.path.exists(out_dir_mean):
    os.makedirs(out_dir_mean)


## Select NorCPM system ## # Ver1.1.0
'''
Set system number 'system_num'
'''
system_num = '1'
#system_num = '2'

print('')
print('system: ', system_num)
print('')


##--- Set year, month ------------------------------------##
'''
Set 'start_year' and 'end_year'
Copernicus: 1993-2016 #ver1.3.0
'''
start_year = 1993 # 1993
end_year = 2017 # 2016
#y_str_list = np.arange(start_year,end_year).tolist()
#ym_str_list = ["{}{:02}".format(y,m) for y in range(start_year,end_year) for m in range(1,13)]
y_str_list = ["{}".format(y) for y in range(start_year,end_year)]
m_str_list = ["{:02}".format(m) for m in range(1,13)]


## Set length of variables ##
nyear = len(y_str_list)
nmonth = len(m_str_list)
nlead = 6
nmem = 60
nlat = 180; nlon = 360

leads = np.arange(1, nlead + 1)
months = np.arange(1, nmonth + 1)



##--- Read netCDF files ----------------------------------##
## Calculate climatology ##
'''
## Example: "2m_temperature_bccr_1_2024_02.nc" ##
dimensions:
	number = 60 ;
	time = 6 ;
	latitude = 180 ;
	longitude = 360 ;
variables:
	int time(time) ;
		time:units = "hours since 1900-01-01 00:00:00.0" ;
		time:long_name = "time" ;
		time:calendar = "gregorian" ;
	float latitude(latitude) ;
		latitude:units = "degrees_north" ;
		latitude:long_name = "latitude" ;
	float longitude(longitude) ;
		longitude:units = "degrees_east" ;
		longitude:long_name = "longitude" ;
	int number(number) ;
		number:long_name = "ensemble_member" ;
	float t2m(time, number, latitude, longitude) ;
		t2m:_FillValue = 1.e+20f ;
		t2m:units = "K" ;
		t2m:long_name = "2 metre temperature" ;
		t2m:initial_time = "01/02/2024 00:00" ;
'''


t2m_clim_median_4d = np.zeros((nlead, nmonth, nlat, nlon)) #(6, 12, 180, 360)
t2m_clim_mean_4d = np.zeros((nlead, nmonth, nlat, nlon)) #ver1.4
for lead in range(0, nlead):

    print('lead:', lead)

    t2m_clim_median_3d = np.zeros((nmonth, nlat, nlon)) #(12, 180, 360)
    t2m_clim_mean_3d = np.zeros((nmonth, nlat, nlon)) #ver1.4
    for mon_num in range(0, nmonth):
        mm = m_str_list[mon_num]
        print('month: ', mm)



        ##--- Calculate median for nyear, 60 members ------------------------------------------### #Ver1.2.0, ver1.3.0
        t2m_clim_median = np.zeros((nyear, nmem, nlat, nlon)) #(nyear, 60, 180, 360)
        t2m_clim_mean = np.zeros((nyear, nmem, nlat, nlon))
        for year_num in range(0, nyear):
            yyyy = y_str_list[year_num]
            
            print('year: ', yyyy)
            nc_2m_temp = norcpm_monthly + yyyy + mm + '/2m_temperature_bccr_' + system_num + '_' + yyyy + '_' + mm + '.nc' #Ver1.1.0
            print('file name: ', nc_2m_temp)
            print('')

            
            nc = netCDF4.Dataset(nc_2m_temp, 'r')
             
            '''
            ## Check length of variables ##
            #nlon = len(nc.dimensions['longitude']) # Longitude: 360 
            #nlat = len(nc.dimensions['latitude']) # Latitude: 180
            #nlead = len(nc.dimensions['time']) # Lead time: 6
            #nmem = len(nc.dimensions['number']) # Ensemble member: 60

            #print(lon_len, lat_len, time_len, mem_len)
            '''


            ## Read variables ##
            '''
            time = nc.variables['time'][:] # "hours since 1900-01-01 00:00:00.0"
            timeunits = nc.variables['time'].getncattr('units')
            timecalendar = nc.variables['time'].getncattr('calendar')
            date_t2m = netCDF4.num2date(time,timeunits,timecalendar)
            date_t2m = [ f'{i.year}-{i.month:02}' for i in date_t2m]
            '''

            lons = nc.variables['longitude'][:] # Longitude
            lats = nc.variables['latitude'][:] # Latitude


            for mem in range(0, nmem):
                t2m = nc.variables['t2m'][lead][mem][:][:]

                ## Convert variables into Numpy ndarray ##
                t2m_np = np.array(t2m)


                ## Add results into a zeros numpy ndarray ##
                t2m_clim_median[year_num, mem,:,:] = t2m_np # Ver1.2.1
                t2m_clim_mean[year_num, mem,:,:] = t2m_np #ver1.4


            ## Close netCDF file ##
            nc.close()


        median_2d = np.median(t2m_clim_median, axis=(0, 1)) #(180, 360)
        mean_2d = np.mean(t2m_clim_mean, axis=(0, 1)) #ver1.4

        t2m_clim_median_3d[mon_num,:,:] = median_2d # Ver1.2.1
        t2m_clim_mean_3d[mon_num,:,:] = mean_2d #ver1.4

        print('file shape of 3D', t2m_clim_median_3d.shape) #(12, 180, 360)


    t2m_clim_median_4d[lead,:,:,:] = t2m_clim_median_3d # Ver1.2.1
    t2m_clim_mean_4d[lead,:,:,:] = t2m_clim_mean_3d #ver1.4

    print('file shape of 4D', t2m_clim_median_4d.shape) #(6, 12, 180, 360)


print('')
print('Calculation completed')
print('')
print('------------------------')


##--- Save results as a new NetCDF file ------------------------------##
print('Save climatology')

'''
### Median ###
## File name ##
outfile = out_dir + 'bccr_' + system_num + '_2m_temperature_clim_median_' + str(start_year) + '_' + str(end_year-1 ) + '.nc' #Ver1.2.0, ver1.3.0
nc2 = netCDF4.Dataset(outfile ,'w', format="NETCDF4")

## Define dimensions ##
nc2.createDimension('latitude', nlat)
nc2.createDimension('longitude', nlon)
nc2.createDimension('lead', nlead)
nc2.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc2.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc2.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### lead time ###
lead = nc2.createVariable('lead', dtype('int32').char, ('lead',))
lead.units = 'lead_month'
lead.long_name = 'lead_time'
lead[:] = leads

### month ###
month = nc2.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_2m_temp = nc2.createVariable('clim_2m_temp', dtype('float32').char, ('lead', 'month', 'latitude', 'longitude',))
clim_2m_temp.units = 'K'
clim_2m_temp.long_name = 'climatology of 2meter temperature'
clim_2m_temp[:,:,:,:] = t2m_clim_median_4d


### close NetCDF file ###
nc2.close()
'''


### Mean ### #ver1.4
## File name ##
outfile3 = out_dir_mean + 'bccr_' + system_num + '_2m_temperature_clim_mean_' + str(start_year) + '_' + str(end_year-1 ) + '.nc'
nc3 = netCDF4.Dataset(outfile3 ,'w', format="NETCDF4")

## Define dimensions ##
nc3.createDimension('latitude', nlat)
nc3.createDimension('longitude', nlon)
nc3.createDimension('lead', nlead)
nc3.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc3.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc3.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### lead time ###
lead = nc3.createVariable('lead', dtype('int32').char, ('lead',))
lead.units = 'lead_month'
lead.long_name = 'lead_time'
lead[:] = leads

### month ###
month = nc3.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_2m_temp = nc3.createVariable('clim_2m_temp', dtype('float32').char, ('lead', 'month', 'latitude', 'longitude',))
clim_2m_temp.units = 'K'
clim_2m_temp.long_name = 'climatology of 2meter temperature'
clim_2m_temp[:,:,:,:] = t2m_clim_mean_4d


### close NetCDF file ###
nc3.close()


print('')
print('Completed!!')
