# Calculate climatology of precipitation
# Period: Copernicus (1993-2016)
#
# Script version 1.1
#
# Script works with:
#   Python version: 3.10
#
#   Package version
#     numpy:  1.26.3
#     pandas:  2.2.0
#     netCDF4:  1.6.4
#
# Ver1.0.0: Created by Mariko Koseki, 11.09.2024
# ver1.1: updated by Mariko, 20.10.2025


import netCDF4
import numpy as np
from numpy import dtype
import pandas as pd
import datetime
import calendar
import os
import sys
import platform


print('--- Python version ---')
print(platform.python_version())

print('--- Package version ---')
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('netCDF4: ', netCDF4.__version__)




##--- Set path ------------------------------------------##
## Set path to input files ##
'''
Set '' (path to input NetCDF file)
'''
norcpm_monthly = '/nird/projects/NS9873K/www/norcpm/forecast/monthly/'
#norcpm_monthly = '/nird/projects/NS9873K/norcpm/processed/monthly/'

## Set path to output files ##
'''
Set '' (path to output file)
'''
out_dir = '/nird/projects/NS9873K/norcpm/validation/precipitation/median/clim/'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)


out_dir_mean = '/nird/projects/NS9873K/norcpm/validation/precipitation/mean/clim/' #Ver1.1
if not os.path.exists(out_dir_mean):
    os.makedirs(out_dir_mean)


## Select NorCPM system ##
'''
Set system number 'system_num'
'''
system_num = '1'
#system_num = '2'

print('')
print('system: ', system_num)
print('')


##--- Set year, month ------------------------------------##
'''
Set 'start_year' and 'end_year'
Copernicus: 1993-2016
'''
start_year = 1993 # 1993
end_year = 2016 # 2016

y_str_list = ["{}".format(y) for y in range(start_year,end_year+1)]
m_str_list = ["{:02}".format(m) for m in range(1,13)]


## Set length of variables ##
nyear = len(y_str_list)
nmonth = len(m_str_list)
nlead = 6
nmem = 60
nlat = 180; nlon = 360

leads = np.arange(1, nlead + 1)
months = np.arange(1, nmonth + 1)



##--- Read netCDF files ----------------------------------##
## Calculate climatology ##
'''
## Example: "total_precipitation_bccr_1_2024_08.nc"
dimensions:
	number = 60 ;
	time = 6 ;
	latitude = 180 ;
	longitude = 360 ;
variables:
	int time(time) ;
		time:units = "hours since 1900-01-01 00:00:00.0" ;
		time:long_name = "time" ;
		time:calendar = "gregorian" ;
	float latitude(latitude) ;
		latitude:units = "degrees_north" ;
		latitude:long_name = "latitude" ;
	float longitude(longitude) ;
		longitude:units = "degrees_east" ;
		longitude:long_name = "longitude" ;
	int number(number) ;
		number:long_name = "ensemble_member" ;
	float tprate(time, number, latitude, longitude) ;
		tprate:_FillValue = 1.e+20f ;
		tprate:units = "m s**-1" ;
		tprate:long_name = "Mean total precipitation rate" ;
		tprate:initial_time = "01/08/2024 00:00" ;
'''


prec_clim_median_4d = np.zeros((nlead, nmonth, nlat, nlon)) #(6, 12, 180, 360)
prec_clim_mean_4d = np.zeros((nlead, nmonth, nlat, nlon)) #(6, 12, 180, 360) #ver1.1
for lead in range(0, nlead):

    print('lead:', lead)

    prec_clim_median_3d = np.zeros((nmonth, nlat, nlon)) #(12, 180, 360)
    prec_clim_mean_3d = np.zeros((nmonth, nlat, nlon)) #(12, 180, 360) #ver1.1
    for mon_num in range(0, nmonth):
        mm = m_str_list[mon_num]
        print('month: ', mm)


        ##--- Calculate median for nyear, 60 members ------------------------------------------###
        prec_clim_median = np.zeros((nyear, nmem, nlat, nlon)) #(nyear, 60, 180, 360)
        prec_clim_mean = np.zeros((nyear, nmem, nlat, nlon)) #(nyear, 60, 180, 360) #ver1.1
        for year_num in range(0, nyear):
            yyyy = y_str_list[year_num]
            
            print('year: ', yyyy)
            nc_precip = norcpm_monthly + yyyy + mm + '/total_precipitation_bccr_' + system_num + '_' + yyyy + '_' + mm + '.nc'
            print('file name: ', nc_precip)
            print('')

            ndate = calendar.monthrange(int(yyyy), int(mm))[1]
            if calendar.isleap(int(yyyy)):
                ndate = 28


            nc = netCDF4.Dataset(nc_precip, 'r')
             
            '''
            ## Check length of variables ##
            #nlon = len(nc.dimensions['longitude']) # Longitude: 360 
            #nlat = len(nc.dimensions['latitude']) # Latitude: 180
            #nlead = len(nc.dimensions['time']) # Lead time: 6
            #nmem = len(nc.dimensions['number']) # Ensemble member: 60

            #print(lon_len, lat_len, time_len, mem_len)
            '''


            ## Read variables ##
            lons = nc.variables['longitude'][:] # Longitude
            lats = nc.variables['latitude'][:] # Latitude


            for mem in range(0, nmem):
                tprate = nc.variables['tprate'][lead][mem][:][:] #units = m s**-1

                ## Convert variables into Numpy ndarray ##
                tprate_np = np.array(tprate)


                ## Change unit: m/s -> mm/month
                tprate2 = tprate_np * 1000 * 3600 * 24 * ndate
                #print(tprate2)
                #print(tprate2.max())


                ## Add results into a zeros numpy ndarray ##
                prec_clim_median[year_num, mem,:,:] = tprate2
                prec_clim_mean[year_num, mem,:,:] = tprate2 #ver1.1


            ## Close netCDF file ##
            nc.close()


        median_2d = np.median(prec_clim_median, axis=(0, 1)) #(180, 360)
        mean_2d = np.mean(prec_clim_median, axis=(0, 1)) #(180, 360) #ver1.1

        prec_clim_median_3d[mon_num,:,:] = median_2d
        prec_clim_mean_3d[mon_num,:,:] = mean_2d #ver1.1

        print('file shape of 3D', prec_clim_median_3d.shape) #(12, 180, 360)


    prec_clim_median_4d[lead,:,:,:] = prec_clim_median_3d
    prec_clim_mean_4d[lead,:,:,:] = prec_clim_mean_3d #ver1.1

    print('file shape of 4D', prec_clim_median_4d.shape) #(6, 12, 180, 360)


print('')
print('Calculation completed')
print('')
print('------------------------')


##--- Save results as a new NetCDF file ------------------------------##
print('Save climatology')

### Median ###
## File name ##
outfile = out_dir + 'bccr_' + system_num + '_total_precipitation_clim_median_' + str(start_year) + '_' + str(end_year) + '.nc'
nc2 = netCDF4.Dataset(outfile ,'w', format="NETCDF4")

## Define dimensions ##
nc2.createDimension('latitude', nlat)
nc2.createDimension('longitude', nlon)
nc2.createDimension('lead', nlead)
nc2.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc2.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc2.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### lead time ###
lead = nc2.createVariable('lead', dtype('int32').char, ('lead',))
lead.units = 'lead_month'
lead.long_name = 'lead_time'
lead[:] = leads

### month ###
month = nc2.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_prec = nc2.createVariable('clim_prec', dtype('float32').char, ('lead', 'month', 'latitude', 'longitude',))
clim_prec.units = 'mm/month'
clim_prec.long_name = 'climatology of precipitation'
clim_prec[:,:,:,:] = prec_clim_median_4d


### close NetCDF file ###
nc2.close()



### Mean ### #ver1.1
## File name ##
outfile3 = out_dir_mean + 'bccr_' + system_num + '_total_precipitation_clim_mean_' + str(start_year) + '_' + str(end_year) + '.nc'
nc3 = netCDF4.Dataset(outfile3 ,'w', format="NETCDF4")

## Define dimensions ##
nc3.createDimension('latitude', nlat)
nc3.createDimension('longitude', nlon)
nc3.createDimension('lead', nlead)
nc3.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc3.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc3.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### lead time ###
lead = nc3.createVariable('lead', dtype('int32').char, ('lead',))
lead.units = 'lead_month'
lead.long_name = 'lead_time'
lead[:] = leads

### month ###
month = nc3.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_prec = nc3.createVariable('clim_prec', dtype('float32').char, ('lead', 'month', 'latitude', 'longitude',))
clim_prec.units = 'mm/month'
clim_prec.long_name = 'climatology of precipitation'
clim_prec[:,:,:,:] = prec_clim_mean_4d


### close NetCDF file ###
nc3.close()


print('')
print('Completed!!')
