# Calculate climatology of precipitation (ERA5)
# Period: Copernicus: (1993-2016)
#
# Script version 1.1
#
# Script works with:
#   Python version: 3.10.13
#
#   Package version
#     numpy:  1.26.3
#     pandas:  2.2.0
#     netCDF4:  1.6.4
#
# Ver1.0.0: Created by Mariko Koseki, 27.11.2024
# ver1.1: updated by mariko, 20.10.2025


import netCDF4
import numpy as np
from numpy import dtype
import pandas as pd
import datetime
import os
import sys
import platform


print('--- Python version ---')
print(platform.python_version())

print('--- Package version ---')
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('netCDF4: ', netCDF4.__version__)




##--- Set path ------------------------------------------##
## Set path to input files ##
'''
Set '' (path to input NetCDF file)
'''
era5_monthly = '/nird/projects/NS9873K/norcpm/validation/reanalysis/ECMWF/ERA5/original/monthly_single_level/monthly_averaged_reanalysis/tp/'


## Set path to output files ##
'''
Set '' (path to output file)
'''
out_dir = '/nird/projects/NS9873K/norcpm/validation/reanalysis/ECMWF/ERA5/clim/total_precipitation/'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)




##--- Set year, month ------------------------------------##
'''
Set 'start_year' and 'end_year'
Copernicus: 1993-2016
'''
start_year = 1993 # 1993
end_year = 2016 # 2016
#y_str_list = np.arange(start_year,end_year).tolist()
#ym_str_list = ["{}{:02}".format(y,m) for y in range(start_year,end_year) for m in range(1,13)]
y_str_list = ["{}".format(y) for y in range(start_year,end_year+1)]
m_str_list = ["{:02}".format(m) for m in range(1,13)]


## Set length of variables ##
nyear = len(y_str_list)
nmonth = len(m_str_list)
nlat = 721; nlon = 1440

months = np.arange(1, nmonth + 1)



##--- Read netCDF files ----------------------------------##
## Calculate climatology ##
'''
## Example: "ERA5_228_1993.nc" ## ver1.1
dimensions:
	valid_time = 12 ;
	latitude = 721 ;
	longitude = 1440 ;
variables:
	int64 number ;
		number:long_name = "ensemble member numerical id" ;
		number:units = "1" ;
		number:standard_name = "realization" ;
	int64 valid_time(valid_time) ;
		valid_time:long_name = "time" ;
		valid_time:standard_name = "time" ;
		valid_time:units = "seconds since 1970-01-01" ;
		valid_time:calendar = "proleptic_gregorian" ;
	double latitude(latitude) ;
		latitude:_FillValue = NaN ;
		latitude:units = "degrees_north" ;
		latitude:standard_name = "latitude" ;
		latitude:long_name = "latitude" ;
		latitude:stored_direction = "decreasing" ;
	double longitude(longitude) ;
		longitude:_FillValue = NaN ;
		longitude:units = "degrees_east" ;
		longitude:standard_name = "longitude" ;
		longitude:long_name = "longitude" ;
	string expver(valid_time) ;
	float tp(valid_time, latitude, longitude) ;
		tp:_FillValue = NaNf ;
		tp:GRIB_paramId = 228LL ;
		tp:GRIB_dataType = "fc" ;
		tp:GRIB_numberOfPoints = 1038240LL ;
		tp:GRIB_typeOfLevel = "surface" ;
		tp:GRIB_stepUnits = 1LL ;
		tp:GRIB_stepType = "avgad" ;
		tp:GRIB_gridType = "regular_ll" ;
		tp:GRIB_uvRelativeToGrid = 0LL ;
		tp:GRIB_NV = 0LL ;
		tp:GRIB_Nx = 1440LL ;
		tp:GRIB_Ny = 721LL ;
		tp:GRIB_cfName = "unknown" ;
		tp:GRIB_cfVarName = "tp" ;
		tp:GRIB_gridDefinitionDescription = "Latitude/Longitude Grid" ;
		tp:GRIB_iDirectionIncrementInDegrees = 0.25 ;
		tp:GRIB_iScansNegatively = 0LL ;
		tp:GRIB_jDirectionIncrementInDegrees = 0.25 ;
		tp:GRIB_jPointsAreConsecutive = 0LL ;
		tp:GRIB_jScansPositively = 0LL ;
		tp:GRIB_latitudeOfFirstGridPointInDegrees = 90. ;
		tp:GRIB_latitudeOfLastGridPointInDegrees = -90. ;
		tp:GRIB_longitudeOfFirstGridPointInDegrees = 0. ;
		tp:GRIB_longitudeOfLastGridPointInDegrees = 359.75 ;
		tp:GRIB_missingValue = 3.40282346638529e+38 ;
		tp:GRIB_name = "Total precipitation" ;
		tp:GRIB_shortName = "tp" ;
		tp:GRIB_totalNumber = 0LL ;
		tp:GRIB_units = "m" ;
		tp:long_name = "Total precipitation" ;
		tp:units = "m" ;
		tp:standard_name = "unknown" ;
		tp:GRIB_surface = 0. ;
		tp:coordinates = "number valid_time latitude longitude expver" ;

'''


tp_clim_median_3d = np.zeros((nmonth, nlat, nlon)) #(12, 721, 1440)
tp_clim_mean_3d = np.zeros((nmonth, nlat, nlon)) #(12, 721, 1440) #ver1.1
for mon_num in range(0, nmonth):
    mm = m_str_list[mon_num]
    print('month: ', mm)



    ##--- Calculate median for nyear -----------------------------------------------------###
    tp_clim_median = np.zeros((nyear, nlat, nlon)) #(nyear, 721, 1440)
    tp_clim_mean = np.zeros((nyear, nlat, nlon)) #(nyear, 721, 1440) #ver1.1
    for year_num in range(0, nyear):
        yyyy = y_str_list[year_num]
            
        print('year: ', yyyy)
        nc_tp = era5_monthly + 'ERA5_228_' + yyyy + '.nc'
        print('file name: ', nc_tp)
        print('')

            
        nc = netCDF4.Dataset(nc_tp, 'r')
             
        ''' 
        ## Check length of variables ##
        nlon = len(nc.dimensions['longitude']) # Longitude: 
        nlat = len(nc.dimensions['latitude']) # Latitude: 

        print(nlon, nlat)
        '''    


        ## Read variables ##
        lons = nc.variables['longitude'][:] # Longitude
        lats = nc.variables['latitude'][:] # Latitude


            
        tp = nc.variables['tp'][:][:][:] # tp(date, latitude, longitude)

        ## Convert variables into Numpy ndarray ##
        tp_np = np.array(tp)
        print(tp_np.shape)
        tp_np2 = tp_np[mon_num,:,:]
        print(tp_np2.shape)


        ## Add results into a zeros numpy ndarray ##
        tp_clim_median[year_num, :,:] = tp_np2
        tp_clim_mean[year_num, :,:] = tp_np2 #ver1.1


        ## Close netCDF file ##
        nc.close()


    median_2d = np.median(tp_clim_median, axis=0) #(721, 1440)
    mean_2d = np.mean(tp_clim_mean, axis=0) #(721, 1440) #ver1.1

    tp_clim_median_3d[mon_num,:,:] = median_2d
    tp_clim_mean_3d[mon_num,:,:] = mean_2d #ver1.1

print('file shape of 3D', tp_clim_median_3d.shape) #(12, 721, 1440)





print('')
print('Calculation completed')
print('')
print('------------------------')


##--- Save results as a new NetCDF file ------------------------------##
print('Save climatology')



### Median ###
## File name ##
outfile = out_dir + 'era5_Total_precipitation_clim_median_' + str(start_year) + '_' + str(end_year) + '.nc'
nc2 = netCDF4.Dataset(outfile ,'w', format="NETCDF4")

## Define dimensions ##
nc2.createDimension('latitude', nlat)
nc2.createDimension('longitude', nlon)
nc2.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc2.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc2.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### month ###
month = nc2.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_tp = nc2.createVariable('clim_tp_era5', dtype('float32').char, ('month', 'latitude', 'longitude',))
clim_tp.units = 'm'
clim_tp.long_name = 'climatology of Total_precipitation ERA5'
clim_tp[:,:,:] = tp_clim_median_3d


### close NetCDF file ###
nc2.close()



### Mean ### #ver1.1
## File name ##
outfile3 = out_dir + 'era5_Total_precipitation_clim_mean_' + str(start_year) + '_' + str(end_year) + '.nc'
nc3 = netCDF4.Dataset(outfile3 ,'w', format="NETCDF4")

## Define dimensions ##
nc3.createDimension('latitude', nlat)
nc3.createDimension('longitude', nlon)
nc3.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc3.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc3.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### month ###
month = nc3.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_tp = nc3.createVariable('clim_tp_era5', dtype('float32').char, ('month', 'latitude', 'longitude',))
clim_tp.units = 'm'
clim_tp.long_name = 'climatology of Total_precipitation ERA5'
clim_tp[:,:,:] = tp_clim_mean_3d


### close NetCDF file ###
nc3.close()


print('')
print('Completed!!')
