# Calculate climatology of 2m temperature (ERA5)
# Period: Copernicus: (1993-2016)
#
# Script version 1.1
#
# Script works with:
#   Python version: 3.10.13
#
#   Package version
#     numpy:  1.26.3
#     pandas:  2.2.0
#     netCDF4:  1.6.4
#
# Ver1.0.0: Created by Mariko Koseki, 19.11.2024
# ver1.1: updated by Mariko, 20.10.2025



import netCDF4
import numpy as np
from numpy import dtype
import pandas as pd
import datetime
import os
import sys
import platform


print('--- Python version ---')
print(platform.python_version())

print('--- Package version ---')
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('netCDF4: ', netCDF4.__version__)




##--- Set path ------------------------------------------##
## Set path to input files ##
'''
Set '' (path to input NetCDF file)
'''
era5_monthly = '/nird/projects/NS9873K/norcpm/validation/reanalysis/ECMWF/ERA5/original/monthly_single_level/monthly_averaged_reanalysis/2t/'


## Set path to output files ##
'''
Set '' (path to output file)
'''
out_dir = '/nird/projects/NS9873K/norcpm/validation/reanalysis/ECMWF/ERA5/clim/2m_temperature/'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)




##--- Set year, month ------------------------------------##
'''
Set 'start_year' and 'end_year'
Copernicus: 1993-2016
'''
start_year = 1993 # 1993
end_year = 2016 # 2016
#y_str_list = np.arange(start_year,end_year).tolist()
#ym_str_list = ["{}{:02}".format(y,m) for y in range(start_year,end_year) for m in range(1,13)]
y_str_list = ["{}".format(y) for y in range(start_year,end_year+1)]
m_str_list = ["{:02}".format(m) for m in range(1,13)]


## Set length of variables ##
nyear = len(y_str_list)
nmonth = len(m_str_list)
nlat = 721; nlon = 1440

months = np.arange(1, nmonth + 1)



##--- Read netCDF files ----------------------------------##
## Calculate climatology ##
'''
## Example: "ERA5_167_1993.nc" ## #ver1.1
dimensions:
	valid_time = 12 ;
	latitude = 721 ;
	longitude = 1440 ;
variables:
	int64 number ;
		number:long_name = "ensemble member numerical id" ;
		number:units = "1" ;
		number:standard_name = "realization" ;
	int64 valid_time(valid_time) ;
		valid_time:long_name = "time" ;
		valid_time:standard_name = "time" ;
		valid_time:units = "seconds since 1970-01-01" ;
		valid_time:calendar = "proleptic_gregorian" ;
	double latitude(latitude) ;
		latitude:_FillValue = NaN ;
		latitude:units = "degrees_north" ;
		latitude:standard_name = "latitude" ;
		latitude:long_name = "latitude" ;
		latitude:stored_direction = "decreasing" ;
	double longitude(longitude) ;
		longitude:_FillValue = NaN ;
		longitude:units = "degrees_east" ;
		longitude:standard_name = "longitude" ;
		longitude:long_name = "longitude" ;
	string expver(valid_time) ;
	float t2m(valid_time, latitude, longitude) ;
		t2m:_FillValue = NaNf ;
		t2m:GRIB_paramId = 167LL ;
		t2m:GRIB_dataType = "an" ;
		t2m:GRIB_numberOfPoints = 1038240LL ;
		t2m:GRIB_typeOfLevel = "surface" ;
		t2m:GRIB_stepUnits = 1LL ;
		t2m:GRIB_stepType = "avgua" ;
		t2m:GRIB_gridType = "regular_ll" ;
		t2m:GRIB_uvRelativeToGrid = 0LL ;
		t2m:GRIB_NV = 0LL ;
		t2m:GRIB_Nx = 1440LL ;
		t2m:GRIB_Ny = 721LL ;
		t2m:GRIB_cfName = "unknown" ;
		t2m:GRIB_cfVarName = "t2m" ;
		t2m:GRIB_gridDefinitionDescription = "Latitude/Longitude Grid" ;
		t2m:GRIB_iDirectionIncrementInDegrees = 0.25 ;
		t2m:GRIB_iScansNegatively = 0LL ;
		t2m:GRIB_jDirectionIncrementInDegrees = 0.25 ;
		t2m:GRIB_jPointsAreConsecutive = 0LL ;
		t2m:GRIB_jScansPositively = 0LL ;
		t2m:GRIB_latitudeOfFirstGridPointInDegrees = 90. ;
		t2m:GRIB_latitudeOfLastGridPointInDegrees = -90. ;
		t2m:GRIB_longitudeOfFirstGridPointInDegrees = 0. ;
		t2m:GRIB_longitudeOfLastGridPointInDegrees = 359.75 ;
		t2m:GRIB_missingValue = 3.40282346638529e+38 ;
		t2m:GRIB_name = "2 metre temperature" ;
		t2m:GRIB_shortName = "2t" ;
		t2m:GRIB_totalNumber = 0LL ;
		t2m:GRIB_units = "K" ;
		t2m:long_name = "2 metre temperature" ;
		t2m:units = "K" ;
		t2m:standard_name = "unknown" ;
		t2m:GRIB_surface = 0. ;
		t2m:coordinates = "number valid_time latitude longitude expver" ;
'''


t2m_clim_median_3d = np.zeros((nmonth, nlat, nlon)) #(12, 721, 1440)
t2m_clim_mean_3d = np.zeros((nmonth, nlat, nlon)) #ver1.1
for mon_num in range(0, nmonth):
    mm = m_str_list[mon_num]
    print('month: ', mm)



    ##--- Calculate median for nyear -----------------------------------------------------###
    t2m_clim_median = np.zeros((nyear, nlat, nlon)) #(nyear, 721, 1440)
    t2m_clim_mean = np.zeros((nyear, nlat, nlon)) #ver1.1
    for year_num in range(0, nyear):
        yyyy = y_str_list[year_num]
            
        print('year: ', yyyy)
        nc_2m_temp = era5_monthly + 'ERA5_167_' + yyyy + '.nc' #ver1.1
        print('file name: ', nc_2m_temp)
        print('')

            
        nc = netCDF4.Dataset(nc_2m_temp, 'r')
             
        ''' 
        ## Check length of variables ##
        nlon = len(nc.dimensions['longitude']) # Longitude: 
        nlat = len(nc.dimensions['latitude']) # Latitude: 

        print(nlon, nlat)
        '''    


        ## Read variables ##
        lons = nc.variables['longitude'][:] # Longitude
        lats = nc.variables['latitude'][:] # Latitude


            
        t2m = nc.variables['t2m'][:][:][:] # t2m(date, latitude, longitude)

        ## Convert variables into Numpy ndarray ##
        t2m_np = np.array(t2m)
        print(t2m_np.shape)
        t2m_np2 = t2m_np[mon_num,:,:]
        print(t2m_np2.shape)


        ## Add results into a zeros numpy ndarray ##
        t2m_clim_median[year_num, :,:] = t2m_np2
        t2m_clim_mean[year_num, :,:] = t2m_np2 #ver1.1


        ## Close netCDF file ##
        nc.close()


    median_2d = np.median(t2m_clim_median, axis=0) #(721, 1440)
    mean_2d = np.mean(t2m_clim_mean, axis=0) #(721, 1440) #ver1.1

    t2m_clim_median_3d[mon_num,:,:] = median_2d
    t2m_clim_mean_3d[mon_num,:,:] = mean_2d #ver1.1

print('file shape of 3D', t2m_clim_median_3d.shape) #(12, 721, 1440)





print('')
print('Calculation completed')
print('')
print('------------------------')


##--- Save results as a new NetCDF file ------------------------------##
print('Save climatology')



### Median ###
## File name ##
outfile = out_dir + 'era5_2m_temperature_clim_median_' + str(start_year) + '_' + str(end_year) + '.nc'
nc2 = netCDF4.Dataset(outfile ,'w', format="NETCDF4")

## Define dimensions ##
nc2.createDimension('latitude', nlat)
nc2.createDimension('longitude', nlon)
nc2.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc2.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc2.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### month ###
month = nc2.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_2m_temp = nc2.createVariable('clim_2m_temp_era5', dtype('float32').char, ('month', 'latitude', 'longitude',))
clim_2m_temp.units = 'K'
clim_2m_temp.long_name = 'climatology of 2meter temperature ERA5'
clim_2m_temp[:,:,:] = t2m_clim_median_3d


### close NetCDF file ###
nc2.close()



### Mean ###
## File name ##
outfile = out_dir + 'era5_2m_temperature_clim_mean_' + str(start_year) + '_' + str(end_year) + '.nc'
nc3 = netCDF4.Dataset(outfile ,'w', format="NETCDF4")

## Define dimensions ##
nc3.createDimension('latitude', nlat)
nc3.createDimension('longitude', nlon)
nc3.createDimension('month', nmonth)

## Define variables ##
### latitude ###
lat = nc3.createVariable('latitude', dtype('float32').char, ('latitude',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lat[:] = lats

### longitude ###
lon = nc3.createVariable('longitude', dtype('float32').char, ('longitude',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
lon[:] = lons

### month ###
month = nc3.createVariable('month', dtype('int32').char, ('month',))
month.units = 'month'
month.long_name = 'month'
month[:] = months

### climatology ###
clim_2m_temp = nc3.createVariable('clim_2m_temp_era5', dtype('float32').char, ('month', 'latitude', 'longitude',))
clim_2m_temp.units = 'K'
clim_2m_temp.long_name = 'climatology of 2meter temperature ERA5'
clim_2m_temp[:,:,:] = t2m_clim_mean_3d


### close NetCDF file ###
nc3.close()


print('')
print('Completed!!')
