Skip to content
Snippets Groups Projects
climatology.py 3.95 KiB
Newer Older
import numpy as np
import xarray as xr
import metpy.calc as mpcalc


Christoph.Fischer's avatar
Christoph.Fischer committed
def smthClmDay(clmDay, nHarm):  # from athul
    # nharm is the number of harmonic modes that needs to be removed. So we retain that in smthClm to later 
    # subtract from raw.
    print(clmDay)

    cf = np.fft.rfft(clmDay, axis=0)  # ([nday/2]+1)x[nlat]x[mlon]<==n:even
    # ([nday+1/2])x[nlat]x[mlon]<==n:odd
    # cf(:,0:nharm-1)
    cf[nHarm, :, :] = 0.5 * cf[nHarm, :, :]  # mini-taper
    cf[nHarm + 1:, :, :] = 0.0  # set all higher coef. to zero

    clmDaySmth = np.fft.irfft(cf, axis=0)  # reverse FT to real reconstruct data

    clmDaySmth = xr.DataArray(clmDaySmth, dims=('hourofyear', 'lat', 'lon'),
                              coords={'hourofyear': clmDay.hourofyear, 'lat': clmDay.lat, 'lon': clmDay.lon})
    return (clmDaySmth)

# TODO TEST THIS, maybe needs fix, currently cached! (maybe works with multi year data)
# also weird dims. le_ti_level_0
def compute_climatology(cfg):
    strt = '1979-01-01'
    last = '2019-12-31'
    cv_database_dir = cfg.cv_data_dir

    f = xr.open_mfdataset(cv_database_dir + "*.nc").sel(
        # plev=slice(plev, plev), 
        # lat=slice(latS,latN), 
        # lon=slice(lonW,lonE),
        time=slice(strt, last))
    # smoothing
    cv = mpcalc.smooth_n_point(f.cv, n=9, passes=2).metpy.dequantify()
    # generate hourofyear climatology, save it to file for next time

    # Calculate a hour-of-year climatology to be subtracted later.
    cv = cv.assign_coords(hourofyear=cv.time.dt.strftime("%m-%d %H"))
    print('Computing climatology...')
    cv_clim_da = cv.groupby('hourofyear').mean('time')

    # print('Smoothing climatology...')
    # clm = smthClmDay(clm, 3)  # calculate climatology with just the 3 harmonic modes retained.
    print('Climatology computed ')

    cv_clim_da.attrs['units'] = 's**-1'
    cv_clim_da.attrs['standard_name'] = 'Curvature Vorticity'
    cv_clim_da.attrs['long_name'] = 'Curvature Vorticity climatology (hour of year)'

    cv_clim = cv_clim_da.to_dataset(name="cv")

    # compute anomalies
    print("Compute anomalies of " + str(cfg.cv_percentile) + " percentile (total)...")
    full_cv_anomalies = cv.groupby('hourofyear') - cv_clim.cv

    # quantile applied over time,lat,lon -> so one scalar of cfg.cv_percentile% quantile per pressure level
    quantiles_full = full_cv_anomalies.quantile(float(cfg.cv_percentile) / 100.0, dim=['lat', 'lon', 'time'])
    cv_clim['cva_quantile_full'] = quantiles_full
    print(quantiles_full.data)

    print("Compute anomalies of " + str(cfg.cv_percentile) + " percentile (hourofyear)...")
    # quantile for each hourofyear, bit more complicated. bascially for each Jan 1 00UTC the x% quantile etc.
    full_cv_anomalies = full_cv_anomalies.assign_coords(hourofyear=full_cv_anomalies.time.dt.strftime("%m-%d %H"))

    full_cv_anomalies = full_cv_anomalies.swap_dims({"time": "hourofyear"})
    full_cv_anomalies_stacked = full_cv_anomalies.stack(le_ti=("plev", "hourofyear"))

    # group by pressure level and hourofyear -> quantile over lat+lon+date(hoy)
    quantiles_hoy_stacked = full_cv_anomalies_stacked.groupby('le_ti').quantile(q=float(cfg.cv_percentile) / 100.0,
                                                                                dim=...)
    quantiles_hoy = quantiles_hoy_stacked.unstack(dim='le_ti')

    cv_clim['cva_quantile_hoy'] = quantiles_hoy
    quantiles_hoy = cv_clim.cva_quantile_hoy
    # somehow xarray cant reconstruct stacked groupby results, fix manually.
    quantiles_hoy = quantiles_hoy.swap_dims({"le_ti_level_0": "plev"})
    quantiles_hoy = quantiles_hoy.swap_dims({"le_ti_level_1": "hourofyear"})
    quantiles_hoy = quantiles_hoy.rename({"le_ti_level_0": "plev"})
    quantiles_hoy = quantiles_hoy.rename({"le_ti_level_1": "hourofyear"})
    print(quantiles_hoy)

    cv_clim['cva_quantile_hoy'] = quantiles_hoy
    cv_clim = cv_clim.drop_dims(["le_ti_level_0", "le_ti_level_1"])
    print("Done!")
    print("Generated dataset:")
    print(cv_clim)

    return cv_clim