# (blame-view residue: "Newer" / "Older"; Christoph Fischer committed)
# Usage Example
import glob
import os
import shutil
import sys
from datetime import datetime, timedelta
from os.path import expanduser, join

import numpy as np
import xarray as xr

from enstools.feature.pipeline import FeaturePipeline
from enstools.feature.identification.african_easterly_waves import AEWIdentification
from enstools.feature.tracking.african_easterly_waves import AEWTracking
from enstools.feature.identification._proto_gen import african_easterly_waves_pb2
from enstools.feature.util.graph import DataGraph
from enstools.feature.identification.african_easterly_waves.plotting import plot_differences, plot_track, plot_track_in_ts, plot_timesteps_from_desc, plot_tracks_from_desc
import enstools.feature.identification.african_easterly_waves.configuration as cfg
from enstools.feature.util.data_utils import get_subset_by_description

xr.set_options(keep_attrs=True)
# (blame annotation: Christoph Fischer committed)
# Create the feature pipeline from the generated african_easterly_waves_pb2 module,
# with the processing mode set to '2d'.
pipeline = FeaturePipeline(
    african_easterly_waves_pb2,
    processing_mode='2d',
)
# (blame annotation: Christoph Fischer committed)
# in_files_all_cv_data = cfg.cv_data_ex
# for climatology
# if len(sys.argv) > 1:
#     proc_summer_of_year = int(sys.argv[1])
# if len(sys.argv) > 2:
#     proc_month_of_year = int(sys.argv[2])


def _collect_forecast_files(run_dir):
    """Glob the 1.0deg forecast files of one run directory.

    Returns a tuple ``(pattern, forecast_files, rain_files)``. ``rain_files``
    holds, for every forecast file whose path does not contain "000h", the
    same path with "1.0" replaced by "0.4" and "tropicalvars" replaced by "tp".
    """
    pattern = run_dir + "ecmwf-hres_latlon_1.0deg_*"
    forecast_files = glob.glob(pattern)
    rain_files = [
        fc_file.replace("1.0", "0.4").replace("tropicalvars", "tp")
        for fc_file in forecast_files
        if "000h" not in fc_file
    ]
    return pattern, forecast_files, rain_files


# Input selection. Usage:
#   <script> -kw ana        plots from the cached ECMWF analysis files
#   <script> -kw ecmwf_fc   analysis of the previous runs plus the latest forecast
#   anything else           input and output locations come from the configuration
kw_mode = sys.argv[2] if len(sys.argv) == 3 and sys.argv[1] == '-kw' else None

if kw_mode == 'ana':
    # kitweather: make plots from available cached ecmwf analysis
    data_fc_root = cfg.aew_kitweather_ecmwf_dir
    in_file = data_fc_root + "*/*000h_tropicalvars.nc"
    print("Executing for: " + in_file)
    # NOTE(review): the plotting step passes `rain_ds` as `tp` in this mode, but
    # `rain_ds` is not assigned anywhere in the visible script - confirm where
    # the analysis rain data is supposed to be loaded.
elif kw_mode == 'ecmwf_fc':
    # kitweather: use last 7 days of analysis and the ecmwf forecast
    # get latest subdirectory time as what we choose
    data_fc_root = cfg.aew_kitweather_ecmwf_dir
    print(data_fc_root + "*/")
    # One sub-directory per run; sorting by name is used as sorting by time.
    all_subdirs = glob.glob(data_fc_root + "*/")
    sorted_subdirs = sorted(all_subdirs)
    if not sorted_subdirs:
        print("No run directories found at " + data_fc_root + "*/")
        print("Exit.")
        sys.exit(1)

    latest_subdir = sorted_subdirs[-1]
    forecast_files_glob, fc_file_list, fc_rain_file_list = _collect_forecast_files(latest_subdir)
    if len(fc_file_list) < 41:
        # The newest run is incomplete: fall back to the run before it.
        print("Expected 41 files in " + forecast_files_glob + ", got " + str(len(fc_file_list)))
        print("Trying previous timestep...")
        if len(sorted_subdirs) < 2:
            print("No previous timestep available.")
            print("Exit.")
            sys.exit(1)
        latest_subdir = sorted_subdirs[-2]
        forecast_files_glob, fc_file_list, fc_rain_file_list = _collect_forecast_files(latest_subdir)
        if len(fc_file_list) < 41:
            print("Missing files as well. Found " + str(len(fc_file_list)) + " files.")
            print("Exit.")
            sys.exit(1)
    print("Found all 41 forecast files at " + forecast_files_glob + ".")

    # get last 7 days of analysis: 000h from previous runs
    print("Collecting analysis files...")
    # The 27 run directories before the newest one. The 000h file of the chosen
    # forecast run is already part of fc_file_list.
    last_7d_ana_subdirs = sorted_subdirs[-28:-1]
    ana_file_list = []
    for ana_subdir in last_7d_ana_subdirs:
        ana_ts_glob = ana_subdir + "ecmwf-hres_latlon_1.0deg_*_000h_tropicalvars.nc"
        cur_g = glob.glob(ana_ts_glob)
        if len(cur_g) != 1:
            print("Found " + str(len(cur_g)) + " files at " + ana_ts_glob + ", expected 1. Exit.")
            sys.exit(1)
        ana_file_list.extend(cur_g)
    print("Found " + str(len(ana_file_list)) + " analysis files.")

    # The set removes a 000h file that appears in both lists.
    in_file = sorted(set(ana_file_list + fc_file_list))

    print("Collecting rain data...")
    # just get all tp 0.4deg 6h
    ana_rain_files = sorted(glob.glob(data_fc_root + "*/ecmwf-hres_latlon_0.4deg_*_006h_tp.nc"))

    # load forecast files separately: need to compute deltas from tp.
    rain_fc_ds = xr.open_mfdataset(fc_rain_file_list)
    rain_fc_tp = rain_fc_ds.tp
    # rain_fc_tp_diff = rain_fc_tp.differentiate(coord="time", datetime_unit="6h")
    times = rain_fc_tp.time.values
    # Walk backwards so every step subtracts a predecessor that has not been
    # overwritten yet. The first time step is left unchanged.
    for t_idx in range(len(times) - 1, 0, -1):
        rain_fc_tp.loc[dict(time=times[t_idx])] = rain_fc_tp.isel(time=t_idx) - rain_fc_tp.isel(time=t_idx - 1)
    rain_fc_ds['tp'] = rain_fc_tp

    rain_ana_ds = xr.open_mfdataset(ana_rain_files)
    latest_ana_dt = rain_ana_ds.time[-1]
    earliest_ana_dt = latest_ana_dt - np.timedelta64(7, 'D')
    earliest_fc_dt = rain_fc_ds.time[0]
    # select analysis data up to forecast start. sometimes overlap if future analysis gets downloaded.
    # now-7days to fc start. TODO fc 000?
    rain_ana_ds = rain_ana_ds.sel(time=slice(earliest_ana_dt, earliest_fc_dt - np.timedelta64(1, 'h')))
    print(rain_ana_ds.time.data)
    print(rain_fc_ds.time.data)

    all_rain_ds = xr.merge([rain_ana_ds, rain_fc_ds])
    # change total precipitation to hourly precip
    all_rain_ds.tp.attrs['units'] = 'mm hr-1'
    all_rain_ds.tp.attrs['long_name'] = 'Precipitation rate'
    all_rain_ds['tp'] = all_rain_ds.tp / 6.0 * 1000.0  # from 6hrly (downloaded) to hourly rate and m to mm
else:
    in_file = cfg.in_files
    out_dir = cfg.out_dir
# (blame annotation: Christoph Fischer committed)
# Set up the AEW identification strategy; it accepts further parameters,
# e.g. year_summer=proc_summer_of_year, month=proc_month_of_year (currently not passed).
i_strat = AEWIdentification(
    wt_out_file=False,
    cv='cv',
)
# (blame annotation: Christoph Fischer committed)
# `t_strat` is also handed to DataGraph further down, so it is kept as a
# module-level name.
# NOTE(review): the visible script never assigned `t_strat` although AEWTracking
# is imported; assumes the tracking strategy takes no required arguments - TODO confirm.
t_strat = AEWTracking()

# Register the identification and the tracking strategy with the pipeline.
pipeline.set_identification_strategy(i_strat)
pipeline.set_tracking_strategy(t_strat)
# (blame annotation: Christoph Fischer committed)
# Hand the input chosen earlier to the pipeline: depending on the run mode `in_file`
# is a glob string, a sorted list of file paths, or cfg.in_files.
pipeline.set_data_path(in_file)
# (blame annotation: Christoph Fischer committed)
# execute pipeline
pipeline.execute()
od = pipeline.get_object_desc()

# Resolve the command-line mode once. Checking the length first keeps a run
# without arguments from failing on sys.argv[1].
is_kw_run = len(sys.argv) > 1 and sys.argv[1] == '-kw'
run_mode = sys.argv[2] if is_kw_run and len(sys.argv) == 3 else None

# `latest_subdir` only exists in the 'ecmwf_fc' mode, so the name of the
# per-run plot directory is derived only there.
time_dir = None
if run_mode == 'ecmwf_fc':
    time_dir = os.path.basename(os.path.normpath(latest_subdir))

for trackable_set in od.sets:
    # generate graph out of tracked data
    g = DataGraph(trackable_set, t_strat)
    # generate single tracks from tracked data
    # returns list of tracks, also gets added to object description. Also if apply_filter, keep_track can be implemented
    g.generate_tracks(apply_filter=True)  # add tracks to OD, applies filtering TODO tracks not in desc.
    tracks = g.set_desc.tracks
    # track = tracks[0]
    # parents of a node: track.get_parents(track.graph.edges[0].parent)
    # childs of a node: track.get_childs(track.graph.edges[0].parent)
    # for track_id, track in enumerate(tracks):
    #     plot_track(track, "track" + str(track_id))
    ds = pipeline.get_data()
    ds_set = get_subset_by_description(ds, trackable_set, '2d')

    if run_mode == 'ecmwf_fc':
        plot_differences(g, tracks, ds=ds_set, tp=all_rain_ds, plot_prefix=cfg.plot_dir + time_dir + "/")
    elif run_mode == 'ana':
        # NOTE(review): `rain_ds` is not assigned anywhere in the visible script - confirm its source.
        plot_differences(g, tracks, ds=ds_set, tp=rain_ds, plot_prefix=cfg.plot_dir + "ana/")
    else:
        # NOTE(review): treated as the default-mode branch; the source indentation was lost - confirm.
        plot_differences(g, tracks, ds=ds_set)

# no out data besides plots on kitweather
if is_kw_run:
    if time_dir is not None:
        # delete old plots: every other sub-directory of the plot directory
        # that has not been touched in a week
        cutoff = datetime.now() - timedelta(days=7)
        for sd in os.listdir(cfg.plot_dir):
            sd_path = os.path.join(cfg.plot_dir, sd)
            if not os.path.isdir(sd_path) or sd == time_dir:
                continue
            if datetime.fromtimestamp(os.path.getmtime(sd_path)) < cutoff:
                print("Removing directory " + str(sd_path))
                shutil.rmtree(sd_path)

        # All done. Update text file containing time of latest finished run.
        # The first four characters of the directory name are dropped.
        yyyymmddhh = time_dir[4:]
        with open(cfg.latest_run_info_file, 'w+') as info_file:
            info_file.write(yyyymmddhh)
    sys.exit()
# (blame annotation: Christoph Fischer committed)
# out_netcdf_path = data_path + '_streamers.nc'
# Build the output file names. Optional positional arguments select a
# climatology run: <summer year> [<month>]; they only end up in the file names here.
name_suffix = ''
if len(sys.argv) >= 2:
    proc_summer_of_year = int(sys.argv[1])
    name_suffix += '_' + str(proc_summer_of_year)
if len(sys.argv) >= 3:
    proc_month_of_year = int(sys.argv[2])
    name_suffix += '_' + str(proc_month_of_year).zfill(2)

out_json_path = out_dir + 'aew_desc' + name_suffix + '.json'
# Currently not passed to save_result (see the commented-out keyword below).
out_dataset_path = out_dir + '05_wt' + name_suffix + '.nc'

pipeline.save_result(description_type='json', description_path=out_json_path)  # , dataset_path=out_dataset_path)
# , description_path=out_json_path, graph_path=out_graph_path
# print("Plot.")
# plot_timesteps_from_desc(od, pipeline.get_data())
# plot_tracks_from_desc(od, None)