#!/usr/bin/env python
# Usage Example

from enstools.feature.pipeline import FeaturePipeline
from enstools.feature.identification.african_easterly_waves import AEWIdentification
from enstools.feature.tracking.african_easterly_waves import AEWTracking
from datetime import timedelta, datetime
from enstools.feature.identification._proto_gen import african_easterly_waves_pb2
from os.path import expanduser, join
from enstools.feature.util.graph import DataGraph
from enstools.feature.identification.african_easterly_waves.plotting import plot_differences, plot_track, plot_track_in_ts, plot_timesteps_from_desc, plot_tracks_from_desc
import enstools.feature.identification.african_easterly_waves.configuration as cfg
import os, sys, glob, shutil
from enstools.feature.util.data_utils import get_subset_by_description
import xarray as xr
xr.set_options(keep_attrs=True)
import numpy as np

pipeline = FeaturePipeline(african_easterly_waves_pb2, processing_mode='2d')

# in_files_all_cv_data = cfg.cv_data_ex  # for climatology
# if len(sys.argv) > 1:
#     proc_summer_of_year = int(sys.argv[1])
# if len(sys.argv) > 2:
#     proc_month_of_year = int(sys.argv[2])

if len(sys.argv) == 3 and sys.argv[1] == '-kw' and sys.argv[2] == 'ana':
    # kitweather: make plots from the available cached ECMWF analysis
    data_fc_root = cfg.aew_kitweather_ecmwf_dir
    in_file = data_fc_root + "*/*000h_tropicalvars.nc"
    print("Executing for: " + in_file)

elif len(sys.argv) == 3 and sys.argv[1] == '-kw' and sys.argv[2] == 'ecmwf_fc':
    # kitweather: use the last 7 days of analysis plus the ECMWF forecast
    # pick the latest run subdirectory
    data_fc_root = cfg.aew_kitweather_ecmwf_dir
    all_subdirs = glob.glob(data_fc_root + "*/")
    print(data_fc_root + "*/")

    print("Collecting forecast files...")
    sorted_subdirs = sorted(all_subdirs)
    latest_subdir = sorted_subdirs[-1]

    forecast_files_glob = latest_subdir + "ecmwf-hres_latlon_1.0deg_*"
    fc_file_list = glob.glob(forecast_files_glob)
    fc_rain_file_list = [fc_file.replace("1.0", "0.4").replace("tropicalvars", "tp")
                         for fc_file in fc_file_list if "000h" not in fc_file]

    if len(fc_file_list) < 41:
        print("Expected 41 files in " + forecast_files_glob + ", got " + str(len(fc_file_list)))
        print("Trying previous timestep...")
        latest_subdir = sorted_subdirs[-2]
        forecast_files_glob = latest_subdir + "ecmwf-hres_latlon_1.0deg_*"
        fc_file_list = glob.glob(forecast_files_glob)
        fc_rain_file_list = [fc_file.replace("1.0", "0.4").replace("tropicalvars", "tp")
                             for fc_file in fc_file_list if "000h" not in fc_file]
        if len(fc_file_list) < 41:
            print("Missing files as well. Found " + str(len(fc_file_list)) + " files.")
            print("Exit.")
            exit(1)

    print("Found all 41 forecast files at " + forecast_files_glob + ".")

    # get the last 7 days of analysis: the 000h fields from previous runs
    print("Collecting analysis files...")
    data_fc_root = cfg.aew_kitweather_ecmwf_dir
    all_subdirs_by_time = sorted(all_subdirs)
    last_7d_ana_subdirs = all_subdirs_by_time[-28:-1]  # runs of roughly the last 7 days, excluding the latest subdirectory
    last_7d_ana_glob = [sd + "ecmwf-hres_latlon_1.0deg_*_000h_tropicalvars.nc" for sd in last_7d_ana_subdirs]

    ana_file_list = []
    for ana_ts_glob in last_7d_ana_glob:
        cur_g = glob.glob(ana_ts_glob)
        if len(cur_g) != 1:
            print("Found " + str(len(cur_g)) + " files at " + ana_ts_glob + ", expected 1. Exit.")
            exit(1)
        ana_file_list.extend(cur_g)
    print("Found " + str(len(ana_file_list)) + " analysis files.")

    in_file = sorted(list(set(ana_file_list + fc_file_list)))  # set() deduplicates in case the chosen run's 000h field shows up in both lists
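
    # Optional, purely informational sketch (an addition, not part of the original workflow): summarize
    # what will be passed to the pipeline; in_file, ana_file_list and fc_file_list are defined just above.
    # print("Using run directory: " + latest_subdir)
    # print("Pipeline input: " + str(len(in_file)) + " files (" +
    #       str(len(ana_file_list)) + " analysis, " + str(len(fc_file_list)) + " forecast)")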
print("Collecting rain data...") # just get all tp 0.4deg 6h ana_rain_files = glob.glob(data_fc_root + "*/ecmwf-hres_latlon_0.4deg_*_006h_tp.nc") # load forecast files separately: need to compute deltas from tp. rain_fc_ds = xr.open_mfdataset(fc_rain_file_list) rain_fc_tp = rain_fc_ds.tp # rain_fc_tp_diff = rain_fc_tp.differentiate(coord="time", datetime_unit="6h") times = rain_fc_tp.time.values for t_idx, time in reversed(list(enumerate(times))): if t_idx > 0: rain_fc_tp.loc[dict(time=time)] = rain_fc_tp.isel(time=t_idx) - rain_fc_tp.isel(time=(t_idx-1)) rain_fc_ds['tp'] = rain_fc_tp ana_rain_files = sorted(ana_rain_files) rain_ana_ds = xr.open_mfdataset(ana_rain_files) latest_ana_dt = rain_ana_ds.time[-1] earliest_ana_dt = latest_ana_dt - np.timedelta64(7, 'D') earliest_fc_dt = rain_fc_ds.time[0] rain_ana_ds = rain_ana_ds.sel(time=(slice(earliest_ana_dt, earliest_fc_dt - np.timedelta64(1, 'h')))) # now-7days to fc start. TODO fc 000? print(rain_ana_ds.time.data) print(rain_fc_ds.time.data) # select analysis data up to forecast start. sometimes overlap if future analysis gets downloaded. done above. all_rain_ds = xr.merge([rain_ana_ds, rain_fc_ds]) # change total precipitation to hourly precip all_rain_ds.tp.attrs['units'] = 'mm hr-1' all_rain_ds.tp.attrs['long_name'] = 'Precipitation rate' all_rain_ds['tp'] = all_rain_ds.tp / 6.0 * 1000.0 # from 6hrly (downloaded) to hourly rate and m to mm print("Done collecting files.") else: in_file = cfg.in_files out_dir = cfg.out_dir # init AEWIdentification strategy, can take different parameters i_strat = AEWIdentification(wt_out_file=False, cv='cv') # , year_summer=proc_summer_of_year, month=proc_month_of_year) t_strat = AEWTracking() pipeline.set_identification_strategy(i_strat) pipeline.set_tracking_strategy(t_strat) pipeline.set_data_path(in_file) # execute pipeline pipeline.execute() od = pipeline.get_object_desc() for trackable_set in od.sets: # generate graph out of tracked data g = DataGraph(trackable_set, t_strat) # generate single tracks from tracked data # returns list of tracks, also gets added to object description. Also if apply_filter, keep_track can be implemented g.generate_tracks(apply_filter=True) # add tracks to OD, applies filtering TODO tracks not in desc. tracks = g.set_desc.tracks # track = tracks[0] # parents of a node: track.get_parents(track.graph.edges[0].parent) # childs of a node: track.get_childs(track.graph.edges[0].parent) # for track_id, track in enumerate(tracks): # plot_track(track, "track" + str(track_id)) ds = pipeline.get_data() ds_set = get_subset_by_description(ds, trackable_set, '2d') if len(sys.argv) == 3 and sys.argv[1] == '-kw' and sys.argv[2] == 'ecmwf_fc': time_dir = os.path.basename(os.path.normpath(latest_subdir)) plot_differences(g, tracks, ds=ds_set, tp=all_rain_ds, plot_prefix=cfg.plot_dir + time_dir + "/") elif len(sys.argv) == 3 and sys.argv[1] == '-kw' and sys.argv[2] == 'ana': plot_differences(g, tracks, ds=ds_set, tp=rain_ds, plot_prefix=cfg.plot_dir + "ana/") else: plot_differences(g, tracks, ds=ds_set) time_dir = os.path.basename(os.path.normpath(latest_subdir)) # no out data besides plots on kitweather if sys.argv[1] == '-kw': # delete old plots subdirs = [dI for dI in os.listdir(cfg.plot_dir) if os.path.isdir(os.path.join(cfg.plot_dir,dI))] for sd in subdirs: # for each subdir in plot dir if not sd == time_dir and datetime.fromtimestamp(os.path.getmtime(os.path.join(cfg.plot_dir, sd))) < datetime.now() - timedelta(days=7): # not touched in a week? delete it. 
print("Removing directory " + str(os.path.join(cfg.plot_dir, sd))) shutil.rmtree(os.path.join(cfg.plot_dir, sd)) # All done. Update text file containing time of latest finished run. yyyymmddhh = time_dir[4:] with open(cfg.latest_run_info_file, 'w+') as info_file: info_file.write(yyyymmddhh) exit() # out_netcdf_path = data_path + '_streamers.nc' if len(sys.argv) == 1: out_json_path = out_dir + 'aew_desc.json' out_dataset_path = out_dir + '05_wt.nc' elif len(sys.argv) == 2: out_json_path = out_dir + 'aew_desc_' + str(proc_summer_of_year) + '.json' out_dataset_path = out_dir + '05_wt_' + str(proc_summer_of_year) + '.nc' else: m_str = str(proc_month_of_year).zfill(2) out_json_path = out_dir + 'aew_desc_' + str(proc_summer_of_year) + '_' + m_str + '.json' out_dataset_path = out_dir + '05_wt_' + str(proc_summer_of_year) + '_' + m_str + '.nc' pipeline.save_result(description_type='json', description_path=out_json_path) # , dataset_path=out_dataset_path) # dataset_path=out_dataset_path, # , description_path=out_json_path, graph_path=out_graph_path # print("Plot.") # plot_timesteps_from_desc(od, pipeline.get_data()) # plot_tracks_from_desc(od, None)