From 0b2d2560669986b272ac725bb14d79dac0cfe296 Mon Sep 17 00:00:00 2001
From: "Jonas.Spaeth" <jonas.spaeth@physik.uni-muenchen.de>
Date: Wed, 9 Aug 2023 16:11:30 +0200
Subject: [PATCH] implement FROM_MEMBERS and prepare_parent_date.py

---
 conf/spin-off/expdef.yml                      |  12 +-
 conf/spin-off/jobs.yml                        |  34 +++---
 conf/spin-off/parent_simulation.yml           |   2 +-
 templates/spin-off/prepare_parent_date.py     | 118 ++++++++++++++++++
 templates/spin-off/prepare_parent_namelist.py |  77 ++++++++----
 templates/spin-off/prepare_parent_runtime.py  |  46 -------
 templates/spin-off/run_parent_icon.sh         |  39 ++++++
 7 files changed, 234 insertions(+), 94 deletions(-)
 create mode 100644 templates/spin-off/prepare_parent_date.py
 delete mode 100644 templates/spin-off/prepare_parent_runtime.py
 create mode 100644 templates/spin-off/run_parent_icon.sh

diff --git a/conf/spin-off/expdef.yml b/conf/spin-off/expdef.yml
index 5b30855..049ce66 100644
--- a/conf/spin-off/expdef.yml
+++ b/conf/spin-off/expdef.yml
@@ -1,13 +1,11 @@
 # TODO: How to specify mother-member/ date combinations as start points for spin-offs?
 experiment:
-#  DATELIST: 20201001
-#  MEMBERS: "m[1-2]"
-  DATELIST_FROM_MOTHERMEMBERS: # list of (date, mothermember) to run spinoffs from
-    - [20201002, 2]
-    - [20201003, 2]
-  MEMBERS: "m[1-2]"  # number of members in spinoff ensemble
+  DATELIST: "[20201001, 20201003]"  # TODO: like this or with square brackets?
+  FROM_MEMBERS: "[1, 3]"  # requires same length as datelist
+  MEMBERS: "m[1-2]"  # members for new spinoff ensemble
   CHUNKSIZEUNIT: day
   CHUNKSIZE: 7
   NUMCHUNKS: 2
   CHUNKINI: 0
-  CALENDAR: standard
\ No newline at end of file
+  CALENDAR: standard
+  RSFILE_MAXDAYS_BACK: 30
\ No newline at end of file
diff --git a/conf/spin-off/jobs.yml b/conf/spin-off/jobs.yml
index f9cd36f..f19c653 100644
--- a/conf/spin-off/jobs.yml
+++ b/conf/spin-off/jobs.yml
@@ -25,39 +25,39 @@ JOBS:
 
   ############# parent experiment
   PREPARE_PARENT_EXPERIMENT:
-    FILE: templates/event-generator/prepare_parent_experiment.sh
+    FILE: templates/spin-off/prepare_parent_experiment.sh
     DEPENDENCIES: BUILD_ICON
     RUNNING: once
     WALLCLOCK: 01:00
 
-  PREPARE_PARENT_DATE:
-    FILE: templates/event-generator/prepare_parent_date.sh
+  PREPARE_PARENT_DATE: # TODO: ----> get index of date and select appropriate member
+    FILE: templates/spin-off/prepare_parent_date.py
     RUNNING: date
     WALLCLOCK: 01:00
     PLATFORM: LOCAL
 
-  PREPARE_PARENT_MEMBER:
-    FILE: templates/event-generator/prepare_parent_member.sh
-    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE
-    RUNNING: member
-    WALLCLOCK: 01:00
+#  PREPARE_PARENT_MEMBER:
+#    FILE: templates/event-generator/prepare_parent_member.sh # TODO: In here: get ens member for each date
+#    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE
+#    RUNNING: date
+#    WALLCLOCK: 01:00
 
-  ADAPT_PARENT_MEMBER:
-    FILE: templates/event-generator/adapt_parent_member.sh
-    RUNNING: member
-    WALLCLOCK: 00:20
-    DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
+#  ADAPT_PARENT_MEMBER: # TODO: no multiple members
+#    FILE: templates/event-generator/adapt_parent_member.sh
+#    RUNNING: date
+#    WALLCLOCK: 00:20
+#    DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
 
   PREPARE_PARENT_NAMELIST:
-    FILE: templates/event-generator/prepare_parent_namelist.py
-    DEPENDENCIES: ADAPT_PARENT_MEMBER RUN_PARENT_ICON-1
+    FILE: templates/spin-off/prepare_parent_namelist.py
+    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE RUN_PARENT_ICON-1
     WALLCLOCK: 00:05
     RUNNING: chunk
     TYPE: python
     EXECUTABLE: "%HPCROOTDIR%/%python_environment.folder_name%/bin/python3"
 
   RUN_PARENT_ICON:
-    FILE: templates/common/run_icon.sh
+    FILE: templates/spin-off/run_parent_icon.sh
     DEPENDENCIES: PREPARE_PARENT_NAMELIST COMPRESS-1 # TODO: remove COMPRESS-1?
     WALLCLOCK: 08:00
     RUNNING: chunk
@@ -93,7 +93,7 @@ JOBS:
     DEPENDENCIES: PREPARE_SPINOFF_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
 
   PREPARE_SPINOFF_NAMELIST:
-    FILE: templates/event-generator/prepare_spinoff_namelist.py
+    FILE: templates/spin-off/prepare_spinoff_namelist.py
     DEPENDENCIES: ADAPT_SPINOFF_MEMBER RUN_ICON-1
     WALLCLOCK: 00:05
     RUNNING: chunk
diff --git a/conf/spin-off/parent_simulation.yml b/conf/spin-off/parent_simulation.yml
index da3c0f9..d876c3d 100644
--- a/conf/spin-off/parent_simulation.yml
+++ b/conf/spin-off/parent_simulation.yml
@@ -19,5 +19,5 @@ simulation:
   local: true
   # /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201031T000000Z.nc
 #  parent_folder: /archive/meteo/external-models/dwd/icon/oper/icon_oper_eps_gridded-global_rolling/
-  parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/
+  parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/
   sea_surface_temperature_forcing: /project/meteo/w2w/Z2/autoicon/dummy_sst_enso_neutral.nc
diff --git a/templates/spin-off/prepare_parent_date.py b/templates/spin-off/prepare_parent_date.py
new file mode 100644
index 0000000..d3248ff
--- /dev/null
+++ b/templates/spin-off/prepare_parent_date.py
@@ -0,0 +1,118 @@
+"""
+A spin-off experiment may start from a day, where no event-generator restart file is available.
+This script finds the last available restart file in the event-generator directory.
+From this date, the event-generator member can be restarted.
+"""
+import ast
+import os
+from datetime import datetime, timedelta
+from glob import glob
+from pathlib import Path
+
+from dateutil.parser import parse
+
+# Autosubmit variables (the placeholders are filled in by autosubmit)
+WORKDIR = "%HPCROOTDIR%"
+STARTDATE = "%SDATE%"
+CURRENT_DATE = "%SDATE%"
+DATELIST_STRING = "%experiment.DATELIST%"
+FROM_MEMBERS_STRING = "%experiment.FROM_MEMBERS%"
+NOT_MORE_THAN_N_DAYS_BACK = "%experiment.RSFILE_MAXDAYS_BACK%"
+PARENT_FOLDER = "%initial_conditions.parent_folder%"
+
+
+def find_index_in_list(number_list, target_number):
+    """Return the index of target_number in number_list, or -1 if it is missing.
+
+    Entries are compared as strings, because the start date is a string
+    while the entries of the parsed datelist are integers.
+    """
+    target = str(target_number)
+    for index, entry in enumerate(number_list):
+        if str(entry) == target:
+            return index
+    return -1
+
+
+def infer_restart_dates_from_files_in_direcory(directory):
+    """Return the dates encoded in the names of the restart files in directory."""
+    # sample restart file: icon_grid_0010_R02B04_G_restart_atm_20201016T000000Z.nc
+    restart_files = glob(os.path.join(directory, "*restart*atm*.nc"))
+    return [parse(f.split("atm")[-1], fuzzy=True, ignoretz=True) for f in restart_files]
+
+
+def find_date_before(target_date, dates_list, max_lag=None):
+    """Return the latest date in dates_list that lies before target_date.
+
+    target_date is a datetime or a string of the form YYYYMMDD. If max_lag is
+    given, dates more than max_lag days before target_date are ignored.
+    Returns None if no date qualifies.
+    """
+    if not isinstance(target_date, datetime):
+        target_date = datetime.strptime(str(target_date), "%Y%m%d")
+
+    if max_lag is not None:
+        # the setting arrives as a string, timedelta needs a number
+        min_date_allowed = target_date - timedelta(days=int(max_lag))
+    else:
+        min_date_allowed = datetime.min
+
+    candidates = [dt for dt in dates_list if min_date_allowed <= dt < target_date]
+    return max(candidates, default=None)
+
+
+def link_restart_files(from_restart_date, from_directoy, into_directory):
+    """Link the restart file of from_restart_date from from_directoy into into_directory."""
+    # sample restart file:
+    # chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201016T000000Z.nc
+    date_string = from_restart_date.strftime("%Y%m%dT00")
+    pattern = f"{from_directoy}/*restart*{date_string}*.nc"
+    files = glob(pattern)
+    if len(files) == 0:
+        raise AssertionError(f"No restart files match pattern {pattern}.")
+    if len(files) > 1:
+        raise AssertionError(f"More than 1 file matches pattern {pattern}: {files}.")
+    source_file_path = files[0]
+
+    symlink_name = os.path.basename(source_file_path)
+    symlink_path = os.path.join(into_directory, symlink_name)
+    # replace a link left behind by a previous run, so that the job can be re-run
+    if os.path.islink(symlink_path):
+        os.remove(symlink_path)
+    os.symlink(source_file_path, symlink_path)
+    print(f"Symbolic link '{symlink_name}' created at '{into_directory}'")
+
+
+def main():
+    """Select the parent member of this start date and link its latest restart file."""
+    # parse FROM_MEMBERS and DATELIST into python lists
+    datelist = ast.literal_eval(DATELIST_STRING)
+    from_members = ast.literal_eval(FROM_MEMBERS_STRING)
+    if len(datelist) != len(from_members):
+        raise ValueError("DATELIST and FROM_MEMBERS need to have the same length.")
+
+    # the position of the start date in the datelist selects the parent member
+    index = find_index_in_list(datelist, CURRENT_DATE)
+    if index == -1:
+        raise ValueError(f"The date {CURRENT_DATE} is not in the datelist.")
+    print(f"The date {CURRENT_DATE} is at index {index} in the datelist.")
+    from_member = from_members[index]
+
+    # parent directory for the restart and target directory for the re-run
+    parent_dir = os.path.join(PARENT_FOLDER, f"m{from_member}")
+    run_dir = Path(WORKDIR) / f"{STARTDATE}_from_parent_{from_member}"
+    run_dir.mkdir(parents=True, exist_ok=True)
+    print(f"Directory '{run_dir}' is available.")
+
+    # find the latest restart file prior to the spin-off start date
+    restart_file_dates = infer_restart_dates_from_files_in_direcory(parent_dir)
+    restart_from = find_date_before(CURRENT_DATE, restart_file_dates, max_lag=NOT_MORE_THAN_N_DAYS_BACK)
+    if restart_from is None:
+        raise FileNotFoundError(f"No restart file found in '{parent_dir}' before {CURRENT_DATE}.")
+    print(f"Restarting parent run on {restart_from}.")
+    # link the restart file into the parental re-run directory
+    link_restart_files(from_restart_date=restart_from, from_directoy=parent_dir, into_directory=str(run_dir))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/templates/spin-off/prepare_parent_namelist.py b/templates/spin-off/prepare_parent_namelist.py
index 35888d5..63369ac 100644
--- a/templates/spin-off/prepare_parent_namelist.py
+++ b/templates/spin-off/prepare_parent_namelist.py
@@ -1,6 +1,7 @@
 import logging
 from datetime import datetime, timedelta
 from pathlib import Path
+from dateutil.parser import parse
 
 import f90nml
 import yaml
@@ -11,10 +12,10 @@ logger.setLevel(logging.INFO)
 # Get some autosubmit variables
 WORKDIR = "%HPCROOTDIR%"
 STARTDATE = "%SDATE%"
-MEMBER = "%MEMBER%"
-CHUNK = "%CHUNK%"
+# MEMBER = "%MEMBER%"
+# CHUNK = "%CHUNK%"
 # Get run directory
-RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}")
+# RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}") # _from_parent_
 
 ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%")
 MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
@@ -22,30 +23,60 @@ MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
 # analysis_filename = (RUNDIR / "igaf*.m*.grb")
 # analysis_filename = analysis_filename.parent.glob(analysis_filename.name).__next__()
 # analysis_filename = analysis_filename.name
-analysis_filename = "analysis.nc"
+# analysis_filename = "analysis.nc"
 
 # first_guess_filename = (RUNDIR / "igfff00030000.m*.grb").resolve()
 # first_guess_filename = first_guess_filename.parent.glob(first_guess_filename.name).__next__()
 # first_guess_filename = first_guess_filename.name
-first_guess_filename = "first_guess.nc"
+# first_guess_filename = "first_guess.nc"
+# restart_filename = "restart.nc"
 
 
 # Example of date format "2018-06-01T00:00:00Z"
 date_format = "%simulation.date_format%"
 
 
-START_YEAR = "%Chunk_START_YEAR%"
-START_MONTH = "%Chunk_START_MONTH%"
-START_DAY = "%Chunk_START_DAY%"
-START_HOUR = "%Chunk_START_HOUR%"
-
-END_YEAR = "%Chunk_END_YEAR%"
-END_MONTH = "%Chunk_END_MONTH%"
-END_DAY = "%Chunk_END_DAY%"
-END_HOUR = "%Chunk_END_HOUR%"
-
-Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
-Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+# this would be for the spin off, not for reproducing the parent
+# START_YEAR = "%Chunk_START_YEAR%"
+# START_MONTH = "%Chunk_START_MONTH%"
+# START_DAY = "%Chunk_START_DAY%"
+# START_HOUR = "%Chunk_START_HOUR%"
+#
+# END_YEAR = "%Chunk_END_YEAR%"
+# END_MONTH = "%Chunk_END_MONTH%"
+# END_DAY = "%Chunk_END_DAY%"
+# END_HOUR = "%Chunk_END_HOUR%"
+
+# Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
+# Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+
+# from which date are we actually starting?
+from glob import glob
+
+pattern = f"{WORKDIR}/{STARTDATE}_from_parent_*"
+files = glob(pattern)
+if len(files) == 0:
+    raise AssertionError(f"No files match pattern {pattern}.")
+elif len(files) > 1:
+    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
+else:
+    RUNDIR = files[0]
+    print(f"Rundirectory is {RUNDIR}")
+
+pattern = f"{RUNDIR}/*_restart_atm_*"
+restart_files_in_rundir = glob(pattern)
+if len(restart_files_in_rundir) == 0:
+    raise AssertionError(f"No files match pattern {pattern}.")
+elif len(restart_files_in_rundir) > 1:
+    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
+else:
+    f = restart_files_in_rundir[0]
+    inferred_restart_file_date = parse(f.split("atm")[-1], fuzzy=True, ignoretz=True)
+
+Chunk_START_DATE = inferred_restart_file_date
+Chunk_END_DATE = datetime.strptime(STARTDATE, "%Y%m%d")
+
+print(f"Start chunk on: {Chunk_START_DATE}, end chunk on: {Chunk_END_DATE}")
 
 # Read custom namelist parameters from configuration
 atmosphere_namelist_string = """
@@ -81,15 +112,16 @@ atmosphere_namelist_replacements = {
         "extpar_filename": "extpar.nc",
     },
 
-    "initicon_nml": {
-        "dwdfg_filename": first_guess_filename,
-        "dwdana_filename": analysis_filename,
-    }
+    # "initicon_nml": {
+    #     "dwdfg_filename": first_guess_filename,
+    #     "dwdana_filename": analysis_filename,
+    # }
 }
 
 master_namelist_replacements = {
     "master_nml": {
-        "lrestart": False if "%CHUNK%" == "1" else True,
+        # "lrestart": False if "%CHUNK%" == "1" else True,
+        "lrestart": True,
     },
     "master_time_control_nml": {
         "experimentStartDate": Chunk_START_DATE.strftime(date_format),
@@ -171,4 +203,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/templates/spin-off/prepare_parent_runtime.py b/templates/spin-off/prepare_parent_runtime.py
deleted file mode 100644
index d355a42..0000000
--- a/templates/spin-off/prepare_parent_runtime.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-A spin-off experiment may start from a day, where no event-generator restart file is available.
-This script finds the last available restart file in the event-generator directory.
-From this date, the event-generator member can be restarted.
-"""
-from glob import glob
-from dateutil.parser import parse
-from datetime import datetime, timedelta
-
-DIR_MOTHER_RUN = '/scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1'
-SPIN_OFF_START = '2020-10-20'
-NOT_MORE_THAN_N_DAYS_BACK = 30
-
-
-def infer_restart_dates_from_files_in_direcory(directory):
-    restart_files = glob(directory + '/*restart*.nc')
-
-    inferred_restart_file_dates = [parse(f.split("atm")[-1], fuzzy=True, ignoretz=True) for f in restart_files]
-    return inferred_restart_file_dates
-
-
-def find_date_before(target_date, dates_list, max_lag=None):
-    # Convert the target_date to a datetime object if it's not already
-    if not isinstance(target_date, datetime):
-        target_date = datetime.strptime(target_date, '%Y-%m-%d')  # Adjust the format if needed
-
-    # Calculate the minimum date allowed (if NOT_MORE_THAN_N_DAYS is provided)
-    if max_lag is not None:
-        min_date_allowed = target_date - timedelta(days=max_lag)
-    else:
-        min_date_allowed = datetime.min  # If NOT_MORE_THAN_N_DAYS is not provided, set a very early date
-
-    # Filter dates_list to get only the dates that occur before the target_date
-    filtered_dates = [dt for dt in dates_list if min_date_allowed <= dt < target_date]
-
-    # Find the maximum date from the filtered dates (i.e., the date that occurs first before the target_date)
-    if filtered_dates:
-        return max(filtered_dates)
-    else:
-        return None  # No date found before the target_date
-
-
-if __name__ == "__main__":
-    restart_file_dates = infer_restart_dates_from_files_in_direcory(DIR_MOTHER_RUN)
-    restart_from = find_date_before(SPIN_OFF_START, restart_file_dates, max_lag=NOT_MORE_THAN_N_DAYS_BACK)
-    print(restart_from)
diff --git a/templates/spin-off/run_parent_icon.sh b/templates/spin-off/run_parent_icon.sh
new file mode 100644
index 0000000..f04a165
--- /dev/null
+++ b/templates/spin-off/run_parent_icon.sh
@@ -0,0 +1,39 @@
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+ICON_VERSION=%ICON_VERSION%
+
+STARTDATE=%SDATE%
+
+# Define rundir (prepare_parent_date.py creates it as <STARTDATE>_from_parent_<member>)
+RUNDIR=$(find "${WORKDIR}" -maxdepth 1 -type d -name "${STARTDATE}_from_parent_*" | sort -n | head -n 1)
+if [ -z "${RUNDIR}" ]; then
+  echo "No rundir ${WORKDIR}/${STARTDATE}_from_parent_* found." >&2
+  exit 1
+fi
+
+# Go to the member rundir
+cd "${RUNDIR}" || exit 1
+
+# Activate spack
+. ${WORKDIR}/production_project/platforms/common/spack_utils.sh
+load_spack "%spack.init%" "%spack.root%" "%spack.url%" "%spack.branch%" "%spack.externals%" "%spack.compiler%" "%spack.disable_local_config%" "%spack.user_cache_path%" "%spack.user_config_path%" "%spack.upstreams%"
+
+
+# Get proper load command.
+SPACK_BUILD_ICON="%ICON.BUILD_CMD%"
+SPACK_LOAD_ICON="%ICON.LOAD_CMD%"
+
+if [ "${SPACK_LOAD_ICON}" == "build_cmd" ]; then
+  SPACK_LOAD_ICON=${SPACK_BUILD_ICON}
+fi
+# Load icon module
+spack load --first ${SPACK_LOAD_ICON}
+
+# Set environment variable for eccodes-dwd definitions:
+source ${WORKDIR}/eccodes_defs.env
+
+# Increase stack size limit
+ulimit -s unlimited
+
+# Run icon
+srun icon
-- 
GitLab