From 0b2d2560669986b272ac725bb14d79dac0cfe296 Mon Sep 17 00:00:00 2001
From: "Jonas.Spaeth" <jonas.spaeth@physik.uni-muenchen.de>
Date: Wed, 9 Aug 2023 16:11:30 +0200
Subject: [PATCH] implement FROM_MEMBERS and prepare_parent_date.py

---
 conf/spin-off/expdef.yml                      |  12 +-
 conf/spin-off/jobs.yml                        |  34 +++---
 conf/spin-off/parent_simulation.yml           |   2 +-
 templates/spin-off/prepare_parent_date.py     | 115 ++++++++++++++++++
 templates/spin-off/prepare_parent_namelist.py |  77 ++++++++----
 templates/spin-off/prepare_parent_runtime.py  |  46 -------
 templates/spin-off/run_parent_icon.sh         |  35 ++++++
 7 files changed, 227 insertions(+), 94 deletions(-)
 create mode 100644 templates/spin-off/prepare_parent_date.py
 delete mode 100644 templates/spin-off/prepare_parent_runtime.py
 create mode 100644 templates/spin-off/run_parent_icon.sh

diff --git a/conf/spin-off/expdef.yml b/conf/spin-off/expdef.yml
index 5b30855..049ce66 100644
--- a/conf/spin-off/expdef.yml
+++ b/conf/spin-off/expdef.yml
@@ -1,13 +1,11 @@
 # TODO: How to specify mother-member/ date combinations as start points for spin-offs?
 experiment:
-#  DATELIST: 20201001
-#  MEMBERS: "m[1-2]"
-  DATELIST_FROM_MOTHERMEMBERS:  # list of (date, mothermember) to run spinoffs from
-    - [20201002, 2]
-    - [20201003, 2]
-  MEMBERS: "m[1-2]" # number of members in spinoff ensemble
+  DATELIST: "[20201001, 20201003]"  # TODO: like this or with squared brackets?
+  FROM_MEMBERS: "[1, 3]"  # requires same length as datelist
+  MEMBERS: "m[1-2]" # members for new spinoff ensemble
   CHUNKSIZEUNIT: day
   CHUNKSIZE: 7
   NUMCHUNKS: 2
   CHUNKINI: 0
-  CALENDAR: standard
\ No newline at end of file
+  CALENDAR: standard
+  RSFILE_MAXDAYS_BACK: 30
\ No newline at end of file
diff --git a/conf/spin-off/jobs.yml b/conf/spin-off/jobs.yml
index f9cd36f..f19c653 100644
--- a/conf/spin-off/jobs.yml
+++ b/conf/spin-off/jobs.yml
@@ -25,39 +25,39 @@ JOBS:
 ############# parent experiment
 
   PREPARE_PARENT_EXPERIMENT:
-    FILE: templates/event-generator/prepare_parent_experiment.sh
+    FILE: templates/spin-off/prepare_parent_experiment.sh
     DEPENDENCIES: BUILD_ICON
     RUNNING: once
     WALLCLOCK: 01:00
 
-  PREPARE_PARENT_DATE:
-    FILE: templates/event-generator/prepare_parent_date.sh
+  PREPARE_PARENT_DATE:  # TODO: ----> get index of date and select appropriate member
+    FILE: templates/spin-off/prepare_parent_date.py
     RUNNING: date
     WALLCLOCK: 01:00
     PLATFORM: LOCAL
 
-  PREPARE_PARENT_MEMBER:
-    FILE: templates/event-generator/prepare_parent_member.sh
-    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE
-    RUNNING: member
-    WALLCLOCK: 01:00
+#  PREPARE_PARENT_MEMBER:
+#    FILE: templates/event-generator/prepare_parent_member.sh  # TODO: In here: get ens member for each date
+#    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE
+#    RUNNING: date
+#    WALLCLOCK: 01:00
 
-  ADAPT_PARENT_MEMBER:
-    FILE: templates/event-generator/adapt_parent_member.sh
-    RUNNING: member
-    WALLCLOCK: 00:20
-    DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
+#  ADAPT_PARENT_MEMBER:  # TODO: no multiple members
+#    FILE: templates/event-generator/adapt_parent_member.sh
+#    RUNNING: date
+#    WALLCLOCK: 00:20
+#    DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
 
   PREPARE_PARENT_NAMELIST:
-    FILE: templates/event-generator/prepare_parent_namelist.py
-    DEPENDENCIES: ADAPT_PARENT_MEMBER RUN_PARENT_ICON-1
+    FILE: templates/spin-off/prepare_parent_namelist.py
+    DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE RUN_PARENT_ICON-1
     WALLCLOCK: 00:05
     RUNNING: chunk
     TYPE: python
     EXECUTABLE: "%HPCROOTDIR%/%python_environment.folder_name%/bin/python3"
 
   RUN_PARENT_ICON:
-    FILE: templates/common/run_icon.sh
+    FILE: templates/spin-off/run_parent_icon.sh
     DEPENDENCIES: PREPARE_PARENT_NAMELIST COMPRESS-1  # TODO: remove COMPRESS-1?
     WALLCLOCK: 08:00
     RUNNING: chunk
@@ -93,7 +93,7 @@ JOBS:
     DEPENDENCIES: PREPARE_SPINOFF_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
 
   PREPARE_SPINOFF_NAMELIST:
-    FILE: templates/event-generator/prepare_spinoff_namelist.py
+    FILE: templates/spin-off/prepare_spinoff_namelist.py
     DEPENDENCIES: ADAPT_SPINOFF_MEMBER RUN_ICON-1
     WALLCLOCK: 00:05
     RUNNING: chunk
diff --git a/conf/spin-off/parent_simulation.yml b/conf/spin-off/parent_simulation.yml
index da3c0f9..d876c3d 100644
--- a/conf/spin-off/parent_simulation.yml
+++ b/conf/spin-off/parent_simulation.yml
@@ -19,5 +19,5 @@ simulation:
     local: true
     # /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201031T000000Z.nc
 #    parent_folder: /archive/meteo/external-models/dwd/icon/oper/icon_oper_eps_gridded-global_rolling/
-    parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/
+    parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/
     sea_surface_temperature_forcing: /project/meteo/w2w/Z2/autoicon/dummy_sst_enso_neutral.nc
diff --git a/templates/spin-off/prepare_parent_date.py b/templates/spin-off/prepare_parent_date.py
new file mode 100644
index 0000000..d3248ff
--- /dev/null
+++ b/templates/spin-off/prepare_parent_date.py
@@ -0,0 +1,115 @@
+"""
+A spin-off experiment may start on a day for which no event-generator restart file is available.
+This script finds the latest restart file before that day in the event-generator (parent) member
+directory and links it into the parent re-run directory, from where that member can be restarted.
+"""
+from glob import glob
+from dateutil.parser import parse
+from datetime import datetime, timedelta
+from pathlib import Path
+import ast
+import os
+
+# Autosubmit placeholders (%...%); presumably substituted as text before this script runs.
+WORKDIR = "%HPCROOTDIR%"
+STARTDATE = "%SDATE%"
+# NOTE: everything below is a plain string at this point, including RSFILE_MAXDAYS_BACK.
+CURRENT_DATE = "%SDATE%"
+DATELIST_STRING = "%experiment.DATELIST%"
+FROM_MEMBERS_STRING = "%experiment.FROM_MEMBERS%"
+NOT_MORE_THAN_N_DAYS_BACK = "%experiment.RSFILE_MAXDAYS_BACK%"
+
+# Parse the list literals from the configuration (e.g. "[1, 3]") into Python lists.
+DATELIST = ast.literal_eval(DATELIST_STRING)
+FROM_MEMBERS = ast.literal_eval(FROM_MEMBERS_STRING)
+
+
+# get index of current start date in datelist and therefrom infer from_member
+def find_index_in_list(number_list, target_number):
+    """Return the index of the first occurrence of target_number in number_list, or -1."""
+    # -1 is a "not found" sentinel; callers must check it, as -1 is also a valid list index.
+    if target_number not in number_list:
+        return -1
+    return number_list.index(target_number)
+
+
+# %SDATE% is substituted as a string while DATELIST may hold ints, so compare as strings.
+index = find_index_in_list([str(d) for d in DATELIST], str(CURRENT_DATE))
+if index == -1:  # fail early: FROM_MEMBERS[-1] would silently select the last member
+    raise ValueError(f"The date {CURRENT_DATE} is not in the datelist.")
+print(f"The date {CURRENT_DATE} is at index {index} in the datelist.")
+FROM_MEMBER = FROM_MEMBERS[index]
+
+# Parent member directory holding the restart files: <parent_folder>m<FROM_MEMBER>/
+PARENTDIR = "%initial_conditions.parent_folder%" + f"m{FROM_MEMBER:0d}/"
+# Run directory for re-running the parent; an OSError from makedirs is only printed, not raised.
+RUNDIR = f"{WORKDIR}/{STARTDATE}_from_parent_{FROM_MEMBER}"
+try:
+    os.makedirs(RUNDIR)
+    print(f"Directory '{RUNDIR}' created successfully.")
+except OSError as e:
+    print(f"An error occurred: {e}")
+
+
+def infer_restart_dates_from_files_in_direcory(directory):
+    """Return the datetimes parsed from the names of all '*restart*.nc' files in directory."""
+    # The timestamp is taken from the text after the last "atm" in each path.
+    tails = (path.split("atm")[-1] for path in glob(directory + '/*restart*.nc'))
+    return [parse(tail, fuzzy=True, ignoretz=True) for tail in tails]
+
+
+def find_date_before(target_date, dates_list, max_lag=None):
+    """Return the latest date in dates_list that lies strictly before target_date.
+
+    target_date may be a datetime or a 'YYYYMMDD' string. If max_lag (in days) is given,
+    dates more than max_lag days before target_date are ignored.
+    Returns None when no date qualifies.
+    """
+    if not isinstance(target_date, datetime):
+        target_date = datetime.strptime(target_date, '%Y%m%d')  # Adjust the format if needed
+
+    # Without max_lag the window is unbounded towards the past.
+    earliest = datetime.min if max_lag is None else target_date - timedelta(days=max_lag)
+
+    # Keep only dates inside the half-open window [earliest, target_date).
+    candidates = [dt for dt in dates_list if earliest <= dt < target_date]
+
+    if not candidates:
+        return None  # No date found before the target_date
+    return max(candidates)
+
+
+def link_restart_files(from_restart_date, from_directoy, into_directory):
+    """Symlink the restart file of from_restart_date from from_directoy into into_directory.
+
+    Exactly one file must match '*restart*<YYYYMMDD>T00*.nc' in from_directoy, e.g.
+    chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201016T000000Z.nc; otherwise
+    an AssertionError is raised. The link keeps the file name; a link left over from a
+    previous run of this job is replaced instead of failing with FileExistsError.
+    """
+    date_string = from_restart_date.strftime("%Y%m%dT00")
+    pattern = f"{from_directoy}/*restart*{date_string}*.nc"
+    files = glob(pattern)
+    if len(files) == 0:
+        raise AssertionError(f"No restart files match pattern {pattern}.")
+    if len(files) > 1:
+        raise AssertionError(f"More than 1 file matches pattern {pattern}: {files}.")
+    source_file_path = files[0]
+
+    symlink_name = os.path.basename(source_file_path)
+    symlink_path = os.path.join(into_directory, symlink_name)
+    if os.path.islink(symlink_path):  # stale link from a previous run of this job
+        os.remove(symlink_path)
+    os.symlink(source_file_path, symlink_path)
+
+    print(f"Symbolic link '{symlink_name}' created at '{into_directory}'")
+
+
+if __name__ == "__main__":
+    # Latest restart date in the parent run before the spin-off start (max_lag must be numeric).
+    restart_file_dates = infer_restart_dates_from_files_in_direcory(PARENTDIR)
+    restart_from = find_date_before(CURRENT_DATE, restart_file_dates, max_lag=int(NOT_MORE_THAN_N_DAYS_BACK))
+    if restart_from is None:  # otherwise link_restart_files fails on None.strftime
+        raise FileNotFoundError(f"No restart file found in {PARENTDIR} before {CURRENT_DATE}.")
+    print(f"Restarting parent run on {restart_from}.")
+    link_restart_files(from_restart_date=restart_from, from_directoy=PARENTDIR, into_directory=RUNDIR)
diff --git a/templates/spin-off/prepare_parent_namelist.py b/templates/spin-off/prepare_parent_namelist.py
index 35888d5..63369ac 100644
--- a/templates/spin-off/prepare_parent_namelist.py
+++ b/templates/spin-off/prepare_parent_namelist.py
@@ -1,6 +1,7 @@
 import logging
 from datetime import datetime, timedelta
 from pathlib import Path
+from dateutil.parser import parse
 
 import f90nml
 import yaml
@@ -11,10 +12,10 @@ logger.setLevel(logging.INFO)
 # Get some autosubmit variables
 WORKDIR = "%HPCROOTDIR%"
 STARTDATE = "%SDATE%"
-MEMBER = "%MEMBER%"
-CHUNK = "%CHUNK%"
+# MEMBER = "%MEMBER%"
+# CHUNK = "%CHUNK%"
 # Get run directory
-RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}")
+# RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}")  # _from_parent_
 ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%")
 MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
 
@@ -22,30 +23,60 @@ MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
 # analysis_filename = (RUNDIR / "igaf*.m*.grb")
 # analysis_filename = analysis_filename.parent.glob(analysis_filename.name).__next__()
 # analysis_filename = analysis_filename.name
-analysis_filename = "analysis.nc"
+# analysis_filename = "analysis.nc"
 
 
 # first_guess_filename = (RUNDIR / "igfff00030000.m*.grb").resolve()
 # first_guess_filename = first_guess_filename.parent.glob(first_guess_filename.name).__next__()
 # first_guess_filename = first_guess_filename.name
-first_guess_filename = "first_guess.nc"
+# first_guess_filename = "first_guess.nc"
 
+# restart_filename = "restart.nc"
 
 # Example of date format "2018-06-01T00:00:00Z"
 date_format = "%simulation.date_format%"
 
-START_YEAR = "%Chunk_START_YEAR%"
-START_MONTH = "%Chunk_START_MONTH%"
-START_DAY = "%Chunk_START_DAY%"
-START_HOUR = "%Chunk_START_HOUR%"
-
-END_YEAR = "%Chunk_END_YEAR%"
-END_MONTH = "%Chunk_END_MONTH%"
-END_DAY = "%Chunk_END_DAY%"
-END_HOUR = "%Chunk_END_HOUR%"
-
-Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
-Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+# this would be for the spin off, not for reproducing the parent
+# START_YEAR = "%Chunk_START_YEAR%"
+# START_MONTH = "%Chunk_START_MONTH%"
+# START_DAY = "%Chunk_START_DAY%"
+# START_HOUR = "%Chunk_START_HOUR%"
+#
+# END_YEAR = "%Chunk_END_YEAR%"
+# END_MONTH = "%Chunk_END_MONTH%"
+# END_DAY = "%Chunk_END_DAY%"
+# END_HOUR = "%Chunk_END_HOUR%"
+
+# Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
+# Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+
+# Locate the run directory created by prepare_parent_date.py and infer the actual start date.
+from glob import glob
+
+pattern = f"{WORKDIR}/{STARTDATE}_from_parent_*"  # same naming as RUNDIR in prepare_parent_date.py
+files = glob(pattern)
+if len(files) == 0:
+    raise AssertionError(f"No files match pattern {pattern}.")
+elif len(files) > 1:
+    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
+else:
+    RUNDIR = Path(files[0])  # NOTE(review): RUNDIR used to be a Path; presumably later code relies on that
+    print(f"Rundirectory is {RUNDIR}")
+
+pattern = f"{RUNDIR}/*_restart_atm_*"
+restart_files_in_rundir = glob(pattern)
+if len(restart_files_in_rundir) == 0:
+    raise AssertionError(f"No files match pattern {pattern}.")
+elif len(restart_files_in_rundir) > 1:
+    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
+else:
+    f = restart_files_in_rundir[0]
+    inferred_restart_file_date = parse(f.split("atm")[-1], fuzzy=True, ignoretz=True)
+# The parent is re-run from the date of its linked restart file up to the spin-off start date.
+Chunk_START_DATE = inferred_restart_file_date
+Chunk_END_DATE = datetime.strptime(STARTDATE, "%Y%m%d")
+
+print(f"Start chunk on: {Chunk_START_DATE}, end chunk on: {Chunk_END_DATE}")
 
 # Read custom namelist parameters from configuration
 atmosphere_namelist_string = """
@@ -81,15 +112,16 @@ atmosphere_namelist_replacements = {
         "extpar_filename": "extpar.nc",
     },
 
-    "initicon_nml": {
-        "dwdfg_filename": first_guess_filename,
-        "dwdana_filename": analysis_filename,
-    }
+    # "initicon_nml": {
+    #     "dwdfg_filename": first_guess_filename,
+    #     "dwdana_filename": analysis_filename,
+    # }
 }
 
 master_namelist_replacements = {
     "master_nml": {
-        "lrestart": False if "%CHUNK%" == "1" else True,
+        # "lrestart": False if "%CHUNK%" == "1" else True,
+        "lrestart": True,
     },
     "master_time_control_nml": {
         "experimentStartDate": Chunk_START_DATE.strftime(date_format),
@@ -171,4 +203,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/templates/spin-off/prepare_parent_runtime.py b/templates/spin-off/prepare_parent_runtime.py
deleted file mode 100644
index d355a42..0000000
--- a/templates/spin-off/prepare_parent_runtime.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-A spin-off experiment may start from a day, where no event-generator restart file is available.
-This script finds the last available restart file in the event-generator directory.
-From this date, the event-generator member can be restarted.
-"""
-from glob import glob
-from dateutil.parser import parse
-from datetime import datetime, timedelta
-
-DIR_MOTHER_RUN = '/scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1'
-SPIN_OFF_START = '2020-10-20'
-NOT_MORE_THAN_N_DAYS_BACK = 30
-
-
-def infer_restart_dates_from_files_in_direcory(directory):
-    restart_files = glob(directory + '/*restart*.nc')
-
-    inferred_restart_file_dates = [parse(f.split("atm")[-1], fuzzy=True, ignoretz=True) for f in restart_files]
-    return inferred_restart_file_dates
-
-
-def find_date_before(target_date, dates_list, max_lag=None):
-    # Convert the target_date to a datetime object if it's not already
-    if not isinstance(target_date, datetime):
-        target_date = datetime.strptime(target_date, '%Y-%m-%d')  # Adjust the format if needed
-
-    # Calculate the minimum date allowed (if NOT_MORE_THAN_N_DAYS is provided)
-    if max_lag is not None:
-        min_date_allowed = target_date - timedelta(days=max_lag)
-    else:
-        min_date_allowed = datetime.min  # If NOT_MORE_THAN_N_DAYS is not provided, set a very early date
-
-    # Filter dates_list to get only the dates that occur before the target_date
-    filtered_dates = [dt for dt in dates_list if min_date_allowed <= dt < target_date]
-
-    # Find the maximum date from the filtered dates (i.e., the date that occurs first before the target_date)
-    if filtered_dates:
-        return max(filtered_dates)
-    else:
-        return None  # No date found before the target_date
-
-
-if __name__ == "__main__":
-    restart_file_dates = infer_restart_dates_from_files_in_direcory(DIR_MOTHER_RUN)
-    restart_from = find_date_before(SPIN_OFF_START, restart_file_dates, max_lag=NOT_MORE_THAN_N_DAYS_BACK)
-    print(restart_from)
diff --git a/templates/spin-off/run_parent_icon.sh b/templates/spin-off/run_parent_icon.sh
new file mode 100644
index 0000000..f04a165
--- /dev/null
+++ b/templates/spin-off/run_parent_icon.sh
@@ -0,0 +1,35 @@
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+ICON_VERSION=%ICON_VERSION%
+
+STARTDATE=%SDATE%
+
+# Define rundir: prepare_parent_date.py names it ${STARTDATE}_from_parent_<member>, hence the wildcard
+RUNDIR=$(find "${WORKDIR}" -type d -name "${STARTDATE}_from_parent_*" | sort -n | head -n 1)
+
+# Go to the parent rundir; abort if none was found (a bare "cd" would silently go to $HOME)
+cd "${RUNDIR:?no ${STARTDATE}_from_parent_* directory found below ${WORKDIR}}" || exit 1
+
+# Source the spack helper functions and set up spack via load_spack
+. ${WORKDIR}/production_project/platforms/common/spack_utils.sh
+load_spack "%spack.init%" "%spack.root%" "%spack.url%" "%spack.branch%" "%spack.externals%" "%spack.compiler%" "%spack.disable_local_config%" "%spack.user_cache_path%" "%spack.user_config_path%" "%spack.upstreams%"
+
+
+# Get proper load command: the literal value "build_cmd" means "reuse the build command".
+SPACK_BUILD_ICON="%ICON.BUILD_CMD%"
+SPACK_LOAD_ICON="%ICON.LOAD_CMD%"
+
+if [ "${SPACK_LOAD_ICON}" == "build_cmd" ]; then
+  SPACK_LOAD_ICON=${SPACK_BUILD_ICON}
+fi
+# Load icon module (--first: take the first match if several installed specs fit)
+spack load --first ${SPACK_LOAD_ICON}
+
+# Set environment variable for eccodes-dwd definitions:
+source ${WORKDIR}/eccodes_defs.env
+
+# Increase stack size limit
+ulimit -s unlimited
+
+# Run icon in the current directory (the rundir entered above)
+srun icon
-- 
GitLab