From c2e15b42771e1cea056ba6731324b8f0376b72df Mon Sep 17 00:00:00 2001
From: "Takumi.Matsunobu" <Takumi.Matsunobu@physik.uni-muenchen.de>
Date: Wed, 31 May 2023 14:16:15 +0200
Subject: [PATCH] initialise the branch for  ICON-D2 from DWD ana

---
 .gitignore                                    |   1 +
 conf/real-from-d2-ana/jobs.yml                |  21 +++
 .../real-from-d2-ana/icon_atmosphere.namelist |   0
 .../real-from-d2-ana/prepare_date_local.sh    |  87 ++++++++++
 .../real-from-d2-ana/prepare_date_remote.sh   |  54 ++++++
 .../real-from-d2-ana/prepare_experiment.sh    |  45 +++++
 templates/real-from-d2-ana/prepare_member.sh  |  24 +++
 .../real-from-d2-ana/prepare_namelist.py      | 164 ++++++++++++++++++
 8 files changed, 396 insertions(+)
 create mode 100644 conf/real-from-d2-ana/jobs.yml
 create mode 100644 namelists/real-from-d2-ana/icon_atmosphere.namelist
 create mode 100644 templates/real-from-d2-ana/prepare_date_local.sh
 create mode 100644 templates/real-from-d2-ana/prepare_date_remote.sh
 create mode 100644 templates/real-from-d2-ana/prepare_experiment.sh
 create mode 100644 templates/real-from-d2-ana/prepare_member.sh
 create mode 100644 templates/real-from-d2-ana/prepare_namelist.py

diff --git a/.gitignore b/.gitignore
index eeb8a6e..df1465e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 **/__pycache__
+.ipynb_checkpoints
\ No newline at end of file
diff --git a/conf/real-from-d2-ana/jobs.yml b/conf/real-from-d2-ana/jobs.yml
new file mode 100644
index 0000000..2ce9bae
--- /dev/null
+++ b/conf/real-from-d2-ana/jobs.yml
@@ -0,0 +1,21 @@
+JOBS:  # All templates of this workflow live in templates/real-from-d2-ana/
+  PREPARE_EXPERIMENT:
+    FILE: templates/real-from-d2-ana/prepare_experiment.sh
+
+  PREPARE_DATE_LOCAL:
+    FILE: templates/real-from-d2-ana/prepare_date_local.sh
+    RUNNING: date
+    WALLCLOCK: 01:00
+    PLATFORM: LOCAL
+
+  PREPARE_DATE_REMOTE:
+    FILE: templates/real-from-d2-ana/prepare_date_remote.sh
+    RUNNING: date
+    WALLCLOCK: 01:00
+
+  PREPARE_MEMBER:
+    FILE: templates/real-from-d2-ana/prepare_member.sh
+    DEPENDENCIES: PREPARE_EXPERIMENT PREPARE_DATE_REMOTE PREPARE_DATE_LOCAL
+
+  PREPARE_NAMELIST:
+    FILE: templates/real-from-d2-ana/prepare_namelist.py
\ No newline at end of file
diff --git a/namelists/real-from-d2-ana/icon_atmosphere.namelist b/namelists/real-from-d2-ana/icon_atmosphere.namelist
new file mode 100644
index 0000000..e69de29
diff --git a/templates/real-from-d2-ana/prepare_date_local.sh b/templates/real-from-d2-ana/prepare_date_local.sh
new file mode 100644
index 0000000..0f579e8
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_date_local.sh
@@ -0,0 +1,87 @@
+#!/bin/bash -l
+
+# This script is executed on the machine at which autosubmit is executed.
+# and will be used if the initial conditions are in this same system.
+
+# Because it can happen that the initial conditions as well and the execution happens in the local
+# system we need to define these two variables:
+DATA_IS_LOCAL=%SIMULATION.INITIAL_CONDITIONS.LOCAL%
+
+if [ "x%HPCARCH%" == "xlocal" ]; then
+  RUN_MACHINE_IS_LOCAL="True"
+else
+  RUN_MACHINE_IS_LOCAL="False"
+fi
+
+
+
+if [ "${DATA_IS_LOCAL}" == "True" ]; then
+  # Get some variables provided by autosubmit.
+  WORKDIR=%HPCROOTDIR%
+  STARTDATE=%SDATE%
+  HPCUSER=%HPCUSER%
+  HPCHOST=%HPCHOST%
+
+  # Define date directory, create it and go there
+  COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+  AN_MEMBER=$(printf "%03d" %SIMULATION.INITIAL_CONDITIONS.MEMBER%)
+  INITIAL_CONDITIONS_PARENT_FOLDER=%SIMULATION.INITIAL_CONDITIONS.PARENT_FOLDER%
+  INITIAL_CONDITIONS_PATH=${INITIAL_CONDITIONS_PARENT_FOLDER}/${STARTDATE:0:6}/${STARTDATE:0:8}T00
+
+  AN_SOURCE=$(find ${INITIAL_CONDITIONS_PATH} -name "igaf*00.m${AN_MEMBER}.grb" | sort | tail -n 1)
+  FG_SOURCE=$(find ${INITIAL_CONDITIONS_PATH} -name "igfff00030000.m${AN_MEMBER}.grb" | sort | tail -n 1)
+
+  AN_FILE=$(basename "${AN_SOURCE}")
+  FG_FILE=$(basename "${FG_SOURCE}")
+
+  # Find files
+  if [ ! -f "${AN_SOURCE}" ]; then
+    echo "Analysis file for date ${STARTDATE} not found!"
+    exit 1
+  fi
+
+  if [ ! -f "${FG_SOURCE}" ]; then
+    echo "FG file for date ${STARTDATE} not found!"
+    exit 1
+  fi
+
+
+  # Check if we copy the initial conditions from the local system or the remote one
+  if [ "${RUN_MACHINE_IS_LOCAL}" != "True" ]; then
+    # Create member folder
+    ssh "${HPCUSER}@${HPCHOST}" mkdir -p ${COMMON_DATE_FOLDER}
+
+    # Save filenames to be used later by other scripts.
+    echo "${AN_FILE}" > an_file.txt
+    echo "${FG_FILE}" > fg_file.txt
+    rsync -v an_file.txt "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/an_file.txt"
+    rsync -v fg_file.txt "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/fg_file.txt"
+
+    # Remove temporary files.
+    rm an_file.txt
+    rm fg_file.txt
+
+    # Copy the first-guess and analysis files.
+    rsync -v "${FG_SOURCE}" "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/${FG_FILE}"
+    rsync -v "${AN_SOURCE}" "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/${AN_FILE}"
+
+    # Change permissions to read only.
+    ssh "${HPCUSER}@${HPCHOST}" chmod 440 "${COMMON_DATE_FOLDER}/*"
+  else
+    # Create member folder and go there
+    mkdir -p ${COMMON_DATE_FOLDER}
+    cd ${COMMON_DATE_FOLDER} || exit
+
+    # Save filenames to be used later by other scripts.
+    echo "${AN_FILE}" > an_file.txt
+    echo "${FG_FILE}" > fg_file.txt
+
+    # Copy the first-guess and analysis files.
+    cp "${FG_SOURCE}" "${FG_FILE}"
+    cp "${AN_SOURCE}" "${AN_FILE}"
+
+    # Change permissions to read only.
+    chmod 440 ./*
+  fi
+fi
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_date_remote.sh b/templates/real-from-d2-ana/prepare_date_remote.sh
new file mode 100644
index 0000000..29ad0aa
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_date_remote.sh
@@ -0,0 +1,54 @@
+#!/bin/bash -l
+
+# This script is executed on the remote system on which the simulation will run.
+# It only does work when the initial conditions are stored on that same remote system.
+DATA_IS_LOCAL=%SIMULATION.INITIAL_CONDITIONS.LOCAL%
+
+
+if [ "${DATA_IS_LOCAL}" != "True" ]; then
+  # Get some variables provided by autosubmit.
+  WORKDIR=%HPCROOTDIR%
+  STARTDATE=%SDATE%
+
+  # Define the common folder for this start date (it is created further below).
+  COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+  AN_MEMBER=$(printf "%03d" %SIMULATION.INITIAL_CONDITIONS.MEMBER%)
+  INITIAL_CONDITIONS_PARENT_FOLDER=%SIMULATION.INITIAL_CONDITIONS.PARENT_FOLDER%
+  INITIAL_CONDITIONS_PATH=${INITIAL_CONDITIONS_PARENT_FOLDER}/${STARTDATE:0:6}/${STARTDATE:0:8}T00
+
+  # Of all files matching the member, keep the last one in sorted order.
+  AN_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igaf*00.m${AN_MEMBER}.grb" | sort | tail -n 1)
+  FG_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igfff00030000.m${AN_MEMBER}.grb" | sort | tail -n 1)
+
+  AN_FILE=$(basename "${AN_SOURCE}")
+  FG_FILE=$(basename "${FG_SOURCE}")
+
+  # Abort if either file was not found (find printed nothing, so the path is empty).
+  if [ ! -f "${AN_SOURCE}" ]; then
+    echo "Analysis file for date ${STARTDATE} not found!" >&2
+    exit 1
+  fi
+
+  if [ ! -f "${FG_SOURCE}" ]; then
+    echo "FG file for date ${STARTDATE} not found!" >&2
+    exit 1
+  fi
+
+
+  # The data is already on this machine, so a plain copy into the date folder is enough.
+  # Create the common date folder and go there.
+  mkdir -p "${COMMON_DATE_FOLDER}"
+  cd "${COMMON_DATE_FOLDER}" || exit 1
+
+  # Save filenames to be used later by other scripts.
+  echo "${AN_FILE}" > an_file.txt
+  echo "${FG_FILE}" > fg_file.txt
+
+  # Copy the first-guess and analysis files.
+  cp "${FG_SOURCE}" "${FG_FILE}"
+  cp "${AN_SOURCE}" "${AN_FILE}"
+
+  # Change permissions to read only.
+  chmod 440 ./*
+fi
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_experiment.sh b/templates/real-from-d2-ana/prepare_experiment.sh
new file mode 100644
index 0000000..36ba043
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_experiment.sh
@@ -0,0 +1,45 @@
+#!/bin/bash -l
+
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+DYNAMICS_GRID_FILENAME=%simulation.dynamics_grid_filename%
+RADIATION_GRID_FILE=%simulation.radiation_grid_filename%
+EXTERNAL_PARAMETERS_FILE=%simulation.external_parameters_filename%
+
+
+# Activate spack
+. "${WORKDIR}/proj/platforms/common/spack_utils.sh"
+load_spack "%spack.init%" "%spack.root%" "%spack.url%" "%spack.branch%" "%spack.externals%" "%spack.compiler%" "%spack.disable_local_config%" "%spack.user_cache_path%" "%spack.user_config_path%" "%spack.upstreams%"
+
+# Load icon module needed to retrieve some data
+spack load --first icon-nwp@%ICON_VERSION%
+
+# Create a folder for the common inidata and go there
+COMMON_INIDATA_FOLDER=${WORKDIR}/inidata
+mkdir -p "${COMMON_INIDATA_FOLDER}"
+cd "${COMMON_INIDATA_FOLDER}" || exit 1
+
+# Download URL ($1) into FILE ($2, default: basename of the URL) unless FILE already exists.
+function download_file() {
+  local URL=$1
+  local FILE=${2:-$(basename "$URL")}
+  if [ ! -e "$FILE" ]; then
+    echo "Download $URL => $FILE"
+    wget -q "$URL" -O "$FILE" || { rm -f "$FILE"; echo "Download of $URL failed!" >&2; exit 1; }
+  fi
+}
+
+# Download grid files and external parameters
+BASEURL=http://icon-downloads.mpimet.mpg.de/grids/public/edzw
+download_file "${BASEURL}/${DYNAMICS_GRID_FILENAME}"
+download_file "${BASEURL}/${RADIATION_GRID_FILE}"
+download_file "${BASEURL}/${EXTERNAL_PARAMETERS_FILE}"
+
+# Link input for radiation (ICON_DATA_PATH / ICON_BASE_PATH presumably come from the icon module - TODO confirm)
+ln -sf "${ICON_DATA_PATH}/rrtmg_lw.nc" .
+ln -sf "${ICON_BASE_PATH}/run/ana_varnames_map_file.txt" .
+ln -sf "${ICON_DATA_PATH}/ECHAM6_CldOptProps.nc" .
+
+# Only regular files are touched: chmod on a symlink would change the linked file itself.
+# Change permissions to read only.
+find . -maxdepth 1 -type f ! -name '.*' -exec chmod 440 {} +
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_member.sh b/templates/real-from-d2-ana/prepare_member.sh
new file mode 100644
index 0000000..ff7ebc1
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_member.sh
@@ -0,0 +1,24 @@
+#!/bin/bash -l
+
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+STARTDATE=%SDATE%
+MEMBER=%MEMBER%
+
+# Common folder with data needed for all simulations
+COMMON_INIDATA_FOLDER=${WORKDIR}/inidata
+# Common folder for the same start date
+COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+# Member folder
+MEMBER_DIR=${WORKDIR}/${STARTDATE}/${MEMBER}
+
+# Create member folder and go there
+mkdir -p "${MEMBER_DIR}"
+
+cd "${MEMBER_DIR}" || exit 1
+
+
+# Link all files from the common inidata folder and the common date folder
+ln -sf "${COMMON_INIDATA_FOLDER}"/* .
+ln -sf "${COMMON_DATE_FOLDER}"/* .
diff --git a/templates/real-from-d2-ana/prepare_namelist.py b/templates/real-from-d2-ana/prepare_namelist.py
new file mode 100644
index 0000000..f259ad5
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_namelist.py
@@ -0,0 +1,164 @@
+import logging
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import f90nml
+import yaml
+
+logger = logging.getLogger("prepare_chunk")
+logger.setLevel(logging.INFO)
+
+# Get some autosubmit variables
+WORKDIR = "%HPCROOTDIR%"
+STARTDATE = "%SDATE%"
+MEMBER = "%MEMBER%"
+CHUNK = "%CHUNK%"
+# Get run directory
+RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}")
+ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%")
+MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
+# TODO: This is a bit ugly
+# Read first-guess and analysis filenames from files:
+first_guess_filename = (RUNDIR / "fg_file.txt").read_text().strip()
+analysis_filename = (RUNDIR / "an_file.txt").read_text().strip()
+
+# Example of date format "2018-06-01T00:00:00Z"
+date_format = "%simulation.date_format%"
+
+START_YEAR = "%Chunk_START_YEAR%"
+START_MONTH = "%Chunk_START_MONTH%"
+START_DAY = "%Chunk_START_DAY%"
+START_HOUR = "%Chunk_START_HOUR%"
+
+END_YEAR = "%Chunk_END_YEAR%"
+END_MONTH = "%Chunk_END_MONTH%"
+END_DAY = "%Chunk_END_DAY%"
+END_HOUR = "%Chunk_END_HOUR%"
+
+Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
+Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+
+# Read custom namelist parameters from configuration
+atmosphere_namelist_string = """
+%atmosphere_namelist%
+"""
+
+master_namelist_string = """
+%master_namelist%
+"""
+
+# Compute difference in seconds
+checkpoint_time = int((Chunk_END_DATE - Chunk_START_DATE).total_seconds())
+
+# TODO: Is that really necessary?
+# Add 10 minutes to allow the model to write the restarts
+Chunk_END_DATE = Chunk_END_DATE + timedelta(minutes=10)
+
+atmosphere_namelist_replacements = {
+    "time_nml": {
+        "dt_restart": checkpoint_time
+    },
+    "io_nml": {
+        "dt_checkpoint": checkpoint_time
+    },
+
+    "grid_nml": {
+        "dynamics_grid_filename": "%simulation.dynamics_grid_filename%",
+        "radiation_grid_filename": "%simulation.radiation_grid_filename%",
+    },
+
+    "extpar_nml": {
+        "extpar_filename": "%simulation.external_parameters_filename%",
+    },
+
+    "initicon_nml": {
+        "dwdfg_filename": first_guess_filename,
+        "dwdana_filename": analysis_filename,
+    }
+}
+
+master_namelist_replacements = {
+    "master_nml": {
+        "lrestart": False if "%CHUNK%" == "1" else True,
+    },
+    "master_time_control_nml": {
+        "experimentStartDate": Chunk_START_DATE.strftime(date_format),
+        "experimentStopDate": Chunk_END_DATE.strftime(date_format),
+    }
+}
+
+
+def read_namelist(namelist_string: str) -> dict:
+    """
+    Function to read the custom namelist specifications provided in the configuration files.
+    It accepts both yaml and f90nml format.
+    :param namelist_string:
+    :return:
+    """
+    parameters = yaml.safe_load(namelist_string)
+    if isinstance(parameters, str):
+        parameters = f90nml.reads(nml_string=namelist_string).todict()
+    return parameters
+
+
+def patch_output_entries(namelist: f90nml.Namelist) -> f90nml.Namelist:
+    output_entries = [entry for entry in namelist["output_nml"]]
+    for entry in output_entries:
+        for key in entry:
+            if entry[key] == "#OUTPUT_START#":
+                entry[key] = Chunk_START_DATE.strftime(date_format)
+            elif entry[key] == "#OUTPUT_END#":
+                entry[key] = Chunk_END_DATE.strftime(date_format)
+
+    return namelist
+
+
+def main():
+    """
+    Main function that processes both atmosphere and master namelists and adds the necessary patches
+    :return:
+    """
+    # Process atmosphere namelist
+    atmosphere_namelist = f90nml.read(ATMOSPHERE_NAMELIST_PATH.as_posix())
+    # Convert output_nml to a co-group.
+    atmosphere_namelist.create_cogroup("output_nml")
+    print("Original atmosphere namelist:")
+    print(atmosphere_namelist)
+    atmosphere_namelist.patch(atmosphere_namelist_replacements)
+
+    # Read custom namelist parameters from configuration file
+    atmosphere_custom_namelist = read_namelist(atmosphere_namelist_string)
+
+    if atmosphere_custom_namelist is not None:
+        try:
+            atmosphere_namelist.patch(atmosphere_custom_namelist)
+        except AttributeError:
+            raise AssertionError("Problem applying the namelist patch! Probably related with the output section.")
+
+    # Patch output entries:
+    atmosphere_namelist = patch_output_entries(atmosphere_namelist)
+
+    print("Patched atmosphere namelist:")
+    print(atmosphere_namelist)
+
+    atmosphere_output_namelist = (RUNDIR / "icon_atmosphere.namelist")
+    f90nml.write(nml=atmosphere_namelist, nml_path=atmosphere_output_namelist.as_posix(), force=True)
+
+    master_namelist = f90nml.read(MASTER_NAMELIST_PATH.as_posix())
+    print("Original master namelist:")
+    print(master_namelist)
+    # Read custom namelist parameters from configuration file
+    master_custom_namelist = read_namelist(master_namelist_string)
+    # Process atmosphere namelist
+    master_namelist.patch(master_namelist_replacements)
+    if master_custom_namelist is not None:
+        master_namelist.patch(master_custom_namelist)
+    print("Patched master namelist:")
+    print(master_namelist)
+    master_output_namelist = (RUNDIR / "icon_master.namelist")
+    f90nml.write(nml=master_namelist, nml_path=master_output_namelist.as_posix(), force=True)
+
+
+if __name__ == '__main__':
+    main()
+
-- 
GitLab