From c2e15b42771e1cea056ba6731324b8f0376b72df Mon Sep 17 00:00:00 2001
From: "Takumi.Matsunobu" <Takumi.Matsunobu@physik.uni-muenchen.de>
Date: Wed, 31 May 2023 14:16:15 +0200
Subject: [PATCH] initialise the branch for ICON-D2 from DWD ana

---
 .gitignore                                    |   1 +
 conf/real-from-d2-ana/jobs.yml                |  21 +++
 .../real-from-d2-ana/icon_atmosphere.namelist |   0
 .../real-from-d2-ana/prepare_date_local.sh    |  87 ++++++++++
 .../real-from-d2-ana/prepare_date_remote.sh   |  54 ++++++
 .../real-from-d2-ana/prepare_experiment.sh    |  45 +++++
 templates/real-from-d2-ana/prepare_member.sh  |  24 +++
 .../real-from-d2-ana/prepare_namelist.py      | 164 ++++++++++++++++++
 8 files changed, 396 insertions(+)
 create mode 100644 conf/real-from-d2-ana/jobs.yml
 create mode 100644 namelists/real-from-d2-ana/icon_atmosphere.namelist
 create mode 100644 templates/real-from-d2-ana/prepare_date_local.sh
 create mode 100644 templates/real-from-d2-ana/prepare_date_remote.sh
 create mode 100644 templates/real-from-d2-ana/prepare_experiment.sh
 create mode 100644 templates/real-from-d2-ana/prepare_member.sh
 create mode 100644 templates/real-from-d2-ana/prepare_namelist.py

diff --git a/.gitignore b/.gitignore
index eeb8a6e..df1465e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 **/__pycache__
+.ipynb_checkpoints
\ No newline at end of file
diff --git a/conf/real-from-d2-ana/jobs.yml b/conf/real-from-d2-ana/jobs.yml
new file mode 100644
index 0000000..2ce9bae
--- /dev/null
+++ b/conf/real-from-d2-ana/jobs.yml
@@ -0,0 +1,21 @@
+JOBS:
+  PREPARE_EXPERIMENT:
+    FILE: templates/real-from-d2-ana/prepare_experiment.sh
+
+  PREPARE_DATE_LOCAL:
+    FILE: templates/real-from-d2-ana/prepare_date_local.sh
+    RUNNING: date
+    WALLCLOCK: 01:00
+    PLATFORM: LOCAL
+
+  PREPARE_DATE_REMOTE:
+    FILE: templates/real-from-d2-ana/prepare_date_remote.sh
+    RUNNING: date
+    WALLCLOCK: 01:00
+
+  PREPARE_MEMBER:
+    FILE: templates/real-from-d2-ana/prepare_member.sh
+    DEPENDENCIES: PREPARE_EXPERIMENT PREPARE_DATE_REMOTE PREPARE_DATE_LOCAL
+
+  PREPARE_NAMELIST:
+    FILE: templates/real-from-d2-ana/prepare_namelist.py
\ No newline at end of file
diff --git a/namelists/real-from-d2-ana/icon_atmosphere.namelist b/namelists/real-from-d2-ana/icon_atmosphere.namelist
new file mode 100644
index 0000000..e69de29
diff --git a/templates/real-from-d2-ana/prepare_date_local.sh b/templates/real-from-d2-ana/prepare_date_local.sh
new file mode 100644
index 0000000..0f579e8
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_date_local.sh
@@ -0,0 +1,87 @@
+#!/bin/bash -l
+
+# This script is executed on the machine on which autosubmit itself runs
+# and is used when the initial conditions are stored on that same machine.
+
+# Because both the initial conditions and the simulation itself can live on the local
+# system, we need to define these two variables:
+DATA_IS_LOCAL=%SIMULATION.INITIAL_CONDITIONS.LOCAL%
+
+if [ "x%HPCARCH%" == "xlocal" ]; then
+    RUN_MACHINE_IS_LOCAL="True"
+else
+    RUN_MACHINE_IS_LOCAL="False"
+fi
+
+
+
+if [ "${DATA_IS_LOCAL}" == "True" ]; then
+    # Get some variables provided by autosubmit.
+    WORKDIR=%HPCROOTDIR%
+    STARTDATE=%SDATE%
+    HPCUSER=%HPCUSER%
+    HPCHOST=%HPCHOST%
+
+    # Define the common inidata folder for this start date (it is created further below)
+    COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+    AN_MEMBER=$(printf "%03d" %SIMULATION.INITIAL_CONDITIONS.MEMBER%)
+    INITIAL_CONDITIONS_PARENT_FOLDER=%SIMULATION.INITIAL_CONDITIONS.PARENT_FOLDER%
+    INITIAL_CONDITIONS_PATH=${INITIAL_CONDITIONS_PARENT_FOLDER}/${STARTDATE:0:6}/${STARTDATE:0:8}T00
+
+    AN_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igaf*00.m${AN_MEMBER}.grb" | sort | tail -n 1)
+    FG_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igfff00030000.m${AN_MEMBER}.grb" | sort | tail -n 1)
+
+    AN_FILE=$(basename "${AN_SOURCE}")
+    FG_FILE=$(basename "${FG_SOURCE}")
+
+    # Abort if either of the two input files could not be found
+    if [ ! -f "${AN_SOURCE}" ]; then
+        echo "Analysis file for date ${STARTDATE} not found!"
+        exit 1
+    fi
+
+    if [ ! -f "${FG_SOURCE}" ]; then
+        echo "FG file for date ${STARTDATE} not found!"
+        exit 1
+    fi
+
+
+    # Copy to the remote run machine (ssh/rsync) or, if we run locally, within this file system
+    if [ "${RUN_MACHINE_IS_LOCAL}" != "True" ]; then
+        # Create the date folder on the remote machine
+        ssh "${HPCUSER}@${HPCHOST}" mkdir -p "${COMMON_DATE_FOLDER}"
+
+        # Save filenames to be used later by other scripts.
+        echo "${AN_FILE}" > an_file.txt
+        echo "${FG_FILE}" > fg_file.txt
+        rsync -v an_file.txt "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/an_file.txt"
+        rsync -v fg_file.txt "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/fg_file.txt"
+
+        # Remove temporary files.
+        rm an_file.txt
+        rm fg_file.txt
+
+        # Copy the first-guess and analysis files.
+        rsync -v "${FG_SOURCE}" "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/${FG_FILE}"
+        rsync -v "${AN_SOURCE}" "${HPCUSER}@${HPCHOST}":"${COMMON_DATE_FOLDER}/${AN_FILE}"
+
+        # Change permissions to read only (the quoted glob is expanded by the remote shell).
+        ssh "${HPCUSER}@${HPCHOST}" chmod 440 "${COMMON_DATE_FOLDER}/*"
+    else
+        # Create the date folder and go there
+        mkdir -p "${COMMON_DATE_FOLDER}"
+        cd "${COMMON_DATE_FOLDER}" || exit
+
+        # Save filenames to be used later by other scripts.
+        echo "${AN_FILE}" > an_file.txt
+        echo "${FG_FILE}" > fg_file.txt
+
+        # Copy the first-guess and analysis files.
+        cp "${FG_SOURCE}" "${FG_FILE}"
+        cp "${AN_SOURCE}" "${AN_FILE}"
+
+        # Change permissions to read only.
+        chmod 440 ./*
+    fi
+fi
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_date_remote.sh b/templates/real-from-d2-ana/prepare_date_remote.sh
new file mode 100644
index 0000000..29ad0aa
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_date_remote.sh
@@ -0,0 +1,54 @@
+#!/bin/bash -l
+
+# This script is executed on the remote system at which the simulation will happen
+# and will be used if the initial conditions are in this same remote system.
+DATA_IS_LOCAL=%SIMULATION.INITIAL_CONDITIONS.LOCAL%
+
+
+if [ "${DATA_IS_LOCAL}" != "True" ]; then
+    # Get some variables provided by autosubmit.
+    WORKDIR=%HPCROOTDIR%
+    STARTDATE=%SDATE%
+    HPCUSER=%HPCUSER%
+    HPCHOST=%HPCHOST%
+    # Define the common inidata folder for this start date (it is created further below)
+    COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+    AN_MEMBER=$(printf "%03d" %SIMULATION.INITIAL_CONDITIONS.MEMBER%)
+    INITIAL_CONDITIONS_PARENT_FOLDER=%SIMULATION.INITIAL_CONDITIONS.PARENT_FOLDER%
+    INITIAL_CONDITIONS_PATH=${INITIAL_CONDITIONS_PARENT_FOLDER}/${STARTDATE:0:6}/${STARTDATE:0:8}T00
+
+    AN_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igaf*00.m${AN_MEMBER}.grb" | sort | tail -n 1)
+    FG_SOURCE=$(find "${INITIAL_CONDITIONS_PATH}" -name "igfff00030000.m${AN_MEMBER}.grb" | sort | tail -n 1)
+
+    AN_FILE=$(basename "${AN_SOURCE}")
+    FG_FILE=$(basename "${FG_SOURCE}")
+
+    # Abort if either of the two input files could not be found
+    if [ ! -f "${AN_SOURCE}" ]; then
+        echo "Analysis file for date ${STARTDATE} not found!"
+        exit 1
+    fi
+
+    if [ ! -f "${FG_SOURCE}" ]; then
+        echo "FG file for date ${STARTDATE} not found!"
+        exit 1
+    fi
+
+
+    # Data and run directory are on the same system here, so a plain copy is enough.
+    # Create the date folder and go there
+    mkdir -p "${COMMON_DATE_FOLDER}"
+    cd "${COMMON_DATE_FOLDER}" || exit
+
+    # Save filenames to be used later by other scripts.
+    echo "${AN_FILE}" > an_file.txt
+    echo "${FG_FILE}" > fg_file.txt
+
+    # Copy the first-guess and analysis files.
+    cp "${FG_SOURCE}" "${FG_FILE}"
+    cp "${AN_SOURCE}" "${AN_FILE}"
+
+    # Change permissions to read only.
+    chmod 440 ./*
+fi
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_experiment.sh b/templates/real-from-d2-ana/prepare_experiment.sh
new file mode 100644
index 0000000..36ba043
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_experiment.sh
@@ -0,0 +1,45 @@
+#!/bin/bash -l
+
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+DYNAMICS_GRID_FILENAME=%simulation.dynamics_grid_filename%
+RADIATION_GRID_FILE=%simulation.radiation_grid_filename%
+EXTERNAL_PARAMETERS_FILE=%simulation.external_parameters_filename%
+
+
+# Activate spack
+. "${WORKDIR}/proj/platforms/common/spack_utils.sh"
+load_spack "%spack.init%" "%spack.root%" "%spack.url%" "%spack.branch%" "%spack.externals%" "%spack.compiler%" "%spack.disable_local_config%" "%spack.user_cache_path%" "%spack.user_config_path%" "%spack.upstreams%"
+
+# Load icon module needed to retrieve some data
+spack load --first icon-nwp@%ICON_VERSION%
+
+# Create a folder for the common inidata and go there
+COMMON_INIDATA_FOLDER=${WORKDIR}/inidata
+mkdir -p "${COMMON_INIDATA_FOLDER}"
+cd "${COMMON_INIDATA_FOLDER}" || exit
+
+# Download $1 to $2 (default: basename of $1) if missing; a failed download is deleted so that it is retried
+function download_file() {
+    local URL="$1"
+    local FILE="${2:-$(basename "$URL")}"
+    if [ ! -e "$FILE" ]; then
+        echo "Download $URL => $FILE"
+        wget -q "$URL" -O "$FILE" || { rm -f "$FILE"; return 1; }
+    fi
+}
+
+# Download grid files and external parameters
+BASEURL=http://icon-downloads.mpimet.mpg.de/grids/public/edzw
+download_file "${BASEURL}/${DYNAMICS_GRID_FILENAME}"
+download_file "${BASEURL}/${RADIATION_GRID_FILE}"
+download_file "${BASEURL}/${EXTERNAL_PARAMETERS_FILE}"
+
+# Link input for radiation
+ln -sf "${ICON_DATA_PATH}/rrtmg_lw.nc" .
+ln -sf "${ICON_DATA_PATH}/ECHAM6_CldOptProps.nc" .
+ln -sf "${ICON_BASE_PATH}/run/ana_varnames_map_file.txt" .
+
+
+# Change permissions to read only.
+chmod 440 ./*
\ No newline at end of file
diff --git a/templates/real-from-d2-ana/prepare_member.sh b/templates/real-from-d2-ana/prepare_member.sh
new file mode 100644
index 0000000..ff7ebc1
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_member.sh
@@ -0,0 +1,24 @@
+#!/bin/bash -l
+
+# Get some variables provided by autosubmit.
+WORKDIR=%HPCROOTDIR%
+STARTDATE=%SDATE%
+MEMBER=%MEMBER%
+
+# Common folder with data needed for all simulations
+COMMON_INIDATA_FOLDER=${WORKDIR}/inidata
+# Common folder for the same start date
+COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata
+
+# Member folder
+MEMBER_DIR=${WORKDIR}/${STARTDATE}/${MEMBER}
+
+# Create member folder and go there
+mkdir -p "${MEMBER_DIR}"
+
+cd "${MEMBER_DIR}" || exit
+
+
+# Link all files from the common inidata folder and the common date folder
+ln -sf "${COMMON_INIDATA_FOLDER}"/* .
+ln -sf "${COMMON_DATE_FOLDER}"/* .
diff --git a/templates/real-from-d2-ana/prepare_namelist.py b/templates/real-from-d2-ana/prepare_namelist.py
new file mode 100644
index 0000000..f259ad5
--- /dev/null
+++ b/templates/real-from-d2-ana/prepare_namelist.py
@@ -0,0 +1,164 @@
+import logging
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import f90nml
+import yaml
+
+logger = logging.getLogger("prepare_chunk")
+logger.setLevel(logging.INFO)
+
+# Get some autosubmit variables
+WORKDIR = "%HPCROOTDIR%"
+STARTDATE = "%SDATE%"
+MEMBER = "%MEMBER%"
+CHUNK = "%CHUNK%"
+# Get run directory
+RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}")
+ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%")
+MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
+# TODO: This is a bit ugly
+# Read the first-guess and analysis filenames from the text files found in the run directory:
+first_guess_filename = (RUNDIR / "fg_file.txt").read_text().strip()
+analysis_filename = (RUNDIR / "an_file.txt").read_text().strip()
+
+# Example of date format "2018-06-01T00:00:00Z"
+date_format = "%simulation.date_format%"
+
+START_YEAR = "%Chunk_START_YEAR%"
+START_MONTH = "%Chunk_START_MONTH%"
+START_DAY = "%Chunk_START_DAY%"
+START_HOUR = "%Chunk_START_HOUR%"
+
+END_YEAR = "%Chunk_END_YEAR%"
+END_MONTH = "%Chunk_END_MONTH%"
+END_DAY = "%Chunk_END_DAY%"
+END_HOUR = "%Chunk_END_HOUR%"
+
+Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
+Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
+
+# Read custom namelist parameters from configuration
+atmosphere_namelist_string = """
+%atmosphere_namelist%
+"""
+
+master_namelist_string = """
+%master_namelist%
+"""
+
+# Compute the chunk length in seconds (taken before the 10 minutes are added to the end date below)
+checkpoint_time = int((Chunk_END_DATE - Chunk_START_DATE).total_seconds())
+
+# TODO: Is that really necessary?
+# Add 10 minutes to allow the model to write the restarts
+Chunk_END_DATE = Chunk_END_DATE + timedelta(minutes=10)
+
+atmosphere_namelist_replacements = {
+    "time_nml": {
+        "dt_restart": checkpoint_time
+    },
+    "io_nml": {
+        "dt_checkpoint": checkpoint_time
+    },
+
+    "grid_nml": {
+        "dynamics_grid_filename": "%simulation.dynamics_grid_filename%",
+        "radiation_grid_filename": "%simulation.radiation_grid_filename%",
+    },
+
+    "extpar_nml": {
+        "extpar_filename": "%simulation.external_parameters_filename%",
+    },
+
+    "initicon_nml": {
+        "dwdfg_filename": first_guess_filename,
+        "dwdana_filename": analysis_filename,
+    }
+}
+
+master_namelist_replacements = {
+    "master_nml": {
+        "lrestart": CHUNK != "1",  # False only for the first chunk
+    },
+    "master_time_control_nml": {
+        "experimentStartDate": Chunk_START_DATE.strftime(date_format),
+        "experimentStopDate": Chunk_END_DATE.strftime(date_format),
+    }
+}
+
+
+def read_namelist(namelist_string: str) -> dict:
+    """
+    Function to read the custom namelist specifications provided in the configuration files.
+    It accepts both yaml and f90nml format.
+    :param namelist_string: the custom namelist settings, either as a yaml mapping or as f90nml text
+    :return: the parsed settings; whatever yaml.safe_load yields (None for an empty yaml document)
+    """
+    parameters = yaml.safe_load(namelist_string)
+    if isinstance(parameters, str):
+        parameters = f90nml.reads(nml_string=namelist_string).todict()
+    return parameters
+
+
+def patch_output_entries(namelist: f90nml.Namelist) -> f90nml.Namelist:
+    """Replace the #OUTPUT_START# / #OUTPUT_END# markers of every output_nml entry with the chunk dates."""
+    for entry in namelist["output_nml"]:
+        for key in entry:
+            if entry[key] == "#OUTPUT_START#":
+                entry[key] = Chunk_START_DATE.strftime(date_format)
+            elif entry[key] == "#OUTPUT_END#":
+                entry[key] = Chunk_END_DATE.strftime(date_format)
+
+    return namelist
+
+
+def main():
+    """
+    Main function that processes both atmosphere and master namelists and adds the necessary patches
+    :return: None, the patched namelists are written into RUNDIR
+    """
+    # Process atmosphere namelist
+    atmosphere_namelist = f90nml.read(ATMOSPHERE_NAMELIST_PATH.as_posix())
+    # Convert output_nml to a co-group.
+    atmosphere_namelist.create_cogroup("output_nml")
+    print("Original atmosphere namelist:")
+    print(atmosphere_namelist)
+    atmosphere_namelist.patch(atmosphere_namelist_replacements)
+
+    # Read custom namelist parameters from configuration file
+    atmosphere_custom_namelist = read_namelist(atmosphere_namelist_string)
+
+    if atmosphere_custom_namelist is not None:
+        try:
+            atmosphere_namelist.patch(atmosphere_custom_namelist)
+        except AttributeError as err:
+            raise AssertionError("Problem applying the namelist patch! Probably related with the output section.") from err
+
+    # Patch output entries:
+    atmosphere_namelist = patch_output_entries(atmosphere_namelist)
+
+    print("Patched atmosphere namelist:")
+    print(atmosphere_namelist)
+
+    atmosphere_output_namelist = (RUNDIR / "icon_atmosphere.namelist")
+    f90nml.write(nml=atmosphere_namelist, nml_path=atmosphere_output_namelist.as_posix(), force=True)
+
+    master_namelist = f90nml.read(MASTER_NAMELIST_PATH.as_posix())
+    print("Original master namelist:")
+    print(master_namelist)
+    # Read custom namelist parameters from configuration file
+    master_custom_namelist = read_namelist(master_namelist_string)
+    # Apply the computed replacements first, then the custom settings on top of them
+    master_namelist.patch(master_namelist_replacements)
+    if master_custom_namelist is not None:
+        master_namelist.patch(master_custom_namelist)
+    print("Patched master namelist:")
+    print(master_namelist)
+    master_output_namelist = (RUNDIR / "icon_master.namelist")
+    f90nml.write(nml=master_namelist, nml_path=master_output_namelist.as_posix(), force=True)
+
+
+if __name__ == '__main__':
+    main()
+
-- 
GitLab