From a4f60162a1536670c4dee321dc3da5dddade53b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Tint=C3=B3?= <oriol.tinto@lmu.de> Date: Mon, 20 Mar 2023 10:40:36 +0100 Subject: [PATCH] Add real-from-ideal --- conf/jobs-real-from-ideal.yaml | 126 ++++++++++++++++++ conf/proj-real-from-ideal.yaml | 42 ++++++ ...con_atmosphere_real-from-ideal_01.namelist | 65 +++++++++ ...con_atmosphere_real-from-ideal_02.namelist | 70 ++++++++++ .../icon_master_real-from-ideal.namelist | 16 +++ templates/real-from-ideal/prepare_chunk.py | 95 +++++++++++++ templates/real-from-ideal/prepare_date.sh | 44 ++++++ .../real-from-ideal/prepare_experiment.sh | 45 +++++++ templates/real-from-ideal/prepare_member.sh | 24 ++++ 9 files changed, 527 insertions(+) create mode 100644 conf/jobs-real-from-ideal.yaml create mode 100644 conf/proj-real-from-ideal.yaml create mode 100644 namelists/icon_atmosphere_real-from-ideal_01.namelist create mode 100644 namelists/icon_atmosphere_real-from-ideal_02.namelist create mode 100644 namelists/icon_master_real-from-ideal.namelist create mode 100644 templates/real-from-ideal/prepare_chunk.py create mode 100644 templates/real-from-ideal/prepare_date.sh create mode 100644 templates/real-from-ideal/prepare_experiment.sh create mode 100644 templates/real-from-ideal/prepare_member.sh diff --git a/conf/jobs-real-from-ideal.yaml b/conf/jobs-real-from-ideal.yaml new file mode 100644 index 0000000..c766948 --- /dev/null +++ b/conf/jobs-real-from-ideal.yaml @@ -0,0 +1,126 @@ +# Example job with all options specified +JOBS: + ## Job name + # JOBNAME: + ## Script to execute. If not specified, job will be omitted from workflow. You can also specify additional files separated by a ",". + # Note: The post-processed additional_files will be sent to %HPCROOT%/LOG_%EXPID% + ## Path relative to the project directory + # FILE: + ## Platform to execute the job. If not specified, defaults to HPCARCH in expdef file.
+ ## LOCAL is always defined and refers to current machine + # PLATFORM: + ## Queue to add the job to. If not specified, uses PLATFORM default. + # QUEUE: + ## Defines dependencies from job as a list of parent jobs separated by spaces. + ## Dependencies to jobs in previous chunk, member or startdate, use -(DISTANCE) + # DEPENDENCIES:INI SIM-1 CLEAN-2 + ## Defines if the job runs once, once per startdate, once per member or once per chunk. Options: once, date, member, chunk. + ## If not specified, defaults to once + # RUNNING:once + ## Specifies that job has only to be run after X dates, members or chunks. A job will always be created for the last one. + ## If not specified, defaults to 1 + # FREQUENCY:3 + ## On a job with FREQUENCY > 1, if True, the dependencies are evaluated against all + ## jobs in the frequency interval, otherwise only evaluate dependencies against current + ## iteration. + ## If not specified, defaults to True + # WAIT:False + ## Defines if job is only to be executed in reruns. If not specified, defaults to false. + # RERUN_ONLY:False + ## Wallclock to be submitted to the HPC queue in format HH:MM + # WALLCLOCK:00:05 + + ## Processors number to be submitted to the HPC. If not specified, defaults to 1. + ## Wallclock chunk increase (WALLCLOCK will be increased according to the formula WALLCLOCK + WCHUNKINC * (chunk - 1)). + ## Ideal for sequences of jobs that change their expected running time according to the current chunk. + # WCHUNKINC: 00:01 + # PROCESSORS: 1 + ## Threads number to be submitted to the HPC. If not specified, defaults to 1. + # THREADS: 1 + ## Enables hyper-threading. If not specified, defaults to false. + # HYPERTHREADING: false + ## Tasks number to be submitted to the HPC. If not specified, defaults to 1. + # Tasks: 1 + ## Memory requirements for the job in MB + # MEMORY: 4096 + ## Number of retrials if a job fails.
If not specified, defaults to the value given on experiment's autosubmit.yml + # RETRIALS: 4 + ## Allows to put a delay between retries, of retrials if a job fails. If not specified, it will be static + # DELAY_RETRY_TIME: 11 + # DELAY_RETRY_TIME: +11 # will wait 11,22,33,44... + # DELAY_RETRY_TIME: *11 # will wait 11,110,1110,11110... + ## Some jobs can not be checked before running previous jobs. Set this option to false if that is the case + # CHECK: False + ## Select the interpreter that will run the job. Options: bash, python, r Default: bash + # TYPE: bash + ## Specify the path to the interpreter. If empty, use system default based on job type . Default: empty + # EXECUTABLE: /my_python_env/python3 + + BUILD_ICON: + FILE: templates/build_icon.sh + WALLCLOCK: 01:00 + PROCESSORS: 16 + + BUILD_ENSTOOLS: + FILE: templates/build_enstools.sh + DEPENDENCIES: BUILD_ICON + WALLCLOCK: 01:00 + PROCESSORS: 16 + + TRANSFER_NAMELISTS: + FILE: templates/common/transfer_namelists.sh + PLATFORM: LOCAL + + PREPARE_EXPERIMENT: + FILE: templates/real-from-ideal/prepare_experiment.sh + DEPENDENCIES: BUILD_ICON + RUNNING: once + WALLCLOCK: 01:00 + + PREPARE_DATE: + FILE: templates/real-from-ideal/prepare_date.sh + RUNNING: date + WALLCLOCK: 01:00 + + PREPARE_MEMBER: + FILE: templates/real-from-ideal/prepare_member.sh + DEPENDENCIES: PREPARE_EXPERIMENT PREPARE_DATE + RUNNING: member + WALLCLOCK: 01:00 + + PREPARE_CHUNK: + FILE: templates/real-from-ideal/prepare_chunk.py + DEPENDENCIES: TRANSFER_NAMELISTS BUILD_PYTHON_ENVIRONMENT PREPARE_MEMBER RUN_ICON-1 + WALLCLOCK: 00:05 + RUNNING: chunk + TYPE: python + EXECUTABLE: "%HPCROOTDIR%/%python_environment.folder_name%/bin/python3" + + + RUN_ICON: + FILE: templates/run_icon.sh + DEPENDENCIES: PREPARE_CHUNK + WALLCLOCK: 01:00 + RUNNING: chunk + PROCESSORS: 16 + CUSTOM_DIRECTIVES: [ "export OMPI_MCA_btl_tcp_if_include=10.0.0.0/8" ] + + COMPRESS: + FILE: templates/compress.py + DEPENDENCIES: RUN_ICON BUILD_ENSTOOLS COMPRESS-1 + RUNNING: 
chunk + TYPE: python + EXECUTABLE: "%HPCROOTDIR%/venv/bin/python3" + + TRANSFER: + FILE: templates/transfer.sh + DEPENDENCIES: COMPRESS + WALLCLOCK: 00:10 + RUNNING: member + PLATFORM: LOCAL + + CLEAN: + FILE: templates/clean.sh + DEPENDENCIES: TRANSFER + WALLCLOCK: 00:10 + RUNNING: member diff --git a/conf/proj-real-from-ideal.yaml b/conf/proj-real-from-ideal.yaml new file mode 100644 index 0000000..244c2a6 --- /dev/null +++ b/conf/proj-real-from-ideal.yaml @@ -0,0 +1,42 @@ + + + +spack: + url: git@gitlab.physik.uni-muenchen.de:LDAP_rbg/spack.git + branch: lmu/ubuntu20.04-icon + compiler: gcc@11.3.0 + +icon: + version: 2.6.5-nwp0 + +python_environment: + # Name of the virtual environment in the remote platform experiment folder + folder_name: python_environment + requirements: + # Because there's an issue with numba, for now we need to keep a specific version of numpy + - numpy==1.23 + - enstools-compression + # Just to try a library from a git repository. + - git+https://gitlab.physik.uni-muenchen.de/Oriol.Tinto/otils.git + - f90nml + +simulation: + dynamics_grid_filename: icon_grid_0012_R02B04_G.nc + radiation_grid_filename: icon_grid_0011_R02B03_R.nc + external_parameters_filename: extpar_DOM01.nc + date_format: '%Y-%m-%dT%H:%M:%SZ' + namelist_paths: + # Path to the namelists + master: "%HPCROOTDIR%/namelists/icon_master_real-from-ideal.namelist" + atmosphere: + ideal: "%HPCROOTDIR%/namelists/icon_atmosphere_real-from-ideal_01.namelist" + real: "%HPCROOTDIR%/namelists/icon_atmosphere_real-from-ideal_02.namelist" + + + # List of output file names that will be copied (Wildcards * allowed) + output_file_names: "init-test_DOM01_ML_*.nc init-test-ext_DOM01_ML_*.nc" + files_to_clean: "*.nc" + +data_management: + # Where do we put the output files afterwards? 
+ local_destination_folder: /scratch/o/Oriol.Tinto/tmp/ diff --git a/namelists/icon_atmosphere_real-from-ideal_01.namelist b/namelists/icon_atmosphere_real-from-ideal_01.namelist new file mode 100644 index 0000000..e4c0b89 --- /dev/null +++ b/namelists/icon_atmosphere_real-from-ideal_01.namelist @@ -0,0 +1,65 @@ +&run_nml + ltestcase = .false. + dtime = 300 + output = 'nml' + msg_level = 15 + num_lev = 31 + lvert_nest = .false. + ldynamics = .true. + ltransport = .true. + ntracer = 5 + iforcing = 3 +/ + +&grid_nml + dynamics_parent_grid_id = 0 + dynamics_grid_filename = '%dynamics_grid_filename%' + radiation_grid_filename = '%radiation_grid_filename%' + lredgrid_phys = .true. +/ + +&nh_testcase_nml + nh_test_name = 'APE_nwp' ! testcase selection + ape_sst_case = 'sst_qobs' +/ + +&nonhydrostatic_nml + damp_height = 18000 + rayleigh_coeff = 0.75 +/ + +&nwp_phy_nml + lupatmo_phy = .FALSE. + inwp_surface = 0 + +/ + +&time_nml + dt_restart = '%checkpoint_time%' +/ + +&io_nml + dt_checkpoint = '%checkpoint_time%' +/ + +! the following two output files are used to initialize the next run +&output_nml + file_interval = 'PT3600S' + output_start = '%Chunk_START_DATE%' + output_end = '%Chunk_END_DATE%' + output_filename = "init-test" + output_interval = 'PT3600S' + include_last = .true. + mode = 1 + taxis_tunit = 1 + ml_varlist = 'group:dwd_fg_atm_vars', 'group:dwd_fg_sfc_vars' +/ +&output_nml + steps_per_file = 1 + output_start = '%Chunk_START_DATE%' + output_end = '%Chunk_START_DATE%' + output_filename = "init-test-ext" + include_last = .true.
+ output_interval = 'PT3600S' + ml_varlist = 'depth_lk', 'emis_rad', 'fr_lake', 'fr_land', 'topography_c', 'soiltyp', 'sso_stdh', 'sso_theta', 'sso_gamma', 'sso_sigma' +/ diff --git a/namelists/icon_atmosphere_real-from-ideal_02.namelist b/namelists/icon_atmosphere_real-from-ideal_02.namelist new file mode 100644 index 0000000..98dd2f8 --- /dev/null +++ b/namelists/icon_atmosphere_real-from-ideal_02.namelist @@ -0,0 +1,70 @@ +&run_nml + ltestcase = .false. + dtime = 300 + output = 'nml' + msg_level = 15 + num_lev = 31 + lvert_nest = .false. + ldynamics = .true. + ltransport = .true. + ntracer = 5 + iforcing = 3 +/ + +&grid_nml + dynamics_parent_grid_id = 0 + dynamics_grid_filename = '%dynamics_grid_filename%' + radiation_grid_filename = '%radiation_grid_filename%' + lredgrid_phys = .true. +/ + +&extpar_nml + itopo = 1 + extpar_filename = 'extpar_DOM01.nc' +/ + +&initicon_nml + init_mode = 1, + dwdfg_filename = 'init-test-fg_DOM01_ML_0001.nc' + dwdana_filename = 'init-test-ana_DOM01_ML_0001.nc' + lconsistency_checks = .false. +/ + +&nonhydrostatic_nml + damp_height = 18000 + rayleigh_coeff = 0.75 +/ + +&parallel_nml + nproma = 16 +/ + +! LATBC files +&output_nml + file_interval = 'PT3600S' + output_start = '%Chunk_START_DATE%' + output_end = '%Chunk_END_DATE%' + output_filename = "latbc" + output_interval = 'PT3600S' + include_last = .true. + ml_varlist = 'u', 'v', 'w', 'theta_v', 'rho', 'qv', 'qc', 'qi', 'qr', 'qs', 'z_ifc' +/ + +! First Guess file +&output_nml + file_interval = 'PT3600S' + output_start = '%Chunk_START_DATE%' + output_end = '%Chunk_START_DATE%' + output_filename = "init" + output_interval = 'PT3600S' + include_last = .true.
+ ml_varlist = 'group:dwd_fg_atm_vars', 'group:dwd_fg_sfc_vars' +/ + +&time_nml + dt_restart = '%checkpoint_time%' +/ + +&io_nml + dt_checkpoint = '%checkpoint_time%' +/ diff --git a/namelists/icon_master_real-from-ideal.namelist b/namelists/icon_master_real-from-ideal.namelist new file mode 100644 index 0000000..fe401c4 --- /dev/null +++ b/namelists/icon_master_real-from-ideal.namelist @@ -0,0 +1,16 @@ +&master_nml + lrestart = "%is_restart%" + lrestart_write_last = .true. +/ + +&master_model_nml + model_type = 1 ! atmospheric model + model_name = "ATMO" ! name of this model component + model_namelist_filename = "icon_atmosphere.namelist" +/ + +&master_time_control_nml + calendar = "proleptic gregorian" + experimentStartDate = '%Chunk_START_DATE%' + experimentStopDate = '%Chunk_END_DATE%' +/ \ No newline at end of file diff --git a/templates/real-from-ideal/prepare_chunk.py b/templates/real-from-ideal/prepare_chunk.py new file mode 100644 index 0000000..6656e4e --- /dev/null +++ b/templates/real-from-ideal/prepare_chunk.py @@ -0,0 +1,95 @@ +import logging +import re +from datetime import datetime, timedelta +from pathlib import Path + +import f90nml + +logger = logging.getLogger("prepare_chunk") +logger.setLevel(logging.INFO) + +# Get some autosubmit variables +WORKDIR = "%HPCROOTDIR%" +STARTDATE = "%SDATE%" +MEMBER = "%MEMBER%" +CHUNK = "%CHUNK%" + +# Example of date format "2018-06-01T00:00:00Z" +date_format = "%simulation.date_format%" + +START_YEAR = "%Chunk_START_YEAR%" +START_MONTH = "%Chunk_START_MONTH%" +START_DAY = "%Chunk_START_DAY%" +START_HOUR = "%Chunk_START_HOUR%" + +END_YEAR = "%Chunk_END_YEAR%" +END_MONTH = "%Chunk_END_MONTH%" +END_DAY = "%Chunk_END_DAY%" +END_HOUR = "%Chunk_END_HOUR%" + +Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR)) +Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR)) + +# Compute difference in seconds 
+checkpoint_time = int((Chunk_END_DATE - Chunk_START_DATE).total_seconds()) + +# TODO: Is that really necessary? +# Add 10 minutes to allow the model to write the restarts +Chunk_END_DATE = Chunk_END_DATE + timedelta(minutes=10) +# Get run directory +RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}") + +# TODO: This is a bit ugly +# Read first-guess and analysis filenames from files: +first_guess_filename = (RUNDIR / "fg_file.txt").read_text().strip() +analysis_filename = (RUNDIR / "an_file.txt").read_text().strip() + +# Get some variable replacements from the proj.yml file through autosubmit +variable_replacements = { + "dynamics_grid_filename": "%simulation.dynamics_grid_filename%", + "radiation_grid_filename": "%simulation.radiation_grid_filename%", + "external_parameters_filename": "%simulation.external_parameters_filename%", + "first_guess_filename": first_guess_filename, + "analysis_filename": analysis_filename, + "Chunk_START_DATE": Chunk_START_DATE.strftime(date_format), + "Chunk_END_DATE": Chunk_END_DATE.strftime(date_format), + "is_restart": False if "%CHUNK%" == "1" else True, + "checkpoint_time": checkpoint_time, +} + + +def adapt_namelist(input_namelist: str, output_namelist: str): + input_namelist = Path(input_namelist) + output_namelist = Path(output_namelist) + + namelist = f90nml.read(input_namelist.as_posix()) + group_keys = [gk for gk in namelist] + + for group in group_keys: + variable_keys = [vk for vk in namelist[group]] + for variable in variable_keys: + value = namelist[group][variable] + m = re.match(r"%(.*)%", str(value)) + if m: + key = m.group(1) + + if key not in variable_replacements: + raise AssertionError(f"The namelist {input_namelist.as_posix()!r} contains the variable {key!r} " + f"which is not in the list of provided replacements:\n" + f"{[v for v in variable_replacements]}") + logger.info(f"Replacing {group}>{variable}:{key} with {variable_replacements[key]!r}") + namelist[group][variable] = variable_replacements[key] + + 
f90nml.write(nml=namelist, nml_path=output_namelist.as_posix(), force=True) + + +if __name__ == '__main__': + atmosphere_namelist_path = "%simulation.namelist_paths.atmosphere%" + master_namelist_path = "%simulation.namelist_paths.master%" + + # Adapt atmosphere namelist + adapt_namelist(input_namelist=atmosphere_namelist_path, + output_namelist=(RUNDIR / "icon_atmosphere.namelist").as_posix()) + # Adapt master namelist + adapt_namelist(input_namelist=master_namelist_path, + output_namelist=(RUNDIR / "icon_master.namelist").as_posix()) diff --git a/templates/real-from-ideal/prepare_date.sh b/templates/real-from-ideal/prepare_date.sh new file mode 100644 index 0000000..4152601 --- /dev/null +++ b/templates/real-from-ideal/prepare_date.sh @@ -0,0 +1,44 @@ +#!/bin/bash -l + +# Get some variables provided by autosubmit. +WORKDIR=%HPCROOTDIR% +STARTDATE=%SDATE% + +# Define date directory, create it and go there +COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata +# Create member folder and go there +mkdir -p ${COMMON_DATE_FOLDER} +cd ${COMMON_DATE_FOLDER} || exit + +# some settings +AN_MEMBER=$(printf "%03d" %initial_conditions.member%) +INITIAL_CONDITIONS_PARENT_FOLDER=%initial_conditions.parent_folder% + +INITIAL_CONDITIONS_PATH=${INITIAL_CONDITIONS_PARENT_FOLDER}/${STARTDATE:0:6}/${STARTDATE:0:8}T00 + +AN_SOURCE=$(find ${INITIAL_CONDITIONS_PATH} -name "igaf*00.m${AN_MEMBER}.grb" | sort | tail -n 1) +FG_SOURCE=$(find ${INITIAL_CONDITIONS_PATH} -name "igfff00030000.m${AN_MEMBER}.grb" | sort | tail -n 1) + +if [ ! -f "${AN_SOURCE}" ]; then + echo "Analysis file for date ${STARTDATE} not found!" + exit 1 +fi + +if [ ! -f "${FG_SOURCE}" ]; then + echo "FG file for date ${STARTDATE} not found!" + exit 1 +fi + +AN_FILE=$(basename "${AN_SOURCE}") +FG_FILE=$(basename "${FG_SOURCE}") + +# Save filenames to be used later by other scripts. +echo "${AN_FILE}" > an_file.txt +echo "${FG_FILE}" > fg_file.txt + +# Copy the first-guess and analysis files. 
+cp "${FG_SOURCE}" "${FG_FILE}" +cp "${AN_SOURCE}" "${AN_FILE}" + +# Change permissions to read only. +chmod 440 ./* diff --git a/templates/real-from-ideal/prepare_experiment.sh b/templates/real-from-ideal/prepare_experiment.sh new file mode 100644 index 0000000..5d9b03d --- /dev/null +++ b/templates/real-from-ideal/prepare_experiment.sh @@ -0,0 +1,45 @@ +#!/bin/bash -l + +# Get some variables provided by autosubmit. +WORKDIR=%HPCROOTDIR% +DYNAMICS_GRID_FILENAME=%simulation.dynamics_grid_filename% +RADIATION_GRID_FILE=%simulation.radiation_grid_filename% +EXTERNAL_PARAMETERS_FILE=%simulation.external_parameters_filename% + + +# Activate spack +SPACK_ENV=${WORKDIR}/spack/share/spack/setup-env.sh +source ${SPACK_ENV} + +# Load icon module needed to retrieve some data +spack load icon-nwp@%ICON_VERSION% + +# Create a folder for the common inidata and go there +COMMON_INIDATA_FOLDER=${WORKDIR}/inidata +mkdir -p "${COMMON_INIDATA_FOLDER}" +cd "${COMMON_INIDATA_FOLDER}" || exit + +# Download or copy required input files +function download_file() { + URL=$1 + FILE=${2:-$(basename "$URL")} + if [ ! -e "$FILE" ]; then + echo "Download $URL => $FILE" + wget -q "$URL" -O "$FILE" + fi +} + +# Download grid files and external parameters +BASEURL=http://icon-downloads.mpimet.mpg.de/grids/public/edzw +download_file $BASEURL/${DYNAMICS_GRID_FILENAME} +download_file $BASEURL/${RADIATION_GRID_FILE} +download_file $BASEURL/${EXTERNAL_PARAMETERS_FILE} + +# Link input for radiation +ln -sf "${ICON_DATA_PATH}/rrtmg_lw.nc" . +ln -sf "${ICON_DATA_PATH}/ECHAM6_CldOptProps.nc" . +ln -sf "${ICON_BASE_PATH}/run/ana_varnames_map_file.txt" . + + +# Change permissions to read only. 
+chmod 440 ./* \ No newline at end of file diff --git a/templates/real-from-ideal/prepare_member.sh b/templates/real-from-ideal/prepare_member.sh new file mode 100644 index 0000000..ff7ebc1 --- /dev/null +++ b/templates/real-from-ideal/prepare_member.sh @@ -0,0 +1,24 @@ +#!/bin/bash -l + +# Get some variables provided by autosubmit. +WORKDIR=%HPCROOTDIR% +STARTDATE=%SDATE% +MEMBER=%MEMBER% + +# Common folder with data needed for all simulations +COMMON_INIDATA_FOLDER=${WORKDIR}/inidata +# Common folder for the same start date +COMMON_DATE_FOLDER=${WORKDIR}/${STARTDATE}/inidata + +# Member folder +MEMBER_DIR=${WORKDIR}/${STARTDATE}/${MEMBER} + +# Create member folder and go there +mkdir -p ${MEMBER_DIR} + +cd ${MEMBER_DIR} || exit + + +# Link all files from the common inidata folder and the common date folder +ln -sf ${COMMON_INIDATA_FOLDER}/* . +ln -sf ${COMMON_DATE_FOLDER}/* . -- GitLab