Skip to content
Snippets Groups Projects
Commit 0b2d2560 authored by Jonas Spaeth
Browse files

implement FROM_MEMBERS and prepare_parent_date.py

parent 6a8f152c
Branches spin_off
No related tags found
No related merge requests found
Pipeline #19452 canceled
# TODO: How to specify mother-member/ date combinations as start points for spin-offs? # TODO: How to specify mother-member/ date combinations as start points for spin-offs?
experiment: experiment:
# DATELIST: 20201001 DATELIST: "[20201001, 20201003]" # TODO: like this or with squared brackets?
# MEMBERS: "m[1-2]" FROM_MEMBERS: "[1, 3]" # requires same length as datelist
DATELIST_FROM_MOTHERMEMBERS: # list of (date, mothermember) to run spinoffs from MEMBERS: "m[1-2]" # members for new spinoff ensemble
- [20201002, 2]
- [20201003, 2]
MEMBERS: "m[1-2]" # number of members in spinoff ensemble
CHUNKSIZEUNIT: day CHUNKSIZEUNIT: day
CHUNKSIZE: 7 CHUNKSIZE: 7
NUMCHUNKS: 2 NUMCHUNKS: 2
CHUNKINI: 0 CHUNKINI: 0
CALENDAR: standard CALENDAR: standard
\ No newline at end of file RSFILE_MAXDAYS_BACK: 30
\ No newline at end of file
...@@ -25,39 +25,39 @@ JOBS: ...@@ -25,39 +25,39 @@ JOBS:
############# parent experiment ############# parent experiment
PREPARE_PARENT_EXPERIMENT: PREPARE_PARENT_EXPERIMENT:
FILE: templates/event-generator/prepare_parent_experiment.sh FILE: templates/spin-off/prepare_parent_experiment.sh
DEPENDENCIES: BUILD_ICON DEPENDENCIES: BUILD_ICON
RUNNING: once RUNNING: once
WALLCLOCK: 01:00 WALLCLOCK: 01:00
PREPARE_PARENT_DATE: PREPARE_PARENT_DATE: # TODO: ----> get index of date and select appropriate member
FILE: templates/event-generator/prepare_parent_date.sh FILE: templates/spin-off/prepare_parent_date.py
RUNNING: date RUNNING: date
WALLCLOCK: 01:00 WALLCLOCK: 01:00
PLATFORM: LOCAL PLATFORM: LOCAL
PREPARE_PARENT_MEMBER: # PREPARE_PARENT_MEMBER:
FILE: templates/event-generator/prepare_parent_member.sh # FILE: templates/event-generator/prepare_parent_member.sh # TODO: In here: get ens member for each date
DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE # DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE
RUNNING: member # RUNNING: date
WALLCLOCK: 01:00 # WALLCLOCK: 01:00
ADAPT_PARENT_MEMBER: # ADAPT_PARENT_MEMBER: # TODO: no multiple members
FILE: templates/event-generator/adapt_parent_member.sh # FILE: templates/event-generator/adapt_parent_member.sh
RUNNING: member # RUNNING: date
WALLCLOCK: 00:20 # WALLCLOCK: 00:20
DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT # DEPENDENCIES: PREPARE_PARENT_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
PREPARE_PARENT_NAMELIST: PREPARE_PARENT_NAMELIST:
FILE: templates/event-generator/prepare_parent_namelist.py FILE: templates/spin-off/prepare_parent_namelist.py
DEPENDENCIES: ADAPT_PARENT_MEMBER RUN_PARENT_ICON-1 DEPENDENCIES: PREPARE_PARENT_EXPERIMENT PREPARE_PARENT_DATE RUN_PARENT_ICON-1
WALLCLOCK: 00:05 WALLCLOCK: 00:05
RUNNING: chunk RUNNING: chunk
TYPE: python TYPE: python
EXECUTABLE: "%HPCROOTDIR%/%python_environment.folder_name%/bin/python3" EXECUTABLE: "%HPCROOTDIR%/%python_environment.folder_name%/bin/python3"
RUN_PARENT_ICON: RUN_PARENT_ICON:
FILE: templates/common/run_icon.sh FILE: templates/spin-off/run_parent_icon.sh
DEPENDENCIES: PREPARE_PARENT_NAMELIST COMPRESS-1 # TODO: remove COMPRESS-1? DEPENDENCIES: PREPARE_PARENT_NAMELIST COMPRESS-1 # TODO: remove COMPRESS-1?
WALLCLOCK: 08:00 WALLCLOCK: 08:00
RUNNING: chunk RUNNING: chunk
...@@ -93,7 +93,7 @@ JOBS: ...@@ -93,7 +93,7 @@ JOBS:
DEPENDENCIES: PREPARE_SPINOFF_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT DEPENDENCIES: PREPARE_SPINOFF_MEMBER TRANSFER_PROJECT BUILD_PYTHON_ENVIRONMENT
PREPARE_SPINOFF_NAMELIST: PREPARE_SPINOFF_NAMELIST:
FILE: templates/event-generator/prepare_spinoff_namelist.py FILE: templates/spin-off/prepare_spinoff_namelist.py
DEPENDENCIES: ADAPT_SPINOFF_MEMBER RUN_ICON-1 DEPENDENCIES: ADAPT_SPINOFF_MEMBER RUN_ICON-1
WALLCLOCK: 00:05 WALLCLOCK: 00:05
RUNNING: chunk RUNNING: chunk
......
...@@ -19,5 +19,5 @@ simulation: ...@@ -19,5 +19,5 @@ simulation:
local: true local: true
# /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201031T000000Z.nc # /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201031T000000Z.nc
# parent_folder: /archive/meteo/external-models/dwd/icon/oper/icon_oper_eps_gridded-global_rolling/ # parent_folder: /archive/meteo/external-models/dwd/icon/oper/icon_oper_eps_gridded-global_rolling/
parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1/ parent_folder: /scratch/p/Philip.Rupp/experiments/2chunks/20201001/
sea_surface_temperature_forcing: /project/meteo/w2w/Z2/autoicon/dummy_sst_enso_neutral.nc sea_surface_temperature_forcing: /project/meteo/w2w/Z2/autoicon/dummy_sst_enso_neutral.nc
...@@ -6,10 +6,49 @@ From this date, the event-generator member can be restarted. ...@@ -6,10 +6,49 @@ From this date, the event-generator member can be restarted.
from glob import glob from glob import glob
from dateutil.parser import parse from dateutil.parser import parse
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path
import ast
import os
DIR_MOTHER_RUN = '/scratch/p/Philip.Rupp/experiments/2chunks/20201001/m1' # Get some autosubmit variables
SPIN_OFF_START = '2020-10-20' WORKDIR = "%HPCROOTDIR%"
NOT_MORE_THAN_N_DAYS_BACK = 30 STARTDATE = "%SDATE%"
# Get further autosubmit variables.
# Each "%...%" placeholder sits inside quotes, so after template substitution
# every value arrives here as a string.
CURRENT_DATE = "%SDATE%"
DATELIST_STRING = "%experiment.DATELIST%"
FROM_MEMBERS_STRING = "%experiment.FROM_MEMBERS%"
# Maximum age (in days) of a restart file relative to the start date.
# Converted to int because it is passed on as `max_lag` to find_date_before().
# NOTE(review): find_date_before() is only partly visible here; presumably it
# does date arithmetic with max_lag, which a plain string cannot do - confirm.
NOT_MORE_THAN_N_DAYS_BACK = int("%experiment.RSFILE_MAXDAYS_BACK%")

# parse FROM_MEMBERS and DATELIST into a python list
DATELIST = ast.literal_eval(DATELIST_STRING)
FROM_MEMBERS = ast.literal_eval(FROM_MEMBERS_STRING)

# Every start date needs exactly one mother member to spin off from.
if len(DATELIST) != len(FROM_MEMBERS):
    raise ValueError(
        f"experiment.DATELIST has {len(DATELIST)} entries but "
        f"experiment.FROM_MEMBERS has {len(FROM_MEMBERS)}; both must have the same length."
    )
# get index of current start date in datelist and therefrom infer from_member
def find_index_in_list(number_list, target_number):
    """Return the position of ``target_number`` in ``number_list``.

    Args:
        number_list: Sequence to search; must provide an ``index`` method
            (e.g. a list or tuple).
        target_number: Value to look for. Elements are compared with ``==``,
            so the string ``"1"`` does not match the integer ``1``.

    Returns:
        The index of the first matching element, or ``-1`` if no element
        matches.
    """
    try:
        return number_list.index(target_number)
    except ValueError:
        # ``index`` signals "not found" with ValueError; callers expect -1.
        return -1
# CURRENT_DATE is a string, whereas the DATELIST entries are whatever
# ast.literal_eval produced (integers for a value like "[20201001, 20201003]").
# Compare string forms so the current start date can actually be found.
index = find_index_in_list([str(date) for date in DATELIST], str(CURRENT_DATE))
if index == -1:
    # Fail loudly: continuing with -1 would silently select FROM_MEMBERS[-1],
    # i.e. restart from the wrong mother member.
    raise ValueError(f"The date {CURRENT_DATE} is not in the datelist.")
print(f"The date {CURRENT_DATE} is at index {index} in the datelist.")

if index >= len(FROM_MEMBERS):
    raise ValueError(
        f"No entry in FROM_MEMBERS for the date {CURRENT_DATE} "
        f"(index {index}, but FROM_MEMBERS has only {len(FROM_MEMBERS)} entries)."
    )
FROM_MEMBER = FROM_MEMBERS[index]
# determine parent directory for restart
# os.path.join yields the same "<parent_folder>m<N>/" path as plain
# concatenation when parent_folder ends with "/", and also copes with a
# parent_folder that lacks the trailing slash.
PARENTDIR = os.path.join("%initial_conditions.parent_folder%", f"m{FROM_MEMBER:0d}", "")

# create target directory for restart parent
RUNDIR = f"{WORKDIR}/{STARTDATE}_from_parent_{FROM_MEMBER}"
if os.path.isdir(RUNDIR):
    print(f"Directory '{RUNDIR}' already exists.")
else:
    # exist_ok guards against the directory appearing between the check and
    # the call; any other OSError (permissions, missing mount, ...) is allowed
    # to propagate, because the restart files cannot be linked without it.
    os.makedirs(RUNDIR, exist_ok=True)
    print(f"Directory '{RUNDIR}' created successfully.")
def infer_restart_dates_from_files_in_direcory(directory): def infer_restart_dates_from_files_in_direcory(directory):
...@@ -22,7 +61,7 @@ def infer_restart_dates_from_files_in_direcory(directory): ...@@ -22,7 +61,7 @@ def infer_restart_dates_from_files_in_direcory(directory):
def find_date_before(target_date, dates_list, max_lag=None): def find_date_before(target_date, dates_list, max_lag=None):
# Convert the target_date to a datetime object if it's not already # Convert the target_date to a datetime object if it's not already
if not isinstance(target_date, datetime): if not isinstance(target_date, datetime):
target_date = datetime.strptime(target_date, '%Y-%m-%d') # Adjust the format if needed target_date = datetime.strptime(target_date, '%Y%m%d') # Adjust the format if needed
# Calculate the minimum date allowed (if NOT_MORE_THAN_N_DAYS is provided) # Calculate the minimum date allowed (if NOT_MORE_THAN_N_DAYS is provided)
if max_lag is not None: if max_lag is not None:
...@@ -40,7 +79,37 @@ def find_date_before(target_date, dates_list, max_lag=None): ...@@ -40,7 +79,37 @@ def find_date_before(target_date, dates_list, max_lag=None):
return None # No date found before the target_date return None # No date found before the target_date
def link_restart_files(from_restart_date, from_directoy, into_directory):
    """Symlink the restart file of a given date into another directory.

    Looks in ``from_directoy`` for exactly one file matching
    ``*restart*<YYYYmmdd>T00*.nc`` and creates a symbolic link with the same
    file name inside ``into_directory``.

    Sample restart file:
    chunks/20201001/m1/icon_grid_0010_R02B04_G_restart_atm_20201016T000000Z.nc

    Args:
        from_restart_date: Date of the restart file to link; must provide
            ``strftime`` (e.g. ``datetime``).
        from_directoy: Directory that contains the restart files.
        into_directory: Existing directory in which the link is created.

    Raises:
        ValueError: If ``from_restart_date`` is ``None`` (no restart date
            was found).
        AssertionError: If no file, or more than one file, matches.
    """
    if from_restart_date is None:
        raise ValueError("No restart date given, cannot select a restart file.")

    date_string = from_restart_date.strftime("%Y%m%dT00")
    pattern = f"{from_directoy}/*restart*{date_string}*.nc"
    files = glob(pattern)
    if not files:
        raise AssertionError(f"No restart files match pattern {pattern}.")
    if len(files) > 1:
        raise AssertionError(f"More than 1 file matches pattern {pattern}: {files}.")

    # Path to the source file
    source_file_path = files[0]
    # The symlink keeps the name of the original file.
    symlink_name = os.path.basename(source_file_path)
    symlink_path = os.path.join(into_directory, symlink_name)

    # A link left over from a previous run of this job is replaced so that
    # re-running does not fail. A regular file of that name is left alone and
    # os.symlink then raises FileExistsError.
    if os.path.islink(symlink_path):
        os.remove(symlink_path)

    # Create the symbolic link
    os.symlink(source_file_path, symlink_path)
    print(f"Symbolic link '{symlink_name}' created at '{into_directory}'")
if __name__ == "__main__": if __name__ == "__main__":
restart_file_dates = infer_restart_dates_from_files_in_direcory(DIR_MOTHER_RUN) # find available restart files in event generator run
restart_from = find_date_before(SPIN_OFF_START, restart_file_dates, max_lag=NOT_MORE_THAN_N_DAYS_BACK) restart_file_dates = infer_restart_dates_from_files_in_direcory(PARENTDIR)
print(restart_from) # find appropriate restart file prior to spin-off start date
restart_from = find_date_before(CURRENT_DATE, restart_file_dates, max_lag=NOT_MORE_THAN_N_DAYS_BACK)
print(f"Restarting parent run on {restart_from}.")
# link appropriate restart files into the parental re-run directory
link_restart_files(from_restart_date=restart_from, from_directoy=PARENTDIR, into_directory=RUNDIR)
import logging import logging
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from dateutil.parser import parse
import f90nml import f90nml
import yaml import yaml
...@@ -11,10 +12,10 @@ logger.setLevel(logging.INFO) ...@@ -11,10 +12,10 @@ logger.setLevel(logging.INFO)
# Get some autosubmit variables # Get some autosubmit variables
WORKDIR = "%HPCROOTDIR%" WORKDIR = "%HPCROOTDIR%"
STARTDATE = "%SDATE%" STARTDATE = "%SDATE%"
MEMBER = "%MEMBER%" # MEMBER = "%MEMBER%"
CHUNK = "%CHUNK%" # CHUNK = "%CHUNK%"
# Get run directory # Get run directory
RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}") # RUNDIR = Path(f"{WORKDIR}/{STARTDATE}/{MEMBER}") # _from_parent_
ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%") ATMOSPHERE_NAMELIST_PATH = Path("%simulation.namelist_paths.atmosphere%")
MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%") MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
...@@ -22,30 +23,60 @@ MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%") ...@@ -22,30 +23,60 @@ MASTER_NAMELIST_PATH = Path("%simulation.namelist_paths.master%")
# analysis_filename = (RUNDIR / "igaf*.m*.grb") # analysis_filename = (RUNDIR / "igaf*.m*.grb")
# analysis_filename = analysis_filename.parent.glob(analysis_filename.name).__next__() # analysis_filename = analysis_filename.parent.glob(analysis_filename.name).__next__()
# analysis_filename = analysis_filename.name # analysis_filename = analysis_filename.name
analysis_filename = "analysis.nc" # analysis_filename = "analysis.nc"
# first_guess_filename = (RUNDIR / "igfff00030000.m*.grb").resolve() # first_guess_filename = (RUNDIR / "igfff00030000.m*.grb").resolve()
# first_guess_filename = first_guess_filename.parent.glob(first_guess_filename.name).__next__() # first_guess_filename = first_guess_filename.parent.glob(first_guess_filename.name).__next__()
# first_guess_filename = first_guess_filename.name # first_guess_filename = first_guess_filename.name
first_guess_filename = "first_guess.nc" # first_guess_filename = "first_guess.nc"
# restart_filename = "restart.nc"
# Example of date format "2018-06-01T00:00:00Z" # Example of date format "2018-06-01T00:00:00Z"
date_format = "%simulation.date_format%" date_format = "%simulation.date_format%"
START_YEAR = "%Chunk_START_YEAR%" # this would be for the spin off, not for reproducing the parent
START_MONTH = "%Chunk_START_MONTH%" # START_YEAR = "%Chunk_START_YEAR%"
START_DAY = "%Chunk_START_DAY%" # START_MONTH = "%Chunk_START_MONTH%"
START_HOUR = "%Chunk_START_HOUR%" # START_DAY = "%Chunk_START_DAY%"
# START_HOUR = "%Chunk_START_HOUR%"
END_YEAR = "%Chunk_END_YEAR%" #
END_MONTH = "%Chunk_END_MONTH%" # END_YEAR = "%Chunk_END_YEAR%"
END_DAY = "%Chunk_END_DAY%" # END_MONTH = "%Chunk_END_MONTH%"
END_HOUR = "%Chunk_END_HOUR%" # END_DAY = "%Chunk_END_DAY%"
# END_HOUR = "%Chunk_END_HOUR%"
Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR)) # Chunk_START_DATE = datetime(year=int(START_YEAR), month=int(START_MONTH), day=int(START_DAY), hour=int(START_HOUR))
# Chunk_END_DATE = datetime(year=int(END_YEAR), month=int(END_MONTH), day=int(END_DAY), hour=int(END_HOUR))
# from which date are we actually starting?
from glob import glob

# Locate the run directory of the parent re-run. prepare_parent_date.py creates
# it as "<WORKDIR>/<STARTDATE>_from_parent_<member>", so that is the name to
# glob for; exactly one such directory is expected per start date.
pattern = f"{WORKDIR}/{STARTDATE}_from_parent_*"
files = glob(pattern)
if not files:
    raise AssertionError(f"No files match pattern {pattern}.")
if len(files) > 1:
    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
# NOTE(review): RUNDIR is a plain string here (glob result), not a Path -
# confirm that the code further down does not rely on Path operations.
RUNDIR = files[0]
print(f"Rundirectory is {RUNDIR}")

# Exactly one atmosphere restart file is expected inside the run directory.
pattern = f"{RUNDIR}/*_restart_atm_*"
restart_files_in_rundir = glob(pattern)
if not restart_files_in_rundir:
    raise AssertionError(f"No files match pattern {pattern}.")
if len(restart_files_in_rundir) > 1:
    raise AssertionError(f"More than 1 file matches pattern {pattern}.")
f = restart_files_in_rundir[0]

# The restart date is read from the part of the file name after the last
# "atm", e.g. "_20201016T000000Z.nc"; fuzzy parsing skips the non-date text.
inferred_restart_file_date = parse(f.split("atm")[-1], fuzzy=True, ignoretz=True)

# The parent re-run starts at the restart file date and ends at the start date.
Chunk_START_DATE = inferred_restart_file_date
Chunk_END_DATE = datetime.strptime(STARTDATE, "%Y%m%d")
print(f"Start chunk on: {Chunk_START_DATE}, end chunk on: {Chunk_END_DATE}")
# Read custom namelist parameters from configuration # Read custom namelist parameters from configuration
atmosphere_namelist_string = """ atmosphere_namelist_string = """
...@@ -81,15 +112,16 @@ atmosphere_namelist_replacements = { ...@@ -81,15 +112,16 @@ atmosphere_namelist_replacements = {
"extpar_filename": "extpar.nc", "extpar_filename": "extpar.nc",
}, },
"initicon_nml": { # "initicon_nml": {
"dwdfg_filename": first_guess_filename, # "dwdfg_filename": first_guess_filename,
"dwdana_filename": analysis_filename, # "dwdana_filename": analysis_filename,
} # }
} }
master_namelist_replacements = { master_namelist_replacements = {
"master_nml": { "master_nml": {
"lrestart": False if "%CHUNK%" == "1" else True, # "lrestart": False if "%CHUNK%" == "1" else True,
"lrestart": True,
}, },
"master_time_control_nml": { "master_time_control_nml": {
"experimentStartDate": Chunk_START_DATE.strftime(date_format), "experimentStartDate": Chunk_START_DATE.strftime(date_format),
...@@ -171,4 +203,3 @@ def main(): ...@@ -171,4 +203,3 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
main() main()
# Run ICON for the parent re-run inside the "<STARTDATE>_from_parent_<member>"
# directory. All %...% placeholders are filled in by autosubmit.

# Get some variables provided by autosubmit.
WORKDIR=%HPCROOTDIR%
ICON_VERSION=%ICON_VERSION%
STARTDATE=%SDATE%

# Define rundir. The directory name carries the mother member as a suffix
# ("<STARTDATE>_from_parent_<member>"), hence the trailing wildcard.
RUNDIR=$(find "${WORKDIR}" -type d -name "${STARTDATE}_from_parent_*" | sort -n | head -n 1)
if [ -z "${RUNDIR}" ]; then
    echo "No run directory named ${STARTDATE}_from_parent_* found below ${WORKDIR}." >&2
    exit 1
fi

# Go to the member rundir; abort instead of running ICON in the wrong place.
cd "${RUNDIR}" || exit 1

# Activate spack
. ${WORKDIR}/production_project/platforms/common/spack_utils.sh
load_spack "%spack.init%" "%spack.root%" "%spack.url%" "%spack.branch%" "%spack.externals%" "%spack.compiler%" "%spack.disable_local_config%" "%spack.user_cache_path%" "%spack.user_config_path%" "%spack.upstreams%"

# Get proper load command.
SPACK_BUILD_ICON="%ICON.BUILD_CMD%"
SPACK_LOAD_ICON="%ICON.LOAD_CMD%"
# The literal value "build_cmd" means: load with the same spec used to build.
if [ "${SPACK_LOAD_ICON}" == "build_cmd" ]; then
    SPACK_LOAD_ICON=${SPACK_BUILD_ICON}
fi

# Load icon module (left unquoted on purpose so the value is word-split)
spack load --first ${SPACK_LOAD_ICON}

# Set environment variable for eccodes-dwd definitions:
source ${WORKDIR}/eccodes_defs.env

# Increase stack size limit
ulimit -s unlimited

# Run icon
srun icon
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment