Commit 01afe858 authored by Oriol Tintó

Merge branch 'fix' into main

parents 02ec359f 67d0c564
Showing with 159 additions and 17 deletions
FROM ubuntu:latest
# Copy recipes and set workdir
COPY dependencies workdir
WORKDIR workdir
# Install some dependencies through apt
RUN export DEBIAN_FRONTEND=noninteractive \
&& apt update \
&& apt install -yq vim make cmake wget git python3 python3-pip python3-venv \
&& apt install -yq swig gcc gfortran pkg-config libzstd-dev \
&& apt install -yq libproj-dev proj-data proj-bin libgeos-dev libeccodes-dev
# Install the filters + libpressio + enstools-compression
RUN make
File moved
File moved
# Some enstools helpers used below
from enstools.core.tempdir import TempDir
import hdf5plugin  # importing hdf5plugin makes the HDF5 compression filters available
import enstools.io
import enstools.compression.api
import enstools.compression.emulation  # explicit import, used in evaluate_case below
from enstools.compression.metrics import DatasetMetrics
import xarray as xr
# A couple of imports to silence warnings and log messages
import warnings
import logging

logger = logging.getLogger()
logger.setLevel(logging.WARNING)
# A few cases to try
cases = [
    # Repeat the same specification several times to show that the
    # differences between filter and emulator somehow depend on the data
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.00001",
    # Try other error-bound modes
    "lossy,sz,pw_rel,0.001",
    "lossy,sz,pw_rel,0.00001",
    "lossy,sz,abs,0.001",
    "lossy,sz,abs,0.00001",
    "lossy,zfp,accuracy,0.001",
    "lossy,zfp,accuracy,0.00001",
    "lossy,zfp,rate,3.2",
    "lossy,zfp,rate,6.4",
    "lossy,zfp,precision,6",
    "lossy,zfp,precision,12",
]
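Each entry above is a compression specification string. Judging from the values used here, the comma-separated fields are: compression class (lossy), compressor (sz or zfp), error-bound mode (rel, pw_rel, abs, accuracy, rate or precision), and the bound value itself. A minimal sketch of how such a string decomposes; the helper and its field names are illustrative assumptions, not part of enstools:

# Hypothetical helper that splits a specification string into named parts;
# the field names are an assumption based on the cases above.
def parse_compression_spec(spec: str) -> dict:
    kind, compressor, mode, threshold = spec.split(",")
    return {"kind": kind,              # e.g. "lossy"
            "compressor": compressor,  # e.g. "sz" or "zfp"
            "mode": mode,              # e.g. "rel", "abs", "rate"
            "threshold": float(threshold)}

print(parse_compression_spec("lossy,sz,rel,0.001"))
# {'kind': 'lossy', 'compressor': 'sz', 'mode': 'rel', 'threshold': 0.001}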
def evaluate_case(compression: str, dataset: xr.Dataset = None):
    """
    Compress a dataset to disk with the HDF5 filters, reload it, and compare it
    with the same dataset compressed in memory through the libpressio emulator.
    """
    # Filters case: write with compression enabled and read the result back
    if dataset is None:
        dataset = create_dummy_xarray_dataset()
    working_directory = TempDir().getpath()
    filename = f"{working_directory}/test_file.nc"
    enstools.io.write(dataset, filename, compression=compression)
    filters_dataset = enstools.io.read(filename)

    # Emulator case: apply the same compression specification through libpressio
    emulator_dataset, _ = enstools.compression.emulation.emulate_compression_on_dataset(
        dataset, compression=compression, in_place=False)

    # Metric to report when the data is not bit-for-bit identical.
    # The different metrics that can be used can be found in enstools.scores
    # (e.g. "pearson_correlation" would be an alternative).
    metric_name = "correlation_I"

    # Dictionary to store the differences
    variables_with_differences = {}
    # Object to compute metrics between the data compressed with the filter
    # and the data compressed with libpressio
    filter_emulator_metrics = DatasetMetrics(filters_dataset, emulator_dataset)
    for variable in dataset.data_vars:
        # Check whether both datasets are identical
        are_equal = (filters_dataset[variable] ==
                     emulator_dataset[variable]).all().values
        # If they are not, store the desired metric
        if not are_equal:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                met = filter_emulator_metrics[variable][metric_name].values
            variables_with_differences[variable] = met
            print(f"{filter_emulator_metrics[variable][metric_name].values=}")
    print(f"{compression}")
    print(variables_with_differences)
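For reference, the reported metric quantifies how close the two reconstructions are. A minimal numpy sketch of a Pearson-style correlation between two arrays; this illustrates the concept, it is not the implementation in enstools.scores:

import numpy as np

def pearson_correlation(a: np.ndarray, b: np.ndarray) -> float:
    # Pearson correlation between the flattened, mean-centred arrays
    a, b = a.ravel() - a.mean(), b.ravel() - b.mean()
    return float((a * b).sum() / np.sqrt((a * a).sum() * (b * b).sum()))

A value of 1.0 means the two reconstructions are perfectly linearly related; values below 1.0 indicate that the filter and the emulator decompressed to measurably different data.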
def main():
    # To reuse the same dataset for all cases, pass `dataset` to evaluate_case;
    # with dataset=None a fresh dummy dataset is created for each case.
    dataset = create_dummy_xarray_dataset()
    # Loop over the different cases
    for case in cases:
        evaluate_case(dataset=None, compression=case)
def create_dummy_xarray_dataset(variables: list = None) -> xr.Dataset:
    """
    Create a dummy dataset filled with random numbers.
    """
    import numpy as np
    import pandas as pd
    # Create a synthetic dataset representing a 4D variable (3D + time)
    if variables is None:
        variables = ["temperature", "vorticity", "pressure"]
    nx, ny, nz, t = 261, 121, 70, 1
    lon = np.linspace(-180, 180, nx)
    lat = np.linspace(-90, 90, ny)
    levels = np.array(range(nz))
    data_size = (t, nz, nx, ny)
    var_dimensions = ["time", "level", "lon", "lat"]
    # Select the data type
    data_type = np.float32
    # Create some random data (note: all variables share the same array)
    var_data = data_type(50 * np.random.randn(*data_size))
    var_dict = {var: (var_dimensions, var_data) for var in variables}
    ds = xr.Dataset(
        var_dict,
        # Set up the coordinates
        coords={
            "lon": lon,
            "lat": lat,
            "level": levels,
            "time": pd.date_range("2014-09-06", periods=t),
            "reference_time": pd.Timestamp("2014-09-05"),
        },
    )
    return ds
if __name__ == "__main__":
    main()
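To test a single configuration without looping over all cases, the functions above can also be called directly; a minimal sketch:

# Evaluate one configuration on a small single-variable dummy dataset
ds = create_dummy_xarray_dataset(variables=["temperature"])
evaluate_case("lossy,zfp,rate,3.2", dataset=ds)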
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
@@ -13,7 +13,7 @@ function clean_modules() {
 # base directory for installation, depends on site
 function get_install_base() {
-    RESULT=$(pwd)/../${INSTALLATION_DIR}
+    RESULT=${INSTALLATION_DIR}
     echo $RESULT
 }
File moved
export DEBIAN_FRONTEND=noninteractive
apt update
# Useful if something needs to be tweaked along the way
apt install -yq vim
# Common dependencies
apt install -yq make cmake git python3 python3-pip python3-venv
# libpressio & filter dependencies
apt install -yq swig gcc gfortran pkg-config libzstd-dev
# Additional dependencies for enstools
apt install -yq libproj-dev proj-data proj-bin libgeos-dev libeccodes-dev