Commit 01afe858 authored by Oriol Tintó

Merge branch 'fix' into main

parents 02ec359f 67d0c564
Showing with 159 additions and 17 deletions
FROM ubuntu:latest
# Copy recipes and set workdir
COPY dependencies workdir
WORKDIR workdir
# Install some dependencies through apt
RUN export DEBIAN_FRONTEND=noninteractive \
&& apt update \
&& apt install -yq vim make cmake wget git python3 python3-pip python3-venv \
&& apt install -yq swig gcc gfortran pkg-config libzstd-dev \
&& apt install -yq libproj-dev proj-data proj-bin libgeos-dev libeccodes-dev
# Install the filters + libpressio + enstools-compression
RUN make
File moved
File moved
# Some enstools helpers used below
from enstools.core.tempdir import TempDir
import hdf5plugin  # importing hdf5plugin makes the HDF5 compression filters available
import enstools.io
import enstools.compression.api
import enstools.compression.emulation  # explicit import, used in evaluate_case below
from enstools.compression.metrics import DatasetMetrics
import xarray as xr
# A couple of imports to silence warnings and log messages
import warnings
import logging

logger = logging.getLogger()
logger.setLevel(logging.WARNING)
# A few cases to try
cases = [
    # Repeat the same specification several times to show that the
    # differences between filter and emulator somehow depend on the data
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.001",
    "lossy,sz,rel,0.00001",
    # Try other error-bound modes
    "lossy,sz,pw_rel,0.001",
    "lossy,sz,pw_rel,0.00001",
    "lossy,sz,abs,0.001",
    "lossy,sz,abs,0.00001",
    "lossy,zfp,accuracy,0.001",
    "lossy,zfp,accuracy,0.00001",
    "lossy,zfp,rate,3.2",
    "lossy,zfp,rate,6.4",
    "lossy,zfp,precision,6",
    "lossy,zfp,precision,12",
]
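Each entry above is a compression specification string. Judging from the values used here, the comma-separated fields are: compression class (lossy), compressor (sz or zfp), error-bound mode (rel, pw_rel, abs, accuracy, rate or precision), and the bound value itself. A minimal sketch of how such a string decomposes; the helper and its field names are illustrative assumptions, not part of enstools:

# Hypothetical helper that splits a specification string into named parts;
# the field names are an assumption based on the cases above.
def parse_compression_spec(spec: str) -> dict:
    kind, compressor, mode, threshold = spec.split(",")
    return {"kind": kind,              # e.g. "lossy"
            "compressor": compressor,  # e.g. "sz" or "zfp"
            "mode": mode,              # e.g. "rel", "abs", "rate"
            "threshold": float(threshold)}

print(parse_compression_spec("lossy,sz,rel,0.001"))
# {'kind': 'lossy', 'compressor': 'sz', 'mode': 'rel', 'threshold': 0.001}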
def evaluate_case(compression: str, dataset: xr.Dataset = None):
    """
    Compress a dataset to disk with the HDF5 filters, reload it, and compare it
    with the same dataset compressed in memory through the libpressio emulator.
    """
    # Filters case: write with compression enabled and read the result back
    if dataset is None:
        dataset = create_dummy_xarray_dataset()
    working_directory = TempDir().getpath()
    filename = f"{working_directory}/test_file.nc"
    enstools.io.write(dataset, filename, compression=compression)
    filters_dataset = enstools.io.read(filename)

    # Emulator case: apply the same compression specification through libpressio
    emulator_dataset, _ = enstools.compression.emulation.emulate_compression_on_dataset(
        dataset, compression=compression, in_place=False)

    # Metric to report when the data is not bit-for-bit identical.
    # The different metrics that can be used can be found in enstools.scores
    # (e.g. "pearson_correlation" would be an alternative).
    metric_name = "correlation_I"

    # Dictionary to store the differences
    variables_with_differences = {}
    # Object to compute metrics between the data compressed with the filter
    # and the data compressed with libpressio
    filter_emulator_metrics = DatasetMetrics(filters_dataset, emulator_dataset)
    for variable in dataset.data_vars:
        # Check whether both datasets are identical
        are_equal = (filters_dataset[variable] ==
                     emulator_dataset[variable]).all().values
        # If they are not, store the desired metric
        if not are_equal:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                met = filter_emulator_metrics[variable][metric_name].values
            variables_with_differences[variable] = met
            print(f"{filter_emulator_metrics[variable][metric_name].values=}")
    print(f"{compression}")
    print(variables_with_differences)
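For reference, the reported metric quantifies how close the two reconstructions are. A minimal numpy sketch of a Pearson-style correlation between two arrays; this illustrates the concept, it is not the implementation in enstools.scores:

import numpy as np

def pearson_correlation(a: np.ndarray, b: np.ndarray) -> float:
    # Pearson correlation between the flattened, mean-centred arrays
    a, b = a.ravel() - a.mean(), b.ravel() - b.mean()
    return float((a * b).sum() / np.sqrt((a * a).sum() * (b * b).sum()))

A value of 1.0 means the two reconstructions are perfectly linearly related; values below 1.0 indicate that the filter and the emulator decompressed to measurably different data.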
def main():
    # To reuse the same dataset for all cases, pass `dataset` to evaluate_case;
    # with dataset=None a fresh dummy dataset is created for each case.
    dataset = create_dummy_xarray_dataset()
    # Loop over the different cases
    for case in cases:
        evaluate_case(dataset=None, compression=case)
def create_dummy_xarray_dataset(variables: list = None) -> xr.Dataset:
    """
    Create a dummy dataset filled with random numbers.
    """
    import numpy as np
    import pandas as pd
    # Create a synthetic dataset representing a 4D variable (3D + time)
    if variables is None:
        variables = ["temperature", "vorticity", "pressure"]
    nx, ny, nz, t = 261, 121, 70, 1
    lon = np.linspace(-180, 180, nx)
    lat = np.linspace(-90, 90, ny)
    levels = np.array(range(nz))
    data_size = (t, nz, nx, ny)
    var_dimensions = ["time", "level", "lon", "lat"]
    # Select the data type
    data_type = np.float32
    # Create some random data (note: all variables share the same array)
    var_data = data_type(50 * np.random.randn(*data_size))
    var_dict = {var: (var_dimensions, var_data) for var in variables}
    ds = xr.Dataset(
        var_dict,
        # Set up the coordinates
        coords={
            "lon": lon,
            "lat": lat,
            "level": levels,
            "time": pd.date_range("2014-09-06", periods=t),
            "reference_time": pd.Timestamp("2014-09-05"),
        },
    )
    return ds
if __name__ == "__main__":
    main()
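To test a single configuration without looping over all cases, the functions above can also be called directly; a minimal sketch:

# Evaluate one configuration on a small single-variable dummy dataset
ds = create_dummy_xarray_dataset(variables=["temperature"])
evaluate_case("lossy,zfp,rate,3.2", dataset=ds)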
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
@@ -13,7 +13,7 @@ function clean_modules() {
 # base directory for installation, depends on site
 function get_install_base() {
-    RESULT=$(pwd)/../${INSTALLATION_DIR}
+    RESULT=${INSTALLATION_DIR}
     echo $RESULT
 }
File moved
export DEBIAN_FRONTEND=noninteractive
apt update
# Useful if something needs to be tweaked along the way
apt install -yq vim
# Common dependencies
apt install -yq make cmake git python3 python3-pip python3-venv
# libpressio & filter dependencies
apt install -yq swig gcc gfortran pkg-config libzstd-dev
# Additional dependencies for enstools
apt install -yq libproj-dev proj-data proj-bin libgeos-dev libeccodes-dev