diff --git a/VERSION b/VERSION
index 06a38dc0db93378eb1940d555d6cabbb99ec9385..38160ca1e19436eb9f1e7bf3fff95c95bc275929 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2023.6
+2023.11
diff --git a/enstools/compression/analyzer/analyze_data_array.py b/enstools/compression/analyzer/analyze_data_array.py
index 308544ba18fbc9012c1e4872017fa97fae6c70db..dcf14b9b7c7352b187dd40c4f5b7c187b1f756c7 100644
--- a/enstools/compression/analyzer/analyze_data_array.py
+++ b/enstools/compression/analyzer/analyze_data_array.py
@@ -19,13 +19,14 @@
 import xarray
 import enstools.encoding.chunk_size
 from enstools.compression.emulators import DefaultEmulator
-from enstools.compression.errors import ConditionsNotFulfilledError
+from enstools.compression.errors import ConditionsNotFulfilledError, ConstantValues
 from enstools.compression.slicing import MultiDimensionalSliceCollection
 from enstools.encoding.api import VariableEncoding
 from enstools.encoding.dataset_encoding import find_chunk_sizes, convert_to_bytes
 from enstools.encoding.rules import COMPRESSION_SPECIFICATION_SEPARATOR
 from .analysis_options import AnalysisOptions
 from .analyzer_utils import get_metrics, get_parameter_range, bisection_method
+from enstools.compression.emulation import emulate_compression_on_data_array
 
 # These metrics will be used to select within the different encodings when aiming at a certain compression ratio.
 ANALYSIS_DIAGNOSTIC_METRICS = ["correlation_I", "ssim_I"]
@@ -45,7 +46,7 @@
     return eval_last_percentile > eval_first_percentile
 
 
-def get_one_slice(data_array: xarray.DataArray, chunk_size: str = "100KB"):
+def get_one_slice(data_array: xarray.DataArray, chunk_size: str = "100KB"):
     chunk_memory_size = convert_to_bytes(chunk_size)
     chunk_sizes = find_chunk_sizes(data_array, chunk_memory_size)
     chunk_sizes = [chunk_sizes[dim] for dim in data_array.dims]
@@ -53,8 +54,16 @@
     big_chunk_size = max(set([s.size for s in multi_dimensional_slice.objects.ravel()]))
     big_chunks = [s for s in multi_dimensional_slice.objects.ravel() if s.size == big_chunk_size]
 
-    return {dim: size for dim, size in zip(data_array.dims, big_chunks[0].slices)}
+    for chunk_index in range(len(big_chunks)):
+        slices = {dim: size for dim, size in zip(data_array.dims, big_chunks[chunk_index].slices)}
+        data_array_slice = data_array.isel(**slices)
+        # Check if the range of the slice is greater than 0
+        if data_array_slice.size > 0 and np.ptp(data_array_slice.values) > 0:
+            return data_array_slice
+
+    # If all slices have a range of 0, raise an exception
+    raise ConstantValues("All slices have constant values or are empty.")
 
 
 def analyze_data_array(data_array: xarray.DataArray, options: AnalysisOptions) -> Tuple[str, dict]:
@@ -62,9 +71,32 @@ def analyze_data_array(data_array: xarray.DataArray, options: AnalysisOptions) -
     Find the compression specification corresponding to a certain data array and a given set of compression options.
     """
-    slices = get_one_slice(data_array,
-                           chunk_size=enstools.encoding.chunk_size.analysis_chunk_size)
-    data_array = data_array.isel(**slices)
+    try:
+        data_array = get_one_slice(data_array,
+                                   chunk_size=enstools.encoding.chunk_size.analysis_chunk_size,
+                                   )
+    except ConstantValues:
+        # Issue a warning that all values in the data array are constant
+        warning_message = f"All values in the variable {data_array.name} are constant."
+        warnings.warn(warning_message)
+
+        # In case all values are constant, return lossless.
+        # First let's find out the compression ratio
+        _, metrics = emulate_compression_on_data_array(data_array,
+                                                       compression_specification=VariableEncoding("lossless"),
+                                                       in_place=False)
+
+        return "lossless", metrics
+
+    # Compute the range of the data values in the slice
+    data_range = np.ptp(data_array.values)  # ptp (peak-to-peak) calculates the range
+
+    # Check if the range is zero
+    if data_range == 0:
+        raise ValueError("The range of the data_array slice is zero.")
+
+    # Check that the range is not 0
+
 
     # Check if the array contains any nan
     contains_nan = np.isnan(data_array.values).any()
     if contains_nan:
diff --git a/enstools/compression/emulation.py b/enstools/compression/emulation.py
index 139d46dcd6490f71fd68443bd6ec68967a9b2f1c..9d7a6fbf070adb7ea1a65a2c6f2b1a6ed797f0ad 100755
--- a/enstools/compression/emulation.py
+++ b/enstools/compression/emulation.py
@@ -115,8 +115,8 @@ def emulate_compression_on_numpy_array(data: numpy.ndarray, compression_specific
 
 
     """
-    if isinstance(compression_specification, (LosslessEncoding, NullEncoding)):
-        return data, {}
+    if isinstance(compression_specification, NullEncoding):
+        return data, {"compression_ratio": 1}
 
     emulator_backend = DefaultEmulator
 
diff --git a/enstools/compression/errors.py b/enstools/compression/errors.py
index 4edf08b07c801618f21d6b0bbd3742eca7aab0d1..f18804ec8c1bd1eaf879ee83e038b497f55be33b 100644
--- a/enstools/compression/errors.py
+++ b/enstools/compression/errors.py
@@ -1,3 +1,9 @@
 from enstools.core.errors import EnstoolsError
+
+
 class ConditionsNotFulfilledError(EnstoolsError):
-    ...
\ No newline at end of file
+    ...
+
+
+class ConstantValues(Exception):
+    pass
diff --git a/requirements.txt b/requirements.txt
index bad8fac561176057f1a66ac98a79bcf505bef934..6d80422124a936041863f7bbc3a2e69515614dc0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-enstools>=2023.1
+enstools>=2023.11
 enstools-encoding>=2023.6
 zfpy
 hdf5plugin>=4.0.0
diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py
index 184f417a1a8b79694df0a91793d64cd48e5e68e1..05162dc89e512198704717e0cedc81ee2c2982bb 100644
--- a/tests/test_analyzer.py
+++ b/tests/test_analyzer.py
@@ -15,6 +15,32 @@ class TestAnalyzer(TestClass):
         input_path = input_tempdir / ds
         analyze_files(file_paths=[input_path])
 
+    def test_analyzer_constant_array(self):
+        import enstools.compression.xr_accessor  # noqa
+        import numpy as np
+        import xarray as xr
+
+        shape = (100, 100, 100)
+        data = np.zeros(shape)
+        data_array = xr.DataArray(data)
+        # Expect a warning about constant values
+        with pytest.warns(UserWarning, match="All values in the variable .* are constant."):
+            specs, metrics = data_array.compression.analyze()
+
+        data_array.compression(specs)
+
+
+    def test_analyzer_without_lat_lon(self):
+        import enstools.compression.xr_accessor  # noqa
+        import numpy as np
+        import xarray as xr
+
+        shape = (100, 100, 100)
+        data = np.random.random(size=shape)
+        data_array = xr.DataArray(data)
+        specs, metrics = data_array.compression.analyze()
+        data_array.compression(specs)
+
     def test_zfp_analyzer(self):
         from enstools.compression.api import analyze_files
         input_tempdir = self.input_directory_path
@@ -60,8 +86,9 @@
 
         for var in metrics:
             if abs(metrics[var][cr_label] - thresholds[cr_label]) > TOLERANCE:
-                raise AssertionError(f"Case:{input_path.name}.The resulting compression ratio of {metrics[var][cr_label]:.2f}"
-                                     f"x is not close enough to the target of {thresholds[cr_label]:.2f}")
+                raise AssertionError(
+                    f"Case:{input_path.name}.The resulting compression ratio of {metrics[var][cr_label]:.2f}"
+                    f"x is not close enough to the target of {thresholds[cr_label]:.2f}")
 
     def test_sz_analyzer(self):
         from enstools.compression.api import analyze_files
@@ -85,6 +112,7 @@
             compressor="zfp",
             mode="rate",
         )
+
     def test_rmse(self):
         from enstools.compression.api import analyze_files
         input_tempdir = self.input_directory_path