diff --git a/VERSION b/VERSION index 38160ca1e19436eb9f1e7bf3fff95c95bc275929..5e8741dd4e4599326bebe8e01cd280d406d6643d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2023.11 +2023.11.1 diff --git a/enstools/compression/xr_accessor.py b/enstools/compression/xr_accessor.py index 75357888fc479b9da07d7951a2a553a3499a238c..42ea83d8b8140777e3094390f35617d80b14408f 100644 --- a/enstools/compression/xr_accessor.py +++ b/enstools/compression/xr_accessor.py @@ -17,6 +17,8 @@ from enstools.encoding.api import VariableEncoding, DatasetEncoding from enstools.compression.emulation import emulate_compression_on_data_array, emulate_compression_on_dataset from enstools.compression.analyzer.analysis_options import AnalysisOptions from enstools.compression.analyzer.analyzer import analyze_data_array, analyze_dataset +from enstools.encoding.dataset_encoding import convert_to_bytes, find_chunk_sizes +from enstools.encoding.chunk_size import chunk_size as default_chunk_size @xarray.register_dataarray_accessor("compression") @@ -24,6 +26,7 @@ class EnstoolsCompressionDataArrayAccessor: """ Enstools-compression DataArray accessor class. """ + def __init__(self, xarray_obj: xarray.DataArray): """ Initialize the accessor saving a reference of the data array. @@ -34,7 +37,7 @@ class EnstoolsCompressionDataArrayAccessor: """ self._obj = xarray_obj - def emulate(self, compression: str, in_place=False) -> xarray.DataArray: + def emulate(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray: """ Emulate compression on a data array. @@ -42,20 +45,39 @@ ---------- compression: str in_place: bool + chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing + the enstools.encoding.chunk_size.chunk_size module variable. Returns ------- xarray.DataArray """ compression_specification = VariableEncoding(compression) + + # Chunking! 
+ ############################# + data_array = self._obj + type_size = data_array.dtype.itemsize + + if chunk_size is None: + chunk_size = default_chunk_size + + chunk_memory_size = convert_to_bytes(chunk_size) + optimal_chunk_size = chunk_memory_size / type_size + chunk_sizes = find_chunk_sizes(data_array=data_array, chunk_size=optimal_chunk_size) + chunk_sizes = tuple(chunk_sizes[d] for d in data_array.dims) + compression_specification.set_chunk_sizes(chunk_sizes) + data_array, metrics = emulate_compression_on_data_array(data_array=self._obj, compression_specification=compression_specification, in_place=in_place) + ############################# + data_array.attrs["compression_specification"] = compression data_array.attrs["compression_ratio"] = f"{metrics['compression_ratio']:.2f}" return data_array - def __call__(self, compression: str, in_place=False) -> xarray.DataArray: + def __call__(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray: """ Calling the accessor directly uses emulate method. @@ -63,6 +85,8 @@ class EnstoolsCompressionDataArrayAccessor: ---------- compression: str in_place: bool + chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing + the enstools.encoding.chunk_size.chunk_size module variable. Returns ------- @@ -70,7 +94,7 @@ """ - return self.emulate(compression=compression, in_place=in_place) + return self.emulate(compression=compression, in_place=in_place, chunk_size=chunk_size) def analyze(self, constrains="correlation_I:5,ssim_I:2", @@ -199,6 +223,7 @@ class EnstoolsCompressionToCompressedNetcdf: """ Accessor to enable the method to_compressed_netcdf to xarray Datasets """ + def __init__(self, xarray_obj: xarray.Dataset): """ Initialize the accessor saving a reference of the dataset.