Skip to content
Snippets Groups Projects

The xr_accessor emulator by default uses the same chunking as the disk...

Merged Oriol.Tinto requested to merge dev-emulator-chunk-size into main
2 files
+ 29
4
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -17,6 +17,8 @@ from enstools.encoding.api import VariableEncoding, DatasetEncoding
from enstools.compression.emulation import emulate_compression_on_data_array, emulate_compression_on_dataset
from enstools.compression.analyzer.analysis_options import AnalysisOptions
from enstools.compression.analyzer.analyzer import analyze_data_array, analyze_dataset
from enstools.encoding.dataset_encoding import convert_to_bytes, find_chunk_sizes
from enstools.encoding.chunk_size import chunk_size as default_chunk_size
@xarray.register_dataarray_accessor("compression")
@@ -24,6 +26,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
Enstools-compression DataArray accessor class.
"""
def __init__(self, xarray_obj: xarray.DataArray):
"""
Initialize the accessor saving a reference of the data array.
@@ -34,7 +37,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
self._obj = xarray_obj
def emulate(self, compression: str, in_place=False) -> xarray.DataArray:
def emulate(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
"""
Emulate compression on a data array.
@@ -42,20 +45,39 @@ class EnstoolsCompressionDataArrayAccessor:
----------
compression: str
in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns
-------
xarray.DataArray
"""
compression_specification = VariableEncoding(compression)
# Chunking!
#############################
data_array = self._obj
type_size = data_array.dtype.itemsize
if chunk_size is None:
chunk_size = default_chunk_size
chunk_memory_size = convert_to_bytes(chunk_size)
optimal_chunk_size = chunk_memory_size / type_size
chunk_sizes = find_chunk_sizes(data_array=data_array, chunk_size=optimal_chunk_size)
chunk_sizes = tuple(chunk_sizes[d] for d in data_array.dims)
compression_specification.set_chunk_sizes(chunk_sizes)
data_array, metrics = emulate_compression_on_data_array(data_array=self._obj,
compression_specification=compression_specification,
in_place=in_place)
#############################
data_array.attrs["compression_specification"] = compression
data_array.attrs["compression_ratio"] = f"{metrics['compression_ratio']:.2f}"
return data_array
def __call__(self, compression: str, in_place=False) -> xarray.DataArray:
def __call__(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
"""
Calling the accessor directly uses emulate method.
@@ -63,6 +85,8 @@ class EnstoolsCompressionDataArrayAccessor:
----------
compression: str
in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns
-------
@@ -70,7 +94,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
return self.emulate(compression=compression, in_place=in_place)
return self.emulate(compression=compression, in_place=in_place, chunk_size=chunk_size)
def analyze(self,
constrains="correlation_I:5,ssim_I:2",
@@ -199,6 +223,7 @@ class EnstoolsCompressionToCompressedNetcdf:
"""
Accessor to enable the method to_compressed_netcdf to xarray Datasets
"""
def __init__(self, xarray_obj: xarray.Dataset):
"""
Initialize the accessor saving a reference of the dataset.
Loading