Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • w2w/enstools-compression
1 result
Show changes
Commits on Source (3)
2023.11
2023.11.1
......@@ -17,6 +17,8 @@ from enstools.encoding.api import VariableEncoding, DatasetEncoding
from enstools.compression.emulation import emulate_compression_on_data_array, emulate_compression_on_dataset
from enstools.compression.analyzer.analysis_options import AnalysisOptions
from enstools.compression.analyzer.analyzer import analyze_data_array, analyze_dataset
from enstools.encoding.dataset_encoding import convert_to_bytes, find_chunk_sizes
from enstools.encoding.chunk_size import chunk_size as default_chunk_size
@xarray.register_dataarray_accessor("compression")
......@@ -24,6 +26,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
Enstools-compression DataArray accessor class.
"""
def __init__(self, xarray_obj: xarray.DataArray):
"""
Initialize the accessor saving a reference of the data array.
......@@ -34,7 +37,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
self._obj = xarray_obj
def emulate(self, compression: str, in_place=False) -> xarray.DataArray:
def emulate(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
"""
Emulate compression on a data array.
......@@ -42,20 +45,39 @@ class EnstoolsCompressionDataArrayAccessor:
----------
compression: str
in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns
-------
xarray.DataArray
"""
compression_specification = VariableEncoding(compression)
# Chunking!
#############################
data_array = self._obj
type_size = data_array.dtype.itemsize
if chunk_size is None:
chunk_size = default_chunk_size
chunk_memory_size = convert_to_bytes(chunk_size)
optimal_chunk_size = chunk_memory_size / type_size
chunk_sizes = find_chunk_sizes(data_array=data_array, chunk_size=optimal_chunk_size)
chunk_sizes = tuple(chunk_sizes[d] for d in data_array.dims)
compression_specification.set_chunk_sizes(chunk_sizes)
data_array, metrics = emulate_compression_on_data_array(data_array=self._obj,
compression_specification=compression_specification,
in_place=in_place)
#############################
data_array.attrs["compression_specification"] = compression
data_array.attrs["compression_ratio"] = f"{metrics['compression_ratio']:.2f}"
return data_array
def __call__(self, compression: str, in_place=False) -> xarray.DataArray:
def __call__(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
"""
Calling the accessor directly uses emulate method.
......@@ -63,6 +85,8 @@ class EnstoolsCompressionDataArrayAccessor:
----------
compression: str
in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns
-------
......@@ -70,7 +94,7 @@ class EnstoolsCompressionDataArrayAccessor:
"""
return self.emulate(compression=compression, in_place=in_place)
return self.emulate(compression=compression, in_place=in_place, chunk_size=chunk_size)
def analyze(self,
constrains="correlation_I:5,ssim_I:2",
......@@ -199,6 +223,7 @@ class EnstoolsCompressionToCompressedNetcdf:
"""
Accessor to enable the method to_compressed_netcdf to xarray Datasets
"""
def __init__(self, xarray_obj: xarray.Dataset):
"""
Initialize the accessor saving a reference of the dataset.
......