Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • w2w/enstools-compression
1 result
Show changes
Commits on Source (3)
2023.11 2023.11.1
...@@ -17,6 +17,8 @@ from enstools.encoding.api import VariableEncoding, DatasetEncoding ...@@ -17,6 +17,8 @@ from enstools.encoding.api import VariableEncoding, DatasetEncoding
from enstools.compression.emulation import emulate_compression_on_data_array, emulate_compression_on_dataset from enstools.compression.emulation import emulate_compression_on_data_array, emulate_compression_on_dataset
from enstools.compression.analyzer.analysis_options import AnalysisOptions from enstools.compression.analyzer.analysis_options import AnalysisOptions
from enstools.compression.analyzer.analyzer import analyze_data_array, analyze_dataset from enstools.compression.analyzer.analyzer import analyze_data_array, analyze_dataset
from enstools.encoding.dataset_encoding import convert_to_bytes, find_chunk_sizes
from enstools.encoding.chunk_size import chunk_size as default_chunk_size
@xarray.register_dataarray_accessor("compression") @xarray.register_dataarray_accessor("compression")
...@@ -24,6 +26,7 @@ class EnstoolsCompressionDataArrayAccessor: ...@@ -24,6 +26,7 @@ class EnstoolsCompressionDataArrayAccessor:
""" """
Enstools-compression DataArray accessor class. Enstools-compression DataArray accessor class.
""" """
def __init__(self, xarray_obj: xarray.DataArray): def __init__(self, xarray_obj: xarray.DataArray):
""" """
Initialize the accessor saving a reference of the data array. Initialize the accessor saving a reference of the data array.
...@@ -34,7 +37,7 @@ class EnstoolsCompressionDataArrayAccessor: ...@@ -34,7 +37,7 @@ class EnstoolsCompressionDataArrayAccessor:
""" """
self._obj = xarray_obj self._obj = xarray_obj
def emulate(self, compression: str, in_place=False) -> xarray.DataArray: def emulate(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
""" """
Emulate compression on a data array. Emulate compression on a data array.
...@@ -42,20 +45,39 @@ class EnstoolsCompressionDataArrayAccessor: ...@@ -42,20 +45,39 @@ class EnstoolsCompressionDataArrayAccessor:
---------- ----------
compression: str compression: str
in_place: bool in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns Returns
------- -------
xarray.DataArray xarray.DataArray
""" """
compression_specification = VariableEncoding(compression) compression_specification = VariableEncoding(compression)
# Chunking!
#############################
data_array = self._obj
type_size = data_array.dtype.itemsize
if chunk_size is None:
chunk_size = default_chunk_size
chunk_memory_size = convert_to_bytes(chunk_size)
optimal_chunk_size = chunk_memory_size / type_size
chunk_sizes = find_chunk_sizes(data_array=data_array, chunk_size=optimal_chunk_size)
chunk_sizes = tuple(chunk_sizes[d] for d in data_array.dims)
compression_specification.set_chunk_sizes(chunk_sizes)
data_array, metrics = emulate_compression_on_data_array(data_array=self._obj, data_array, metrics = emulate_compression_on_data_array(data_array=self._obj,
compression_specification=compression_specification, compression_specification=compression_specification,
in_place=in_place) in_place=in_place)
#############################
data_array.attrs["compression_specification"] = compression data_array.attrs["compression_specification"] = compression
data_array.attrs["compression_ratio"] = f"{metrics['compression_ratio']:.2f}" data_array.attrs["compression_ratio"] = f"{metrics['compression_ratio']:.2f}"
return data_array return data_array
def __call__(self, compression: str, in_place=False) -> xarray.DataArray: def __call__(self, compression: str, in_place=False, chunk_size=None) -> xarray.DataArray:
""" """
Calling the accessor directly uses emulate method. Calling the accessor directly uses emulate method.
...@@ -63,6 +85,8 @@ class EnstoolsCompressionDataArrayAccessor: ...@@ -63,6 +85,8 @@ class EnstoolsCompressionDataArrayAccessor:
---------- ----------
compression: str compression: str
in_place: bool in_place: bool
chunk_size: str If not used, the default chunk size will be used (10MB). It can also be modified by changing
the enstools.encoding.chunk_sizes.chunk_size module variable.
Returns Returns
------- -------
...@@ -70,7 +94,7 @@ class EnstoolsCompressionDataArrayAccessor: ...@@ -70,7 +94,7 @@ class EnstoolsCompressionDataArrayAccessor:
""" """
return self.emulate(compression=compression, in_place=in_place) return self.emulate(compression=compression, in_place=in_place, chunk_size=chunk_size)
def analyze(self, def analyze(self,
constrains="correlation_I:5,ssim_I:2", constrains="correlation_I:5,ssim_I:2",
...@@ -199,6 +223,7 @@ class EnstoolsCompressionToCompressedNetcdf: ...@@ -199,6 +223,7 @@ class EnstoolsCompressionToCompressedNetcdf:
""" """
Accessor to enable the method to_compressed_netcdf to xarray Datasets Accessor to enable the method to_compressed_netcdf to xarray Datasets
""" """
def __init__(self, xarray_obj: xarray.Dataset): def __init__(self, xarray_obj: xarray.Dataset):
""" """
Initialize the accessor saving a reference of the dataset. Initialize the accessor saving a reference of the dataset.
......