From 04e548a0e05625d22542c8e9b930c38b21cd4a31 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Wed, 8 Jun 2022 13:45:06 +0200 Subject: [PATCH 01/10] Fix name in field access --- enstools/feature/identification/threshold/identification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 5644de6..b707f49 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -38,8 +38,8 @@ class DoubleThresholdIdentification(IdentificationStrategy): def precompute(self, dataset: xr.Dataset, **kwargs): assert self.field in dataset - dataset['inner_thresh_mask'] = xr.zeros_like(dataset['E'], dtype=int) - dataset['outer_thresh_mask'] = xr.zeros_like(dataset['E'], dtype=int) + dataset['inner_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) + dataset['outer_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) return dataset -- GitLab From 709aaf4478adfd27f33c1b247590fddf66d960d3 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Wed, 8 Jun 2022 13:45:48 +0200 Subject: [PATCH 02/10] Fix inner/outer mask usage --- .../identification/threshold/identification.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index b707f49..8a541c4 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -49,9 +49,10 @@ class DoubleThresholdIdentification(IdentificationStrategy): if self.processing_mode == "2d": field = field.reshape((1,) + field.shape) - # TODO give operator to routine - inner_thresh_mask = np.ma.masked_where(self.compare(field, self.inner_threshold),field).mask - outer_thresh_mask = np.ma.masked_where(self.compare(field, self.outer_threshold),field).mask + _is_inner = self.compare(field, self.inner_threshold) + _is_outer = self.compare(field, self.outer_threshold) + inner_thresh_mask = np.ma.getmaskarray(np.ma.masked_where(_is_inner, field)) + outer_thresh_mask = np.ma.getmaskarray(np.ma.masked_where(_is_outer, field)) ids, outer_features, inner_features = detection_double_thresh( feature_id=0, inner_thresh_mask=inner_thresh_mask, @@ -87,11 +88,14 @@ class DoubleThresholdIdentification(IdentificationStrategy): mask_inner[inner_features == id_] = 1 if self.processing_mode == "2d": mask = mask.reshape(mask.shape[1:]) + mask_outer = mask_outer.reshape(mask_outer.shape[1:]) + mask_inner = mask_inner.reshape(mask_inner.shape[1:]) mask_to_proto(mask, properties.mask, compress=self.compress) obj_list.append(obj) - dataset["inner_thresh_mask"].values = dataset["inner_thresh_mask"].values + mask_inner - dataset["outer_thresh_mask"].values = dataset["outer_thresh_mask"].values + mask_outer + + dataset["inner_thresh_mask"].values = dataset["inner_thresh_mask"].values + mask_inner + dataset["outer_thresh_mask"].values = dataset["outer_thresh_mask"].values + mask_outer # return the dataset (can be changed here), and the list of objects return dataset, obj_list -- GitLab From 6b490699e8b9338bc85dd2aead9bb12f6d66d7bd Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Wed, 8 Jun 2022 18:40:49 +0200 Subject: [PATCH 03/10] Add internal documentation --- .../threshold/identification.py | 71 +++++++++++-------- .../identification/threshold/processing.py | 7 ++ 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 8a541c4..476845b 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -13,42 +13,62 @@ class DoubleThresholdIdentification(IdentificationStrategy): self.field = field self.outer_threshold = outer_threshold self.inner_threshold = inner_threshold - # Save comparison operator name and get associated comparison function - self.comparison_operator = comparison_operator + # Save normalized comparison operator name and store associated + # comparison function if comparison_operator in { ">=", "ge" }: + self.comparison_operator = ">=" self.compare = operator.ge elif comparison_operator in { ">", "gt" }: + self.comparison_operator = ">" self.compare = operator.gt elif comparison_operator in { "<=", "le" }: + self.comparison_operator = "<=" self.compare = operator.le elif comparison_operator in { "<", "lt" }: + self.comparison_operator = "<" self.compare = operator.lt else: raise ValueError(f"unknown comparison operator '{comparison_operator}'") # Check that thresholds are properly sorted for the given comparison # operator. Always allow equality to make single threshold # identification possible. - assert self.inner_threshold == self.outer_threshold or self.compare(self.inner_threshold, self.outer_threshold) + assert (self.inner_threshold == self.outer_threshold + or self.compare(self.inner_threshold, self.outer_threshold)), \ + "threshold values incompatible with ordering, check inner and outer values" # Compression of object mask array in protobuf objects self.compress = compress # Specify processing mode 2d or 3d: In 2d, identification will be performed on 2d levels, # in 3d the identification will be performed per 3d block - assert processing_mode in { "2d", "3d" } + assert processing_mode in { "2d", "3d" }, "choose processing mode from '2d' or '3d'" self.processing_mode = processing_mode + def __repr__(self): + return "\n".join([ + "DoubleThresholdIdentification(", + f" field='{self.field}',", + f" outer_threshold={self.outer_threshold},", + f" inner_threshold={self.inner_threshold},", + f" comparison_operator='{self.comparison_operator}',", + f" processing_mode='{self.processing_mode}',", + f" compress=" + ("True" if self.compress else "False"), + ")" + ]) + def precompute(self, dataset: xr.Dataset, **kwargs): - assert self.field in dataset + assert self.field in dataset, f"required field '{self.field}' does not exist" dataset['inner_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) dataset['outer_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) - return dataset def identify(self, dataset: xr.Dataset, **kwargs): + # Field used for detection field = dataset[self.field].values - + # Handle 2D processing as special case of 3D processing if self.processing_mode == "2d": field = field.reshape((1,) + field.shape) - + # Create masks for outer and inner fields based on the given comparison + # operator and threshold values and pass to the feature extraction + # routine _is_inner = self.compare(field, self.inner_threshold) _is_outer = self.compare(field, self.outer_threshold) inner_thresh_mask = np.ma.getmaskarray(np.ma.masked_where(_is_inner, field)) @@ -59,26 +79,23 @@ class DoubleThresholdIdentification(IdentificationStrategy): outer_thresh_mask=outer_thresh_mask, entire_globe=True ) - - # Let's say you detected 5 objects: + # Process each feature obj_list = [] - for id_ in ids: # Feature 0 is the "no-feature feature" if id_ == 0: continue - - # get an instance of a new object, can pass an ID or set in manually afterwards + # Create a new protobuf object to encapsulate the feature data obj = self.get_new_object() - # set some ID to it - obj.id = id_ # TODO Needs offset? What happens when executed in parallel? - - # get properties of object and populate them (like defined in template.proto) + # TODO use an offset or some other method to avoid duplicate ids + # TODO when executed in parallel + obj.id = id_ + # Store attributes of the feature specific to the detection + # technique in the object properties properties = obj.properties properties.outer_threshold = self.outer_threshold - properties.inner_threshold = self.inner_threshold - properties.comparison_operator = ">" # TODO - + properties.comparison_operator = self.comparison_operator + # Create feature-specfic mask mask_outer = np.zeros_like(field, dtype=np.int8) mask_inner = np.zeros_like(field, dtype=np.int8) mask = np.zeros_like(field, dtype=np.int8) @@ -90,22 +107,18 @@ class DoubleThresholdIdentification(IdentificationStrategy): mask = mask.reshape(mask.shape[1:]) mask_outer = mask_outer.reshape(mask_outer.shape[1:]) mask_inner = mask_inner.reshape(mask_inner.shape[1:]) - + # Store feature-specific mask in object properties mask_to_proto(mask, properties.mask, compress=self.compress) + # Collect protobuf feature representations in output object list obj_list.append(obj) - + # Add the features to the full masks of the dataset dataset["inner_thresh_mask"].values = dataset["inner_thresh_mask"].values + mask_inner dataset["outer_thresh_mask"].values = dataset["outer_thresh_mask"].values + mask_outer - - # return the dataset (can be changed here), and the list of objects + # Return the updated dataset and the list of objects return dataset, obj_list def postprocess(self, dataset: xr.Dataset, pb2_desc, **kwargs): - # Here post process: called once after processing the whole dataset. - # If needed, the dataset or objects can be changed here. - - # objects are already in finished protobuf description, cf. the JSON output. - # pb2_desc.sets is list of trackable sets, each one contains the timesteps list, where each entry is a valid time and contains a list of objects. + # Nothing to do here for now return dataset, pb2_desc diff --git a/enstools/feature/identification/threshold/processing.py b/enstools/feature/identification/threshold/processing.py index b4e18bd..c8f68eb 100644 --- a/enstools/feature/identification/threshold/processing.py +++ b/enstools/feature/identification/threshold/processing.py @@ -3,6 +3,11 @@ import numpy as np from numba import njit +# Protobuf does not have a native multidimensional array data type but to store +# individual feature masks we need to serialize the int8 numpy array used for +# the masks. A MaskArray protobuf type is defined in threshold.proto and can be +# written and read with these routines: + def mask_to_proto(arr, proto_obj, compress=False): proto_obj.shape.extend(arr.shape) data = np.require(arr, dtype=np.int8).tobytes() @@ -18,6 +23,8 @@ def proto_to_mask(proto_obj): return np.frombuffer(data, dtype=np.int8).reshape(proto_obj.shape) +# Feature extration + @njit def detection_double_thresh(feature_id, inner_thresh_mask, outer_thresh_mask, entire_globe): -- GitLab From 276426e302e0de8bdfb534819d9220450e4fe5a3 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Wed, 8 Jun 2022 16:11:15 +0200 Subject: [PATCH 04/10] Add documentation to threshold-based methods --- .../identification/threshold/__init__.py | 2 + .../threshold/identification.py | 52 +++++++++++++++++++ .../identification/threshold/processing.py | 22 ++++++++ 3 files changed, 76 insertions(+) diff --git a/enstools/feature/identification/threshold/__init__.py b/enstools/feature/identification/threshold/__init__.py index 14311c2..37e9ac1 100644 --- a/enstools/feature/identification/threshold/__init__.py +++ b/enstools/feature/identification/threshold/__init__.py @@ -1 +1,3 @@ from .identification import DoubleThresholdIdentification,SingleThresholdIdentification +from .processing import proto_to_mask + diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 476845b..4a6fb49 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -7,6 +7,35 @@ import xarray as xr class DoubleThresholdIdentification(IdentificationStrategy): + """Identification technique based on two fixed thresholds. + + Parameters + ---------- + field : str + Name in the input dataset of the field in which to detect features. + outer_threshold : number + Value of the outer detection threshold. + inner_threshold : number + Value of the inner detection threshold. + comparison_operator : ">=" | ">" | "<" | "<=" + Comparison operator used to determine which regions are inside of the + thresholds. Use ">=" or ">" to obtain features that lie above (i.e. + exceed) the thresholds and "<=" and "<" to obtain features that lie + below. + processing_mode : "2d" | "3d" + Detect features on individual vertical levels ("2d") or across vertical + levels ("3d"). + compress : boolean + Toggle zlib compression of feature object masks in protobuf output. + Highly recommended to reduce the output size. + + + A feature must have at least one inner region. A feature may contain + multiple inner regions if they lie inside the same outer region. + + Additional fields added to the input dataset during processing: + `inner_thresh_mask`, `outer_thresh_mask`. + """ def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, processing_mode="2d", compress=True, **kwargs): @@ -55,12 +84,14 @@ class DoubleThresholdIdentification(IdentificationStrategy): ]) def precompute(self, dataset: xr.Dataset, **kwargs): + """""" # Should not be called directly by users, hide in docs assert self.field in dataset, f"required field '{self.field}' does not exist" dataset['inner_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) dataset['outer_thresh_mask'] = xr.zeros_like(dataset[self.field], dtype=int) return dataset def identify(self, dataset: xr.Dataset, **kwargs): + """""" # Should not be called directly by users, hide in docs # Field used for detection field = dataset[self.field].values # Handle 2D processing as special case of 3D processing @@ -118,11 +149,32 @@ class DoubleThresholdIdentification(IdentificationStrategy): return dataset, obj_list def postprocess(self, dataset: xr.Dataset, pb2_desc, **kwargs): + """""" # Should not be called directly by users, hide in docs # Nothing to do here for now return dataset, pb2_desc class SingleThresholdIdentification(DoubleThresholdIdentification): + """Identification technique based on a fixed threshold. + + Parameters + ---------- + field : str + Name in the input dataset of the field in which to detect features. + threshold : number + Value of the detection threshold. + comparison_operator : ">=" | ">" | "<" | "<=" + See :py:class:`DoubleThresholdIdentification`. + *args + Given to :py:class:`DoubleThresholdIdentification`. + **kwargs + Given to :py:class:`DoubleThresholdIdentification`. + + + Provided for convenience, :py:class:`SingleThresholdIdentification` objects + are :py:class:`DoubleThresholdIdentification` instances configured to use + the same inner and outer threshold. + """ def __init__(self, field, threshold, comparison_operator, *args, **kwargs): super().__init__(field, threshold, threshold, comparison_operator, *args, **kwargs) diff --git a/enstools/feature/identification/threshold/processing.py b/enstools/feature/identification/threshold/processing.py index c8f68eb..bd63026 100644 --- a/enstools/feature/identification/threshold/processing.py +++ b/enstools/feature/identification/threshold/processing.py @@ -16,7 +16,29 @@ def mask_to_proto(arr, proto_obj, compress=False): proto_obj.data = data proto_obj.zlib = compress + def proto_to_mask(proto_obj): + """Convert a protobuf-serialized object mask to a numpy array. + + Parameters + ---------- + proto_obj + Serialized object mask. + + Returns + ------- + array of int8 + + + A feature obtained with :py:class:`DoubleThresholdIdentification` carries + an object mask serialized to the protobuf format used for the detection + output. This function converts the serialized output to a numpy array, + which is 0-valued outside of the feature, 1-valued inside the region + between the outer an inner threshold, 2-valued inside the inner threshold. + + >>> from enstools.feature.identification.threshold import proto_to_mask + >>> arr = proto_to_mask(obj.properties.mask) + """ data = proto_obj.data if proto_obj.zlib: data = zlib.decompress(data) -- GitLab From c4a005f0d42f449991bfdb8024912bd1e3bda41a Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 10:40:32 +0200 Subject: [PATCH 05/10] Remove processing_mode argument (is handled by pipeline) --- .../identification/threshold/identification.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 4a6fb49..a4706ea 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -22,9 +22,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): thresholds. Use ">=" or ">" to obtain features that lie above (i.e. exceed) the thresholds and "<=" and "<" to obtain features that lie below. - processing_mode : "2d" | "3d" - Detect features on individual vertical levels ("2d") or across vertical - levels ("3d"). compress : boolean Toggle zlib compression of feature object masks in protobuf output. Highly recommended to reduce the output size. @@ -37,8 +34,7 @@ class DoubleThresholdIdentification(IdentificationStrategy): `inner_thresh_mask`, `outer_thresh_mask`. """ - def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, - processing_mode="2d", compress=True, **kwargs): + def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, compress=True): self.field = field self.outer_threshold = outer_threshold self.inner_threshold = inner_threshold @@ -66,10 +62,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): "threshold values incompatible with ordering, check inner and outer values" # Compression of object mask array in protobuf objects self.compress = compress - # Specify processing mode 2d or 3d: In 2d, identification will be performed on 2d levels, - # in 3d the identification will be performed per 3d block - assert processing_mode in { "2d", "3d" }, "choose processing mode from '2d' or '3d'" - self.processing_mode = processing_mode def __repr__(self): return "\n".join([ @@ -78,7 +70,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): f" outer_threshold={self.outer_threshold},", f" inner_threshold={self.inner_threshold},", f" comparison_operator='{self.comparison_operator}',", - f" processing_mode='{self.processing_mode}',", f" compress=" + ("True" if self.compress else "False"), ")" ]) -- GitLab From 8041f6dc9df714ce026cf043b94a8254f4a2545c Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 10:47:51 +0200 Subject: [PATCH 06/10] Remove individual object masks stored in protobuf --- .../identification/threshold/__init__.py | 3 +- .../threshold/identification.py | 18 ++------ .../identification/threshold/processing.py | 43 ------------------- .../identification/threshold/threshold.proto | 13 ++---- 4 files changed, 7 insertions(+), 70 deletions(-) diff --git a/enstools/feature/identification/threshold/__init__.py b/enstools/feature/identification/threshold/__init__.py index 37e9ac1..11af4de 100644 --- a/enstools/feature/identification/threshold/__init__.py +++ b/enstools/feature/identification/threshold/__init__.py @@ -1,3 +1,2 @@ -from .identification import DoubleThresholdIdentification,SingleThresholdIdentification -from .processing import proto_to_mask +from .identification import DoubleThresholdIdentification, SingleThresholdIdentification diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index a4706ea..0aa8c85 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -1,5 +1,5 @@ from ..identification import IdentificationStrategy -from .processing import mask_to_proto, detection_double_thresh +from .processing import detection_double_thresh import operator import numpy as np @@ -22,9 +22,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): thresholds. Use ">=" or ">" to obtain features that lie above (i.e. exceed) the thresholds and "<=" and "<" to obtain features that lie below. - compress : boolean - Toggle zlib compression of feature object masks in protobuf output. - Highly recommended to reduce the output size. A feature must have at least one inner region. A feature may contain @@ -34,7 +31,7 @@ class DoubleThresholdIdentification(IdentificationStrategy): `inner_thresh_mask`, `outer_thresh_mask`. """ - def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, compress=True): + def __init__(self, field, outer_threshold, inner_threshold, comparison_operator): self.field = field self.outer_threshold = outer_threshold self.inner_threshold = inner_threshold @@ -60,8 +57,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): assert (self.inner_threshold == self.outer_threshold or self.compare(self.inner_threshold, self.outer_threshold)), \ "threshold values incompatible with ordering, check inner and outer values" - # Compression of object mask array in protobuf objects - self.compress = compress def __repr__(self): return "\n".join([ @@ -69,8 +64,7 @@ class DoubleThresholdIdentification(IdentificationStrategy): f" field='{self.field}',", f" outer_threshold={self.outer_threshold},", f" inner_threshold={self.inner_threshold},", - f" comparison_operator='{self.comparison_operator}',", - f" compress=" + ("True" if self.compress else "False"), + f" comparison_operator='{self.comparison_operator}'", ")" ]) @@ -120,17 +114,11 @@ class DoubleThresholdIdentification(IdentificationStrategy): # Create feature-specfic mask mask_outer = np.zeros_like(field, dtype=np.int8) mask_inner = np.zeros_like(field, dtype=np.int8) - mask = np.zeros_like(field, dtype=np.int8) - mask[outer_features == id_] = 1 - mask[inner_features == id_] = 2 mask_outer[outer_features == id_] = 1 mask_inner[inner_features == id_] = 1 if self.processing_mode == "2d": - mask = mask.reshape(mask.shape[1:]) mask_outer = mask_outer.reshape(mask_outer.shape[1:]) mask_inner = mask_inner.reshape(mask_inner.shape[1:]) - # Store feature-specific mask in object properties - mask_to_proto(mask, properties.mask, compress=self.compress) # Collect protobuf feature representations in output object list obj_list.append(obj) # Add the features to the full masks of the dataset diff --git a/enstools/feature/identification/threshold/processing.py b/enstools/feature/identification/threshold/processing.py index bd63026..75d0f46 100644 --- a/enstools/feature/identification/threshold/processing.py +++ b/enstools/feature/identification/threshold/processing.py @@ -1,50 +1,7 @@ -import zlib import numpy as np from numba import njit -# Protobuf does not have a native multidimensional array data type but to store -# individual feature masks we need to serialize the int8 numpy array used for -# the masks. A MaskArray protobuf type is defined in threshold.proto and can be -# written and read with these routines: - -def mask_to_proto(arr, proto_obj, compress=False): - proto_obj.shape.extend(arr.shape) - data = np.require(arr, dtype=np.int8).tobytes() - if compress: - data = zlib.compress(data) - proto_obj.data = data - proto_obj.zlib = compress - - -def proto_to_mask(proto_obj): - """Convert a protobuf-serialized object mask to a numpy array. - - Parameters - ---------- - proto_obj - Serialized object mask. - - Returns - ------- - array of int8 - - - A feature obtained with :py:class:`DoubleThresholdIdentification` carries - an object mask serialized to the protobuf format used for the detection - output. This function converts the serialized output to a numpy array, - which is 0-valued outside of the feature, 1-valued inside the region - between the outer an inner threshold, 2-valued inside the inner threshold. - - >>> from enstools.feature.identification.threshold import proto_to_mask - >>> arr = proto_to_mask(obj.properties.mask) - """ - data = proto_obj.data - if proto_obj.zlib: - data = zlib.decompress(data) - return np.frombuffer(data, dtype=np.int8).reshape(proto_obj.shape) - - # Feature extration @njit diff --git a/enstools/feature/identification/threshold/threshold.proto b/enstools/feature/identification/threshold/threshold.proto index 56ae52f..3c3ec83 100644 --- a/enstools/feature/identification/threshold/threshold.proto +++ b/enstools/feature/identification/threshold/threshold.proto @@ -4,16 +4,9 @@ syntax = "proto2"; * The "Properties" message is required and contains all the properties of an object. */ -message MaskArray { - repeated int32 shape = 1; - required bytes data = 2; - required bool zlib = 3; -} - message Properties { - required MaskArray mask = 1; - required float outer_threshold = 2; - required float inner_threshold = 3; - required string comparison_operator = 4; + required float outer_threshold = 1; + required float inner_threshold = 2; + required string comparison_operator = 3; } -- GitLab From a0f03ccc3fa04d8bb259cbec4518f78a323e33d7 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 10:55:23 +0200 Subject: [PATCH 07/10] Set proper object ids in inner_thresh_mask and outer_thresh_mask --- .../threshold/identification.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 0aa8c85..6d1ff14 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -111,19 +111,18 @@ class DoubleThresholdIdentification(IdentificationStrategy): properties.outer_threshold = self.outer_threshold properties.inner_threshold = self.inner_threshold properties.comparison_operator = self.comparison_operator - # Create feature-specfic mask - mask_outer = np.zeros_like(field, dtype=np.int8) - mask_inner = np.zeros_like(field, dtype=np.int8) - mask_outer[outer_features == id_] = 1 - mask_inner[inner_features == id_] = 1 - if self.processing_mode == "2d": - mask_outer = mask_outer.reshape(mask_outer.shape[1:]) - mask_inner = mask_inner.reshape(mask_inner.shape[1:]) # Collect protobuf feature representations in output object list obj_list.append(obj) + # Create feature-specfic mask + inner_mask = inner_features == id_ + outer_mask = outer_features == id_ + if self.processing_mode == "2d": + outer_mask = outer_mask.reshape(outer_mask.shape[1:]) + inner_mask = inner_mask.reshape(inner_mask.shape[1:]) # Add the features to the full masks of the dataset - dataset["inner_thresh_mask"].values = dataset["inner_thresh_mask"].values + mask_inner - dataset["outer_thresh_mask"].values = dataset["outer_thresh_mask"].values + mask_outer + dataset["inner_thresh_mask"].values[inner_mask] = id_ + dataset["outer_thresh_mask"].values[outer_mask] = id_ + # Return the updated dataset and the list of objects return dataset, obj_list -- GitLab From 478ad692e7177e802863e374b2b003e1b1853ff3 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 12:57:20 +0200 Subject: [PATCH 08/10] Generate unique ids in postprocessing --- .../threshold/identification.py | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index 6d1ff14..f3d2e55 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -1,3 +1,6 @@ +from enstools.misc import get_time_dim, get_ensemble_dim # TODO relative? +from enstools.feature.util.enstools_utils import get_vertical_dim, get_init_time_dim +from enstools.feature.util.data_utils import SplitDimension from ..identification import IdentificationStrategy from .processing import detection_double_thresh @@ -102,8 +105,6 @@ class DoubleThresholdIdentification(IdentificationStrategy): if id_ == 0: continue # Create a new protobuf object to encapsulate the feature data obj = self.get_new_object() - # TODO use an offset or some other method to avoid duplicate ids - # TODO when executed in parallel obj.id = id_ # Store attributes of the feature specific to the detection # technique in the object properties @@ -128,7 +129,37 @@ class DoubleThresholdIdentification(IdentificationStrategy): def postprocess(self, dataset: xr.Dataset, pb2_desc, **kwargs): """""" # Should not be called directly by users, hide in docs - # Nothing to do here for now + dims_map = {} + if (ens_dim := get_ensemble_dim(dataset)) is not None: + dims_map[ens_dim] = SplitDimension.SplitDimensionDim.ENSEMBLE_MEMBER + if (init_dim := get_init_time_dim(dataset)) is not None: + dims_map[init_dim] = SplitDimension.SplitDimensionDim.INIT_TIME + if self.processing_mode == "2d" and (level_dim := get_vertical_dim(dataset)) is not None: + dims_map[level_dim] = SplitDimension.SplitDimensionDim.LEVEL + + time_dim = get_time_dim(dataset) + + offset = 0 + + for obj_set in pb2_desc.sets: + selection = { dim: getattr(obj_set, pb_name) for dim, pb_name in dims_map.items() } + + for timestep in obj_set.timesteps: + new_offset = offset + + selection[time_dim] = getattr(timestep, SplitDimension.SplitDimensionDim.VALID_TIME) + field = dataset.sel(selection) + + field["inner_thresh_mask"] += offset + field["outer_thresh_mask"] += offset + + for obj in timestep.objects: + obj.id = obj.id + offset + if obj.id > new_offset: + new_offset = obj.id + + offset = new_offset + return dataset, pb2_desc -- GitLab From 6cac4e8aac3be6e8560e736b1ddf0fd93a2eee38 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 14:53:27 +0200 Subject: [PATCH 09/10] Clean up postprocessing --- .../threshold/identification.py | 69 ++++++++++--------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index f3d2e55..c3bb9b2 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -34,7 +34,7 @@ class DoubleThresholdIdentification(IdentificationStrategy): `inner_thresh_mask`, `outer_thresh_mask`. """ - def __init__(self, field, outer_threshold, inner_threshold, comparison_operator): + def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, unique_ids=True): self.field = field self.outer_threshold = outer_threshold self.inner_threshold = inner_threshold @@ -60,6 +60,7 @@ class DoubleThresholdIdentification(IdentificationStrategy): assert (self.inner_threshold == self.outer_threshold or self.compare(self.inner_threshold, self.outer_threshold)), \ "threshold values incompatible with ordering, check inner and outer values" + self.unique_ids = unique_ids def __repr__(self): return "\n".join([ @@ -129,37 +130,41 @@ class DoubleThresholdIdentification(IdentificationStrategy): def postprocess(self, dataset: xr.Dataset, pb2_desc, **kwargs): """""" # Should not be called directly by users, hide in docs - dims_map = {} - if (ens_dim := get_ensemble_dim(dataset)) is not None: - dims_map[ens_dim] = SplitDimension.SplitDimensionDim.ENSEMBLE_MEMBER - if (init_dim := get_init_time_dim(dataset)) is not None: - dims_map[init_dim] = SplitDimension.SplitDimensionDim.INIT_TIME - if self.processing_mode == "2d" and (level_dim := get_vertical_dim(dataset)) is not None: - dims_map[level_dim] = SplitDimension.SplitDimensionDim.LEVEL - - time_dim = get_time_dim(dataset) - - offset = 0 - - for obj_set in pb2_desc.sets: - selection = { dim: getattr(obj_set, pb_name) for dim, pb_name in dims_map.items() } - - for timestep in obj_set.timesteps: - new_offset = offset - - selection[time_dim] = getattr(timestep, SplitDimension.SplitDimensionDim.VALID_TIME) - field = dataset.sel(selection) - - field["inner_thresh_mask"] += offset - field["outer_thresh_mask"] += offset - - for obj in timestep.objects: - obj.id = obj.id + offset - if obj.id > new_offset: - new_offset = obj.id - - offset = new_offset - + if self.unique_ids: + dataarray = dataset[self.field] + # Names of additional dimensions are pre-specified. Create a mapping + # that connects dimension names in the dataset with the pre-specified + # names used for the objects. Only consider dimensions that actually + # appear in the dataset. + dims_map = {} + if (ens_dim := get_ensemble_dim(dataarray)) is not None: + dims_map[ens_dim] = SplitDimension.SplitDimensionDim.ENSEMBLE_MEMBER + if (init_dim := get_init_time_dim(dataarray)) is not None: + dims_map[init_dim] = SplitDimension.SplitDimensionDim.INIT_TIME + if self.processing_mode == "2d" and (level_dim := get_vertical_dim(dataarray)) is not None: + dims_map[level_dim] = SplitDimension.SplitDimensionDim.LEVEL + # Valid time is a special dimension and has its own level in the object + # collection + time_dim = get_time_dim(dataarray) + # Generate unique ids by applying offsets to the existing ids + offset = 0 + # Iterate over dimensions other than valid time (these are flattened) + for obj_set in pb2_desc.sets: + selection = { dim: getattr(obj_set, pb_name) for dim, pb_name in dims_map.items() } + # Iterate over the valid times + for timestep in obj_set.timesteps: + selection[time_dim] = getattr(timestep, SplitDimension.SplitDimensionDim.VALID_TIME) + field = dataset.sel(selection) + # Increase ids by offset wherever an object was found + field["inner_thresh_mask"].data[field["inner_thresh_mask"].data != 0] += offset + field["outer_thresh_mask"].data[field["outer_thresh_mask"].data != 0] += offset + # Change ids of objects and generate new offset + new_offset = offset + for obj in timestep.objects: + obj.id = obj.id + offset + if obj.id > new_offset: + new_offset = obj.id + offset = new_offset return dataset, pb2_desc -- GitLab From ed617a96d79159d6b021b23fc954640f63ccbcd4 Mon Sep 17 00:00:00 2001 From: Christopher Polster <cpolster@uni-mainz.de> Date: Tue, 14 Jun 2022 15:44:40 +0200 Subject: [PATCH 10/10] Allow IdentificationTechnique to provide protobuf reference --- .../threshold/identification.py | 3 +++ enstools/feature/pipeline.py | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/enstools/feature/identification/threshold/identification.py b/enstools/feature/identification/threshold/identification.py index c3bb9b2..642cb1f 100644 --- a/enstools/feature/identification/threshold/identification.py +++ b/enstools/feature/identification/threshold/identification.py @@ -2,6 +2,7 @@ from enstools.misc import get_time_dim, get_ensemble_dim # TODO relative? from enstools.feature.util.enstools_utils import get_vertical_dim, get_init_time_dim from enstools.feature.util.data_utils import SplitDimension from ..identification import IdentificationStrategy +from .._proto_gen import threshold_pb2 from .processing import detection_double_thresh import operator @@ -34,6 +35,8 @@ class DoubleThresholdIdentification(IdentificationStrategy): `inner_thresh_mask`, `outer_thresh_mask`. """ + pb_reference = threshold_pb2 + def __init__(self, field, outer_threshold, inner_threshold, comparison_operator, unique_ids=True): self.field = field self.outer_threshold = outer_threshold diff --git a/enstools/feature/pipeline.py b/enstools/feature/pipeline.py index d60f958..c64a239 100644 --- a/enstools/feature/pipeline.py +++ b/enstools/feature/pipeline.py @@ -19,7 +19,15 @@ class FeaturePipeline: 3D, per 3D block. """ - def __init__(self, proto_ref, processing_mode='2d'): + def __init__(self, proto_ref=None, processing_mode='2d'): + """ + Specify processing mode 2d or 3d: In 2d, identification and tracking will be performed on 2d levels, in 3d per 3d block. + + Parameters + ---------- + proto_ref + processing_mode + """ self.id_tech = None self.tr_tech = None @@ -42,8 +50,15 @@ class FeaturePipeline: The identification strategy to use in the pipeline. """ self.id_tech = strategy - self.id_tech.pb_reference = self.pb_reference self.id_tech.processing_mode = self.processing_mode + if self.pb_reference is None: + try: + self.pb_reference = self.id_tech.pb_reference + except AttributeError: + raise Exception("a protobuf reference must be provided by either " + "the feature pipeline or the identification technique/strategy") + else: + self.id_tech.pb_reference = self.pb_reference def set_tracking_strategy(self, strategy: TrackingStrategy): """ -- GitLab