From 1ad29ff9ac0a05f9861d1d0b943842ed9a7d0788 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <Nikolai.Hartmann@physik.uni-muenchen.de>
Date: Tue, 21 Aug 2018 10:36:38 +0200
Subject: [PATCH] introducing transform member function

---
Review notes (not part of the commit message; text between the "---" line
above and the first "diff --git" line is ignored when the patch is applied):

* ClassificationProjectRNN._batch_transform allocated its output buffer with
  np.empty(len(x)), which is one-dimensional. Assigning a batch that has more
  than one dimension to transformed[start:stop] then fails with a
  broadcasting ValueError. The buffer is now allocated with
  np.empty(np.shape(x)) so that it has the same shape as the input. This
  assumes fn returns an array of the same shape as the batch it is given, as
  the slice assignment already requires -- TODO confirm for the scaler used.
* That change replaces one added line with one added line, so the hunk
  sizes and the diffstat are unchanged. The post-image blob id on the
  "index" line predates the change.
* Line breaks and indentation were restored from a copy of this patch in
  which they had been collapsed. Indentation follows the Python block
  structure in four-space steps and blank context lines were placed to
  match the hunk line counts. The depth of the eval_score("train") context
  line and the spacing before the inline "# copy" comments are assumptions
  -- TODO confirm against toolkit.py, or apply with
  "git apply --ignore-whitespace".

 toolkit.py | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/toolkit.py b/toolkit.py
index d366a56..29adbfb 100755
--- a/toolkit.py
+++ b/toolkit.py
@@ -559,6 +559,14 @@ class ClassificationProject(object):
         return self._scaler
 
 
+    def transform(self, x):
+        return self.scaler.transform(x)
+
+
+    def inverse_transform(self, x):
+        return self.scaler.inverse_transform(x)
+
+
     @property
     def history(self):
         params_file = os.path.join(self.project_dir, "history_params.json")
@@ -592,7 +600,6 @@ class ClassificationProject(object):
 
     def _transform_data(self):
         if not self.data_transformed:
-            # todo: what to do about the outliers? Where do they come from?
             if logger.level <= logging.DEBUG:
                 logger.debug("training data before transformation: {}".format(self.x_train))
                 logger.debug("minimum values: {}".format([np.min(self.x_train[:,i][~np.isnan(self.x_train[:,i])])
@@ -929,7 +936,7 @@ class ClassificationProject(object):
 
     def evaluate(self, x_eval, mode=None):
         logger.debug("Evaluate score for {}".format(x_eval))
-        x_eval = self.scaler.transform(x_eval)
+        x_eval = self.transform(x_eval)
         logger.debug("Evaluate for transformed array: {}".format(x_eval))
         return self.predict(x_eval, mode=mode)
 
@@ -1697,12 +1704,30 @@ class ClassificationProjectRNN(ClassificationProject):
         eval_score("train")
 
 
+    def _batch_transform(self, x, fn, batch_size):
+        "Transform array in batches, temporarily setting mask_values to nan"
+        transformed = np.empty(np.shape(x))
+        for start in range(0, len(x), batch_size):
+            stop = start+batch_size
+            x_batch = np.array(x[start:stop]) # copy
+            x_batch[x_batch == self.mask_value] = np.nan
+            x_batch = fn(x_batch)
+            x_batch[np.isnan(x_batch)] = self.mask_value
+            transformed[start:stop] = x_batch
+        return transformed
+
+
+    def transform(self, x, batch_size=10000):
+        return self._batch_transform(x, self.scaler.transform, batch_size)
+
+
+    def inverse_transform(self, x, batch_size=10000):
+        return self._batch_transform(x, self.scaler.inverse_transform, batch_size)
+
+
     def evaluate(self, x_eval, mode=None):
         logger.debug("Evaluate score for {}".format(x_eval))
-        x_eval = np.array(x_eval) # copy
-        x_eval[x_eval==self.mask_value] = np.nan
-        x_eval = self.scaler.transform(x_eval)
-        x_eval[np.isnan(x_eval)] = self.mask_value
+        x_eval = self.transform(x_eval)
         logger.debug("Evaluate for transformed array: {}".format(x_eval))
         return self.predict(self.get_input_list(x_eval), mode=mode)
 
-- 
GitLab