Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
K
KerasROOTClassification
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Eric.Schanet
KerasROOTClassification
Commits
96a77543
Commit
96a77543
authored
6 years ago
by
Nikolai.Hartmann
Browse files
Options
Downloads
Patches
Plain Diff
Implementing WeightedRobustScaler (now default)
parent
b607030c
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
toolkit.py
+12
-11
12 additions, 11 deletions
toolkit.py
utils.py
+47
-0
47 additions, 0 deletions
utils.py
with
59 additions
and
11 deletions
toolkit.py
+
12
−
11
View file @
96a77543
...
...
@@ -29,16 +29,16 @@ import h5py
from
sklearn.preprocessing
import
StandardScaler
,
RobustScaler
from
sklearn.externals
import
joblib
from
sklearn.metrics
import
roc_curve
,
auc
from
keras.models
import
Sequential
from
keras.layers
import
Dense
,
Dropout
from
keras.models
import
model_from_json
from
keras.callbacks
import
History
,
EarlyStopping
,
CSVLogger
,
ModelCheckpoint
from
keras.optimizers
import
SGD
import
keras.optimizers
import
matplotlib.pyplot
as
plt
from
.utils
import
WeightedRobustScaler
,
weighted_quantile
# configure number of cores
# this doesn't seem to work, but at least with these settings keras only uses 4 processes
import
tensorflow
as
tf
...
...
@@ -197,7 +197,7 @@ class ClassificationProject(object):
validation_split
=
0.33
,
activation_function
=
'
relu
'
,
activation_function_output
=
'
sigmoid
'
,
scaler_type
=
"
RobustScaler
"
,
scaler_type
=
"
Weighted
RobustScaler
"
,
step_signal
=
2
,
step_bkg
=
2
,
optimizer
=
"
SGD
"
,
...
...
@@ -450,18 +450,18 @@ class ClassificationProject(object):
logger
.
info
(
"
Loaded existing scaler from {}
"
.
format
(
filename
))
except
IOError
:
logger
.
info
(
"
Creating new {}
"
.
format
(
self
.
scaler_type
))
scaler_fit_kwargs
=
dict
()
if
self
.
scaler_type
==
"
StandardScaler
"
:
self
.
_scaler
=
StandardScaler
()
elif
self
.
scaler_type
==
"
RobustScaler
"
:
self
.
_scaler
=
RobustScaler
()
elif
self
.
scaler_type
==
"
WeightedRobustScaler
"
:
self
.
_scaler
=
WeightedRobustScaler
()
scaler_fit_kwargs
[
"
weights
"
]
=
self
.
w_train
*
np
.
array
(
self
.
class_weight
)[
self
.
y_train
.
astype
(
int
)]
else
:
raise
ValueError
(
"
Scaler type {} unknown
"
.
format
(
self
.
scaler_type
))
logger
.
info
(
"
Fitting {} to training data
"
.
format
(
self
.
scaler_type
))
self
.
_scaler
.
fit
(
self
.
x_train
)
# i think this would refit to test data (and overwrite the parameters)
# probably we either want to fit only training data or training and test data together
# logger.info("Fitting StandardScaler to test data")
# self._scaler.fit(self.x_test)
self
.
_scaler
.
fit
(
self
.
x_train
,
**
scaler_fit_kwargs
)
joblib
.
dump
(
self
.
_scaler
,
filename
)
return
self
.
_scaler
...
...
@@ -866,9 +866,10 @@ class ClassificationProject(object):
# calculate percentiles to get a heuristic for the range to be plotted
# (should in principle also be done with weights, but for now do it unweighted)
range_sig
=
np
.
percentile
(
sig
,
[
1
,
99
])
range_bkg
=
np
.
percentile
(
sig
,
[
1
,
99
])
plot_range
=
(
min
(
range_sig
[
0
],
range_bkg
[
0
]),
max
(
range_sig
[
1
],
range_sig
[
1
]))
# range_sig = np.percentile(sig, [1, 99])
# range_bkg = np.percentile(sig, [1, 99])
# plot_range = (min(range_sig[0], range_bkg[0]), max(range_sig[1], range_sig[1]))
plot_range
=
weighted_quantile
(
self
.
x_train
[:,
var_index
],
[
0.1
,
0.99
],
sample_weight
=
self
.
w_train
*
np
.
array
(
self
.
class_weight
)[
self
.
y_train
.
astype
(
int
)])
logger
.
debug
(
"
Calculated range based on percentiles: {}
"
.
format
(
plot_range
))
...
...
This diff is collapsed.
Click to expand it.
utils.py
+
47
−
0
View file @
96a77543
...
...
@@ -5,6 +5,7 @@ import logging
import
numpy
as
np
import
keras.backend
as
K
from
sklearn.preprocessing
import
RobustScaler
from
meme
import
cache
...
...
@@ -95,3 +96,49 @@ def get_max_activation_events(model, ranges, ntries, layer, neuron, seed=42, **k
losses
=
np
.
concatenate
([
losses
,
loss
])
return
losses
,
events
"
from https://stackoverflow.com/questions/21844024/weighted-percentile-using-numpy#29677616
"
def
weighted_quantile
(
values
,
quantiles
,
sample_weight
=
None
,
values_sorted
=
False
,
old_style
=
False
):
"""
Very close to np.percentile, but supports weights.
NOTE: quantiles should be in [0, 1]!
:param values: np.array with data
:param quantiles: array-like with many quantiles needed
:param sample_weight: array-like of the same length as `array`
:param values_sorted: bool, if True, then will avoid sorting of initial array
:param old_style: if True, will correct output to be consistent with np.percentile.
:return: np.array with computed quantiles.
"""
values
=
np
.
array
(
values
)
quantiles
=
np
.
array
(
quantiles
)
if
sample_weight
is
None
:
sample_weight
=
np
.
ones
(
len
(
values
))
sample_weight
=
np
.
array
(
sample_weight
)
assert
np
.
all
(
quantiles
>=
0
)
and
np
.
all
(
quantiles
<=
1
),
'
quantiles should be in [0, 1]
'
if
not
values_sorted
:
sorter
=
np
.
argsort
(
values
)
values
=
values
[
sorter
]
sample_weight
=
sample_weight
[
sorter
]
weighted_quantiles
=
np
.
cumsum
(
sample_weight
)
-
0.5
*
sample_weight
if
old_style
:
# To be convenient with np.percentile
weighted_quantiles
-=
weighted_quantiles
[
0
]
weighted_quantiles
/=
weighted_quantiles
[
-
1
]
else
:
weighted_quantiles
/=
np
.
sum
(
sample_weight
)
return
np
.
interp
(
quantiles
,
weighted_quantiles
,
values
)
class
WeightedRobustScaler
(
RobustScaler
):
def
fit
(
self
,
X
,
y
=
None
,
weights
=
None
):
RobustScaler
.
fit
(
self
,
X
,
y
)
if
weights
is
None
:
return
self
else
:
wqs
=
np
.
array
([
weighted_quantile
(
X
[:,
i
],
[
0.25
,
0.5
,
0.75
],
sample_weight
=
weights
)
for
i
in
range
(
X
.
shape
[
1
])])
self
.
center_
=
wqs
[:,
1
]
self
.
scale_
=
wqs
[:,
2
]
-
wqs
[:,
0
]
return
self
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment