Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
K
KerasROOTClassification
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Eric.Schanet
KerasROOTClassification
Commits
a396d67b
Commit
a396d67b
authored
6 years ago
by
Nikolai
Browse files
Options
Downloads
Patches
Plain Diff
introduce option to rename branch expressions
parent
96a77543
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
plotting.py
+26
-26
26 additions, 26 deletions
plotting.py
scripts/plot_NN_2D.py
+4
-4
4 additions, 4 deletions
scripts/plot_NN_2D.py
toolkit.py
+43
-11
43 additions, 11 deletions
toolkit.py
with
73 additions
and
41 deletions
plotting.py
+
26
−
26
View file @
a396d67b
...
...
@@ -346,24 +346,24 @@ if __name__ == "__main__":
print
(
"
Mean signal:
"
)
for
branch_index
,
val
in
enumerate
(
mean_signal
):
print
(
"
{:>20}: {:<10.3f}
"
.
format
(
c
.
branche
s
[
branch_index
],
val
))
print
(
"
{:>20}: {:<10.3f}
"
.
format
(
c
.
field
s
[
branch_index
],
val
))
plot_NN_vs_var_1D
(
"
met.pdf
"
,
mean_signal
,
scorefun
=
c
.
evaluate
,
var_index
=
c
.
branche
s
.
index
(
"
met
"
),
var_index
=
c
.
field
s
.
index
(
"
met
"
),
var_range
=
(
0
,
1000
,
10
),
var_label
=
"
met [GeV]
"
)
plot_NN_vs_var_1D
(
"
mt.pdf
"
,
mean_signal
,
scorefun
=
c
.
evaluate
,
var_index
=
c
.
branche
s
.
index
(
"
mt
"
),
var_index
=
c
.
field
s
.
index
(
"
mt
"
),
var_range
=
(
0
,
500
,
10
),
var_label
=
"
mt [GeV]
"
)
plot_NN_vs_var_2D
(
"
mt_vs_met.pdf
"
,
means
=
mean_signal
,
scorefun
=
c
.
evaluate
,
varx_index
=
c
.
branche
s
.
index
(
"
met
"
),
vary_index
=
c
.
branche
s
.
index
(
"
mt
"
),
varx_index
=
c
.
field
s
.
index
(
"
met
"
),
vary_index
=
c
.
field
s
.
index
(
"
mt
"
),
nbinsx
=
100
,
xmin
=
0
,
xmax
=
1000
,
nbinsy
=
100
,
ymin
=
0
,
ymax
=
500
,
varx_label
=
"
met [GeV]
"
,
vary_label
=
"
mt [GeV]
"
)
...
...
@@ -371,14 +371,14 @@ if __name__ == "__main__":
plot_NN_vs_var_2D_all
(
"
mt_vs_met_all.pdf
"
,
means
=
mean_signal
,
model
=
c
.
model
,
transform_function
=
c
.
scaler
.
transform
,
var1_index
=
c
.
branche
s
.
index
(
"
met
"
),
var1_range
=
(
0
,
1000
,
10
),
var2_index
=
c
.
branche
s
.
index
(
"
mt
"
),
var2_range
=
(
0
,
500
,
10
),
var1_index
=
c
.
field
s
.
index
(
"
met
"
),
var1_range
=
(
0
,
1000
,
10
),
var2_index
=
c
.
field
s
.
index
(
"
mt
"
),
var2_range
=
(
0
,
500
,
10
),
var1_label
=
"
met [GeV]
"
,
var2_label
=
"
mt [GeV]
"
)
plot_NN_vs_var_2D
(
"
mt_vs_met_crosscheck.pdf
"
,
means
=
mean_signal
,
scorefun
=
get_single_neuron_function
(
c
.
model
,
layer
=
3
,
neuron
=
0
,
scaler
=
c
.
scaler
),
varx_index
=
c
.
branche
s
.
index
(
"
met
"
),
vary_index
=
c
.
branche
s
.
index
(
"
mt
"
),
varx_index
=
c
.
field
s
.
index
(
"
met
"
),
vary_index
=
c
.
field
s
.
index
(
"
mt
"
),
nbinsx
=
100
,
xmin
=
0
,
xmax
=
1000
,
nbinsy
=
100
,
ymin
=
0
,
ymax
=
500
,
varx_label
=
"
met [GeV]
"
,
vary_label
=
"
mt [GeV]
"
)
...
...
@@ -388,7 +388,7 @@ if __name__ == "__main__":
# transformed events
c
.
load
(
reload
=
True
)
ranges
=
[
np
.
percentile
(
c
.
x_test
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
branche
s
))]
ranges
=
[
np
.
percentile
(
c
.
x_test
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
field
s
))]
losses
,
events
=
get_max_activation_events
(
c
.
model
,
ranges
,
ntries
=
100000
,
layer
=
3
,
neuron
=
0
,
threshold
=
0.2
)
...
...
@@ -396,8 +396,8 @@ if __name__ == "__main__":
plot_hist_2D_events
(
"
mt_vs_met_actmaxhist.pdf
"
,
events
[:,
c
.
branche
s
.
index
(
"
met
"
)],
events
[:,
c
.
branche
s
.
index
(
"
mt
"
)],
events
[:,
c
.
field
s
.
index
(
"
met
"
)],
events
[:,
c
.
field
s
.
index
(
"
mt
"
)],
100
,
0
,
1000
,
100
,
0
,
500
,
varx_label
=
"
met [GeV]
"
,
vary_label
=
"
mt [GeV]
"
,
...
...
@@ -405,7 +405,7 @@ if __name__ == "__main__":
plot_hist_2D_events
(
"
mt_vs_output_actmax.pdf
"
,
events
[:,
c
.
branche
s
.
index
(
"
mt
"
)],
events
[:,
c
.
field
s
.
index
(
"
mt
"
)],
losses
,
100
,
0
,
500
,
100
,
0
,
1
,
...
...
@@ -417,13 +417,13 @@ if __name__ == "__main__":
def
test_cond_max_act
():
c
.
load
(
reload
=
True
)
ranges
=
[
np
.
percentile
(
c
.
x_test
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
branche
s
))]
ranges
=
[
np
.
percentile
(
c
.
x_test
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
field
s
))]
plot_cond_avg_actmax_2D
(
"
mt_vs_met_cond_actmax.pdf
"
,
c
.
model
,
3
,
0
,
ranges
,
c
.
branche
s
.
index
(
"
met
"
),
c
.
branche
s
.
index
(
"
mt
"
),
c
.
field
s
.
index
(
"
met
"
),
c
.
field
s
.
index
(
"
mt
"
),
30
,
0
,
1000
,
30
,
0
,
500
,
scaler
=
c
.
scaler
,
...
...
@@ -439,7 +439,7 @@ if __name__ == "__main__":
plot_hist_2D_events
(
"
mt_vs_output_signal_test.pdf
"
,
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
mt
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
mt
"
)],
c
.
scores_test
[
c
.
y_test
==
1
].
reshape
(
-
1
),
100
,
0
,
1000
,
100
,
0
,
1
,
...
...
@@ -449,8 +449,8 @@ if __name__ == "__main__":
plot_hist_2D_events
(
"
mt_vs_met_signal.pdf
"
,
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
mt
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
mt
"
)],
100
,
0
,
1000
,
100
,
0
,
500
,
varx_label
=
"
met [GeV]
"
,
...
...
@@ -460,8 +460,8 @@ if __name__ == "__main__":
plot_hist_2D_events
(
"
mt_vs_met_backgound.pdf
"
,
utrf_x_test
[
c
.
y_test
==
0
][:,
c
.
branche
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
0
][:,
c
.
branche
s
.
index
(
"
mt
"
)],
utrf_x_test
[
c
.
y_test
==
0
][:,
c
.
field
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
0
][:,
c
.
field
s
.
index
(
"
mt
"
)],
100
,
0
,
1000
,
100
,
0
,
500
,
varx_label
=
"
met [GeV]
"
,
...
...
@@ -472,7 +472,7 @@ if __name__ == "__main__":
# plot_hist_2D_events(
# "apl_vs_output_actmax.pdf",
# events[:,c.
branche
s.index("LepAplanarity")],
# events[:,c.
field
s.index("LepAplanarity")],
# losses,
# 100, 0, 0.1,
# 100, 0, 1,
...
...
@@ -487,8 +487,8 @@ if __name__ == "__main__":
plot_profile_2D
(
"
mt_vs_met_profilemean_sig.pdf
"
,
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
mt
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
mt
"
)],
c
.
scores_test
[
c
.
y_test
==
1
].
reshape
(
-
1
),
20
,
0
,
500
,
20
,
0
,
1000
,
...
...
@@ -497,8 +497,8 @@ if __name__ == "__main__":
plot_profile_2D
(
"
mt_vs_met_profilemax_sig.pdf
"
,
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
branche
s
.
index
(
"
mt
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
met
"
)],
utrf_x_test
[
c
.
y_test
==
1
][:,
c
.
field
s
.
index
(
"
mt
"
)],
c
.
scores_test
[
c
.
y_test
==
1
].
reshape
(
-
1
),
20
,
0
,
500
,
20
,
0
,
1000
,
...
...
This diff is collapsed.
Click to expand it.
scripts/plot_NN_2D.py
+
4
−
4
View file @
a396d67b
...
...
@@ -55,9 +55,9 @@ neuron = args.neuron
if
layer
is
None
:
layer
=
c
.
layers
varx_index
=
c
.
branche
s
.
index
(
args
.
varx
)
varx_index
=
c
.
field
s
.
index
(
args
.
varx
)
if
not
plot_vs_activation
:
vary_index
=
c
.
branche
s
.
index
(
args
.
vary
)
vary_index
=
c
.
field
s
.
index
(
args
.
vary
)
else
:
vary_index
=
0
# dummy value in this case
...
...
@@ -151,7 +151,7 @@ elif args.mode.startswith("hist"):
else
:
# ranges in which to sample the random events
x_test_scaled
=
c
.
scaler
.
transform
(
c
.
x_test
)
ranges
=
[
np
.
percentile
(
x_test_scaled
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
branche
s
))]
ranges
=
[
np
.
percentile
(
x_test_scaled
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
field
s
))]
losses
,
events
=
get_max_activation_events
(
c
.
model
,
ranges
,
ntries
=
args
.
ntries_actmax
,
step
=
args
.
step_size
,
layer
=
layer
,
neuron
=
neuron
,
threshold
=
args
.
threshold
)
events
=
c
.
scaler
.
inverse_transform
(
events
)
valsx
=
events
[:,
varx_index
]
...
...
@@ -176,7 +176,7 @@ elif args.mode.startswith("cond_actmax"):
x_test_scaled
=
c
.
scaler
.
transform
(
c
.
x_test
)
# ranges in which to sample the random events
ranges
=
[
np
.
percentile
(
x_test_scaled
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
branche
s
))]
ranges
=
[
np
.
percentile
(
x_test_scaled
[:,
var_index
],
[
1
,
99
])
for
var_index
in
range
(
len
(
c
.
field
s
))]
plot_cond_avg_actmax_2D
(
args
.
output_filename
,
...
...
This diff is collapsed.
Click to expand it.
toolkit.py
+
43
−
11
View file @
a396d67b
...
...
@@ -90,6 +90,8 @@ class ClassificationProject(object):
:param branches: list of branch names or expressions to be used as input values for training
:param rename_branches: dictionary that maps branch expressions to names for better readability
:param weight_expr: expression to weight the events in the loss function
:param data_dir: if given, load the data from a previous project with the given name
...
...
@@ -186,6 +188,7 @@ class ClassificationProject(object):
def
_init_from_args
(
self
,
name
,
signal_trees
,
bkg_trees
,
branches
,
weight_expr
,
rename_branches
=
None
,
project_dir
=
None
,
data_dir
=
None
,
identifiers
=
None
,
...
...
@@ -214,6 +217,9 @@ class ClassificationProject(object):
self
.
signal_trees
=
signal_trees
self
.
bkg_trees
=
bkg_trees
self
.
branches
=
branches
if
rename_branches
is
None
:
rename_branches
=
{}
self
.
rename_branches
=
rename_branches
self
.
weight_expr
=
weight_expr
self
.
selection
=
selection
...
...
@@ -294,6 +300,27 @@ class ClassificationProject(object):
# track if we are currently training
self
.
is_training
=
False
self
.
_fields
=
None
@property
def fields(self):
    """Return the input field names, i.e. the renamed branch expressions.

    Each entry of ``self.branches`` is looked up in ``self.rename_branches``;
    expressions without an entry keep their original text. The order of
    ``self.branches`` is preserved.

    The list is built on first access and cached in ``self._fields``;
    later accesses return the cached list.
    """
    if self._fields is None:
        # Build the complete list before caching it, so that a failure
        # part-way through never leaves a truncated list behind.
        self._fields = [
            self.rename_branches.get(branch_expr, branch_expr)
            for branch_expr in self.branches
        ]
    return self._fields
def rename_fields(self, ar):
    """Rename the fields of a structured array in place.

    Every field name of ``ar`` that has an entry in
    ``self.rename_branches`` is replaced by the mapped name; all other
    field names are kept. Field order is unchanged.

    :param ar: structured array whose ``dtype.names`` are rewritten
    :return: None, ``ar`` is modified in place
    """
    ar.dtype.names = tuple(
        self.rename_branches.get(old_name, old_name)
        for old_name in ar.dtype.names
    )
def
_load_data
(
self
):
...
...
@@ -330,20 +357,25 @@ class ClassificationProject(object):
selection
=
self
.
selection
,
start
=
1
,
step
=
self
.
step_bkg
)
self
.
rename_fields
(
self
.
s_train
)
self
.
rename_fields
(
self
.
b_train
)
self
.
rename_fields
(
self
.
s_test
)
self
.
rename_fields
(
self
.
b_test
)
self
.
s_eventlist_train
=
self
.
s_train
[
self
.
identifiers
].
astype
(
dtype
=
[(
branchName
,
"
u8
"
)
for
branchName
in
self
.
identifiers
])
self
.
b_eventlist_train
=
self
.
b_train
[
self
.
identifiers
].
astype
(
dtype
=
[(
branchName
,
"
u8
"
)
for
branchName
in
self
.
identifiers
])
self
.
_dump_training_list
()
# now we don't need the identifiers anymore
self
.
s_train
=
self
.
s_train
[
self
.
branche
s
+
[
self
.
weight_expr
]]
self
.
b_train
=
self
.
b_train
[
self
.
branche
s
+
[
self
.
weight_expr
]]
self
.
s_train
=
self
.
s_train
[
self
.
field
s
+
[
self
.
weight_expr
]]
self
.
b_train
=
self
.
b_train
[
self
.
field
s
+
[
self
.
weight_expr
]]
# create x (input), y (target) and w (weights) arrays
# the first block will be signals, the second block backgrounds
self
.
x_train
=
rec2array
(
self
.
s_train
[
self
.
branche
s
])
self
.
x_train
=
np
.
concatenate
((
self
.
x_train
,
rec2array
(
self
.
b_train
[
self
.
branche
s
])))
self
.
x_test
=
rec2array
(
self
.
s_test
[
self
.
branche
s
])
self
.
x_test
=
np
.
concatenate
((
self
.
x_test
,
rec2array
(
self
.
b_test
[
self
.
branche
s
])))
self
.
x_train
=
rec2array
(
self
.
s_train
[
self
.
field
s
])
self
.
x_train
=
np
.
concatenate
((
self
.
x_train
,
rec2array
(
self
.
b_train
[
self
.
field
s
])))
self
.
x_test
=
rec2array
(
self
.
s_test
[
self
.
field
s
])
self
.
x_test
=
np
.
concatenate
((
self
.
x_test
,
rec2array
(
self
.
b_test
[
self
.
field
s
])))
self
.
w_train
=
self
.
s_train
[
self
.
weight_expr
]
self
.
w_train
=
np
.
concatenate
((
self
.
w_train
,
self
.
b_train
[
self
.
weight_expr
]))
self
.
w_test
=
self
.
s_test
[
self
.
weight_expr
]
...
...
@@ -551,7 +583,7 @@ class ClassificationProject(object):
self
.
_model
=
Sequential
()
# first hidden layer
self
.
_model
.
add
(
Dense
(
self
.
nodes
,
input_dim
=
len
(
self
.
branche
s
),
activation
=
self
.
activation_function
))
self
.
_model
.
add
(
Dense
(
self
.
nodes
,
input_dim
=
len
(
self
.
field
s
),
activation
=
self
.
activation_function
))
# the other hidden layers
for
layer_number
in
range
(
self
.
layers
-
1
):
self
.
_model
.
add
(
Dense
(
self
.
nodes
,
activation
=
self
.
activation_function
))
...
...
@@ -704,7 +736,7 @@ class ClassificationProject(object):
self
.
load
()
for
branch_index
,
branch
in
enumerate
(
self
.
branche
s
):
for
branch_index
,
branch
in
enumerate
(
self
.
field
s
):
self
.
plot_input
(
branch_index
)
self
.
total_epochs
=
self
.
_read_info
(
"
epochs
"
,
0
)
...
...
@@ -795,9 +827,9 @@ class ClassificationProject(object):
logger
.
info
(
"
Evaluating score for entry {}/{}
"
.
format
(
start
,
entries
))
logger
.
debug
(
"
Loading next batch
"
)
x_from_tree
=
tree2array
(
tree
,
branches
=
self
.
branche
s
+
self
.
identifiers
,
branches
=
self
.
field
s
+
self
.
identifiers
,
start
=
start
,
stop
=
start
+
batch_size
)
x_eval
=
rec2array
(
x_from_tree
[
self
.
branche
s
])
x_eval
=
rec2array
(
x_from_tree
[
self
.
field
s
])
if
len
(
self
.
identifiers
)
>
0
:
# create list of booleans that indicate which events were used for training
...
...
@@ -854,7 +886,7 @@ class ClassificationProject(object):
def
plot_input
(
self
,
var_index
):
"
plot a single input variable
"
branch
=
self
.
branche
s
[
var_index
]
branch
=
self
.
field
s
[
var_index
]
fig
,
ax
=
plt
.
subplots
()
bkg
=
self
.
x_train
[:,
var_index
][
self
.
y_train
==
0
]
sig
=
self
.
x_train
[:,
var_index
][
self
.
y_train
==
1
]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment