@@ -128,21 +128,39 @@ state_config:
128128
129129# USER METADATA
130130# These are arbitrary keys/values that you can have Triage apply to the
131- # metadata for each matrix in the experiment. Any keys you include here can
132- # be used in the 'model_group_keys' below.
131+ # metadata for every matrix in the experiment. Any keys you include here can
132+ # be used in the 'model_group_keys' below. For example, if you run experiments
133+ # that share a temporal configuration but that use different label definitions
134+ # (say, labeling building inspections with *any* violation as positive or
135+ # labeling only building inspections with severe health and safety violations
136+ # as positive), you can use the user metadata keys to indicate that the matrices
137+ # from these experiments have different labeling criteria. The matrices from the
138+ # two experiments will have different filenames (and not be overwritten or
139+ # inappropriately reused), and if you add the label_definition key to the model
140+ # group keys, models made on different label definitions will have different
141+ # groups. In this way, user metadata can be used to expand Triage beyond its
142+ # explicitly supported functionality.
133143user_metadata :
134144 label_definition : ' severe_violations'
135145
136146# MODEL GROUPING
137147# Model groups are aimed at defining models which are equivalent across time splits.
148+ # In other words, you will probably want to define model groups by any parameters
149+ # that distinguish models *other than the beginning and end dates of their data.*
138150# By default, the classifier module name, hyperparameters, and feature names are used.
151+ #
139152#
140153# model_group_keys defines a list of *additional* matrix metadata keys that
141- # should be considered when creating a model group
154+ # should be considered when creating a model group. For example, the models may
155+ # be built on matrices with different history lengths (train durations), different
156+ # labeling windows (e.g., inspection violations in the next month, next year, or
157+ # next two years), a different frequency of rows for each entity (train example
158+ # frequency), or a different definition of a positive label.
142159model_group_keys :
143160 - ' train_duration'
144161 - ' train_label_window'
145162 - ' train_example_frequency'
163+ - ' label_definition'
146164
147165# GRID CONFIGURATION
148166# The classifier/hyperparameter combinations that should be trained
0 commit comments