Skip to content

Commit b732272

Browse files
authored
Merge pull request #18 from VowpalWabbit/renames
Renames
2 parents 8fc5139 + bd9796c commit b732272

File tree

7 files changed

+94
-94
lines changed

7 files changed

+94
-94
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -257,27 +257,27 @@ The default `VwPolicy` is initialized with some default arguments. The default e
257257

258258
The `VwPolicy`'s data files can be stored and examined, or used to do [off-policy evaluation](https://vowpalwabbit.org/docs/vowpal_wabbit/python/latest/tutorials/off_policy_evaluation.html) for hyperparameter tuning.
259259

260-
The way to do this is to set a log file path to `vw_logs` on chain creation:
260+
The way to do this is to pass a log file path as `rl_logs` on chain creation:
261261

262-
`picker = learn_to_pick.PickBest.create(vw_logs=<path to log FILE>, [...])`
262+
`picker = learn_to_pick.PickBest.create(rl_logs=<path to log FILE>, [...])`
263263

264264
### Advanced featurization options
265265

266266
#### auto_embed
267267

268268
By default, the input that is wrapped (`ToSelectFrom`, `BasedOn`) is not tampered with. This might not be sufficient featurization, so depending on how complex the scenario is, you can set auto-embeddings to ON:
269269

270-
`picker = learn_to_pick.PickBest.create(feature_embedder=learn_to_pick.PickBestFeatureEmbedder(auto_embed=True), [...])`
270+
`picker = learn_to_pick.PickBest.create(featurizer=learn_to_pick.PickBestFeaturizer(auto_embed=True), [...])`
271271

272272
This will produce more complex embeddings and featurizations of the inputs, likely accelerating RL learning, albeit at the cost of increased runtime.
273273

274-
By default, [sbert.net's sentence_transformers's ](https://www.sbert.net/docs/pretrained_models.html#model-overview) `all-mpnet-base-v2` model will be used for these embeddings but you can set a different embeddings model by initializing feature_embedder with a different model. You could also set an entirely different embeddings encoding object, as long as it has an `encode()` function that returns a list of the encodings.
274+
By default, [sbert.net's sentence_transformers](https://www.sbert.net/docs/pretrained_models.html#model-overview) `all-mpnet-base-v2` model will be used for these embeddings, but you can set a different embeddings model by initializing `featurizer` with a different model. You could also set an entirely different embeddings encoding object, as long as it has an `encode()` function that returns a list of the encodings.
275275

276276
```python
277277
from sentence_transformers import SentenceTransformer
278278

279279
picker = learn_to_pick.PickBest.create(
280-
feature_embedder=learn_to_pick.PickBestFeatureEmbedder(
280+
featurizer=learn_to_pick.PickBestFeaturizer(
281281
auto_embed=True,
282282
model=SentenceTransformer("all-mpnet-base-v2")
283283
),

notebooks/readme.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
"\n",
8888
"# with delayed score\n",
8989
"\n",
90-
"picker = PickBest.create(selection_scorer=None)\n",
90+
"picker = learn_to_pick.PickBest.create(selection_scorer=None)\n",
9191
"result = picker.run(pick = learn_to_pick.ToSelectFrom([\"option1\", \"option2\"]),\n",
9292
" criteria = learn_to_pick.BasedOn(\"some criteria\")\n",
9393
")\n",
@@ -297,7 +297,7 @@
297297
"metadata": {},
298298
"outputs": [],
299299
"source": [
300-
"picker = learn_to_pick.PickBest.create(vw_logs='./logs', selection_scorer=None)"
300+
"picker = learn_to_pick.PickBest.create(rl_logs='./logs', selection_scorer=None)"
301301
]
302302
},
303303
{
@@ -319,7 +319,7 @@
319319
"metadata": {},
320320
"outputs": [],
321321
"source": [
322-
"picker = learn_to_pick.PickBest.create(vw_logs='./logs', selection_scorer=None, feature_embedder=learn_to_pick.PickBestFeatureEmbedder(auto_embed=True))\n",
322+
"picker = learn_to_pick.PickBest.create(rl_logs='./logs', selection_scorer=None, featurizer=learn_to_pick.PickBestFeaturizer(auto_embed=True))\n",
323323
"result = picker.run(pick = learn_to_pick.ToSelectFrom([\"option1\", \"option2\"]),\n",
324324
" criteria = learn_to_pick.BasedOn(\"age:32\")\n",
325325
")\n",
@@ -355,11 +355,11 @@
355355
"from sentence_transformers import SentenceTransformer\n",
356356
"\n",
357357
"picker = learn_to_pick.PickBest.create(\n",
358-
" feature_embedder=learn_to_pick.PickBestFeatureEmbedder(\n",
358+
" featurizer=learn_to_pick.PickBestFeaturizer(\n",
359359
" auto_embed=True,\n",
360360
" model=SentenceTransformer(\"multi-qa-mpnet-base-dot-v1\")\n",
361361
" ),\n",
362-
" vw_logs='./logs',\n",
362+
" rl_logs='./logs',\n",
363363
" selection_scorer=None\n",
364364
")\n",
365365
"\n",
@@ -378,10 +378,10 @@
378378
"outputs": [],
379379
"source": [
380380
"picker = learn_to_pick.PickBest.create(\n",
381-
" feature_embedder=learn_to_pick.PickBestFeatureEmbedder(\n",
381+
" featurizer=learn_to_pick.PickBestFeaturizer(\n",
382382
" auto_embed=False,\n",
383383
" ),\n",
384-
" vw_logs='./logs',\n",
384+
" rl_logs='./logs',\n",
385385
" selection_scorer=None\n",
386386
")\n",
387387
"\n",

src/learn_to_pick/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
AutoSelectionScorer,
55
BasedOn,
66
Embed,
7-
Embedder,
7+
Featurizer,
88
ModelRepository,
99
Policy,
1010
SelectionScorer,
@@ -17,7 +17,7 @@
1717
from learn_to_pick.pick_best import (
1818
PickBest,
1919
PickBestEvent,
20-
PickBestFeatureEmbedder,
20+
PickBestFeaturizer,
2121
PickBestRandomPolicy,
2222
PickBestSelected,
2323
)
@@ -41,14 +41,14 @@ def configure_logger() -> None:
4141
"PickBest",
4242
"PickBestEvent",
4343
"PickBestSelected",
44-
"PickBestFeatureEmbedder",
44+
"PickBestFeaturizer",
4545
"PickBestRandomPolicy",
4646
"Embed",
4747
"BasedOn",
4848
"ToSelectFrom",
4949
"SelectionScorer",
5050
"AutoSelectionScorer",
51-
"Embedder",
51+
"Featurizer",
5252
"ModelRepository",
5353
"Policy",
5454
"VwPolicy",

src/learn_to_pick/base.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def __init__(
181181
self,
182182
model_repo: ModelRepository,
183183
vw_cmd: List[str],
184-
feature_embedder: Embedder,
184+
featurizer: Featurizer,
185185
vw_logger: VwLogger,
186186
*args: Any,
187187
**kwargs: Any,
@@ -190,35 +190,35 @@ def __init__(
190190
self.model_repo = model_repo
191191
self.vw_cmd = vw_cmd
192192
self.workspace = self.model_repo.load(vw_cmd)
193-
self.feature_embedder = feature_embedder
193+
self.featurizer = featurizer
194194
self.vw_logger = vw_logger
195195

196196
def predict(self, event: TEvent) -> Any:
197197
import vowpal_wabbit_next as vw
198198

199199
text_parser = vw.TextFormatParser(self.workspace)
200200
return self.workspace.predict_one(
201-
parse_lines(text_parser, self.feature_embedder.format(event))
201+
parse_lines(text_parser, self.featurizer.format(event))
202202
)
203203

204204
def learn(self, event: TEvent) -> None:
205205
import vowpal_wabbit_next as vw
206206

207-
vw_ex = self.feature_embedder.format(event)
207+
vw_ex = self.featurizer.format(event)
208208
text_parser = vw.TextFormatParser(self.workspace)
209209
multi_ex = parse_lines(text_parser, vw_ex)
210210
self.workspace.learn_one(multi_ex)
211211

212212
def log(self, event: TEvent) -> None:
213213
if self.vw_logger.logging_enabled():
214-
vw_ex = self.feature_embedder.format(event)
214+
vw_ex = self.featurizer.format(event)
215215
self.vw_logger.log(vw_ex)
216216

217217
def save(self) -> None:
218218
self.model_repo.save(self.workspace)
219219

220220

221-
class Embedder(Generic[TEvent], ABC):
221+
class Featurizer(Generic[TEvent], ABC):
222222
def __init__(self, *args: Any, **kwargs: Any):
223223
pass
224224

@@ -325,12 +325,12 @@ class RLLoop(Generic[TEvent]):
325325
- metrics (Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]): Tracker for metrics, can be set to None.
326326
327327
Initialization Attributes:
328-
- feature_embedder (Embedder): Embedder used for the `BasedOn` and `ToSelectFrom` inputs.
328+
- featurizer (Featurizer): Featurizer used for the `BasedOn` and `ToSelectFrom` inputs.
329329
- model_save_dir (str, optional): Directory for saving the VW model. Default is the current directory.
330330
- reset_model (bool): If set to True, the model starts training from scratch. Default is False.
331331
- vw_cmd (List[str], optional): Command line arguments for the VW model.
332332
- policy (Type[VwPolicy]): Policy used by the chain.
333-
- vw_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs.
333+
- rl_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs.
334334
- metrics_step (int): Step for the metrics tracker. Default is -1. If set without metrics_window_size, average metrics will be tracked, otherwise rolling window metrics will be tracked.
335335
- metrics_window_size (int): Window size for the metrics tracker. Default is -1. If set, rolling window metrics will be tracked.
336336

src/learn_to_pick/pick_best.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ def __init__(
4242
self.based_on = based_on
4343

4444

45-
class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]):
45+
class PickBestFeaturizer(base.Featurizer[PickBestEvent]):
4646
"""
47-
Text Embedder class that embeds the `BasedOn` and `ToSelectFrom` inputs into a format that can be used by the learning policy
47+
Text Featurizer class that embeds the `BasedOn` and `ToSelectFrom` inputs into a format that can be used by the learning policy
4848
4949
Attributes:
5050
model name (Any, optional): The type of embeddings to be used for feature representation. Defaults to BERT SentenceTransformer.
@@ -259,7 +259,7 @@ class PickBest(base.RLLoop[PickBestEvent]):
259259
RLLoop
260260
261261
Attributes:
262-
feature_embedder (PickBestFeatureEmbedder, optional): Is an advanced attribute. Responsible for embedding the `BasedOn` and `ToSelectFrom` inputs. If omitted, a default embedder is utilized.
262+
featurizer (PickBestFeaturizer, optional): Is an advanced attribute. Responsible for embedding the `BasedOn` and `ToSelectFrom` inputs. If omitted, a default featurizer is utilized.
263263
"""
264264

265265
def _call_before_predict(self, inputs: Dict[str, Any]) -> PickBestEvent:
@@ -341,11 +341,11 @@ def create(
341341
selection_scorer = base.AutoSelectionScorer(llm=llm)
342342

343343
policy_args = {
344-
"feature_embedder": kwargs.pop("feature_embedder", None),
344+
"featurizer": kwargs.pop("featurizer", None),
345345
"vw_cmd": kwargs.pop("vw_cmd", None),
346346
"model_save_dir": kwargs.pop("model_save_dir", None),
347347
"reset_model": kwargs.pop("reset_model", None),
348-
"vw_logs": kwargs.pop("vw_logs", None),
348+
"rl_logs": kwargs.pop("rl_logs", None),
349349
}
350350

351351
if policy and any(policy_args.values()):
@@ -366,14 +366,14 @@ def create(
366366

367367
@staticmethod
368368
def create_policy(
369-
feature_embedder: Optional[base.Embedder] = None,
369+
featurizer: Optional[base.Featurizer] = None,
370370
vw_cmd: Optional[List[str]] = None,
371371
model_save_dir: str = "./",
372372
reset_model: bool = False,
373-
vw_logs: Optional[Union[str, os.PathLike]] = None
373+
rl_logs: Optional[Union[str, os.PathLike]] = None
374374
):
375-
if not feature_embedder:
376-
feature_embedder = PickBestFeatureEmbedder(auto_embed=False)
375+
if not featurizer:
376+
featurizer = PickBestFeaturizer(auto_embed=False)
377377

378378
vw_cmd = vw_cmd or []
379379
interactions = []
@@ -391,7 +391,7 @@ def create_policy(
391391
"--quiet",
392392
]
393393

394-
if feature_embedder.auto_embed:
394+
if featurizer.auto_embed:
395395
interactions += [
396396
"--interactions=@#",
397397
"--ignore_linear=@",
@@ -405,8 +405,8 @@ def create_policy(
405405
model_save_dir, with_history=True, reset=reset_model
406406
),
407407
vw_cmd=vw_cmd,
408-
feature_embedder=feature_embedder,
409-
vw_logger=base.VwLogger(vw_logs),
408+
featurizer=featurizer,
409+
vw_logger=base.VwLogger(rl_logs),
410410
)
411411

412412
def _default_policy(self):

0 commit comments

Comments
 (0)