Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.

Commit 462d7ff

Browse files
source: df: Added docstring and doctestable example
Fixes: #671 Signed-off-by: sakshamarora1 <[email protected]>
1 parent 82cd22e commit 462d7ff

File tree

2 files changed

+62
-0
lines changed

2 files changed

+62
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88
### Added
9+
- Docstrings and doctestable example for DataFlowSource
910
- XGBoost Regression Model
1011
- Pre-Trained PyTorch torchvision Models
1112
- Spacy model for NER

dffml/source/df.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,67 @@ async def __aexit__(self, exc_type, exc_value, traceback):
6666

6767
@entrypoint("df")
6868
class DataFlowSource(BaseSource):
69+
"""
70+
>>> import asyncio
71+
>>> from dffml import *
72+
>>>
73+
>>> records = [
74+
... Record(
75+
... "0",
76+
... data={
77+
... "features": {
78+
... "Years": 1,
79+
... "Expertise": 3,
80+
... "Trust": 0.2,
81+
... "Salary": 20,
82+
... }
83+
... },
84+
... ),
85+
... ]
86+
>>>
87+
>>> features = Features(
88+
... Feature("Years", int, 1),
89+
... Feature("Expertise", int, 1),
90+
... Feature("Trust", float, 1),
91+
... Feature("Salary", int, 1),
92+
... )
93+
>>>
94+
>>> dataflow = DataFlow(multiply, AssociateDefinition)
95+
>>> dataflow.flow["multiply"].inputs["multiplicand"] = [
96+
... {"seed": ["Years", "Expertise", "Trust", "Salary"]}
97+
... ]
98+
>>> dataflow.seed = [
99+
... Input(
100+
... value={
101+
... feature.name: multiply.op.outputs["product"].name
102+
... for feature in features
103+
... },
104+
... definition=AssociateDefinition.op.inputs["spec"],
105+
... ),
106+
... Input(value=10, definition=multiply.op.inputs["multiplier"],),
107+
... ]
108+
>>>
109+
>>>
110+
>>> memory_source = Sources(MemorySource(MemorySourceConfig(records=records)))
111+
>>>
112+
>>> source = DataFlowSource(
113+
... DataFlowSourceConfig(
114+
... source=memory_source, dataflow=dataflow, features=features,
115+
... )
116+
... )
117+
>>>
118+
>>>
119+
>>> async def main():
120+
... async with source as src:
121+
... async with src() as sctx:
122+
... async for record in sctx.records():
123+
... print(record.features())
124+
...
125+
>>>
126+
>>> asyncio.run(main())
127+
{'Years': 10, 'Expertise': 30, 'Trust': 2.0, 'Salary': 200}
128+
"""
129+
69130
CONFIG = DataFlowSourceConfig
70131
CONTEXT = DataFlowSourceContext
71132

0 commit comments

Comments
 (0)