
Commit 2ddaf97

John Andersen (pdxjohnny) authored and committed
docs: usage: Integration example
Signed-off-by: John Andersen <[email protected]>
1 parent 9784b6a commit 2ddaf97

24 files changed, +968 -98 lines changed


dffml/cli.py

Lines changed: 19 additions & 16 deletions
@@ -15,6 +15,7 @@
 
 from .log import LOGGER
 from .version import VERSION
+from .repo import Repo
 from .port import Port
 from .feature import Feature, Features, Data
 from .source.source import BaseSource, Sources, SubsetSources
@@ -135,15 +136,17 @@ class Merge(CMD):
     '''
 
     arg_dest = Arg('dest', help='Sources merge repos into',
-                   type=BaseSource.load)
+                   type=BaseSource.load_labeled)
     arg_src = Arg('src', help='Sources to pull repos from',
-                  type=BaseSource.load)
+                  type=BaseSource.load_labeled)
 
     async def run(self):
         async with self.src.withconfig(self.extra_config) as src, \
                 self.dest.withconfig(self.extra_config) as dest:
-            async with self.src() as sctx, dest() as dctx:
-                async for repo in sctx.repos():
+            async with src() as sctx, dest() as dctx:
+                async for src in sctx.repos():
+                    repo = Repo(src.src_url)
+                    repo.merge(src)
                     repo.merge(await dctx.repo(repo.src_url))
                     await dctx.update(repo)
 
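Net effect of the Merge rework above: rather than updating the repo yielded by the source in place, run() now builds a fresh Repo keyed by src_url, layers the incoming repo onto it, then layers on whatever the destination already holds before calling update. A rough standalone sketch of that sequence follows; the URL and feature data are invented, and it assumes Repo accepts a data mapping with a 'features' dict (not shown in this commit):

    from dffml.repo import Repo

    # Hypothetical repos standing in for sctx.repos() and dctx.repo(...)
    incoming = Repo('https://example.com', data={'features': {'commits': 10}})
    existing = Repo('https://example.com', data={'features': {'authors': 4}})

    merged = Repo(incoming.src_url)   # fresh Repo keyed by the source URL
    merged.merge(incoming)            # data pulled from the src source
    merged.merge(existing)            # data the dest source already had
    # With the recursive merge added in dffml/util/data.py below, merged now
    # carries both 'commits' and 'authors' instead of one clobbering the other.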
@@ -313,27 +316,27 @@ async def run(self):
         async with self.sources as sources, self.features as features, \
                 self.model as model:
             async with sources() as sctx, model() as mctx:
-                return float(await mctx.accuracy(sources, features,
+                return float(await mctx.accuracy(sctx, features,
                                                  self.classifications))
 
 class PredictAll(EvaluateAll, MLCMD):
     '''Predicts for all sources'''
 
-    async def predict(self, model, sources, features, repos):
-        async with sources() as sctx, model() as mctx:
-            async for repo, classification, confidence in \
-                    mctx.predict(repos, features, self.classifications):
-                repo.predicted(classification, confidence)
-                yield repo
-                if self.update:
-                    await sctx.update(repo)
+    async def predict(self, mctx, sctx, features, repos):
+        async for repo, classification, confidence in \
+                mctx.predict(repos, features, self.classifications):
+            repo.predicted(classification, confidence)
+            yield repo
+            if self.update:
+                await sctx.update(repo)
 
     async def run(self):
         async with self.sources as sources, self.features as features, \
                 self.model as model:
-            async for repo in self.predict(model, sources, features,
-                                           self.evaluate(sources, features)):
-                yield repo
+            async with sources() as sctx, model() as mctx:
+                async for repo in self.predict(mctx, sctx, features,
+                                               sctx.repos()):
+                    yield repo
 
 class PredictRepo(PredictAll, EvaluateRepo):
     '''Predictions for individual repos'''

dffml/feature/feature.py

Lines changed: 17 additions & 40 deletions
@@ -212,6 +212,23 @@ async def close(self):
         '''
         pass
 
+    @classmethod
+    def load(cls, loading=None):
+        if loading is not None and loading.startswith('def:'):
+            return cls.load_def(*loading.replace('def:', '').split(':'))
+        return super().load(loading)
+
+    @classmethod
+    def load_def(cls, name: str, dtype: str, length: str):
+        return DefFeature(name, cls.convert_dtype(dtype), int(length))
+
+    @classmethod
+    def convert_dtype(cls, dtype: str):
+        found = pydoc.locate(dtype)
+        if found is None:
+            raise TypeError('Failed to convert_dtype %r' % (dtype,))
+        return found
+
     async def __aenter__(self):
         await self.open()
         # TODO Context management
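With load now a classmethod on the feature class itself, a feature can be constructed straight from a def: string. A small sketch of what the added classmethods accept; the feature name is invented for illustration:

    # 'def:<name>:<dtype>:<length>' is split by load() and handed to load_def()
    feature = Feature.load('def:commits:int:10')
    # equivalent to DefFeature('commits', int, 10), since pydoc.locate('int') is int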
@@ -373,46 +390,6 @@ async def submit(self, src: str):
         return await super().start(partial(self.evaluate, src), src,
                                    mktask=self.mktask)
 
-    @classmethod
-    def load(cls, *these: str):
-        '''
-        Loads all installed loading and returns them as a list. Sources to be
-        loaded should be registered to ENTRY_POINT via setuptools.
-        '''
-        these, loading_classes = cls.load_defs(*these)
-        for i in pkg_resources.iter_entry_points(Feature.ENTRY_POINT):
-            loaded = i.load()
-            if issubclass(loaded, Feature) and loaded.NAME in these:
-                loading_classes.append(loaded())
-        self = cls(*loading_classes)
-        for name in these:
-            if not name in self.names():
-                raise KeyError('%s was not found in (%s)' % \
-                    (repr(name), ', '.join(map(str, loading_classes))))
-        if not self.names():
-            raise KeyError('No features were loaded')
-        return self
-
-    @classmethod
-    def load_defs(cls, *args: str):
-        defs = []
-        no_def = [arg for arg in args if not arg.startswith('def:')]
-        for arg in args:
-            if arg.startswith('def:'):
-                defs.append(cls.load_def(*arg.replace('def:', '').split(':')))
-        return no_def, defs
-
-    @classmethod
-    def load_def(cls, name: str, dtype: str, length: str):
-        return DefFeature(name, cls.convert_dtype(dtype), int(length))
-
-    @classmethod
-    def convert_dtype(cls, dtype: str):
-        found = pydoc.locate(dtype)
-        if found is None:
-            raise TypeError('Failed to convert_dtype %r' % (dtype,))
-        return found
-
     async def __aenter__(self):
         self._stack = AsyncExitStack()
         await self._stack.__aenter__()

dffml/repo.py

Lines changed: 5 additions & 2 deletions
@@ -7,6 +7,7 @@
 from datetime import datetime
 from typing import Optional, List, Dict, Any, AsyncIterator
 
+from .util.data import merge
 from .log import LOGGER
 
 LOGGER = LOGGER.getChild('repo')
@@ -77,7 +78,9 @@ def __init__(self, *,
 
     def dict(self):
         data = {key: getattr(self, key, []) for key in self.EXPORTED \
-                if len(getattr(self, key, []))}
+                if not isinstance(getattr(self, key, {}), dict) or \
+                   (isinstance(getattr(self, key, {}), dict) and \
+                    getattr(self, key, {}))}
         # Do not report if there has been no change since instantiation to
         # a default time value
         if self.last_updated != self.last_updated_default:
@@ -136,7 +139,7 @@ def __str__(self):
 
     def merge(self, repo: 'Repo'):
         data = self.data.dict()
-        data.update(repo.data.dict())
+        merge(data, repo.data.dict())
         self.data = self.REPO_DATA(**data)
         self.extra.update(repo.extra) # type: ignore
 
dffml/util/data.py

Lines changed: 7 additions & 0 deletions
@@ -3,6 +3,13 @@
 '''
 from functools import wraps
 
+def merge(one, two):
+    for key, value in two.items():
+        if key in one and isinstance(value, dict):
+            merge(one[key], two[key])
+        else:
+            one[key] = two[key]
+
 def traverse_config_set(target, *args):
     '''
     >>> traverse_set({'level': {'one': 1}}, 'level', 'one', 42)
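The merge helper recurses into nested dictionaries, so sub-mappings from both sides are combined rather than the second dict replacing the first wholesale, which is what dict.update did in Repo.merge before this commit. A quick sketch with invented keys:

    one = {'features': {'commits': 10}, 'src_url': 'repoA'}
    two = {'features': {'authors': 4}}
    merge(one, two)
    # one == {'features': {'commits': 10, 'authors': 4}, 'src_url': 'repoA'}
    # plain dict.update would have left one['features'] == {'authors': 4}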

docs/concepts/data_flow.rst

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Data Flow
+=========
+
+An operation is run if there is new data of data types matching its parameters
+in the data flow network. For

docs/concepts/index.rst

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+Models
+======
+
+.. toctree::
+   :glob:
+   :maxdepth: 2
+   :caption: Contents:
+
+   base
