Skip to content
This repository was archived by the owner on Aug 9, 2024. It is now read-only.

Commit 41c409e

Browse files
committed
(WIP) Implements session_oriented.session
* Adds 'list_of' meta-datasource * Adds documentation
1 parent 0188bbb commit 41c409e

File tree

10 files changed

+196
-22
lines changed

10 files changed

+196
-22
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
revscoring.datasources.session_oriented
2+
=======================================
3+
4+
.. automodule:: revscoring.datasources.session_oriented

revscoring/datasources/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
:class:`~revscoring.Datasource` processors are intended to
99
be :func:`~revscoring.dependencies.solve`'d as dependencies. The
1010
provided datasources are split conceptually into a set of modules. Currently,
11-
there is one module: :mod:`~revscoring.datasources.revision_oriented`.
11+
there are two modules: :mod:`~revscoring.datasources.revision_oriented` and
12+
:mod:`~revscoring.datasources.session_oriented`.
1213
1314
Meta-datasources
1415
++++++++++++++++
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from ..datasource import Datasource
2+
3+
4+
class list_of(Datasource):
    """
    Applies a dependent to each position of a set of parallel lists of
    values and returns the list of results.

    :Parameters:
        dependent : `callable`
            Called once per position with the values found at that position
            in each of the input lists
        depends_on : `list`
            The dependencies that generate the input lists of values
        name : `str`
            A name for the datasource
    """

    def __init__(self, dependent, depends_on=None, name=None):
        formatted_name = self._format_name(name, [dependent])
        super().__init__(formatted_name, self.process, depends_on=depends_on)
        self.dependency = dependent

    def process(self, *lists_of_values):
        # zip() pairs the lists position by position and stops at the
        # shortest one.
        results = []
        for values in zip(*lists_of_values):
            results.append(self.dependency(*values))
        return results

revscoring/datasources/meta/selectors.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,7 @@ class filter_keys(Datasource):
116116
117117
:Parameters:
118118
table_datasource : :class:`revscoring.Datasource`
119-
A datasource that generates a table including only the specified
120-
keys
119+
A datasource that generates a table with keys and values
121120
keys : `iterable` ( `hashable` )
122121
The keys to select from the table
123122
name : `str`
@@ -138,3 +137,43 @@ def process(self, table):
138137
new_table[key] = table[key]
139138

140139
return new_table
140+
141+
142+
class first(Datasource):
    """
    Returns the first item in an indexable collection (e.g., a list)

    :Parameters:
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates an indexable sequence
        name : `str`
            A name for the datasource
    """

    def __init__(self, items_datasource, name=None):
        name = self._format_name(name, [items_datasource])
        # NOTE: the keyword is `depends_on` (not `depend_on`), matching the
        # other Datasource subclasses.
        super().__init__(name, self.process, depends_on=[items_datasource])

    def process(self, items):
        # Raises IndexError when `items` is empty.
        return items[0]
160+
161+
162+
class last(Datasource):
    """
    Returns the last item in an indexable collection (e.g., a list)

    :Parameters:
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates an indexable sequence
        name : `str`
            A name for the datasource
    """

    def __init__(self, items_datasource, name=None):
        name = self._format_name(name, [items_datasource])
        # NOTE: the keyword is `depends_on` (not `depend_on`), matching the
        # other Datasource subclasses.
        super().__init__(name, self.process, depends_on=[items_datasource])

    def process(self, items):
        # Raises IndexError when `items` is empty.
        return items[-1]
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Implements a set of datasources oriented off of a session (a list of revisions). This is
3+
useful for extracting features of edit and article quality.
4+
5+
.. autodata:: revscoring.datasources.session_oriented.session
6+
7+
"""
8+
9+
from ..dependencies import DependentSet
10+
from .meta import expanders
11+
from .revision_oriented import Revision, User
12+
13+
14+
def list_of_tree(dependent_set, rewrite_name=None, cache=None):
    """
    Rewrites every dependent in `dependent_set`, and recursively in its
    sub-sets, into a `list_of` version. The set is modified in place and
    returned.

    :Parameters:
        dependent_set : `DependentSet`
            The set whose `dependents` and `dependent_sets` are rewritten
        rewrite_name : `callable`
            Maps an original dependent name to the new name. Defaults to
            leaving names unchanged.
        cache : `dict`
            Mapping of new names to already rewritten dependents, shared
            across the whole recursion. Defaults to a fresh `dict`.
    """
    if cache is None:
        cache = {}

    if rewrite_name is None:
        def rewrite_name(name):
            return name

    # Replace each direct dependent with its rewritten counterpart.
    for attr_name, dependent in dependent_set.dependents.items():
        setattr(dependent_set, attr_name,
                list_of_ify(dependent, rewrite_name, cache))

    # Recurse into the nested sets, re-assigning each one after rewriting.
    for attr_name, child_set in dependent_set.dependent_sets.items():
        setattr(dependent_set, attr_name,
                list_of_tree(child_set, rewrite_name, cache))

    return dependent_set
31+
32+
33+
def list_of_ify(dependent, rewrite_name, cache):
    """
    Builds the `list_of` version of `dependent`, recursively rewriting its
    dependencies first.

    :Parameters:
        dependent : `Dependent`
            The dependent to rewrite; its `name` and `dependencies` are read
        rewrite_name : `callable`
            Maps the original name to the name of the rewritten dependent
        cache : `dict`
            Mapping of new names to already rewritten dependents. It is
            consulted before building and updated afterwards, so a
            dependency shared by several dependents is rewritten only once.

    :Returns:
        The cached dependent for the new name if there is one, otherwise a
        newly built `expanders.list_of`.
    """
    new_name = rewrite_name(dependent.name)
    if new_name in cache:
        return cache[new_name]

    new_dependencies = [list_of_ify(dependency, rewrite_name, cache)
                        for dependency in dependent.dependencies]
    new_dependent = expanders.list_of(
        dependent, depends_on=new_dependencies, name=new_name)

    # Remember the result; without this the cache lookup above never hits.
    cache[new_name] = new_dependent
    return new_dependent
42+
43+
44+
class Session(DependentSet):
    """
    A set of dependents for a session. `revisions` and `user` are the
    :class:`Revision` and :class:`User` dependent sets after every dependent
    in them has been rewritten by :func:`list_of_tree`.

    :Parameters:
        name : `str`
            The base name; sub-sets are named `<name>.revisions` and
            `<name>.user`
    """

    def __init__(self, name):
        super().__init__(name)

        revisions = Revision(
            name + ".revisions",
            include_page_creation=True,
            include_content=True,
            include_user=False,
            include_page_suggested=True)
        self.revisions = list_of_tree(revisions)

        user = User(
            name + ".user",
            include_info=True,
            include_last_revision=False)
        self.user = list_of_tree(user)
58+
59+
60+
# Module-level Session instance with the base name "session"; this is the
# object referenced by the module docstring's autodata directive.
session = Session("session")

revscoring/dependencies/dependent.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,55 +99,60 @@ class DependentSet:
9999
A base name for the items in the set
100100
"""
101101

102-
def __init__(self, name, _dependents=None, _dependent_sets=None):
103-
self._dependents = _dependents or set()
104-
self._dependent_sets = _dependent_sets or set()
105-
self._name = name
102+
def __init__(self, name, dependents=None, dependent_sets=None):
103+
self.dependents = dependents or {}
104+
self.dependent_sets = dependent_sets or {}
105+
self.name = name
106106

107107
def __setattr__(self, attr, value):
108108
super().__setattr__(attr, value)
109109

110110
if isinstance(value, Dependent):
111111
logger.log(logging.NOTSET,
112-
"Registering {0} to {1}".format(value, self._name))
113-
if value in self._dependents:
112+
"Registering {0} to {1}".format(value, self.name))
113+
if value in self.dependents:
114114
logger.warn("{0} has already been added to {1}. Could be "
115115
.format(value, self) + "overwritten?")
116-
self._dependents.add(value)
116+
self.dependents[attr] = value
117117
elif isinstance(value, DependentSet):
118-
self._dependent_sets.add(value)
118+
self.dependent_sets[attr] = value
119+
else:
120+
pass # Just set it like a regular attribute
119121

120122
# String methods
121123
def __str__(self):
122124
return self.__repr__()
123125

124126
def __repr__(self):
125-
return "{" + self._name + "}"
127+
return "{" + self.name + "}"
126128

127129
def __hash__(self):
128-
return hash('dependent_set.' + self._name)
130+
return hash('dependent_set.' + self.name)
129131

130132
def __eq__(self, other):
131133
return hash(self) == hash(other)
132134

133135
def __ne__(self, other):
134136
return not self == other
135137

138+
def all_dependencies(self):
139+
return set(self.dependents.values()).union(*self.dependent_sets.values())
140+
136141
# Set methods
137142
def __len__(self):
138-
return len(self._dependents.union(*self._dependent_sets))
143+
return len(self.all_dependencies())
139144

140145
def __contains__(self, item):
141-
return item in self._dependents.union(*self._dependent_sets)
146+
return item in self.all_dependencies()
142147

143148
def __iter__(self):
144-
return iter(self._dependents.union(*self._dependent_sets))
149+
return iter(self.all_dependencies())
145150

146151
def __sub__(self, other):
147-
return self._dependents.union(*self._dependent_sets) - other
152+
return self.all_dependencies() - other
148153

149154
def __and__(self, other):
150-
return self._dependents.union(*self._dependent_sets) & other
155+
return self.all_dependencies() & other
151156

152157
def __or__(self, other):
153-
return self._dependents.union(*self._dependent_sets) | other
158+
return self.all_dependencies() | other

revscoring/extractors/api/extractor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33

44
import mwapi
55

6+
from . import datasources
7+
from .. import Extractor as BaseExtractor
68
from ...datasources import Datasource, revision_oriented
79
from ...dependencies import expand
810
from ...errors import QueryNotSupported, RevisionNotFound, UserNotFound
9-
from .. import Extractor as BaseExtractor
10-
from . import datasources
1111
from .revision_oriented import Revision
1212
from .util import REV_PROPS, USER_PROPS
1313

@@ -45,7 +45,7 @@ def get_property_suggestion_search_doc(self, page):
4545
return datasources.PropertySuggestionDoc(page, self)
4646

4747
def extract(self, rev_ids, dependents, context=None, caches=None,
48-
cache=None, profile=None):
48+
cache=None, profile=None, orientation="revision"):
4949
"""
5050
Extracts values for a set of
5151
:class:`~revscoring.dependents.dependent.Dependent` (e.g.

revscoring/features/meta/aggregators.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from ..feature import Feature
1818
from ..feature_vector import FeatureVector
1919

20+
any_builtin = any
21+
all_builtin = all
2022
len_builtin = len
2123
sum_builtin = sum
2224
max_builtin = max
@@ -70,6 +72,16 @@ def wrapper(items_datasource, name=None, returns=float, vector=False):
7072
return wrapper
7173

7274

75+
@aggregators_factory
def all(items_datasource, name=None, returns=bool, vector=False):
    # This function shadows the builtin `all`, so the builtin is reached
    # through the module-level alias `all_builtin`.
    return all_builtin
78+
79+
80+
@aggregators_factory
def any(items_datasource, name=None, returns=bool, vector=False):
    # This function shadows the builtin `any`, so the builtin is reached
    # through the module-level alias `any_builtin`.
    return any_builtin
83+
84+
7385
@aggregators_factory
7486
def sum(items_datasource, name=None, returns=float, vector=False):
7587
return sum_builtin
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from ..aggregators import aggregators_factory
2+
3+
4+
def _first(items):
    """Return the item at index 0 of `items`."""
    return items[0]


def _last(items):
    """Return the item at index -1 of `items`."""
    return items[-1]


@aggregators_factory
def first(items_datasource, name=None, returns=float, vector=False):
    # Supplies the "take the first item" function to the factory.
    return _first


@aggregators_factory
def last(items_datasource, name=None, returns=float, vector=False):
    # Supplies the "take the last item" function to the factory.
    return _last
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import pickle
2+
3+
from revscoring import Datasource, Feature
4+
from revscoring.datasources.meta import expanders
5+
from revscoring.dependencies import solve
6+
7+
8+
def process_chars(text):
    """Return the number of characters in `text`."""
    return len(text)


# Fixtures: a single-value datasource/feature pair and their list_of
# counterparts.
text = Datasource("text")
chars = Feature("chars", process_chars, returns=int, depends_on=[text])
many_texts = expanders.list_of(text)
many_chars = expanders.list_of(chars, depends_on=[many_texts])


def test_list_of():
    # `chars` should be applied to each text in the list.
    observed = solve(many_chars, cache={many_texts: ["foo", "barbaz"]})
    assert observed == [3, 6]

    # The expanded dependent must survive a pickle round trip.
    restored = pickle.loads(pickle.dumps(many_chars))
    assert restored == many_chars

0 commit comments

Comments
 (0)