Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.

Commit 7dd0fa8

Browse files
source: file: Change label to tag
Fixes: #126
1 parent 4d46339 commit 7dd0fa8

File tree

13 files changed

+117
-118
lines changed

13 files changed

+117
-118
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5252
in its config changed to `predict`.
5353
- SciKit models use `make_config_numpy`.
5454
- Predictions in `repos` are now a dictionary.
55+
- All instances of `label` changed to `tag`.
5556
### Fixed
5657
- CONTRIBUTING.md has `-e` in the wrong place in the getting setup section.
5758
- Since moving to auto `args()` and `config()`, BaseConfigurable no longer

dffml/source/csv.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class OpenCSVFile:
2727
active: int
2828
lock: asyncio.Lock
2929
write_back_key: bool = True
30-
write_back_label: bool = False
30+
write_back_tag: bool = False
3131

3232
async def inc(self):
3333
async with self.lock:
@@ -40,15 +40,15 @@ async def dec(self):
4040

4141

4242
CSV_SOURCE_CONFIG_DEFAULT_KEY = "key"
43-
CSV_SOURCE_CONFIG_DEFAULT_LABEL = "unlabeled"
44-
CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN = "label"
43+
CSV_SOURCE_CONFIG_DEFAULT_tag = "untagged"
44+
CSV_SOURCE_CONFIG_DEFAULT_tag_COLUMN = "tag"
4545

4646

4747
@config
4848
class CSVSourceConfig(FileSourceConfig):
4949
key: str = CSV_SOURCE_CONFIG_DEFAULT_KEY
50-
label: str = CSV_SOURCE_CONFIG_DEFAULT_LABEL
51-
labelcol: str = CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN
50+
tag: str = CSV_SOURCE_CONFIG_DEFAULT_tag
51+
tagcol: str = CSV_SOURCE_CONFIG_DEFAULT_tag_COLUMN
5252

5353

5454
# CSVSource is a bit of a mess
@@ -91,24 +91,24 @@ async def read_csv(self, fd, open_file):
9191
# Record what headers are present when the file was opened
9292
if not self.config.key in dict_reader.fieldnames:
9393
open_file.write_back_key = False
94-
if self.config.labelcol in dict_reader.fieldnames:
95-
open_file.write_back_label = True
96-
# Store all the repos by their label in write_out
94+
if self.config.tagcol in dict_reader.fieldnames:
95+
open_file.write_back_tag = True
96+
# Store all the repos by their tag in write_out
9797
open_file.write_out = {}
98-
# If there is no key track row index to be used as key by label
98+
# If there is no key, track the row index per tag so it can be used as the key
9999
index = {}
100100
for row in dict_reader:
101-
# Grab label from row
102-
label = row.get(self.config.labelcol, self.config.label)
103-
if self.config.labelcol in row:
104-
del row[self.config.labelcol]
105-
index.setdefault(label, 0)
101+
# Grab tag from row
102+
tag = row.get(self.config.tagcol, self.config.tag)
103+
if self.config.tagcol in row:
104+
del row[self.config.tagcol]
105+
index.setdefault(tag, 0)
106106
# Grab key from row
107-
key = row.get(self.config.key, str(index[label]))
107+
key = row.get(self.config.key, str(index[tag]))
108108
if self.config.key in row:
109109
del row[self.config.key]
110110
else:
111-
index[label] += 1
111+
index[tag] += 1
112112
# Repo data we are going to parse from this row (must include
113113
# features).
114114
repo_data = {}
@@ -159,18 +159,18 @@ async def read_csv(self, fd, open_file):
159159
}
160160
repo_data.update({"prediction": predictions})
161161
# If there was no data in the row, skip it
162-
if not repo_data and key == str(index[label] - 1):
162+
if not repo_data and key == str(index[tag] - 1):
163163
continue
164164
# Add the repo to our internal memory representation
165-
open_file.write_out.setdefault(label, {})
166-
open_file.write_out[label][key] = Repo(key, data=repo_data)
165+
open_file.write_out.setdefault(tag, {})
166+
open_file.write_out[tag][key] = Repo(key, data=repo_data)
167167

168168
async def load_fd(self, fd):
169169
"""
170170
Parses a CSV stream into Repo instances
171171
"""
172172
async with self._open_csv(fd) as open_file:
173-
self.mem = open_file.write_out.get(self.config.label, {})
173+
self.mem = open_file.write_out.get(self.config.tag, {})
174174
self.logger.debug("%r loaded %d records", self, len(self.mem))
175175

176176
async def dump_fd(self, fd):
@@ -179,20 +179,20 @@ async def dump_fd(self, fd):
179179
"""
180180
async with self.OPEN_CSV_FILES_LOCK:
181181
open_file = self.OPEN_CSV_FILES[self.config.filename]
182-
open_file.write_out.setdefault(self.config.label, {})
183-
open_file.write_out[self.config.label].update(self.mem)
182+
open_file.write_out.setdefault(self.config.tag, {})
183+
open_file.write_out[self.config.tag].update(self.mem)
184184
# Bail if not last open source for this file
185185
if not (await open_file.dec()):
186186
return
187187
# Add our headers
188188
fieldnames = (
189189
[] if not open_file.write_back_key else [self.config.key]
190190
)
191-
fieldnames.append(self.config.labelcol)
191+
fieldnames.append(self.config.tagcol)
192192
# Get all the feature names
193193
feature_fieldnames = set()
194194
prediction_fieldnames = set()
195-
for label, repos in open_file.write_out.items():
195+
for tag, repos in open_file.write_out.items():
196196
for repo in repos.values():
197197
feature_fieldnames |= set(repo.data.features.keys())
198198
prediction_fieldnames |= set(repo.data.prediction.keys())
@@ -209,12 +209,12 @@ async def dump_fd(self, fd):
209209
# Write out the file
210210
writer = csv.DictWriter(fd, fieldnames=fieldnames)
211211
writer.writeheader()
212-
for label, repos in open_file.write_out.items():
212+
for tag, repos in open_file.write_out.items():
213213
for repo in repos.values():
214214
repo_data = repo.dict()
215215
row = {name: "" for name in fieldnames}
216-
# Always write the label
217-
row[self.config.labelcol] = label
216+
# Always write the tag
217+
row[self.config.tagcol] = tag
218218
# Write the key if it existed
219219
if open_file.write_back_key:
220220
row[self.config.key] = repo.key

dffml/source/file.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
@config
1919
class FileSourceConfig:
2020
filename: str
21-
label: str = "unlabeled"
21+
tag: str = "untagged"
2222
readwrite: bool = False
2323
allowempty: bool = False
2424

dffml/source/json.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,14 @@ async def load_fd(self, fd):
7373
repos = self.OPEN_JSON_FILES[self.config.filename].data
7474
self.mem = {
7575
key: Repo(key, data=data)
76-
for key, data in repos.get(self.config.label, {}).items()
76+
for key, data in repos.get(self.config.tag, {}).items()
7777
}
7878
LOGGER.debug("%r loaded %d records", self, len(self.mem))
7979

8080
async def dump_fd(self, fd):
8181
async with self.OPEN_JSON_FILES_LOCK:
8282
repos = self.OPEN_JSON_FILES[self.config.filename].data
83-
repos[self.config.label] = {
83+
repos[self.config.tag] = {
8484
repo.key: repo.dict() for repo in self.mem.values()
8585
}
8686
self.logger.debug(f"{self.config.filename} updated")

dffml/util/testing/source.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,22 +125,22 @@ async def test_update(self):
125125
)
126126
await super().test_update()
127127

128-
async def test_label(self):
128+
async def test_tag(self):
129129
with tempfile.TemporaryDirectory() as testdir:
130130
self.testfile = os.path.join(testdir, str(random.random()))
131-
unlabeled = await self.setUpSource()
132-
labeled = await self.setUpSource()
133-
labeled.config = labeled.config._replace(label="somelabel")
134-
async with unlabeled, labeled:
135-
async with unlabeled() as uctx, labeled() as lctx:
131+
untagged = await self.setUpSource()
132+
tagged = await self.setUpSource()
133+
tagged.config = tagged.config._replace(tag="sometag")
134+
async with untagged, tagged:
135+
async with untagged() as uctx, tagged() as lctx:
136136
await uctx.update(
137137
Repo("0", data={"features": {"feed": 1}})
138138
)
139139
await lctx.update(
140140
Repo("0", data={"features": {"face": 2}})
141141
)
142-
async with unlabeled, labeled:
143-
async with unlabeled() as uctx, labeled() as lctx:
142+
async with untagged, tagged:
143+
async with untagged() as uctx, tagged() as lctx:
144144
repo = await uctx.repo("0")
145145
self.assertIn("feed", repo.features())
146146
repo = await lctx.repo("0")

docs/plugins/dffml_source.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ Uses a CSV file as the source of repo feature data
2828

2929
- filename: String
3030

31-
- label: String
31+
- tag: String
3232

33-
- default: unlabeled
33+
- default: untagged
3434

3535
- readwrite: Boolean
3636

@@ -44,9 +44,9 @@ Uses a CSV file as the source of repo feature data
4444

4545
- default: key
4646

47-
- labelcol: String
47+
- tagcol: String
4848

49-
- default: label
49+
- default: tag
5050

5151
idx1
5252
~~~~
@@ -106,9 +106,9 @@ stored in memory.
106106

107107
- filename: String
108108

109-
- label: String
109+
- tag: String
110110

111-
- default: unlabeled
111+
- default: untagged
112112

113113
- readwrite: Boolean
114114

examples/source/test_custom_sqlite.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ async def setUpSource(self):
1212
CustomSQLiteSourceConfig(filename=self.testfile)
1313
)
1414

15-
@unittest.skip("Labels not implemented")
16-
async def test_label(self):
15+
@unittest.skip("tags not implemented")
16+
async def test_tag(self):
1717
"""
18-
Labels not implemented
18+
Tags not implemented
1919
"""

service/http/tests/test_routes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,9 @@ async def test_source(self):
273273
self.cli.app["sources"]["salary"].config,
274274
CSVSourceConfig(
275275
filename="dataset.csv",
276-
label="unlabeled",
276+
tag="untagged",
277277
key="key",
278-
labelcol="label",
278+
tagcol="tag",
279279
allowempty=True,
280280
),
281281
)
@@ -360,7 +360,7 @@ async def test_not_found(self):
360360
ServerException, f"{check} feed face not found"
361361
):
362362
async with self.post(
363-
f"/configure/{check}/feed face/label", json={}
363+
f"/configure/{check}/feed face/tag", json={}
364364
):
365365
pass # pragma: no cover
366366

source/mysql/tests/test_source.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ def tearDownClass(cls):
7676
async def setUpSource(self):
7777
return MySQLSource(self.source_config)
7878

79-
@unittest.skip("Labels not implemented")
80-
async def test_label(self):
79+
@unittest.skip("Tags not implemented")
80+
async def test_tag(self):
8181
"""
82-
Labels not implemented
82+
Tags not implemented
8383
"""

tests/integration/test_cli.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,10 @@ async def test_memory_to_csv(self):
9393
pathlib.Path(filename).read_text(),
9494
inspect.cleandoc(
9595
"""
96-
key,label
97-
A,unlabeled
98-
B,unlabeled
99-
C,unlabeled
96+
key,tag
97+
A,untagged
98+
B,untagged
99+
C,untagged
100100
"""
101101
)
102102
+ "\n",

0 commit comments

Comments
 (0)