source: file: Change label to tag

sudharsana-kjl · web-flow · commit 7dd0fa8f77bb · 2020-02-08T08:13:27.000-08:00
Fixes: #126
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -52,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   in it's config changed to `predict`.
 - SciKit models use `make_config_numpy`.
 - Predictions in `repos` are now dictionary.
+- All instances of `label` changed to `tag`.
 ### Fixed
 - CONTRIBUTING.md has `-e` in the wrong place in the getting setup section.
 - Since moving to auto `args()` and `config()`, BaseConfigurable no longer
diff --git a/dffml/source/csv.py b/dffml/source/csv.py
@@ -27,7 +27,7 @@ class OpenCSVFile:
     active: int
     lock: asyncio.Lock
     write_back_key: bool = True
-    write_back_label: bool = False
+    write_back_tag: bool = False
 
     async def inc(self):
         async with self.lock:
@@ -40,15 +40,15 @@ async def dec(self):
 
 
 CSV_SOURCE_CONFIG_DEFAULT_KEY = "key"
-CSV_SOURCE_CONFIG_DEFAULT_LABEL = "unlabeled"
-CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN = "label"
+CSV_SOURCE_CONFIG_DEFAULT_TAG = "untagged"
+CSV_SOURCE_CONFIG_DEFAULT_TAG_COLUMN = "tag"
 
 
 @config
 class CSVSourceConfig(FileSourceConfig):
     key: str = CSV_SOURCE_CONFIG_DEFAULT_KEY
-    label: str = CSV_SOURCE_CONFIG_DEFAULT_LABEL
-    labelcol: str = CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN
+    tag: str = CSV_SOURCE_CONFIG_DEFAULT_TAG
+    tagcol: str = CSV_SOURCE_CONFIG_DEFAULT_TAG_COLUMN
 
 
 # CSVSource is a bit of a mess
@@ -91,24 +91,24 @@ async def read_csv(self, fd, open_file):
         # Record what headers are present when the file was opened
         if not self.config.key in dict_reader.fieldnames:
             open_file.write_back_key = False
-        if self.config.labelcol in dict_reader.fieldnames:
-            open_file.write_back_label = True
-        # Store all the repos by their label in write_out
+        if self.config.tagcol in dict_reader.fieldnames:
+            open_file.write_back_tag = True
+        # Store all the repos by their tag in write_out
         open_file.write_out = {}
-        # If there is no key track row index to be used as key by label
+        # If there is no key track row index to be used as key by tag
         index = {}
         for row in dict_reader:
-            # Grab label from row
-            label = row.get(self.config.labelcol, self.config.label)
-            if self.config.labelcol in row:
-                del row[self.config.labelcol]
-            index.setdefault(label, 0)
+            # Grab tag from row
+            tag = row.get(self.config.tagcol, self.config.tag)
+            if self.config.tagcol in row:
+                del row[self.config.tagcol]
+            index.setdefault(tag, 0)
             # Grab key from row
-            key = row.get(self.config.key, str(index[label]))
+            key = row.get(self.config.key, str(index[tag]))
             if self.config.key in row:
                 del row[self.config.key]
             else:
-                index[label] += 1
+                index[tag] += 1
             # Repo data we are going to parse from this row (must include
             # features).
             repo_data = {}
@@ -159,18 +159,18 @@ async def read_csv(self, fd, open_file):
             }
             repo_data.update({"prediction": predictions})
             # If there was no data in the row, skip it
-            if not repo_data and key == str(index[label] - 1):
+            if not repo_data and key == str(index[tag] - 1):
                 continue
             # Add the repo to our internal memory representation
-            open_file.write_out.setdefault(label, {})
-            open_file.write_out[label][key] = Repo(key, data=repo_data)
+            open_file.write_out.setdefault(tag, {})
+            open_file.write_out[tag][key] = Repo(key, data=repo_data)
 
     async def load_fd(self, fd):
         """
         Parses a CSV stream into Repo instances
         """
         async with self._open_csv(fd) as open_file:
-            self.mem = open_file.write_out.get(self.config.label, {})
+            self.mem = open_file.write_out.get(self.config.tag, {})
         self.logger.debug("%r loaded %d records", self, len(self.mem))
 
     async def dump_fd(self, fd):
@@ -179,20 +179,20 @@ async def dump_fd(self, fd):
         """
         async with self.OPEN_CSV_FILES_LOCK:
             open_file = self.OPEN_CSV_FILES[self.config.filename]
-            open_file.write_out.setdefault(self.config.label, {})
-            open_file.write_out[self.config.label].update(self.mem)
+            open_file.write_out.setdefault(self.config.tag, {})
+            open_file.write_out[self.config.tag].update(self.mem)
             # Bail if not last open source for this file
             if not (await open_file.dec()):
                 return
             # Add our headers
             fieldnames = (
                 [] if not open_file.write_back_key else [self.config.key]
             )
-            fieldnames.append(self.config.labelcol)
+            fieldnames.append(self.config.tagcol)
             # Get all the feature names
             feature_fieldnames = set()
             prediction_fieldnames = set()
-            for label, repos in open_file.write_out.items():
+            for tag, repos in open_file.write_out.items():
                 for repo in repos.values():
                     feature_fieldnames |= set(repo.data.features.keys())
                     prediction_fieldnames |= set(repo.data.prediction.keys())
@@ -209,12 +209,12 @@ async def dump_fd(self, fd):
             # Write out the file
             writer = csv.DictWriter(fd, fieldnames=fieldnames)
             writer.writeheader()
-            for label, repos in open_file.write_out.items():
+            for tag, repos in open_file.write_out.items():
                 for repo in repos.values():
                     repo_data = repo.dict()
                     row = {name: "" for name in fieldnames}
-                    # Always write the label
-                    row[self.config.labelcol] = label
+                    # Always write the tag
+                    row[self.config.tagcol] = tag
                     # Write the key if it existed
                     if open_file.write_back_key:
                         row[self.config.key] = repo.key
diff --git a/dffml/source/file.py b/dffml/source/file.py
@@ -18,7 +18,7 @@
 @config
 class FileSourceConfig:
     filename: str
-    label: str = "unlabeled"
+    tag: str = "untagged"
     readwrite: bool = False
     allowempty: bool = False
 
diff --git a/dffml/source/json.py b/dffml/source/json.py
@@ -73,14 +73,14 @@ async def load_fd(self, fd):
             repos = self.OPEN_JSON_FILES[self.config.filename].data
             self.mem = {
                 key: Repo(key, data=data)
-                for key, data in repos.get(self.config.label, {}).items()
+                for key, data in repos.get(self.config.tag, {}).items()
             }
         LOGGER.debug("%r loaded %d records", self, len(self.mem))
 
     async def dump_fd(self, fd):
         async with self.OPEN_JSON_FILES_LOCK:
             repos = self.OPEN_JSON_FILES[self.config.filename].data
-            repos[self.config.label] = {
+            repos[self.config.tag] = {
                 repo.key: repo.dict() for repo in self.mem.values()
             }
             self.logger.debug(f"{self.config.filename} updated")
diff --git a/dffml/util/testing/source.py b/dffml/util/testing/source.py
@@ -125,22 +125,22 @@ async def test_update(self):
                     )
                     await super().test_update()
 
-    async def test_label(self):
+    async def test_tag(self):
         with tempfile.TemporaryDirectory() as testdir:
             self.testfile = os.path.join(testdir, str(random.random()))
-            unlabeled = await self.setUpSource()
-            labeled = await self.setUpSource()
-            labeled.config = labeled.config._replace(label="somelabel")
-            async with unlabeled, labeled:
-                async with unlabeled() as uctx, labeled() as lctx:
+            untagged = await self.setUpSource()
+            tagged = await self.setUpSource()
+            tagged.config = tagged.config._replace(tag="sometag")
+            async with untagged, tagged:
+                async with untagged() as uctx, tagged() as lctx:
                     await uctx.update(
                         Repo("0", data={"features": {"feed": 1}})
                     )
                     await lctx.update(
                         Repo("0", data={"features": {"face": 2}})
                     )
-            async with unlabeled, labeled:
-                async with unlabeled() as uctx, labeled() as lctx:
+            async with untagged, tagged:
+                async with untagged() as uctx, tagged() as lctx:
                     repo = await uctx.repo("0")
                     self.assertIn("feed", repo.features())
                     repo = await lctx.repo("0")
diff --git a/docs/plugins/dffml_source.rst b/docs/plugins/dffml_source.rst
@@ -28,9 +28,9 @@ Uses a CSV file as the source of repo feature data
 
 - filename: String
 
-- label: String
+- tag: String
 
-  - default: unlabeled
+  - default: untagged
 
 - readwrite: Boolean
 
@@ -44,9 +44,9 @@ Uses a CSV file as the source of repo feature data
 
   - default: key
 
-- labelcol: String
+- tagcol: String
 
-  - default: label
+  - default: tag
 
 idx1
 ~~~~
@@ -106,9 +106,9 @@ stored in memory.
 
 - filename: String
 
-- label: String
+- tag: String
 
-  - default: unlabeled
+  - default: untagged
 
 - readwrite: Boolean
 
diff --git a/examples/source/test_custom_sqlite.py b/examples/source/test_custom_sqlite.py
@@ -12,8 +12,8 @@ async def setUpSource(self):
             CustomSQLiteSourceConfig(filename=self.testfile)
         )
 
-    @unittest.skip("Labels not implemented")
-    async def test_label(self):
+    @unittest.skip("tags not implemented")
+    async def test_tag(self):
         """
-        Labels not implemented
+        Tags not implemented
         """
diff --git a/service/http/tests/test_routes.py b/service/http/tests/test_routes.py
@@ -273,9 +273,9 @@ async def test_source(self):
                 self.cli.app["sources"]["salary"].config,
                 CSVSourceConfig(
                     filename="dataset.csv",
-                    label="unlabeled",
+                    tag="untagged",
                     key="key",
-                    labelcol="label",
+                    tagcol="tag",
                     allowempty=True,
                 ),
             )
@@ -360,7 +360,7 @@ async def test_not_found(self):
                     ServerException, f"{check} feed face not found"
                 ):
                     async with self.post(
-                        f"/configure/{check}/feed face/label", json={}
+                        f"/configure/{check}/feed face/tag", json={}
                     ):
                         pass  # pramga: no cov
 
diff --git a/source/mysql/tests/test_source.py b/source/mysql/tests/test_source.py
@@ -76,8 +76,8 @@ def tearDownClass(cls):
     async def setUpSource(self):
         return MySQLSource(self.source_config)
 
-    @unittest.skip("Labels not implemented")
-    async def test_label(self):
+    @unittest.skip("Tags not implemented")
+    async def test_tag(self):
         """
-        Labels not implemented
+        Tags not implemented
         """
diff --git a/tests/integration/test_cli.py b/tests/integration/test_cli.py
@@ -93,10 +93,10 @@ async def test_memory_to_csv(self):
             pathlib.Path(filename).read_text(),
             inspect.cleandoc(
                 """
-                key,label
-                A,unlabeled
-                B,unlabeled
-                C,unlabeled
+                key,tag
+                A,untagged
+                B,untagged
+                C,untagged
                 """
             )
             + "\n",
diff --git a/tests/source/test_csv.py b/tests/source/test_csv.py
@@ -22,45 +22,45 @@ async def setUpSource(self):
             )
         )
 
-    async def test_label(self):
+    async def test_tag(self):
         with tempfile.TemporaryDirectory() as testdir:
             self.testfile = os.path.join(testdir, str(random.random()))
-            unlabeled = await self.setUpSource()
-            labeled = await self.setUpSource()
-            labeled.config = labeled.config._replace(label="somelabel")
-            async with unlabeled, labeled:
-                async with unlabeled() as uctx, labeled() as lctx:
+            untagged = await self.setUpSource()
+            tagged = await self.setUpSource()
+            tagged.config = tagged.config._replace(tag="sometag")
+            async with untagged, tagged:
+                async with untagged() as uctx, tagged() as lctx:
                     await uctx.update(
                         Repo("0", data={"features": {"feed": 1}})
                     )
                     await lctx.update(
                         Repo("0", data={"features": {"face": 2}})
                     )
-                # async with unlabeled, labeled:
-                async with unlabeled() as uctx, labeled() as lctx:
+                # async with untagged, tagged:
+                async with untagged() as uctx, tagged() as lctx:
                     repo = await uctx.repo("0")
                     self.assertIn("feed", repo.features())
                     repo = await lctx.repo("0")
                     self.assertIn("face", repo.features())
             with open(self.testfile, "r") as fd:
                 dict_reader = csv.DictReader(fd, dialect="strip")
-                rows = {row["label"]: {row["key"]: row} for row in dict_reader}
-                self.assertIn("unlabeled", rows)
-                self.assertIn("somelabel", rows)
-                self.assertIn("0", rows["unlabeled"])
-                self.assertIn("0", rows["somelabel"])
-                self.assertIn("feed", rows["unlabeled"]["0"])
-                self.assertIn("face", rows["somelabel"]["0"])
-                self.assertEqual("1", rows["unlabeled"]["0"]["feed"])
-                self.assertEqual("2", rows["somelabel"]["0"]["face"])
+                rows = {row["tag"]: {row["key"]: row} for row in dict_reader}
+                self.assertIn("untagged", rows)
+                self.assertIn("sometag", rows)
+                self.assertIn("0", rows["untagged"])
+                self.assertIn("0", rows["sometag"])
+                self.assertIn("feed", rows["untagged"]["0"])
+                self.assertIn("face", rows["sometag"]["0"])
+                self.assertEqual("1", rows["untagged"]["0"]["feed"])
+                self.assertEqual("2", rows["sometag"]["0"]["face"])
 
     def test_config_default(self):
         config = CSVSource.config(
             parse_unknown("--source-csv-filename", "feedface")
         )
         self.assertEqual(config.filename, "feedface")
-        self.assertEqual(config.label, "unlabeled")
-        self.assertEqual(config.labelcol, "label")
+        self.assertEqual(config.tag, "untagged")
+        self.assertEqual(config.tagcol, "tag")
         self.assertEqual(config.key, "key")
         self.assertFalse(config.readwrite)
         self.assertFalse(config.allowempty)
@@ -70,19 +70,19 @@ def test_config_set(self):
             parse_unknown(
                 "--source-csv-filename",
                 "feedface",
-                "--source-csv-label",
-                "default-label",
-                "--source-csv-labelcol",
-                "dffml_label",
+                "--source-csv-tag",
+                "default-tag",
+                "--source-csv-tagcol",
+                "dffml_tag",
                 "--source-csv-key",
                 "SourceURLColumn",
                 "--source-csv-readwrite",
                 "--source-csv-allowempty",
             )
         )
         self.assertEqual(config.filename, "feedface")
-        self.assertEqual(config.label, "default-label")
-        self.assertEqual(config.labelcol, "dffml_label")
+        self.assertEqual(config.tag, "default-tag")
+        self.assertEqual(config.tagcol, "dffml_tag")
         self.assertEqual(config.key, "SourceURLColumn")
         self.assertTrue(config.readwrite)
         self.assertTrue(config.allowempty)
diff --git a/tests/source/test_file.py b/tests/source/test_file.py
diff --git a/tests/test_cli.py b/tests/test_cli.py