Skip to content

Commit 45706e2

Browse files
authored
Merge pull request #1 from nodestream-proj/improvements
Improving code quality and adding tests
2 parents 38f9147 + 6657c1d commit 45706e2

File tree

11 files changed

+473
-267
lines changed

11 files changed

+473
-267
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
strategy:
99
fail-fast: false
1010
matrix:
11-
python-version: ["3.10", "3.11"]
11+
python-version: ["3.10", "3.11", "3.12", "3.13"]
1212

1313
steps:
1414
- uses: actions/checkout@v3

.vscode/settings.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"python.testing.pytestArgs": [
3+
"."
4+
],
5+
"python.testing.unittestEnabled": false,
6+
"python.testing.pytestEnabled": true
7+
}

nodestream_plugin_semantic/chunk.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from abc import ABC, abstractmethod
22
from typing import Iterable
33

4-
from nodestream.subclass_registry import SubclassRegistry
54
from nodestream.pluggable import Pluggable
5+
from nodestream.subclass_registry import SubclassRegistry
66

77
from .model import Content
88

@@ -25,7 +25,8 @@ def from_file_data(type, **chunker_kwargs) -> "Chunker":
2525
return CHUNKER_SUBCLASS_REGISTRY.get(type)(**chunker_kwargs)
2626

2727
@abstractmethod
28-
def chunk(self, content: Content) -> Iterable[Content]: ...
28+
def chunk(self, content: Content) -> Iterable[Content]:
29+
...
2930

3031

3132
class SplitOnDelimiterChunker(Chunker):

nodestream_plugin_semantic/content_types.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
from abc import ABC, abstractmethod
2-
from typing import Iterable
32
from pathlib import Path
3+
from typing import Iterable
44

55
from nodestream.subclass_registry import SubclassRegistry
66

7-
87
CONTENT_TYPE_SUBCLASS_REGISTRY = SubclassRegistry()
98
PLAIN_TEXT_ALIAS = "plain_text"
109
PLAIN_TEXT_EXTENSIONS = {".txt", ".md"}

nodestream_plugin_semantic/embed.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
from abc import ABC, abstractmethod
22

3-
from nodestream.subclass_registry import SubclassRegistry
43
from nodestream.pluggable import Pluggable
4+
from nodestream.subclass_registry import SubclassRegistry
55

66
from .model import Content, Embedding
77

8-
98
EMBEDDER_SUBCLASS_REGISTRY = SubclassRegistry()
109

1110

nodestream_plugin_semantic/model.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
from dataclasses import dataclass
21
import hashlib
3-
from typing import List, Optional, Iterable
4-
5-
from nodestream.model import DesiredIngestion, Node
2+
from dataclasses import dataclass
3+
from typing import Iterable, List, Optional
64

5+
from nodestream.model import DesiredIngestion, Node, Relationship
76

87
Embedding = List[float | int]
98
CONTENT_NODE_TYPE_ID_PROPERTY = "id"
@@ -66,8 +65,9 @@ def make_ingestible(
6665

6766
if self.parent:
6867
self.parent.apply_to_node(node_type, related := Node())
68+
relationship = Relationship(type=relationship_type)
6969
ingest.add_relationship(
70-
related_node=related, type=relationship_type, outbound=False
70+
related_node=related, relationship=relationship, outbound=False
7171
)
7272

7373
return ingest

nodestream_plugin_semantic/pipeline.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
from glob import glob
22
from pathlib import Path
3-
from typing import Optional, List, Dict
3+
from typing import Dict, List, Optional
44

55
from nodestream.model import DesiredIngestion
66
from nodestream.pipeline import Extractor, Transformer
77
from nodestream.pipeline.value_providers import (
8-
ValueProvider,
98
JmespathValueProvider,
109
ProviderContext,
10+
ValueProvider,
1111
)
1212
from nodestream.schema import (
13+
Cardinality,
1314
ExpandsSchema,
14-
SchemaExpansionCoordinator,
1515
GraphObjectSchema,
16-
Cardinality,
16+
SchemaExpansionCoordinator,
1717
)
1818

1919
from .chunk import Chunker
20+
from .content_types import PLAIN_TEXT_ALIAS, ContentType
2021
from .embed import Embedder
21-
from .content_types import ContentType, PLAIN_TEXT_ALIAS
2222
from .model import Content
2323

2424
DEFAULT_ID = JmespathValueProvider.from_string_expression("id")

poetry.lock

Lines changed: 280 additions & 249 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,16 @@ nodestream = "^0.13.0"
2727
semchunk = "^2.2.0"
2828

2929
[tool.poetry.group.dev.dependencies]
30-
pytest = "^7.4.0"
30+
pytest = "^8.2.0"
3131
pytest-mock = "^3.11.1"
3232
ruff = "^0.0.284"
3333
isort = "^5.12.0"
3434
black = "^23.7.0"
3535
pytest-cov = "^4.1.0"
36+
pytest-asyncio = "^0.24.0"
37+
38+
[tool.isort]
39+
profile = "black"
3640

3741
[build-system]
3842
requires = ["poetry-core"]

tests/test_model.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from unittest.mock import Mock
2+
3+
from nodestream.model import DesiredIngestion, Node
4+
5+
from nodestream_plugin_semantic.model import Content, hash
6+
7+
8+
def test_content_from_text():
    """Content.from_text stores the text, derives the id from it, and has no parent."""
    raw = "test content"
    built = Content.from_text(raw)

    assert built.content == raw
    # `hash` here is the helper imported from nodestream_plugin_semantic.model.
    assert built.id == hash(raw)
    assert built.parent is None
14+
15+
16+
def test_content_add_metadata():
    """add_metadata records the key/value pair on the content's metadata."""
    subject = Content.from_text("test content")
    subject.add_metadata("key", "value")

    expected_metadata = {"key": "value"}
    assert subject.metadata == expected_metadata
20+
21+
22+
def test_content_split_on_delimiter():
    """split_on_delimiter yields one child per segment, each parented to the source."""
    source = Content.from_text("line1\nline2\nline3")
    pieces = list(source.split_on_delimiter("\n"))

    assert len(pieces) == 3
    for piece, expected_text in zip(pieces, ("line1", "line2", "line3")):
        assert piece.content == expected_text
    # Every produced chunk must point back at the content it was split from.
    for piece in pieces:
        assert piece.parent == source
31+
32+
33+
def test_content_assign_embedding():
    """assign_embedding stores the given vector on the content."""
    vector = [0.1, 0.2, 0.3]
    subject = Content.from_text("test content")

    subject.assign_embedding(vector)

    assert subject.embedding == vector
38+
39+
40+
def test_content_apply_to_node():
    """apply_to_node sets the node type, the id key, and the content property."""
    content = Content.from_text("test content")
    node = Mock(spec=Node)

    content.apply_to_node("test_type", node)

    # Previously this line was `node.type = "test_type"`, an assignment that
    # overwrote the mock and verified nothing. Assert the value instead.
    assert node.type == "test_type"
    node.key_values.set_property.assert_called_with("id", content.id)
    # assert_any_call: other properties may be set in addition to the content.
    node.properties.set_property.assert_any_call("content", content.content)
47+
48+
49+
def test_content_make_ingestible():
    """make_ingestible builds a source node plus one inbound relationship to the parent."""
    parent_content = Content.from_text("parent content")
    child_content = Content.from_text("child content", parent=parent_content)

    ingest = child_content.make_ingestible("test_type", "test_relationship")

    assert isinstance(ingest, DesiredIngestion)
    assert ingest.source.type == "test_type"
    # The comparisons below were bare `==` expressions with no `assert`, so
    # their results were discarded and nothing was checked.
    assert ingest.source.key_values == {"id": child_content.id}
    # NOTE(review): properties are checked per key rather than by whole-dict
    # equality, on the assumption that the node may carry extra properties
    # beyond "content" -- confirm against nodestream's Node defaults.
    assert ingest.source.properties["content"] == child_content.content

    assert len(ingest.relationships) == 1
    relationship = ingest.relationships[0]
    assert relationship.relationship.type == "test_relationship"
    assert relationship.outbound is False
    assert relationship.to_node.type == "test_type"
    assert relationship.to_node.key_values == {"id": parent_content.id}
    assert relationship.to_node.properties["content"] == parent_content.content

0 commit comments

Comments
 (0)