Skip to content

Commit 22b73e8

Browse files
committed
🐛 Ensure file properties are passed through
1 parent e031767 commit 22b73e8

File tree

4 files changed: 31 additions and 10 deletions.

4 files changed: 31 additions and 10 deletions.

openaleph_procrastinate/helpers.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from contextlib import contextmanager
66
from pathlib import Path
7-
from typing import ContextManager, Generator
7+
from typing import ContextManager, Generator, Iterable
88

99
from anystore.logic.virtual import VirtualIO
1010
from followthemoney import EntityProxy
@@ -60,6 +60,18 @@ def load_entity(dataset: str, entity_id: str) -> EntityProxy:
6060
return entity
6161

6262

63+
def load_entities(
64+
dataset: str, entity_ids: Iterable[str]
65+
) -> Generator[EntityProxy, None, None]:
66+
"""
67+
Batch retrieve entities from the fragment store.
68+
"""
69+
store = get_fragments(
70+
dataset, database_uri=settings.fragments_uri, **sqlalchemy_pool
71+
)
72+
yield from store.iterate(entity_ids)
73+
74+
6375
@contextmanager
6476
def entity_writer(
6577
dataset: str, origin: str = OPAL_ORIGIN

openaleph_procrastinate/model.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
MIN_PRIORITY,
2121
OpenAlephSettings,
2222
)
23-
from openaleph_procrastinate.util import make_checksum_entity
23+
from openaleph_procrastinate.util import make_file_entity
2424

2525
settings = OpenAlephSettings()
2626

@@ -125,7 +125,9 @@ def log(self) -> BoundLogger:
125125
batch=self.batch,
126126
)
127127

128-
def get_writer(self: Self, origin: str = helpers.OPAL_ORIGIN) -> ContextManager[BulkLoader]:
128+
def get_writer(
129+
self: Self, origin: str = helpers.OPAL_ORIGIN
130+
) -> ContextManager[BulkLoader]:
129131
"""Get the writer for the dataset of the current job"""
130132
return helpers.entity_writer(self.dataset, origin)
131133

@@ -145,8 +147,8 @@ def load_entities(self: Self) -> Generator[EntityProxy, None, None]:
145147
if not settings.procrastinate_dehydrate_entities:
146148
yield from self.get_entities()
147149
else:
148-
for data in self.payload["entities"]:
149-
yield helpers.load_entity(self.dataset, data["id"])
150+
entity_ids = [e["id"] for e in self.payload["entities"]]
151+
yield from helpers.load_entities(self.dataset, entity_ids)
150152

151153
# Helpers for file jobs that access the servicelayer archive
152154

@@ -201,7 +203,7 @@ def from_entities(
201203
context: Job context
202204
"""
203205
if dehydrate:
204-
entities_ = (make_checksum_entity(e, quiet=True) for e in entities)
206+
entities_ = (make_file_entity(e, quiet=True) for e in entities)
205207
entities = (e for e in entities_ if e is not None)
206208
return cls(
207209
dataset=dataset,

openaleph_procrastinate/util.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def make_stub_entity(
2222
)
2323

2424

25-
def make_checksum_entity(
25+
def make_file_entity(
2626
e: E, entity_type: Type[E] | None = ValueEntity, quiet: bool | None = False
2727
) -> E | None:
2828
"""
@@ -32,4 +32,6 @@ def make_checksum_entity(
3232
stub = make_stub_entity(e, entity_type)
3333
if stub is not None:
3434
stub.add("contentHash", e.get("contentHash", quiet=q), quiet=q)
35+
stub.add("fileName", e.get("fileName", quiet=q), quiet=q)
36+
stub.add("parent", e.get("parent", quiet=q), quiet=q)
3537
return stub

tests/test_util.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ def test_util():
88
e.id = "a"
99
e.add("fileName", "test.txt")
1010
e.add("contentHash", "123")
11+
e.add("parent", "1")
1112

1213
assert util.make_stub_entity(e).to_dict() == {
1314
"caption": "test.txt",
@@ -18,16 +19,20 @@ def test_util():
1819
"referents": [],
1920
}
2021

21-
assert util.make_checksum_entity(e).to_dict() == {
22+
assert util.make_file_entity(e).to_dict() == {
2223
"caption": "test.txt",
2324
"id": "a",
2425
"schema": "Document",
25-
"properties": {"contentHash": ["123"]},
26+
"properties": {
27+
"contentHash": ["123"],
28+
"fileName": ["test.txt"],
29+
"parent": ["1"],
30+
},
2631
"datasets": ["default"],
2732
"referents": [],
2833
}
2934

3035
for i in ("", None):
3136
e.id = i
3237
assert util.make_stub_entity(e) is None
33-
assert util.make_checksum_entity(e) is None
38+
assert util.make_file_entity(e) is None

0 commit comments

Comments (0)