Skip to content

Commit fea31e7

Browse files
committed
wip
1 parent a8d6daa commit fea31e7

File tree

7 files changed

+187
-28
lines changed

7 files changed

+187
-28
lines changed

share/admin/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ class RawDatumAdmin(admin.ModelAdmin):
5858
list_display = ('id', 'identifier', 'source_config_label', 'datestamp', 'date_created', 'date_modified', )
5959
readonly_fields = ('datum__pre', 'sha256')
6060
exclude = ('datum',)
61-
raw_id_fields = ('jobs',)
6261
paginator = TimeLimitedPaginator
6362

6463
def identifier(self, obj):
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Generated by Django 3.2.25 on 2025-03-06 19:58
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Schema migration for the `share` app, applied after migration 0076.

    Clears two `unique_together` options, removes the listed fields,
    alters `rawdatum.no_output`, and deletes the listed models.

    NOTE(review): the operation order below is kept exactly as generated;
    presumably relation fields are removed first so the models can be
    deleted afterwards -- confirm before reordering anything.
    """

    dependencies = [
        ('share', '0076_rawdatum_share_rawdatum_expiration_idx'),
    ]

    operations = [
        # formattedmetadatarecord: clear unique_together, then remove `suid`
        migrations.AlterUniqueTogether(name='formattedmetadatarecord', unique_together=None),
        migrations.RemoveField(model_name='formattedmetadatarecord', name='suid'),

        # harvestjob: clear unique_together, then remove `source_config`
        migrations.AlterUniqueTogether(name='harvestjob', unique_together=None),
        migrations.RemoveField(model_name='harvestjob', name='source_config'),

        # fields removed from models that are deleted at the end of this list
        migrations.RemoveField(model_name='normalizeddata', name='raw'),
        migrations.RemoveField(model_name='normalizeddata', name='source'),
        migrations.RemoveField(model_name='normalizeddata', name='tasks'),
        migrations.RemoveField(model_name='providerregistration', name='submitted_by'),
        migrations.RemoveField(model_name='rawdatumjob', name='datum'),
        migrations.RemoveField(model_name='rawdatumjob', name='job'),
        migrations.RemoveField(model_name='sourcestat', name='config'),

        migrations.DeleteModel(name='PGLock'),

        # rawdatum: remove the `jobs` field
        migrations.RemoveField(model_name='rawdatum', name='jobs'),

        # sourceconfig: fields removed from the model
        migrations.RemoveField(model_name='sourceconfig', name='earliest_date'),
        migrations.RemoveField(model_name='sourceconfig', name='full_harvest'),
        migrations.RemoveField(model_name='sourceconfig', name='harvest_after'),
        migrations.RemoveField(model_name='sourceconfig', name='harvest_interval'),
        migrations.RemoveField(model_name='sourceconfig', name='harvester_key'),
        migrations.RemoveField(model_name='sourceconfig', name='harvester_kwargs'),
        migrations.RemoveField(model_name='sourceconfig', name='private_harvester_kwargs'),
        migrations.RemoveField(model_name='sourceconfig', name='private_transformer_kwargs'),
        migrations.RemoveField(model_name='sourceconfig', name='rate_limit_allowance'),
        migrations.RemoveField(model_name='sourceconfig', name='rate_limit_period'),
        migrations.RemoveField(model_name='sourceconfig', name='regulator_steps'),
        migrations.RemoveField(model_name='sourceconfig', name='transformer_kwargs'),

        # rawdatum.no_output: nullable boolean with the help text below
        migrations.AlterField(
            model_name='rawdatum',
            name='no_output',
            field=models.BooleanField(help_text='Indicates that this RawDatum resulted in an empty graph when transformed. This allows the RawDataJanitor to find records that have not been processed. Records that result in an empty graph will not have an Indexcard associated with them, which would otherwise look like data that has not yet been processed.', null=True),
        ),

        # finally, delete the models themselves
        migrations.DeleteModel(name='FormattedMetadataRecord'),
        migrations.DeleteModel(name='HarvestJob'),
        migrations.DeleteModel(name='NormalizedData'),
        migrations.DeleteModel(name='ProviderRegistration'),
        migrations.DeleteModel(name='RawDatumJob'),
        migrations.DeleteModel(name='SourceStat'),
    ]

share/models/ingest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class SourceConfig(models.Model):
132132

133133
source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='source_configs')
134134
base_url = models.URLField(null=True)
135+
transformer_key = models.TextField(null=True)
135136

136137
disabled = models.BooleanField(default=False)
137138

tests/factories/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class SourceConfigFactory(DjangoModelFactory):
4646
label = factory.Faker('sentence')
4747
base_url = factory.Faker('url')
4848
source = factory.SubFactory(SourceFactory)
49+
transformer_key = None
4950

5051
class Meta:
5152
model = share_db.SourceConfig

tests/share/search/_util.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
1+
import typing
2+
13
from tests import factories
24

35
from primitive_metadata import primitive_rdf as rdf
46

57
from trove import models as trove_db
6-
from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT
8+
from trove import digestive_tract
79

810

9-
def create_indexcard(focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard:
11+
def create_indexcard(
    focus_iri: str,
    rdf_tripledict: rdf.RdfTripleDictionary,
    deriver_iris: typing.Collection[str] = (),
) -> trove_db.Indexcard:
    """Create an indexcard for tests, fill in its content, and optionally derive.

    Args:
        focus_iri: focus iri passed along to `update_indexcard_content`.
        rdf_tripledict: rdf content passed along to `update_indexcard_content`.
        deriver_iris: when non-empty, passed to `digestive_tract.derive`
            together with the new indexcard; when empty, nothing is derived.

    Returns:
        The newly created `trove_db.Indexcard`.
    """
    # each card gets its own freshly made suid from the factory
    _card = trove_db.Indexcard.objects.create(
        source_record_suid=factories.SourceUniqueIdentifierFactory(),
    )
    update_indexcard_content(_card, focus_iri, rdf_tripledict)
    if not deriver_iris:
        return _card
    digestive_tract.derive(_card, deriver_iris)
    return _card
1922

2023

@@ -33,7 +36,6 @@ def update_indexcard_content(
3336
'turtle_checksum_iri': 'foo', # not enforced
3437
},
3538
)
36-
self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri
3739

3840

3941
def create_supplement(

tests/share/search/index_strategy/_common_trovesearch_tests.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@
66

77
from primitive_metadata import primitive_rdf as rdf
88

9-
from tests import factories
109
from share.search import messages
1110
from trove import models as trove_db
1211
from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams
1312
from trove.trovesearch.search_handle import PropertypathUsage
14-
from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT
13+
from trove.vocab.namespaces import RDFS, RDF, DCTERMS, OWL, FOAF, DCAT, TROVE
14+
from tests.share.search._util import (
15+
create_indexcard,
16+
update_indexcard_content,
17+
create_supplement,
18+
)
19+
1520
from ._with_real_services import RealElasticTestCase
1621

1722

@@ -214,7 +219,6 @@ def _assert_cardsearch_iris(self, queryparams: dict, expected_focus_iris: Iterab
214219
assert isinstance(_cardsearch_params, CardsearchParams)
215220
_cardsearch_handle = self.index_strategy.pls_handle_cardsearch(_cardsearch_params)
216221
# assumes all results fit on one page
217-
breakpoint() # TODO: _indexcard_focus_by_uuid
218222
_actual_result_iris: set[str] | list[str] = [
219223
self._indexcard_focus_by_uuid[_result.card_uuid]
220224
for _result in _cardsearch_handle.search_result_page
@@ -300,23 +304,23 @@ def _fill_test_data_for_querying(self):
300304
FOAF.name: {rdf.literal('some one else')},
301305
},
302306
})
303-
self._create_supplement(_card_a, BLARG.a, {
307+
create_supplement(_card_a, BLARG.a, {
304308
BLARG.a: {
305309
DCTERMS.replaces: {BLARG.a_past},
306310
DCAT.servesDataset: {
307311
rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(10)}}),
308312
},
309313
},
310314
})
311-
self._create_supplement(_card_b, BLARG.b, {
315+
create_supplement(_card_b, BLARG.b, {
312316
BLARG.b: {
313317
DCTERMS.replaces: {BLARG.b_past},
314318
DCAT.servesDataset: {
315319
rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(7)}}),
316320
},
317321
},
318322
})
319-
self._create_supplement(_card_c, BLARG.c, {
323+
create_supplement(_card_c, BLARG.c, {
320324
BLARG.c: {
321325
DCTERMS.replaces: {BLARG.c_past},
322326
DCAT.servesDataset: {
@@ -607,6 +611,20 @@ def valuesearch_sameas_cases(self):
607611
{BLARG.subj_ac, BLARG.subj_a, BLARG.subj_c, BLARG.subj_bc},
608612
)
609613

614+
def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard:
    """Create an indexcard with an osfmap_json derivation and record its focus iri.

    Delegates to the module-level `create_indexcard`, then stores `focus_iri`
    in `self._indexcard_focus_by_uuid` under the new card's uuid (as a string).
    """
    _deriver_iris = (TROVE['derive/osfmap_json'],)
    _card = create_indexcard(focus_iri, rdf_tripledict, _deriver_iris)
    _uuid_key = str(_card.uuid)
    self._indexcard_focus_by_uuid[_uuid_key] = focus_iri
    return _card
618+
619+
def _update_indexcard_content(
    self,
    indexcard: trove_db.Indexcard,
    focus_iri: str,
    rdf_tripledict: rdf.RdfTripleDictionary,
) -> None:
    """Update an indexcard's content and record its (possibly new) focus iri.

    Delegates to the module-level `update_indexcard_content`, then stores
    `focus_iri` in `self._indexcard_focus_by_uuid` under the card's uuid
    (as a string).
    """
    update_indexcard_content(
        indexcard,
        focus_iri,
        rdf_tripledict,
    )
    # the focus is recorded only after the content update has returned
    _uuid_key = str(indexcard.uuid)
    self._indexcard_focus_by_uuid[_uuid_key] = focus_iri
627+
610628
def _index_indexcards(self, indexcards: Iterable[trove_db.Indexcard]):
611629
_messages_chunk = messages.MessagesChunk(
612630
messages.MessageType.UPDATE_INDEXCARD,

tests/share/search/index_strategy/test_sharev2_elastic8.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,36 @@
1-
import json
1+
from primitive_metadata import primitive_rdf as rdf
22

3-
from tests import factories
43
from share.search import messages
54
from share.search.index_strategy.sharev2_elastic8 import Sharev2Elastic8IndexStrategy
6-
from share.util import IDObfuscator
5+
from trove.vocab.namespaces import DCTERMS, SHAREv2
6+
from tests.share.search._util import create_indexcard
77
from ._with_real_services import RealElasticTestCase
88

99

10+
BLARG = rdf.IriNamespace('http://blarg.example/')
11+
12+
1013
class TestSharev2Elastic8(RealElasticTestCase):
1114
# for RealElasticTestCase
1215
def get_index_strategy(self):
    """Return the index strategy under test, named 'test_sharev2_elastic8'."""
    _strategy = Sharev2Elastic8IndexStrategy('test_sharev2_elastic8')
    return _strategy
1417

1518
def setUp(self):
    """Create the single indexcard that the happy-path tests index.

    The card's focus is `BLARG.hello`, its only content is an english
    `DCTERMS.title` of 'hello', and `SHAREv2.sharev2_elastic` is passed
    as the one deriver iri to `create_indexcard`.
    """
    super().setUp()
    self.__indexcard = create_indexcard(
        BLARG.hello,
        {
            BLARG.hello: {
                # build the literal with the `rdf.literal` helper, as the other
                # search tests do
                # NOTE(review): assumes `language=` is accepted by the helper
                # and not by the `rdf.Literal` constructor -- confirm against
                # primitive_rdf
                DCTERMS.title: {rdf.literal('hello', language='en')},
            },
        },
        deriver_iris=[SHAREv2.sharev2_elastic],
    )
2629

2730
def test_without_daemon(self):
2831
_messages_chunk = messages.MessagesChunk(
2932
messages.MessageType.INDEX_SUID,
30-
[self.__suid.id],
33+
[self.__indexcard.source_record_suid_id],
3134
)
3235
self._assert_happypath_without_daemon(
3336
_messages_chunk,
@@ -37,7 +40,7 @@ def test_without_daemon(self):
3740
def test_with_daemon(self):
3841
_messages_chunk = messages.MessagesChunk(
3942
messages.MessageType.INDEX_SUID,
40-
[self.__suid.id],
43+
[self.__indexcard.source_record_suid_id],
4144
)
4245
self._assert_happypath_with_daemon(
4346
_messages_chunk,

0 commit comments

Comments
 (0)