Skip to content

Commit a6952ab

Browse files
committed
wip
1 parent c76d227 commit a6952ab

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+1150
-1608
lines changed

.github/workflows/run_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ jobs:
7474

7575
- name: run tests
7676
run: |
77-
coverage run -m pytest --create-db
77+
coverage run -m pytest --create-db -x
7878
coverage xml -o _shtrove_coverage.xml
7979
env:
8080
DATABASE_PASSWORD: postgres

api/base/views.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,11 @@ def initial(self, request, *args, **kwargs):
4545
class RootView(views.APIView):
4646
def get(self, request):
4747
links = {
48-
'rawdata': 'api:rawdatum-list',
4948
'sources': 'api:source-list',
5049
'users': 'api:user-list',
5150
'status': 'api:status',
52-
'rss': 'api:rss',
53-
'atom': 'api:atom',
51+
'rss': 'api:feeds.rss',
52+
'atom': 'api:feeds.atom',
5453
}
5554
ret = {k: request.build_absolute_uri(reverse(v)) for k, v in links.items()}
5655
return Response(ret)

api/rawdata/serializers.py

Lines changed: 0 additions & 10 deletions
This file was deleted.

api/rawdata/urls.py

Lines changed: 0 additions & 7 deletions
This file was deleted.

api/rawdata/views.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

api/sourceconfigs/views.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
from api.sourceconfigs.serializers import SourceConfigSerializer
44
from api.base import ShareViewSet
5+
from api.pagination import CursorPagination
56

67
from share.models import SourceConfig
78

89

910
class SourceConfigViewSet(ShareViewSet, viewsets.ReadOnlyModelViewSet):
1011
serializer_class = SourceConfigSerializer
12+
pagination_class = CursorPagination
1113

1214
ordering = ('id', )
1315

api/urls.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
urlpatterns = [
1010
url('^$', RootView.as_view()),
1111
url('^', include('api.banners.urls')),
12-
url('^', include('api.rawdata.urls')),
1312
url('^', include('api.sourceconfigs.urls')),
1413
url('^', include('api.sources.urls')),
1514
url('^', include('api.suids.urls')),

project/settings.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def split(string, delim):
358358
CELERY_TASK_DEFAULT_ROUTING_KEY = 'share_default'
359359

360360
URGENT_TASK_QUEUES = {
361-
'trove.digestive_tract.task__extract_and_derive': 'digestive_tract.urgent',
361+
'trove.digestive_tract.task__derive': 'digestive_tract.urgent',
362362
}
363363

364364

@@ -440,6 +440,10 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw):
440440

441441
SHARE_WEB_URL = os.environ.get('SHARE_WEB_URL', 'http://localhost:8003').rstrip('/') + '/'
442442
SHARE_USER_AGENT = os.environ.get('SHARE_USER_AGENT', 'SHAREbot/{} (+{})'.format(VERSION, SHARE_WEB_URL))
443+
SHARE_ADMIN_USERNAME = os.environ.get('SHARE_ADMIN_USERNAME', 'admin')
444+
SHARE_ADMIN_PASSWORD = os.environ.get('SHARE_ADMIN_PASSWORD')
445+
if DEBUG and (SHARE_ADMIN_PASSWORD is None):
446+
SHARE_ADMIN_PASSWORD = 'password'
443447

444448
# Skip some of the more intensive operations on works that surpass these limits
445449
SHARE_LIMITS = {

share/admin/__init__.py

Lines changed: 7 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
from django.apps import apps
2-
from django.urls import re_path as url
32
from django.contrib import admin
4-
from django.http import HttpResponseRedirect
5-
from django.template.response import TemplateResponse
6-
from django.urls import path, reverse
7-
from django.utils.html import format_html
3+
from django.urls import path
84

95
from oauth2_provider.models import AccessToken
106

@@ -15,7 +11,6 @@
1511
CeleryTaskResult,
1612
FeatureFlag,
1713
IndexBackfill,
18-
RawDatum,
1914
ShareUser,
2015
SiteBanner,
2116
Source,
@@ -51,26 +46,6 @@ class ShareUserAdmin(admin.ModelAdmin):
5146
search_fields = ['username']
5247

5348

54-
@linked_fk('suid')
55-
class RawDatumAdmin(admin.ModelAdmin):
56-
show_full_result_count = False
57-
list_select_related = ('suid__source_config', )
58-
list_display = ('id', 'identifier', 'source_config_label', 'datestamp', 'date_created', 'date_modified', )
59-
readonly_fields = ('datum__pre', 'sha256')
60-
exclude = ('datum',)
61-
paginator = TimeLimitedPaginator
62-
63-
def identifier(self, obj):
64-
return obj.suid.identifier
65-
66-
def source_config_label(self, obj):
67-
return obj.suid.source_config.label
68-
69-
def datum__pre(self, instance):
70-
return format_html('<pre>{}</pre>', instance.datum)
71-
datum__pre.short_description = 'datum' # type: ignore[attr-defined]
72-
73-
7449
class AccessTokenAdmin(admin.ModelAdmin):
7550
raw_id_fields = ('user',)
7651
list_display = ('token', 'user', 'scope')
@@ -91,11 +66,10 @@ def save_model(self, request, obj, form, change):
9166

9267
@linked_fk('source')
9368
class SourceConfigAdmin(admin.ModelAdmin):
94-
list_display = ('label', 'source_', 'version', 'enabled', 'button_actions')
69+
list_display = ('label', 'source_', 'version', 'enabled',)
9570
list_select_related = ('source',)
96-
readonly_fields = ('button_actions',)
9771
search_fields = ['label', 'source__name', 'source__long_title']
98-
actions = ['schedule_full_ingest']
72+
actions = ['schedule_derive']
9973

10074
def source_(self, obj):
10175
return obj.source.long_title
@@ -104,42 +78,10 @@ def enabled(self, obj):
10478
return not obj.disabled
10579
enabled.boolean = True # type: ignore[attr-defined]
10680

107-
@admin.action(description='schedule re-ingest of all raw data for each source config')
108-
def schedule_full_ingest(self, request, queryset):
81+
@admin.action(description='schedule re-derive of all cards for each selected source config')
82+
def schedule_derive(self, request, queryset):
10983
for _id in queryset.values_list('id', flat=True):
110-
digestive_tract.task__schedule_extract_and_derive_for_source_config.delay(_id)
111-
112-
def get_urls(self):
113-
return [
114-
url(
115-
r'^(?P<config_id>.+)/ingest/$',
116-
self.admin_site.admin_view(self.start_ingest),
117-
name='source-config-ingest'
118-
)
119-
] + super().get_urls()
120-
121-
def button_actions(self, obj):
122-
return format_html(
123-
' '.join((
124-
('<a class="button" href="{ingest_href}">Ingest</a>' if not obj.disabled else ''),
125-
)),
126-
ingest_href=reverse('admin:source-config-ingest', args=[obj.pk]),
127-
)
128-
button_actions.short_description = 'Buttons' # type: ignore[attr-defined]
129-
130-
def start_ingest(self, request, config_id):
131-
config = self.get_object(request, config_id)
132-
if request.method == 'POST':
133-
digestive_tract.task__schedule_extract_and_derive_for_source_config.delay(config.pk)
134-
url = reverse(
135-
'admin:share_sourceconfig_changelist',
136-
current_app=self.admin_site.name,
137-
)
138-
return HttpResponseRedirect(url)
139-
else:
140-
context = self.admin_site.each_context(request)
141-
context['source_config'] = config
142-
return TemplateResponse(request, 'admin/start-ingest.html', context)
84+
digestive_tract.task__schedule_derive_for_source_config.delay(_id)
14385

14486

14587
@linked_fk('user')
@@ -157,26 +99,16 @@ def access_token(self, obj):
15799
@linked_fk('source_config')
158100
@linked_fk('focus_identifier')
159101
@linked_many('formattedmetadatarecord_set', defer=('formatted_metadata',))
160-
@linked_many('raw_data', defer=('datum',))
161102
@linked_many('indexcard_set')
162103
class SourceUniqueIdentifierAdmin(admin.ModelAdmin):
163104
readonly_fields = ('identifier',)
164105
paginator = TimeLimitedPaginator
165-
actions = ('reingest', 'delete_cards_for_suid')
106+
actions = ('delete_cards_for_suid',)
166107
list_filter = (SourceConfigFilter,)
167108
list_select_related = ('source_config',)
168109
show_full_result_count = False
169110
search_fields = ('identifier',)
170111

171-
def reingest(self, request, queryset):
172-
_raw_id_queryset = (
173-
RawDatum.objects
174-
.latest_by_suid_queryset(queryset)
175-
.values_list('id', flat=True)
176-
)
177-
for _raw_id in _raw_id_queryset:
178-
digestive_tract.task__extract_and_derive.delay(raw_id=_raw_id)
179-
180112
def delete_cards_for_suid(self, request, queryset):
181113
for suid in queryset:
182114
digestive_tract.expel_suid(suid)
@@ -220,7 +152,6 @@ class FeatureFlagAdmin(admin.ModelAdmin):
220152
admin_site.register(CeleryTaskResult, CeleryTaskResultAdmin)
221153
admin_site.register(FeatureFlag, FeatureFlagAdmin)
222154
admin_site.register(IndexBackfill, IndexBackfillAdmin)
223-
admin_site.register(RawDatum, RawDatumAdmin)
224155
admin_site.register(ShareUser, ShareUserAdmin)
225156
admin_site.register(SiteBanner, SiteBannerAdmin)
226157
admin_site.register(Source, SourceAdmin)

share/exceptions.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,3 @@
11

22
class ShareException(Exception):
33
pass
4-
5-
6-
class HarvestError(ShareException):
7-
pass
8-
9-
10-
class IngestError(ShareException):
11-
pass
12-
13-
14-
class TransformError(IngestError):
15-
pass
16-
17-
18-
class RegulateError(IngestError):
19-
pass
20-
21-
22-
class MergeRequired(IngestError):
23-
"""A node disambiguated to multiple objects in the database.
24-
"""
25-
pass
26-
27-
28-
class IngestConflict(IngestError):
29-
"""Multiple data being ingested at the same time conflicted.
30-
"""
31-
pass

0 commit comments

Comments
 (0)