# Prefix used to run a make target inside a one-off "app" container.
#
# The value is deliberately NOT wrapped in quotes: make stores quotes as part
# of the variable, so `$(MAKE_APP) target` would expand to
# `"docker compose run app make" target` and the whole quoted string would be
# looked up as a single (non-existent) command name.
#
# NOTE(review): every `docker compose run` leaves a stopped container behind
# unless `--rm` is passed; consider adding it, since prod/cron/pretalx is
# scheduled to run every hour.
MAKE_APP = docker compose run app make

echo:
	"Dummy target, to not run something accidentally"

prod/migrate:
	$(MAKE_APP) in-container/migrate

prod/shell:
	$(MAKE_APP) in-container/shell

prod/db_shell:
	$(MAKE_APP) in-container/db_shell

prod/manage:
	$(MAKE_APP) in-container/manage ARG=$(ARG)

# Scheduled by deploy/playbooks/04_cron.yml (minute 5 of every hour).
prod/cron/pretalx:
	$(MAKE_APP) in-container/manage ARG="download_pretalx_data --event=europython-2025"

logs:
	docker compose logs -f
{}
", json.dumps(obj.meta, indent=4)) pretty_meta.short_description = "Meta" - def pretty_content(self, obj): + def pretty_content(self, obj: Webhook): return format_html("
{}
def content_short(self, obj: "DiscordMessage") -> str:
    """Return a compact preview of the message content.

    Long messages are rendered as their first and last 10 characters joined
    by ``...``. Content that is not longer than such a preview (23
    characters) is returned unchanged, so a short message is never shown with
    its text duplicated or overlapping (previously ``"hello"`` was rendered
    as ``"hello...hello"``).
    """
    edge = 10
    ellipsis = "..."
    content = obj.content

    # Shortening only makes sense when the result is actually shorter.
    if len(content) <= 2 * edge + len(ellipsis):
        return content

    return f"{content[:edge]}{ellipsis}{content[-edge:]}"
{}
"""Download raw data from the pretalx API and store it as ``PretalxData`` rows."""

import logging
from typing import Any

import httpx
from core.models import PretalxData
from django.conf import settings

logger = logging.getLogger(__name__)

PRETALX_EVENTS = [
    "europython-2022",
    "europython-2023",
    "europython-2024",
    "europython-2025",
]

ENDPOINTS = {
    # Questions need to be passed to include answers in the same endpoint,
    # saving us later time with joining the answers.
    PretalxData.PretalxResources.submissions: "submissions/?questions=all",
    PretalxData.PretalxResources.speakers: "speakers/?questions=all",
}


JsonType = dict[str, Any]


class PretalxAPIError(Exception):
    """Raised when the pretalx API answers with a status code other than 200."""


def get_event_url(event: str) -> str:
    """Return the base API URL (with trailing slash) for a known event slug.

    Raises:
        ValueError: if ``event`` is not listed in ``PRETALX_EVENTS``. An
            explicit exception is used instead of ``assert`` so the check
            also runs when Python is started with ``-O``.
    """
    if event not in PRETALX_EVENTS:
        raise ValueError(
            f"Unknown pretalx event {event!r}; "
            f"expected one of: {', '.join(PRETALX_EVENTS)}"
        )

    return f"https://pretalx.com/api/events/{event}/"


def fetch_pretalx_data(
    event: str, resource: PretalxData.PretalxResources
) -> list[JsonType]:
    """Fetch every page of ``resource`` for ``event`` and return the merged results.

    Args:
        event: event slug; must be one of ``PRETALX_EVENTS``.
        resource: which resource to download; must have an entry in
            ``ENDPOINTS``.

    Returns:
        The ``results`` items of all pages, concatenated in page order.

    Raises:
        ValueError: if ``event`` is unknown.
        KeyError: if ``resource`` has no entry in ``ENDPOINTS``.
        PretalxAPIError: if any page is answered with a non-200 status code.
    """
    headers = {
        "Authorization": f"Token {settings.PRETALX_API_TOKEN}",
        "Content-Type": "application/json",
    }

    url = f"{get_event_url(event)}{ENDPOINTS[resource]}"

    # Pretalx paginates the output, so we need to do multiple requests and
    # then merge the pages into one big list.
    results: list[JsonType] = []
    page = 0

    # One client for the whole download, so the connection is reused across
    # pages instead of being re-established for every request.
    with httpx.Client(headers=headers) as client:
        # Each response carries the URL of the next page in "next" as long as
        # there is more data to fetch. On the last page it is None (falsy),
        # which stops the loop.
        while url:
            page += 1
            # Log before the request, so a failing page is visible in the logs.
            logger.info("Fetching data from %s, page %s", url, page)

            response = client.get(url)

            if response.status_code != 200:
                raise PretalxAPIError(
                    f"Error {response.status_code}: {response.text}"
                )

            data = response.json()
            results += data["results"]
            url = data["next"]

    return results


def _download_latest(
    event: str, resource: PretalxData.PretalxResources
) -> PretalxData:
    """Fetch ``resource`` for ``event`` and store it as a new ``PretalxData`` row."""
    data = fetch_pretalx_data(event, resource)

    return PretalxData.objects.create(resource=resource, content=data)


def download_latest_submissions(event: str) -> PretalxData:
    """Download all submissions of ``event`` and return the newly created row."""
    return _download_latest(event, PretalxData.PretalxResources.submissions)


def download_latest_speakers(event: str) -> PretalxData:
    """Download all speakers of ``event`` and return the newly created row."""
    return _download_latest(event, PretalxData.PretalxResources.speakers)
class PretalxData(models.Model):
    """
    Table to store raw data downloaded from pretalx for later parsing.

    We first download data from pretalx to this table, and then fire a separate
    background task that pulls data from this table and stores in separate
    "business" tables, like "Proposal" or "Speaker".
    """

    class PretalxResources(models.TextChoices):
        # Kinds of pretalx resources a row can hold (value, human-readable label).
        submissions = "submissions", "Submissions"
        speakers = "speakers", "Speakers"
        schedule = "schedule", "Schedule"

    # Random identifier generated per row; NOTE(review): not declared unique.
    uuid = models.UUIDField(default=uuid.uuid4)
    resource = models.CharField(
        max_length=255,
        choices=PretalxResources.choices,
    )
    # Raw JSON payload; no schema is enforced at this level. The download
    # helpers in core.integrations.pretalx store the merged list of API
    # results here.
    content = models.JSONField()

    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)
    # Empty by default. NOTE(review): presumably filled in once the row has
    # been parsed into the "business" tables -- confirm against the task code.
    processed_at = models.DateTimeField(blank=True, null=True)

    def __str__(self) -> str:
        return f"{self.uuid}"
"""Tests for the pretalx integration; all HTTP traffic is mocked with respx."""

import pytest
import respx
from core.integrations import pretalx
from core.models import PretalxData
from httpx import Response


def _two_pages_generator(url):
    """
    Yield two mocked pretalx responses: a first page linking to
    ``{url}&page=2`` and a final page without a next link.
    """
    yield Response(
        200,
        json={
            "results": [
                {"hello": "world"},
            ],
            "next": f"{url}&page=2",
        },
    )

    yield Response(
        200,
        json={
            "results": [
                {"foo": "bar"},
            ],
            # It's important to make it last page in tests.
            # Otherwise it will be infinite loop :)
            "next": None,
        },
    )


def submissions_pages_generator(url):
    """
    Generator to simulate pagination of the submissions endpoint.

    Extracted to a generator because we use it in multiple places
    """
    yield from _two_pages_generator(url)


def speaker_pages_generator(url):
    """
    Generator to simulate pagination of the speakers endpoint.

    Extracted to a generator because we use it in multiple places
    """
    yield from _two_pages_generator(url)


@respx.mock
def test_fetch_submissions_from_pretalx():
    url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all"
    data = submissions_pages_generator(url)
    respx.get(url).mock(return_value=next(data))
    respx.get(url + "&page=2").mock(return_value=next(data))

    submissions = pretalx.fetch_pretalx_data(
        "europython-2025",
        PretalxData.PretalxResources.submissions,
    )

    assert submissions == [
        {"hello": "world"},
        {"foo": "bar"},
    ]


@respx.mock
def test_fetch_speakers_from_pretalx():
    url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all"
    data = speaker_pages_generator(url)
    respx.get(url).mock(return_value=next(data))
    respx.get(url + "&page=2").mock(return_value=next(data))

    speakers = pretalx.fetch_pretalx_data(
        "europython-2025",
        PretalxData.PretalxResources.speakers,
    )

    assert speakers == [
        {"hello": "world"},
        {"foo": "bar"},
    ]


@respx.mock
@pytest.mark.django_db
def test_download_latest_submissions():
    url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all"
    data = submissions_pages_generator(url)
    respx.get(url).mock(return_value=next(data))
    respx.get(url + "&page=2").mock(return_value=next(data))

    pretalx.download_latest_submissions("europython-2025")

    pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.submissions)
    assert pd.resource == "submissions"
    assert pd.content == [
        {"hello": "world"},
        {"foo": "bar"},
    ]


@respx.mock
@pytest.mark.django_db
def test_download_latest_speakers():
    url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all"
    data = speaker_pages_generator(url)
    respx.get(url).mock(return_value=next(data))
    respx.get(url + "&page=2").mock(return_value=next(data))

    pretalx.download_latest_speakers("europython-2025")

    pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.speakers)
    assert pd.resource == "speakers"
    assert pd.content == [
        {"hello": "world"},
        {"foo": "bar"},
    ]