Add new snapshot view in history plugin (#3601)

leplatrem · web-flow · commit 680bda481c5d · 2025-11-12T17:40:31.000+01:00
* Add new snapshot view in history plugin

* Check permissions

* Add missing OpenAPI docs
diff --git a/docs/api/1.x/history.rst b/docs/api/1.x/history.rst
@@ -167,6 +167,56 @@ Purge  history
 Using the same querystring parameters as the GET endpoint, the deletion can be partial.
 
 
+.. _history-snapshot:
+
+Collection Snapshot
+===================
+
+.. http:get:: /buckets/(bucket_id)/snapshot/collections/(collection_id)@(timestamp)
+
+    :synopsis: Return the state of the collection at timestamp `timestamp`.
+
+    **Optional authentication**
+
+    **Example Request**
+
+    .. sourcecode:: bash
+
+        $ http GET http://localhost:8888/v1/buckets/blog/snapshot/collections/articles@1469006098757 --auth="bob:p4ssw0rd" --verbose
+
+    .. sourcecode:: http
+
+        GET /v1/buckets/blog/snapshot/collections/articles@1469006098757 HTTP/1.1
+        Accept: */*
+        Accept-Encoding: gzip, deflate
+        Authorization: Basic Ym9iOnA0c3N3MHJk
+        Connection: keep-alive
+        Host: localhost:8888
+
+    **Example Response**
+
+    .. sourcecode:: http
+
+        HTTP/1.1 200 OK
+        Access-Control-Expose-Headers: Content-Length, Expires, Alert, Retry-After, Last-Modified, Total-Objects, ETag, Pragma, Cache-Control, Backoff, Next-Page
+        Cache-Control: no-cache, no-store
+        Content-Length: 1906
+        Content-Type: application/json; charset=UTF-8
+        Date: Wed, 20 Jul 2016 09:15:02 GMT
+        Etag: "1469006098757"
+        Last-Modified: Wed, 20 Jul 2016 09:14:58 GMT
+        Server: waitress
+
+        {
+            "data": [
+                {
+                    "id": "b3b76c56-b6df-4195-8189-d79da4a128e1",
+                    "last_modified": 1469006098757,
+                    "title": "Original title"
+                }
+            ]
+        }
+
 Conflict resolution
 ===================
 
diff --git a/kinto/plugins/history/views.py b/kinto/plugins/history/views.py
@@ -1,8 +1,10 @@
 import colander
+from pyramid.httpexceptions import HTTPForbidden
 
-from kinto.core import resource
+from kinto.core import Service, resource, utils
+from kinto.core.resource.schema import ErrorResponseSchema
 from kinto.core.resource.viewset import ViewSet
-from kinto.core.storage import Filter
+from kinto.core.storage import Filter, Sort
 from kinto.core.utils import instance_uri
 
 
@@ -64,3 +66,120 @@ def _extract_filters(self):
             filters_str_id.append(filt)
 
         return filters_str_id
+
+
+# Service for the collection snapshot endpoint. The `{timestamp}` segment
+# follows the collection id after an `@` sign; the GET handler is attached
+# with `@snapshot.get` further down in this module.
+snapshot = Service(
+    name="history_snapshot",
+    path="/buckets/{bucket_id}/snapshot/collections/{collection_id}@{timestamp}",
+    description="Reconstruct collection at given timestamp",
+)
+
+
+def timestamp_validator(request, **kwargs):
+    """
+    Check the ``timestamp`` path segment of the request.
+
+    The value read from ``request.matchdict`` must convert with ``int()`` and
+    be zero or greater. Otherwise an "Invalid timestamp" error is added to
+    ``request.errors`` under the ``path`` location.
+    """
+    raw_value = request.matchdict["timestamp"]
+    try:
+        is_valid = int(raw_value) >= 0
+    except ValueError:
+        # Not parseable as an integer at all.
+        is_valid = False
+
+    if not is_valid:
+        request.errors.add("path", "timestamp", "Invalid timestamp %r" % raw_value)
+
+
+class SnapshotPathSchema(colander.MappingSchema):
+    # Path parameters of the snapshot endpoint, one node per placeholder of
+    # the service path.
+    bucket_id = colander.SchemaNode(colander.String())
+    collection_id = colander.SchemaNode(colander.String())
+    # Declared as an integer; the view also converts it with ``int()``.
+    timestamp = colander.SchemaNode(colander.Integer())
+
+
+class SnapshotSchema(colander.MappingSchema):
+    # Request schema passed to ``@snapshot.get``: only the path is described.
+    path = SnapshotPathSchema()
+
+
+# Response descriptions, keyed by HTTP status code, passed as
+# ``response_schemas`` to ``@snapshot.get``.
+snapshot_response_schemas = {
+    "401": ErrorResponseSchema(description="The collection is not publicly readable."),
+    "403": ErrorResponseSchema(description="No permission to read this collection."),
+    "200": colander.SchemaNode(
+        colander.Mapping(),
+        description="Returns the records at the given timestamp.",
+    ),
+}
+
+
+@snapshot.get(
+    schema=SnapshotSchema(),
+    validators=(timestamp_validator,),
+    response_schemas=snapshot_response_schemas,
+)
+def get_snapshot(request):
+    """Reconstruct the collection's records as they were at the given timestamp.
+
+    Records whose current version is not newer than the timestamp are returned
+    as stored. Records modified or deleted afterwards are replaced by the state
+    held in their most recent history entry at or before the timestamp. Records
+    with no such entry (created afterwards) are left out, and so are records
+    whose restored state is a tombstone (already deleted at the timestamp).
+
+    :param request: current request; its ``matchdict`` provides ``bucket_id``,
+        ``collection_id`` and ``timestamp``.
+    :returns: ``{"data": records}`` sorted by ``last_modified``, most recent first.
+    :raises HTTPForbidden: if the permission check on the bucket and collection fails.
+    """
+    bucket_id = request.matchdict["bucket_id"]
+    collection_id = request.matchdict["collection_id"]
+    timestamp = int(request.matchdict["timestamp"])
+
+    bucket_uri = instance_uri(request, "bucket", id=bucket_id)
+    collection_uri = instance_uri(request, "collection", bucket_id=bucket_id, id=collection_id)
+
+    # Check that user has read permission on the collection.
+    # This is manual code, because we are outside the normal resource system.
+    if not request.registry.permission.check_permission(
+        request.prefixed_principals,
+        [
+            (bucket_uri, "read"),
+            (bucket_uri, "write"),
+            (collection_uri, "read"),
+            (collection_uri, "write"),
+        ],
+    ):
+        raise HTTPForbidden()
+
+    # Fetch every record of the collection, tombstones included, so that
+    # deletions made after the timestamp are seen as changes too.
+    all_records = request.registry.storage.list_all(
+        parent_id=collection_uri,
+        resource_name="record",
+        include_deleted=True,  # Include tombstones
+    )
+
+    # Single pass: records untouched since the timestamp go straight into the
+    # result (except tombstones); ids of the others must be looked up in history.
+    result_records = []
+    changed_rids = []
+    for record in all_records:
+        if record["last_modified"] > timestamp:
+            changed_rids.append(record["id"])
+        elif not record.get("deleted"):
+            result_records.append(record)
+
+    if changed_rids:
+        # History entries store the current version. We need to pick the most recent
+        # entry before the timestamp for each record_id to obtain the records' state
+        # before it was changed or deleted.
+        history_entries = request.registry.storage.list_all(
+            parent_id=bucket_uri,
+            resource_name="history",
+            filters=[
+                Filter("resource_name", "record", utils.COMPARISON.EQ),
+                Filter("collection_id", collection_id, utils.COMPARISON.EQ),
+                Filter("record_id", changed_rids, utils.COMPARISON.IN),
+                Filter("target.data.last_modified", timestamp, utils.COMPARISON.MAX),
+            ],
+            sorting=[Sort("last_modified", -1)],  # Most recent first
+            # TODO: add storage option to keep only the latest entry per record_id
+        )
+
+        # Entries come most recent first, so the first one seen per record wins.
+        most_recent_entry = {}
+        for entry in history_entries:
+            most_recent_entry.setdefault(entry["record_id"], entry)
+
+        for rid in changed_rids:
+            entry = most_recent_entry.get(rid)
+            if entry is None:
+                # Record was created after the timestamp, skip it.
+                continue
+            state = entry["target"]["data"]
+            if state.get("deleted"):
+                # The latest entry at the timestamp is a deletion: the record did
+                # not exist then (it was re-created or touched later), so a
+                # tombstone must not leak into the snapshot.
+                continue
+            result_records.append(state)
+
+    return {"data": sorted(result_records, key=lambda r: r["last_modified"], reverse=True)}
diff --git a/tests/plugins/test_history.py b/tests/plugins/test_history.py