Skip to content

Commit bca088f

Browse files
committed
id: using hashlib implemented
1 parent 9895288 commit bca088f

File tree

2 files changed

+118
-55
lines changed

2 files changed

+118
-55
lines changed

backend/datamodel.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,42 @@
1-
from datetime import datetime
21

3-
from pydantic import BaseModel, Field
2+
from datetime import datetime
3+
from typing import Optional, Dict, Any
4+
from pydantic import BaseModel, Field, root_validator
5+
import hashlib
6+
import json
47

58

69
class SchemaDefinition(BaseModel):
    """
    Schema record whose `id` is a SHA-256 hash of {name, version, content}.

    The `id` is always derived by `assign_id_from_content`; any `id` passed
    in by the caller is overwritten.
    """

    id: str = Field(..., description="Unique identifier for the schema (content hash)")
    name: Optional[str] = Field(
        None,
        description="Human-readable name of the schema",
        min_length=3,
    )
    version: Optional[str] = Field(None, description="Version of the schema")
    content: Optional[Dict[str, Any]] = Field(
        None, description="The actual schema content as a dictionary"
    )
    updated_at: Optional[datetime] = Field(
        None, description="Timestamp of the last update"
    )

    @staticmethod
    def _compute_hash(name: Optional[str], version: Optional[str], content: Optional[Dict[str, Any]]) -> str:
        """
        Compute a deterministic SHA-256 hash from the canonical JSON of {name, version, content}.

        The payload is serialized with sorted keys and compact separators,
        without ASCII escaping, then UTF-8 encoded before hashing.

        Returns:
            The hex digest of the hash.
        """
        payload = {"name": name, "version": version, "content": content}
        canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    @root_validator(pre=True)
    def assign_id_from_content(cls, values):
        """
        Always (re)compute `id` from the content so it is deterministic and content-addressed.
        Any provided `id` is ignored to ensure correctness.

        Runs before field validation (`pre=True`), so the hash is taken over
        the raw input values for name, version and content.

        Returns:
            The input values with `id` set to the computed hash.
        """
        # Work on a shallow copy so the caller's input mapping is not mutated.
        values = dict(values)
        values["id"] = cls._compute_hash(
            values.get("name"),
            values.get("version"),
            values.get("content"),
        )
        # A root validator must hand the values back; without this return the
        # validator yields None and model construction fails.
        return values

backend/main.py

Lines changed: 88 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
12
from datetime import datetime
2-
from typing import Any, Optional
3-
from uuid import uuid4
3+
from typing import Any, List, Dict, Optional
44

55
from database import schemas_collection
66
from datamodel import SchemaDefinition
77
from fastapi import FastAPI, HTTPException
88
from fastapi.encoders import jsonable_encoder
99
from fastapi.middleware.cors import CORSMiddleware
1010

11+
1112
# ---- FastAPI app & CORS ----
1213
app = FastAPI()
1314
app.add_middleware(
@@ -19,91 +20,130 @@
1920
)
2021

2122

23+
def _compute_hash_from_doc(doc: Dict[str, Any]) -> str:
    """
    Return the content hash for a raw document.

    Reads `name`, `version` and `content` from `doc` (missing keys count as
    None) and delegates to `SchemaDefinition._compute_hash`, so no model
    instance has to be built.
    """
    # Function-scope import, kept as in the original implementation.
    from datamodel import SchemaDefinition

    name, version, content = (doc.get(key) for key in ("name", "version", "content"))
    return SchemaDefinition._compute_hash(name, version, content)
35+
36+
2237
# ---- Routes ----
23-
@app.get("/schemas", response_model=List[SchemaDefinition])
async def get_all_schemas() -> List[Dict[str, Any]]:
    """
    Retrieve all schemas. Ensures each document has `id` as the content hash
    and a valid `updated_at`. If the stored `id` is missing/mismatched, or
    `updated_at` is missing, the corrected values are written back to the
    collection so it stays consistent.

    Returns:
        The JSON-encoded list of documents, without the internal `_id`.
    """
    # Materialize first so the write-backs below do not run against a live cursor.
    docs = list(schemas_collection.find())
    normalized: List[Dict[str, Any]] = []

    for d in docs:
        # Collect every correction for this document, then persist them together.
        fixes: Dict[str, Any] = {}

        # Heal legacy/mismatched ids (updated_at is not touched for this alone).
        computed_id = _compute_hash_from_doc(d)
        if d.get("id") != computed_id:
            fixes["id"] = computed_id

        # Ensure updated_at exists (server-side default).
        if d.get("updated_at") is None:
            fixes["updated_at"] = datetime.utcnow()

        if fixes:
            d.update(fixes)
            # One round trip per healed document instead of one per field.
            schemas_collection.update_one({"_id": d["_id"]}, {"$set": fixes})

        # Remove internal MongoDB _id from outward JSON.
        d.pop("_id", None)
        normalized.append(d)

    return jsonable_encoder(normalized)
4466

4567

4668
@app.post("/schemas", response_model=SchemaDefinition)
async def add_schema(schema: SchemaDefinition) -> Dict[str, Any]:
    """
    Add a new schema. `id` is deterministically computed from {name, version, content}.
    Server sets `updated_at`.

    Returns:
        The JSON-encoded stored document, without the internal `_id`.
    """
    # Rebuild model explicitly to guarantee id is based on content, not caller-supplied id
    model = SchemaDefinition(
        name=schema.name,
        version=schema.version,
        content=schema.content,
        updated_at=None,
        id="ignored",  # ignored by validator; kept for clarity
    )
    doc = model.dict()
    doc["updated_at"] = datetime.utcnow()

    schemas_collection.insert_one(doc)

    # insert_one adds an `_id` (ObjectId) to the dict it is given; drop it so
    # the response can be JSON-encoded, matching what GET /schemas does.
    doc.pop("_id", None)

    return jsonable_encoder(doc)
6590

6691

67-
@app.put("/schemas/{id}", response_model=Dict[str, str])
async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
    """
    Update schema by `id`. Because `id` is a content hash, any change in
    {name, version, content} will produce a new `id`. This endpoint:
    1) Finds the existing document by the current `id`.
    2) Merges provided fields (ignores `None` and any `id` supplied).
    3) Recomputes `id` from merged content.
    4) Replaces the document and returns the (possibly new) `id`.

    Raises:
        HTTPException: 400 if `id` is empty or not a string; 404 if no
            document with that `id` exists.
    """
    if not isinstance(id, str) or not id.strip():
        raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")

    existing = schemas_collection.find_one({"id": id})
    if not existing:
        raise HTTPException(status_code=404, detail="Schema not found")

    # Merge non-None fields from the payload (ignore any 'id' from client).
    # `update.dict()` always contains every field, with None for omitted ones,
    # so the fallback has to test for None rather than for a missing key.
    payload = update.dict()
    merged = {
        key: payload[key] if payload.get(key) is not None else existing.get(key)
        for key in ("name", "version", "content")
    }

    # Compute new hash-based id using SchemaDefinition logic
    new_model = SchemaDefinition(**merged, id="ignored", updated_at=None)
    new_id = new_model.id

    # Build final doc to store
    final_doc = {
        "id": new_id,
        "name": merged["name"],
        "version": merged["version"],
        "content": merged["content"],
        "updated_at": datetime.utcnow(),
    }

    # Replace the existing document (matched by the previous id)
    replace_result = schemas_collection.replace_one({"id": id}, final_doc)
    if replace_result.matched_count == 0:
        # The document disappeared between the lookup and the replace.
        raise HTTPException(status_code=404, detail="Schema not found during update")

    # If id changed, the caller now has to reference the new id
    return {"message": "Schema updated", "id": new_id}
93136

94137

95-
@app.delete("/schemas/{id}", response_model=Dict[str, str])
async def delete_schema(id: str) -> Dict[str, str]:
    """
    Remove the schema whose `id` matches the path parameter.

    Raises:
        HTTPException: 400 when `id` is not a non-blank string; 404 when
            nothing was deleted.
    """
    id_is_usable = isinstance(id, str) and bool(id.strip())
    if not id_is_usable:
        raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")

    outcome = schemas_collection.delete_one({"id": id})
    if outcome.deleted_count == 0:
        raise HTTPException(status_code=404, detail="Schema not found")

    return {"message": "Schema deleted"}

0 commit comments

Comments
 (0)