Skip to content

Commit 437ef25

Browse files
committed
Increment major version automatically
1 parent b47c037 commit 437ef25

File tree

10 files changed

+479
-8
lines changed

10 files changed

+479
-8
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ repos:
3434
- django-stubs
3535
- djangorestframework-stubs
3636
- pandas-stubs
37+
- pandas
38+
- deepdiff
3739
- types-redis
3840
- strawberry-graphql
3941
- strawberry-graphql-django

api/models/Resource.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import random
33
import uuid
4+
from pathlib import Path
45
from typing import TYPE_CHECKING, Any, Optional
56

67
import structlog
@@ -15,6 +16,7 @@
1516

1617
from api.managers.dvc_manager import DVCManager
1718
from api.utils.enums import DataType
19+
from api.utils.version_detection import detect_version_change_type
1820

1921
if TYPE_CHECKING:
2022
from api.models.Dataset import Dataset
@@ -162,7 +164,7 @@ def version_resource_with_dvc(sender, instance: ResourceFileDetails, created, **
162164
)
163165
return
164166

165-
# Determine version number using semantic versioning
167+
# Get the latest version
166168
last_version: Optional[ResourceVersion] = (
167169
instance.resource.versions.order_by("-created_at").first()
168170
)
@@ -171,9 +173,64 @@ def version_resource_with_dvc(sender, instance: ResourceFileDetails, created, **
171173
if last_version is None:
172174
new_version = "v1.0.0"
173175
else:
174-
# Default to minor version increment, could be configurable in the future
176+
# Determine appropriate version increment type by analyzing changes
177+
try:
178+
# Get the previous version file path
179+
# We need to create a temporary copy of the previous version
180+
import shutil
181+
import tempfile
182+
183+
from django.core.files.storage import default_storage
184+
185+
# Create a temporary directory for the previous version
186+
with tempfile.TemporaryDirectory() as temp_dir:
187+
# Build the temporary path that will hold a copy of the previous version
188+
prev_file_name = f"prev_version_{instance.resource.id}.{instance.file.name.split('.')[-1]}"
189+
prev_file_path = os.path.join(temp_dir, prev_file_name)
190+
191+
# Use DVC to get the previous version
192+
try:
193+
# Try to check out the previous version using DVC
194+
rel_path = Path(instance.file.path).relative_to(
195+
settings.DVC_REPO_PATH
196+
)
197+
tag_name = f"{instance.resource.name}-{last_version.version_number}"
198+
199+
# Save current file to temp location
200+
current_file_path = instance.file.path
201+
temp_current = os.path.join(temp_dir, "current_file")
202+
shutil.copy2(current_file_path, temp_current)
203+
204+
# Roll the tracked file back in place to the previous version's tag
205+
dvc.rollback_to_version(instance.file.path, tag_name)
206+
207+
# Copy the previous version to our temp location
208+
shutil.copy2(instance.file.path, prev_file_path)
209+
210+
# Restore current file
211+
shutil.copy2(temp_current, current_file_path)
212+
213+
# Detect version change type
214+
increment_type = detect_version_change_type(
215+
prev_file_path, current_file_path
216+
)
217+
logger.info(
218+
f"Detected version change type: {increment_type} for {instance.resource.name}"
219+
)
220+
except Exception as e:
221+
logger.warning(
222+
f"Could not analyze version changes: {str(e)}, defaulting to minor version"
223+
)
224+
increment_type = "minor"
225+
except Exception as e:
226+
logger.warning(
227+
f"Error in version detection: {str(e)}, defaulting to minor version"
228+
)
229+
increment_type = "minor"
230+
231+
# Increment version based on detected change type
175232
new_version = _increment_version(
176-
last_version.version_number, increment_type="minor"
233+
last_version.version_number, increment_type=increment_type
177234
)
178235

179236
# Use chunked mode for large files (over 100MB)

api/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
ResourceDataTable,
1111
ResourceFileDetails,
1212
ResourcePreviewDetails,
13+
ResourceVersion,
1314
)
1415
from api.models.ResourceChartDetails import ResourceChartDetails
1516
from api.models.ResourceChartImage import ResourceChartImage

api/schema/resource_schema.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
from strawberry.file_uploads import Upload
1313
from strawberry.types import Info
1414

15+
from api.managers.dvc_manager import DVCManager
1516
from api.models import (
1617
Dataset,
1718
Resource,
1819
ResourceFileDetails,
1920
ResourcePreviewDetails,
2021
ResourceSchema,
22+
ResourceVersion,
2123
)
24+
from api.models.Resource import _increment_version
2225
from api.types.type_resource import TypeResource
2326
from api.utils.constants import FORMAT_MAPPING
2427
from api.utils.data_indexing import index_resource_data
@@ -64,6 +67,16 @@ class UpdateFileResourceInput:
6467
preview_details: Optional[PreviewDetails] = strawberry.field(default=None)
6568

6669

70+
@strawberry.input
71+
class CreateMajorVersionInput:
72+
"""Input type for creating a major version of a resource."""
73+
74+
resource_id: uuid.UUID = strawberry.field()
75+
description: str = strawberry.field(
76+
description="Description of the changes in this major version"
77+
)
78+
79+
6780
@strawberry.enum
6881
class FieldType(Enum):
6982
"""Enum for field types."""
@@ -339,3 +352,71 @@ def delete_file_resource(self, info: Info, resource_id: uuid.UUID) -> bool:
339352
return True
340353
except Resource.DoesNotExist as e:
341354
raise ValueError(f"Resource with ID {resource_id} does not exist.")
355+
356+
@strawberry_django.mutation(handle_django_errors=True)
357+
@trace_resolver(name="create_major_version", attributes={"component": "resource"})
358+
def create_major_version(
359+
self, info: Info, input: CreateMajorVersionInput
360+
) -> TypeResource:
361+
"""Create a major version for a resource.
362+
363+
This should be used when significant changes are made to the resource data structure,
364+
such as schema changes, column additions/removals, or other breaking changes.
365+
"""
366+
import os
367+
368+
from django.conf import settings
369+
370+
try:
371+
# Get the resource
372+
resource = Resource.objects.get(id=input.resource_id)
373+
except Resource.DoesNotExist:
374+
raise ValueError(f"Resource with ID {input.resource_id} does not exist")
375+
376+
# Get the latest version
377+
last_version = resource.versions.order_by("-created_at").first()
378+
379+
if not last_version:
380+
logger.warning(
381+
f"No previous version found for resource {resource.name}, creating initial version"
382+
)
383+
new_version = "v1.0.0"
384+
else:
385+
# Increment major version
386+
new_version = _increment_version(
387+
last_version.version_number, increment_type="major"
388+
)
389+
390+
# Initialize DVC manager
391+
dvc = DVCManager(settings.DVC_REPO_PATH)
392+
393+
# Get the resource file path
394+
file_path = resource.resourcefiledetails.file.path
395+
396+
# Determine if file is large and should use chunking
397+
file_size = (
398+
resource.resourcefiledetails.file.size
399+
if hasattr(resource.resourcefiledetails.file, "size")
400+
else os.path.getsize(file_path)
401+
)
402+
use_chunked = file_size > 100 * 1024 * 1024 # 100MB threshold
403+
404+
# Track with DVC
405+
dvc_file = dvc.track_resource(file_path, chunked=use_chunked)
406+
message = f"Major version update for resource: {resource.name} to version {new_version}"
407+
dvc.commit_version(dvc_file, message)
408+
dvc.tag_version(f"{resource.name}-{new_version}")
409+
410+
# Create version record
411+
ResourceVersion.objects.create(
412+
resource=resource,
413+
version_number=new_version,
414+
change_description=input.description,
415+
)
416+
417+
# Update resource version field
418+
resource.version = new_version
419+
resource.save(update_fields=["version"])
420+
421+
logger.info(f"Created major version {new_version} for resource {resource.name}")
422+
return TypeResource.from_django(resource)

api/types/charts/base_chart.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ def _get_y_values(
623623
if pd.notna(value):
624624
x_to_y_map[x_val] = float(value)
625625
elif pd.notna(y_val):
626-
x_to_y_map[x_val] = float(y_val)
626+
x_to_y_map[x_val] = float(y_val) # type: ignore
627627
except (ValueError, TypeError) as e:
628628
logger.warning(f"Error converting y-value for {x_val}: {e}")
629629
except Exception as e:

api/types/type_usecase.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from api.types.type_usecase_metadata import TypeUseCaseMetadata
1414
from api.utils.enums import UseCaseStatus
1515

16-
use_case_status: EnumType = strawberry.enum(UseCaseStatus)
16+
use_case_status = strawberry.enum(UseCaseStatus) # type: ignore
1717

1818

1919
@strawberry_django.filter(UseCase)

api/urls.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@
3131
)
3232
),
3333
),
34-
re_path(
34+
re_path( # type: ignore
3535
r"download/(?P<type>resource|access_resource|chart|chart_image)/(?P<id>[0-9a-f]{8}\-[0-9a-f]{4}\-4[0-9a-f]{3}\-[89ab][0-9a-f]{3}\-[0-9a-f]{12})",
3636
download,
3737
),
38-
re_path(
38+
re_path( # type: ignore
3939
r"generate-dynamic-chart/(?P<resource_id>[0-9a-f]{8}\-[0-9a-f]{4}\-4[0-9a-f]{3}\-[89ab][0-9a-f]{3}\-[0-9a-f]{12})",
4040
generate_dynamic_chart,
4141
name="generate_dynamic_chart",

api/utils/elasticsearch_telemetry_patch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,4 @@ def patch_elasticsearch_instrumentation() -> None:
4747
from opentelemetry.instrumentation.elasticsearch import utils
4848

4949
# Replace the internal _flatten_dict function with our patched version
50-
utils._flatten_dict = patched_flatten_dict
50+
utils._flatten_dict = patched_flatten_dict # type: ignore

0 commit comments

Comments
 (0)