diff --git a/docs/api/origin-curation-api.rst b/docs/api/origin-curation-api.rst new file mode 100644 index 0000000000..df2c7a402c --- /dev/null +++ b/docs/api/origin-curation-api.rst @@ -0,0 +1,203 @@ +.. _origin_curation_api: + +Origin Curation API +=================== + +The origin curation API extends the project endpoints with helpers to list, +create, update, and bulk-curate ``CodebaseRelation`` entries. Every endpoint +is nested under a project resource: + +.. code-block:: text + + /api/projects/<project-uuid>/relations/ + +Authentication +-------------- + +All endpoints require the same authentication and permission settings as the +core API. When ``REQUIRE_AUTHENTICATION`` is enabled, include the API token in +the ``Authorization`` header:: + + Authorization: Token <token> + +Listing relations +----------------- + +``GET /api/projects/<project-uuid>/relations/`` + +Returns a paginated list of relations for the project. Results can be filtered +using the parameters provided by :class:`RelationFilterSet`: + +``search`` + Full text search on the ``to_resource.path``. +``map_type`` + Filter by relation map type (for example ``java_to_class``). +``status`` + Status of the ``to_resource`` (``requires-review``, ``ok``, etc.). +``curation_status`` + One of ``pending``, ``approved``, or ``rejected``. +``confidence_level`` + One of ``low``, ``medium``, ``high``, or ``verified``. +``curated_by`` + Username of the curator (partial match). +``requires_review`` + Boolean flag to return relations whose ``to_resource`` status is + ``requires-review``. + +Example: + +.. code-block:: console + + curl -X GET \ + "http://localhost:8001/api/projects/4f3f.../relations/?curation_status=pending&requires_review=true" \ + -H "Authorization: Token abc123" + +Sample response: + +.. 
code-block:: json + + { + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "uuid": "f3d9cfcd-7627-478b-91cb-a2d553b79db9", + "to_resource": "to/com/example/App.class", + "status": "requires-review", + "map_type": "java_to_class", + "score": "0.87 diff_ratio: 0.12", + "from_resource": "from/src/main/java/com/example/App.java", + "curation_status": "pending", + "confidence_level": "medium", + "curation_notes": "Automated detection, needs review", + "curated_by": "admin", + "curated_at": "2025-11-11T06:12:48.912345Z" + } + ] + } + +Creating a relation +------------------- + +``POST /api/projects/<project-uuid>/relations/`` + +Required fields: + +``from_resource_path`` + Path of the source resource within the project. +``to_resource_path`` + Path of the target resource within the project. +``map_type`` + Relation category (for example ``java_to_class`` or ``checksum_match``). + +Optional fields: + +``curation_status`` ``confidence_level`` ``curation_notes`` + Set initial curation metadata. When provided, an ``OriginCuration`` record + is automatically created. + +.. code-block:: json + + { + "from_resource_path": "from/src/main/java/com/example/Service.java", + "to_resource_path": "to/com/example/Service.class", + "map_type": "java_to_class", + "curation_status": "approved", + "confidence_level": "high", + "curation_notes": "Reviewed manually" + } + +Updating or deleting a relation +------------------------------- + +``GET /api/projects/<project-uuid>/relations/<relation-uuid>/`` +``PATCH /api/projects/<project-uuid>/relations/<relation-uuid>/`` +``DELETE /api/projects/<project-uuid>/relations/<relation-uuid>/`` + +PATCH accepts the same payload as the creation endpoint. When curation fields +change, a new ``OriginCuration`` history row is added. + +.. 
code-block:: console + + curl -X PATCH \ + "http://localhost:8001/api/projects/4f3f.../relations/f3d9cfcd-7627-478b-91cb-a2d553b79db9/" \ + -H "Authorization: Token abc123" \ + -H "Content-Type: application/json" \ + -d '{"curation_status": "approved", "confidence_level": "verified"}' + +Bulk curation +------------- + +``POST /api/projects/<project-uuid>/relations/bulk-curate/`` + +Payload structure: + +``relation_uuids`` *(list, required)* + Array of relation UUIDs to update. +``action`` *(string, required)* + One of ``approve``, ``reject``, ``mark_pending``, or ``set_confidence``. +``confidence_level`` *(string, required when action is ``set_confidence``)* + Confidence level to apply. +``curation_notes`` *(string, optional)* + Notes appended to each relation (preserved across updates). + +Example: + +.. code-block:: json + + { + "relation_uuids": [ + "f3d9cfcd-7627-478b-91cb-a2d553b79db9", + "a49036e2-f0c0-4104-9d9d-00ba578d72f6" + ], + "action": "approve", + "curation_notes": "Reviewed with upstream team" + } + +Successful responses include the count of updated relations. Per-relation +history entries are written automatically. + +Field reference +--------------- + +Returned ``CodebaseRelation`` objects contain: + +``uuid`` + Stable identifier for the relation. +``from_resource`` ``to_resource`` + Resource paths represented as strings. +``map_type`` + Relation mapping category. +``status`` + The ``to_resource.status`` value, exposed for convenience. +``score`` + Human-readable hint compiled from ``extra_data`` (for example similarity + or diff metrics). Blank when no score is available. +``curation_status`` ``confidence_level`` ``curation_notes`` + Latest curation metadata. +``curated_by`` ``curated_at`` + Username and timestamp of the most recent curation update. 
+ +Error handling +-------------- + +Errors return a JSON object with a ``message`` key and the HTTP status code +appropriate to the failure (400 for validation issues, 404 when a relation +cannot be found, etc.). Examples: + +.. code-block:: json + + { + "message": "relation_uuids is required." + } + + { + "message": { + "from_resource_path": ["Resource with path 'from/src/missing.java' not found in project."] + } + } + + + + diff --git a/docs/developer/origin-curation-architecture.rst b/docs/developer/origin-curation-architecture.rst new file mode 100644 index 0000000000..b5a2964709 --- /dev/null +++ b/docs/developer/origin-curation-architecture.rst @@ -0,0 +1,200 @@ +.. _origin_curation_architecture: + +Origin Curation Architecture +============================ + +This document describes the internal architecture that powers the origin +review, curation, propagation, and deployment features. It targets developers +who need to extend the workflow, integrate custom propagation strategies, or +adjust how curations are exported to external services such as FederatedCode. + +High-level overview +------------------- + +The origin system is composed of four main layers: + +#. **Data model** additions on :class:`~scanpipe.models.CodebaseRelation` plus + the :class:`~scanpipe.models.OriginCuration` and + :class:`~scanpipe.models.PropagationBatch` helper models. +#. **UI forms and views** that expose review, detail, creation, propagation, + and deployment screens. +#. **REST API endpoints** that mirror the UI capabilities for automation. +#. **Background pipes** that implement propagation heuristics and deployment + steps to FederatedCode. + +The following sections drill into each layer and explain how the pieces fit +together. 
+ +Data model +---------- + +``CodebaseRelation`` gained the following fields to track manual decisions: + +``curation_status`` + ``pending`` | ``approved`` | ``rejected`` (stored as ``CharField``) +``confidence_level`` + Optional qualitative indicator (``low`` → ``verified``). +``curation_notes`` + Free-form text field for rationale and references. +``curated_by`` / ``curated_at`` + Backreferences to the curator (``AUTH_USER_MODEL``) and timestamp. + +Each update writes an ``OriginCuration`` entry that captures the previous +resource mapping, curator, status, confidence, and optional notes. This audit +trail enables accountability and rollback. ``OriginCuration`` inherits +``ProjectRelatedModel`` and is indexed by relation and curator timestamps for +fast lookups. + +Propagation batches are tracked through ``PropagationBatch``. They aggregate +metadata about automatically generated relations (strategy used, curator, +count, extra data) and expose an ``undo`` helper that deletes all relations +created in the batch. + +Forms and views +--------------- + +All UI flows live inside ``scanpipe/forms.py`` and ``scanpipe/views.py``: + +``OriginCurationForm`` + A ``ModelForm`` bound to ``CodebaseRelation`` that edits curation fields. +``OriginCurateCreateForm`` + A form for manual relation creation with validation to prevent duplicates. +``BulkCurationForm`` + Radio-based form powering the modal that updates multiple relations. +``OriginPropagationForm`` + Collects strategy, preview-only flag, similarity threshold, and patterns. +``OriginDeployForm`` + Holds merge strategy, include-history flag, and preview-only toggle. + +Corresponding views: + +``OriginReviewView`` + ``PaginatedFilterView`` with bulk actions and export helpers. +``OriginCurateView`` and ``OriginCurateCreateView`` + Manage detail editing and manual creation. Both ensure the project context + is loaded and leverage ``TabSetMixin`` for UI consistency. 
+``OriginPropagateView`` + ``FormView`` that orchestrates preview and execution of propagation logic. +``OriginDeployView`` + Handles FederatedCode export preview and pipeline launch. + +Most views inherit :class:`~scanpipe.views.ProjectRelatedViewMixin`, which +provides ``get_project`` and ensures templates receive the project instance. + +Propagation pipe +---------------- + +``scanpipe/pipes/origin_propagation.py`` implements four propagation +strategies. Each strategy returns a list of tuples ``(to_resource, from_resource, +map_type_suffix, confidence)`` which is fed to ``_apply_propagation``. + +Key functions: + +``propagate_origin_to_similar_resources`` + Combines checksum matches, directory similarity, and path heuristics. +``propagate_origin_by_directory_structure`` + Targets sibling resources that share parent directories. +``propagate_origin_by_package`` + Uses package membership to propagate relative paths within the same + discovered package. +``propagate_origin_by_pattern`` + Applies glob-style patterns (``fnmatch``) to extend a relation. + +``_apply_propagation`` records a ``PropagationBatch`` (when the user is +authenticated), writes new relations via ``make_relation``, stamps batch UUIDs +in ``extra_data``, and returns the count/relations/batch tuple. + +Extending propagation +~~~~~~~~~~~~~~~~~~~~~ + +Developers can add new strategies by: + +#. Implementing a helper that returns a candidate list similar to the existing + functions. +#. Registering the new strategy in ``OriginPropagationForm`` choices and + handling it inside ``OriginPropagateView.form_valid``. +#. Updating ``get_propagation_candidates`` so previews are aware of the + strategy. + +REST API +-------- + +``scanpipe/api/views.py`` extends :class:`ProjectViewSet` with three actions: + +``relations`` + GET for listing relations with filters, POST for creating new relations. +``relation_detail`` + GET/PATCH/DELETE endpoints for individual relations. 
+``relations_bulk_curate`` + POST endpoint to approve/reject/update multiple relations. + +Serializers in ``scanpipe/api/serializers.py`` expose read-only and write-only +fields: + +* ``CodebaseRelationSerializer`` (read) adds computed ``status`` and ``score`` for + existing exports and pipelines. +* ``CodebaseRelationCurationSerializer`` extends the base serializer with + curation metadata for the REST API. +* ``CodebaseRelationWriteSerializer`` validates resource paths within the + project context and automatically records ``OriginCuration`` history when + curation fields change. + +FederatedCode integration +------------------------- + +``scanpipe/pipes/federatedcode.py`` adds: + +``export_origin_curations(project, include_history=True)`` + Builds the YAML payload containing curated relations and optionally their + history. +``add_origin_curations(project, repo, package_scan_file, include_history, merge_strategy)`` + Writes the export into the cloned repository and merges with existing + curations using ``merge_curations``. +``merge_curations`` + Implements ``latest``, ``priority``, and ``manual`` strategies for + combining datasets. + +The ``PublishToFederatedCode`` pipeline pulls these helpers in ``add_origin_curations``. +Deployment options (merge strategy, include history) are stored in +``Project.extra_data["origin_deploy"]`` before the pipeline launches so the +background job runs with the requested configuration. + +Audit trail and undo +-------------------- + +Every user-facing entry point (UI and API) that touches curation fields creates +an ``OriginCuration`` row. Propagation stores the batch UUID in relation +``extra_data`` and in ``PropagationBatch`` for traceability. The ``undo`` method +on ``PropagationBatch`` deletes all relations created in the batch and removes +the batch record. + +When building additional tooling on top of this system, favour reusing the +existing history mechanisms to maintain consistent audit coverage. 
+ +Testing considerations +---------------------- + +* :ref:`tutorial_origin_curation` exercises the complete workflow through the UI. +* When adding new propagation strategies or serializers, extend test cases to + validate: + + - candidate generation accuracy + - history creation + - FederatedCode export content + +Future extensions +----------------- + +Potential areas for extension include: + +* New propagation heuristics (for example similarity scoring based on symbol + analysis). +* Alternative deployment targets or export formats. +* Integration with external review workflows (e.g. webhooks when a relation + changes status). + +Follow core patterns—reuse ``OriginCuration`` for auditability, take advantage +of the mixins in ``scanpipe.views``, and keep serializers responsible for +validation/normalisation while delegating heavy logic to pipes or services. + + diff --git a/docs/index.rst b/docs/index.rst index 8a32c9e26c..705fcc6cad 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,7 @@ In this documentation, you’ll find: introduction installation user-interface + origin-curation-guide faq contributing changelog @@ -37,6 +38,7 @@ In this documentation, you’ll find: tutorial_vulnerablecode_integration tutorial_web_ui_symbol_and_string_collection tutorial_cli_end_to_end_scanning_to_dejacode + tutorial_origin_curation_workflow .. toctree:: :maxdepth: 2 @@ -52,6 +54,8 @@ In this documentation, you’ll find: output-files command-line-interface rest-api + api/origin-curation-api + developer/origin-curation-architecture automation webhooks application-settings diff --git a/docs/origin-curation-guide.rst b/docs/origin-curation-guide.rst new file mode 100644 index 0000000000..fa8da65dce --- /dev/null +++ b/docs/origin-curation-guide.rst @@ -0,0 +1,157 @@ +.. 
_origin_curation_guide: + +Origin Curation Guide +===================== + +This guide walks through the complete workflow for reviewing automated origin +determinations, curating relations manually, propagating trustworthy signals, +and publishing curated results to FederatedCode. + +The origin workflow builds on the ``CodebaseRelation`` model and introduces +curation status, confidence levels, audit history, and deployment tooling. It +is accessible from the project detail page through the **Origin Review** button +and the dedicated navigation links in relation list views. + +Prerequisites +------------- + +Before starting an origin review session: + +* Ensure a project scan has been completed so that ``CodebaseRelation`` entries + are available. +* Assign yourself a staff account so curated actions are attributed to an + authenticated user. + +Origin Review Interface +----------------------- + +Open ``/project/<slug>/origin-review/`` to access the origin review dashboard. +The page is built on ``PaginatedFilterView`` and provides: + +* **Filters** for curation status, confidence level, curator, map type, and + resource tags. +* **Bulk selection** checkboxes with actions to approve, reject, set pending, + or update the confidence level for several relations at once. +* **Inline context** showing the from/to resource paths, current curation + metadata, and a link to open the relation detail view. +* **Export controls** for JSON and XLSX snapshots of the filtered relations. + +Use the filters to focus on relations that need attention (for example +``status:requires-review`` or ``curation_status:pending``) and apply bulk +actions to clear low-risk queues quickly. + +Manual Curation +--------------- + +Select a relation from the review table to open the +``/project/<slug>/origin-curate/<relation-uuid>/`` page. The detail view provides: + +* A tab-based layout exposing essential relation fields, editable curation + metadata, and the historical audit trail (``OriginCuration`` records). 
+* A form for setting the curation status (pending, approved, rejected), + confidence level, curator notes, and for attaching supplemental comments. +* Automatically recorded metadata capturing the authenticated curator and the + timestamp of each change. + +Submit the form to update the relation. Each change creates a corresponding +``OriginCuration`` entry that captures the previous values, the curator, and +optional notes. Use the **History** tab to trace who approved or rejected a +relation over time. + +Creating Relations Manually +--------------------------- + +Use ``/project/<slug>/origin-curate/add/`` when a relation is missing. The +``OriginCurateCreateForm`` accepts the from/to resource paths, map type, and +optional curation metadata, and validates that the relation is unique inside +the project. When the curator is authenticated, the initial entry is stamped +with the curator name and timestamp. + +Propagation Strategies +---------------------- + +Propagation helps extend trustworthy origin decisions to related files. Access +``/project/<slug>/origin-propagate/?relation_uuid=<relation-uuid>`` and choose among the +available strategies: + +``Similar resources`` + Matches sibling files based on checksums, path similarity, and directory + structure. Useful for mirrored class files or generated artifacts. + +``Directory structure`` + Applies the origin determination to peers within the same directory on both + sides, favouring mirrored project layouts. + +``Package`` + Scans resources that belong to the same discovered package, respecting + relative paths within the package contents. + +``Pattern`` + Uses an explicit ``fnmatch``-style pattern (for example ``**/*.class``) to + locate additional candidates relative to the source relation. + +Each strategy exposes a preview mode. Submit with **Preview only** checked to +see the candidate list, confidence hints, and to confirm the batch before +writing. 
Finalising the propagation creates a ``PropagationBatch`` record, +stamps new relations with the batch UUID in ``extra_data``, and records the +curator (when authenticated) for traceability. + +FederatedCode Deployment +------------------------ + +Once a curated set of relations is available, open +``/project/<slug>/origin-deploy/`` to prepare an export: + +* Verify that FederatedCode integration is configured and that the project has + a Package URL (``purl``). +* Choose a merge strategy: ``latest`` (newest wins), ``priority`` (project + always overwrites), or ``manual`` (flag conflicts for later review). +* Decide whether to include the full ``OriginCuration`` history or export only + the current relation states. +* Use preview mode to inspect the generated YAML payload before running the + pipeline. + +Starting the deployment launches the ``PublishToFederatedCode`` pipeline which +clones the target repository, writes ``origin-curations.yaml`` alongside the +scan output, optionally merges with existing data, and commits/pushes the +changes. + +Best Practices +-------------- + +* **Triage first**: sort by map type or confidence level to clear obvious + approvals/rejections before diving into edge cases. +* **Use notes**: capture reasoning and external references so future curators + understand the decision path. +* **Review propagation batches**: after propagation, use the batch UUID filter + (``extra_data__propagation_batch``) to sample the new relations and revert if + needed. +* **Keep history**: when deploying, favour including curation history unless a + consumer explicitly requires a trimmed dataset. + +Troubleshooting +--------------- + +No relations listed + Run a scan pipeline or load sample data. The review UI only displays + existing ``CodebaseRelation`` entries. + +Bulk actions disabled + Ensure at least one checkbox is selected and that you have the necessary + staff permissions to curate relations. 
+ +Propagation fails with anonymous user + Log in through the admin panel or provide valid session credentials. Batch + tracking requires an authenticated curator. + +Deployment blocked by configuration + Confirm the FederatedCode settings in ``scanpipe/settings.py`` (or the + environment variables) and assign a ``purl`` to the project under Project + Settings. + +Related Resources +----------------- + +* :ref:`origin_curation_api` for REST endpoint details. +* :ref:`origin_curation_architecture` for implementation notes. +* :ref:`tutorial_origin_curation` for a hands-on walkthrough. diff --git a/docs/tutorial_origin_curation_workflow.rst b/docs/tutorial_origin_curation_workflow.rst new file mode 100644 index 0000000000..d7ec9a7e53 --- /dev/null +++ b/docs/tutorial_origin_curation_workflow.rst @@ -0,0 +1,116 @@ +.. _tutorial_origin_curation: + +Tutorial: Origin Review and Curation Workflow +============================================= + +This tutorial walks through a complete origin review session using the ScanCode.io +web interface and supporting API calls. You will triage relations, curate them, +propagate trustworthy determinations, and finally deploy curations to FederatedCode. + +Prerequisites +------------- + +* ScanCode.io running locally (``make run``). +* A staff user account (``python manage.py createsuperuser``). +* Optional: install dependencies needed for FederatedCode integration (``git`` + access, configured credentials). + +Step 1: Explore the origin review dashboard +------------------------------------------- + +#. Sign in at ``http://127.0.0.1:8002/admin/`` if authentication is required. +#. Navigate to ``http://127.0.0.1:8002/project/origin-review-test/`` and click the + **Origin Review** button in the header. +#. Use the filter menu to narrow the list (for example enable **Requires Review**). +#. Select a few relations and trigger **Bulk Actions → Approve selected** to clear + obvious cases. +#. 
Download the filtered list with **Export JSON** or **Export XLSX** to verify + curation metadata. + +Step 2: Curate a single relation +-------------------------------- + +#. Click **Review** in the actions column. +#. Inspect the **Curation** tab: change status to ``approved``, set the confidence + to ``verified``, and add a justification in the notes field. +#. Submit to save. Switch to the **History** tab to confirm the ``OriginCuration`` + record was created and that your username is recorded. + +Step 3: Create a missing relation +--------------------------------- + +#. Return to the review page and click **Add Relation**. +#. Fill in the form with: + + * From resource path: ``from/src/main/java/com/example/NewService.java`` + * To resource path: ``to/com/example/NewService.class`` + * Map type: ``java_to_class`` + * Curation status: ``pending`` (optional) + +#. Submit the form and confirm the relation appears in the table. + +Step 4: Propagate the approval +------------------------------ + +#. Select an approved relation (for instance ``App.java``) and click **Propagate**. +#. Choose **Similar resources** strategy and check **Preview only**. +#. Submit to display the candidate list. Review the suggestions and, if they look + reasonable, uncheck **Preview only** and submit again to apply the batch. +#. Back in the review list, filter for ``extra_data__propagation_batch`` to inspect + the newly created relations. Use the batch UUID if you need to undo the changes + in the Django shell with ``PropagationBatch.objects.get(uuid=...).undo()``. + +Step 5: Deploy to FederatedCode +------------------------------- + +#. Ensure the project has a Package URL. From the project detail page click + **Settings** and populate the ``purl`` field (for example ``pkg:maven/com.example/app@1.0.0``). +#. Open ``/project/origin-review-test/origin-deploy/``. +#. Verify the summary: curated relation count, history entries, FederatedCode + configuration status. +#. 
Keep **Preview only** enabled and submit to inspect the YAML payload. +#. When ready, choose a merge strategy (``latest`` is a good default), disable + preview, and submit to launch the ``PublishToFederatedCode`` pipeline. Track + progress via the **Pipelines** tab. + +Optional: use the REST API +-------------------------- + +The same workflow can be automated. For example, approve a relation via API: + +.. code-block:: console + + curl -X PATCH \ + "http://127.0.0.1:8002/api/projects/<project-uuid>/relations/<relation-uuid>/" \ + -H "Authorization: Token <token>" \ + -H "Content-Type: application/json" \ + -d '{"curation_status": "approved", "confidence_level": "high"}' + +Use the bulk-curate endpoint to update multiple relations at once: + +.. code-block:: json + + { + "relation_uuids": ["uuid-1", "uuid-2"], + "action": "set_confidence", + "confidence_level": "verified", + "curation_notes": "Validated via automation" + } + +Next steps +---------- + +* Explore additional propagation strategies (directory, package, pattern) and + compare candidate previews. +* Combine UI and API actions in a script to triage large batches. +* Extend the deployment pipeline with project-specific metadata using + ``merge_curations`` or additional export hooks. + +Related references +------------------ + +* :ref:`origin_curation_guide` +* :ref:`origin_curation_api` +* :ref:`origin_curation_architecture` + + diff --git a/scanpipe/api/serializers.py b/scanpipe/api/serializers.py index 6d849533b7..11da5e7058 100644 --- a/scanpipe/api/serializers.py +++ b/scanpipe/api/serializers.py @@ -21,6 +21,7 @@ # Visit https://github.com/aboutcode-org/scancode.io for support and download. 
from django.apps import apps +from django.utils import timezone from rest_framework import serializers from rest_framework.reverse import reverse @@ -487,6 +488,8 @@ class Meta: class CodebaseRelationSerializer(serializers.ModelSerializer): from_resource = serializers.ReadOnlyField(source="from_resource.path") to_resource = serializers.ReadOnlyField(source="to_resource.path") + status = serializers.SerializerMethodField() + score = serializers.SerializerMethodField() class Meta: model = CodebaseRelation @@ -498,6 +501,158 @@ class Meta: "from_resource", ] + def get_status(self, obj): + return obj.status + + def get_score(self, obj): + return obj.score + + +class CodebaseRelationCurationSerializer(CodebaseRelationSerializer): + uuid = serializers.UUIDField(read_only=True) + curation_status = serializers.CharField(required=False, allow_blank=True) + confidence_level = serializers.CharField(required=False, allow_blank=True) + curation_notes = serializers.CharField(required=False, allow_blank=True) + curated_by = serializers.SerializerMethodField() + curated_at = serializers.DateTimeField(read_only=True) + + class Meta(CodebaseRelationSerializer.Meta): + fields = CodebaseRelationSerializer.Meta.fields + [ + "uuid", + "curation_status", + "confidence_level", + "curation_notes", + "curated_by", + "curated_at", + ] + + def get_curated_by(self, obj): + user = getattr(obj, "curated_by", None) + return getattr(user, "username", None) + + +class CodebaseRelationWriteSerializer(serializers.ModelSerializer): + """Serializer for writing/updating relations with resource paths.""" + + from_resource_path = serializers.CharField(write_only=True) + to_resource_path = serializers.CharField(write_only=True) + from_resource = serializers.StringRelatedField(read_only=True) + to_resource = serializers.StringRelatedField(read_only=True) + uuid = serializers.UUIDField(read_only=True) + curation_status = serializers.ChoiceField( + choices=["pending", "approved", "rejected"], + 
required=False, + allow_blank=True, + allow_null=True, + ) + confidence_level = serializers.ChoiceField( + choices=["low", "medium", "high", "verified"], + required=False, + allow_blank=True, + ) + curation_notes = serializers.CharField(required=False, allow_blank=True) + curated_by = serializers.StringRelatedField(read_only=True) + curated_at = serializers.DateTimeField(read_only=True) + + class Meta: + model = CodebaseRelation + fields = [ + "uuid", + "from_resource_path", + "to_resource_path", + "from_resource", + "to_resource", + "map_type", + "curation_status", + "confidence_level", + "curation_notes", + "curated_by", + "curated_at", + ] + + def validate_from_resource_path(self, value): + """Validate that from_resource exists in the project.""" + project = self.context["project"] + try: + project.codebaseresources.get(path=value) + return value + except CodebaseResource.DoesNotExist: + raise serializers.ValidationError( + f"Resource with path '{value}' not found in project." + ) + + def validate_to_resource_path(self, value): + """Validate that to_resource exists in the project.""" + project = self.context["project"] + try: + project.codebaseresources.get(path=value) + return value + except CodebaseResource.DoesNotExist: + raise serializers.ValidationError( + f"Resource with path '{value}' not found in project." 
+ ) + + def create(self, validated_data): + """Create a relation from two resource paths and stamp its curator.""" + project = self.context["project"] + from_resource_path = validated_data.pop("from_resource_path") + to_resource_path = validated_data.pop("to_resource_path") + + from_resource = project.codebaseresources.get(path=from_resource_path) + to_resource = project.codebaseresources.get(path=to_resource_path) + + relation = CodebaseRelation.objects.create( + project=project, + from_resource=from_resource, + to_resource=to_resource, + **validated_data, + ) + + user = self.context["request"].user + if user.is_authenticated and any( + key in validated_data + for key in ["curation_status", "confidence_level", "curation_notes"] + ): + relation.curated_by = user + relation.curated_at = timezone.now() + relation.save(update_fields=["curated_by", "curated_at"]) + + return relation + + def update(self, instance, validated_data): + """Apply a (possibly partial) payload to an existing relation.""" + from_resource_path = validated_data.pop("from_resource_path", None) + to_resource_path = validated_data.pop("to_resource_path", None) + + if from_resource_path: + instance.from_resource = self.context["project"].codebaseresources.get( + path=from_resource_path + ) + if to_resource_path: + instance.to_resource = self.context["project"].codebaseresources.get( + path=to_resource_path + ) + + # Apply every writable field present in the payload. map_type is handled + # here too: the serializer accepts it, but it used to be silently dropped + # on PATCH. + curation_fields = ["curation_status", "confidence_level", "curation_notes"] + for field in ["map_type", *curation_fields]: + if field in validated_data: + setattr(instance, field, validated_data[field]) + + # Update curated_by and curated_at if curation fields changed + user = self.context["request"].user + if user.is_authenticated and any( + key in validated_data + for key in ["curation_status", "confidence_level", "curation_notes"] + ): + instance.curated_by = user + instance.curated_at = timezone.now() + + instance.save() + 
return instance + class ProjectMessageSerializer(serializers.ModelSerializer): traceback = serializers.SerializerMethodField() diff --git a/scanpipe/api/views.py b/scanpipe/api/views.py index 8288b2858f..671db838ab 100644 --- a/scanpipe/api/views.py +++ b/scanpipe/api/views.py @@ -28,6 +28,7 @@ from django.db import transaction from django.db.models import Q from django.http import FileResponse +from django.utils import timezone import django_filters from rest_framework import mixins @@ -37,7 +38,8 @@ from rest_framework.decorators import action from rest_framework.response import Response -from scanpipe.api.serializers import CodebaseRelationSerializer +from scanpipe.api.serializers import CodebaseRelationCurationSerializer +from scanpipe.api.serializers import CodebaseRelationWriteSerializer from scanpipe.api.serializers import CodebaseResourceSerializer from scanpipe.api.serializers import DiscoveredDependencySerializer from scanpipe.api.serializers import DiscoveredPackageSerializer @@ -51,6 +53,7 @@ from scanpipe.filters import ProjectMessageFilterSet from scanpipe.filters import RelationFilterSet from scanpipe.filters import ResourceFilterSet +from scanpipe.models import CodebaseRelation from scanpipe.models import Project from scanpipe.models import Run from scanpipe.models import RunInProgressError @@ -297,14 +300,222 @@ def dependencies(self, request, *args, **kwargs): request, queryset, DependencyFilterSet, DiscoveredDependencySerializer ) - @action(detail=True, filterset_class=None) + @action( + detail=True, + filterset_class=None, + methods=["get", "post"], + url_path="relations", + url_name="relations", + ) def relations(self, request, *args, **kwargs): + """List or create relations for a project.""" project = self.get_object() + + if request.method == "POST": + # Create new relation + serializer = CodebaseRelationWriteSerializer( + data=request.data, context={"project": project, "request": request} + ) + if serializer.is_valid(): + relation = 
serializer.save() + # Create curation history if curation fields provided + if any( + key in request.data + for key in ["curation_status", "confidence_level", "curation_notes"] + ): + from scanpipe.models import OriginCuration + + curator = request.user if request.user.is_authenticated else None + OriginCuration.objects.create( + project=project, + relation=relation, + curator=curator, + curation_status=relation.curation_status or "pending", + confidence_level=relation.confidence_level or "", + notes=relation.curation_notes or "", + previous_from_resource=relation.from_resource, + previous_to_resource=relation.to_resource, + previous_map_type=relation.map_type, + ) + return Response( + CodebaseRelationCurationSerializer(relation).data, + status=status.HTTP_201_CREATED, + ) + return ErrorResponse( + serializer.errors, status_code=status.HTTP_400_BAD_REQUEST + ) + + # GET - list relations queryset = project.codebaserelations.select_related( "from_resource", "to_resource" ) return self.get_filtered_response( - request, queryset, RelationFilterSet, CodebaseRelationSerializer + request, queryset, RelationFilterSet, CodebaseRelationCurationSerializer + ) + + @action( + detail=True, + methods=["get", "patch", "delete"], + url_path="relations/(?P<relation_uuid>[0-9a-fA-F-]{32,36})",  # UUIDs only + url_name="relation-detail", + ) + def relation_detail(self, request, relation_uuid=None, *args, **kwargs): + """Retrieve (GET), update (PATCH), or delete one relation by UUID.""" + project = self.get_object() + + try: + relation = project.codebaserelations.get(uuid=relation_uuid) + except CodebaseRelation.DoesNotExist: + return ErrorResponse( + "Relation not found.", status_code=status.HTTP_404_NOT_FOUND + ) + + if request.method == "GET": + serializer = CodebaseRelationCurationSerializer(relation) + return Response(serializer.data) + + elif request.method == "PATCH": + # Store previous values for history + from scanpipe.models import OriginCuration + + prev_from = relation.from_resource + prev_to = relation.to_resource + 
prev_map_type = relation.map_type + + serializer = CodebaseRelationWriteSerializer( + relation, + data=request.data, + partial=True, + context={"project": project, "request": request}, + ) + if serializer.is_valid(): + relation = serializer.save() + + # Create curation history if curation fields changed + curation_fields_changed = any( + key in request.data + for key in ["curation_status", "confidence_level", "curation_notes"] + ) + if curation_fields_changed: + OriginCuration.objects.create( + project=project, + relation=relation, + curator=request.user if request.user.is_authenticated else None, + curation_status=relation.curation_status or "pending", + confidence_level=relation.confidence_level or "", + notes=relation.curation_notes or "", + previous_from_resource=prev_from, + previous_to_resource=prev_to, + previous_map_type=prev_map_type, + ) + + return Response(CodebaseRelationCurationSerializer(relation).data) + return ErrorResponse( + serializer.errors, status_code=status.HTTP_400_BAD_REQUEST + ) + + elif request.method == "DELETE": + relation.delete() + return Response(status=status.HTTP_204_NO_CONTENT) + + @action( + detail=True, + methods=["post"], + url_path="relations/bulk-curate", + url_name="relations-bulk-curate", + ) + def relations_bulk_curate(self, request, *args, **kwargs): # noqa: C901 + """Bulk curate multiple relations.""" + project = self.get_object() + + relation_uuids = request.data.get("relation_uuids", []) + if not relation_uuids: + return ErrorResponse( + "relation_uuids is required.", status_code=status.HTTP_400_BAD_REQUEST + ) + + action = request.data.get("action") + if action not in ["approve", "reject", "mark_pending", "set_confidence"]: + return ErrorResponse( + "Invalid action. 
Must be one of: approve, reject, " + "mark_pending, set_confidence", + status_code=status.HTTP_400_BAD_REQUEST, + ) + + relations = project.codebaserelations.filter(uuid__in=relation_uuids) + if not relations.exists(): + return ErrorResponse( + "No relations found.", status_code=status.HTTP_404_NOT_FOUND + ) + + count = 0 + from scanpipe.models import OriginCuration + + for relation in relations: + # Store previous values + prev_from = relation.from_resource + prev_to = relation.to_resource + prev_map_type = relation.map_type + + # Update based on action + if action == "approve": + relation.curation_status = "approved" + elif action == "reject": + relation.curation_status = "rejected" + elif action == "mark_pending": + relation.curation_status = "pending" + elif action == "set_confidence": + confidence_level = request.data.get("confidence_level") + if not confidence_level: + return ErrorResponse( + "confidence_level is required for set_confidence action.", + status_code=status.HTTP_400_BAD_REQUEST, + ) + relation.confidence_level = confidence_level + + # Add notes if provided + if request.data.get("curation_notes"): + if relation.curation_notes: + relation.curation_notes += f"\n\n{request.data['curation_notes']}" + else: + relation.curation_notes = request.data["curation_notes"] + + if request.user.is_authenticated: + relation.curated_by = request.user + relation.curated_at = timezone.now() + relation.save( + update_fields=[ + "curation_status", + "confidence_level", + "curation_notes", + "curated_by", + "curated_at", + ] + ) + + # Create curation history + OriginCuration.objects.create( + project=project, + relation=relation, + curator=request.user if request.user.is_authenticated else None, + curation_status=relation.curation_status or "pending", + confidence_level=relation.confidence_level or "", + notes=request.data.get("curation_notes", ""), + previous_from_resource=prev_from, + previous_to_resource=prev_to, + previous_map_type=prev_map_type, + ) + + count += 
1 + + return Response( + { + "status": ( + f"Successfully updated {count} relation{'s' if count != 1 else ''}." + ), + "count": count, + }, + status=status.HTTP_200_OK, ) @action(detail=True, filterset_class=None) diff --git a/scanpipe/filters.py b/scanpipe/filters.py index 1a063d730d..5e025710ca 100644 --- a/scanpipe/filters.py +++ b/scanpipe/filters.py @@ -931,6 +931,8 @@ class RelationFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): dropdown_widget_fields = [ "status", "map_type", + "curation_status", + "confidence_level", ] search = QuerySearchFilter( @@ -944,11 +946,35 @@ class RelationFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): "from_resource", "to_resource", "map_type", + "curated_at", ], ) map_type = django_filters.ChoiceFilter(choices=MAP_TYPE_CHOICES) status = StatusFilter(field_name="to_resource__status") extra_data = django_filters.CharFilter(lookup_expr="icontains") + curation_status = django_filters.ChoiceFilter( + choices=[ + ("pending", "Pending"), + ("approved", "Approved"), + ("rejected", "Rejected"), + ], + ) + confidence_level = django_filters.ChoiceFilter( + choices=[ + ("low", "Low"), + ("medium", "Medium"), + ("high", "High"), + ("verified", "Verified"), + ], + ) + curated_by = django_filters.CharFilter( + field_name="curated_by__username", + lookup_expr="icontains", + ) + requires_review = django_filters.BooleanFilter( + method="filter_requires_review", + label="Requires Review", + ) class Meta: model = CodebaseRelation @@ -957,6 +983,10 @@ class Meta: "map_type", "status", "extra_data", + "curation_status", + "confidence_level", + "curated_by", + "requires_review", ] def __init__(self, *args, **kwargs): @@ -966,3 +996,9 @@ def __init__(self, *args, **kwargs): qs = CodebaseResource.objects.filter(project=project) status_filter = self.filters["status"] status_filter.extra["choices"] = status_filter.get_status_choices(qs) + + def filter_requires_review(self, queryset, name, value): + """Filter relations where to_resource 
status is 'requires-review'.""" + if value: + return queryset.filter(to_resource__status="requires-review") + return queryset diff --git a/scanpipe/forms.py b/scanpipe/forms.py index cd40b48ee5..60af752f6f 100644 --- a/scanpipe/forms.py +++ b/scanpipe/forms.py @@ -29,6 +29,8 @@ from taggit.forms import TagField from taggit.forms import TagWidget +from scanpipe.models import CodebaseRelation +from scanpipe.models import CodebaseResource from scanpipe.models import Project from scanpipe.models import Run from scanpipe.models import WebhookSubscription @@ -725,3 +727,275 @@ def __init__(self, *args, **kwargs): def save(self, project): return project.add_webhook_subscription(**self.cleaned_data) + + +class OriginCurationForm(forms.ModelForm): + """Form for editing origin curation on a CodebaseRelation.""" + + class Meta: + model = CodebaseRelation + fields = [ + "map_type", + "curation_status", + "confidence_level", + "curation_notes", + ] + widgets = { + "map_type": forms.Select(attrs={"class": "select"}), + "curation_status": forms.Select(attrs={"class": "select"}), + "confidence_level": forms.Select(attrs={"class": "select"}), + "curation_notes": forms.Textarea( + attrs={"class": "textarea is-dynamic", "rows": 4} + ), + } + + def __init__(self, *args, **kwargs): + kwargs.pop("project", None) # Accept but don't use project parameter + super().__init__(*args, **kwargs) + # Note: from_resource and to_resource are non-editable in the model + # They are displayed as read-only in the template + + +class OriginCurateCreateForm(forms.Form): + """Form for manually creating a new CodebaseRelation.""" + + from_resource = forms.ModelChoiceField( + queryset=CodebaseResource.objects.none(), + widget=forms.Select(attrs={"class": "select"}), + help_text="Source resource (from development codebase).", + ) + to_resource = forms.ModelChoiceField( + queryset=CodebaseResource.objects.none(), + widget=forms.Select(attrs={"class": "select"}), + help_text="Target resource (from deployment 
codebase).", + ) + map_type = forms.ChoiceField( + widget=forms.Select(attrs={"class": "select"}), + help_text="Type of mapping used to establish this relation.", + ) + curation_status = forms.ChoiceField( + required=False, + choices=[ + ("", "---------"), + ("pending", "Pending"), + ("approved", "Approved"), + ("rejected", "Rejected"), + ], + widget=forms.Select(attrs={"class": "select"}), + ) + confidence_level = forms.ChoiceField( + required=False, + choices=[ + ("", "---------"), + ("low", "Low"), + ("medium", "Medium"), + ("high", "High"), + ("verified", "Verified"), + ], + widget=forms.Select(attrs={"class": "select"}), + ) + curation_notes = forms.CharField( + required=False, + widget=forms.Textarea(attrs={"class": "textarea is-dynamic", "rows": 4}), + ) + + def __init__(self, *args, **kwargs): + project = kwargs.pop("project", None) + super().__init__(*args, **kwargs) + if project: + # Set querysets for resources + self.fields[ + "from_resource" + ].queryset = project.codebaseresources.files().order_by("path") + self.fields[ + "to_resource" + ].queryset = project.codebaseresources.files().order_by("path") + + # Set map_type choices from filters + from scanpipe.filters import MAP_TYPE_CHOICES + + self.fields["map_type"].choices = [("", "---------")] + list(MAP_TYPE_CHOICES) + + def clean(self): + cleaned_data = super().clean() + from_resource = cleaned_data.get("from_resource") + to_resource = cleaned_data.get("to_resource") + map_type = cleaned_data.get("map_type") + + if from_resource and to_resource: + if from_resource.project != to_resource.project: + raise ValidationError( + "From and to resources must belong to the same project." 
+ ) + + # Check if relation already exists + if map_type: + existing = CodebaseRelation.objects.filter( + project=from_resource.project, + from_resource=from_resource, + to_resource=to_resource, + map_type=map_type, + ).exists() + if existing: + raise ValidationError( + f"A relation with this map_type '{map_type}' already exists " + f"between these resources." + ) + + return cleaned_data + + def save(self, project, user=None): + """Create the relation and curate it if any curation field is filled in.""" + from scanpipe.pipes import make_relation + + cleaned_data = self.cleaned_data + relation = make_relation( + from_resource=cleaned_data["from_resource"], + to_resource=cleaned_data["to_resource"], + map_type=cleaned_data["map_type"], + ) + + curation_keys = ("curation_status", "confidence_level", "curation_notes") + if any(cleaned_data.get(key) for key in curation_keys):  # any one suffices + relation.curation_status = cleaned_data.get("curation_status") or None + relation.curation_notes = cleaned_data.get("curation_notes") or "" + relation.confidence_level = cleaned_data.get("confidence_level") or "" + if user and hasattr(user, "is_authenticated") and user.is_authenticated: + relation.curated_by = user + from django.utils import timezone + + relation.curated_at = timezone.now() + relation.save() + + return relation + + +class BulkCurationForm(BaseProjectActionForm): + """Form for bulk curation operations on relations.""" + + prefix = "bulk_curate" + action = forms.ChoiceField( + choices=[ + ("approve", "Approve selected"), + ("reject", "Reject selected"), + ("mark_pending", "Mark as pending"), + ("set_confidence", "Set confidence level"), + ], + widget=forms.RadioSelect, + required=True, + ) + confidence_level = forms.ChoiceField( + required=False, + choices=[ + ("", "---------"), + ("low", "Low"), + ("medium", "Medium"), + ("high", "High"), + ("verified", "Verified"), + ], + widget=forms.Select(attrs={"class": "select"}), + help_text="Required when action is 'Set confidence level'.", + ) + curation_notes = 
forms.CharField( + required=False, + widget=forms.Textarea(attrs={"class": "textarea is-dynamic", "rows": 3}), + help_text="Optional notes to add to all selected relations.", + ) + + def clean(self): + cleaned_data = super().clean() + action = cleaned_data.get("action") + confidence_level = cleaned_data.get("confidence_level") + + if action == "set_confidence" and not confidence_level: + raise ValidationError( + "Confidence level is required when setting confidence level." + ) + + return cleaned_data + + +class OriginPropagationForm(forms.Form): + """Form for origin propagation settings.""" + + relation_uuid = forms.UUIDField( + widget=forms.HiddenInput, + required=True, + ) + strategy = forms.ChoiceField( + choices=[ + ("similar", "Similar Resources (path/checksum similarity)"), + ("directory", "Directory Structure (sibling files)"), + ("package", "Package (all resources in same package)"), + ("pattern", "Pattern (path pattern matching)"), + ], + widget=forms.RadioSelect, + required=True, + help_text="Select the propagation strategy to use.", + ) + similarity_threshold = forms.DecimalField( + required=False, + min_value=0.0, + max_value=1.0, + initial=0.8, + widget=forms.NumberInput(attrs={"class": "input", "step": "0.1"}), + help_text=( + "Similarity threshold (0.0-1.0) for 'similar' strategy. " + "Higher = more strict." + ), + ) + pattern = forms.CharField( + required=False, + widget=forms.TextInput(attrs={"class": "input"}), + help_text=( + "Path pattern (glob or regex) for 'pattern' strategy. 
" + "Example: '*.js' or '^src/.*\\.py$'" + ), + ) + preview_only = forms.BooleanField( + required=False, + initial=True, + widget=forms.CheckboxInput(attrs={"class": "checkbox"}), + help_text="Preview only (don't apply changes).", + ) + + def clean(self): + cleaned_data = super().clean() + strategy = cleaned_data.get("strategy") + similarity_threshold = cleaned_data.get("similarity_threshold") + pattern = cleaned_data.get("pattern") + + if strategy == "similar" and similarity_threshold is None: + cleaned_data["similarity_threshold"] = 0.8 # Default + + if strategy == "pattern" and not pattern: + raise ValidationError("Pattern is required for pattern strategy.") + + return cleaned_data + + +class OriginDeployForm(forms.Form): + """Form for deploying origin curations to FederatedCode.""" + + merge_strategy = forms.ChoiceField( + choices=[ + ("latest", "Latest (use most recent curation)"), + ("priority", "Priority (new curations override existing)"), + ("manual", "Manual (requires review for conflicts)"), + ], + widget=forms.RadioSelect, + initial="latest", + help_text="Select how to handle conflicts with existing curations.", + ) + include_history = forms.BooleanField( + required=False, + initial=True, + widget=forms.CheckboxInput(attrs={"class": "checkbox"}), + help_text="Include full curation history in export.", + ) + preview_only = forms.BooleanField( + required=False, + initial=True, + widget=forms.CheckboxInput(attrs={"class": "checkbox"}), + help_text="Preview only (don't deploy).", + ) diff --git a/scanpipe/migrations/0077_add_origin_curation.py b/scanpipe/migrations/0077_add_origin_curation.py new file mode 100644 index 0000000000..928b8dd6e8 --- /dev/null +++ b/scanpipe/migrations/0077_add_origin_curation.py @@ -0,0 +1,302 @@ +# Generated manually for origin curation feature + +import django.db.models.deletion +import uuid +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies 
= [ + ('scanpipe', '0076_discoveredpackagescore_scorecardcheck_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + # Add curation fields to CodebaseRelation + migrations.AddField( + model_name='codebaserelation', + name='curation_status', + field=models.CharField( + blank=True, + choices=[ + ('pending', 'Pending'), + ('approved', 'Approved'), + ('rejected', 'Rejected'), + ], + help_text='Curation status for this relation.', + max_length=20, + null=True, + ), + ), + migrations.AddField( + model_name='codebaserelation', + name='curated_by', + field=models.ForeignKey( + blank=True, + help_text='User who curated this relation.', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='curated_relations', + to=settings.AUTH_USER_MODEL, + ), + ), + migrations.AddField( + model_name='codebaserelation', + name='curated_at', + field=models.DateTimeField( + blank=True, + help_text='Timestamp when this relation was curated.', + null=True, + ), + ), + migrations.AddField( + model_name='codebaserelation', + name='curation_notes', + field=models.TextField( + blank=True, + help_text='Notes or comments about the curation.', + ), + ), + migrations.AddField( + model_name='codebaserelation', + name='confidence_level', + field=models.CharField( + blank=True, + choices=[ + ('low', 'Low'), + ('medium', 'Medium'), + ('high', 'High'), + ('verified', 'Verified'), + ], + help_text='Confidence level for this origin determination.', + max_length=20, + ), + ), + # Create OriginCuration model + migrations.CreateModel( + name='OriginCuration', + fields=[ + ( + 'uuid', + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + verbose_name='UUID', + ), + ), + ( + 'created_date', + models.DateTimeField( + auto_now_add=True, + help_text='When this curation was created.', + ), + ), + ( + 'notes', + models.TextField( + blank=True, + help_text='Notes or comments about this 
curation action.', + ), + ), + ( + 'curation_status', + models.CharField( + choices=[ + ('pending', 'Pending'), + ('approved', 'Approved'), + ('rejected', 'Rejected'), + ], + help_text='Curation status set by this action.', + max_length=20, + ), + ), + ( + 'confidence_level', + models.CharField( + blank=True, + choices=[ + ('low', 'Low'), + ('medium', 'Medium'), + ('high', 'High'), + ('verified', 'Verified'), + ], + help_text='Confidence level set by this action.', + max_length=20, + ), + ), + ( + 'previous_map_type', + models.CharField( + blank=True, + help_text='Previous map_type value (for tracking changes).', + max_length=30, + ), + ), + ( + 'curator', + models.ForeignKey( + help_text='User who performed this curation.', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='origin_curations', + to=settings.AUTH_USER_MODEL, + ), + ), + ( + 'previous_from_resource', + models.ForeignKey( + blank=True, + help_text='Previous from_resource value (for tracking changes).', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='previous_from_curations', + to='scanpipe.codebaseresource', + ), + ), + ( + 'previous_to_resource', + models.ForeignKey( + blank=True, + help_text='Previous to_resource value (for tracking changes).', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='previous_to_curations', + to='scanpipe.codebaseresource', + ), + ), + ( + 'project', + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name='origincurations', + to='scanpipe.project', + ), + ), + ( + 'relation', + models.ForeignKey( + help_text='The relation being curated.', + on_delete=django.db.models.deletion.CASCADE, + related_name='curations', + to='scanpipe.codebaserelation', + ), + ), + ], + options={ + 'ordering': ['-created_date'], + }, + ), + # Add indexes + migrations.AddIndex( + model_name='origincuration', + index=models.Index( + fields=['relation', '-created_date'], + 
name='scanpipe_or_relation_created_idx', + ), + ), + migrations.AddIndex( + model_name='origincuration', + index=models.Index( + fields=['curator', '-created_date'], + name='scanpipe_or_curator_created_idx', + ), + ), + # Create PropagationBatch model + migrations.CreateModel( + name='PropagationBatch', + fields=[ + ( + 'uuid', + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + verbose_name='UUID', + ), + ), + ( + 'strategy', + models.CharField( + help_text='Propagation strategy used (similar, directory, package, pattern).', + max_length=50, + ), + ), + ( + 'created_date', + models.DateTimeField( + auto_now_add=True, + help_text='When this propagation was created.', + ), + ), + ( + 'relation_count', + models.IntegerField( + default=0, + help_text='Number of relations created in this batch.', + ), + ), + ( + 'extra_data', + models.JSONField( + blank=True, + default=dict, + help_text='Additional data about the propagation (pattern, threshold, etc.).', + ), + ), + ( + 'created_by', + models.ForeignKey( + help_text='User who performed this propagation.', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='propagation_batches', + to=settings.AUTH_USER_MODEL, + ), + ), + ( + 'project', + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name='propagationbatches', + to='scanpipe.project', + ), + ), + ( + 'source_relation', + models.ForeignKey( + help_text='The source relation that was propagated.', + on_delete=django.db.models.deletion.CASCADE, + related_name='propagation_batches', + to='scanpipe.codebaserelation', + ), + ), + ], + options={ + 'ordering': ['-created_date'], + }, + ), + migrations.AddIndex( + model_name='propagationbatch', + index=models.Index( + fields=['source_relation', '-created_date'], + name='scanpipe_pr_source_r_created_idx', + ), + ), + migrations.AddIndex( + model_name='propagationbatch', + index=models.Index( + 
fields=['created_by', '-created_date'], + name='scanpipe_pr_created__created_idx', + ), + ), + ] + + diff --git a/scanpipe/models.py b/scanpipe/models.py index 26d74303cb..057aff345a 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3224,32 +3224,216 @@ class CodebaseRelation( map_type = models.CharField( max_length=30, ) + # Curation fields + curation_status = models.CharField( + max_length=20, + blank=True, + null=True,  # matches migration 0077_add_origin_curation + choices=[ + ("pending", _("Pending")), + ("approved", _("Approved")), + ("rejected", _("Rejected")), + ], + help_text=_("Curation status for this relation."), + ) + curated_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="curated_relations", + help_text=_("User who curated this relation."), + ) + curated_at = models.DateTimeField( + null=True, + blank=True, + help_text=_("Timestamp when this relation was curated."), + ) + curation_notes = models.TextField( + blank=True, + help_text=_("Notes or comments about the curation."), + ) + confidence_level = models.CharField( + max_length=20, + blank=True, + choices=[ + ("low", _("Low")), + ("medium", _("Medium")), + ("high", _("High")), + ("verified", _("Verified")), + ], + help_text=_("Confidence level for this origin determination."), + ) + + @property + def status(self): + """Return the deployment resource status for convenience.""" + if hasattr(self, "to_resource") and self.to_resource: + return self.to_resource.status + return "" + + @property + def score(self): + """Return a human-friendly score string based on extra_data.""" + if not self.extra_data: + return "" + + score_value = self.extra_data.get("path_score") or "" + diff_ratio = self.extra_data.get("diff_ratio") + parts = [] + + if score_value: + parts.append(str(score_value)) + + if diff_ratio: + parts.append(f"diff_ratio: {diff_ratio}") + + return " ".join(parts) + + +class OriginCuration(UUIDPKModel, ProjectRelatedModel, models.Model): + """ + Audit trail for origin 
class OriginCuration(UUIDPKModel, ProjectRelatedModel, models.Model):
    """
    Audit trail for origin curation changes.

    Tracks the history of curation actions on CodebaseRelation objects: each
    row stores the status and confidence set by one action, who performed it,
    and the relation values that were in place before the action.
    """

    # Rows are removed together with the relation they describe (CASCADE).
    relation = models.ForeignKey(
        CodebaseRelation,
        on_delete=models.CASCADE,
        related_name="curations",
        help_text=_("The relation being curated."),
    )
    # Kept nullable so the history row survives deletion of the user account.
    curator = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.SET_NULL,
        null=True,
        related_name="origin_curations",
        help_text=_("User who performed this curation."),
    )
    created_date = models.DateTimeField(
        auto_now_add=True,
        help_text=_("When this curation was created."),
    )
    notes = models.TextField(
        blank=True,
        help_text=_("Notes or comments about this curation action."),
    )
    # Unlike ``confidence_level`` below, this field does not set ``blank=True``.
    curation_status = models.CharField(
        max_length=20,
        choices=[
            ("pending", _("Pending")),
            ("approved", _("Approved")),
            ("rejected", _("Rejected")),
        ],
        help_text=_("Curation status set by this action."),
    )
    confidence_level = models.CharField(
        max_length=20,
        blank=True,
        choices=[
            ("low", _("Low")),
            ("medium", _("Medium")),
            ("high", _("High")),
            ("verified", _("Verified")),
        ],
        help_text=_("Confidence level set by this action."),
    )
    # Track previous values for history.
    # SET_NULL: deleting a resource clears the reference but keeps the row.
    previous_from_resource = models.ForeignKey(
        CodebaseResource,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="previous_from_curations",
        help_text=_("Previous from_resource value (for tracking changes)."),
    )
    previous_to_resource = models.ForeignKey(
        CodebaseResource,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="previous_to_curations",
        help_text=_("Previous to_resource value (for tracking changes)."),
    )
    previous_map_type = models.CharField(
        max_length=30,
        blank=True,
        help_text=_("Previous map_type value (for tracking changes)."),
    )

    class Meta:
        # Newest curation actions first.
        ordering = ["-created_date"]
        # NOTE(review): the accompanying migration declares these two indexes
        # with explicit names while none is given here -- confirm that
        # ``makemigrations`` reports no drift.
        indexes = [
            models.Index(fields=["relation", "-created_date"]),
            models.Index(fields=["curator", "-created_date"]),
        ]

    def __str__(self):
        # Dereferences ``self.relation`` to read the UUID of the related row.
        return f"Curation {self.uuid} for relation {self.relation.uuid}"
+ ), + ) + + class Meta: + ordering = ["-created_date"] + indexes = [ + models.Index(fields=["source_relation", "-created_date"]), + models.Index(fields=["created_by", "-created_date"]), ] def __str__(self): - return f"{self.from_resource.pk} > {self.to_resource.pk} using {self.map_type}" + return f"Propagation batch {self.uuid} for relation {self.source_relation.uuid}" + + def undo(self, logger=None): + """Delete all relations created in this batch.""" + # Find relations created by this batch + # Relations have extra_data with propagation_batch UUID + relations = self.project.codebaserelations.filter( + extra_data__propagation_batch=str(self.uuid) + ) + count = relations.count() + relations.delete() - @property - def status(self): - return self.to_resource.status + if logger: + logger(f"Undone propagation batch {self.uuid}: {count} relations deleted.") - @property - def score(self): - score = self.extra_data.get("path_score", "") - if diff_ratio := self.extra_data.get("diff_ratio", ""): - score += f" diff_ratio: {diff_ratio}" - return score + return count class VulnerabilityMixin(models.Model): diff --git a/scanpipe/pipelines/publish_to_federatedcode.py b/scanpipe/pipelines/publish_to_federatedcode.py index 5ac31466c2..6b5f320506 100644 --- a/scanpipe/pipelines/publish_to_federatedcode.py +++ b/scanpipe/pipelines/publish_to_federatedcode.py @@ -47,6 +47,7 @@ def steps(cls): cls.get_package_repository, cls.clone_repository, cls.add_scan_result, + cls.add_origin_curations, cls.commit_and_push_changes, cls.delete_working_dir, ) @@ -86,11 +87,61 @@ def add_scan_result(self): logger=self.log, ) + def add_origin_curations(self): + """Add origin curations to the local Git repository.""" + deploy_options = {} + if getattr(self.project, "extra_data", None): + deploy_options = self.project.extra_data.get("origin_deploy", {}) or {} + + include_history = deploy_options.get("include_history", True) + merge_strategy = deploy_options.get("merge_strategy", "latest") + + # Check 
def add_origin_curations(self):
    """
    Write the project's origin curations into the local Git repository.

    Deployment options are read from ``project.extra_data["origin_deploy"]``
    (``include_history`` defaults to True, ``merge_strategy`` to "latest").
    The options are removed from the project once this step has run.
    """
    project = self.project

    stored = getattr(project, "extra_data", None)
    options = (stored.get("origin_deploy", {}) or {}) if stored else {}
    with_history = options.get("include_history", True)
    strategy = options.get("merge_strategy", "latest")

    # Relations count as curated when their curation status is non-empty.
    curated_total = (
        project.codebaserelations.filter(curation_status__isnull=False)
        .exclude(curation_status="")
        .count()
    )

    if curated_total <= 0:
        self.relative_curation_file_path = None
        self.log("No curated relations to export.")
    else:
        self.relative_curation_file_path = federatedcode.add_origin_curations(
            project=project,
            repo=self.repo,
            package_scan_file=self.package_scan_file,
            include_history=with_history,
            merge_strategy=strategy,
            logger=self.log,
        )
        history_word = "with" if with_history else "without"
        self.log(
            f"Origin curations ({curated_total} relations) added to repository "
            f"using '{strategy}' strategy "
            f"{history_word} history."
        )

    # Drop the one-shot deploy options so a later run does not reuse them.
    stored = getattr(project, "extra_data", None)
    if stored and stored.get("origin_deploy"):
        stored.pop("origin_deploy", None)
        project.extra_data = stored
        project.save(update_fields=["extra_data"])


def commit_and_push_changes(self):
    """Commit the scan file, plus the curation file if any, and push them."""
    paths = [str(self.relative_file_path)]

    # The attribute is missing when the curation step did not run.
    curation_path = getattr(self, "relative_curation_file_path", None)
    if curation_path:
        paths.append(str(curation_path))

    federatedcode.commit_and_push_changes(
        repo=self.repo,
        files_to_commit=paths,
        purls=[self.project.purl],
        logger=self.log,
    )
def export_origin_curations(project, logger=None, include_history=True):
    """
    Export origin curations for a project to a structured data format.

    Args:
        project: Project whose curated relations are exported.
        logger: Optional callable receiving a one-line summary message.
        include_history: When False, the ``OriginCuration`` history is not
            queried and ``curation_history`` is an empty list.

    Returns:
        A dictionary containing:
        - relations: List of curated relations with metadata
        - curation_history: History of curation actions, newest first
        - metadata: Project and export information

    This data can be serialized to YAML for FederatedCode deployment.
    """
    import datetime

    from scanpipe.models import OriginCuration

    # A relation counts as curated when its curation status is non-empty.
    curated_relations = (
        project.codebaserelations.filter(curation_status__isnull=False)
        .exclude(curation_status="")
        .select_related("from_resource", "to_resource", "curated_by")
    )

    relations_data = []
    for relation in curated_relations:
        relation_data = {
            "uuid": str(relation.uuid),
            "from_resource_path": relation.from_resource.path,
            "to_resource_path": relation.to_resource.path,
            "map_type": relation.map_type,
            "curation_status": relation.curation_status,
            "confidence_level": relation.confidence_level or "",
            "curation_notes": relation.curation_notes or "",
            "curated_by": relation.curated_by.username if relation.curated_by else "",
            "curated_at": relation.curated_at.isoformat()
            if relation.curated_at
            else "",
        }
        if relation.extra_data:
            relation_data["extra_data"] = relation.extra_data
        relations_data.append(relation_data)

    # Get full curation history if requested
    curation_history = []
    if include_history:
        curations = (
            OriginCuration.objects.filter(project=project)
            # The previous_* resources are read for every row below; joining
            # them here avoids two extra queries per history entry.
            .select_related(
                "relation",
                "curator",
                "previous_from_resource",
                "previous_to_resource",
            )
            .order_by("-created_date")
        )

        for curation in curations:
            history_entry = {
                "relation_uuid": str(curation.relation.uuid),
                "curator": curation.curator.username if curation.curator else "",
                "created_date": curation.created_date.isoformat(),
                "curation_status": curation.curation_status,
                "confidence_level": curation.confidence_level or "",
                "notes": curation.notes or "",
                "previous_from_resource_path": (
                    curation.previous_from_resource.path
                    if curation.previous_from_resource
                    else ""
                ),
                "previous_to_resource_path": (
                    curation.previous_to_resource.path
                    if curation.previous_to_resource
                    else ""
                ),
                "previous_map_type": curation.previous_map_type or "",
            }
            curation_history.append(history_entry)

    export_data = {
        "metadata": {
            "project_name": project.name,
            "project_slug": project.slug,
            # The moment of the export, not the creation date of the project.
            "export_date": datetime.datetime.now(datetime.timezone.utc).isoformat(),
            "tool": "pkg:github/aboutcode-org/scancode.io",
            "tool_version": VERSION,
            "total_curated_relations": len(relations_data),
            "total_curation_actions": len(curation_history),
        },
        "relations": relations_data,
        "curation_history": curation_history,
    }

    if logger:
        logger(
            f"Exported {len(relations_data)} curated relations "
            f"with {len(curation_history)} curation history entries."
        )

    return export_data


def add_origin_curations(
    project,
    repo,
    package_scan_file,
    include_history=True,
    merge_strategy="latest",
    logger=None,
):
    """
    Add origin curations to the local Git repository alongside scan results.

    The curations file will be placed in the same directory as the scan file,
    with the name `origin-curations.yaml`. When that file already exists and
    holds data, the fresh export is merged into it using ``merge_strategy``.

    Returns the path of the curation file relative to the repository root.
    """
    # Get the directory of the scan file
    relative_scan_file_path = Path(*package_scan_file.parts[1:])
    curation_file_path = relative_scan_file_path.parent / "origin-curations.yaml"

    # Export curations
    curation_data = export_origin_curations(
        project, logger=logger, include_history=include_history
    )

    # Merge with existing data if present
    full_curation_path = Path(repo.working_dir) / curation_file_path
    if full_curation_path.exists():
        try:
            with open(full_curation_path, encoding="utf-8") as existing_file:
                existing_data = saneyaml.load(existing_file.read()) or {}
        except Exception as e:
            # Deliberate best effort: an unreadable file is reported and then
            # treated as empty, so the export below replaces it.
            if logger:
                logger(f"Unable to read existing curation file: {e}")
            existing_data = {}
        if existing_data:
            curation_data = merge_curations(
                existing_curations=existing_data,
                new_curations=curation_data,
                strategy=merge_strategy,
                logger=logger,
            )

    # NOTE(review): this also clears history entries that came from the
    # existing repository file through the merge above -- confirm intended.
    if not include_history and isinstance(curation_data, dict):
        curation_data["curation_history"] = []

    # Write to repository
    write_data_as_yaml(
        base_path=repo.working_dir,
        file_path=curation_file_path,
        data=curation_data,
    )

    if logger:
        logger(f"Origin curations written to {curation_file_path}")

    return curation_file_path
def _history_fingerprint(entry):
    """Return a hashable fingerprint used to spot duplicate history entries."""
    if isinstance(entry, dict):
        return tuple(sorted((str(key), repr(value)) for key, value in entry.items()))
    return repr(entry)


def _merge_curation_history(existing_curations, new_curations):
    """Combine both histories, drop exact duplicates, and sort newest first."""
    merged = []
    seen = set()
    for source in (existing_curations, new_curations):
        for entry in source.get("curation_history") or []:
            fingerprint = _history_fingerprint(entry)
            if fingerprint not in seen:
                seen.add(fingerprint)
                merged.append(entry)
    # ``or ""`` keeps entries with a missing or null date sortable.
    merged.sort(key=lambda entry: str(entry.get("created_date") or ""), reverse=True)
    return merged


def merge_curations(existing_curations, new_curations, strategy="latest", logger=None):
    """
    Merge curations from different sources with conflict resolution.

    Args:
        existing_curations: Dictionary with existing curation data
        new_curations: Dictionary with new curation data to merge
        strategy: Merge strategy - "latest", "priority", or "manual".
            "latest" keeps, per relation UUID, the entry with the greater
            ``curated_at`` string (the existing entry wins ties); "priority"
            always takes the new entry; any other value is handled as
            "manual" and returns both inputs untouched for review.
        logger: Optional logger function

    Returns:
        Merged curation data dictionary. For "latest" and "priority" it holds
        ``metadata``, ``relations`` and ``curation_history``; history entries
        present in both inputs appear once, and the metadata totals describe
        the merged result.

    Raises:
        KeyError: If a relation entry has no "uuid" key.

    """
    if strategy not in ("latest", "priority"):
        # Manual merge: return both for manual review
        return {
            "metadata": {
                "merge_strategy": strategy,
                "requires_manual_review": True,
            },
            "existing_curations": existing_curations,
            "new_curations": new_curations,
        }

    merged_relations = {
        relation["uuid"]: relation
        for relation in existing_curations.get("relations") or []
    }

    for relation in new_curations.get("relations") or []:
        uuid = relation["uuid"]
        current = merged_relations.get(uuid)
        if current is None:
            merged_relations[uuid] = relation
        elif strategy == "priority":
            if logger:
                logger(
                    f"Overwriting relation {uuid} "
                    f"with new curation (priority strategy)"
                )
            merged_relations[uuid] = relation
        # ISO timestamps are compared as strings; a null timestamp (possible
        # after a YAML round trip) is treated as the oldest value.
        elif str(relation.get("curated_at") or "") > str(
            current.get("curated_at") or ""
        ):
            merged_relations[uuid] = relation
            if logger:
                logger(
                    f"Updated relation {uuid} "
                    f"with newer curation (latest strategy)"
                )

    # Every export carries the full history, so without de-duplication each
    # deployment would append another copy of the entries already stored.
    merged_history = _merge_curation_history(existing_curations, new_curations)

    existing_metadata = existing_curations.get("metadata") or {}
    new_metadata = new_curations.get("metadata") or {}
    if strategy == "latest":
        metadata = {
            **existing_metadata,
            "merge_strategy": strategy,
            "merged_at": new_metadata.get("export_date", ""),
        }
    else:
        metadata = {**new_metadata, "merge_strategy": strategy}

    # Keep the totals in line with what the merged document contains.
    metadata["total_curated_relations"] = len(merged_relations)
    metadata["total_curation_actions"] = len(merged_history)

    return {
        "metadata": metadata,
        "relations": list(merged_relations.values()),
        "curation_history": merged_history,
    }
def get_propagation_candidates(
    project,
    relation,
    strategy,
    similarity_threshold=0.8,
    pattern=None,
    logger=None,
):
    """
    Preview which resources a propagation would affect, without creating
    any relation.

    Returns a list of tuples: (to_resource, from_resource, map_type, confidence).
    An unknown ``strategy``, or the "pattern" strategy without a ``pattern``,
    yields an empty list.
    """
    if strategy == "similar":
        return _get_similar_candidates(
            project, relation, similarity_threshold, logger
        )
    if strategy == "directory":
        return _get_directory_candidates(project, relation, logger)
    if strategy == "package":
        return _get_package_candidates(project, relation, logger)
    if strategy != "pattern":
        return []

    if pattern:
        return _get_pattern_candidates(project, relation, pattern, logger)

    if logger:
        logger("Pattern strategy requires a pattern parameter.")
    return []
def propagate_origin_to_similar_resources(
    project,
    relation,
    similarity_threshold=0.8,
    user=None,
    logger=None,
):
    """
    Create relations for resources that resemble the given relation.

    Candidates are found by checksum (sha1) match and by similarity of the
    directory segments of their paths, then handed to ``_apply_propagation``
    under the "similar" strategy.
    """
    return _apply_propagation(
        project,
        relation,
        _get_similar_candidates(project, relation, similarity_threshold, logger),
        "similar",
        user,
        logger,
    )


def propagate_origin_by_directory_structure(
    project,
    relation,
    user=None,
    logger=None,
):
    """
    Create relations for sibling files of the relation's ``to_resource``.

    Each sibling is paired with a file found in the directory of the
    relation's ``from_resource``; results are applied under the "directory"
    strategy.
    """
    return _apply_propagation(
        project,
        relation,
        _get_directory_candidates(project, relation, logger),
        "directory",
        user,
        logger,
    )


def propagate_origin_by_package(
    project,
    relation,
    user=None,
    logger=None,
):
    """
    Create relations for the other resources of the ``to_resource`` packages.

    Candidates come from ``_get_package_candidates`` and are applied under
    the "package" strategy.
    """
    return _apply_propagation(
        project,
        relation,
        _get_package_candidates(project, relation, logger),
        "package",
        user,
        logger,
    )


def propagate_origin_by_pattern(
    project,
    relation,
    pattern,
    user=None,
    logger=None,
):
    """
    Create relations for resources whose path matches ``pattern``.

    ``pattern`` may be a regular expression or a glob; candidates are applied
    under the "pattern" strategy.
    """
    return _apply_propagation(
        project,
        relation,
        _get_pattern_candidates(project, relation, pattern, logger),
        "pattern",
        user,
        logger,
    )
def _dir_segments(path):
    """Return the set of directory segments of ``path``, file name excluded."""
    parts = Path(path.lstrip("/")).parts
    # A path without a directory yields {""}, so the set is never empty.
    return set("/".join(parts[:-1]).split("/"))


def _jaccard_similarity(left, right):
    """Return the size of the intersection over the size of the union."""
    union = left | right
    return len(left & right) / len(union) if union else 0.0


def _get_similar_candidates(project, relation, similarity_threshold, logger=None):
    """
    Find similar resources based on path and checksum.

    Args:
        project: Project providing the ``codebaseresources`` queryset.
        relation: Source relation whose from/to resources are the reference.
        similarity_threshold: Minimum similarity (0..1) of directory segment
            sets required on both the to-side and the from-side.
        logger: Unused; accepted for signature parity with the other finders.

    Returns:
        A list of (to_resource, from_resource, map_type, confidence) tuples
        with at most one entry per (to path, from path) pair: sha1 matches
        ("sha1", "high") first, then path matches ("path", "medium").
    """
    candidates = []
    source_from = relation.from_resource
    source_to = relation.to_resource

    # Get all unmapped to_resources in the same codebase side
    to_resources = (
        project.codebaseresources.to_codebase()
        .files()
        .has_no_relation()
        .exclude(path=source_to.path)
    )
    from_files = project.codebaseresources.from_codebase().files()

    # Try checksum matching first (most reliable)
    if source_to.sha1:
        sha1_to_matches = list(to_resources.filter(sha1=source_to.sha1))
        if sha1_to_matches:
            # The from-side query does not depend on the to-resource, so it
            # runs once instead of once per match.
            sha1_from_matches = list(from_files.filter(sha1=source_to.sha1))
            for to_res in sha1_to_matches:
                for from_res in sha1_from_matches:
                    candidates.append((to_res, from_res, "sha1", "high"))

    # Try path similarity
    source_to_path_parts = Path(source_to.path.lstrip("/")).parts
    source_from_path_parts = Path(source_from.path.lstrip("/")).parts

    if len(source_to_path_parts) > 1 and len(source_from_path_parts) > 1:
        to_dir_parts = _dir_segments(source_to.path)
        from_dir_parts = _dir_segments(source_from.path)
        # File name -> [(from_resource, its directory segments)], so that
        # to-resources sharing a name reuse one query.
        from_files_by_name = {}

        for to_res in to_resources:
            to_res_path_parts = Path(to_res.path.lstrip("/")).parts
            if len(to_res_path_parts) <= 1:
                continue

            dir_similarity = _jaccard_similarity(
                to_dir_parts, _dir_segments(to_res.path)
            )
            if dir_similarity < similarity_threshold:
                continue

            # Find from_resource with same name in similar directory
            to_res_name = to_res_path_parts[-1]
            if to_res_name not in from_files_by_name:
                from_files_by_name[to_res_name] = [
                    (from_res, _dir_segments(from_res.path))
                    for from_res in from_files.filter(name=to_res_name)
                ]

            for from_res, from_res_dir_parts in from_files_by_name[to_res_name]:
                from_dir_similarity = _jaccard_similarity(
                    from_dir_parts, from_res_dir_parts
                )
                if from_dir_similarity >= similarity_threshold:
                    candidates.append((to_res, from_res, "path", "medium"))

    # Remove duplicates while preserving order (first occurrence wins)
    unique_candidates = {}
    for candidate in candidates:
        key = (candidate[0].path, candidate[1].path)
        unique_candidates.setdefault(key, candidate)

    return list(unique_candidates.values())
def _get_directory_candidates(project, relation, logger=None):
    """
    Find sibling files in same directory structure.

    Every unmapped file next to the relation's ``to_resource`` is paired with
    a file from the directory of the relation's ``from_resource``: a file of
    the same name when one exists (confidence "high"), otherwise the first
    file with the same extension (confidence "medium").

    Returns a list of (to_resource, from_resource, "path", confidence) tuples.
    ``logger`` is unused; it is accepted for signature parity.
    """
    source_from = relation.from_resource
    source_to = relation.to_resource

    # Get parent directories
    to_parent = str(Path(source_to.path).parent)
    from_parent = str(Path(source_from.path).parent)

    # Get all files in the same to/ directory
    to_siblings = (
        project.codebaseresources.to_codebase()
        .files()
        .has_no_relation()
        .filter(parent_path=to_parent)
        .exclude(path=source_to.path)
    )
    from_dir_files = (
        project.codebaseresources.from_codebase()
        .files()
        .filter(parent_path=from_parent)
    )

    candidates = []
    for to_sibling in to_siblings:
        # Try exact name match first; ``first()`` returns None when nothing
        # matches, which saves the separate ``exists()`` query.
        same_name = from_dir_files.filter(name=to_sibling.name).first()
        if same_name is not None:
            candidates.append((to_sibling, same_name, "path", "high"))
            continue

        # Try extension match
        if not to_sibling.extension:
            continue
        same_extension = from_dir_files.filter(
            extension=to_sibling.extension
        ).first()
        if same_extension is not None:
            candidates.append((to_sibling, same_extension, "path", "medium"))

    return candidates
def _get_package_candidates(project, relation, logger=None):
    """
    Find resources in the same package.

    Unmapped to-side files sharing a package with the relation's
    ``to_resource`` are paired with the first from-side file of the same name.

    Returns a list of (to_resource, from_resource, "path", "medium") tuples.
    The list is empty when the ``to_resource`` has no package, or when no
    package-relative path can be determined for either source resource.
    """
    source_from = relation.from_resource
    source_to = relation.to_resource

    # Get packages for source resources
    to_packages = source_to.discovered_packages.all()
    # Fetched once: the same package is needed for every resource below.
    primary_to_package = to_packages.first()
    if primary_to_package is None:
        if logger:
            logger(f"Source to_resource {source_to.path} has no packages.")
        return []

    # Get all to_resources in the same packages
    to_resources = (
        project.codebaseresources.to_codebase()
        .files()
        .has_no_relation()
        .filter(discovered_packages__in=to_packages)
        .distinct()
        .exclude(path=source_to.path)
    )

    source_to_rel_path = _get_relative_path_from_package(
        source_to, primary_to_package
    )
    source_from_rel_path = _get_relative_path_from_package(
        source_from, source_from.discovered_packages.first()
    )
    if not (source_to_rel_path and source_from_rel_path):
        return []

    from_files = project.codebaseresources.from_codebase().files()
    candidates = []
    for to_res in to_resources:
        if not _get_relative_path_from_package(to_res, primary_to_package):
            continue

        # Match by name; ``first()`` returns None when nothing matches.
        from_res = from_files.filter(name=to_res.name).first()
        if from_res is not None:
            candidates.append((to_res, from_res, "path", "medium"))

    return candidates
def _compile_path_pattern(pattern):
    """
    Compile ``pattern`` as a regex, falling back to glob semantics.

    The glob fallback is used only when ``pattern`` is not a valid regular
    expression. In that case ``*`` matches any run of characters, ``?`` any
    single character, and every other character matches itself literally.
    """
    try:
        return re.compile(pattern)
    except re.error:
        glob_tokens = {"*": ".*", "?": "."}
        # Escape literals so that e.g. the "." of "*.class" is a real dot.
        translated = "".join(
            glob_tokens.get(char, re.escape(char)) for char in pattern
        )
        return re.compile(f"^{translated}$")


def _get_pattern_candidates(project, relation, pattern, logger=None):
    """
    Find resources matching a path pattern.

    Unmapped to-side files whose path matches ``pattern`` are paired with the
    first from-side file of the same name.

    Returns a list of (to_resource, from_resource, "path", "medium") tuples.
    ``relation`` and ``logger`` are unused; they are accepted for signature
    parity with the other candidate finders.
    """
    regex_pattern = _compile_path_pattern(pattern)

    # Find all to_resources matching the pattern
    to_resources = project.codebaseresources.to_codebase().files().has_no_relation()
    from_files = project.codebaseresources.from_codebase().files()

    candidates = []
    for to_res in to_resources:
        if not regex_pattern.search(to_res.path):
            continue

        # Match by name; ``first()`` returns None when nothing matches, which
        # saves the separate ``exists()`` query.
        from_res = from_files.filter(name=to_res.name).first()
        if from_res is not None:
            candidates.append((to_res, from_res, "path", "medium"))

    return candidates


def _get_relative_path_from_package(resource, package):
    """
    Get relative path of resource from package root.

    Returns None when ``package`` is falsy. Otherwise the resource path is
    returned unchanged: the package root is not determined yet.
    """
    if not package:
        return None

    # Simplified placeholder: return the resource path as-is.
    return resource.path
def _apply_propagation(  # noqa: C901 - propagation workflow
    project,
    source_relation,
    candidates,
    map_type_prefix,
    user=None,
    logger=None,
):
    """
    Create one relation per candidate and record the batch.

    A ``PropagationBatch`` is created only for an authenticated ``user``.
    Candidates whose from/to pair already has a relation (of any map type)
    are skipped. A failure on one candidate is logged and does not stop the
    remaining ones.

    Returns a (count, created_relations, batch) tuple; ``batch`` is None when
    no batch was recorded.
    """
    created_relations = []

    # Create propagation batch for tracking
    batch = None
    if user and user.is_authenticated:
        batch = PropagationBatch.objects.create(
            project=project,
            source_relation=source_relation,
            strategy=map_type_prefix,
            created_by=user,
            relation_count=0,
            extra_data={},
        )

    for to_resource, from_resource, map_type, confidence in candidates:
        # Check if relation already exists (any map_type)
        already_mapped = CodebaseRelation.objects.filter(
            project=project,
            from_resource=from_resource,
            to_resource=to_resource,
        ).exists()

        if already_mapped:
            if logger:
                logger(
                    f"Relation already exists: {to_resource.path} -> "
                    f"{from_resource.path}"
                )
            continue

        try:
            provenance = {
                "propagated_from": str(source_relation.uuid),
                "confidence": confidence,
            }
            if batch:
                # This key lets the batch find its relations again later.
                provenance["propagation_batch"] = str(batch.uuid)

            created_relations.append(
                make_relation(
                    from_resource=from_resource,
                    to_resource=to_resource,
                    map_type=f"{map_type_prefix}_{map_type}",
                    extra_data=provenance,
                )
            )

            if logger:
                logger(
                    f"Created relation: {to_resource.path} -> {from_resource.path} "
                    f"(confidence: {confidence})"
                )
        except Exception as e:
            if logger:
                logger(f"Error creating relation: {e}")

    count = len(created_relations)

    # Update batch with actual count
    if batch:
        batch.relation_count = count
        batch.save(update_fields=["relation_count"])

    if logger:
        logger(f"Propagation complete: {count} relations created.")

    return count, created_relations, batch
file mode 100644 index 0000000000..c724a1711f --- /dev/null +++ b/scanpipe/templates/scanpipe/origin_curate.html @@ -0,0 +1,49 @@ +{% extends "scanpipe/base.html" %} +{% load humanize %} + +{% block title %}ScanCode.io: {{ project.name }} - Curate Relation{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Curate Relation" %} +
+
+
+ +
+
+
+

Relation Details

+ +
+
+

To Resource: + + {{ object.to_resource.path }} + +

+

From Resource: + + {{ object.from_resource.path }} + +

+

Map Type: {{ object.map_type }}

+

Status: {{ object.status }}

+
+
+ + {% include 'scanpipe/tabset/tabset.html' with tabset_data=tabset_data object=object %} +
+{% endblock %} + diff --git a/scanpipe/templates/scanpipe/origin_curate_create.html b/scanpipe/templates/scanpipe/origin_curate_create.html new file mode 100644 index 0000000000..c33e7cacdb --- /dev/null +++ b/scanpipe/templates/scanpipe/origin_curate_create.html @@ -0,0 +1,103 @@ +{% extends "scanpipe/base.html" %} + +{% block title %}ScanCode.io: {{ project.name }} - Add Relation{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Add Relation" %} +
+
+
+ +
+
+

Create New Relation

+
+ {% csrf_token %} + +
+ +
+ {{ form.from_resource }} +
+ {% if form.from_resource.help_text %} +

{{ form.from_resource.help_text }}

+ {% endif %} + {% if form.from_resource.errors %} +

{{ form.from_resource.errors }}

+ {% endif %} +
+ +
+ +
+ {{ form.to_resource }} +
+ {% if form.to_resource.help_text %} +

{{ form.to_resource.help_text }}

+ {% endif %} + {% if form.to_resource.errors %} +

{{ form.to_resource.errors }}

+ {% endif %} +
+ +
+ +
+ {{ form.map_type }} +
+ {% if form.map_type.help_text %} +

{{ form.map_type.help_text }}

+ {% endif %} + {% if form.map_type.errors %} +

{{ form.map_type.errors }}

+ {% endif %} +
+ +
+ +
+ {{ form.curation_status }} +
+ {% if form.curation_status.errors %} +

{{ form.curation_status.errors }}

+ {% endif %} +
+ +
+ +
+ {{ form.confidence_level }} +
+ {% if form.confidence_level.errors %} +

{{ form.confidence_level.errors }}

+ {% endif %} +
+ +
+ +
+ {{ form.curation_notes }} +
+ {% if form.curation_notes.errors %} +

{{ form.curation_notes.errors }}

+ {% endif %} +
+ +
+
+ +
+
+ Cancel +
+
+
+
+
+{% endblock %} + + diff --git a/scanpipe/templates/scanpipe/origin_deploy.html b/scanpipe/templates/scanpipe/origin_deploy.html new file mode 100644 index 0000000000..77e461399a --- /dev/null +++ b/scanpipe/templates/scanpipe/origin_deploy.html @@ -0,0 +1,155 @@ +{% extends "scanpipe/base.html" %} +{% load humanize %} + +{% block title %}ScanCode.io: {{ project.name }} - Deploy Origin Curations{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Deploy Curations" %} +
+
+
+ +
+
+

Deploy Origin Curations to FederatedCode

+ + {% if not federatedcode_configured %} +
+

FederatedCode is not configured.

+

Please contact your administrator to configure FederatedCode settings.

+
+ {% elif not federatedcode_available %} +
+

FederatedCode Git account is not available.

+

Please check the FederatedCode configuration and network connectivity.

+
+ {% elif not has_purl %} +
+

Project PURL is required.

+

Please set a PURL for this project before deploying to FederatedCode.

+
+ {% elif curated_count == 0 %} +
+

No curated relations to deploy.

+

You need to curate some relations before deploying to FederatedCode.

+ + Go to Origin Review + +
+ {% else %} +
+

+ {{ curated_count|intcomma }} curated relation{{ curated_count|pluralize }} + {% if curation_history_count > 0 %} + with {{ curation_history_count|intcomma }} curation action{{ curation_history_count|pluralize }} + {% endif %} + ready for deployment. +

+
+ +
+ {% csrf_token %} + +
+ +
+ {% for choice in form.merge_strategy %} + + {% endfor %} +
+ {% if form.merge_strategy.help_text %} +

{{ form.merge_strategy.help_text }}

+ {% endif %} + {% if form.merge_strategy.errors %} +

{{ form.merge_strategy.errors }}

+ {% endif %} +
+ +
+ + {% if form.include_history.help_text %} +

{{ form.include_history.help_text }}

+ {% endif %} +
+ +
+ + {% if form.preview_only.help_text %} +

{{ form.preview_only.help_text }}

+ {% endif %} +
+ +
+
+ +
+ +
+
+ + {% if preview_data %} +
+

Preview

+
+

+ Total Relations: {{ preview_data.metadata.total_curated_relations|intcomma }}
+ Total History Entries: {{ preview_data.metadata.total_curation_actions|intcomma }}
+ Export Date: {{ preview_data.metadata.export_date|date:"Y-m-d H:i:s" }} +

+ +
+ + View Sample Relations (first 5) + + + + + + + + + + + + + {% for relation in preview_data.relations|slice:":5" %} + + + + + + + + {% endfor %} + +
From ResourceTo ResourceMap TypeStatusConfidence
{{ relation.from_resource_path }}{{ relation.to_resource_path }}{{ relation.map_type }}{{ relation.curation_status|capfirst }}{{ relation.confidence_level|capfirst|default:"—" }}
+
+
+ {% endif %} + {% endif %} +
+
+{% endblock %} + diff --git a/scanpipe/templates/scanpipe/origin_propagate.html b/scanpipe/templates/scanpipe/origin_propagate.html new file mode 100644 index 0000000000..115f1764db --- /dev/null +++ b/scanpipe/templates/scanpipe/origin_propagate.html @@ -0,0 +1,176 @@ +{% extends "scanpipe/base.html" %} +{% load humanize %} + +{% block title %}ScanCode.io: {{ project.name }} - Propagate Origin{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Propagate Origin" %} +
+
+
+ +
+
+

Propagate Origin Determination

+

+ Propagate an origin determination to similar resources using various strategies. +

+ +
+ {% csrf_token %} + + {{ form.relation_uuid }} + +
+ +
+ {{ form.strategy }} +
+ {% if form.strategy.help_text %} +

{{ form.strategy.help_text }}

+ {% endif %} + {% if form.strategy.errors %} +

{{ form.strategy.errors }}

+ {% endif %} +
+ + + + + +
+ + {% if form.preview_only.help_text %} +

{{ form.preview_only.help_text }}

+ {% endif %} +
+ +
+
+ +
+
+ Cancel +
+
+
+ + {% if preview_data %} +
+

Preview

+

+ Source relation: + + {{ preview_data.relation.to_resource.path }} → {{ preview_data.relation.from_resource.path }} + +

+

+ {{ preview_data.count }} candidate relation{{ preview_data.count|pluralize }} found. +

+ + {% if preview_data.candidates %} + + + + + + + + + + + {% for to_res, from_res, map_type, confidence in preview_data.candidates %} + + + + + + + {% endfor %} + +
To ResourceFrom ResourceMap TypeConfidence
+ + {{ to_res.path }} + + + + {{ from_res.path }} + + {{ map_type }}{{ confidence|capfirst }}
+ + {% if not form.preview_only.value %} +
+

Uncheck "Preview only" and submit to apply these changes.

+
+ {% endif %} + {% else %} +
+

No candidates found for this strategy and relation.

+
+ {% endif %} + {% endif %} +
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} + + diff --git a/scanpipe/templates/scanpipe/origin_review.html b/scanpipe/templates/scanpipe/origin_review.html new file mode 100644 index 0000000000..c77d47e730 --- /dev/null +++ b/scanpipe/templates/scanpipe/origin_review.html @@ -0,0 +1,179 @@ +{% extends "scanpipe/base.html" %} +{% load humanize %} + +{% block title %}ScanCode.io: {{ project.name }} - Origin Review{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Origin Review" %} + {% include 'scanpipe/includes/search_field.html' with extra_class="is-small" placeholder="Search to/ resources" %} +
+ {% include 'scanpipe/includes/pagination_header_relations.html' %} + {% include 'scanpipe/includes/filters_breadcrumb.html' with filterset=filter only %} +
+
+ + {% if object_list %} +
+ {% include 'scanpipe/modals/origin_bulk_curation_modal.html' with form=bulk_curation_form %} + + + + + + + + + + + + + + + + + + + + {% for relation in object_list %} + + + + + + + + + + + + + {% endfor %} + +
+ + To ResourceStatusMap TypeCuration StatusConfidenceFrom ResourceCurated ByCurated AtActions
+ + + + {{ relation.to_resource.path }} + + + + {{ relation.to_resource.status }} + + + + {{ relation.map_type }} + + + {% if relation.curation_status %} + + {{ relation.curation_status|capfirst }} + + {% else %} + + {% endif %} + + {% if relation.confidence_level %} + + {{ relation.confidence_level|capfirst }} + + {% else %} + + {% endif %} + + + {{ relation.from_resource.path }} + + + {% if relation.curated_by %} + {{ relation.curated_by.username }} + {% else %} + + {% endif %} + + {% if relation.curated_at %} + {{ relation.curated_at|date:"Y-m-d H:i" }} + {% else %} + + {% endif %} + + +
+ + {% if is_paginated %} + {% include 'scanpipe/includes/pagination.html' with page_obj=page_obj %} + {% endif %} +
+ {% else %} +
+ +
+ {% endif %} +{% endblock %} + +{% block scripts %} + +{% endblock %} + diff --git a/scanpipe/templates/scanpipe/project_detail.html b/scanpipe/templates/scanpipe/project_detail.html index 0d9720eb35..8f4449fcf7 100644 --- a/scanpipe/templates/scanpipe/project_detail.html +++ b/scanpipe/templates/scanpipe/project_detail.html @@ -47,6 +47,14 @@
+ {% if project.codebaserelations.exists %} + + + + + Origin Review + + {% endif %} diff --git a/scanpipe/templates/scanpipe/relation_list.html b/scanpipe/templates/scanpipe/relation_list.html index cdc8b884d4..28eacae652 100644 --- a/scanpipe/templates/scanpipe/relation_list.html +++ b/scanpipe/templates/scanpipe/relation_list.html @@ -9,7 +9,15 @@
{% include 'scanpipe/includes/pagination_header_relations.html' %} {% include 'scanpipe/includes/filters_breadcrumb.html' with filterset=filter only %} diff --git a/scanpipe/templates/scanpipe/tabset/tab_curation.html b/scanpipe/templates/scanpipe/tabset/tab_curation.html new file mode 100644 index 0000000000..4ffd220c7b --- /dev/null +++ b/scanpipe/templates/scanpipe/tabset/tab_curation.html @@ -0,0 +1,75 @@ +
+
+ {% csrf_token %} +
+ +
+ {{ curation_form.from_resource }} +
+ {% if curation_form.from_resource.errors %} +

{{ curation_form.from_resource.errors }}

+ {% endif %} +
+ +
+ +
+ {{ curation_form.to_resource }} +
+ {% if curation_form.to_resource.errors %} +

{{ curation_form.to_resource.errors }}

+ {% endif %} +
+ +
+ +
+ {{ curation_form.map_type }} +
+ {% if curation_form.map_type.errors %} +

{{ curation_form.map_type.errors }}

+ {% endif %} +
+ +
+ +
+ {{ curation_form.curation_status }} +
+ {% if curation_form.curation_status.errors %} +

{{ curation_form.curation_status.errors }}

+ {% endif %} +
+ +
+ +
+ {{ curation_form.confidence_level }} +
+ {% if curation_form.confidence_level.errors %} +

{{ curation_form.confidence_level.errors }}

+ {% endif %} +
+ +
+ +
+ {{ curation_form.curation_notes }} +
+ {% if curation_form.curation_notes.errors %} +

{{ curation_form.curation_notes.errors }}

+ {% endif %} +
+ +
+
+ +
+
+ Cancel +
+
+
+
+ + diff --git a/scanpipe/templates/scanpipe/tabset/tab_curation_history.html b/scanpipe/templates/scanpipe/tabset/tab_curation_history.html new file mode 100644 index 0000000000..fa1676aa42 --- /dev/null +++ b/scanpipe/templates/scanpipe/tabset/tab_curation_history.html @@ -0,0 +1,30 @@ +
+ {% if object.curations.all %} + + + + + + + + + + + + {% for curation in object.curations.all %} + + + + + + + + {% endfor %} + +
DateCuratorStatusConfidenceNotes
{{ curation.created_date|date:"Y-m-d H:i" }}{{ curation.curator.username|default:"—" }}{{ curation.curation_status|capfirst }}{{ curation.confidence_level|capfirst|default:"—" }}{{ curation.notes|default:"—" }}
+ {% else %} +

No curation history available.

+ {% endif %} +
+ + diff --git a/scanpipe/tests/pipes/test_codebase.py b/scanpipe/tests/pipes/test_codebase.py index 9dea648a9b..6acd3ac308 100644 --- a/scanpipe/tests/pipes/test_codebase.py +++ b/scanpipe/tests/pipes/test_codebase.py @@ -118,9 +118,9 @@ def test_scanpipe_pipes_codebase_project_codebase_class_walk(self): "asgiref-3.3.0-py3-none-any.whl", "asgiref-3.3.0-py3-none-any.whl-extract", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref", + "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/__init__.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/compatibility.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/current_thread_executor.py", - "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/__init__.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/local.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/server.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/sync.py", @@ -140,9 +140,9 @@ def test_scanpipe_pipes_codebase_project_codebase_class_walk(self): bottom_up_paths = list(r.path for r in project_codebase.walk(topdown=False)) expected_bottom_up_paths = [ "asgiref-3.3.0-py3-none-any.whl", + "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/__init__.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/compatibility.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/current_thread_executor.py", - "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/__init__.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/local.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/server.py", "asgiref-3.3.0-py3-none-any.whl-extract/asgiref/sync.py", diff --git a/scanpipe/tests/pipes/test_symbols.py b/scanpipe/tests/pipes/test_symbols.py index f1724ed21c..5756d87214 100644 --- a/scanpipe/tests/pipes/test_symbols.py +++ b/scanpipe/tests/pipes/test_symbols.py @@ -24,6 +24,7 @@ import sys from pathlib import Path from unittest import skipIf +from unittest import skipUnless from django.test import TestCase @@ -32,6 +33,13 @@ from scanpipe.pipes import symbols from scanpipe.pipes.input 
import copy_input +try: + from source_inspector import symbols_ctags +except ImportError: # pragma: no cover - optional dependency + symbols_ctags = None + +CTAGS_INSTALLED = bool(symbols_ctags) and symbols_ctags.is_ctags_installed() + @skipIf(sys.platform == "darwin", "Not supported on macOS") class ScanPipeSymbolsPipesTest(TestCase): @@ -40,6 +48,7 @@ class ScanPipeSymbolsPipesTest(TestCase): def setUp(self): self.project1 = Project.objects.create(name="Analysis") + @skipUnless(CTAGS_INSTALLED, "Universal Ctags is required") def test_scanpipe_pipes_symbols_collect_and_store_resource_symbols_ctags(self): dir = self.project1.codebase_path / "codefile" dir.mkdir(parents=True) diff --git a/scanpipe/tests/test_api.py b/scanpipe/tests/test_api.py index 9894473757..4e4f446904 100644 --- a/scanpipe/tests/test_api.py +++ b/scanpipe/tests/test_api.py @@ -36,6 +36,8 @@ from rest_framework import status from rest_framework.exceptions import ErrorDetail from rest_framework.test import APIClient +from rest_framework.test import APIRequestFactory +from rest_framework.test import force_authenticate from scanpipe.api.serializers import CodebaseRelationSerializer from scanpipe.api.serializers import CodebaseResourceSerializer @@ -45,10 +47,12 @@ from scanpipe.api.serializers import ProjectSerializer from scanpipe.api.serializers import get_model_serializer from scanpipe.api.serializers import get_serializer_fields +from scanpipe.api.views import ProjectViewSet from scanpipe.models import CodebaseRelation from scanpipe.models import CodebaseResource from scanpipe.models import DiscoveredDependency from scanpipe.models import DiscoveredPackage +from scanpipe.models import OriginCuration from scanpipe.models import Project from scanpipe.models import ProjectMessage from scanpipe.models import Run @@ -843,14 +847,22 @@ def test_scanpipe_api_project_action_relations(self): self.assertEqual(1, len(response.data["results"])) relation = response.data["results"][0] - expected = { + 
expected_subset = { "to_resource": "daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO", "status": "", "map_type": "java_to_class", "score": "", "from_resource": "daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO", } - self.assertEqual(expected, relation) + subset = {key: relation[key] for key in expected_subset} + self.assertEqual(expected_subset, subset) + + self.assertIn("uuid", relation) + self.assertEqual("", relation["curation_status"]) + self.assertEqual("", relation["confidence_level"]) + self.assertEqual("", relation["curation_notes"]) + self.assertIsNone(relation["curated_by"]) + self.assertIsNone(relation["curated_at"]) def test_scanpipe_api_project_action_relations_filterset(self): url = reverse("project-relations", args=[self.project1.uuid]) @@ -861,6 +873,76 @@ def test_scanpipe_api_project_action_relations_filterset(self): response = self.csrf_client.get(url + f"?map_type={map_type}") self.assertEqual(1, response.data["count"]) + def test_scanpipe_api_project_action_relations_create_with_curation(self): + factory = APIRequestFactory() + url = reverse("project-relations", kwargs={"pk": self.project1.uuid}) + data = { + "from_resource_path": self.resource1.path, + "to_resource_path": self.resource1.path, + "map_type": "manual", + "curation_status": "approved", + "confidence_level": "verified", + "curation_notes": "Manually linked", + } + + request = factory.post(url, data, format="json") + force_authenticate(request, user=self.user) + view = ProjectViewSet.as_view({"post": "relations"}) + response = view(request, pk=str(self.project1.uuid)) + response.render() + self.assertEqual(status.HTTP_201_CREATED, response.status_code, response.data) + + relation_uuid = response.data["uuid"] + relation = CodebaseRelation.objects.get(uuid=relation_uuid) + self.assertEqual("approved", relation.curation_status) + self.assertEqual("verified", relation.confidence_level) + self.assertEqual("Manually linked", relation.curation_notes) + self.assertEqual(self.user, 
relation.curated_by) + self.assertIsNotNone(relation.curated_at) + + history_entry = OriginCuration.objects.get(relation=relation) + self.assertEqual("approved", history_entry.curation_status) + self.assertEqual("verified", history_entry.confidence_level) + self.assertEqual("Manually linked", history_entry.notes) + self.assertEqual(self.user, history_entry.curator) + + self.assertEqual("approved", response.data["curation_status"]) + self.assertEqual("verified", response.data["confidence_level"]) + self.assertEqual("Manually linked", response.data["curation_notes"]) + self.assertEqual(self.user.username, response.data["curated_by"]) + self.assertIsNotNone(response.data["curated_at"]) + + def test_scanpipe_api_project_action_relations_bulk_curate(self): + factory = APIRequestFactory() + url = reverse( + "project-relations-bulk-curate", kwargs={"pk": self.project1.uuid} + ) + data = { + "relation_uuids": [str(self.codebase_relation1.uuid)], + "action": "set_confidence", + "confidence_level": "high", + "curation_notes": "Bulk note", + } + + request = factory.post(url, data, format="json") + force_authenticate(request, user=self.user) + view = ProjectViewSet.as_view({"post": "relations_bulk_curate"}) + response = view(request, pk=str(self.project1.uuid)) + response.render() + self.assertEqual(status.HTTP_200_OK, response.status_code, response.data) + self.assertEqual(1, response.data["count"]) + + self.codebase_relation1.refresh_from_db() + self.assertEqual("high", self.codebase_relation1.confidence_level) + self.assertEqual("Bulk note", self.codebase_relation1.curation_notes) + self.assertEqual(self.user, self.codebase_relation1.curated_by) + + history_entry = OriginCuration.objects.filter( + relation=self.codebase_relation1 + ).latest("created_date") + self.assertEqual("Bulk note", history_entry.notes) + self.assertEqual(self.user, history_entry.curator) + def test_scanpipe_api_project_action_messages(self): url = reverse("project-messages", args=[self.project1.uuid]) 
make_message(self.project1, description="Error") diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index affc8c7588..876fcf432b 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -2104,9 +2104,9 @@ def test_scanpipe_codebase_resource_model_walk_method(self): topdown_paths = list(r.path for r in asgiref_root.walk(topdown=True)) expected_topdown_paths = [ "asgiref-3.3.0.whl-extract/asgiref", + "asgiref-3.3.0.whl-extract/asgiref/__init__.py", "asgiref-3.3.0.whl-extract/asgiref/compatibility.py", "asgiref-3.3.0.whl-extract/asgiref/current_thread_executor.py", - "asgiref-3.3.0.whl-extract/asgiref/__init__.py", "asgiref-3.3.0.whl-extract/asgiref/local.py", "asgiref-3.3.0.whl-extract/asgiref/server.py", "asgiref-3.3.0.whl-extract/asgiref/sync.py", @@ -2124,9 +2124,9 @@ def test_scanpipe_codebase_resource_model_walk_method(self): bottom_up_paths = list(r.path for r in asgiref_root.walk(topdown=False)) expected_bottom_up_paths = [ + "asgiref-3.3.0.whl-extract/asgiref/__init__.py", "asgiref-3.3.0.whl-extract/asgiref/compatibility.py", "asgiref-3.3.0.whl-extract/asgiref/current_thread_executor.py", - "asgiref-3.3.0.whl-extract/asgiref/__init__.py", "asgiref-3.3.0.whl-extract/asgiref/local.py", "asgiref-3.3.0.whl-extract/asgiref/server.py", "asgiref-3.3.0.whl-extract/asgiref/sync.py", diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 3512f6ac21..ad8aaa86d3 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -29,6 +29,7 @@ from pathlib import Path from unittest import mock from unittest import skipIf +from unittest import skipUnless from django.conf import settings from django.test import TestCase @@ -64,6 +65,13 @@ from scanpipe.tests.pipelines.steps_as_attribute import StepsAsAttribute from scanpipe.tests.pipelines.with_groups import WithGroups +try: + from source_inspector import symbols_ctags +except ImportError: # pragma: no cover - optional 
dependency + symbols_ctags = None + +CTAGS_INSTALLED = bool(symbols_ctags) and symbols_ctags.is_ctags_installed() + from_docker_image = os.environ.get("FROM_DOCKER_IMAGE") @@ -541,6 +549,12 @@ class PipelinesIntegrationTest(TestCase): "settings", "description", "traceback", + "uuid", + "curation_status", + "confidence_level", + "curation_notes", + "curated_by", + "curated_at", ] def _without_keys(self, data, exclude_keys): @@ -1863,6 +1877,7 @@ def mock_request_post_return(url, data, headers, timeout): self.assertIn("Couldn't index 1 unsupported PURLs", run.log) @skipIf(sys.platform == "darwin", "Not supported on macOS") + @skipUnless(CTAGS_INSTALLED, "Universal Ctags is required") def test_scanpipe_collect_symbols_ctags_pipeline_integration(self): pipeline_name = "collect_symbols_ctags" project1 = make_project() diff --git a/scanpipe/tests/test_views.py b/scanpipe/tests/test_views.py index 9172c42410..142fda3d46 100644 --- a/scanpipe/tests/test_views.py +++ b/scanpipe/tests/test_views.py @@ -28,6 +28,7 @@ from unittest import mock from django.apps import apps +from django.contrib.auth import get_user_model from django.http import FileResponse from django.http.response import Http404 from django.test import TestCase @@ -37,12 +38,14 @@ import openpyxl import requests +from rest_framework import status from scanpipe.forms import BaseProjectActionForm from scanpipe.models import CodebaseRelation from scanpipe.models import CodebaseResource from scanpipe.models import DiscoveredDependency from scanpipe.models import DiscoveredPackage +from scanpipe.models import OriginCuration from scanpipe.models import Project from scanpipe.pipes import make_relation from scanpipe.pipes import update_or_create_dependency @@ -821,7 +824,7 @@ def test_scanpipe_views_project_views(self): with self.assertNumQueries(7): self.client.get(url) - with self.assertNumQueries(13): + with self.assertNumQueries(14): self.client.get(self.project1.get_absolute_url()) 
def test_origin_bulk_curation_view_updates_relations(self):
    """A bulk "approve" POST curates the relation and records one history row."""
    curator = get_user_model().objects.create_user(
        "curator",
        password="secret",  # noqa: S106
        is_staff=True,
    )
    self.client.force_login(curator)

    project = self.project1
    # Create the "from" resource first, then the "to" resource.
    source_resource, target_resource = (
        CodebaseResource.objects.create(project=project, path=resource_path)
        for resource_path in ("from/path.py", "to/path.py")
    )
    relation = CodebaseRelation.objects.create(
        project=project,
        from_resource=source_resource,
        to_resource=target_resource,
        map_type="manual",
    )

    bulk_curate_url = reverse("origin_bulk_curate", args=[project.slug])
    review_url = reverse("origin_review", args=[project.slug])
    payload = {
        "bulk_curate-action": "approve",
        "selected_ids": str(relation.uuid),
        "bulk_curate-curation_notes": "Reviewed",
    }

    response = self.client.post(bulk_curate_url, payload, follow=True)
    self.assertRedirects(
        response,
        review_url,
        target_status_code=status.HTTP_200_OK,
    )

    # The relation itself carries the latest curation state.
    relation.refresh_from_db()
    self.assertEqual("approved", relation.curation_status)
    self.assertEqual("Reviewed", relation.curation_notes)
    self.assertEqual(curator, relation.curated_by)
    self.assertIsNotNone(relation.curated_at)

    # Exactly one history entry is recorded for the bulk action.
    history = OriginCuration.objects.filter(relation=relation)
    self.assertEqual(1, history.count())
    entry = history.first()
    self.assertEqual("approved", entry.curation_status)
    self.assertEqual(curator, entry.curator)
name="project_relations", ), + path( + "project//origin-review/", + views.OriginReviewView.as_view(), + name="origin_review", + ), + path( + "project//origin-curate//", + views.OriginCurateView.as_view(), + name="origin_curate", + ), + path( + "project//origin-curate/add/", + views.OriginCurateCreateView.as_view(), + name="origin_curate_add", + ), + path( + "project//origin-bulk-curate/", + views.OriginBulkCurationView.as_view(), + name="origin_bulk_curate", + ), + path( + "project//origin-propagate/", + views.OriginPropagateView.as_view(), + name="origin_propagate", + ), + path( + "project//origin-deploy/", + views.OriginDeployView.as_view(), + name="origin_deploy", + ), path( "project//messages/", views.ProjectMessageListView.as_view(), diff --git a/scanpipe/views.py b/scanpipe/views.py index 9913d4947f..695a524385 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -80,7 +80,12 @@ from scanpipe.forms import AddLabelsForm from scanpipe.forms import AddPipelineForm from scanpipe.forms import BaseProjectActionForm +from scanpipe.forms import BulkCurationForm from scanpipe.forms import EditInputSourceTagForm +from scanpipe.forms import OriginCurateCreateForm +from scanpipe.forms import OriginCurationForm +from scanpipe.forms import OriginDeployForm +from scanpipe.forms import OriginPropagationForm from scanpipe.forms import PipelineRunStepSelectionForm from scanpipe.forms import ProjectArchiveForm from scanpipe.forms import ProjectCloneForm @@ -380,7 +385,7 @@ def get_label_count(fields_data): if isinstance(value, list): return len(value) - def get_context_data(self, **kwargs): + def get_context_data(self, **kwargs): # noqa: C901 context = super().get_context_data(**kwargs) context["tabset_data"] = self.get_tabset_data() return context @@ -456,7 +461,7 @@ def get_field_label(field_name): """Return a formatted label for display based on the `field_name`.""" return field_name.replace("_", " ").capitalize().replace("url", "URL") - def get_context_data(self, 
**kwargs): + def get_context_data(self, **kwargs): # noqa: C901 context = super().get_context_data(**kwargs) context["columns_data"] = self.get_columns_data() context["request_query_string"] = self.request.GET.urlencode() @@ -873,7 +878,7 @@ def get_context_data(self, **kwargs): return context - def post(self, request, *args, **kwargs): + def post(self, request, *args, **kwargs): # noqa: C901 project = self.get_object() if "add-inputs-submit" in request.POST: @@ -1329,7 +1334,7 @@ class ProjectActionView(ConditionalLoginRequired, generic.ListView): } success_url = reverse_lazy("project_list") - def post(self, request, *args, **kwargs): + def post(self, request, *args, **kwargs): # noqa: C901 action_kwargs = {} action = request.POST.get("action") @@ -1626,6 +1631,8 @@ def get_queryset(self): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) + # Ensure project is loaded + self.get_project() context["project"] = self.project context["model_label"] = self.model_label return context @@ -1951,6 +1958,720 @@ def get_filterset_kwargs(self, filterset_class): return kwargs +class OriginReviewView( + ConditionalLoginRequired, + ProjectRelatedViewMixin, + PrefetchRelatedViewMixin, + TableColumnsMixin, + ExportXLSXMixin, + ExportJSONMixin, + PaginatedFilterView, +): + """Enhanced review interface for origin determinations with bulk actions.""" + + model = CodebaseRelation + filterset_class = RelationFilterSet + template_name = "scanpipe/origin_review.html" + prefetch_related = [ + Prefetch( + "to_resource", + queryset=unordered_resources.only("path", "is_text", "status"), + ), + Prefetch( + "from_resource", + queryset=unordered_resources.only("path", "is_text", "status"), + ), + Prefetch( + "curated_by", + queryset=apps.get_model(settings.AUTH_USER_MODEL).objects.only("username"), + ), + ] + paginate_by = settings.SCANCODEIO_PAGINATE_BY.get("relation", 100) + table_columns = [ + { + "field_name": "checkbox", + "template": 
@method_decorator(require_POST, name="dispatch")
class OriginBulkCurationView(
    ConditionalLoginRequired, ProjectRelatedViewMixin, generic.View
):
    """
    Handle bulk curation actions on relations.

    Accepts POST only. The ``selected_ids`` value is a comma-separated list of
    relation UUIDs; the action and the optional notes are validated by
    ``BulkCurationForm``. Each updated relation also gets an ``OriginCuration``
    history row. The view always redirects to the ``origin_review`` page and
    reports the outcome through the messages framework.
    """

    # Bulk actions that only change the curation status, and the status they set.
    _STATUS_BY_ACTION = {
        "approve": "approved",
        "reject": "rejected",
        "mark_pending": "pending",
    }

    @staticmethod
    def _parse_selected_ids(request):
        """
        Return the well-formed UUID strings submitted in ``selected_ids``.

        Tokens are stripped of surrounding whitespace. Empty and malformed
        tokens are dropped: passing a malformed value to a ``uuid__in`` lookup
        would raise a ``ValidationError`` instead of simply matching nothing.
        """
        import uuid

        raw_value = request.POST.get("selected_ids", "")
        selected_ids = []
        for token in raw_value.split(","):
            token = token.strip()
            if not token:
                continue
            try:
                uuid.UUID(token)
            except ValueError:
                continue
            selected_ids.append(token)
        return selected_ids

    def _apply_bulk_action(
        self,
        relation,
        action,
        cleaned_data,
        project,
        user,
    ):
        """
        Apply ``action`` to a single ``relation`` and record a history entry.

        ``cleaned_data`` is the validated ``BulkCurationForm`` data; its
        ``confidence_level`` and ``curation_notes`` entries are read here.
        The curator and curation timestamp are only set for authenticated users.
        Return True once the relation and its history row are saved.
        """
        from django.utils import timezone

        from scanpipe.models import OriginCuration

        # Snapshot the current mapping for the history row.
        prev_from = relation.from_resource
        prev_to = relation.to_resource
        prev_map_type = relation.map_type

        if action in self._STATUS_BY_ACTION:
            relation.curation_status = self._STATUS_BY_ACTION[action]
        elif action == "set_confidence":
            relation.confidence_level = cleaned_data.get("confidence_level")

        notes = cleaned_data.get("curation_notes")
        if notes:
            # Append to existing notes rather than overwriting earlier comments.
            existing_notes = relation.curation_notes
            relation.curation_notes = (
                f"{existing_notes}\n\n{notes}" if existing_notes else notes
            )

        update_fields = ["curation_status", "confidence_level", "curation_notes"]
        curator = user if user.is_authenticated else None
        if curator is not None:
            relation.curated_by = curator
            relation.curated_at = timezone.now()
            update_fields += ["curated_by", "curated_at"]
        relation.save(update_fields=update_fields)

        OriginCuration.objects.create(
            project=project,
            relation=relation,
            curator=curator,
            curation_status=relation.curation_status or "pending",
            confidence_level=relation.confidence_level or "",
            notes=notes or "",
            previous_from_resource=prev_from,
            previous_to_resource=prev_to,
            previous_map_type=prev_map_type,
        )

        return True

    def post(self, request, *args, **kwargs):
        """
        Validate the bulk form, apply the action to the selected relations of
        the project, and redirect to the review page.

        All updates run in a single transaction so that a failure does not
        leave the selection partially curated.
        """
        from django.db import transaction

        # Ensure project is loaded
        project = self.get_project()
        form = BulkCurationForm(request.POST)
        if not form.is_valid():
            messages.error(request, "Invalid form data.")
            return redirect("origin_review", slug=project.slug)

        selected_ids = self._parse_selected_ids(request)
        if not selected_ids:
            messages.warning(request, "No relations selected.")
            return redirect("origin_review", slug=project.slug)

        # Evaluate once (no separate ``exists()`` query) and fetch the resources
        # read by ``_apply_bulk_action`` in the same query.
        relations = list(
            project.codebaserelations.filter(uuid__in=selected_ids).select_related(
                "from_resource", "to_resource"
            )
        )
        if not relations:
            messages.warning(request, "Selected relations were not found.")
            return redirect("origin_review", slug=project.slug)

        action = form.cleaned_data["action"]
        count = 0
        with transaction.atomic():
            for relation in relations:
                updated = self._apply_bulk_action(
                    relation=relation,
                    action=action,
                    cleaned_data=form.cleaned_data,
                    project=project,
                    user=request.user,
                )
                if updated:
                    count += 1

        if count:
            messages.success(
                request,
                f"Successfully updated {count} relation{'s' if count != 1 else ''}.",
            )

        return redirect("origin_review", slug=project.slug)
class OriginCurateCreateView(
    ConditionalLoginRequired,
    ProjectRelatedViewMixin,
    FormView,
):
    """
    View for manually creating a new CodebaseRelation.

    On success the user is redirected to the ``origin_curate`` page of the
    newly created relation.
    """

    form_class = OriginCurateCreateForm
    template_name = "scanpipe/origin_curate_create.html"

    def get_form_kwargs(self):
        """Pass the current project to the form."""
        kwargs = super().get_form_kwargs()
        kwargs["project"] = self.get_project()
        return kwargs

    def form_valid(self, form):
        """Create the relation through the form and redirect to its curation page."""
        project = self.get_project()
        relation = form.save(project=project, user=self.request.user)
        messages.success(self.request, "Relation created successfully.")
        return redirect("origin_curate", slug=project.slug, uuid=relation.uuid)

    def get_context_data(self, **kwargs):
        """
        Return the template context, making sure it contains a ``form``.

        A form already present in the context (for example the validated form
        passed along when re-rendering after a failed submission) is kept
        as-is instead of being replaced by a newly built one.
        """
        context = super().get_context_data(**kwargs)
        if "form" not in context:
            context["form"] = self.get_form()
        return context
class OriginPropagateView(
    ConditionalLoginRequired,
    ProjectRelatedViewMixin,
    FormView,
):
    """
    View for propagating origin determinations.

    The source relation is identified by ``relation_uuid`` (query string or
    form data). A submission either renders a preview of the propagation
    candidates or applies the propagation using the selected strategy.
    """

    form_class = OriginPropagationForm
    template_name = "scanpipe/origin_propagate.html"

    def get_form_kwargs(self):
        """Pre-fill ``relation_uuid`` from the request when it is provided."""
        kwargs = super().get_form_kwargs()
        relation_uuid = self.request.GET.get("relation_uuid") or self.request.POST.get(
            "relation_uuid"
        )
        if relation_uuid:
            # Merge instead of replacing so other initial values are preserved.
            kwargs["initial"] = {
                **(kwargs.get("initial") or {}),
                "relation_uuid": relation_uuid,
            }
        return kwargs

    def _resolve_relation_uuid(self, form):
        """
        Return the relation UUID for the current request, or a falsy value.

        GET requests read the query string; other methods read the cleaned
        form data when available and the raw form data otherwise. The form
        initial data is the last fallback.
        """
        if self.request.method == "GET":
            relation_uuid = self.request.GET.get("relation_uuid")
        elif hasattr(form, "cleaned_data") and form.cleaned_data:
            relation_uuid = form.cleaned_data.get("relation_uuid")
        else:
            relation_uuid = form.data.get("relation_uuid")

        if not relation_uuid and hasattr(form, "initial"):
            relation_uuid = form.initial.get("relation_uuid")
        return relation_uuid

    @staticmethod
    def _as_float(value, default=0.8):
        """Return ``value`` converted to float, or ``default`` if not convertible."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _get_relation(self, project, relation_uuid):
        """
        Return the relation of ``project`` matching ``relation_uuid``.

        Return None, after adding an error message, when the relation does not
        exist or when ``relation_uuid`` is not a well-formed UUID.
        """
        from django.core.exceptions import ValidationError

        try:
            return project.codebaserelations.get(uuid=relation_uuid)
        except (CodebaseRelation.DoesNotExist, ValidationError):
            # ValidationError is raised by the lookup for a malformed UUID,
            # which can come straight from the query string.
            messages.error(self.request, "Relation not found.")
            return None

    def _extract_preview_params(self, form):
        """
        Return ``(strategy, preview_only, similarity_threshold, pattern)``.

        Values come from the cleaned form data when available, and from the
        raw form data otherwise.
        """
        if hasattr(form, "cleaned_data") and form.cleaned_data:
            cleaned = form.cleaned_data
            return (
                cleaned.get("strategy"),
                cleaned.get("preview_only", True),
                self._as_float(cleaned.get("similarity_threshold"), 0.8),
                cleaned.get("pattern") or None,
            )
        data = form.data
        return (
            data.get("strategy"),
            # Raw checkbox value as submitted by the browser.
            data.get("preview_only") == "on",
            self._as_float(data.get("similarity_threshold"), 0.8),
            data.get("pattern") or None,
        )

    def _build_preview_data(self, project, form):
        """
        Return the preview context (relation, candidates, count), or None.

        None is returned when no relation can be resolved, when no strategy is
        selected, when the request is not a preview, or when the candidates
        cannot be computed.
        """
        relation_uuid = self._resolve_relation_uuid(form)
        if not relation_uuid:
            return None

        relation = self._get_relation(project, relation_uuid)
        if relation is None:
            return None

        strategy, preview_only, similarity_threshold, pattern = (
            self._extract_preview_params(form)
        )
        should_preview = strategy and (preview_only or self.request.method == "GET")
        if not should_preview:
            return None

        from scanpipe.pipes.origin_propagation import get_propagation_candidates

        try:
            candidates = get_propagation_candidates(
                project=project,
                relation=relation,
                strategy=strategy,
                similarity_threshold=similarity_threshold,
                pattern=pattern,
                logger=None,
            )
        except Exception as error:  # pragma: no cover - defensive
            messages.error(self.request, f"Error generating preview: {error}")
            return None

        return {
            "relation": relation,
            "candidates": candidates,
            "count": len(candidates),
        }

    def get_context_data(self, **kwargs):
        """Add the ``preview_data`` entry to the template context."""
        # Ensure project is loaded
        project = self.get_project()
        context = super().get_context_data(**kwargs)
        form = context.get("form") or self.get_form()
        context["preview_data"] = self._build_preview_data(project, form)
        return context

    def form_valid(self, form):
        """
        Render a preview or apply the propagation for the selected strategy.

        Redirect to the propagation page when the propagation returns a batch,
        and to the review page otherwise. Errors are reported through the
        messages framework and the form is re-rendered.
        """
        cleaned = form.cleaned_data
        relation_uuid = cleaned["relation_uuid"]
        strategy = cleaned["strategy"]
        preview_only = cleaned.get("preview_only", False)

        project = self.get_project()

        relation = self._get_relation(project, relation_uuid)
        if relation is None:
            return self.form_invalid(form)

        if preview_only:
            # Just show preview, don't apply
            messages.info(self.request, "Preview mode - no changes applied.")
            return self.render_to_response(self.get_context_data(form=form))

        # Apply propagation
        from scanpipe.pipes import origin_propagation

        # An empty optional field is cleaned to None: fall back to the default
        # threshold instead of failing on ``float(None)``.
        similarity_threshold = self._as_float(cleaned.get("similarity_threshold"), 0.8)
        pattern = cleaned.get("pattern")

        # Strategy name -> (propagation function name, strategy-specific kwargs).
        strategy_handlers = {
            "similar": (
                "propagate_origin_to_similar_resources",
                {"similarity_threshold": similarity_threshold},
            ),
            "directory": ("propagate_origin_by_directory_structure", {}),
            "package": ("propagate_origin_by_package", {}),
            "pattern": ("propagate_origin_by_pattern", {"pattern": pattern}),
        }

        if strategy not in strategy_handlers:
            messages.error(self.request, "Invalid strategy.")
            return self.form_invalid(form)

        if strategy == "pattern" and not pattern:
            messages.error(self.request, "Pattern is required for pattern strategy.")
            return self.form_invalid(form)

        function_name, strategy_kwargs = strategy_handlers[strategy]
        # Only pass authenticated user
        user = self.request.user if self.request.user.is_authenticated else None

        try:
            propagate = getattr(origin_propagation, function_name)
            count, _relations, batch = propagate(
                project=project,
                relation=relation,
                user=user,
                logger=None,
                **strategy_kwargs,
            )
        except Exception as error:
            # View boundary: report any propagation failure to the user.
            messages.error(self.request, f"Error during propagation: {error}")
            return self.form_invalid(form)

        messages.success(
            self.request,
            f"Propagation complete: {count} "
            f"relation{'s' if count != 1 else ''} created.",
        )

        if batch:
            return redirect("origin_propagate", slug=project.slug)

        return redirect("origin_review", slug=project.slug)
def form_valid(self, form):
    """
    Handle deployment form submission.

    Check the FederatedCode prerequisites, then either render a preview or
    store the deployment options in the project ``extra_data`` and start the
    ``publish_to_federatedcode`` pipeline.
    """
    project = self.get_project()
    cleaned = form.cleaned_data
    merge_strategy = cleaned["merge_strategy"]
    include_history = cleaned["include_history"]
    preview_only = cleaned["preview_only"]

    from scanpipe.pipes import federatedcode

    # Prerequisites are evaluated lazily and in order; the first failure wins.
    prerequisites = (
        (
            federatedcode.is_configured,
            "FederatedCode is not configured. Please contact administrator.",
        ),
        (
            federatedcode.is_available,
            "FederatedCode Git account is not available. "
            "Please check configuration.",
        ),
        (
            lambda: project.purl,
            "Project PURL is required for FederatedCode deployment. "
            "Please set a PURL for this project.",
        ),
    )
    for is_satisfied, failure_message in prerequisites:
        if not is_satisfied():
            messages.error(self.request, failure_message)
            return self.form_invalid(form)

    curated_relations = project.codebaserelations.filter(
        curation_status__isnull=False
    ).exclude(curation_status="")
    curated_count = curated_relations.count()

    if not curated_count:
        messages.warning(self.request, "No curated relations to deploy.")
        return redirect("origin_review", slug=project.slug)

    if preview_only:
        if include_history:
            history_text = "including history"
        else:
            history_text = "excluding history"
        messages.info(
            self.request,
            f"Preview mode: {curated_count} curated relations ready for deployment "
            f"({history_text}).",
        )
        # Expose the validated choice to get_context_data for this render only.
        self._include_history_override = include_history
        try:
            return self.render_to_response(self.get_context_data(form=form))
        finally:
            if hasattr(self, "_include_history_override"):
                del self._include_history_override

    # Actually deploy by running the pipeline
    try:
        # Persist deployment choices for pipeline consumption
        project.extra_data = {
            **(project.extra_data or {}),
            "origin_deploy": {
                "merge_strategy": merge_strategy,
                "include_history": include_history,
            },
        }
        project.save(update_fields=["extra_data"])

        # Create a run for the pipeline
        run = project.add_pipeline(
            pipeline_name="publish_to_federatedcode",
            execute_now=True,
        )

        history_word = "with" if include_history else "without"
        messages.success(
            self.request,
            f"Deployment started using the '{merge_strategy}' strategy "
            f"{history_word} history. "
            f"Pipeline run: {run.uuid}.",
        )
        return redirect("run_detail", uuid=run.uuid)

    except Exception as error:
        messages.error(
            self.request,
            f"Error starting deployment: {str(error)}. "
            "Please ensure the PublishToFederatedCode pipeline is available.",
        )
        return self.form_invalid(form)