Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions edenai_apis/apis/winstonai/config.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
WINSTON_AI_API_URL = "https://api.gowinston.ai/functions/v1"
WINSTON_AI_PLAGIA_DETECTION_URL = "https://api.gowinston.ai/v2/plagiarism"
67 changes: 45 additions & 22 deletions edenai_apis/apis/winstonai/winstonai_api.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
from typing import List, Dict
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Remove duplicate Dict import.

The `Dict` type is imported twice — once on line 2 and again on line 4 — causing a redefinition.

Apply this diff to fix the duplicate import:

-from typing import List, Dict
 from http import HTTPStatus
-from typing import Dict, Sequence, Any, Optional
+from typing import List, Dict, Sequence, Any, Optional

Also applies to: 4-4

🤖 Prompt for AI Agents
In edenai_apis/apis/winstonai/winstonai_api.py around lines 2 to 4, the Dict
type is imported twice, causing a duplicate import issue. Remove the redundant
Dict import from either line 2 or line 4 so that Dict is only imported once,
ensuring no redefinition occurs.

from http import HTTPStatus
from typing import Dict, Sequence, Any, Optional
from uuid import uuid4
import requests
from edenai_apis.apis.winstonai.config import WINSTON_AI_API_URL
from edenai_apis.apis.winstonai.config import WINSTON_AI_API_URL, WINSTON_AI_PLAGIA_DETECTION_URL
from edenai_apis.features import ProviderInterface, TextInterface, ImageInterface
from edenai_apis.features.image.ai_detection.ai_detection_dataclass import (
AiDetectionDataClass as ImageAiDetectionDataclass,
Expand Down Expand Up @@ -34,6 +34,7 @@ def __init__(self, api_keys: Optional[Dict[str, Any]] = None):
api_keys=api_keys or {},
)
self.api_url = WINSTON_AI_API_URL
self.plagia_url = WINSTON_AI_PLAGIA_DETECTION_URL
self.headers = {
"Content-Type": "application/json",
"Authorization": f'Bearer {self.api_settings["api_key"]}',
Expand Down Expand Up @@ -135,40 +136,62 @@ def text__plagia_detection(
{
"text": text,
"language": provider_params.get("language", "en"),
"version": provider_params.get("version", "2.0"),
"country": provider_params.get("country", "us"),
"excluded_sources": provider_params.get("excluded_sources", [])
}
)

response = requests.request(
"POST", f"{self.api_url}/plagiarism", headers=self.headers, data=payload
"POST", self.plagia_url, headers=self.headers, data=payload
)

if response.status_code != 200:
raise ProviderException(response.json(), code=response.status_code)

original_response = response.json()
results = original_response.get("results")
result = original_response.get("result")

if results is None:
if result is None:
raise ProviderException(response.json())

plagiarism_score: int = result.get("score")
sources = result.get("sources")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `sources` variable should be retrieved from `original_response`, not from `result`:
sources = original_response.get("sources")


candidates: Sequence[PlagiaDetectionCandidate] = []

for source in sources:
source_url = source.get("url")
source_score = source.get("score")
source_prediction = 'plagiarized' if source_score > 5 else 'not plagiarized'

# startIndex, endIndex, sequence
source_plagiarism_found: List[Dict[str, str]] = source.get("plagiarismFound")

plagiarized_text: str = ""

for plagiarism in source_plagiarism_found:
plagiarism_start_index = int(plagiarism.get("startIndex"))
plagiarism_end_index = int(plagiarism.get("endIndex"))

plagiarized_text += " ... " + text[plagiarism_start_index:plagiarism_end_index] + " ... "

Comment on lines +170 to +177
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Optimize plagiarized text construction and add validation.

The current implementation has potential performance issues with string concatenation and lacks validation for array indices.

Apply this diff to improve the implementation:

-            plagiarized_text: str = ""
-
-            for plagiarism in source_plagiarism_found:
-                plagiarism_start_index = int(plagiarism.get("startIndex"))
-                plagiarism_end_index = int(plagiarism.get("endIndex"))
-
-                plagiarized_text += " ... " + text[plagiarism_start_index:plagiarism_end_index] + " ... "
+            plagiarized_segments = []
+            
+            for plagiarism in source_plagiarism_found:
+                start_idx = int(plagiarism.get("startIndex", 0))
+                end_idx = int(plagiarism.get("endIndex", 0))
+                
+                # Validate indices
+                if 0 <= start_idx < end_idx <= len(text):
+                    plagiarized_segments.append(text[start_idx:end_idx])
+            
+            plagiarized_text = " ... ".join(plagiarized_segments)
+            if plagiarized_segments:
+                plagiarized_text = f" ... {plagiarized_text} ... "
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
plagiarized_text: str = ""
for plagiarism in source_plagiarism_found:
plagiarism_start_index = int(plagiarism.get("startIndex"))
plagiarism_end_index = int(plagiarism.get("endIndex"))
plagiarized_text += " ... " + text[plagiarism_start_index:plagiarism_end_index] + " ... "
plagiarized_segments = []
for plagiarism in source_plagiarism_found:
start_idx = int(plagiarism.get("startIndex", 0))
end_idx = int(plagiarism.get("endIndex", 0))
# Validate indices
if 0 <= start_idx < end_idx <= len(text):
plagiarized_segments.append(text[start_idx:end_idx])
plagiarized_text = " ... ".join(plagiarized_segments)
if plagiarized_segments:
plagiarized_text = f" ... {plagiarized_text} ... "
🤖 Prompt for AI Agents
In edenai_apis/apis/winstonai/winstonai_api.py around lines 170 to 177, the code
concatenates substrings to build plagiarized_text using repeated string
addition, which is inefficient. Also, it does not validate that the start and
end indices are within the bounds of the text. To fix this, replace the string
concatenation with collecting substrings in a list and join them once at the end
for better performance. Additionally, add checks to ensure
plagiarism_start_index and plagiarism_end_index are valid indices within the
text length before slicing to avoid errors.


plagia_detection_candidate = PlagiaDetectionCandidate(
url=source_url,
plagia_score=source_score,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `plagia_score` should be a value between 0 and 1, so we need to divide `source_score` by 100.

prediction=source_prediction,
plagiarized_text=plagiarized_text,
plagiarism_founds=source_plagiarism_found
)

candidates.append(plagia_detection_candidate)


plagia_detection_item = PlagiaDetectionItem(text=text, candidates=candidates)

standardized_response = PlagiaDetectionDataClass(
plagia_score=original_response["score"],
items=[
PlagiaDetectionItem(
text=result["title"],
candidates=[
PlagiaDetectionCandidate(
url=result["url"],
plagia_score=1,
prediction="plagiarized",
plagiarized_text=excerpt,
)
for excerpt in result["excerpts"]
],
)
for result in results
],
plagia_score=plagiarism_score,
items=[plagia_detection_item]
)

return ResponseType[PlagiaDetectionDataClass](
Expand Down
Loading