Skip to content

Commit 835c43e

Browse files
author
Your Name
committed
Update fixes for #1071, add fix for #1085
1 parent 47f164f commit 835c43e

File tree

7 files changed

+118
-86
lines changed

7 files changed

+118
-86
lines changed

.envs/.local/.django

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@ SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH='dummy_branch'
3333
# Slack Webhook
3434
# ------------------------------------------------------------------------------
3535
SLACK_WEBHOOK_URL=''
36-
LRM_USER=''
37-
LRM_PASSWORD=''
36+
37+
#Server Credentials
38+
#--------------------------------------------------------------------------------
39+
LRM_DEV_USER=''
40+
LRM_DEV_PASSWORD=''
3841
XLI_USER=''
3942
XLI_PASSWORD=''
4043
LRM_QA_USER=''
4144
LRM_QA_PASSWORD=''
4245

4346
#Server Tokens
4447
#--------------------------------------------------------------------------------
45-
LRMDEV_TOKEN=''
46-
LIS_TOKEN=''
48+
LRM_DEV_TOKEN=''
49+
XLI_TOKEN=''

config/settings/base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,9 @@
343343
SLACK_WEBHOOK_URL = env("SLACK_WEBHOOK_URL")
344344
XLI_USER = env("XLI_USER")
345345
XLI_PASSWORD = env("XLI_PASSWORD")
346-
LRM_USER = env("LRM_USER")
347-
LRM_PASSWORD = env("LRM_PASSWORD")
346+
LRM_DEV_USER = env("LRM_DEV_USER")
347+
LRM_DEV_PASSWORD = env("LRM_DEV_PASSWORD")
348348
LRM_QA_USER = env("LRM_QA_USER")
349349
LRM_QA_PASSWORD = env("LRM_QA_PASSWORD")
350+
LRM_DEV_TOKEN=env("LRM_DEV_TOKEN")
351+
XLI_TOKEN=env("XLI_TOKEN")

sde_collections/admin.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,18 @@
99
from .tasks import fetch_and_update_full_text, import_candidate_urls_from_api
1010

1111

12-
@admin.action(description="Import candidate URLs from LRM Dev Server with Full Text")
13-
def fetch_full_text_lrm_dev_action(modeladmin, request, queryset):
12+
def fetch_and_update_text_for_server(modeladmin, request, queryset, server_name):
1413
for collection in queryset:
15-
fetch_and_update_full_text.delay(collection.id, "LRM_DEV")
16-
modeladmin.message_user(request, "Full text fetched and updated from LRM_DEV successfully.")
14+
fetch_and_update_full_text.delay(collection.id, server_name)
15+
modeladmin.message_user(request, f"Started importing URLs from {server_name.upper()} Server")
1716

17+
@admin.action(description="Import candidate URLs from LRM Dev Server with Full Text")
def fetch_full_text_lrm_dev_action(modeladmin, request, queryset):
    """Admin action: start a full-text import from the "lrm_dev" server.

    Delegates to ``fetch_and_update_text_for_server`` with the server name
    ``"lrm_dev"`` for the collections selected in the admin changelist.
    """
    fetch_and_update_text_for_server(modeladmin, request, queryset, "lrm_dev")
1820

19-
@admin.action(description="Import candidate URLs from Li's Server with Full Text")
21+
@admin.action(description="Import candidate URLs from XLI Server with Full Text")
2022
def fetch_full_text_lis_action(modeladmin, request, queryset):
21-
for collection in queryset:
22-
fetch_and_update_full_text.delay(collection.id, "LIS")
23-
modeladmin.message_user(request, "Full text fetched and updated from Li's Server successfully.")
24-
23+
fetch_and_update_text_for_server(modeladmin, request, queryset, "xli")
2524

2625
@admin.action(description="Generate deployment message")
2726
def generate_deployment_message(modeladmin, request, queryset):
@@ -123,7 +122,7 @@ def import_candidate_urls_from_api_caller(modeladmin, request, queryset, server_
123122
messages.add_message(
124123
request,
125124
messages.INFO,
126-
f"Started importing URLs from the API for: {collection_names} from {server_name.title()}",
125+
f"Started importing URLs from {server_name.upper()} Server",
127126
)
128127

129128

@@ -147,19 +146,19 @@ def import_candidate_urls_secret_production(modeladmin, request, queryset):
147146
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "secret_production")
148147

149148

150-
@admin.action(description="Import candidate URLs from Li's Server")
151-
def import_candidate_urls_lis_server(modeladmin, request, queryset):
152-
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lis_server")
149+
@admin.action(description="Import candidate URLs from XLI Server")
def import_candidate_urls_xli_server(modeladmin, request, queryset):
    """Admin action: start a candidate-URL import from the "xli" server.

    Delegates to ``import_candidate_urls_from_api_caller`` with the server
    name ``"xli"`` for the collections selected in the admin changelist.
    """
    import_candidate_urls_from_api_caller(modeladmin, request, queryset, "xli")
153152

154153

155154
@admin.action(description="Import candidate URLs from LRM Dev Server")
156155
def import_candidate_urls_lrm_dev_server(modeladmin, request, queryset):
157-
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lrm_dev_server")
156+
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lrm_dev")
158157

159158

160159
@admin.action(description="Import candidate URLs from LRM QA Server")
161160
def import_candidate_urls_lrm_qa_server(modeladmin, request, queryset):
162-
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lrm_qa_server")
161+
import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lrm_qa")
163162

164163

165164
class ExportCsvMixin:
@@ -250,7 +249,7 @@ class CollectionAdmin(admin.ModelAdmin, ExportCsvMixin, UpdateConfigMixin):
250249
import_candidate_urls_production,
251250
import_candidate_urls_secret_test,
252251
import_candidate_urls_secret_production,
253-
import_candidate_urls_lis_server,
252+
import_candidate_urls_xli_server,
254253
import_candidate_urls_lrm_dev_server,
255254
import_candidate_urls_lrm_qa_server,
256255
fetch_full_text_lrm_dev_action,
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Generated by Django 4.2.9 on 2024-11-07 17:34
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Alter ``CandidateURL.scraped_text`` to add a verbose name, default, and help text."""

    dependencies = [
        ("sde_collections", "0059_candidateurl_scraped_text"),
    ]

    operations = [
        migrations.AlterField(
            model_name="candidateurl",
            name="scraped_text",
            # NOTE(review): the field keeps null=True alongside default="", so
            # both NULL and "" can represent "no text" -- confirm this is intended.
            field=models.TextField(
                blank=True,
                default="",
                help_text="This is the text scraped by Sinequa",
                null=True,
                verbose_name="Scraped Text",
            ),
        ),
    ]

sde_collections/models/candidate_url.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,13 @@ class CandidateURL(models.Model):
3535
blank=True,
3636
help_text="This is the original title scraped by Sinequa",
3737
)
38-
scraped_text = models.TextField(blank=True, null=True)
38+
scraped_text = models.TextField(
39+
"Scraped Text",
40+
default="",
41+
null=True,
42+
blank=True,
43+
help_text="This is the text scraped by Sinequa",
44+
)
3945
generated_title = models.CharField(
4046
"Generated Title",
4147
default="",

sde_collections/sinequa_api.py

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from typing import Any
2-
2+
import json
33
import requests
44
import urllib3
55
from django.conf import settings
@@ -32,17 +32,17 @@
3232
"query_name": "query-sde-primary",
3333
"base_url": "https://sciencediscoveryengine.nasa.gov",
3434
},
35-
"lis_server": {
35+
"xli": {
3636
"app_name": "nasa-sba-smd",
3737
"query_name": "query-smd-primary",
3838
"base_url": "http://sde-xli.nasa-impact.net",
3939
},
40-
"lrm_dev_server": {
40+
"lrm_dev": {
4141
"app_name": "sde-init-check",
4242
"query_name": "query-init-check",
4343
"base_url": "https://sde-lrm.nasa-impact.net",
4444
},
45-
"lrm_qa_server": {
45+
"lrm_qa": {
4646
"app_name": "sde-init-check",
4747
"query_name": "query-init-check",
4848
"base_url": "https://sde-qa.nasa-impact.net",
@@ -53,15 +53,13 @@
5353
class Api:
5454
def __init__(self, server_name: str) -> None:
5555
self.server_name = server_name
56-
self.app_name: str = server_configs[server_name]["app_name"]
57-
self.query_name: str = server_configs[server_name]["query_name"]
58-
self.base_url: str = server_configs[server_name]["base_url"]
59-
self.xli_user = settings.XLI_USER
60-
self.xli_password = settings.XLI_PASSWORD
61-
self.lrm_user = settings.LRM_USER
62-
self.lrm_password = settings.LRM_PASSWORD
63-
self.lrm_qa_user = settings.LRM_QA_USER
64-
self.lrm_qa_password = settings.LRM_QA_PASSWORD
56+
config = server_configs[server_name]
57+
self.app_name: str = config["app_name"]
58+
self.query_name: str = config["query_name"]
59+
self.base_url: str = config["base_url"]
60+
self.user = getattr(settings, f"{server_name}_USER".upper(), None)
61+
self.password = getattr(settings, f"{server_name}_PASSWORD".upper(), None)
62+
self.token = getattr(settings, f"{server_name}_TOKEN".upper(), None)
6563

6664
def process_response(self, url: str, payload: dict[str, Any]) -> Any:
6765
response = requests.post(url, headers={}, json=payload, verify=False)
@@ -74,14 +72,7 @@ def process_response(self, url: str, payload: dict[str, Any]) -> Any:
7472
return meaningful_response
7573

7674
def query(self, page: int, collection_config_folder: str = "") -> Any:
77-
if self.server_name == "lis_server":
78-
url = f"{self.base_url}/api/v1/search.query?Password={self.xli_password}&User={self.xli_user}"
79-
elif self.server_name == "lrm_dev_server":
80-
url = f"{self.base_url}/api/v1/search.query?Password={self.lrm_password}&User={self.lrm_user}"
81-
elif self.server_name == "lrm_qa_server":
82-
url = f"{self.base_url}/api/v1/search.query?Password={self.lrm_qa_password}&User={self.lrm_qa_user}"
83-
else:
84-
url = f"{self.base_url}/api/v1/search.query"
75+
url = f"{self.base_url}/api/v1/search.query?Password={self.password}&User={self.user}"
8576
payload = {
8677
"app": self.app_name,
8778
"query": {
@@ -94,11 +85,41 @@ def query(self, page: int, collection_config_folder: str = "") -> Any:
9485
}
9586

9687
if collection_config_folder:
97-
if self.server_name in ["lis_server", "lrm_dev_server", "lrm_qa_server"]:
88+
if self.server_name in ["xli", "lrm_dev", "lrm_qa"]:
9889
payload["query"]["advanced"]["collection"] = f"/scrapers/{collection_config_folder}/"
9990
else:
10091
payload["query"]["advanced"]["collection"] = f"/SDE/{collection_config_folder}/"
10192

10293
response = self.process_response(url, payload)
10394

10495
return response
96+
97+
def sql_query(self, sql: str) -> Any:
    """Execute an SQL query on the configured server using token-based authentication.

    Posts the statement to ``{base_url}/api/v1/engine.sql`` with
    ``self.token`` sent as a bearer token.

    Args:
        sql: The SQL statement to send.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If no token is configured for this server.
        Exception: If the request fails or returns an HTTP error status. The
            underlying ``requests`` exception is chained as ``__cause__``.
    """
    if not self.token:
        raise ValueError("You must have a token to use the SQL endpoint")

    url = f"{self.base_url}/api/v1/engine.sql"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {self.token}",
    }
    payload = json.dumps(
        {
            "method": "engine.sql",
            "sql": sql,
            "pretty": True,
            "log": False,
            "output": "json",
            "resolveIndexList": "false",
            "engines": "default",
        }
    )
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # Chain the original error so the traceback keeps the real cause.
        raise Exception(f"API request failed: {str(e)}") from e
122+
123+
def get_full_texts(self, collection_config_folder: str) -> Any:
    """Query ``sde_index`` for the url, text, and title of one collection's documents.

    Args:
        collection_config_folder: Folder name of the collection; it is
            matched against the index as ``/SDE/<folder>/``.

    Returns:
        Whatever ``self.sql_query`` returns for the generated statement.
    """
    # NOTE(review): the folder name is interpolated into the SQL text without
    # escaping -- callers must pass trusted values only.
    sql = f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection_config_folder}/'"
    return self.sql_query(sql)

sde_collections/tasks.py

Lines changed: 18 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -146,37 +146,24 @@ def resolve_title_pattern(title_pattern_id):
146146

147147

148148
@celery_app.task
149-
def fetch_and_update_full_text(collection_id, server_type):
150-
try:
151-
collection = Collection.objects.get(id=collection_id)
152-
except Collection.DoesNotExist:
153-
raise Exception(f"Collection with ID {collection_id} does not exist.")
154-
155-
server_config = get_server_config(server_type)
156-
token = server_config["token"]
157-
url = server_config["url"]
158-
159-
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
160-
161-
payload = json.dumps(
162-
{
163-
"method": "engine.sql",
164-
"sql": f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection.config_folder}/'",
165-
"pretty": True,
166-
"log": False,
167-
"output": "json",
168-
"resolveIndexList": "false",
169-
"engines": "default",
170-
}
171-
)
172-
173-
try:
174-
response = requests.post(url, headers=headers, data=payload, timeout=10)
175-
response.raise_for_status() # Raise exception for HTTP errors
176-
except requests.exceptions.RequestException as e:
177-
raise Exception(f"API request failed: {str(e)}")
178-
179-
records = response.json().get("Rows", [])
149+
def fetch_and_update_full_text(collection_id, server_name):
150+
"""
151+
Task to fetch and update full text and metadata for all URLs associated with a specified collection
152+
from a given server.
153+
154+
Args:
155+
collection_id (int): The identifier for the collection in the database.
156+
server_name (str): The name of the server.
157+
158+
Returns:
159+
str: A message indicating the result of the operation, including the number of URLs processed
160+
or a message if no records were found.
161+
"""
162+
collection = Collection.objects.get(id=collection_id)
163+
api = Api(server_name)
164+
full_texts = api.get_full_texts(collection.config_folder)
165+
166+
records = full_texts.get("Rows", [])
180167
if not records:
181168
return "No records found in the response."
182169

@@ -188,14 +175,4 @@ def fetch_and_update_full_text(collection_id, server_type):
188175
CandidateURL.objects.update_or_create(
189176
url=url, collection=collection, defaults={"scraped_text": full_text, "scraped_title": title}
190177
)
191-
192178
return f"Successfully processed {len(records)} records and updated the database."
193-
194-
195-
def get_server_config(server_type):
196-
if server_type == "LRM_DEV":
197-
return {"url": "https://sde-lrm.nasa-impact.net/api/v1/engine.sql", "token": os.getenv("LRMDEV_TOKEN")}
198-
elif server_type == "LIS":
199-
return {"url": "http://sde-xli.nasa-impact.net/api/v1/engine.sql", "token": os.getenv("LIS_TOKEN")}
200-
else:
201-
raise ValueError("Invalid server type.")

0 commit comments

Comments
 (0)