Skip to content

Commit e4881a9

Browse files
author
Your Name
committed
Fixes issue #1071
1 parent 8678ed6 commit e4881a9

File tree

1 file changed

+1
-123
lines changed

1 file changed

+1
-123
lines changed

sde_collections/tasks.py

Lines changed: 1 addition & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -144,129 +144,6 @@ def resolve_title_pattern(title_pattern_id):
144144
title_pattern = TitlePattern.objects.get(id=title_pattern_id)
145145
title_pattern.apply()
146146

147-
148-
"""
149-
@celery_app.task
150-
def fetch_and_update_full_text(collection_id):
151-
152-
try:
153-
collection = Collection.objects.get(id=collection_id)
154-
except Collection.DoesNotExist:
155-
raise Exception(f"Collection with ID {collection_id} does not exist.")
156-
157-
url = "https://sde-lrm.nasa-impact.net/api/v1/engine.sql" #LRM_DEV Server
158-
sql_command = f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection.config_folder}/'"
159-
token = os.getenv('LRMDEV_TOKEN')
160-
161-
162-
payload = json.dumps({
163-
"method": "engine.sql",
164-
"sql": sql_command,
165-
"pretty": True,
166-
"log": False,
167-
"output": "json",
168-
"resolveIndexList": "false",
169-
"engines": "default"
170-
})
171-
172-
headers = {
173-
'Content-Type': 'application/json',
174-
'Authorization': f'Bearer {token}'
175-
}
176-
177-
response = requests.post(url, headers=headers, data=payload)
178-
if response.status_code == 200:
179-
records = response.json().get("Rows", [])
180-
for record in records:
181-
url, full_text, title = record
182-
if not url or not full_text or not title:
183-
continue
184-
# Directly update or create the entry without checking for content changes
185-
CandidateURL.objects.update_or_create(
186-
url=url,
187-
collection=collection,
188-
defaults={
189-
'scraped_text': full_text,
190-
'scraped_title': title
191-
}
192-
)
193-
194-
return f"Processed {len(records)} records; Updated or created in database."
195-
else:
196-
raise Exception(f"Failed to fetch text: {response.status_code} {response.text}")
197-
"""
198-
199-
# A separate function is needed for Li's server, because it logs in with a username and password sent in the request body.
200-
# If the Sinequa web token is used, can the username and password be removed from the body? If yes, the two can be integrated, but the headers will be different (auth/cookie): if LIS then header1, elif LRM_DEV then header2, else header3.
201-
# Fill in the tokens in the .django file
202-
203-
# Integrated version - the LRM_DEV and LIS servers are configured separately
204-
"""
205-
@celery_app.task
206-
def fetch_and_update_full_text(collection_id, server_type):
207-
try:
208-
collection = Collection.objects.get(id=collection_id)
209-
except Collection.DoesNotExist:
210-
raise Exception(f"Collection with ID {collection_id} does not exist.")
211-
212-
# Server-specific configurations
213-
server_config = get_server_config(server_type)
214-
215-
# API Request Parameters
216-
payload = json.dumps({
217-
"method": "engine.sql",
218-
"sql": f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection.config_folder}/'",
219-
"pretty": True,
220-
"log": False,
221-
"output": "json",
222-
"resolveIndexList": "false",
223-
"engines": "default"
224-
})
225-
226-
token = server_config["token"]
227-
url = server_config["url"]
228-
headers = {
229-
'Content-Type': 'application/json',
230-
'Authorization': f'Bearer {token}'
231-
}
232-
233-
# Send the request
234-
response = requests.post(url, headers=headers, data=payload)
235-
if response.status_code == 200:
236-
records = response.json().get("Rows", [])
237-
for record in records:
238-
url, full_text, title = record
239-
if not url or not full_text or not title:
240-
continue
241-
CandidateURL.objects.update_or_create(
242-
url=url,
243-
collection=collection,
244-
defaults={
245-
'scraped_text': full_text,
246-
'scraped_title': title
247-
}
248-
)
249-
return f"Processed {len(records)} records; Updated or created in database."
250-
else:
251-
raise Exception(f"Failed to fetch text: {response.status_code} {response.text}")
252-
253-
254-
def get_server_config(server_type):
255-
if server_type == "LRM_DEV":
256-
return {
257-
"url": "https://sde-lrm.nasa-impact.net/api/v1/engine.sql",
258-
"token": os.getenv("LRMDEV_TOKEN")
259-
}
260-
elif server_type == "LIS":
261-
return {
262-
"url": "http://sde-xli.nasa-impact.net/api/v1/engine.sql",
263-
"token": os.getenv("LIS_TOKEN")
264-
}
265-
else:
266-
raise ValueError("Invalid server type.")
267-
"""
268-
269-
270147
@celery_app.task
271148
def fetch_and_update_full_text(collection_id, server_type):
272149
try:
@@ -321,3 +198,4 @@ def get_server_config(server_type):
321198
return {"url": "http://sde-xli.nasa-impact.net/api/v1/engine.sql", "token": os.getenv("LIS_TOKEN")}
322199
else:
323200
raise ValueError("Invalid server type.")
201+

0 commit comments

Comments
 (0)