@@ -144,129 +144,6 @@ def resolve_title_pattern(title_pattern_id):
144
144
title_pattern = TitlePattern .objects .get (id = title_pattern_id )
145
145
title_pattern .apply ()
146
146
147
-
148
- """
149
- @celery_app.task
150
- def fetch_and_update_full_text(collection_id):
151
-
152
- try:
153
- collection = Collection.objects.get(id=collection_id)
154
- except Collection.DoesNotExist:
155
- raise Exception(f"Collection with ID {collection_id} does not exist.")
156
-
157
- url = "https://sde-lrm.nasa-impact.net/api/v1/engine.sql" #LRM_DEV Server
158
- sql_command = f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection.config_folder}/'"
159
- token = os.getenv('LRMDEV_TOKEN')
160
-
161
-
162
- payload = json.dumps({
163
- "method": "engine.sql",
164
- "sql": sql_command,
165
- "pretty": True,
166
- "log": False,
167
- "output": "json",
168
- "resolveIndexList": "false",
169
- "engines": "default"
170
- })
171
-
172
- headers = {
173
- 'Content-Type': 'application/json',
174
- 'Authorization': f'Bearer {token}'
175
- }
176
-
177
- response = requests.post(url, headers=headers, data=payload)
178
- if response.status_code == 200:
179
- records = response.json().get("Rows", [])
180
- for record in records:
181
- url, full_text, title = record
182
- if not url or not full_text or not title:
183
- continue
184
- # Directly update or create the entry without checking for content changes
185
- CandidateURL.objects.update_or_create(
186
- url=url,
187
- collection=collection,
188
- defaults={
189
- 'scraped_text': full_text,
190
- 'scraped_title': title
191
- }
192
- )
193
-
194
- return f"Processed {len(records)} records; Updated or created in database."
195
- else:
196
- raise Exception(f"Failed to fetch text: {response.status_code} {response.text}")
197
- """
198
-
199
- # A separate function is needed for Li's server, because it logs in with a username and password sent in the request body.
200
- # If the Sinequa web token is used, can the username and password be removed from the body? If yes, the two functions can be integrated, but the headers will differ (auth/cookie): if LIS then header1, elif LRM_DEV then header2, else header3.
201
- # Fill in the tokens in the .django file
202
-
203
- # Integrated version: one task, with the LRM_DEV and LIS servers configured separately.
204
- """
205
- @celery_app.task
206
- def fetch_and_update_full_text(collection_id, server_type):
207
- try:
208
- collection = Collection.objects.get(id=collection_id)
209
- except Collection.DoesNotExist:
210
- raise Exception(f"Collection with ID {collection_id} does not exist.")
211
-
212
- # Server-specific configurations
213
- server_config = get_server_config(server_type)
214
-
215
- # API Request Parameters
216
- payload = json.dumps({
217
- "method": "engine.sql",
218
- "sql": f"SELECT url1, text, title FROM sde_index WHERE collection = '/SDE/{collection.config_folder}/'",
219
- "pretty": True,
220
- "log": False,
221
- "output": "json",
222
- "resolveIndexList": "false",
223
- "engines": "default"
224
- })
225
-
226
- token = server_config["token"]
227
- url = server_config["url"]
228
- headers = {
229
- 'Content-Type': 'application/json',
230
- 'Authorization': f'Bearer {token}'
231
- }
232
-
233
- # Send the request
234
- response = requests.post(url, headers=headers, data=payload)
235
- if response.status_code == 200:
236
- records = response.json().get("Rows", [])
237
- for record in records:
238
- url, full_text, title = record
239
- if not url or not full_text or not title:
240
- continue
241
- CandidateURL.objects.update_or_create(
242
- url=url,
243
- collection=collection,
244
- defaults={
245
- 'scraped_text': full_text,
246
- 'scraped_title': title
247
- }
248
- )
249
- return f"Processed {len(records)} records; Updated or created in database."
250
- else:
251
- raise Exception(f"Failed to fetch text: {response.status_code} {response.text}")
252
-
253
-
254
- def get_server_config(server_type):
255
- if server_type == "LRM_DEV":
256
- return {
257
- "url": "https://sde-lrm.nasa-impact.net/api/v1/engine.sql",
258
- "token": os.getenv("LRMDEV_TOKEN")
259
- }
260
- elif server_type == "LIS":
261
- return {
262
- "url": "http://sde-xli.nasa-impact.net/api/v1/engine.sql",
263
- "token": os.getenv("LIS_TOKEN")
264
- }
265
- else:
266
- raise ValueError("Invalid server type.")
267
- """
268
-
269
-
270
147
@celery_app .task
271
148
def fetch_and_update_full_text (collection_id , server_type ):
272
149
try :
@@ -321,3 +198,4 @@ def get_server_config(server_type):
321
198
return {"url" : "http://sde-xli.nasa-impact.net/api/v1/engine.sql" , "token" : os .getenv ("LIS_TOKEN" )}
322
199
else :
323
200
raise ValueError ("Invalid server type." )
201
+
0 commit comments