1
+ import json
1
2
from typing import Any
2
3
3
4
import requests
16
17
"app_name" : "nasa-sba-smd" ,
17
18
"query_name" : "query-smd-primary" ,
18
19
"base_url" : "https://sciencediscoveryengine.test.nasa.gov" ,
20
+ "index" : "sde_index" ,
19
21
},
20
22
"production" : {
21
23
"app_name" : "nasa-sba-smd" ,
22
24
"query_name" : "query-smd-primary" ,
23
25
"base_url" : "https://sciencediscoveryengine.nasa.gov" ,
26
+ "index" : "sde_index" ,
24
27
},
25
28
"secret_test" : {
26
29
"app_name" : "nasa-sba-sde" ,
27
30
"query_name" : "query-sde-primary" ,
28
31
"base_url" : "https://sciencediscoveryengine.test.nasa.gov" ,
32
+ "index" : "sde_index" ,
29
33
},
30
34
"secret_production" : {
31
35
"app_name" : "nasa-sba-sde" ,
32
36
"query_name" : "query-sde-primary" ,
33
37
"base_url" : "https://sciencediscoveryengine.nasa.gov" ,
38
+ "index" : "sde_index" ,
34
39
},
35
- "lis_server " : {
40
+ "xli " : {
36
41
"app_name" : "nasa-sba-smd" ,
37
42
"query_name" : "query-smd-primary" ,
38
43
"base_url" : "http://sde-xli.nasa-impact.net" ,
44
+ "index" : "sde_index" ,
39
45
},
40
- "lrm_dev_server " : {
46
+ "lrm_dev " : {
41
47
"app_name" : "sde-init-check" ,
42
48
"query_name" : "query-init-check" ,
43
49
"base_url" : "https://sde-lrm.nasa-impact.net" ,
50
+ "index" : "sde_init_check" ,
44
51
},
45
- "lrm_qa_server " : {
52
+ "lrm_qa " : {
46
53
"app_name" : "sde-init-check" ,
47
54
"query_name" : "query-init-check" ,
48
55
"base_url" : "https://sde-qa.nasa-impact.net" ,
51
58
52
59
53
60
class Api :
54
def __init__(
    self,
    server_name: str | None = None,
    user: str | None = None,
    password: str | None = None,
    token: str | None = None,
) -> None:
    """Bind this client to one entry of ``server_configs``.

    Args:
        server_name: Key into ``server_configs``. Although it defaults to
            ``None``, a value that is not a key of ``server_configs``
            (including ``None``) is rejected.
        user: Optional user name; when omitted, ``_get_user`` falls back to
            the settings module.
        password: Optional password; same fallback rule as ``user``.
        token: Optional token; same fallback rule as ``user``.

    Raises:
        ValueError: If ``server_name`` is not a key of ``server_configs``.
    """
    self.server_name = server_name
    if server_name not in server_configs:
        raise ValueError(f"Server name '{server_name}' is not in server_configs")

    self.config = server_configs[server_name]
    self.app_name: str = self.config["app_name"]
    self.query_name: str = self.config["query_name"]
    self.base_url: str = self.config["base_url"]
    # Servers treated as "dev": other methods consult this list to decide on
    # credentials in the query URL and on the default source name.
    self.dev_servers = ["xli", "lrm_dev", "lrm_qa"]

    # Store provided values only; settings fallbacks are resolved lazily by
    # the _get_* helpers so nothing is read from settings at construction time.
    self._provided_user = user
    self._provided_password = password
    self._provided_token = token
76
+
77
+ def _get_user (self ) -> str | None :
78
+ """Retrieve the user, using the provided value or defaulting to Django settings."""
79
+ return self ._provided_user or getattr (settings , f"{ self .server_name } _USER" .upper (), None )
80
+
81
+ def _get_password (self ) -> str | None :
82
+ """Retrieve the password, using the provided value or defaulting to Django settings."""
83
+ return self ._provided_password or getattr (settings , f"{ self .server_name } _PASSWORD" .upper (), None )
84
+
85
+ def _get_token (self ) -> str | None :
86
+ """Retrieve the token, using the provided value or defaulting to Django settings."""
87
+ return self ._provided_token or getattr (settings , f"{ self .server_name } _TOKEN" .upper (), None )
88
+
89
+ def _get_source_name (self ) -> str :
90
+ """by default, the source is /SDE/. However for the various dev servers, the source is tends to be /scrapers/"""
91
+ return "scrapers" if self .server_name in self .dev_servers else "SDE"
65
92
66
93
def process_response(self, url: str, payload: dict[str, Any]) -> Any:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON body.

    Args:
        url: Fully built endpoint URL.
        payload: JSON-serialisable request body.

    Returns:
        The parsed JSON body of a 200 response.

    Raises:
        requests.exceptions.HTTPError: For any status other than 200. 4xx/5xx
            responses are raised by ``raise_for_status``; any other non-200
            status is raised explicitly so the method never silently
            returns ``None``.
    """
    # NOTE(review): verify=False disables TLS certificate verification and no
    # timeout is set, so a stalled server blocks the caller indefinitely.
    # Both are kept as-is here because callers may depend on them — confirm.
    response = requests.post(url, headers={}, json=payload, verify=False)

    if response.status_code != requests.codes.ok:
        # Raises HTTPError for 4xx/5xx; returns quietly for anything else.
        response.raise_for_status()
        # The URL is deliberately left out of the message: on dev servers it
        # carries the credentials in its query string.
        raise requests.exceptions.HTTPError(
            f"Unexpected status code {response.status_code}", response=response
        )

    return response.json()
73
100
74
- return meaningful_response
101
+ def query (self , page : int , collection_config_folder : str = None , source : str = None ) -> Any :
102
+ url = f"{ self .base_url } /api/v1/search.query"
103
+ if self .server_name in self .dev_servers :
104
+ user = self ._get_user ()
105
+ password = self ._get_password ()
75
106
76
- def query (self , page : int , collection_config_folder : str = "" ) -> Any :
77
- if self .server_name == "lis_server" :
78
- url = f"{ self .base_url } /api/v1/search.query?Password={ self .xli_password } &User={ self .xli_user } "
79
- elif self .server_name == "lrm_dev_server" :
80
- url = f"{ self .base_url } /api/v1/search.query?Password={ self .lrm_password } &User={ self .lrm_user } "
81
- elif self .server_name == "lrm_qa_server" :
82
- url = f"{ self .base_url } /api/v1/search.query?Password={ self .lrm_qa_password } &User={ self .lrm_qa_user } "
107
+ if not user or not password :
108
+ raise ValueError (
109
+ "User and password are required for the query endpoint on the following servers: {self.dev_servers}"
110
+ )
111
+ authentication = f"?Password={ password } &User={ user } "
112
+ url = f"{ url } { authentication } "
83
113
else :
84
114
url = f"{ self .base_url } /api/v1/search.query"
115
+
85
116
payload = {
86
117
"app" : self .app_name ,
87
118
"query" : {
@@ -94,11 +125,73 @@ def query(self, page: int, collection_config_folder: str = "") -> Any:
94
125
}
95
126
96
127
if collection_config_folder :
97
- if self .server_name in ["lis_server" , "lrm_dev_server" , "lrm_qa_server" ]:
98
- payload ["query" ]["advanced" ]["collection" ] = f"/scrapers/{ collection_config_folder } /"
99
- else :
100
- payload ["query" ]["advanced" ]["collection" ] = f"/SDE/{ collection_config_folder } /"
128
+ source = source if source else self ._get_source_name ()
129
+ payload ["query" ]["advanced" ]["collection" ] = f"/{ source } /{ collection_config_folder } /"
130
+
131
+ return self .process_response (url , payload )
132
+
133
def sql_query(self, sql: str) -> Any:
    """Execute an SQL query on the configured server using token-based authentication.

    Args:
        sql: The SQL statement sent verbatim in the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If no token was provided and none is found via
            ``_get_token``.
        RuntimeError: If the HTTP request fails or returns an error status;
            the underlying ``requests`` exception is attached as the cause.
    """
    token = self._get_token()
    if not token:
        raise ValueError("A token is required to use the SQL endpoint")

    url = f"{self.base_url}/api/v1/engine.sql"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    payload = json.dumps(
        {
            "method": "engine.sql",
            "sql": sql,
            "pretty": True,
            "log": False,
            "output": "json",
            "resolveIndexList": "false",
            "engines": "default",
        }
    )

    try:
        response = requests.post(url, headers=headers, data=payload, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # Chain the original exception so the traceback keeps the real cause.
        raise RuntimeError(f"Api request to SQL endpoint failed: {e}") from e
158
+
159
def get_full_texts(self, collection_config_folder: str, source: str = None) -> Any:
    """
    Retrieves the URLs, full texts, and titles for a specified collection.

    Args:
        collection_config_folder: Folder name of the collection; it is
            embedded in the collection path ``/<source>/<folder>/``.
        source: Source segment of the collection path. When falsy, the
            server's default from ``_get_source_name`` is used.

    Returns:
        list: One dict per row of the SQL result, each with the keys
        'url', 'full_text', and 'title'.

    Raises:
        ValueError: If the server config has no "index" entry, or if
            ``source`` or ``collection_config_folder`` contains a single
            quote (which would terminate the SQL string literal early).

    Example:
        Calling get_full_texts("example_collection") might return:
        [
            {
                'url': 'http://example.com/article1',
                'full_text': 'Here is the full text of the first article...',
                'title': 'Article One Title'
            },
            {
                'url': 'http://example.com/article2',
                'full_text': 'Here is the full text of the second article...',
                'title': 'Article Two Title'
            }
        ]
    """

    if not source:
        source = self._get_source_name()

    if (index := self.config.get("index")) is None:
        raise ValueError("Index not defined for this server")

    # The statement is built by string interpolation, so a quote in either
    # value would break out of the literal; reject it instead of sending
    # malformed (or injected) SQL to the server.
    if "'" in source or "'" in collection_config_folder:
        raise ValueError("source and collection_config_folder must not contain single quotes")

    sql = f"SELECT url1, text, title FROM {index} WHERE collection = '/{source}/{collection_config_folder}/'"
    full_text_response = self.sql_query(sql)
    return self._process_full_text_response(full_text_response)
103
192
104
- return response
193
+ @staticmethod
194
+ def _process_full_text_response (full_text_response : str ):
195
+ return [
196
+ {"url" : url , "full_text" : full_text , "title" : title } for url , full_text , title in full_text_response ["Rows" ]
197
+ ]
0 commit comments