|
2 | 2 |
|
3 | 3 | import logging |
4 | 4 | from typing import Any, Annotated |
5 | | -from fastapi import FastAPI, Depends, HTTPException, status, Security |
| 5 | +from fastapi import FastAPI, Depends, HTTPException, status |
6 | 6 | from fastapi.responses import FileResponse |
7 | 7 |
|
8 | 8 | from pydantic import BaseModel |
9 | | -from fastapi.security import OAuth2PasswordRequestForm |
| 9 | +from fastapi.security import OAuth2PasswordRequestForm, OAuth2PasswordBearer |
10 | 10 | from .utilities import ( |
11 | 11 | get_sftp_bucket_name, |
12 | 12 | StorageControl, |
13 | | - fetch_institution_ids, |
14 | | - split_csv_and_generate_signed_urls, |
15 | 13 | ) |
16 | 14 | from .config import sftp_vars, env_vars, startup_env_vars |
17 | | -from .authn import ( |
18 | | - Token, |
19 | | - get_current_username, |
20 | | - check_creds, |
21 | | - create_access_token, |
22 | | - get_api_key, |
23 | | -) |
24 | | -from datetime import timedelta |
25 | | - |
26 | | - |
27 | | -import os |
| 15 | +from .authn import Token, get_current_username, check_creds, create_access_token |
| 16 | +from datetime import timedelta, datetime, timezone |
28 | 17 |
|
29 | 18 | # Set the logging |
30 | 19 | logging.basicConfig(format="%(asctime)s [%(levelname)s]: %(message)s") |
@@ -52,9 +41,8 @@ class PdpPullRequest(BaseModel): |
52 | 41 | class PdpPullResponse(BaseModel): |
53 | 42 | """Fields for the PDP pull response.""" |
54 | 43 |
|
55 | | - sftp_files: list[dict] |
56 | | - pdp_inst_generated: list[dict] |
57 | | - pdp_inst_not_found: list[str] |
| 44 | + pdp_inst_generated: list[int] |
| 45 | + pdp_inst_not_found: list[int] |
58 | 46 |
|
59 | 47 |
|
60 | 48 | @app.on_event("startup") |
@@ -96,78 +84,30 @@ async def login_for_access_token( |
96 | 84 | return Token(access_token=access_token, token_type="bearer") |
97 | 85 |
|
98 | 86 |
|
99 | | -def sftp_helper(storage_control: StorageControl, sftp_source_filenames: list) -> list: |
100 | | - """ |
101 | | - For each source file in sftp_source_filenames, copies the file from the SFTP |
102 | | - server to GCS. The destination filename is automatically generated by prefixing |
103 | | - the base name of the source file with "processed_". |
104 | | -
|
105 | | - Args: |
106 | | - storage_control (StorageControl): An instance with a method `copy_from_sftp_to_gcs`. |
107 | | - sftp_source_filenames (list): A list of file paths on the SFTP server. |
108 | | - """ |
109 | | - num_files = len(sftp_source_filenames) |
110 | | - logger.info(f"Starting sftp_helper for {num_files} file(s).") |
111 | | - all_blobs = [] |
112 | | - for sftp_source_filename in sftp_source_filenames: |
113 | | - sftp_source_filename = sftp_source_filename["path"] |
114 | | - if ( |
115 | | - sftp_source_filename |
116 | | - == "./receive/AO1600pdp_AO1600_AR_DEIDENTIFIED_STUDYID_20250228030226.csv" |
117 | | - ): |
118 | | - logger.debug(f"Processing source file: {sftp_source_filename}") |
119 | | - |
120 | | - # Extract the base filename. |
121 | | - base_filename = os.path.basename(sftp_source_filename) |
122 | | - dest_filename = f"{base_filename}" |
123 | | - logger.debug(f"Destination filename will be: {dest_filename}") |
124 | | - |
125 | | - try: |
126 | | - storage_control.copy_from_sftp_to_gcs( |
127 | | - sftp_vars["SFTP_HOST"], |
128 | | - 22, |
129 | | - sftp_vars["SFTP_USER"], |
130 | | - sftp_vars["SFTP_PASSWORD"], |
131 | | - sftp_source_filename, |
132 | | - get_sftp_bucket_name(env_vars["ENV"]), |
133 | | - dest_filename, |
134 | | - ) |
135 | | - all_blobs.append(dest_filename) |
136 | | - logger.info( |
137 | | - f"Successfully processed '{sftp_source_filename}' as '{dest_filename}'." |
138 | | - ) |
139 | | - return all_blobs |
140 | | - except Exception as e: |
141 | | - logger.error( |
142 | | - f"Error processing '{sftp_source_filename}': {e}", exc_info=True |
143 | | - ) |
144 | | - return all_blobs |
| 87 | +def sftp_helper( |
| 88 | + storage_control: StorageControl, sftp_source_filename: str, dest_filename: str |
| 89 | +): |
| 90 | + storage_control.copy_from_sftp_to_gcs( |
| 91 | + sftp_vars["SFTP_HOST"], |
| 92 | + sftp_vars["SFTP_PORT"], |
| 93 | + sftp_vars["SFTP_USER"], |
| 94 | + sftp_vars["SFTP_PASSWORD"], |
| 95 | + sftp_source_filename, |
| 96 | + get_sftp_bucket_name(env_vars["ENV"]), |
| 97 | + dest_filename, |
| 98 | + ) |
145 | 99 |
|
146 | 100 |
|
147 | 101 | @app.post("/execute-pdp-pull", response_model=PdpPullResponse) |
148 | | -async def execute_pdp_pull( |
| 102 | +def execute_pdp_pull( |
149 | 103 | req: PdpPullRequest, |
150 | 104 | current_username: Annotated[str, Depends(get_current_username)], |
151 | 105 | storage_control: Annotated[StorageControl, Depends(StorageControl)], |
152 | | - api_key_enduser_tuple: str = Security(get_api_key), |
153 | 106 | ) -> Any: |
154 | 107 | """Performs the PDP pull of the file.""" |
155 | 108 | storage_control.create_bucket_if_not_exists(get_sftp_bucket_name(env_vars["ENV"])) |
156 | | - files = storage_control.list_sftp_files( |
157 | | - sftp_vars["SFTP_HOST"], 22, sftp_vars["SFTP_USER"], sftp_vars["SFTP_PASSWORD"] |
158 | | - ) |
159 | | - all_blobs = sftp_helper(storage_control, files) |
160 | | - signed_urls = split_csv_and_generate_signed_urls( |
161 | | - bucket_name=get_sftp_bucket_name(env_vars["ENV"]), source_blob_name=all_blobs[0] |
162 | | - ) |
163 | | - |
164 | | - valid_pdp_ids, invalid_ids = fetch_institution_ids( |
165 | | - pdp_ids=list(signed_urls.keys()), |
166 | | - backend_api_key=next(key for key in api_key_enduser_tuple if key is not None), |
167 | | - ) |
168 | | - |
| 109 | + sftp_helper(storage_control, "sftp_file.csv", "write_out_file.csv") |
169 | 110 | return { |
170 | | - "sftp_files": files, |
171 | | - "pdp_inst_generated": [valid_pdp_ids], |
172 | | - "pdp_inst_not_found": invalid_ids, |
| 111 | + "pdp_inst_generated": [], |
| 112 | + "pdp_inst_not_found": [], |
173 | 113 | } |
0 commit comments