Skip to content

Commit 1559133

Browse files
committed
Login updates for 2FA
- Login updates for 2FA; it now opens a browser. - Adds an option for a profile ID as well, so you don't have to log in every time. - Fixed temp file uploads; this gives them a unique file name. - Updated metadata guidelines to explain Overture Maps.
1 parent 5dcd757 commit 1559133

File tree

8 files changed

+340
-110
lines changed

8 files changed

+340
-110
lines changed

.env.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ AGOL_PORTAL_URL=https://your-org.maps.arcgis.com
99
AGOL_USERNAME=your_username
1010
AGOL_PASSWORD=your_secure_password_here
1111

12+
# ArcGIS Online 2FA / OAuth (Browser Login)
13+
# Set these instead of username/password when 2FA is required.
14+
# AGOL_USE_OAUTH=true
15+
# AGOL_CLIENT_ID=your_client_id_here
16+
# AGOL_PROFILE=your_profile_name # Optional: reuse saved auth across runs
17+
1218
# ArcGIS Online Advanced Settings (Optional)
1319
# AGOL_TOKEN_EXPIRATION=9999 # Extended timeout for large datasets
1420
# AGOL_LARGE_DATASET_THRESHOLD=10000 # Feature count threshold

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ The pipeline follows a modular Source -> Transform -> Publish/Export process:
3030

3131
## Requirements
3232
- Python 3.11+ (compatible with ArcGIS Python API)
33-
- ArcGIS Online credentials (optional - only required for `arcgis-upload` and `overture-dump` command)
33+
- ArcGIS Online credentials or OAuth client ID (required for the `arcgis-upload` and `overture-dump` commands)
3434

3535
The pipeline can be used for data export without ArcGIS Online credentials. Environment configuration is managed through `.env` files for secure credential storage.
3636

@@ -55,6 +55,15 @@ On macOS/Linux:
5555
- Use the `.env` example to define your AGOL credentials
5656
- No need to create country-specific config files, you can use the global config with a country argument
5757

58+
**2FA / Browser Login (Recommended)**
59+
- Set `AGOL_USE_OAUTH=true` and `AGOL_CLIENT_ID=...` (no username/password required).
60+
- On first run, a browser window opens to complete sign-in and 2FA.
61+
- You can register an ArcGIS Online app to get a Client ID (Application type: "Other").
62+
- Optional: set `AGOL_PROFILE=your_profile` to cache the session and avoid repeated prompts in batch runs.
63+
64+
**Username/Password (Non‑2FA or Service Accounts)**
65+
- Set `AGOL_USERNAME` and `AGOL_PASSWORD` as shown in `.env.example`.
66+
5867
### 4. Run commands
5968
The Python CLI has three main commands: uploading to AGOL (`arcgis-upload`), downloading as GeoJSON (`export`), or downloading a dump for local use as needed (`overture-dump`).
6069

@@ -192,4 +201,4 @@ o2agol overture-dump buildings --country afg --force-download
192201
- Chris Holme's excellent tutorial here: https://github.com/cholmes/duckdb-geoparquet-tutorials/tree/main
193202
- Georock's post on spatial clipping: https://geo.rocks/post/duckdb_geospatial/
194203
- Bounding Box extracts from Natural Earth: https://github.com/sandstrom/country-bounding-boxes/tree/master
195-
- This ArcGIS Pro extension that helped inspire this workflow: https://github.com/COF-RyLopez/ArcGISPro-GeoParquet-Addin
204+
- This ArcGIS Pro extension that helped inspire this workflow: https://github.com/COF-RyLopez/ArcGISPro-GeoParquet-Addin

src/o2agol/cleanup.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,43 @@ def check_temp_size_limits(warning_gb: int = 10, limit_gb: int = 20) -> bool:
133133
return True
134134

135135

136+
def _remove_path_with_retries(path: Path, retries: int = 3, delay_s: float = 1.0) -> bool:
137+
"""Attempt to remove a file/directory with retries (handles Windows file locks)."""
138+
for attempt in range(1, retries + 1):
139+
try:
140+
if path.is_dir():
141+
shutil.rmtree(path)
142+
else:
143+
path.unlink()
144+
return True
145+
except OSError as e:
146+
if attempt == retries:
147+
logging.warning(f"Could not remove {path}: {e}")
148+
return False
149+
time.sleep(delay_s)
150+
return False
151+
152+
136153
def cleanup_current_pid() -> None:
    """Clean up temp files for current process."""
    pid_dir = get_project_temp_dir() / f"pid_{os.getpid()}"
    if not pid_dir.exists():
        return

    # Delete individual files first, so a single locked file cannot
    # prevent everything else from being cleaned up.
    for entry in pid_dir.rglob("*"):
        if entry.is_file():
            _remove_path_with_retries(entry, retries=3, delay_s=1.0)

    # Remove now-empty directories, deepest paths first.
    directories = [p for p in pid_dir.rglob("*") if p.is_dir()]
    for directory in sorted(directories, reverse=True):
        try:
            directory.rmdir()
        except OSError:
            pass

    # Finally remove the pid directory itself; warn if anything is still locked.
    if not _remove_path_with_retries(pid_dir, retries=3, delay_s=1.0):
        logging.warning(f"Could not clean PID temp directory {pid_dir}: file(s) still in use")
145173

146174

147175
def cleanup_orphaned_pids() -> int:
@@ -257,4 +285,4 @@ def full_cleanup_check(
257285
# Then clean up stale files
258286
cleanup_stale_files(retention_hours)
259287

260-
return check_temp_size_limits(warning_gb, limit_gb)
288+
return check_temp_size_limits(warning_gb, limit_gb)

src/o2agol/cli.py

Lines changed: 107 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from dotenv import load_dotenv
1111

1212
import geopandas as gpd
13+
import pandas as pd
1314

1415
# Load environment variables BEFORE importing local modules that use them
1516
load_dotenv()
@@ -1040,7 +1041,8 @@ def arcgis_upload(
10401041
metadata = format_metadata_from_config(config_dict, query_config, country_config)
10411042

10421043
#hide any polygons
1043-
transformed_data = polygons_to_centroids(transformed_data)
1044+
if isinstance(transformed_data, dict) and query_config.is_multilayer:
1045+
transformed_data = add_sector_layers(transformed_data)
10441046

10451047
if isinstance(transformed_data, dict):
10461048
# Multi-layer service (education, health, markets)
@@ -1245,7 +1247,8 @@ def export_data_command(
12451247

12461248

12471249
# create single point from building polygons
1248-
transformed_data = polygons_to_centroids(transformed_data)
1250+
if isinstance(transformed_data, dict) and query_config.is_multilayer:
1251+
transformed_data = add_sector_layers(transformed_data)
12491252

12501253
############################################
12511254
############################################
@@ -1730,6 +1733,9 @@ def format_duration(seconds: float) -> str:
17301733
if not layer_gdf.empty:
17311734
normalized_gdf = transformer.normalize(layer_gdf)
17321735
transformed_data[layer_name] = transformer.add_metadata(normalized_gdf, country_config)
1736+
1737+
if isinstance(transformed_data, dict) and query_config.is_multilayer:
1738+
transformed_data = add_sector_layers(transformed_data)
17331739

17341740
if not transformed_data:
17351741
pipeline_logger.info("No valid data after transformation")
@@ -2160,68 +2166,134 @@ def version():
21602166
app()
21612167

21622168
# This function will check for layers with polygon geometries and convert them to centroids
def polygons_to_centroids(
    transformed_data: gpd.GeoDataFrame | dict[str, gpd.GeoDataFrame]
) -> gpd.GeoDataFrame | dict[str, gpd.GeoDataFrame]:
    """Derive centroid point layers from (Multi)Polygon layers.

    Accepts either a single GeoDataFrame or a dict of named layers:
      - Single GeoDataFrame: returns its centroid version, or the original
        unchanged when it has no polygon geometries or no usable CRS.
      - Dict of layers: returns a copy of the dict with one additional
        ``"<name>_centroids"`` layer per polygon layer; the original
        polygon layers are kept untouched.

    Centroids are computed in a projected CRS (estimated UTM zone) when the
    layer CRS is geographic, then reprojected to EPSG:4326.
    """
    base_logger = logging.getLogger(__name__)
    pipeline_logger = PipelineLogger(base_logger)
    # create single point from building polygons
    #print("\n############################################\n")

    def _centroid_layer(name: str, layer: gpd.GeoDataFrame) -> gpd.GeoDataFrame | None:
        # Convert one layer to centroid points; returns None when the layer
        # should be skipped (no geometry column, no polygons, or no CRS).
        if not hasattr(layer, "geometry") or not hasattr(layer.geometry, "geom_type"):
            pipeline_logger.info(f"[{name}] No GeoSeries geometry found. Skipping centroid conversion.")
            return None

        # What geometry types do we have?
        gtypes = set(layer.geometry.geom_type.dropna().unique()) if not layer.empty else set()
        crs_str = layer.crs.to_string() if layer.crs is not None else "None"
        pipeline_logger.info(f"[{name}] geom_types={gtypes} | crs={crs_str}")

        if not any(t in {"Polygon", "MultiPolygon"} for t in gtypes):
            pipeline_logger.info(f"[{name}] No (Multi)Polygon geometries to centroid. Skipping.")
            return None

        if layer.crs is None:
            pipeline_logger.info(f"[{name}] CRS is None → set it first (e.g., EPSG:4326).")
            return None

        # Compute centroids in a projected CRS, then back to WGS84
        if layer.crs.is_geographic:
            try:
                proj_crs = layer.estimate_utm_crs()
                pipeline_logger.info(f"[{name}] best CRS is: {proj_crs.to_string()}")
                centroids_wgs84 = (
                    layer.to_crs(proj_crs).geometry.centroid
                    .set_crs(proj_crs)  # ensure CRS on series
                    .to_crs("EPSG:4326")
                )
            except Exception as e:
                # Fallback: centroid directly in the geographic CRS is less
                # accurate for large/high-latitude polygons but never fails.
                pipeline_logger.info(f"[{name}] estimate_utm_crs() failed: {e} → fallback (less accurate).")
                centroids_wgs84 = gpd.GeoSeries(layer.geometry.centroid, crs=layer.crs).to_crs("EPSG:4326")
        else:
            centroids_wgs84 = gpd.GeoSeries(layer.geometry.centroid, crs=layer.crs).to_crs("EPSG:4326")

        # Build a points layer without destroying the polygon layer
        points_gdf = layer.copy()
        points_gdf["centroid"] = centroids_wgs84
        points_gdf.set_geometry("centroid", inplace=True)
        # NOTE(review): assumes the active geometry column is named
        # "geometry" — confirm for all upstream layers.
        points_gdf.drop(columns="geometry", inplace=True)
        # Normalize geometry column name for downstream publishing
        try:
            points_gdf = points_gdf.rename_geometry("geometry")
        except Exception:
            points_gdf = points_gdf.set_geometry(points_gdf.geometry.name)
        return points_gdf

    # Single-layer input: return centroids, or the input unchanged on skip.
    if isinstance(transformed_data, gpd.GeoDataFrame):
        centroids = _centroid_layer("features", transformed_data)
        return centroids if centroids is not None else transformed_data

    # Dict input: work on copies so callers' layers are never mutated.
    out: dict[str, gpd.GeoDataFrame] = {k: v.copy() for k, v in transformed_data.items()}

    for name, layer in transformed_data.items():
        centroids = _centroid_layer(name, layer)
        if centroids is None:
            continue

        # Insert alongside the original polygon layer
        points_key = f"{name}_centroids"
        if points_key in out:
            pipeline_logger.info(f"[{name}] '{points_key}' already exists, overwriting with new centroids layer.")
        out[points_key] = centroids

    return out
2240+
def add_sector_layers(
    transformed_data: gpd.GeoDataFrame | dict[str, gpd.GeoDataFrame]
) -> gpd.GeoDataFrame | dict[str, gpd.GeoDataFrame]:
    """
    For sectoral queries (education/health/markets), ensure three layers:
      - places (points)
      - buildings (polygons)
      - combined points (places + building centroids)

    Non-dict inputs, and dicts without both a "place" and a "building"
    layer, are returned unchanged. The intermediate centroid layer is
    removed after being merged into the combined layer.
    """
    if not isinstance(transformed_data, dict):
        return transformed_data

    # Identify primary layers by substring match on the layer name
    # (first match wins when several keys contain "place"/"building").
    places_key = None
    buildings_key = None
    for key in transformed_data.keys():
        lowered = key.lower()
        if places_key is None and "place" in lowered:
            places_key = key
        if buildings_key is None and "building" in lowered:
            buildings_key = key

    if not places_key or not buildings_key:
        return transformed_data

    # Ensure centroid layer exists
    updated = polygons_to_centroids(transformed_data)
    if not isinstance(updated, dict):
        return transformed_data

    # polygons_to_centroids names derived layers "<name>_centroids".
    centroids_key = f"{buildings_key}_centroids"
    if centroids_key not in updated:
        return updated

    places_gdf = updated[places_key]
    centroids_gdf = updated[centroids_key]

    # Normalize geometry column name for both
    for gdf in (places_gdf, centroids_gdf):
        if hasattr(gdf, "geometry"):
            try:
                if gdf.geometry.name != "geometry":
                    gdf.rename_geometry("geometry", inplace=True)
            except Exception:
                pass

    # Build the combined points layer only if the caller hasn't already.
    combined_key = f"{places_key}_combined"
    if combined_key not in updated:
        combined = gpd.GeoDataFrame(
            pd.concat([places_gdf, centroids_gdf], ignore_index=True, sort=False),
            geometry="geometry",
            crs=places_gdf.crs or centroids_gdf.crs
        )
        updated[combined_key] = combined

    # Remove intermediate centroid layer (keep only combined output)
    updated.pop(centroids_key, None)

    return updated

0 commit comments

Comments
 (0)