Skip to content

Commit 0d3500e

Browse files
chekos and claude authored
Implement Phases 0-2: Census API, MOE, PUMS, and spatial support (#296)
* Implement Phases 0-2: Census API, MOE, PUMS, and spatial support Adds the full Census API integration stack: Phase 0 - Foundation: - pypums/api/key.py: API key management via env vars - pypums/api/geography.py: 21+ geography levels with FIPS resolution - pypums/cache.py: file-based DataFrame caching with TTL Phase 1 - Core data functions: - pypums/acs.py: get_acs() with tidy/wide output, MOE scaling, summary vars - pypums/decennial.py: get_decennial() for 2000/2010/2020 Census data - pypums/variables.py: load_variables() for variable discovery Phase 2 - MOE + Spatial + Enhanced PUMS: - pypums/moe.py: moe_sum, moe_prop, moe_ratio, moe_product, significance - pypums/pums.py: get_pums() via Census API with filtering, recoding, rep weights - pypums/spatial.py: geometry=True support via TIGER/Line, as_dot_density() All 87 tests pass (Phase 0 through Phase 2 + existing tests). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Adapt Phase 2 modules to use shared api.client from Phase 0/1 Use CENSUS_API_BASE and call_census_api from pypums.api.client instead of inlining httpx calls. Keeps thin _call_census_api wrappers in each module for test mockability. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Address all 11 PR review comments - Fix multi-state silent data loss in get_pums (loop + concat) - Add max_attempts guard to prevent infinite loop in as_dot_density - Implement show_call parameter to print Census API URL/params - Add division-by-zero guards in moe_ratio and moe_prop - Calculate congress number dynamically instead of hardcoding - Use ordered list for GEOID column concatenation in acs/decennial - Accept survey parameter in get_acs (acs1/acs5) - Add GEOID column existence check in attach_geometry - Restrict clevel to supported values in significance() - Document sparse PUMS recodes dictionary - Normalize single-digit FIPS codes with zfill(2) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4925b54 commit 0d3500e

File tree

7 files changed

+718
-93
lines changed

7 files changed

+718
-93
lines changed

pypums/__init__.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
# type: ignore[attr-defined]
22
"""Download PUMS data files from the US Census Bureau's FTP server."""
33

4-
from pypums.acs import get_acs as get_acs
5-
from pypums.api.key import census_api_key as census_api_key
6-
from pypums.decennial import get_decennial as get_decennial
74
from pypums.surveys import ACS as ACS
8-
from pypums.variables import load_variables as load_variables
95

6+
from .acs import get_acs as get_acs
7+
from .api.key import census_api_key as census_api_key
108
from .constants import __app_name__ as __app_name__
119
from .constants import __version__ as __version__
10+
from .decennial import get_decennial as get_decennial
11+
from .moe import moe_product as moe_product
12+
from .moe import moe_prop as moe_prop
13+
from .moe import moe_ratio as moe_ratio
14+
from .moe import moe_sum as moe_sum
15+
from .moe import significance as significance
16+
from .pums import get_pums as get_pums
17+
from .variables import load_variables as load_variables

pypums/acs.py

Lines changed: 59 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,21 @@
1212
99: 2.576,
1313
}
1414

15-
# Geography columns that appear in Census API responses.
16-
_GEO_COLUMNS = frozenset({
17-
"state", "county", "tract", "block group", "block",
18-
"place", "congressional district",
15+
# Geography columns in FIPS concatenation order. The order matters because
16+
# GEOID is built by joining these columns (e.g. state+county+tract).
17+
_GEO_COL_ORDER = [
18+
"us", "region", "division", "state", "county", "county subdivision",
19+
"tract", "block group", "block", "place", "congressional district",
1920
"state legislative district (upper chamber)",
2021
"state legislative district (lower chamber)",
2122
"zip code tabulation area",
22-
"school district (unified)",
23-
"school district (elementary)",
23+
"school district (unified)", "school district (elementary)",
2424
"school district (secondary)",
2525
"metropolitan statistical area/micropolitan statistical area",
26-
"combined statistical area",
27-
"public use microdata area",
26+
"combined statistical area", "public use microdata area",
2827
"american indian area/alaska native area/hawaiian home land",
29-
"us", "region", "division", "county subdivision",
30-
})
28+
]
29+
_GEO_COLUMNS = frozenset(_GEO_COL_ORDER)
3130

3231

3332
def _call_census_api(url: str, params: dict) -> list[list[str]]:
@@ -42,6 +41,7 @@ def get_acs(
4241
state: str | None = None,
4342
county: str | None = None,
4443
year: int = 2023,
44+
survey: str = "acs5",
4545
output: str = "tidy",
4646
moe_level: int = 90,
4747
summary_var: str | None = None,
@@ -64,14 +64,16 @@ def get_acs(
6464
County FIPS code.
6565
year
6666
Data year (default 2023).
67+
survey
68+
``"acs5"`` (default) or ``"acs1"``.
6769
output
6870
``"tidy"`` (default) or ``"wide"``.
6971
moe_level
7072
Confidence level for MOE: 90, 95, or 99 (default 90).
7173
summary_var
7274
Variable ID to include as denominator columns.
7375
geometry
74-
If True, return a GeoDataFrame with shapes (not yet implemented).
76+
If True, return a GeoDataFrame with shapes.
7577
key
7678
Census API key. Falls back to ``census_api_key()``.
7779
@@ -80,8 +82,6 @@ def get_acs(
8082
pd.DataFrame
8183
Census data in tidy or wide format.
8284
"""
83-
if geometry:
84-
raise NotImplementedError("geometry=True is not yet supported.")
8585
if output not in ("tidy", "wide"):
8686
raise ValueError(f"output must be 'tidy' or 'wide', got {output!r}")
8787
if moe_level not in _Z_SCORES:
@@ -109,7 +109,7 @@ def get_acs(
109109
api_vars.append(f"{summary_var}E")
110110
api_vars.append(f"{summary_var}M")
111111

112-
url = f"{CENSUS_API_BASE}/{year}/acs/acs5"
112+
url = f"{CENSUS_API_BASE}/{year}/acs/{survey}"
113113
params: dict[str, str] = {
114114
"get": f"NAME,{','.join(api_vars)}",
115115
"for": for_clause,
@@ -124,8 +124,8 @@ def get_acs(
124124
headers = data[0]
125125
df = pd.DataFrame(data[1:], columns=headers)
126126

127-
# Build GEOID from FIPS columns.
128-
geo_cols = [c for c in df.columns if c in _GEO_COLUMNS]
127+
# Build GEOID from FIPS columns in canonical order.
128+
geo_cols = [c for c in _GEO_COL_ORDER if c in df.columns]
129129
if geo_cols:
130130
df["GEOID"] = df[geo_cols].apply(lambda row: "".join(row), axis=1)
131131

@@ -144,43 +144,48 @@ def get_acs(
144144

145145
if output == "wide":
146146
keep_cols = ["GEOID", "NAME"] + estimate_cols + moe_cols
147-
return df[[c for c in keep_cols if c in df.columns]]
148-
149-
# Tidy format: melt estimate and MOE columns separately, then merge.
150-
id_cols = ["GEOID", "NAME"] if "GEOID" in df.columns else ["NAME"]
151-
152-
# Exclude summary_var columns from the main melt.
153-
summary_est_col = f"{summary_var}E" if summary_var else None
154-
summary_moe_col = f"{summary_var}M" if summary_var else None
155-
main_est_cols = [c for c in estimate_cols if c != summary_est_col]
156-
main_moe_cols = [c for c in moe_cols if c != summary_moe_col]
157-
158-
est_long = df.melt(
159-
id_vars=id_cols,
160-
value_vars=main_est_cols,
161-
var_name="_est_var",
162-
value_name="estimate",
163-
)
164-
est_long["variable"] = est_long["_est_var"].str[:-1]
165-
166-
moe_long = df.melt(
167-
id_vars=id_cols,
168-
value_vars=main_moe_cols,
169-
var_name="_moe_var",
170-
value_name="moe",
171-
)
172-
moe_long["variable"] = moe_long["_moe_var"].str[:-1]
173-
174-
tidy = est_long[id_cols + ["variable", "estimate"]].merge(
175-
moe_long[id_cols + ["variable", "moe"]],
176-
on=id_cols + ["variable"],
177-
)
178-
179-
# Add summary variable columns if requested.
180-
if summary_var is not None and summary_est_col in df.columns:
181-
summary_df = df[id_cols + [summary_est_col, summary_moe_col]].rename(
182-
columns={summary_est_col: "summary_est", summary_moe_col: "summary_moe"},
147+
result = df[[c for c in keep_cols if c in df.columns]]
148+
else:
149+
# Tidy format: melt estimate and MOE columns separately, then merge.
150+
id_cols = ["GEOID", "NAME"] if "GEOID" in df.columns else ["NAME"]
151+
152+
# Exclude summary_var columns from the main melt.
153+
summary_est_col = f"{summary_var}E" if summary_var else None
154+
summary_moe_col = f"{summary_var}M" if summary_var else None
155+
main_est_cols = [c for c in estimate_cols if c != summary_est_col]
156+
main_moe_cols = [c for c in moe_cols if c != summary_moe_col]
157+
158+
est_long = df.melt(
159+
id_vars=id_cols,
160+
value_vars=main_est_cols,
161+
var_name="_est_var",
162+
value_name="estimate",
163+
)
164+
est_long["variable"] = est_long["_est_var"].str[:-1]
165+
166+
moe_long = df.melt(
167+
id_vars=id_cols,
168+
value_vars=main_moe_cols,
169+
var_name="_moe_var",
170+
value_name="moe",
171+
)
172+
moe_long["variable"] = moe_long["_moe_var"].str[:-1]
173+
174+
result = est_long[id_cols + ["variable", "estimate"]].merge(
175+
moe_long[id_cols + ["variable", "moe"]],
176+
on=id_cols + ["variable"],
183177
)
184-
tidy = tidy.merge(summary_df, on=id_cols)
185178

186-
return tidy
179+
# Add summary variable columns if requested.
180+
if summary_var is not None and summary_est_col in df.columns:
181+
summary_df = df[id_cols + [summary_est_col, summary_moe_col]].rename(
182+
columns={summary_est_col: "summary_est", summary_moe_col: "summary_moe"},
183+
)
184+
result = result.merge(summary_df, on=id_cols)
185+
186+
if geometry:
187+
from pypums.spatial import attach_geometry
188+
189+
result = attach_geometry(result, geography, state=state, year=year)
190+
191+
return result

pypums/api/geography.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Census geography hierarchy definitions and query building."""
22

3+
import us
4+
35
GEOGRAPHY_HIERARCHY: dict[str, dict] = {
46
"us": {"for": "us:1", "requires": []},
57
"region": {"for": "region:*", "requires": []},
@@ -55,6 +57,22 @@
5557
}
5658

5759

60+
def _resolve_state_fips(state: str) -> str:
61+
"""Convert a state name or abbreviation to a 2-digit FIPS code."""
62+
# Already a numeric FIPS code — normalize to 2 digits.
63+
if state.isdigit():
64+
return state.zfill(2)
65+
66+
result = us.states.lookup(state)
67+
if result is None:
68+
raise ValueError(
69+
f"Could not resolve state: {state!r}. "
70+
"Pass a 2-letter abbreviation (e.g. 'CA'), "
71+
"full name (e.g. 'California'), or FIPS code (e.g. '06')."
72+
)
73+
return result.fips
74+
75+
5876
def build_geography_query(
5977
geography: str,
6078
state: str | None = None,
@@ -67,7 +85,7 @@ def build_geography_query(
6785
geography
6886
Geography level name (e.g. ``"state"``, ``"county"``, ``"tract"``).
6987
state
70-
State FIPS code (e.g. ``"06"`` for California).
88+
State FIPS code or name/abbreviation (e.g. ``"06"``, ``"CA"``).
7189
county
7290
County FIPS code (e.g. ``"037"`` for Los Angeles County).
7391
@@ -91,7 +109,10 @@ def build_geography_query(
91109
spec = GEOGRAPHY_HIERARCHY[geo]
92110
required = spec["requires"]
93111

94-
if "state" in required and state is None:
112+
# Resolve state to FIPS if provided.
113+
state_fips = _resolve_state_fips(state) if state is not None else None
114+
115+
if "state" in required and state_fips is None:
95116
raise ValueError(
96117
f"Geography {geography!r} requires a state FIPS code. "
97118
"Pass state='XX' (e.g. state='06' for California)."
@@ -104,10 +125,10 @@ def build_geography_query(
104125

105126
for_clause = spec["for"]
106127

107-
# Build the "in" clause from required parents
128+
# Build the "in" clause from required parents.
108129
in_parts = []
109-
if "state" in required and state is not None:
110-
in_parts.append(f"state:{state}")
130+
if "state" in required and state_fips is not None:
131+
in_parts.append(f"state:{state_fips}")
111132
if "county" in required and county is not None:
112133
in_parts.append(f"county:{county}")
113134

pypums/decennial.py

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
2000: "dec/sf1",
1313
}
1414

15-
# Geography columns that appear in Census API responses.
16-
_GEO_COLUMNS = frozenset({
17-
"state", "county", "tract", "block group", "block",
18-
"place", "congressional district",
19-
"us", "region", "division", "county subdivision",
20-
})
15+
# Geography columns in FIPS concatenation order.
16+
_GEO_COL_ORDER = [
17+
"us", "region", "division", "state", "county", "county subdivision",
18+
"tract", "block group", "block", "place", "congressional district",
19+
]
20+
_GEO_COLUMNS = frozenset(_GEO_COL_ORDER)
2121

2222

2323
def _call_census_api(url: str, params: dict) -> list[list[str]]:
@@ -58,7 +58,7 @@ def get_decennial(
5858
pop_group
5959
Population group code for DHC-A disaggregated data.
6060
geometry
61-
If True, return a GeoDataFrame with shapes (not yet implemented).
61+
If True, return a GeoDataFrame with shapes.
6262
key
6363
Census API key. Falls back to ``census_api_key()``.
6464
@@ -67,21 +67,14 @@ def get_decennial(
6767
pd.DataFrame
6868
Census data in tidy or wide format.
6969
"""
70-
if geometry:
71-
raise NotImplementedError("geometry=True is not yet supported.")
7270
if output not in ("tidy", "wide"):
7371
raise ValueError(f"output must be 'tidy' or 'wide', got {output!r}")
7472

7573
api_key = census_api_key(key) if key else census_api_key()
7674
for_clause, in_clause = build_geography_query(geography, state=state, county=county)
7775

78-
# Validate year and select dataset.
79-
if year not in _YEAR_DATASETS:
80-
raise ValueError(
81-
f"Unsupported decennial year: {year}. "
82-
f"Supported years: {sorted(_YEAR_DATASETS)}"
83-
)
84-
dataset = "dec/dhc-a" if pop_group is not None else _YEAR_DATASETS[year]
76+
# Select dataset.
77+
dataset = "dec/dhc-a" if pop_group is not None else _YEAR_DATASETS.get(year, "dec/dhc")
8578

8679
# Build the variable list.
8780
if variables is not None:
@@ -110,8 +103,8 @@ def get_decennial(
110103
headers = data[0]
111104
df = pd.DataFrame(data[1:], columns=headers)
112105

113-
# Build GEOID from FIPS columns.
114-
geo_cols = [c for c in df.columns if c in _GEO_COLUMNS]
106+
# Build GEOID from FIPS columns in canonical order.
107+
geo_cols = [c for c in _GEO_COL_ORDER if c in df.columns]
115108
if geo_cols:
116109
df["GEOID"] = df[geo_cols].apply(lambda row: "".join(row), axis=1)
117110

@@ -124,15 +117,20 @@ def get_decennial(
124117

125118
if output == "wide":
126119
keep_cols = ["GEOID", "NAME"] + var_cols
127-
return df[[c for c in keep_cols if c in df.columns]]
128-
129-
# Tidy format: melt to one row per geography × variable.
130-
id_cols = ["GEOID", "NAME"] if "GEOID" in df.columns else ["NAME"]
131-
tidy = df.melt(
132-
id_vars=id_cols,
133-
value_vars=var_cols,
134-
var_name="variable",
135-
value_name="value",
136-
)
137-
138-
return tidy
120+
result = df[[c for c in keep_cols if c in df.columns]]
121+
else:
122+
# Tidy format: melt to one row per geography x variable.
123+
id_cols = ["GEOID", "NAME"] if "GEOID" in df.columns else ["NAME"]
124+
result = df.melt(
125+
id_vars=id_cols,
126+
value_vars=var_cols,
127+
var_name="variable",
128+
value_name="value",
129+
)
130+
131+
if geometry:
132+
from pypums.spatial import attach_geometry
133+
134+
result = attach_geometry(result, geography, state=state, year=year)
135+
136+
return result

0 commit comments

Comments (0)