
Commit 1a94629

dhensle, jpn--, yueshuaing, asiripanich, and aletzdy authored
Estimation Enhancements (#917)
* multiprocess initial commit
* blacken
* parquet format for EDBs
* adding pkl, fixing edb concat and write
* fixing double naming of coefficient files
* blacken
* fixing missing cdap coefficients file, write pickle function
* compact edb writing, index duplication, parquet datatypes
* sorting dest choice bundles
* adding coalesce edbs as its own step
* CI testing initial commit
* infer.py CI testing
* estimation sampling for non-mandatory and joint tours
* adding survey choice to choices_df in interaction_sample
* adding option to delete the mp edb subdirs
* changes supporting sandag abm3 estimation mode
* running test sandag example through trip dest sample
* Estimation Pydantic (#2)
* pydantic for estimation settings
* allow df as type in config
* fix table_info
* repair for Pydantic
* df is attribute
* Estimation settings pydantic update
* new compact formatting
* handling multiple columns for parquet write
* dropping duplicate columns
* actually removing duplicate columns
* dfs with correct indexes and correct mp sorting
* ignore index on sort for mp coalesce edbs
* updating estimation checks to allow for non-zero household_sample_size
* Re-estimation (#3)
* pydantic for estimation settings
* allow df as type in config
* fix table_info
* auto ownership
* repair for pydantic
* update for ruff
* updated for simple models
* repair for Pydantic
* simple simulate and location choice
* df is attribute
* scheduling
* stop freq
* test locations
* cdap
* nonmand_and_joint_tour_dest_choice
* nonmand_tour_freq
* fix ci to stop using mamba
* test updates
* use larch6 from pip
* use numba for stop freq
* fix for pandas 1.5
* fix stop freq test for numba
* Sharrow Cache Dir Setting (#893)
* setting necessary filesystem changes from settings file
* set for multiprocessing
* repair github actions
* github action updates (#903)
* script to make data
* unified script for making data
* remove older
* bug
* doc note
* load from parquet if available
* add original alt ids to EDB output when using compact
* fix MP race
* script arg to skip to EDB
* clean up CDAP and blacken
* refactor model_estimation_table_types change to estimation_table_types, to avoid pydantic namespace clash
* repair drop_dupes
* blacken
* location choice with compact
* choice_def for compact
* spec changes for simple-simulate
* re-estimation demo for auto ownership
* clean up status messages
* change name to stop pydantic warnings
* edit configs
* default estimation sample size is same as regular sample size
* allow location alts not in cv format
* dummy zones for location choice
* update scheduling model estimation
* various cleanup
* stop freq
* tidy build script
* update 02 school location for larger example
* update notebook 04
* editable model re-estimation for location choice
* fix test names
* update notebooks
* cdap print filenames as loading
* notebook 07
* tests thru 07
* notebooks 08 09
* build the data first
* runnable script
* change larch version dependency
* keep pandas<2
* notebooks 10 11
* notebook 12
* remove odd print
* add matplotlib
* notebook 13 14
* test all the notebooks
* add xlsxwriter to tests
* notebook 15
* CDAP revise model spec demo
* notebook 16
* notebook 17
* longer timeout
* notebook 18
* notebook 19
* notebook 20
* smaller notebook 15
* configurable est mode setup
* notebook 21
* notebook 22
* config sample size in GA
* notebook 23
* updates for larch and graphviz
* change default to compact
* compare model 03
* test updates
* rename test targets
* repair_av_zq
* move doctor up
* add another repair
* oops

---------

Co-authored-by: David Hensle <[email protected]>

* Removing estimation.yaml settings that are no longer needed
* fixing unit tests, setting parquet edb default
* one more missed estimation.yaml
* using df.items for pandas 2 compatibility
* tidy doc
* updating edb file name for NMTF
* updating numba and pandas in the conda env files
* Improve test stability (#4)
* handle dev versions of Larch
* test stability
* pin multimethod < 2.0
* add availability_expression
* starting est docs
* Resolve package version conflicts (#923)
* limit multimethod version to 2.0 and earlier
* add multimethod version to other settings
* [makedocs] update installer download link
* [makedocs] update branch docs
* GitHub Actions updates (#926)
* use libmamba solver
* add permissions [makedocs]
* add write permission for dev docs [makedocs]
* conda-solver: classic
* trace proto tables if available, otherwise synthetic population (#901)
  Co-authored-by: Jeffrey Newman <[email protected]>
* release instructions (#927)
* use libmamba solver
* add permissions [makedocs]
* add write permission for dev docs [makedocs]
* conda-solver: classic
* include workflow dispatch option for tests
* update release instructions
* add installer build to instructions
* Pin mamba for now, per conda-incubator/setup-miniconda#392
* conda-remove-defaults
* when no unavailability parameters are included
* some general estimation docs
* Use pandas 2 for docbuild environment (#928)
* fix link
* allow failure to import larch
* workflow
* blacken
* try some pins
* speed up docbuild
* use pandas 2 for docs
* oops wrong file
* restore foundation
* Update HOW_TO_RELEASE.md
* refactor(shadow_pricing.py): remove a duplicated `default_segment_to_name_dict` (#930)
* fix typo
* fixing disaggregate accessibility bug in zone sampler
* Revert "fixing disaggregate accessibility bug in zone sampler"
  This reverts commit be5d093.
* notes on size terms
* clean up docbuild
* fix version check
* add some doc
* tidy
* estimation docs
* more on alternative avail
* model evaluation
* add doc on component_model
* documentation enhancements
* larch6 is now larch>6
* branch docs on workflow_dispatch
* missing doc section on model respec

---------

Co-authored-by: Yue Shuai <[email protected]>
Co-authored-by: David Hensle <[email protected]>
Co-authored-by: amarin <[email protected]>
Co-authored-by: Ali Etezady <[email protected]>
Co-authored-by: Sijia Wang <[email protected]>

* handling missing data or availability conditions
* add docs on locking size terms
* include constants in CDAP
* bump larch requirement
* require larch 6.0.40
* add xlsxwriter to envs
* require larch 6.0.41
* add links
* fix typos and formatting
* cdap hh and per parquet read match csv
* add missing x_validator for mode choice and nonmand tour freq
* add tour mode choice edit example
* add to docs
* union not addition on sets
* restore nb kernel
* blacken
* replacing conda with uv in estimation tests
* add requests to github-action dependencies
* running with created virtual env instead
* Fix estimation notebook tests (#8)
* Update scheduling.py

---------

Co-authored-by: Jeffrey Newman <[email protected]>
Co-authored-by: Yue Shuai <[email protected]>
Co-authored-by: amarin <[email protected]>
Co-authored-by: Ali Etezady <[email protected]>
Co-authored-by: Sijia Wang <[email protected]>
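Several bullets above (parquet format for EDBs, "adding coalesce edbs as its own step", "dropping duplicate columns", "dfs with correct indexes and correct mp sorting") concern merging per-process estimation data bundles (EDBs) back into a single table. A minimal pandas sketch of that coalescing idea; the function name and the drop-duplicate-columns detail are illustrative assumptions, not ActivitySim's actual implementation:

```python
import pandas as pd

def coalesce_edb(frames: list[pd.DataFrame]) -> pd.DataFrame:
    # Illustrative sketch only: stack the same EDB table as written by each
    # multiprocess subdirectory, drop any duplicated columns, and sort by
    # index so the combined table matches what a single-process run would
    # have produced.
    combined = pd.concat(frames)
    combined = combined.loc[:, ~combined.columns.duplicated()]
    return combined.sort_index()
```

Combining two per-process fragments this way restores a stable row order regardless of which process handled which households.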
1 parent 10f8f44 commit 1a94629

File tree

161 files changed: +68,669 -42,774 lines


.github/workflows/core_tests.yml

Lines changed: 78 additions & 1 deletion
@@ -321,6 +321,83 @@ jobs:
       run: |
         uv run pytest activitysim/estimation/test/test_larch_estimation.py --durations=0

+  estimation_notebooks:
+    needs: foundation
+    env:
+      python-version: "3.10"
+      label: win-64
+    defaults:
+      run:
+        shell: pwsh
+    name: Estimation Notebooks Test
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: ".python-version"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.7.12"
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+
+      - name: setup graphviz
+        uses: ts-graphviz/setup-graphviz@v2
+
+      - name: Install activitysim
+        run: |
+          uv sync --locked --group github-action
+
+      - name: Create Estimation Data
+        run: >
+          uv run --group github-action python activitysim/examples/example_estimation/notebooks/est_mode_setup.py
+          --household_sample_size 5000
+
+      - name: Test Estimation Notebooks
+        run: >
+          uv run --group github-action pytest activitysim/examples/example_estimation/notebooks
+          --nbmake-timeout=3000
+          --ignore=activitysim/examples/example_estimation/notebooks/01_estimation_mode.ipynb
+          --ignore-glob=activitysim/examples/example_estimation/notebooks/test-estimation-data/**
+
+  estimation_edb_creation:
+    needs: foundation
+    env:
+      python-version: "3.10"
+      label: win-64
+    defaults:
+      run:
+        shell: pwsh
+    name: estimation_edb_creation_test
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.7.12"
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: ".python-version"
+
+      - name: Install activitysim
+        run: |
+          uv sync --locked --only-group github-action
+
+      - name: Test Estimation EDB Creation
+        run: |
+          uv run pytest activitysim/estimation/test/test_edb_creation/test_edb_formation.py --durations=0
+
   expression-profiling:
     needs: foundation
     env:
@@ -397,4 +474,4 @@ jobs:
       github_token: ${{ secrets.GITHUB_TOKEN }}
       # Token is created automatically by Github Actions, no other config needed
       publish_dir: ./docs/_build/html
-      destination_dir: develop
+      destination_dir: develop

activitysim/abm/models/cdap.py

Lines changed: 2 additions & 2 deletions
@@ -195,7 +195,7 @@ def cdap_simulate(
         estimator.write_coefficients(coefficients_df, model_settings)
         estimator.write_table(
             cdap_interaction_coefficients,
-            "interaction_coefficients",
+            "cdap_interaction_coefficients",
             index=False,
             append=False,
         )
@@ -204,7 +204,7 @@ def cdap_simulate(
             spec = cdap.get_cached_spec(state, hhsize)
             estimator.write_table(spec, "spec_%s" % hhsize, append=False)
             if add_joint_tour_utility:
-                joint_spec = cdap.get_cached_joint_spec(hhsize)
+                joint_spec = cdap.get_cached_joint_spec(state, hhsize)
                 estimator.write_table(
                     joint_spec, "joint_spec_%s" % hhsize, append=False
                 )

activitysim/abm/models/disaggregate_accessibility.py

Lines changed: 6 additions & 5 deletions
@@ -764,11 +764,12 @@ def get_disaggregate_logsums(
        state.filesystem, model_name + ".yaml"
    )
    model_settings.SAMPLE_SIZE = disagg_model_settings.DESTINATION_SAMPLE_SIZE
-    estimator = estimation.manager.begin_estimation(state, trace_label)
-    if estimator:
-        location_choice.write_estimation_specs(
-            state, estimator, model_settings, model_name + ".yaml"
-        )
+    # estimator = estimation.manager.begin_estimation(state, trace_label)
+    # if estimator:
+    #     location_choice.write_estimation_specs(
+    #         state, estimator, model_settings, model_name + ".yaml"
+    #     )
+    estimator = None

    # Append table references in settings with "proto_"
    # This avoids having to make duplicate copies of config files for disagg accessibilities

activitysim/abm/models/joint_tour_frequency.py

Lines changed: 4 additions & 1 deletion
@@ -192,16 +192,19 @@ def joint_tour_frequency(
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
+        # need to check households as well because the full survey sample may not be used
+        # (e.g. if we set household_sample_size in settings.yaml)
        survey_tours_not_in_tours = survey_tours[
            ~survey_tours.index.isin(joint_tours.index)
+            & survey_tours.household_id.isin(households.index)
        ]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[
            ~joint_tours.index.isin(survey_tours.index)
        ]
-        if len(survey_tours_not_in_tours) > 0:
+        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different
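The added `household_id.isin(households.index)` guard matters whenever `household_sample_size` limits the run to a subset of the survey households. A self-contained toy illustration of the filter, with invented data:

```python
import pandas as pd

# invented toy data: three surveyed joint tours, but only households 1 and 2
# were actually simulated (household_sample_size != 0)
survey_tours = pd.DataFrame(
    {"household_id": [1, 2, 3]}, index=pd.Index([10, 11, 12], name="tour_id")
)
joint_tours = pd.DataFrame(
    {"household_id": [1]}, index=pd.Index([10], name="tour_id")
)
households = pd.DataFrame(index=pd.Index([1, 2], name="household_id"))

# without the household filter, tour 12 would be flagged as missing even
# though its household was never simulated; with it, only tour 11 is flagged
survey_tours_not_in_tours = survey_tours[
    ~survey_tours.index.isin(joint_tours.index)
    & survey_tours.household_id.isin(households.index)
]
print(list(survey_tours_not_in_tours.index))  # [11]
```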

activitysim/abm/models/location_choice.py

Lines changed: 2 additions & 35 deletions
@@ -19,7 +19,6 @@
 from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
 from activitysim.core.util import reindex

-
 """
 The school/workplace location model predicts the zones in which various people will
 work or attend school.
@@ -140,7 +139,7 @@ def _location_sample(

    sample_size = model_settings.SAMPLE_SIZE

-    if estimator:
+    if estimator and model_settings.ESTIMATION_SAMPLE_SIZE >= 0:
        sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
        logger.info(
            f"Estimation mode for {trace_label} using sample size of {sample_size}"
@@ -423,7 +422,7 @@ def location_presample(

    # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
    maz_choices = tour_destination.choose_MAZ_for_TAZ(
-        state, taz_sample, MAZ_size_terms, trace_label
+        state, taz_sample, MAZ_size_terms, trace_label, model_settings
    )

    assert DEST_MAZ in maz_choices
@@ -512,38 +511,6 @@ def run_location_sample(
        trace_label=trace_label,
    )

-    # adding observed choice to alt set when running in estimation mode
-    if estimator:
-        # grabbing survey values
-        survey_persons = estimation.manager.get_survey_table("persons")
-        if "school_location" in trace_label:
-            survey_choices = survey_persons["school_zone_id"].reset_index()
-        elif ("workplace_location" in trace_label) and ("external" not in trace_label):
-            survey_choices = survey_persons["workplace_zone_id"].reset_index()
-        else:
-            return choices
-        survey_choices.columns = ["person_id", "alt_dest"]
-        survey_choices = survey_choices[
-            survey_choices["person_id"].isin(choices.index)
-            & (survey_choices.alt_dest > 0)
-        ]
-        # merging survey destination into table if not available
-        joined_data = survey_choices.merge(
-            choices, on=["person_id", "alt_dest"], how="left", indicator=True
-        )
-        missing_rows = joined_data[joined_data["_merge"] == "left_only"]
-        missing_rows["pick_count"] = 1
-        if len(missing_rows) > 0:
-            new_choices = missing_rows[
-                ["person_id", "alt_dest", "prob", "pick_count"]
-            ].set_index("person_id")
-            choices = choices.append(new_choices, ignore_index=False).sort_index()
-            # making probability the mean of all other sampled destinations by person
-            # FIXME is there a better way to do this? Does this even matter for estimation?
-            choices["prob"] = choices["prob"].fillna(
-                choices.groupby("person_id")["prob"].transform("mean")
-            )
-
    return choices
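In the `_location_sample` hunk above, the estimation override now applies only when `ESTIMATION_SAMPLE_SIZE` is non-negative; a negative setting leaves the regular sample size in place. A hedged sketch of that gating (the helper name is invented; in ActivitySim the values come from the model settings object):

```python
def effective_sample_size(
    sample_size: int, estimation_sample_size: int, estimating: bool
) -> int:
    # mirrors `if estimator and model_settings.ESTIMATION_SAMPLE_SIZE >= 0`:
    # only estimation mode with a non-negative configured value overrides the
    # regular sample size (0 conventionally meaning "use all alternatives")
    if estimating and estimation_sample_size >= 0:
        return estimation_sample_size
    return sample_size
```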
activitysim/abm/models/non_mandatory_tour_frequency.py

Lines changed: 14 additions & 4 deletions
@@ -289,14 +289,22 @@ def non_mandatory_tour_frequency(
        )

        if estimator:
-            estimator.write_spec(model_settings, bundle_directory=True)
+            bundle_directory = True
+            # writing to separate subdirectory for each segment if multiprocessing
+            if state.settings.multiprocess:
+                bundle_directory = False
+            estimator.write_spec(model_settings, bundle_directory=bundle_directory)
            estimator.write_model_settings(
-                model_settings, model_settings_file_name, bundle_directory=True
+                model_settings,
+                model_settings_file_name,
+                bundle_directory=bundle_directory,
            )
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
-            estimator.write_alternatives(alternatives, bundle_directory=True)
+            estimator.write_alternatives(
+                alternatives, bundle_directory=bundle_directory
+            )

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            # should we do it here or have interaction_simulate do it?
@@ -435,8 +443,10 @@ def non_mandatory_tour_frequency(
    if estimator:
        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table("tours").sort_index()
+        # need the household_id check below in case household_sample_size != 0
        non_mandatory_survey_tours = survey_tours[
-            survey_tours.tour_category == "non_mandatory"
+            (survey_tours.tour_category == "non_mandatory")
+            & survey_tours.household_id.isin(persons.household_id)
        ]
        # need to remove the pure-escort tours from the survey tours table for comparison below
        if state.is_table("school_escort_tours"):
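The `bundle_directory` switch above recurs across several models in this commit: single-process runs write shared files to the model's bundle directory, while multiprocess runs write one subdirectory per process/segment that a later coalesce step combines. A sketch of the implied layout; the helper and directory names are assumptions for illustration, not ActivitySim's actual API:

```python
from pathlib import Path

def edb_output_dir(
    edb_root: str, model_name: str, multiprocess: bool, subdir: str = ""
) -> Path:
    # illustrative only: shared bundle directory in single-process runs,
    # per-process subdirectory under multiprocessing (coalesced afterwards)
    base = Path(edb_root) / model_name
    return base / subdir if multiprocess else base
```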

activitysim/abm/models/school_escorting.py

Lines changed: 4 additions & 1 deletion
@@ -503,7 +503,10 @@ def school_escorting(
                coefficients_df, file_name=stage.upper() + "_COEFFICIENTS"
            )
            estimator.write_choosers(choosers)
-            estimator.write_alternatives(alts, bundle_directory=True)
+            if state.settings.multiprocess:
+                estimator.write_alternatives(alts, bundle_directory=False)
+            else:
+                estimator.write_alternatives(alts, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            # should we do it here or have interaction_simulate do it?

activitysim/abm/models/stop_frequency.py

Lines changed: 14 additions & 4 deletions
@@ -191,9 +191,15 @@ def stop_frequency(

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
-            estimator.write_model_settings(
-                model_settings, model_settings_file_name, bundle_directory=True
-            )
+            # writing to separate subdirectory for each segment if multiprocessing
+            if state.settings.multiprocess:
+                estimator.write_model_settings(
+                    model_settings, model_settings_file_name, bundle_directory=False
+                )
+            else:
+                estimator.write_model_settings(
+                    model_settings, model_settings_file_name, bundle_directory=True
+                )
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

@@ -265,7 +271,11 @@ def stop_frequency(

        survey_trips = estimation.manager.get_survey_table(table_name="trips")
        different = False
-        survey_trips_not_in_trips = survey_trips[~survey_trips.index.isin(trips.index)]
+        # need the check below on household_id in case household_sample_size != 0
+        survey_trips_not_in_trips = survey_trips[
+            ~survey_trips.index.isin(trips.index)
+            & survey_trips.household_id.isin(trips.household_id)
+        ]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
