ActivitySim
diff --git a/‎activitysim/core/configuration/filesystem.py‎
Lines changed: 4 additions & 3 deletions b/‎activitysim/core/configuration/filesystem.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎activitysim/core/interaction_sample.py‎
Lines changed: 2 additions & 2 deletions b/‎activitysim/core/interaction_sample.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎activitysim/core/interaction_simulate.py‎
Lines changed: 2 additions & 2 deletions b/‎activitysim/core/interaction_simulate.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎activitysim/core/simulate.py‎
Lines changed: 4 additions & 2 deletions b/‎activitysim/core/simulate.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎docs/_static/favicon.ico‎
15 KB b/‎docs/_static/favicon.ico‎
15 KB
diff --git a/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/dev-guide/install.md‎
Lines changed: 0 additions & 9 deletions b/‎docs/dev-guide/install.md‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎docs/dev-guide/using-sharrow.md‎
Lines changed: 152 additions & 1 deletion b/‎docs/dev-guide/using-sharrow.md‎
Lines changed: 152 additions & 1 deletion
diff --git a/‎docs/users-guide/example_models.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/users-guide/example_models.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/users-guide/index.rst‎
Lines changed: 2 additions & 3 deletions b/‎docs/users-guide/index.rst‎
Lines changed: 2 additions & 3 deletions
@@ -105,8 +105,8 @@ def data_model_dirs_must_exist(cls, data_model_dir, values):
     """
     Name of the output directory for sharrow cache files.
 
-    If not given, a directory named "__sharrowcache__" will be created inside
-    the general cache directory.
+    If not given, the sharrow cache is stored in a run-independent persistent
+    location, according to `platformdirs.user_cache_dir`.  See `persist_sharrow_cache`.
     """
 
     settings_file_name: str = "settings.yaml"
@@ -395,7 +395,8 @@ def get_sharrow_cache_dir(self) -> Path:
         Path
         """
         if self.sharrow_cache_dir is None:
-            out = self.get_cache_dir("__sharrowcache__")
+            self.persist_sharrow_cache()
+            out = self.sharrow_cache_dir
         else:
             out = self.get_working_subdir(self.sharrow_cache_dir)
         if not out.exists():
 
@@ -359,7 +359,7 @@ def _interaction_sample(
                     ),
                     interaction_utilities.values,
                     rtol=1e-2,
-                    atol=0,
+                    atol=1e-6,
                     err_msg="utility not aligned",
                     verbose=True,
                 )
@@ -370,7 +370,7 @@ def _interaction_sample(
                     interaction_utilities_sh.values,
                     interaction_utilities.values,
                     rtol=1e-2,
-                    atol=0,
+                    atol=1e-6,
                 )
             )
             _sh_util_miss1 = interaction_utilities_sh.values[
 
@@ -504,14 +504,14 @@ def to_series(x):
                         sh_util.reshape(utilities.values.shape),
                         utilities.values,
                         rtol=1e-2,
-                        atol=0,
+                        atol=1e-6,
                         err_msg="utility not aligned",
                         verbose=True,
                     )
             except AssertionError as err:
                 print(err)
                 misses = np.where(
-                    ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0)
+                    ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6)
                 )
                 _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)]
                 _u_miss1 = utilities.values[tuple(m[0] for m in misses)]
 
@@ -787,13 +787,15 @@ def eval_utilities(
                 sh_util,
                 utilities.values,
                 rtol=1e-2,
-                atol=0,
+                atol=1e-6,
                 err_msg="utility not aligned",
                 verbose=True,
             )
         except AssertionError as err:
             print(err)
-            misses = np.where(~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0))
+            misses = np.where(
+                ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6)
+            )
             _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)]
             _u_miss1 = utilities.values[tuple(m[0] for m in misses)]
             _sh_util_miss1 - _u_miss1
 
@@ -175,7 +175,7 @@
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-# html_favicon = None
+html_favicon = "favicon.ico"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 
@@ -49,18 +49,9 @@ conda activate ./ASIM-ENV
 git clone https://github.com/ActivitySim/sharrow.git
 python -m pip install -e ./sharrow
 git clone https://github.com/ActivitySim/activitysim.git
-cd activitysim
-git switch develop
-cd ..
 python -m pip install -e ./activitysim
 ```
 
-```{note}
-If the environment create step above fails due to a 404 missing error,
-the main repository may not be up to date with these docs, try this instead:
-https://raw.githubusercontent.com/camsys/activitysim/sharrow-black/conda-environments/activitysim-dev-base.yml
-```
-
 Note the above commands will create an environment with all the
 necessary dependencies, clone both ActivitySim and sharrow from GitHub,
 and `pip install` each of these libraries in editable mode, which
 
@@ -17,6 +17,55 @@ multiprocessing mode after all the compilation for all model components is
 complete.
 ```
 
+### Top-Level Activation Options
+
+Activating sharrow is done at the top level of the model settings file, typically
+`settings.yaml`, by setting the `sharrow` configuration setting to `True`:
+
+```yaml
+sharrow: True
+```
+
+The default operation for sharrow is to attempt to use the sharrow compiler for
+all model specifications, and to revert to the legacy pandas-based evaluation
+if the sharrow compiler encounters a problem.  Alternatively, the `sharrow`
+setting can also be set to `require` or `test`.  The `require` setting
+will cause the model to simply fail if sharrow encounters a problem, which is
+useful if the user is interested in ensuring maximum performance.
+The `test` setting will run the model in a mode where both sharrow and the
+legacy pandas-based evaluation are run on each model specification, and the
+results are compared to ensure they are substantially identical.  This is
+useful for debugging and testing, but is not recommended for production runs
+as it is much slower than running only one evaluation path or the other.
+
+Testing is strongly recommended during model development, as it is possible
+to write expressions that are valid in one evaluation mode but not the other.
+This can happen if model data includes `NaN` values
+(see [Performance Considerations](#performance-considerations)), or when
+using arithmetic on logical values
+(see [Arithmetic on Logical Values](#arithmetic-on-logical-values)).
+
+### Caching of Precompiled Functions
+
+The first time you run a model with sharrow enabled, the compiler will run
+and create a cache of compiled functions.  This can take a long time, especially
+for models with many components or complex utility specifications.  However,
+once the cache is created, subsequent runs of the model will be much faster.
+By default, the cached functions are stored in a subdirectory of the
+`platformdirs.user_cache_dir` directory, which is located in a platform-specific
+location:
+
+- Windows: `%USERPROFILE%\AppData\Local\ActivitySim\ActivitySim\Cache\...`
+- MacOS: `~/Library/Caches/ActivitySim/...`
+- Linux: `~/.cache/ActivitySim/...` or `$XDG_CACHE_HOME/ActivitySim/...`
+
+The cache directory can be changed from this default location by setting the
+[`sharrow_cache_dir`](activitysim.core.configuration.FileSystem.sharrow_cache_dir)
+setting in the `settings.yaml` file.  Note if you change this setting and provide
+a relative path, it will be interpreted as relative to the model working directory,
+and cached functions may not carry over to other model runs unless copied there
+by the user.
+
 ## Model Design Requirements
 
 Activating the `sharrow` optimizations also requires using the new
@@ -231,6 +280,35 @@ such string operations won't appear in utility specifications at all, or if they
 do appear, they are executed only once and stored in a temporary value for re-use
 as needed.
 
+A good approach to reduce string operations in model spec files is to convert
+string columns to integer or categorical columns in preprocessors.  This can
+be done using the `map` method, which can be used to convert strings to integers,
+for example:
+
+    `df['fuel_type'].map({'Gas': 1, 'Diesel': 2, 'Hybrid': 3}).fillna(-1).astype(int)`
+
+Alternatively, data columns can be converted to categorical columns with well-defined
+structures. Recent versions of sharrow have made significant improvements in
+handling of unordered categorical values, allowing for the use of possibly
+more intuitive categorical columns.  For example, the fuel type column above
+could instead be redefined as a categorical column with the following code:
+
+    `df['fuel_type'].astype(pd.CategoricalDtype(categories=['Gas', 'Diesel', 'Hybrid'], ordered=False))`
+
+It is important that the categories are defined with the same set of values
+in the same order, as any deviation from this will void the compiler cache
+and cause the model specification to be recompiled.  This means that using
+`x.astype('category')` is not recommended, as the categories will be inferred
+from the data and may not be consistent across multiple calls to the model
+specification evaluator.
+
+```{note}
+Beginning with ActivitySim version 1.3, string-valued
+columns created in preprocessors are converted to categorical columns automatically,
+which means that ignoring encoding for string-valued outputs is equivalent to
+using the `astype('category')` method, and is not recommended.
+```
+
 For models with utility expressions that include a lot of string comparisons,
 (e.g. because they are built for the legacy `pandas.eval` interpreter and have not
 been updated) sharrow can be disabled by setting
@@ -410,7 +488,7 @@ taz_skims:
 ```
 
 If groups of similarly named variables should have the same encoding applied,
-they can be identifed by regular expressions ("regex") instead of explicitly
+they can be identified by regular expressions ("regex") instead of explicitly
 giving each name.  For example:
 
 ```yaml
@@ -485,3 +563,76 @@ taz_skims:
 
 For more details on all the settings available for digital encoding, see
 [DigitalEncoding](activitysim.core.configuration.network.DigitalEncoding).
+
+## Troubleshooting
+
+If you encounter errors when running the model with sharrow enabled, it is
+important to address them before using the model for analysis.  This is
+especially important when errors are found running in "test" mode (activated
+by `sharrow: test` in the top level settings.yaml).  Errors may
+indicate that either sharrow or the legacy evaluator is not correctly processing
+the mathematical expressions in the utility specifications.
+
+### "utility not aligned" Error
+
+One common error that can occur when running the model with sharrow in "test"
+mode is the "utility not aligned" error.  This error occurs when a sharrow
+compiled utility calculation does not sufficiently match the legacy utility
+calculation.  We say "sufficiently" here because the two calculations may have
+slight differences due to numerical precision optimizations applied by sharrow.
+These optimizations can result in minor differences in the final utility values,
+which are typically inconsequential for model results.  However, if the differences
+are too large, the "utility not aligned" error will be raised.  This error does
+not indicate whether the incorrect result is from the sharrow or legacy calculation
+(or both), and it is up to the user to determine how to align the calculations
+so they are reflective of the model developer's intent.
+
+To troubleshoot the "utility not aligned" error, the user can use a Python debugger
+to compare the utility values calculated by sharrow and the legacy evaluator.
+ActivitySim also includes error handler code that will attempt to find the
+problematic utility expression and print it to the console or log file, under the
+heading "possible problematic expressions".  This can be helpful in quickly narrowing
+down which lines of a specification file are causing the error.
+
+Common causes of the "utility not aligned" error include:
+
+- model data includes `NaN` values but the component settings do not
+  disable `fastmath` (see [Performance Considerations](#performance-considerations))
+- incorrect use of arithmetic on logical values (see
+  [Arithmetic on Logical Values](#arithmetic-on-logical-values))
+
+### Insufficient system resources
+
+For large models run on large servers, it is possible to overwhelm the system
+with too many processes and threads, which can result in the following error:
+
+```
+OSError: Insufficient system resources exist to complete the requested service
+```
+
+This error can be resolved by reducing the number of processes and/or threads per
+process.  See [Multiprocessing](../users-guide/performance/multiprocessing.md) and
+[Multithreading](../users-guide/performance/multithreading.md) in the User's Guide
+for more information on how to adjust these settings.
+
+### Permission Error
+
+If running a model using multiprocessing with sharrow enabled, it is necessary
+to have pre-compiled all the utility specifications to prevent the multiple
+processes from competing to write to the same cache location on disk.  Failure
+to do this can result in a permission error, as some processes may be unable to
+write to the cache location.
+
+```
+PermissionError: The process cannot access the file because it is being used by another process
+```
+
+To resolve this error, run the model with sharrow enabled in single-process mode
+to pre-compile all the utility specifications.  If that does not resolve the error,
+it is possible that some compiling is being triggered in multiprocess steps that
+is not being handled in the single process mode.  This is likely due to the presence
+of string or categorical columns created in a preprocessor that are not being
+stored in a stable data format.  To resolve this error, ensure that all expressions
+in pre-processors are written in a manner that results in stable data types (e.g.
+integers, floats, or categorical columns with a fixed set of categories).  See
+[Performance Considerations](#performance-considerations) for examples.
@@ -2756,7 +2756,7 @@ Skims are named <PATH TYPE>_<MEASURE>__<TIME PERIOD>:
 Configuration
 _____________
 
-This section has been moved to :ref:`configuration`.
+This section has been moved to :ref:`user_configuration`.
 
 .. _sub-model-spec-files:
 
 
@@ -33,9 +33,10 @@ Contents
 
 .. toctree::
    :maxdepth: 2
-   
+
    modelsetup
    ways_to_run
+   performance/index
    run_primary_example
    model_anatomy
    ../howitworks
@@ -45,5 +46,3 @@ Contents
    .. toctree::
    :maxdepth: 1
    other_examples
-
-
Original file line number	Diff line number	Diff line change
`@@ -359,7 +359,7 @@ def _interaction_sample(`
`359`	`359`	`),`
`360`	`360`	`interaction_utilities.values,`
`361`	`361`	`rtol=1e-2,`
`362`		`- atol=0,`
	`362`	`+ atol=1e-6,`
`363`	`363`	`err_msg="utility not aligned",`
`364`	`364`	`verbose=True,`
`365`	`365`	`)`
`@@ -370,7 +370,7 @@ def _interaction_sample(`
`370`	`370`	`interaction_utilities_sh.values,`
`371`	`371`	`interaction_utilities.values,`
`372`	`372`	`rtol=1e-2,`
`373`		`- atol=0,`
	`373`	`+ atol=1e-6,`
`374`	`374`	`)`
`375`	`375`	`)`
`376`	`376`	`_sh_util_miss1 = interaction_utilities_sh.values[`