Update docs

bennyistanto · bennyistanto · commit edd86a1fc8df · 2026-01-29T11:13:37.000+07:00
diff --git a/docs/get-started/data-model.qmd b/docs/get-started/data-model.qmd
@@ -66,9 +66,9 @@ When you pass an `xarray.Dataset` (rather than a `DataArray`), the code searches
 
 | Variable type | Auto-detected names |
 |--------------|---------------------|
-| Precipitation | `precip`, `prcp`, `pr`, `ppt`, `rainfall` |
+| Precipitation | `precip`, `prcp`, `precipitation`, `pr`, `ppt`, `rainfall` |
 | PET | `pet`, `eto`, `et`, `evap` |
-| Temperature | `temp`, `tas`, `t2m` |
+| Temperature | `temp`, `tas`, `tasmin`, `tasmax`, `t2m`, `tmean`, `tmin`, `tmax` |
 
 You can always override auto-detection with explicit parameters: `precip_var_name=`, `pet_var_name=`, `temp_var_name=`.
 
@@ -88,6 +88,33 @@ The code enforces these minimums per grid cell during calibration. If a cell fai
 | Minimum non-zero values | 10 | Fitting skipped (NaN output) |
 | Maximum zero proportion | 95% | Fitting skipped (NaN output) |
 
+::: {.callout-warning}
+## Arid regions and zero precipitation
+
+**Problem:** In deserts and semi-arid regions, many months have zero precipitation. If >95% of months are zero, fitting fails entirely (NaN output). Even with 80-95% zeros, fitting quality degrades.
+
+**Why this matters:** The Pearson Type III distribution requires at least 4 non-zero values for L-moments computation. When this fails, the code falls back to the Method of Moments, then to a normal approximation — but results may be unreliable.
+
+**Diagnosis:** Use `diagnose_data()` before running SPI/SPEI to check zero proportions:
+
+```python
+from distributions import diagnose_data
+
+# Check a single location
+ts = precip.isel(lat=50, lon=100).values
+diag = diagnose_data(ts)
+print(f"Zero proportion: {diag.zero_proportion:.1%}")
+print(f"Recommendation: {diag.recommendation}")
+```
+
+**Options for arid regions:**
+
+1. **Accept NaN** — If a region is truly hyper-arid, SPI may not be meaningful there
+2. **Use longer time scales** — SPI-12 or SPI-24 aggregates more precipitation, reducing zero proportion
+3. **Mask desert pixels** — Exclude cells with mean annual precipitation < 50 mm
+4. **Use Gamma distribution** — More robust than Pearson III for high-zero data (but still limited)
+:::
+
 ::: {.callout-tip}
 ## Quick sanity checks (recommended)
 ```python
diff --git a/docs/user-guide/spei.qmd b/docs/user-guide/spei.qmd
@@ -192,8 +192,32 @@ print(f"Mean P-PET: {water_balance.mean().values:.1f} mm/month")
 
 ### Issue 3: PET > Precipitation Always
 
-**Problem:** In deserts, PET always exceeds precipitation    
-**Solution:** SPEI is designed for this — shows persistent dry conditions    
+**Problem:** In deserts, PET always exceeds precipitation\
+**Solution:** SPEI is designed for this — shows persistent dry conditions
+
+### Issue 4: NaN in Arid Regions (Distribution Fitting Failure)
+
+**Problem:** Hyper-arid grid cells return NaN even with valid data\
+**Cause:** Pearson Type III requires sufficient non-zero values for L-moments computation. When >95% of water balance values cluster near zero or are identical, fitting fails.
+
+**Diagnosis:**
+```python
+from distributions import diagnose_data
+
+# Check the water balance distribution
+water_balance = (precip - pet).isel(lat=50, lon=100).values
+diag = diagnose_data(water_balance)
+print(f"Zero proportion: {diag.zero_proportion:.1%}")
+print(f"Suitable for Pearson III: {diag.is_suitable_pearson3}")
+```
+
+**Solutions:**
+
+- Use **Gamma distribution** instead of Pearson III (more robust for extreme cases)
+- Use longer time scales (SPEI-12, SPEI-24)
+- Mask hyper-arid pixels where SPEI is not meaningful
+
+See [Data Model — Arid Regions](../get-started/data-model.qmd#arid-regions-and-zero-precipitation) for detailed guidance.
 
 ## Visualization
 
diff --git a/docs/user-guide/spi.qmd b/docs/user-guide/spi.qmd
@@ -242,7 +242,7 @@ if precip.dims != ('time', 'lat', 'lon'):
 
 ### Issue 3: Memory Error
 
-**Problem:** Out of memory for large datasets    
+**Problem:** Out of memory for large datasets\
 **Solution:** Use Dask-enabled version
 
 ```python
@@ -253,6 +253,29 @@ precip = xr.open_dataset('precip.nc', chunks={'time': 100})['precip']
 spi_12 = spi(precip, scale=12)  # Automatically uses Dask if input is chunked
 ```
 
+### Issue 4: NaN in Arid Regions
+
+**Problem:** Desert/semi-arid grid cells return NaN even with valid data\
+**Cause:** Too many zero-precipitation months (>95% zeros triggers fitting failure)
+
+**Diagnosis:**
+```python
+from distributions import diagnose_data
+
+# Check zero proportion at a location
+ts = precip.isel(lat=50, lon=100).values
+diag = diagnose_data(ts)
+print(f"Zero proportion: {diag.zero_proportion:.1%}")
+```
+
+**Solutions:**
+
+- Use longer time scales (SPI-12, SPI-24) to reduce zero proportion
+- Mask hyper-arid pixels (mean annual precip < 50 mm)
+- Accept that SPI may not be meaningful for true deserts
+
+See [Data Model — Arid Regions](../get-started/data-model.qmd#arid-regions-and-zero-precipitation) for detailed guidance.
+
 ## Visualization
 
 ### Quick Map
diff --git a/src/config.py b/src/config.py
@@ -135,9 +135,9 @@ def unit(self) -> str:
 
 # Variable name patterns for auto-detection in NetCDF datasets.
 # Add patterns here if your data uses different variable names.
-PRECIP_VAR_PATTERNS = ['precip', 'prcp', 'pr', 'ppt', 'rainfall']
+PRECIP_VAR_PATTERNS = ['precip', 'prcp', 'precipitation', 'pr', 'ppt', 'rainfall']
 PET_VAR_PATTERNS = ['pet', 'eto', 'et', 'evap']
-TEMP_VAR_PATTERNS = ['temp', 'tas', 't2m']
+TEMP_VAR_PATTERNS = ['temp', 'tas', 'tasmin', 'tasmax', 't2m', 'tmean', 'tmin', 'tmax']
 
 
 # =============================================================================
diff --git a/src/indices.py b/src/indices.py
@@ -582,18 +582,21 @@ def spei(
     precip_var_name: Optional[str] = None,
     pet_var_name: Optional[str] = None,
     temp_var_name: Optional[str] = None,
-    distribution: str = DEFAULT_DISTRIBUTION
+    distribution: str = DEFAULT_DISTRIBUTION,
+    pet_method: str = 'thornthwaite',
+    temp_min: Optional[Union[np.ndarray, xr.DataArray]] = None,
+    temp_max: Optional[Union[np.ndarray, xr.DataArray]] = None
 ) -> Union[xr.DataArray, Tuple[xr.DataArray, Dict[str, np.ndarray]]]:
     """
     Calculate Standardized Precipitation Evapotranspiration Index (SPEI).
 
     SPEI uses the water balance (P - PET) instead of just precipitation.
     PET can be provided directly or calculated from temperature using
-    the Thornthwaite method.
+    the Thornthwaite or Hargreaves-Samani method.
 
     :param precip: precipitation data in mm
     :param pet: potential evapotranspiration in mm (optional if temperature provided)
-    :param temperature: temperature in °C for PET calculation (optional if PET provided)
+    :param temperature: mean temperature in C for PET calculation (optional if PET provided)
     :param latitude: latitude for PET calculation (required if using temperature)
     :param scale: accumulation period in time steps
     :param periodicity: 'monthly' or 'daily'
@@ -608,6 +611,11 @@ def spei(
     :param distribution: distribution type ('gamma', 'pearson3', 'log_logistic',
         'gev', 'gen_logistic'). Default: 'gamma'.
         Note: Pearson III or Log-Logistic are recommended for SPEI.
+    :param pet_method: PET calculation method ('thornthwaite' or 'hargreaves').
+        - 'thornthwaite': Uses only mean temperature (default)
+        - 'hargreaves': Uses mean, min, max temperature (better for arid regions)
+    :param temp_min: minimum temperature in C (required for Hargreaves method)
+    :param temp_max: maximum temperature in C (required for Hargreaves method)
     :return: SPEI values as xarray DataArray, or tuple (SPEI, params)
 
     Example:
@@ -617,9 +625,13 @@ def spei(
         >>> # With Pearson III distribution (recommended for SPEI)
         >>> spei_12 = spei(precip_da, pet=pet_da, scale=12, distribution='pearson3')
 
-        >>> # With temperature (auto-compute PET)
+        >>> # With temperature - Thornthwaite method (default)
         >>> spei_12 = spei(precip_da, temperature=temp_da, latitude=lat_da, scale=12)
 
+        >>> # With temperature - Hargreaves method (better for arid regions)
+        >>> spei_12 = spei(precip_da, temperature=temp_mean, latitude=lat_da, scale=12,
+        ...               pet_method='hargreaves', temp_min=tmin, temp_max=tmax)
+
         >>> # Save and reuse parameters
         >>> spei_12, params = spei(precip_da, pet=pet_da, scale=12, return_params=True)
         >>> save_fitting_params(params, 'spei_params.nc', scale=12,
@@ -666,11 +678,18 @@ def spei(
             pet_array = np.asarray(pet)
     elif temperature is not None:
         # Compute PET from temperature
-        _logger.info("Computing PET from temperature using Thornthwaite method")
+        pet_method = pet_method.lower()
+        _logger.info(f"Computing PET from temperature using {pet_method.capitalize()} method")
 
         if latitude is None:
             raise ValueError("latitude required for PET calculation from temperature")
 
+        if pet_method == 'hargreaves' and (temp_min is None or temp_max is None):
+            raise ValueError(
+                "Hargreaves method requires temp_min and temp_max parameters. "
+                "Use pet_method='thornthwaite' if only mean temperature is available."
+            )
+
         # Handle temperature input
         if isinstance(temperature, xr.Dataset):
             if temp_var_name is None:
@@ -693,8 +712,13 @@ def spei(
         if data_start_year is None:
             raise ValueError("data_start_year required for PET calculation")
 
-        # Calculate PET
-        pet_da = calculate_pet(temp_da, latitude, data_start_year)
+        # Calculate PET using specified method
+        pet_da = calculate_pet(
+            temp_da, latitude, data_start_year,
+            method=pet_method,
+            temp_min=temp_min,
+            temp_max=temp_max
+        )
         pet_array = pet_da.values if isinstance(pet_da, xr.DataArray) else pet_da
     else:
         raise ValueError("Either 'pet' or 'temperature' (with 'latitude') must be provided")
@@ -822,14 +846,17 @@ def spei_multi_scale(
     pet_var_name: Optional[str] = None,
     temp_var_name: Optional[str] = None,
     distribution: str = DEFAULT_DISTRIBUTION,
-    global_attrs: Optional[Dict] = None
+    global_attrs: Optional[Dict] = None,
+    pet_method: str = 'thornthwaite',
+    temp_min: Optional[Union[np.ndarray, xr.DataArray]] = None,
+    temp_max: Optional[Union[np.ndarray, xr.DataArray]] = None
 ) -> Union[xr.Dataset, Tuple[xr.Dataset, Dict[int, Dict[str, np.ndarray]]]]:
     """
     Calculate SPEI for multiple time scales.
 
     :param precip: precipitation data
     :param pet: potential evapotranspiration (optional if temperature provided)
-    :param temperature: temperature for PET calculation
+    :param temperature: mean temperature for PET calculation
     :param latitude: latitude for PET calculation
     :param scales: list of accumulation scales (e.g., [1, 3, 6, 12])
     :param periodicity: 'monthly' or 'daily'
@@ -844,6 +871,9 @@ def spei_multi_scale(
         'gev', 'gen_logistic'). Default: 'gamma'
     :param global_attrs: optional dict of global attributes to override defaults
         (e.g., {'institution': 'My Org', 'source': 'My Project'})
+    :param pet_method: PET calculation method ('thornthwaite' or 'hargreaves')
+    :param temp_min: minimum temperature (required for Hargreaves)
+    :param temp_max: maximum temperature (required for Hargreaves)
     :return: Dataset with SPEI for all scales
 
     Example:
@@ -852,6 +882,10 @@ def spei_multi_scale(
         >>> # With Pearson III (recommended for SPEI)
         >>> spei_ds = spei_multi_scale(precip_da, pet=pet_da, scales=[3, 12],
         ...                            distribution='pearson3')
+        >>> # With Hargreaves PET
+        >>> spei_ds = spei_multi_scale(precip_da, temperature=tmean, latitude=lat,
+        ...                            scales=[3, 12], pet_method='hargreaves',
+        ...                            temp_min=tmin, temp_max=tmax)
     """
     dist = distribution.lower()
     _logger.info(f"Computing SPEI for scales: {scales} (distribution={dist})")
@@ -879,7 +913,10 @@ def spei_multi_scale(
             precip_var_name=precip_var_name,
             pet_var_name=pet_var_name,
             temp_var_name=temp_var_name,
-            distribution=dist
+            distribution=dist,
+            pet_method=pet_method,
+            temp_min=temp_min,
+            temp_max=temp_max
         )
 
         var_name_out = get_variable_name('spei', s, periodicity, distribution=dist)
diff --git a/src/utils.py b/src/utils.py