diff --git a/.gitignore b/.gitignore
index f5378e980..b3f916c9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,5 @@ coverage.xml
 # pixi environments
 .pixi/*
 !.pixi/config.toml
+SKILL.md
+CLAUDE.md
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
index 25dc32b2b..daa8faf4d 100644
--- a/docs/_quarto.yml
+++ b/docs/_quarto.yml
@@ -37,36 +37,36 @@ website:
         file: pyfixest-sprint.md
       - text: Learn more
         menu:
-        - text: "Regression Tables and Summary Statistics"
-          file: table-layout.qmd
-        - text: "Hypothesis Testing and Marginal Effects"
-          file: marginaleffects.qmd
-        - text: "Difference-in-Differences Estimation"
-          file: difference-in-differences.qmd
-        - file: multiple_testing.ipynb
-          text: "Multiple Testing Corrections"
-        - file: regression_decomposition.ipynb
-          text: "Regression Decomposition"
-        - file: ssc.qmd
-          text: "On Small Sample Corrections"
-        - file: quantile-regression.qmd
-          text: "Quantile Regression"
-        #- text: "Compare fixest & PyFixest"
-        #  file: compare-fixest-pyfixest.qmd
-        - text: "Compare Stata & PyFixest"
-          file: stata-2-pyfixest.qmd
-        - text: "PyFixest on the GPU via CuPy"
-          file: pyfixest-gpu-cupy.ipynb
-        - text: "PyFixest on the GPU via JAX"
-          file: pyfixest_gpu.ipynb
-        - text: "Other Resources around PyFixest"
-          file: resources.qmd
-        - text: "Replicating 'The Effect' with PyFixest"
-          file: replicating-the-effect.qmd
-        - text: "Replicating 'The Mixtape' with PyFixest"
-          file: mixtape.ipynb
-        - text: "Replicating 'Causal Inference for the Brave and True' with PyFixest"
-          file: brave_true.ipynb
+          - text: "Regression Tables and Summary Statistics"
+            file: table-layout.qmd
+          - text: "Hypothesis Testing and Marginal Effects"
+            file: marginaleffects.qmd
+          - text: "Difference-in-Differences Estimation"
+            file: difference-in-differences.qmd
+          - file: multiple_testing.ipynb
+            text: "Multiple Testing Corrections"
+          - file: regression_decomposition.ipynb
+            text: "Regression Decomposition"
+          - file: ssc.qmd
+            text: "On Small Sample Corrections"
+          - file: quantile-regression.qmd
+            text: "Quantile Regression"
+          #- text: "Compare fixest & PyFixest"
+          #  file: compare-fixest-pyfixest.qmd
+          - text: "Compare Stata & PyFixest"
+            file: stata-2-pyfixest.qmd
+          - text: "PyFixest on the GPU via CuPy"
+            file: pyfixest-gpu-cupy.ipynb
+          - text: "PyFixest on the GPU via JAX"
+            file: pyfixest_gpu.ipynb
+          - text: "Other Resources around PyFixest"
+            file: resources.qmd
+          - text: "Replicating 'The Effect' with PyFixest"
+            file: replicating-the-effect.qmd
+          - text: "Replicating 'The Mixtape' with PyFixest"
+            file: mixtape.ipynb
+          - text: "Replicating 'Causal Inference for the Brave and True' with PyFixest"
+            file: brave_true.ipynb
 
 quartodoc:
   package: pyfixest
@@ -116,6 +116,13 @@ quartodoc:
         - report.coefplot
         - report.iplot
         - did.visualize.panelview
+    - title: Formula Parsing & Model Matrix
+      desc: |
+        Internal APIs for formula parsing and model matrix construction
+      contents:
+        - estimation.formula.parse.Formula
+        - estimation.formula.model_matrix.ModelMatrix
+        - estimation.formula.factor_interaction.factor_interaction
     - title: Misc / Utilities
       desc: |
         PyFixest internals and utilities
diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml
index 0c696cfd1..8bc2f3b1d 100644
--- a/docs/_sidebar.yml
+++ b/docs/_sidebar.yml
@@ -32,6 +32,12 @@ website:
       - reference/report.iplot.qmd
       - reference/did.visualize.panelview.qmd
       section: Summarize and Visualize
+    - contents:
+      - reference/estimation.formula.parse.Formula.qmd
+      - reference/estimation.formula.parse.parse.qmd
+      - reference/estimation.formula.model_matrix.ModelMatrix.qmd
+      - reference/estimation.formula.factor_interaction.factor_interaction.qmd
+      section: Formula Parsing & Model Matrix
     - contents:
       - reference/estimation.demean.qmd
       - reference/estimation.detect_singletons.qmd
diff --git a/docs/acknowledgements.md b/docs/acknowledgements.md
index 2529b6462..dfef5edd1 100644
--- a/docs/acknowledgements.md
+++ b/docs/acknowledgements.md
@@ -22,7 +22,7 @@ More concretely, we have borrowed the following API conventions and ideas direct
 | **On the fly variance covariance adjustments** | As in `fixest`, you can adjust the vcov post estimation by calling a `vcov()` method on the results object (`Feols` in pyfixest and `fixest` in `fixest`) |
 | **Predict method for fixed effects** | The `predict()`  and `fixef()` methods in PyFixest mirrors fixest's functionality for obtaining fitted values, fixed effects, and linear predictions |
 
-You can learn more about fixest [on github](https://github.com/lrberge/fixest), via its [documentation](https://lrberge.github.io/fixest/), or by reading the [associated paper](https://arxiv.org/abs/2601.21749). 
+You can learn more about fixest [on github](https://github.com/lrberge/fixest), via its [documentation](https://lrberge.github.io/fixest/), or by reading the [associated paper](https://arxiv.org/abs/2601.21749).
 
 PyFixest is tested against fixest via **rpy2** to ensure numerical equivalence
 (usually `rtol = 1e-08`, `atol = 1e-08`) for coefficients,
diff --git a/docs/quickstart.qmd b/docs/quickstart.qmd
index 4fe552932..90d0bdae8 100644
--- a/docs/quickstart.qmd
+++ b/docs/quickstart.qmd
@@ -507,7 +507,7 @@ multi_fit.etable()
 You can access an individual model by its name - i.e. a formula - via the `all_fitted_models` attribute.
 
 ```{python}
-multi_fit.all_fitted_models["Y~X1"].tidy()
+multi_fit.all_fitted_models["Y ~ X1"].tidy()
 ```
 
 or equivalently via the `fetch_model` method:
diff --git a/pyfixest/did/did2s.py b/pyfixest/did/did2s.py
index af20b83da..4381d6156 100644
--- a/pyfixest/did/did2s.py
+++ b/pyfixest/did/did2s.py
@@ -8,8 +8,8 @@
 from pyfixest.did.did import DID
 from pyfixest.estimation import feols
 from pyfixest.estimation.feols_ import Feols
-from pyfixest.estimation.FormulaParser import FixestFormulaParser
-from pyfixest.estimation.model_matrix_fixest_ import model_matrix_fixest
+from pyfixest.estimation.formula import model_matrix
+from pyfixest.estimation.formula.parse import Formula
 
 
 class DID2S(DID):
@@ -304,37 +304,48 @@ def _did2s_vcov(
 
     # some formula parsing to get the correct formula for the first and second stage model matrix
     first_stage_x, first_stage_fe = first_stage.split("|")
-    first_stage_fe_list = [f"C({i})" for i in first_stage_fe.split("+")]
+    first_stage_fe_list = [f"C({i.strip()})" for i in first_stage_fe.split("+")]
     first_stage_fe_fml = "+".join(first_stage_fe_list)
-    first_stage = f"{first_stage_x}+{first_stage_fe_fml}"
-
-    second_stage = f"{second_stage}"
+    first_stage_fml = f"{first_stage_x}+{first_stage_fe_fml}"
 
     # note for future Alex: intercept needs to be dropped! it is not as fixed
     # effects are converted to dummies, hence has_fixed checks are False
 
-    FML1 = FixestFormulaParser(f"{yname} {first_stage}")
-    FML2 = FixestFormulaParser(f"{yname} {second_stage}")
-    FixestFormulaDict1 = FML1.FixestFormulaDict
-    FixestFormulaDict2 = FML2.FixestFormulaDict
+    # Create Formula objects for the new model_matrix system.
+    # First stage: use `- 1` so that C() dummy encoding keeps all levels,
+    # matching the feols demeaning approach (which implicitly includes all
+    # fixed-effect levels). Removing `- 1` would cause formulaic to drop
+    # reference levels, changing the GMM vcov standard errors.
+    FML1 = Formula(
+        _second_stage=f"{yname} ~ {first_stage_fml.replace('~', '').strip()} - 1",
+    )
+    # Second stage: do NOT use `- 1`. Formulaic needs the intercept present
+    # for full-rank encoding (dropping a reference level for factors like
+    # i(treat)). The intercept column is then removed by drop_intercept=True
+    # below, matching what feols does in _did2s_estimate.
+    FML2 = Formula(
+        _second_stage=f"{yname} ~ {second_stage.replace('~', '').strip()}",
+    )
 
-    mm_dict_first_stage = model_matrix_fixest(
-        FixestFormula=next(iter(FixestFormulaDict1.values()))[0],
+    mm_first_stage = model_matrix.create_model_matrix(
+        formula=FML1,
         data=data,
         weights=None,
         drop_singletons=False,
-        drop_intercept=False,
+        ensure_full_rank=True,
+        drop_intercept=True,
     )
-    X1 = cast(pd.DataFrame, mm_dict_first_stage.get("X"))
+    X1 = mm_first_stage.independent
 
-    mm_second_stage = model_matrix_fixest(
-        FixestFormula=next(iter(FixestFormulaDict2.values()))[0],
+    mm_second_stage = model_matrix.create_model_matrix(
+        formula=FML2,
         data=data,
         weights=None,
         drop_singletons=False,
+        ensure_full_rank=True,
         drop_intercept=True,
-    )  # reference values not dropped, multicollinearity error
-    X2 = cast(pd.DataFrame, mm_second_stage.get("X"))
+    )
+    X2 = mm_second_stage.independent
 
     X1 = csr_matrix(X1.to_numpy() * weights_array[:, None])
     X2 = csr_matrix(X2.to_numpy() * weights_array[:, None])
@@ -359,10 +370,7 @@ def _did2s_vcov(
     X10 = X10.tocsr()
     X2 = X2.tocsr()  # type: ignore
 
-    for (
-        _,
-        g,
-    ) in enumerate(clustid):
+    for _, g in enumerate(clustid):
         idx_g: np.ndarray = cluster_col.values == g
         X10g = X10[idx_g, :]
         X2g = X2[idx_g, :]
diff --git a/pyfixest/did/saturated_twfe.py b/pyfixest/did/saturated_twfe.py
index d6c5153c8..815072643 100644
--- a/pyfixest/did/saturated_twfe.py
+++ b/pyfixest/did/saturated_twfe.py
@@ -203,15 +203,14 @@ def aggregate(
         treated_periods = list(period_set)
 
         df_agg = pd.DataFrame(
-            index=treated_periods,
+            index=pd.Index(treated_periods, name="period"),
             columns=["Estimate", "Std. Error", "t value", "Pr(>|t|)", "2.5%", "97.5%"],
         )
-        df_agg.index.name = "period"
 
         for period in treated_periods:
             R = np.zeros(len(coefs))
             for cohort in cohort_list:
-                cohort_pattern = rf"\[{re.escape(str(period))}\]:.*{re.escape(cohort)}$"
+                cohort_pattern = rf"^(?:.+)::{re.escape(str(period))}:(?:.+)::{re.escape(str(cohort))}$"
                 match_idx = [
                     i
                     for i, name in enumerate(coefnames)
@@ -319,28 +318,20 @@ def _saturated_event_study(
     unit_id: str,
     cluster: Optional[str] = None,
 ):
-    cohort_dummies = pd.get_dummies(
-        df.first_treated_period, drop_first=True, prefix="cohort_dummy"
+    ff = f"{outcome} ~ i(rel_time, first_treated_period, ref = -1.0, ref2=0.0) | {unit_id} + {time_id}"
+    m = feols(fml=ff, data=df, vcov={"CRV1": cluster})  # type: ignore
+    res = m.tidy().reset_index()
+    res = res.join(
+        res["Coefficient"].str.extract(
+            r".+::(?P<time>.+):.+::(?P<cohort>.+)", expand=True
+        )
     )
-    df_int = pd.concat([df, cohort_dummies], axis=1)
-
-    ff = f"""
-                {outcome} ~
-                {"+".join([f"i(rel_time, {x}, ref = -1.0)" for x in cohort_dummies.columns.tolist()])}
-                | {unit_id} + {time_id}
-                """
-    m = feols(fml=ff, data=df_int, vcov={"CRV1": cluster})  # type: ignore
-    res = m.tidy()
+    res["time"] = res["time"].astype(float)
     # create a dict with cohort specific effect curves
     res_cohort_eventtime_dict: dict[str, dict[str, pd.DataFrame | np.ndarray]] = {}
-    for cohort in cohort_dummies.columns:
-        res_cohort = res.filter(like=cohort, axis=0)
-        event_time = (
-            res_cohort.index.str.extract(r"\[(?:T\.)?(-?\d+(?:\.\d+)?)\]")
-            .astype(float)
-            .values.flatten()
-        )
-        res_cohort_eventtime_dict[cohort] = {"est": res_cohort, "time": event_time}
+    for cohort, res_cohort in res.groupby("cohort"):
+        event_time = res_cohort["time"].to_numpy()
+        res_cohort_eventtime_dict[str(cohort)] = {"est": res_cohort, "time": event_time}
 
     return m, res_cohort_eventtime_dict
 
@@ -366,11 +357,10 @@ def _test_treatment_heterogeneity(
     """
     mmres = model.tidy().reset_index()
     P = mmres.shape[0]
-    mmres[["time", "cohort"]] = mmres.Coefficient.str.split(":", expand=True)
-    mmres["time"] = mmres.time.str.extract(r"\[(?:T\.)?(-?\d+(?:\.\d+)?)\]").astype(
-        float
+    mmres[["time", "cohort"]] = mmres["Coefficient"].str.extract(
+        r".+::(?P<time>.+):.+::(?P<cohort>.+)", expand=True
     )
-    mmres["cohort"] = mmres.cohort.str.extract(r"(\d+)")
+    mmres["time"] = mmres["time"].astype(float)
     # indices of coefficients that are deviations from common event study coefs
     event_study_coefs = mmres.loc[~(mmres.cohort.isna()) & (mmres.time > 0)].index
     # Method 2 (K x P) - more efficient
diff --git a/pyfixest/errors/__init__.py b/pyfixest/errors/__init__.py
index 65aa4309a..79240aca5 100644
--- a/pyfixest/errors/__init__.py
+++ b/pyfixest/errors/__init__.py
@@ -58,6 +58,10 @@ class EmptyVcovError(Exception):  # noqa: D101
     pass
 
 
+class FormulaSyntaxError(Exception):  # noqa: D101
+    pass
+
+
 __all__ = [
     "CovariateInteractionError",
     "DepvarIsNotNumericError",
@@ -67,6 +71,7 @@ class EmptyVcovError(Exception):  # noqa: D101
     "EndogVarsAsCovarsError",
     "FeatureDeprecationError",
     "FixedEffectInteractionError",
+    "FormulaSyntaxError",
     "InstrumentsAsCovarsError",
     "MatrixNotFullRankError",
     "NanInClusterVarError",
diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
index e36910e38..cdd41c990 100644
--- a/pyfixest/estimation/FixestMulti_.py
+++ b/pyfixest/estimation/FixestMulti_.py
@@ -12,7 +12,7 @@
 from pyfixest.estimation.feols_compressed_ import FeolsCompressed
 from pyfixest.estimation.fepois_ import Fepois
 from pyfixest.estimation.feprobit_ import Feprobit
-from pyfixest.estimation.FormulaParser import FixestFormulaParser
+from pyfixest.estimation.formula.parse import Formula
 from pyfixest.estimation.literals import (
     DemeanerBackendOptions,
     QuantregMethodOptions,
@@ -214,7 +214,6 @@ def _prepare_estimation(
         self._ssc_dict: dict[str, Union[str, bool]] = {}
         self._drop_singletons = False
         self._is_multiple_estimation = False
-        self._drop_intercept = False
         self._weights = weights
         self._has_weights = False
         if weights is not None:
@@ -225,16 +224,19 @@ def _prepare_estimation(
         self._quantile_tol = quantile_tol
         self._quantile_maxiter = quantile_maxiter
 
-        FML = FixestFormulaParser(fml)
-        FML.set_fixest_multi_flag()
+        formula_dictionary = Formula.parse_to_dict(fml)
         self._is_multiple_estimation = (
-            FML._is_multiple_estimation
+            sum(len(v) for v in formula_dictionary.values()) > 1
             or self._run_split
             or (isinstance(quantile, list) and len(quantile) > 1)
         )
-        self.FixestFormulaDict = FML.FixestFormulaDict
+        self.FixestFormulaDict = formula_dictionary
         self._method = estimation
-        self._is_iv = FML.is_iv
+        self._is_iv = any(
+            formula.first_stage is not None
+            for _, formulas in formula_dictionary.items()
+            for formula in formulas
+        )
         # self._fml_dict = fxst_fml.condensed_fml_dict
         # self._fml_dict_iv = fxst_fml.condensed_fml_dict_iv
         self._ssc_dict = ssc if ssc is not None else {}
@@ -299,9 +301,9 @@ def _estimate_all_models(
             for _, fval in enumerate(_fixef_keys):
                 fixef_key_models = FixestFormulaDict.get(fval)
 
-                # dictionary to cache demeaned data with index: na_index_str,
+                # dictionary to cache demeaned data keyed by na_index,
                 # only relevant for `.feols()`
-                lookup_demeaned_data: dict[str, pd.DataFrame] = {}
+                lookup_demeaned_data: dict[frozenset[int], pd.DataFrame] = {}
 
                 for FixestFormula in fixef_key_models:  # type: ignore
                     # loop over both dictfe and dictfe_iv (if the latter is not None)
@@ -430,7 +432,7 @@ def _estimate_all_models(
                     # if X is empty: no inference (empty X only as shorthand for demeaning)
                     if not FIT._X_is_empty:
                         # inference
-                        vcov_type = _get_vcov_type(vcov, fval)
+                        vcov_type = _get_vcov_type(vcov)
                         FIT.vcov(
                             vcov=vcov_type,
                             vcov_kwargs=vcov_kwargs,
diff --git a/pyfixest/estimation/FormulaParser.py b/pyfixest/estimation/FormulaParser.py
index 653ffa61c..6e0b276d5 100644
--- a/pyfixest/estimation/FormulaParser.py
+++ b/pyfixest/estimation/FormulaParser.py
@@ -1,4 +1,5 @@
 import re
+import warnings
 from itertools import product
 from typing import Optional, Union
 
@@ -41,6 +42,14 @@ def __init__(self, fml: str):
             None
 
         """
+        warnings.warn(
+            "FixestFormulaParser is deprecated and will be removed in a future version. "
+            "Use `pyfixest.estimation.formula.parse.Formula.parse_to_dict()` instead. "
+            "See https://py-econometrics.github.io/pyfixest/reference/estimation.formula.parse.Formula.html",
+            FutureWarning,
+            stacklevel=2,
+        )
+
         depvars, covars, fevars, endogvars, instruments = _deparse_fml(fml)
 
         # Parse all individual formula components that allow for
diff --git a/pyfixest/estimation/demean_.py b/pyfixest/estimation/demean_.py
index 2caf43a5f..61e77a452 100644
--- a/pyfixest/estimation/demean_.py
+++ b/pyfixest/estimation/demean_.py
@@ -12,8 +12,8 @@ def demean_model(
     X: pd.DataFrame,
     fe: Optional[pd.DataFrame],
     weights: Optional[np.ndarray],
-    lookup_demeaned_data: dict[str, Any],
-    na_index_str: str,
+    lookup_demeaned_data: dict[frozenset[int], Any],
+    na_index: frozenset[int],
     fixef_tol: float,
     fixef_maxiter: int,
     demean_func: Callable,
@@ -42,9 +42,9 @@ def demean_model(
         A dictionary with keys for each fixed effects combination and potentially
         values of demeaned data frames. The function checks this dictionary to
         see if some of the variables have already been demeaned.
-    na_index_str : str
-        A string with indices of dropped columns. Used for caching of demeaned
-        variables.
+    na_index : frozenset[int]
+        A frozenset of indices of dropped rows. Used as a hashable cache key
+        for demeaned variables.
     fixef_tol: float
         The tolerance for the demeaning algorithm.
     fixef_maxiter: int
@@ -79,9 +79,9 @@ def demean_model(
     if fe is not None:
         fe_array = fe.to_numpy()
         # check if looked dict has data for na_index
-        if lookup_demeaned_data.get(na_index_str) is not None:
+        if lookup_demeaned_data.get(na_index) is not None:
             # get data out of lookup table: list of [algo, data]
-            value = lookup_demeaned_data.get(na_index_str)
+            value = lookup_demeaned_data.get(na_index)
             if value is not None:
                 try:
                     _, YX_demeaned_old = value
@@ -146,7 +146,7 @@ def demean_model(
             YX_demeaned = pd.DataFrame(YX_demeaned)
             YX_demeaned.columns = yx_names
 
-        lookup_demeaned_data[na_index_str] = [None, YX_demeaned]
+        lookup_demeaned_data[na_index] = [None, YX_demeaned]
 
     else:
         # nothing to demean here
diff --git a/pyfixest/estimation/fegaussian_.py b/pyfixest/estimation/fegaussian_.py
index 75ceb4869..0caca9102 100644
--- a/pyfixest/estimation/fegaussian_.py
+++ b/pyfixest/estimation/fegaussian_.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from pyfixest.estimation.feglm_ import Feglm
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import DemeanerBackendOptions
 
 
@@ -24,7 +24,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         tol: float,
         maxiter: int,
         solver: Literal[
diff --git a/pyfixest/estimation/feglm_.py b/pyfixest/estimation/feglm_.py
index 30f3a6c99..f7d89296e 100644
--- a/pyfixest/estimation/feglm_.py
+++ b/pyfixest/estimation/feglm_.py
@@ -16,7 +16,7 @@
     _drop_multicollinear_variables,
 )
 from pyfixest.estimation.fepois_ import _check_for_separation
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import DemeanerBackendOptions
 from pyfixest.estimation.solvers import solve_ols
 from pyfixest.utils.dev_utils import DataFrameType
@@ -37,7 +37,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         tol: float,
         maxiter: int,
         solver: Literal[
diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py
index 53ee86930..47f4eba7f 100644
--- a/pyfixest/estimation/feiv_.py
+++ b/pyfixest/estimation/feiv_.py
@@ -8,7 +8,7 @@
 
 from pyfixest.estimation.demean_ import demean_model
 from pyfixest.estimation.feols_ import Feols, _drop_multicollinear_variables
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import DemeanerBackendOptions
 from pyfixest.estimation.solvers import solve_ols
 
@@ -146,7 +146,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         solver: Literal[
             "np.linalg.lstsq",
             "np.linalg.solve",
@@ -214,7 +214,7 @@ def demean(self) -> None:
                 self._fe,
                 self._weights.flatten(),
                 self._lookup_demeaned_data,
-                self._na_index_str,
+                self._na_index,
                 self._fixef_tol,
                 self._fixef_maxiter,
                 self._demean_func,
@@ -276,8 +276,10 @@ def first_stage(self) -> None:
         fixest_module = import_module("pyfixest.estimation")
         fit_ = fixest_module.feols
 
-        fml_first_stage = self.FixestFormula.fml_first_stage.replace(" ", "")
-        if self._has_fixef:
+        fml_first_stage = self.FixestFormula.first_stage
+        # Append fixed effects manually since the first-stage formula doesn't include
+        # them (see the Formula.first_stage docstring for explanation)
+        if self._has_fixef and fml_first_stage is not None:
             fml_first_stage += f" | {self._fixef}"
 
         # Type hint to reflect that vcov_detail can be either a dict or a str
diff --git a/pyfixest/estimation/felogit_.py b/pyfixest/estimation/felogit_.py
index 03fd07578..a6c5833e3 100644
--- a/pyfixest/estimation/felogit_.py
+++ b/pyfixest/estimation/felogit_.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from pyfixest.estimation.feglm_ import Feglm
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import DemeanerBackendOptions
 
 
@@ -24,7 +24,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         tol: float,
         maxiter: int,
         solver: Literal[
diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
index f310f2847..a9a721136 100644
--- a/pyfixest/estimation/feols_.py
+++ b/pyfixest/estimation/feols_.py
@@ -17,7 +17,8 @@
 from pyfixest.estimation.backends import BACKENDS
 from pyfixest.estimation.decomposition import GelbachDecomposition, _decompose_arg_check
 from pyfixest.estimation.demean_ import demean_model
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula import model_matrix as model_matrix_fixest
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import (
     DemeanerBackendOptions,
     PredictionErrorOptions,
@@ -25,7 +26,6 @@
     SolverOptions,
     _validate_literal_argument,
 )
-from pyfixest.estimation.model_matrix_fixest_ import model_matrix_fixest
 from pyfixest.estimation.prediction import (
     _compute_prediction_error,
     _get_fixed_effects_prediction_component,
@@ -52,7 +52,6 @@
 )
 from pyfixest.utils.dev_utils import (
     DataFrameType,
-    _drop_cols,
     _extract_variable_level,
     _narwhals_to_pandas,
     _select_order_coefs,
@@ -254,7 +253,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         solver: SolverOptions = "np.linalg.solve",
         demeaner_backend: DemeanerBackendOptions = "numba",
         store_data: bool = True,
@@ -267,9 +266,9 @@ def __init__(
         self._sample_split_value = sample_split_value
         self._sample_split_var = sample_split_var
         self._model_name = (
-            FixestFormula.fml
+            FixestFormula.formula
             if self._sample_split_var is None
-            else f"{FixestFormula.fml} (Sample: {self._sample_split_var} = {self._sample_split_value})"
+            else f"{FixestFormula.formula} (Sample: {self._sample_split_var} = {self._sample_split_value})"
         )
         self._model_name_plot = self._model_name
         self._method = "feols"
@@ -313,9 +312,9 @@ def __init__(
 
         # attributes that have to be enriched outside of the class -
         # not really optimal code change later
-        self._fml = FixestFormula.fml
+        self._fml = FixestFormula.formula
         self._has_fixef = False
-        self._fixef = FixestFormula._fval
+        self._fixef = FixestFormula.fixed_effects
         # self._coefnames = None
         self._icovars = None
 
@@ -410,8 +409,8 @@ def _not_implemented_did(*args, **kwargs):
 
     def prepare_model_matrix(self):
         "Prepare model matrices for estimation."
-        mm_dict = model_matrix_fixest(
-            FixestFormula=self.FixestFormula,
+        model_matrix = model_matrix_fixest.create_model_matrix(
+            formula=self.FixestFormula,
             data=self._data,
             drop_singletons=self._drop_singletons,
             drop_intercept=self._drop_intercept,
@@ -419,31 +418,41 @@ def prepare_model_matrix(self):
             context=self._context,
         )
 
-        self._Y = mm_dict.get("Y")
-        self._Y_untransformed = mm_dict.get("Y").copy()
-        self._X = mm_dict.get("X")
-        self._fe = mm_dict.get("fe")
-        self._endogvar = mm_dict.get("endogvar")
-        self._Z = mm_dict.get("Z")
-        self._weights_df = mm_dict.get("weights_df")
-        self._na_index = mm_dict.get("na_index")
-        self._na_index_str = mm_dict.get("na_index_str")
-        self._icovars = mm_dict.get("icovars")
-        self._X_is_empty = mm_dict.get("X_is_empty")
-        self._model_spec = mm_dict.get("model_spec")
+        self._Y = model_matrix.dependent
+        self._Y_untransformed = model_matrix.dependent.copy()
+        self._X = model_matrix.independent
+        self._fe = model_matrix.fixed_effects
+        self._endogvar = model_matrix.endogenous
+        self._Z = model_matrix.instruments
+        self._weights_df = model_matrix.weights
+        self._na_index = model_matrix.na_index
+        # TODO: set dynamically based on naming set in pyfixest.estimation.formula.factor_interaction._encode_i
+        is_icovar = (
+            self._X.columns.str.contains(r"^.+::.+$") if not self._X.empty else None
+        )
+        self._icovars = (
+            self._X.columns[is_icovar].tolist()
+            if is_icovar is not None and is_icovar.any()
+            else None
+        )
+        self._X_is_empty = model_matrix.independent.shape[1] == 0  # no covariates
+        self._model_spec = model_matrix.model_spec
 
         self._coefnames = self._X.columns.tolist()
         self._coefnames_z = self._Z.columns.tolist() if self._Z is not None else None
         self._depvar = self._Y.columns[0]
 
         self._has_fixef = self._fe is not None
-        self._fixef = self.FixestFormula._fval
+        self._fixef = self.FixestFormula.fixed_effects
 
         self._k_fe = self._fe.nunique(axis=0) if self._has_fixef else None
         self._n_fe = len(self._k_fe) if self._has_fixef else 0
 
-        # update data:
-        self._data = _drop_cols(self._data, self._na_index)
+        # update data
+        self._data.drop(
+            self._data.index[~self._data.index.isin(model_matrix.dependent.index)],
+            inplace=True,
+        )
 
         self._weights = self._set_weights()
         self._N, self._N_rows = self._set_nobs()
@@ -495,7 +504,7 @@ def demean(self):
                 self._fe,
                 self._weights.flatten(),
                 self._lookup_demeaned_data,
-                self._na_index_str,
+                self._na_index,
                 self._fixef_tol,
                 self._fixef_maxiter,
                 self._demean_func,
@@ -747,7 +756,7 @@ def vcov(
 
             k_fe_nested = 0
             n_fe_fully_nested = 0
-            if self._has_fixef and self._ssc_dict["k_fixef"] == "nonnested":
+            if self._fixef is not None and self._ssc_dict["k_fixef"] == "nonnested":
                 k_fe_nested_flag, n_fe_fully_nested = self._count_nested_fixef_func(
                     all_fixef_array=np.array(
                         self._fixef.replace("^", "_").split("+"), dtype=str
@@ -1098,7 +1107,7 @@ def add_fixest_multi_context(
         None
         """
         # some bookkeeping
-        self._fml = self.FixestFormula.fml
+        self._fml = self.FixestFormula.formula
         self._depvar = depvar
         self._Y_untransformed = Y
         self._data = pd.DataFrame()
@@ -2547,7 +2556,9 @@ def ritest(
 
         else:
             weights = self._weights.flatten()
-            fval_df = self._data[self._fixef.split("+")] if self._has_fixef else None
+            fval_df = (
+                self._data[self._fixef.split("+")] if self._fixef is not None else None
+            )
             D = self._data[resampvar_].to_numpy()
 
             ri_stats = _get_ritest_stats_fast(
diff --git a/pyfixest/estimation/feols_compressed_.py b/pyfixest/estimation/feols_compressed_.py
index c252ddafd..722d689dd 100644
--- a/pyfixest/estimation/feols_compressed_.py
+++ b/pyfixest/estimation/feols_compressed_.py
@@ -9,7 +9,7 @@
 from tqdm import tqdm
 
 from pyfixest.estimation.feols_ import Feols, PredictionErrorOptions, PredictionType
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import (
     DemeanerBackendOptions,
     SolverOptions,
@@ -91,7 +91,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         solver: SolverOptions = "np.linalg.solve",
         demeaner_backend: DemeanerBackendOptions = "numba",
         store_data: bool = True,
@@ -125,7 +125,7 @@ def __init__(
             sample_split_value,
         )
 
-        if FixestFormula.fml_first_stage is not None:
+        if FixestFormula.first_stage is not None:
             raise NotImplementedError(
                 "Compression is not supported with IV regression."
             )
diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py
index ced290848..12bcc75d2 100644
--- a/pyfixest/estimation/fepois_.py
+++ b/pyfixest/estimation/fepois_.py
@@ -1,3 +1,4 @@
+import re
 import warnings
 from collections.abc import Mapping
 from importlib import import_module
@@ -17,7 +18,7 @@
     PredictionType,
     _drop_multicollinear_variables,
 )
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import (
     DemeanerBackendOptions,
     SolverOptions,
@@ -95,7 +96,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         tol: float,
         maxiter: int,
         solver: SolverOptions = "np.linalg.solve",
@@ -695,7 +696,7 @@ def _check_for_separation_ir(
     separation_na: set[int] = set()
     tmp_suffix = "_separationTmp"
     # build formula
-    name_dependent, rest = fml.split("~")
+    name_dependent, rest = re.split(r"\s*~\s*", fml, maxsplit=1)
     name_dependent_separation = "U"
     if name_dependent_separation in data.columns:
         name_dependent_separation += tmp_suffix
diff --git a/pyfixest/estimation/feprobit_.py b/pyfixest/estimation/feprobit_.py
index 825dbe4d4..41524d5f4 100644
--- a/pyfixest/estimation/feprobit_.py
+++ b/pyfixest/estimation/feprobit_.py
@@ -7,7 +7,7 @@
 from scipy.stats import norm
 
 from pyfixest.estimation.feglm_ import Feglm
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import DemeanerBackendOptions
 
 
@@ -26,7 +26,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         tol: float,
         maxiter: int,
         solver: Literal[
diff --git a/pyfixest/estimation/formula/__init__.py b/pyfixest/estimation/formula/__init__.py
new file mode 100644
index 000000000..aa130a715
--- /dev/null
+++ b/pyfixest/estimation/formula/__init__.py
@@ -0,0 +1,7 @@
+from typing import Final
+
+from formulaic.parser import DefaultFormulaParser
+
+FORMULAIC_FEATURE_FLAG: Final[DefaultFormulaParser.FeatureFlags] = (
+    DefaultFormulaParser.FeatureFlags.DEFAULT
+)
diff --git a/pyfixest/estimation/formula/factor_interaction.py b/pyfixest/estimation/formula/factor_interaction.py
new file mode 100644
index 000000000..b4d97f62b
--- /dev/null
+++ b/pyfixest/estimation/formula/factor_interaction.py
@@ -0,0 +1,266 @@
+from collections.abc import Hashable
+from typing import TYPE_CHECKING, Any, Final, Optional
+
+import numpy as np
+import pandas as pd
+from formulaic.materializers.types import FactorValues
+from formulaic.transforms.contrasts import TreatmentContrasts, encode_contrasts
+from formulaic.utils.sentinels import UNSET
+
+if TYPE_CHECKING:
+    from formulaic.model_spec import ModelSpec
+
+
+def factor_interaction(
+    data: Any,
+    var2: Any = None,
+    *,
+    ref: Optional[Hashable] = None,
+    ref2: Optional[Hashable] = None,
+    bin: Optional[dict] = None,
+    bin2: Optional[dict] = None,
+) -> FactorValues:
+    """
+    Fixest-style i() operator for categorical encoding with interactions.
+
+    Args:
+        data: The categorical variable
+        var2: Optional second variable for interaction (continuous or categorical)
+        ref: Reference level to drop from data
+        ref2: Reference level to drop from var2 (if categorical)
+        bin, bin2: Dicts mapping new_level -> [old_levels] for binning data / var2
+
+    Naming convention (matches R fixest):
+        i(cyl)           -> cyl::4, cyl::6, cyl::8
+        i(cyl, ref=4)    -> cyl::6, cyl::8
+        i(cyl, wt)       -> cyl::4:wt, cyl::6:wt, cyl::8:wt
+        i(cyl, wt, ref=4) -> cyl::6:wt, cyl::8:wt
+    """
+    # Variable names come from Series.name; unnamed inputs fall back to "factor" / "var"
+    factor_name = _get_series_name(data, default="factor")
+    var2_name = _get_series_name(var2, default="var") if var2 is not None else None
+
+    def encoder(
+        values: Any,
+        reduced_rank: bool,
+        drop_rows: list[int],
+        encoder_state: dict[str, Any],
+        model_spec: "ModelSpec",
+    ) -> FactorValues:
+        """Run encoder callback during materialization."""
+        return _encode_i(
+            values=values,
+            factor_name=factor_name,
+            var2_name=var2_name,
+            ref=ref,
+            ref2=ref2,
+            bin=bin,
+            bin2=bin2,
+            reduced_rank=reduced_rank,
+            drop_rows=drop_rows,
+            encoder_state=encoder_state,
+            model_spec=model_spec,
+        )
+
+    # When var2 is provided, wrap both variables in a dict so that find_nulls()
+    # will check both for null values. This ensures drop_rows is correctly populated.
+    wrapped_data = {"__data__": data, "__var2__": var2} if var2 is not None else data
+
+    return FactorValues(
+        wrapped_data,
+        kind="categorical",
+        spans_intercept=var2 is None,
+        encoder=encoder,
+    )
+
+
+def _get_series_name(data: Any, default: str = "var") -> str:
+    """Return the name of a (possibly FactorValues-wrapped) Series, else `default`."""
+    if data is None:
+        return default
+    # Look through the FactorValues wrapper to the object it wraps
+    candidate = data.__wrapped__ if isinstance(data, FactorValues) else data
+    if not isinstance(candidate, pd.Series) or candidate.name is None:
+        return default
+    return str(candidate.name)
+
+
+def _encode_i(
+    values: Any,
+    factor_name: str,
+    var2_name: Optional[str],
+    ref: Optional[Hashable],
+    ref2: Optional[Hashable],
+    bin: Optional[dict],
+    bin2: Optional[dict],
+    reduced_rank: bool,
+    drop_rows: list[int],
+    encoder_state: dict[str, Any],
+    model_spec: "ModelSpec",
+) -> FactorValues:
+    """
+    Actual encoding logic, called during materialization.
+
+    Uses formulaic's native encode_contrasts + TreatmentContrasts for the core
+    dummy encoding, then applies fixest-style naming and handles interactions.
+    """
+    # Extract values - may be wrapped in dict for null detection
+    unwrapped = values.__wrapped__ if isinstance(values, FactorValues) else values
+    data = unwrapped["__data__"] if var2_name is not None else unwrapped
+    var2 = unwrapped.get("__var2__") if var2_name is not None else None
+    # Convert to pandas Series and drop specified rows
+    data = pd.Series(data)
+    data.drop(index=data.index[drop_rows], inplace=True)
+    if var2 is not None:
+        var2 = pd.Series(var2)
+        var2.drop(index=var2.index[drop_rows], inplace=True)
+    dummies = _encode_factor(
+        pd.Series(data),
+        ref=ref,
+        bins=bin,
+        reduced_rank=reduced_rank and var2 is None,
+        encoder_state=encoder_state,
+        model_spec=model_spec,
+    )
+    # Three options: (i) no interaction, (ii) interaction with continuous variable, (iii) factor-factor interaction
+    if var2 is None:
+        # (i) No interaction: return categorical encoding of single variable
+        dummies.rename(
+            columns={level: f"{factor_name}::{level}" for level in dummies.columns},
+            inplace=True,
+        )
+        return FactorValues(
+            dummies,
+            kind="categorical",
+            # spans_intercept is True only when no reference level was dropped
+            # (i.e., ref is None and reduced_rank is False)
+            spans_intercept=(ref is None and not reduced_rank),
+            column_names=tuple(dummies.columns),
+            format="{field}",  # Use column names directly
+        )
+    elif ref2 is None and bin2 is None and _is_numeric(var2):
+        # (ii) interaction with continuous variable
+        result = dummies.multiply(var2, axis=0)
+        result.rename(
+            columns={
+                level: f"{factor_name}::{level}:{var2_name}"
+                for level in dummies.columns
+            },
+            inplace=True,
+        )
+        return FactorValues(
+            result,
+            kind="numerical",
+            spans_intercept=False,
+            column_names=tuple(result.columns),
+            format="{field}",
+        )
+    else:
+        # (iii) factor-factor interaction
+        dummies2 = _encode_factor(
+            data=var2,
+            ref=ref2,
+            bins=bin2,
+            reduced_rank=False,
+            encoder_state=encoder_state,
+            model_spec=model_spec,
+        )
+        interacted = pd.DataFrame(
+            _interact_dummies(
+                left=dummies.to_numpy(),
+                right=dummies2.to_numpy(),
+            ),
+            columns=[
+                f"{factor_name}::{l1}:{var2_name}::{l2}"
+                for l1 in dummies.columns
+                for l2 in dummies2.columns
+            ],
+            index=dummies.index,
+        )
+        # Drop the reference cell (ref x ref2); a missing ref defaults to the first stored level
+        if ref is None:
+            ref = encoder_state[f"__contrasts_{factor_name}__"]["levels"][0]
+        if ref2 is None:
+            ref2 = encoder_state[f"__contrasts_{var2_name}__"]["levels"][0]
+        interacted.drop(
+            f"{factor_name}::{ref}:{var2_name}::{ref2}",
+            axis=1,
+            inplace=True,
+            errors="ignore",
+        )
+        return FactorValues(
+            interacted,
+            kind="categorical",
+            spans_intercept=True,
+            column_names=tuple(interacted.columns),
+            format="{field}",  # Use column names directly
+        )
+
+
+def _encode_factor(
+    data: pd.Series,
+    ref: Optional[Hashable],
+    bins: Optional[dict],
+    reduced_rank: bool,
+    encoder_state: dict[str, Any],
+    model_spec: "ModelSpec",
+) -> pd.DataFrame:
+    """
+    Dummy-encode `data` (after optional binning) with formulaic treatment contrasts.
+    """
+    binned = data if bins is None else _apply_binning(data, bins, encoder_state)
+    # Per-variable sub-state keeps encode_contrasts' keys from colliding with others
+    state_key: Final[str] = f"__contrasts_{binned.name}__"
+    sub_state = encoder_state.setdefault(state_key, {})
+    # A level is dropped when the model has an intercept (reduced_rank=True)
+    # or when the caller explicitly names a reference level.
+    has_ref = ref is not None
+    base_level = ref if has_ref else UNSET
+    wrapped = encode_contrasts(
+        binned,
+        contrasts=TreatmentContrasts(base=base_level),
+        levels=sub_state.get("levels"),
+        reduced_rank=reduced_rank or has_ref,
+        output="pandas",
+        _state=sub_state,
+        _spec=model_spec,
+    )
+    frame = wrapped.__wrapped__
+    if "levels" not in sub_state:
+        # Remember the encoded columns; they are passed back as `levels` on later calls
+        sub_state["levels"] = frame.columns.tolist()
+    return frame
+
+
+def _interact_dummies(left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Return all pairwise column products, flattened to (n_rows, n_levels1 * n_levels2)."""
+    # left[:, :, None] is (n_rows, n_levels1, 1); right[:, None, :] is (n_rows, 1, n_levels2)
+    return np.reshape(
+        # Broadcast product has shape (n_rows, n_levels1, n_levels2)
+        left[:, :, None] * right[:, None, :],
+        # Explicit positional shape: `shape=` needs NumPy >= 2.1, and -1 fails with zero rows
+        (left.shape[0], left.shape[1] * right.shape[1]),
+    )
+
+
+def _is_numeric(series: pd.Series) -> bool:
+    """Return True for numeric, non-boolean data (booleans are not treated as numeric)."""
+    if pd.api.types.is_bool_dtype(series):
+        return False
+    return pd.api.types.is_numeric_dtype(series)
+
+
+def _apply_binning(series: pd.Series, bins: dict, state: dict) -> pd.Series:
+    """
+    Apply binning: bin={'low': ['a','b'], 'high': ['c','d']}.
+
+    Values not in the mapping are kept unchanged (matches R fixest behavior).
+    The old -> new mapping is cached in `state` under a key built from series.name.
+    """
+    # A single shared key made `bin2` reuse `bin`'s mapping when both share one state
+    key = f"__bin_mapping_{series.name}__"
+    if key not in state:
+        state[key] = {old: new for new, olds in bins.items() for old in olds}
+    # Use replace() instead of map() to keep unmapped values unchanged
+    return series.replace(state[key])
+
diff --git a/pyfixest/estimation/formula/model_matrix.py b/pyfixest/estimation/formula/model_matrix.py
new file mode 100644
index 000000000..e317cbfd0
--- /dev/null
+++ b/pyfixest/estimation/formula/model_matrix.py
@@ -0,0 +1,370 @@
+import warnings
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any, Final, Optional, Union
+
+import formulaic
+import numpy as np
+import pandas as pd
+from formulaic.parser import DefaultFormulaParser
+
+from pyfixest.estimation.detect_singletons_ import detect_singletons
+from pyfixest.estimation.formula import FORMULAIC_FEATURE_FLAG
+from pyfixest.estimation.formula.factor_interaction import factor_interaction
+from pyfixest.estimation.formula.parse import Formula
+from pyfixest.estimation.formula.utils import (
+    _encode_fixed_effects,
+    _factorize,
+    _get_weights,
+    log,
+)
+from pyfixest.utils.utils import capture_context
+
+
+@dataclass(frozen=True, kw_only=True)
+class _ModelMatrixKey:  # names of the formula parts handed to / read back from formulaic
+    main: str = "second_stage"  # main equation; indexed with "lhs" / "rhs"
+    fixed_effects: str = "fe"  # fixed-effects part (optional)
+    instrumental_variable: str = "first_stage"  # IV first stage; indexed with "lhs" / "rhs"
+    weights: str = "weights"  # observation weights part (optional)
+
+
+class ModelMatrix:
+    """
+    A wrapper around formulaic.ModelMatrix for the specification of PyFixest models.
+
+    This class organizes and processes model matrices for econometric estimation,
+    extracting dependent and independent variables, fixed effects, instrumental
+    variables, and weights. It handles missing data, singleton observations,
+    and ensures proper formatting for estimation procedures.
+
+    Attributes
+    ----------
+    dependent : pd.DataFrame
+        The dependent variable(s) (left-hand side of the main equation).
+    independent : pd.DataFrame
+        The independent variable(s) (right-hand side of the main equation).
+    fixed_effects : pd.DataFrame or None
+        Fixed effects variables, encoded as integers.
+    endogenous : pd.DataFrame or None
+        Endogenous variables in instrumental variable specifications.
+    instruments : pd.DataFrame or None
+        Instrumental variables for IV estimation.
+    weights : pd.DataFrame or None
+        Observation weights for weighted estimation.
+    model_spec : formulaic.ModelSpec
+        The underlying formulaic model specification.
+    na_index : frozenset[int]
+        Indices of rows that were dropped.
+    """
+
+    @property
+    def dependent(self) -> pd.DataFrame:
+        """
+        Get the dependent variable(s) from the model.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing the dependent variable(s) (left-hand side
+            of the main equation).
+        """
+        return self._data.loc[:, self._dependent]
+
+    @property
+    def independent(self) -> pd.DataFrame:
+        """
+        Get the independent variable(s) from the model.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing the independent variable(s) (right-hand side
+            of the main equation). Intercept columns are excluded when fixed
+            effects are present.
+        """
+        return self._data.loc[:, self._independent]
+
+    @property
+    def fixed_effects(self) -> Optional[pd.DataFrame]:
+        """
+        Get the fixed effects variables from the model.
+
+        Returns
+        -------
+        pd.DataFrame or None
+            DataFrame containing the fixed effects variables encoded as integers,
+            or None if no fixed effects are specified in the model.
+        """
+        if self._fixed_effects is None:
+            return None
+        else:
+            return self._data.loc[:, self._fixed_effects]
+
+    @property
+    def endogenous(self) -> Optional[pd.DataFrame]:
+        """
+        Get the endogenous variable(s) for instrumental variable estimation.
+
+        Returns
+        -------
+        pd.DataFrame or None
+            DataFrame containing the endogenous variable(s) (left-hand side
+            of the first-stage equation in IV estimation), or None if not
+            using instrumental variables.
+        """
+        if self._endogenous is None:
+            return None
+        else:
+            return self._data.loc[:, self._endogenous]
+
+    @property
+    def instruments(self) -> Optional[pd.DataFrame]:
+        """
+        Get the instrumental variable(s) for IV estimation.
+
+        Returns
+        -------
+        pd.DataFrame or None
+            DataFrame containing the instrumental variable(s) (right-hand side
+            of the first-stage equation in IV estimation), or None if not
+            using instrumental variables. Intercept columns are excluded when
+            fixed effects are present.
+        """
+        if self._instruments is None:
+            return None
+        else:
+            return self._data.loc[:, self._instruments]
+
+    @property
+    def weights(self) -> Optional[pd.DataFrame]:
+        """
+        Get the observation weights for weighted estimation.
+
+        Returns
+        -------
+        pd.DataFrame or None
+            DataFrame containing the observation weights (must be non-negative
+            numeric values), or None if no weights are specified.
+        """
+        if self._weights is None:
+            return None
+        else:
+            return self._data.loc[:, self._weights]
+
+    @property
+    def model_spec(self) -> formulaic.ModelSpec:
+        """
+        Get the underlying formulaic model specification.
+
+        Returns
+        -------
+        formulaic.ModelSpec
+            The formulaic ModelSpec object containing metadata about the
+            model structure and transformations.
+        """
+        return self._model_spec
+
+    @property
+    def na_index(self) -> frozenset[int]:
+        """Integer positions of rows dropped in model matrix creation."""
+        return self._na_index
+
+    def __init__(
+        self,
+        model_matrix: formulaic.ModelMatrix,
+        drop_rows: set[int],
+        drop_singletons: bool = True,
+        drop_intercept: bool = False,
+    ) -> None:
+        self._drop_intercept = drop_intercept
+        self._model_spec = model_matrix.model_spec
+        self._collect_columns(model_matrix)
+        self._collect_data(model_matrix)
+        self._process(dropped_rows=drop_rows, drop_singletons=drop_singletons)
+
+    def _collect_columns(self, model_matrix: formulaic.ModelMatrix) -> None:
+        # Extract dependent and independent variables (always present)
+        self._dependent = model_matrix[_ModelMatrixKey.main]["lhs"].columns.tolist()
+        self._independent = model_matrix[_ModelMatrixKey.main]["rhs"].columns.tolist()
+        # Extract fixed effects (optional)
+        try:
+            self._fixed_effects = model_matrix[
+                _ModelMatrixKey.fixed_effects
+            ].columns.tolist()
+        except KeyError:
+            self._fixed_effects = None
+        # Extract endogenous variables
+        try:
+            self._endogenous = model_matrix[_ModelMatrixKey.instrumental_variable][
+                "lhs"
+            ].columns.tolist()
+        except KeyError:
+            self._endogenous = None
+        # Extract instruments
+        try:
+            self._instruments = model_matrix[_ModelMatrixKey.instrumental_variable][
+                "rhs"
+            ].columns.tolist()
+        except KeyError:
+            self._instruments = None
+        # Extract weights (optional)
+        try:
+            self._weights = model_matrix[_ModelMatrixKey.weights].columns.tolist()
+        except KeyError:
+            self._weights = None
+
+    def _collect_data(self, model_matrix: formulaic.ModelMatrix) -> None:
+        datas: list[pd.DataFrame] = list(model_matrix._flatten())
+        if not all(datas[0].index.identical(other.index) for other in datas[1:]):
+            raise ValueError("All design matrix data must have the same index.")
+        data = pd.concat(datas, ignore_index=False, axis=1)
+        self._data = data.loc[:, ~data.columns.duplicated()]
+
+    def _process(self, dropped_rows: set[int], drop_singletons: bool = False) -> None:
+        if self.dependent.shape[1] != 1:
+            # If the dependent variable is not numeric, formulaic's contrast encoding kicks in
+            # creating multiple columns for the dependent variable
+            # TODO: Make this check more explicit?
+            raise TypeError("The dependent variable must be numeric.")
+        if self.endogenous is not None and self.endogenous.shape[1] != 1:
+            raise TypeError("The endogenous variable must be numeric.")
+        # Drop rows with non-finite values
+        is_infinite = pd.Series(
+            ~np.isfinite(self._data).all(axis=1), index=self._data.index
+        )
+        if is_infinite.any():
+            infinite_indices = is_infinite[is_infinite].index.tolist()
+            # Rebind instead of `|=` so the set passed in by the caller is never mutated
+            dropped_rows = dropped_rows | set(infinite_indices)
+            self._data.drop(infinite_indices, inplace=True)
+            warnings.warn(
+                f"{is_infinite.sum()} rows with infinite values dropped from the model.",
+            )
+        if self.fixed_effects is not None:
+            # Ensure fixed effects are `int32`
+            self._data[self._fixed_effects] = self.fixed_effects.astype("int32")
+        if self.fixed_effects is not None or self._drop_intercept:
+            self._independent = [col for col in self._independent if col != "Intercept"]
+            if self._instruments is not None:
+                self._instruments = [
+                    col for col in self._instruments if col != "Intercept"
+                ]
+        # Drop singletons if specified (rebinding keeps the caller's set untouched)
+        if drop_singletons and self.fixed_effects is not None:
+            is_singleton = pd.Series(
+                detect_singletons(self.fixed_effects.to_numpy()),
+                index=self._data.index,
+            )
+            if is_singleton.any():
+                singleton_indices = self._data[is_singleton].index.tolist()
+                dropped_rows = dropped_rows | set(singleton_indices)
+                self._data.drop(singleton_indices, inplace=True)
+                warnings.warn(
+                    f"{is_singleton.sum()} singleton fixed effect(s) dropped from the model."
+                )
+        self._na_index = frozenset(dropped_rows)
+
+
+def create_model_matrix(
+    formula: Formula,
+    data: pd.DataFrame,
+    weights: str | None = None,
+    drop_singletons: bool = False,
+    drop_intercept: bool = False,
+    ensure_full_rank: bool = True,
+    context: Union[int, Mapping[str, Any]] = 0,
+) -> ModelMatrix:
+    """
+    Create a ModelMatrix from a formula and data.
+
+    This function constructs model matrices for econometric estimation by parsing
+    formulas and extracting the necessary components (dependent/independent variables,
+    fixed effects, instruments, weights) from the provided data.
+
+    Parameters
+    ----------
+    formula : Formula
+        A Formula object specifying the model structure, including dependent and
+        independent variables, fixed effects, and instrumental variables.
+    data : pd.DataFrame
+        The input data containing all variables referenced in the formula.
+        The index is reset in place, i.e. the caller's DataFrame is modified.
+    weights : str or None, default=None
+        Column name in data to use as observation weights. Weights must be
+        non-negative numeric values. If None, no weighting is applied.
+    drop_singletons : bool, default=False
+        If True, observations that are singletons in any fixed effect category
+        are dropped from the model.
+    drop_intercept : bool, default=False
+        If True, the intercept column is removed from the independent variables
+        and instruments matrices. The intercept is always removed when fixed
+        effects are present, regardless of this parameter.
+    ensure_full_rank : bool, default=True
+        Passed through to formulaic, which then keeps the design matrix
+        structurally full rank (e.g. reduced-rank categorical encodings).
+    context : int or Mapping[str, Any], default=0
+        Additional context variables for formulaic during model matrix creation.
+        Can be an integer (stack frame depth) or a dictionary of variables to
+        make available in the formula environment (e.g., custom transformations).
+
+    Returns
+    -------
+    ModelMatrix
+        A ModelMatrix object containing the processed dependent and independent
+        variables, fixed effects, instruments, weights, and metadata about
+        dropped observations.
+
+    """
+    # Row labels become 0..n-1 so dropped rows can be identified by integer position
+    data.reset_index(drop=True, inplace=True)  # Sanitise index
+    n_observations: Final[int] = data.shape[0]
+    formula_formulaic = _get_formulaic_formula(
+        formula=formula, data=data, weights=weights
+    )
+    model_matrix = formula_formulaic.get_model_matrix(
+        data=data,
+        ensure_full_rank=ensure_full_rank,
+        na_action="drop",
+        output="pandas",
+        context={
+            "log": log,  # custom log that sets infinite values to nan
+            "i": factor_interaction,  # fixest::i()-style syntax
+            "__fixed_effect__": _factorize,
+        }
+        | {**capture_context(context)},  # merged last: captured names override the above
+    )
+    drop_rows: set[int] = set(range(n_observations)).difference(
+        model_matrix[_ModelMatrixKey.main]["lhs"].index
+    )  # positions absent from the materialized lhs, i.e. rows removed by formulaic
+    return ModelMatrix(
+        model_matrix,
+        drop_rows=drop_rows,
+        drop_singletons=drop_singletons,
+        drop_intercept=drop_intercept,
+    )
+
+
+def _get_formulaic_formula(
+    formula: Formula,
+    data: pd.DataFrame,
+    weights: str | None = None,
+) -> formulaic.Formula:
+    """
+    Assemble the multi-part formulaic.Formula; overwrites `data[weights]` if weights are given.
+    """
+    key = _ModelMatrixKey
+    # One entry per formula part, keyed by the names ModelMatrix looks up later
+    parts: dict[str, str] = {key.main: formula.second_stage}
+    if formula.fixed_effects is not None:
+        parts[key.fixed_effects] = _encode_fixed_effects(
+            fixed_effects=formula.fixed_effects, data=data
+        )
+    if formula.first_stage is not None:
+        parts[key.instrumental_variable] = formula.first_stage
+    if weights is not None:
+        # Replace the column with the processed weights; "-1" removes the intercept
+        data[weights] = _get_weights(data, weights)
+        parts[key.weights] = f"{weights}-1"
+    # Parse with the package-wide formulaic feature flags
+    parser = DefaultFormulaParser(feature_flags=FORMULAIC_FEATURE_FLAG)
+    return formulaic.Formula(parts, _parser=parser)
diff --git a/pyfixest/estimation/formula/parse.py b/pyfixest/estimation/formula/parse.py
new file mode 100644
index 000000000..41da07819
--- /dev/null
+++ b/pyfixest/estimation/formula/parse.py
@@ -0,0 +1,250 @@
+import itertools
+import re
+from dataclasses import dataclass
+from typing import Final
+
+import formulaic
+
+from pyfixest.errors import (
+    EndogVarsAsCovarsError,
+    FormulaSyntaxError,
+    InstrumentsAsCovarsError,
+    UnderDeterminedIVError,
+)
+from pyfixest.estimation.formula.utils import (
+    _MULTIPLE_ESTIMATION_PATTERN,
+    _get_position_of_first_parenthesis_pair,
+    _MultipleEstimationType,
+    _split_parenthesis_preserving,
+)
+
+
+@dataclass(kw_only=True, frozen=True, slots=True)
+class Formula:
+    """A single fixest-style model specification in formulaic-compliant parts.
+
+    Attributes
+    ----------
+    _second_stage : str
+        Main part `dependent ~ covariates`, without the endogenous variables.
+    _fixed_effects : str | None
+        Fixed effects part, or None if the model has none.
+    _first_stage : str | None
+        Instrumental variable part `endogenous ~ instruments`, or None.
+    """
+
+    _second_stage: str
+    _fixed_effects: str | None = None
+    _first_stage: str | None = None
+
+    def __post_init__(self) -> None:
+        # Only instrumental variable specifications need validating
+        if self._first_stage is None:
+            return
+        main_part = formulaic.Formula(self._second_stage)
+        iv_part = formulaic.Formula(self._first_stage)
+        exogenous = main_part.rhs.required_variables
+        endogenous = iv_part.lhs.required_variables
+        instruments = iv_part.rhs.required_variables
+        if len(endogenous) > 1:
+            raise FormulaSyntaxError(
+                "Multiple endogenous variables are currently not supported."
+            )
+        if len(endogenous) > len(instruments):
+            raise UnderDeterminedIVError(
+                "The IV system is underdetermined. "
+                "Please provide at least as many instruments as endogenous variables."
+            )
+        if endogenous_are_covariates := endogenous.intersection(exogenous):
+            raise EndogVarsAsCovarsError(
+                f"Endogeneous variables specified as covariates: {endogenous_are_covariates}"
+            )
+        if instruments_are_covariates := instruments.intersection(exogenous):
+            raise InstrumentsAsCovarsError(
+                f"Instruments specified as covariates: {instruments_are_covariates}"
+            )
+
+    @property
+    def formula(self) -> str:
+        """Return the full fixest-style formula, parts separated by ' | '."""
+        optional_parts = (self._fixed_effects, self._first_stage)
+        suffix = "".join(f" | {part}" for part in optional_parts if part is not None)
+        return f"{self._second_stage}{suffix}"
+
+    @property
+    def endogenous(self) -> str | None:
+        """Return the left-hand side of the IV part, or None without IV."""
+        if self._first_stage is None:
+            return None
+        lhs, _ = re.split(r"\s*~\s*", self._first_stage, maxsplit=1)
+        return lhs
+
+    @property
+    def exogenous(self) -> str:
+        """Return the covariates, i.e. the right-hand side of the main part."""
+        _, rhs = re.split(r"\s*~\s*", self._second_stage, maxsplit=1)
+        return rhs
+
+    @property
+    def second_stage(self) -> str:
+        """Return the second stage formula, including endogenous variables for IV."""
+        if self._first_stage is None:
+            return self._second_stage
+        # The endogenous variables enter the second stage as covariates
+        return f"{self._second_stage} + {self.endogenous}"
+
+    @property
+    def first_stage(self) -> str | None:
+        """Return the first stage formula including the covariates, or None."""
+        if self._first_stage is None:
+            return None
+        # The exogenous variables enter the first stage as covariates
+        return f"{self._first_stage} + {self.exogenous}"
+
+    @property
+    def fixed_effects(self) -> str | None:
+        """Return the fixed effects part, or None if there is none."""
+        return self._fixed_effects
+
+    @classmethod
+    def parse(cls, formula: str) -> list["Formula"]:
+        """Validate and expand a fixest-style formula into `Formula` objects."""
+        _validate(formula)
+        expanded = _expand_all_multiple_estimation(_preprocess(formula))
+        return [_split_formula_into_parts(single) for single in expanded]
+
+    @classmethod
+    def parse_to_dict(cls, formula: str) -> dict[str | None, list["Formula"]]:
+        """Parse `formula` and group the results by their fixed effects part."""
+        grouped: dict[str | None, list[Formula]] = {}
+        for parsed in cls.parse(formula):
+            grouped.setdefault(parsed._fixed_effects, []).append(parsed)
+        return grouped
+
+
+def _validate(formula: str) -> None:
+    """Check the coarse structure of a fixest-style formula.
+
+    Accepted layouts: `Y ~ X`, `Y ~ X | fe`, `Y ~ X | endog ~ instr` and
+    `Y ~ X | fe | endog ~ instr`.
+
+    Raises
+    ------
+    FormulaSyntaxError
+        If there are too many parts, a part holds several '~', or a required
+        '~' is missing or misplaced.
+    """
+    max_parts: Final[int] = 3
+    parts = _split_parenthesis_preserving(string=formula, separator="|")
+    n_parts = len(parts)
+    # At most three parts
+    if n_parts > max_parts:
+        raise FormulaSyntaxError(
+            f"Formula can have at most {max_parts} parts separated by '|'. "
+            f"Received {n_parts}: '{formula}'"
+        )
+    # At most one tilde per part
+    parts_with_multiple_tildes = [p for p in parts if p.count("~") > 1]
+    if parts_with_multiple_tildes:
+        raise FormulaSyntaxError(
+            f"Each formula part can contain at most one '~'. "
+            f"Invalid parts: {parts_with_multiple_tildes}"
+        )
+    # Tilde placement depends on the number of parts
+    has_tilde = ["~" in part for part in parts]
+    if n_parts == 1 and not has_tilde[0]:
+        raise FormulaSyntaxError(f"Formula must contain '~': '{formula}'")
+    if n_parts >= 2 and not has_tilde[0]:
+        raise FormulaSyntaxError(
+            f"First part must contain '~' (dependent ~ independent): '{parts[0]}'"
+        )
+    if n_parts == 3 and has_tilde[1]:
+        raise FormulaSyntaxError(
+            f"Second part (fixed effects) cannot contain '~': '{parts[1]}'. "
+            "Fixed effects should be specified as 'f1 + f2', not as a formula."
+        )
+    if n_parts == 3 and not has_tilde[2]:
+        raise FormulaSyntaxError(
+            "Three-part formula requires IV specification in third part: "
+            "'dependent ~ independent | fixed_effects | endogenous ~ instruments'. "
+        )
+
+
+def _preprocess(formula: str) -> str:
+    """Rewrite several dependent variables as a stepwise `sw()` call.
+
+    `Y + Y2 ~ X1 + X2` becomes `sw(Y, Y2) ~ X1 + X2`; other formulas pass through.
+    """
+    lhs, rhs = re.split(r"\s*~\s*", formula, maxsplit=1)
+    dependents = _split_parenthesis_preserving(lhs.strip(), separator="+")
+    if len(dependents) <= 1:
+        return formula
+    return f"sw({', '.join(dependents)}) ~ {rhs}"
+
+
+def _expand_first_multiple_estimation(formula: str) -> list[str] | None:
+    """Expand the first multiple estimation call found in `formula`.
+
+    Returns one formula per step, or None if there is no such call.
+    """
+    match = _MULTIPLE_ESTIMATION_PATTERN.search(formula)
+    if not match:
+        return None
+    kind = _MultipleEstimationType[match.group(1)]
+    parenthesis_open, parenthesis_closed = _get_position_of_first_parenthesis_pair(
+        string=formula[match.start() :]
+    )
+    # Positions are relative to the match; shift them so they index `formula`
+    parenthesis_open += match.start()
+    parenthesis_closed += match.start()
+    arguments = _split_parenthesis_preserving(
+        string=formula[parenthesis_open:parenthesis_closed],
+        separator=",",
+    )
+    if kind is _MultipleEstimationType.mvsw:
+        # Multiverse stepwise: all combinations of arguments, shortest first
+        arguments = [
+            " + ".join(combination)
+            for length in range(1, len(arguments) + 1)
+            for combination in itertools.combinations(arguments, r=length)
+        ]
+    elif kind in (_MultipleEstimationType.csw, _MultipleEstimationType.csw0):
+        # Cumulative stepwise: first argument, first two arguments, ...
+        arguments = [" + ".join(arguments[:stop]) for stop in range(1, len(arguments) + 1)]
+    if kind in (
+        _MultipleEstimationType.sw0,
+        _MultipleEstimationType.csw0,
+        _MultipleEstimationType.mvsw,  # following fixest there is no mvsw0
+    ):
+        arguments = ["1", *arguments]  # zero step
+    # Splice by position: str.replace would also hit identical text elsewhere (e.g. inside `csw(a, b)`)
+    prefix, suffix = formula[: match.start()], formula[parenthesis_closed + 1 :]
+    return [f"{prefix}{argument}{suffix}" for argument in arguments]
+
+
+def _expand_all_multiple_estimation(formula: str) -> list[str]:
+    """Recursively expand every multiple estimation call in `formula`.
+
+    Returns `[formula]` unchanged when it contains no such call.
+    """
+    steps = _expand_first_multiple_estimation(formula)
+    if steps is None:
+        return [formula]
+    fully_expanded: list[str] = []
+    for step in steps:
+        fully_expanded.extend(_expand_all_multiple_estimation(step))
+    return fully_expanded
+
+
+def _split_formula_into_parts(formula: str) -> Formula:
+    """Split on top-level '|' (parenthesis-aware, like `_validate`) into a `Formula`."""
+    second_stage, *parts = _split_parenthesis_preserving(string=formula, separator="|")
+    first_stage = next((part for part in parts if "~" in part), None)
+    fixed_effects = next((part for part in parts if "~" not in part), None)
+    if fixed_effects in ("0", "1"):
+        fixed_effects = None  # `| 0` and `| 1` are treated as "no fixed effects"
+    return Formula(
+        _second_stage=second_stage,
+        _fixed_effects=fixed_effects,
+        _first_stage=first_stage,
+    )
diff --git a/pyfixest/estimation/formula/utils.py b/pyfixest/estimation/formula/utils.py
new file mode 100644
index 000000000..7d77ca3a5
--- /dev/null
+++ b/pyfixest/estimation/formula/utils.py
@@ -0,0 +1,124 @@
+import re
+import warnings
+from enum import StrEnum
+
+import numpy as np
+import pandas as pd
+
+
+def log(array: np.ndarray) -> np.ndarray:
+    """
+    Natural logarithm that yields NaN wherever the result would not be finite.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        Values to take the logarithm of.
+
+    Returns
+    -------
+    np.ndarray
+        float64 array holding log(array) for finite, strictly positive entries
+        and NaN for all other entries. A warning is emitted if any entry is
+        replaced.
+    """
+    is_loggable = (array > 0.0) & np.isfinite(array)
+    if not is_loggable.all():
+        message = f"{np.sum(~is_loggable)} rows with infinite values detected. These rows are dropped from the model."
+        warnings.warn(message)
+    log_values = np.full_like(array, np.nan, dtype="float64")
+    np.log(array, out=log_values, where=is_loggable)
+    return log_values
+
+
+def _split_parenthesis_preserving(string: str, separator: str) -> list[str]:
+    """Split `string` on `separator` wherever it occurs outside parentheses.
+
+    Every piece is stripped; the result always holds at least one element.
+    """
+    pieces: list[str] = []
+    nesting = start = 0
+    for index, char in enumerate(string):
+        if char == "(":
+            nesting += 1
+        elif char == ")":
+            nesting -= 1
+        elif nesting == 0 and char == separator:
+            pieces.append(string[start:index].strip())
+            start = index + 1
+    pieces.append(string[start:].strip())
+    return pieces
+
+
+def _get_position_of_first_parenthesis_pair(string: str) -> tuple[int, int]:
+    """Return `(start, stop)` of the text inside the first parenthesis pair.
+
+    `stop` indexes the matching ')'. Raises ValueError if no pair is found.
+    """
+    first_open = string.find("(")
+    if first_open == -1:
+        raise ValueError(f"No parenthesis in `{string}`")
+    depth = 0  # counting starts on the '(' itself, so no character is skipped
+    for position in range(first_open, len(string)):
+        if string[position] == "(":
+            depth += 1
+        elif string[position] == ")":
+            depth -= 1
+            if depth == 0:
+                return first_open + 1, position
+    raise ValueError(f"Unmatched '(' in `{string}`")
+
+
+def _encode_fixed_effects(fixed_effects: str, data: pd.DataFrame) -> str:
+    """Translate fixest fixed effects into a formulaic formula without intercept.
+
+    Interacted fixed effects are materialised as new columns of `data`, in place.
+    """
+    # dict.fromkeys de-duplicates in input order; a set's order varies between runs
+    fes = list(dict.fromkeys(re.split(r"\s*\+\s*", fixed_effects)))
+    for fixed_effect in fes:
+        if "^" not in fixed_effect:
+            continue
+        # `a^b` becomes the string column `a_b`; NA in any component stays NA (na_rep=None)
+        first, *others = fixed_effect.split("^")
+        data[fixed_effect.replace("^", "_")] = (
+            data[first]
+            .astype(pd.StringDtype())
+            .str.cat(data[others].astype(pd.StringDtype()), sep="^", na_rep=None)
+        )
+    encoded = (f"__fixed_effect__({fe.replace('^', '_')})" for fe in fes)
+    return f"{' + '.join(encoded)} - 1"
+
+
+def _factorize(series: pd.Series) -> np.ndarray:
+    """Encode `series` as float codes, keeping missing values as NaN."""
+    codes, _ = pd.factorize(series, use_na_sentinel=True)
+    # use_na_sentinel=True marks missing values with -1; turn those back into NaN
+    return np.where(codes == -1, np.nan, codes)
+
+
+def _get_weights(data: pd.DataFrame, weights: str) -> pd.Series:
+    """Return the weights column as numeric; all non-missing weights must be > 0."""
+    try:
+        w = pd.to_numeric(data[weights], errors="raise")
+    except ValueError as error:
+        raise ValueError(f"The weights column '{weights}' must be numeric.") from error
+    if not (w.dropna() > 0.0).all():  # strictly positive: zeros are rejected too
+        raise ValueError(
+            f"The weights column '{weights}' must have only non-negative values."
+        )
+    return w
+
+
+class _MultipleEstimationType(StrEnum):
+    """The fixest stepwise functions; see https://lrberge.github.io/fixest/reference/stepwise.html"""
+    sw = "sequential stepwise"  # one formula per argument
+    csw = "cumulative stepwise"  # arguments are added one after another
+    sw0 = "sequential stepwise with zero step"  # sw, preceded by a step without the arguments
+    csw0 = "cumulative stepwise with zero step"  # csw, preceded by a step without the arguments
+    mvsw = "multiverse stepwise"  # all combinations of the arguments, plus the zero step
+
+
+_MULTIPLE_ESTIMATION_PATTERN = re.compile(  # finds a stepwise call such as `csw(X1, X2)`
+    rf"\b({'|'.join(me.name for me in _MultipleEstimationType)})\b\(.+\)"  # group 1: the member name
+)
diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py
index 2a6b713a8..cdc38828e 100644
--- a/pyfixest/estimation/model_matrix_fixest_.py
+++ b/pyfixest/estimation/model_matrix_fixest_.py
@@ -8,7 +8,7 @@
 from formulaic import Formula
 
 from pyfixest.estimation.detect_singletons_ import detect_singletons
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.utils.utils import capture_context
 
 
@@ -92,12 +92,23 @@ def model_matrix_fixest(
     mm = model_matrix_fixest(FixestFormula, data)
     mm
     ```
+
+    .. deprecated::
+        This function is deprecated and will be removed in a future version.
+        Use `pyfixest.estimation.formula.model_matrix.create_model_matrix()` with a `Formula` object instead.
+        See https://py-econometrics.github.io/pyfixest/reference/estimation.formula.model_matrix.ModelMatrix.html
     """
-    FixestFormula.check_syntax()
+    warnings.warn(
+        "model_matrix_fixest is deprecated and will be removed in a future version. "
+        "Use `pyfixest.estimation.formula.model_matrix.create_model_matrix()` with a `Formula` object instead. "
+        "See https://py-econometrics.github.io/pyfixest/reference/estimation.formula.model_matrix.ModelMatrix.html",
+        FutureWarning,
+        stacklevel=2,
+    )
 
-    fml_second_stage = FixestFormula.fml_second_stage
-    fml_first_stage = FixestFormula.fml_first_stage
-    fval = FixestFormula._fval
+    fml_second_stage = FixestFormula.second_stage
+    fml_first_stage = FixestFormula.first_stage
+    fval = FixestFormula.fixed_effects
     _check_weights(weights, data)
 
     pattern = (
@@ -123,13 +134,14 @@ def model_matrix_fixest(
         else fml_first_stage
     )
 
-    fval, data = _fixef_interactions(fval=fval, data=data)
+    if fval is not None:
+        fval, data = _fixef_interactions(fval=fval, data=data)
     _is_iv = fml_first_stage is not None
 
     fml_kwargs = {
         "fml_second_stage": fml_second_stage,
         **({"fml_first_stage": fml_first_stage} if _is_iv else {}),
-        **({"fe": wrap_factorize(fval)} if fval != "0" else {}),
+        **({"fe": wrap_factorize(fval)} if fval is not None else {}),
         **({"weights": weights} if weights is not None else {}),
     }
 
@@ -148,7 +160,7 @@ def model_matrix_fixest(
     if _is_iv:
         endogvar = mm["fml_first_stage"]["lhs"]
         Z = mm["fml_first_stage"]["rhs"]
-    if fval != "0":
+    if fval is not None:
         fe = mm["fe"]
     if weights is not None:
         weights_df = mm["weights"]
diff --git a/pyfixest/estimation/prediction.py b/pyfixest/estimation/prediction.py
index 5dfcae5ee..37b4d36a7 100644
--- a/pyfixest/estimation/prediction.py
+++ b/pyfixest/estimation/prediction.py
@@ -1,3 +1,4 @@
+import re
 import warnings
 from collections.abc import Mapping
 from typing import Any, Optional, Union
@@ -59,7 +60,7 @@ def get_design_matrix_and_yhat(
                 )
 
             if hasattr(model, "_model_spec") and model._model_spec is not None:
-                rhs_spec = model._model_spec.fml_second_stage.rhs
+                rhs_spec = model._model_spec.second_stage.rhs
                 X = rhs_spec.get_model_matrix(newdata, context=context)
             else:
                 xfml = model._fml.split("|")[0].split("~")[1]
@@ -112,7 +113,7 @@ def _get_fixed_effects_prediction_component(
         if model._sumFE is None:
             model.fixef(atol, btol)
 
-        fvals = model._fixef.split("+")
+        fvals = re.split(r"\s*\+\s*", model._fixef)
 
         # warn if newdata types do not match
         mismatched_fixef_types = [
diff --git a/pyfixest/estimation/quantreg/QuantregMulti.py b/pyfixest/estimation/quantreg/QuantregMulti.py
index 598f66ae1..a5ef9441c 100644
--- a/pyfixest/estimation/quantreg/QuantregMulti.py
+++ b/pyfixest/estimation/quantreg/QuantregMulti.py
@@ -7,7 +7,7 @@
 import pandas as pd
 from scipy.stats import norm
 
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import (
     QuantregMethodOptions,
     QuantregMultiOptions,
@@ -34,7 +34,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         solver: SolverOptions = "np.linalg.solve",
         demeaner_backend: Literal["numba", "jax"] = "numba",
         store_data: bool = True,
diff --git a/pyfixest/estimation/quantreg/quantreg_.py b/pyfixest/estimation/quantreg/quantreg_.py
index 0ba5e76aa..7f2bc9224 100644
--- a/pyfixest/estimation/quantreg/quantreg_.py
+++ b/pyfixest/estimation/quantreg/quantreg_.py
@@ -10,7 +10,7 @@
 from scipy.stats import norm
 
 from pyfixest.estimation.feols_ import Feols
-from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.formula.parse import Formula as FixestFormula
 from pyfixest.estimation.literals import (
     QuantregMethodOptions,
     SolverOptions,
@@ -37,7 +37,7 @@ def __init__(
         collin_tol: float,
         fixef_tol: float,
         fixef_maxiter: int,
-        lookup_demeaned_data: dict[str, pd.DataFrame],
+        lookup_demeaned_data: dict[frozenset[int], pd.DataFrame],
         solver: SolverOptions = "np.linalg.solve",
         demeaner_backend: Literal["numba", "jax"] = "numba",
         store_data: bool = True,
@@ -93,9 +93,9 @@ def __init__(
         self._quantile_maxiter = quantile_maxiter
 
         self._model_name = (
-            FixestFormula.fml
+            FixestFormula.formula
             if self._sample_split_var is None
-            else f"{FixestFormula.fml} (Sample: {self._sample_split_var} = {self._sample_split_value})"
+            else f"{FixestFormula.formula} (Sample: {self._sample_split_var} = {self._sample_split_value})"
         )
         # update with quantile name
         self._model_name = f"{self._model_name} (q = {quantile})"
diff --git a/pyfixest/estimation/vcov_utils.py b/pyfixest/estimation/vcov_utils.py
index 9a6992b9f..19deab574 100644
--- a/pyfixest/estimation/vcov_utils.py
+++ b/pyfixest/estimation/vcov_utils.py
@@ -62,7 +62,7 @@ def _count_G_for_ssc_correction(
 
 
 def _get_vcov_type(
-    vcov: Union[str, dict[str, str], None], fval: str
+    vcov: Union[str, dict[str, str], None],
 ) -> Union[str, dict[str, str]]:
     """
     Pass the specified vcov type.
@@ -74,8 +74,6 @@ def _get_vcov_type(
     ----------
     vcov : Union[str, dict[str, str], None]
         The specified vcov type.
-    fval : str
-        The specified fixed effects. (i.e. "X1+X2")
 
     Returns
     -------
diff --git a/tests/test_demean.py b/tests/test_demean.py
index e79ed2844..46cfeda2d 100644
--- a/tests/test_demean.py
+++ b/tests/test_demean.py
@@ -85,7 +85,7 @@ def test_demean_model_no_fixed_effects(benchmark, demean_func):
         fe=None,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -123,7 +123,7 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -138,8 +138,8 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func):
     assert Xd.columns.equals(X.columns)
 
     # Verify results are cached in lookup_dict
-    assert "test" in lookup_dict
-    cached_data = lookup_dict["test"][1]
+    assert frozenset() in lookup_dict
+    cached_data = lookup_dict[frozenset()][1]
     assert np.allclose(cached_data[Y.columns].values, Yd.values)
     assert np.allclose(cached_data[X.columns].values, Xd.values)
 
@@ -168,7 +168,7 @@ def test_demean_model_with_weights(benchmark, demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -181,7 +181,7 @@ def test_demean_model_with_weights(benchmark, demean_func):
         fe=fe,
         weights=np.ones(N),
         lookup_demeaned_data={},
-        na_index_str="test2",
+        na_index=frozenset({1}),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -215,7 +215,7 @@ def test_demean_model_caching(benchmark, demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -229,7 +229,7 @@ def test_demean_model_caching(benchmark, demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -249,7 +249,7 @@ def test_demean_model_caching(benchmark, demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=10_000,
         demean_func=demean_func,
@@ -288,7 +288,7 @@ def test_demean_model_maxiter_convergence_failure(demean_func):
             fe=fe,
             weights=weights,
             lookup_demeaned_data=lookup_dict,
-            na_index_str="test",
+            na_index=frozenset(),
             fixef_tol=1e-6,
             fixef_maxiter=1,  # Very small limit
             demean_func=demean_func,
@@ -318,7 +318,7 @@ def test_demean_model_custom_maxiter_success(demean_func):
         fe=fe,
         weights=weights,
         lookup_demeaned_data=lookup_dict,
-        na_index_str="test",
+        na_index=frozenset(),
         fixef_tol=1e-6,
         fixef_maxiter=5000,  # Custom limit
         demean_func=demean_func,
diff --git a/tests/test_did.py b/tests/test_did.py
index bff62b13d..394d461a0 100644
--- a/tests/test_did.py
+++ b/tests/test_did.py
@@ -38,8 +38,7 @@ def data():
     return df_het
 
 
-@pytest.mark.skipif(import_check is False, reason="R package did2s not installed.")
-@pytest.mark.against_r_extended
+@pytest.mark.against_r_core
 def test_event_study(data):
     """Test the event_study() function."""
     fit_did2s = event_study(
@@ -80,8 +79,7 @@ def test_event_study(data):
     np.testing.assert_allclose(fit_did2s.se(), float(r_df[2]), atol=1e-05, rtol=1e-05)
 
 
-@pytest.mark.skipif(import_check is False, reason="R package did2s not installed.")
-@pytest.mark.against_r_extended
+@pytest.mark.against_r_core
 @pytest.mark.parametrize("weights", [None, "weights"])
 def test_did2s(data, weights):
     """Test the did2s() function."""
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 52566f7bb..1260863ef 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -7,6 +7,7 @@
 from pyfixest.errors import (
     DuplicateKeyError,
     EndogVarsAsCovarsError,
+    FormulaSyntaxError,
     InstrumentsAsCovarsError,
     NanInClusterVarError,
     UnderDeterminedIVError,
@@ -94,8 +95,10 @@ def test_iv_errors():
     data = get_data()
 
     # under determined
+    with pytest.raises(FormulaSyntaxError):
+        feols(fml="Y ~ X1 | Z1 + Z2 ~ X2", data=data)
     with pytest.raises(UnderDeterminedIVError):
-        feols(fml="Y ~ X1 | Z1 + Z2 ~ 24 ", data=data)
+        feols(fml="Y ~ X1 | Z1 ~ 1", data=data)
     # instrument specified as covariate
     with pytest.raises(InstrumentsAsCovarsError):
         feols(fml="Y ~ X1 | Z1  ~ X1 + X2", data=data)
@@ -118,12 +121,12 @@ def test_iv_errors():
     with pytest.raises(NotImplementedError):
         feols(fml="Y ~ 1 | Z1 ~ X1 ", data=data).wildboottest(param="Z1", reps=999)
     # multi estimation error
-    with pytest.raises(NotImplementedError):
-        feols(fml="Y + Y2 ~ 1 | Z1 ~ X1 ", data=data)
-    with pytest.raises(NotImplementedError):
-        feols(fml="Y  ~ 1 | sw(f2, f3) | Z1 ~ X1 ", data=data)
-    with pytest.raises(NotImplementedError):
-        feols(fml="Y  ~ 1 | csw(f2, f3) | Z1 ~ X1 ", data=data)
+    # with pytest.raises(NotImplementedError):
+    #     feols(fml="Y + Y2 ~ 1 | Z1 ~ X1 ", data=data)
+    # with pytest.raises(NotImplementedError):
+    #     feols(fml="Y  ~ 1 | sw(f2, f3) | Z1 ~ X1 ", data=data)
+    # with pytest.raises(NotImplementedError):
+    #     feols(fml="Y  ~ 1 | csw(f2, f3) | Z1 ~ X1 ", data=data)
     # unsupported HC vcov
     with pytest.raises(VcovTypeNotSupportedError):
         feols(fml="Y  ~ 1 | Z1 ~ X1", vcov="HC2", data=data)
@@ -393,16 +396,14 @@ def test_i_error():
     data = get_data()
     data["f2"] = pd.Categorical(data["f2"])
 
-    with pytest.raises(ValueError):
-        feols("Y ~ i(f1, f2)", data)
-
-    data["f2"] = data["f2"].astype("object")
-    with pytest.raises(ValueError):
-        feols("Y ~ i(f1, f2)", data)
-
     with pytest.raises(FactorEvaluationError):
+        # Incorrectly specified reference (a instead of 'a')
         feols("Y ~ i(f1, X1, ref=a)", data)
 
+    with pytest.raises(ValueError):
+        # Reference level not in data
+        feols("Y ~ i(f1, X1, ref='a')", data)
+
 
 def test_plot_error():
     df = get_data()
diff --git a/tests/test_formula_parse.py b/tests/test_formula_parse.py
new file mode 100644
index 000000000..5ed238d70
--- /dev/null
+++ b/tests/test_formula_parse.py
@@ -0,0 +1,379 @@
+"""
+Tests for the formula parsing implementation in pyfixest/estimation/formula/parse.py.
+
+This module contains:
+- Part 1: Unit tests for Formula.parse() and internal parsing functions
+- Part 2: End-to-end compatibility tests via feols()
+- Part 3: Edge case tests
+"""
+
+import re
+
+import numpy as np
+import pytest
+
+import pyfixest as pf
+from pyfixest.errors import FormulaSyntaxError
+from pyfixest.estimation.formula.parse import Formula, _expand_all_multiple_estimation
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+
+@pytest.fixture(scope="module")
+def test_data():
+    """Generate test data for compatibility tests."""
+    return pf.get_data(N=500, seed=12345)
+
+
+# =============================================================================
+# Part 1: Unit Tests for formula/parse.py
+# =============================================================================
+
+
+class TestMultipleEstimationExpansion:
+    """Tests for multiple estimation expansion."""
+
+    @pytest.mark.parametrize(
+        "formula,expected",
+        [
+            # No multiple estimation
+            ("Y ~ X1", ["Y ~ X1"]),
+            ("Y ~ X1 + X2", ["Y ~ X1 + X2"]),
+            # sw() cases
+            ("Y ~ sw(X1, X2)", ["Y ~ X1", "Y ~ X2"]),
+            ("Y ~ A + sw(X1, X2)", ["Y ~ A + X1", "Y ~ A + X2"]),
+            ("Y ~ sw(X1, X2, X3)", ["Y ~ X1", "Y ~ X2", "Y ~ X3"]),
+            # csw() cases
+            ("Y ~ csw(X1, X2)", ["Y ~ X1", "Y ~ X1 + X2"]),
+            (
+                "Y ~ A + csw(X1, X2, X3)",
+                [
+                    "Y ~ A + X1",
+                    "Y ~ A + X1 + X2",
+                    "Y ~ A + X1 + X2 + X3",
+                ],
+            ),
+            # sw0() cases
+            ("Y ~ sw0(X1, X2)", ["Y ~ 1", "Y ~ X1", "Y ~ X2"]),
+            ("Y ~ A + sw0(X1, X2)", ["Y ~ A + 1", "Y ~ A + X1", "Y ~ A + X2"]),
+            # csw0() cases
+            ("Y ~ csw0(X1, X2)", ["Y ~ 1", "Y ~ X1", "Y ~ X1 + X2"]),
+            (
+                "Y ~ A + csw0(X1, X2, X3)",
+                [
+                    "Y ~ A + 1",
+                    "Y ~ A + X1",
+                    "Y ~ A + X1 + X2",
+                    "Y ~ A + X1 + X2 + X3",
+                ],
+            ),
+            # mvsw() cases - all combinations of arguments, with zero step
+            (
+                "Y ~ mvsw(X1, X2)",
+                ["Y ~ 1", "Y ~ X1", "Y ~ X2", "Y ~ X1 + X2"],
+            ),
+            (
+                "Y ~ mvsw(X1, X2, X3)",
+                [
+                    "Y ~ 1",
+                    "Y ~ X1",
+                    "Y ~ X2",
+                    "Y ~ X3",
+                    "Y ~ X1 + X2",
+                    "Y ~ X1 + X3",
+                    "Y ~ X2 + X3",
+                    "Y ~ X1 + X2 + X3",
+                ],
+            ),
+            (
+                "Y ~ A + mvsw(X1, X2)",
+                ["Y ~ A + 1", "Y ~ A + X1", "Y ~ A + X2", "Y ~ A + X1 + X2"],
+            ),
+            (
+                "Y ~ A + mvsw(X1, X2, X3)",
+                [
+                    "Y ~ A + 1",
+                    "Y ~ A + X1",
+                    "Y ~ A + X2",
+                    "Y ~ A + X3",
+                    "Y ~ A + X1 + X2",
+                    "Y ~ A + X1 + X3",
+                    "Y ~ A + X2 + X3",
+                    "Y ~ A + X1 + X2 + X3",
+                ],
+            ),
+            # mvsw() with single argument
+            ("Y ~ mvsw(X1)", ["Y ~ 1", "Y ~ X1"]),
+            # mvsw() with fixed effects
+            (
+                "Y ~ mvsw(X1, X2) | f1",
+                ["Y ~ 1 | f1", "Y ~ X1 | f1", "Y ~ X2 | f1", "Y ~ X1 + X2 | f1"],
+            ),
+            # mvsw() in fixed effects
+            (
+                "Y ~ X1 | mvsw(f1, f2)",
+                ["Y ~ X1 | 1", "Y ~ X1 | f1", "Y ~ X1 | f2", "Y ~ X1 | f1 + f2"],
+            ),
+            # Multiple estimation with sums of variables
+            ("Y ~ sw0(f1, f1+f2)", ["Y ~ 1", "Y ~ f1", "Y ~ f1+f2"]),
+            ("Y ~ csw0(f1, f1+f2)", ["Y ~ 1", "Y ~ f1", "Y ~ f1 + f1+f2"]),
+            # Fixed effects with multiple estimation
+            ("Y ~ X1 | sw(f1, f2)", ["Y ~ X1 | f1", "Y ~ X1 | f2"]),
+        ],
+    )
+    def test_expand_all_multiple_estimation(self, formula, expected):
+        """Test expansion of multiple estimation syntax."""
+        result = _expand_all_multiple_estimation(formula)
+        assert result == expected
+
+
+class TestFormulaParse:
+    """Tests for Formula.parse() and Formula.parse_to_dict()."""
+
+    @pytest.mark.parametrize(
+        "formula,expected_count",
+        [
+            ("Y ~ X1", 1),
+            ("Y ~ sw(X1, X2)", 2),
+            ("Y ~ csw(X1, X2)", 2),
+            ("Y ~ sw0(X1, X2)", 3),
+            ("Y ~ csw0(X1, X2)", 3),
+            ("Y ~ mvsw(X1, X2)", 4),
+            ("Y ~ mvsw(X1, X2, X3)", 8),
+        ],
+    )
+    def test_parse_count(self, formula, expected_count):
+        """Test that parse returns the correct number of Formula objects."""
+        result = Formula.parse(formula)
+        assert len(result) == expected_count
+
+    def test_parse_basic(self):
+        """Test parsing a basic formula with no fixed effects or IV."""
+        result = Formula.parse("Y ~ X1 + X2")
+        assert len(result) == 1
+        f = result[0]
+        assert f.second_stage == "Y ~ X1 + X2"
+        assert f.fixed_effects is None
+        assert f.first_stage is None
+
+    def test_parse_with_fe(self):
+        """Test parsing a formula with fixed effects."""
+        result = Formula.parse("Y ~ X1 | f1")
+        assert len(result) == 1
+        f = result[0]
+        assert f.second_stage == "Y ~ X1"
+        assert f.fixed_effects == "f1"
+
+    # def test_parse_iv(self):
+    #     result = Formula.parse("Y ~ X1 | f1 | Z1 ~ W1")
+    #     assert len(result) == 1
+    #     f = result[0]
+    #     assert f.second_stage == "Y ~ X1 + Z1"
+    #     assert f.fixed_effects == "f1"
+    #     assert f.first_stage == "Z1 ~ W1"
+
+    def test_parse_multiple_dependents(self):
+        """Y + Y2 ~ X1 is preprocessed to sw(Y, Y2) ~ X1."""
+        result = Formula.parse("Y + Y2 ~ X1")
+        assert len(result) == 2
+        assert result[0].second_stage == "Y ~ X1"
+        assert result[1].second_stage == "Y2 ~ X1"
+
+    def test_parse_to_dict_groups_by_fe(self):
+        """Test parsing of formulas into dictionary."""
+        result = Formula.parse_to_dict("Y ~ X1 | sw(f1, f2)")
+        assert "f1" in result
+        assert "f2" in result
+        assert len(result["f1"]) == 1
+        assert len(result["f2"]) == 1
+
+    def test_parse_to_dict_no_fe(self):
+        """Test parsing of formulas into dictionary without fixed effects."""
+        result = Formula.parse_to_dict("Y ~ X1")
+        assert None in result
+        assert len(result[None]) == 1
+
+    def test_parse_sw_in_fe_and_independent(self):
+        """Cross-product: sw in both independent and FE."""
+        result = Formula.parse("Y ~ sw(X1, X2) | sw(f1, f2)")
+        assert len(result) == 4  # 2 x 2
+
+
+class TestValidation:
+    """Tests for formula validation / error handling."""
+
+    def test_no_tilde(self):
+        """Check minimum number of tildes."""
+        with pytest.raises(FormulaSyntaxError):
+            Formula.parse("Y X1")
+
+    def test_too_many_parts(self):
+        """Check maximum number of formula parts is not exceeded."""
+        with pytest.raises(FormulaSyntaxError):
+            Formula.parse("Y ~ X1 | f1 | Z1 ~ X2 | extra")
+
+    def test_too_many_tildes_in_part(self):
+        """Check maximum number of tildes is not exceeded."""
+        with pytest.raises(FormulaSyntaxError):
+            Formula.parse("Y ~ X1 ~ X2 ~ X3")
+
+    def test_three_parts_without_iv(self):
+        """Y ~ X | f1 | f2 should error (should be Y ~ X | f1 + f2)."""
+        with pytest.raises(FormulaSyntaxError, match="Three-part formula"):
+            Formula.parse("Y ~ X1 | f1 | f2")
+
+    def test_three_parts_with_tilde_in_fe(self):
+        """Y ~ X | Z ~ W | A ~ B should error (FE part has tilde)."""
+        with pytest.raises(
+            FormulaSyntaxError, match=re.compile("fixed effects.*cannot contain")
+        ):
+            Formula.parse("Y ~ X | Z ~ W | A ~ B")
+
+    def test_first_part_must_have_tilde(self):
+        """Formula must have at least one tilde."""
+        with pytest.raises(FormulaSyntaxError):
+            Formula.parse("Y | f1")
+
+
+# =============================================================================
+# Part 2: End-to-end compatibility tests via feols()
+# =============================================================================
+
+
+@pytest.mark.parametrize(
+    "formula,expected_n_models",
+    [
+        ("Y ~ X1", 1),
+        ("Y ~ sw(X1, X2)", 2),
+        ("Y ~ csw(X1, X2)", 2),
+        ("Y ~ sw0(X1, X2)", 3),
+        ("Y ~ csw0(X1, X2)", 3),
+        ("Y + Y2 ~ X1", 2),
+        ("Y ~ X1 | sw(f1, f2)", 2),
+        ("Y ~ mvsw(X1, X2)", 4),
+        ("Y ~ mvsw(X1, X2, Z1)", 8),
+        ("Y ~ mvsw(X1, X2) | f1", 4),
+        ("Y ~ sw(X1, X2) | csw(f1, f2)", 4),  # 2 x 2
+    ],
+)
+def test_correct_number_of_models(test_data, formula: str, expected_n_models: int):
+    """Verify the correct number of models are generated from multiple estimation syntax."""
+    fit = pf.feols(formula, data=test_data)
+
+    n_models = len(fit.to_list()) if hasattr(fit, "to_list") else 1
+
+    assert n_models == expected_n_models, (
+        f"Expected {expected_n_models} models for '{formula}', got {n_models}"
+    )
+
+
+def test_explicit_no_fe_coefficients_match(test_data):
+    """Verify Y ~ X1 | 1 produces same coefficients as Y ~ X1."""
+    fit_implicit = pf.feols("Y ~ X1", data=test_data)
+    fit_explicit = pf.feols("Y ~ X1 | 1", data=test_data)
+
+    assert np.allclose(fit_implicit.coef().values, fit_explicit.coef().values)
+    assert np.allclose(fit_implicit.se().values, fit_explicit.se().values)
+
+
+def test_explicit_no_fe_iv_coefficients_match(test_data):
+    """Verify Y ~ 1 | 1 | Y2 ~ X1 produces same coefficients as Y ~ 1 | Y2 ~ X1."""
+    fit_implicit = pf.feols("Y ~ 1 | Y2 ~ X1", data=test_data)
+    fit_explicit = pf.feols("Y ~ 1 | 1 | Y2 ~ X1", data=test_data)
+
+    assert np.allclose(fit_implicit.coef().values, fit_explicit.coef().values)
+    assert np.allclose(fit_implicit.se().values, fit_explicit.se().values)
+
+
+# =============================================================================
+# Part 3: Edge Case Tests
+# =============================================================================
+
+
+class TestEdgeCases:
+    """Test edge cases in formula parsing."""
+
+    def test_intercept_only(self):
+        """Test intercept only."""
+        result = Formula.parse("Y ~ 1")
+        assert len(result) == 1
+        assert result[0].second_stage == "Y ~ 1"
+
+    def test_no_fe_in_dict(self):
+        """No fixed effects results in None key in parse_to_dict."""
+        result = Formula.parse_to_dict("Y ~ X1")
+        assert None in result
+
+    def test_fe_key_in_dict(self):
+        """Fixed effects are used as keys in parse_to_dict."""
+        result = Formula.parse_to_dict("Y ~ X1 | f1")
+        assert "f1" in result
+
+    def test_multiple_dependent_variables(self):
+        """Test multiple dependent variables."""
+        result = Formula.parse("Y + Y2 + Y3 ~ X1")
+        assert len(result) == 3
+
+    def test_iv_endogenous_in_second_stage(self):
+        """Endogenous variable should be added to second_stage covariates."""
+        result = Formula.parse("Y ~ X1 | Z1 ~ W1")
+        f = result[0]
+        assert "Z1" in f.second_stage
+        # assert f.first_stage == "Z1 ~ W1"
+
+    def test_iv_with_fe_endogenous_in_second_stage(self):
+        """Endogenous variable should be in second_stage even with FE."""
+        result = Formula.parse("Y ~ X1 | f1 | Z1 ~ W1")
+        f = result[0]
+        assert "Z1" in f.second_stage
+        assert f.fixed_effects == "f1"
+        # assert f.first_stage == "Z1 ~ W1"
+
+    def test_explicit_no_fe_syntax(self):
+        """Y ~ X1 | 0 and Y ~ X1 should produce equivalent formulas."""
+        result_explicit = Formula.parse_to_dict("Y ~ X1 | 0")
+        result_implicit = Formula.parse_to_dict("Y ~ X1")
+
+        assert list(result_explicit.keys()) == [None]
+        assert list(result_implicit.keys()) == [None]
+
+        f_explicit = result_explicit[None][0]
+        f_implicit = result_implicit[None][0]
+        assert f_explicit.second_stage == f_implicit.second_stage
+        assert f_explicit.fixed_effects is None
+        assert f_implicit.fixed_effects is None
+
+    def test_explicit_no_fe_with_iv(self):
+        """Y ~ 1 | 0 | Z1 ~ X1 and Y ~ 1 | Z1 ~ X1 should be equivalent."""
+        result_explicit = Formula.parse_to_dict("Y ~ 1 | 0 | Z1 ~ X1")
+        result_implicit = Formula.parse_to_dict("Y ~ 1 | Z1 ~ X1")
+
+        assert list(result_explicit.keys()) == [None]
+        assert list(result_implicit.keys()) == [None]
+
+        f_explicit = result_explicit[None][0]
+        f_implicit = result_implicit[None][0]
+        assert f_explicit.second_stage == f_implicit.second_stage
+        assert f_explicit.fixed_effects is None
+        assert f_implicit.fixed_effects is None
+        assert f_explicit.first_stage == f_implicit.first_stage
+
+    def test_formula_roundtrip(self):
+        """Parsing a formula and reconstructing it should preserve structure."""
+        formulas = [
+            "Y ~ X1",
+            "Y ~ X1 + X2",
+            "Y ~ X1 | f1",
+            "Y ~ X1 | f1 + f2",
+        ]
+        for fml in formulas:
+            result = Formula.parse(fml)
+            assert len(result) == 1
+            # Reconstructed formula should re-parse to the same structure
+            reparsed = Formula.parse(result[0].formula)
+            assert len(reparsed) == 1
+            assert reparsed[0].second_stage == result[0].second_stage
+            assert reparsed[0].fixed_effects == result[0].fixed_effects
+            assert reparsed[0].first_stage == result[0].first_stage
diff --git a/tests/test_i.py b/tests/test_i.py
index cf549bae3..0b5b54b2f 100644
--- a/tests/test_i.py
+++ b/tests/test_i.py
@@ -1,10 +1,23 @@
+"""
+Comprehensive tests for pyfixest i() syntax.
+
+Tests cover:
+- Simple i(var) with different factor types
+- Factor x Continuous: i(var, continuous)
+- Factor x Factor: i(var1, var2)
+- Binning: bin and bin2 parameters
+- Intercept control: 0+, -1, 1+ syntax
+- Fixed effects combinations
+- Multiple i() terms
+"""
+
+import re
+
 import numpy as np
 import pandas as pd
 import pytest
 import rpy2.robjects as ro
 from rpy2.robjects import pandas2ri
-
-# rpy2 imports
 from rpy2.robjects.packages import importr
 
 from pyfixest.estimation import feols
@@ -13,104 +26,151 @@
 
 fixest = importr("fixest")
 stats = importr("stats")
-broom = importr("broom")
-
-
-@pytest.mark.against_r_core
-def test_i():
-    df_het = pd.read_csv("pyfixest/did/data/df_het.csv")
-    df_het["X"] = np.random.normal(size=len(df_het))
-
-    if (
-        "C(rel_year)[T.1.0]"
-        in feols("dep_var~i(rel_year, ref = 1.0)", df_het)._coefnames
-    ):
-        raise AssertionError("C(rel_year)[T.1.0] should not be in the column names.")
-    if (
-        "C(rel_year)[T.-2.0]"
-        in feols("dep_var~i(rel_year,ref=-2.0)", df_het)._coefnames
-    ):
-        raise AssertionError("C(rel_year)[T.-2.0] should not be in the column names.")
-
-    if (
-        "C(rel_year)[T.1.0]:treat"
-        in feols("dep_var~i(rel_year, treat, ref=1.0)", df_het)._coefnames
-    ):
-        raise AssertionError(
-            "C(rel_year)[T.1.0]:treat should not be in the column names."
-        )
-    if (
-        "C(rel_year)[T.-2.0]:treat"
-        in feols("dep_var~i(rel_year, treat,ref=-2.0)", df_het)._coefnames
-    ):
-        raise AssertionError(
-            "C(rel_year)[T.-2.0]:treat should not be in the column names."
-        )
-
-    with pytest.raises(ValueError):
-        feols("dep_var~i(rel_year, ref = [1.0, 'a'])", df_het)
-
-
-@pytest.mark.against_r_core
-def test_i_vs_fixest():
-    df_het = pd.read_csv("pyfixest/did/data/df_het.csv")
-    df_het = df_het[df_het["year"] >= 2010]
-    # ------------------------------------------------------------------------ #
-    # no fixed effects
-
-    # no references
-    fit_py = feols("dep_var~i(treat)", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(treat)"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
-    )
 
-    fit_py = feols("dep_var~i(rel_year)", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(rel_year)"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
-    )
+# Tolerances for coefficient comparison
+RTOL = 1e-5
+ATOL = 1e-8
 
-    # with references
-    fit_py = feols("dep_var~i(treat, ref = False)", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(treat, ref = FALSE)"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
-    )
 
-    fit_py = feols("dep_var~i(rel_year, ref = 1.0)", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(rel_year, ref = c(1))"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
-    )
+# =============================================================================
+# Helper Functions
+# =============================================================================
 
-    # ------------------------------------------------------------------------ #
-    # with fixed effects
 
-    # no references
-    fit_py = feols("dep_var~i(treat) | year", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(treat)|year"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
-    )
+def normalize_coef_name(name: str) -> str:
+    """Normalize coefficient name for comparison between R and Python."""
+    name = str(name)
+    # R uses (Intercept), Python uses Intercept
+    if name == "(Intercept)":
+        return "Intercept"
 
-    fit_py = feols("dep_var~i(rel_year) | year", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(rel_year)|year"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
+    # Normalize float formatting in factor levels (1.0 vs 1, -2.0 vs -2)
+    def normalize_float_level(match):
+        prefix = match.group(1)
+        num = float(match.group(2))
+        suffix = match.group(3) or ""
+        if num == int(num):
+            return f"{prefix}{int(num)}{suffix}"
+        return match.group(0)
+
+    name = re.sub(r"(::)(-?\d+\.0)(\b|:)", normalize_float_level, name)
+    return name
+
+
+def get_r_coef_names(fit_r) -> list[str]:
+    """Extract coefficient names from R fixest fit."""
+    ro.globalenv["fit_tmp"] = fit_r
+    names = ro.r("names(coef(fit_tmp))")
+    ro.r("rm(fit_tmp)")
+    if names is ro.NULL or names is None:
+        return []
+    return [normalize_coef_name(n) for n in names]
+
+
+def get_r_coef_values(fit_r) -> np.ndarray:
+    """Extract coefficient values from R fixest fit."""
+    ro.globalenv["fit_tmp"] = fit_r
+    coefs = ro.r("as.numeric(coef(fit_tmp))")
+    ro.r("rm(fit_tmp)")
+    return np.array(coefs)
+
+
+def assert_models_match(
+    py_names: list[str],
+    py_values: np.ndarray,
+    r_names: list[str],
+    r_values: np.ndarray,
+    check_names: bool = True,
+) -> None:
+    """Assert pyfixest and R fixest models match."""
+    assert len(py_names) == len(r_names), (
+        f"Coefficient count mismatch: py={len(py_names)}, r={len(r_names)}"
     )
+    if check_names:
+        assert py_names == r_names, f"Name mismatch:\n  py={py_names}\n  r={r_names}"
+    np.testing.assert_allclose(py_values, r_values, rtol=RTOL, atol=ATOL)
 
-    # with references
-    fit_py = feols("dep_var~i(treat,ref=False) | year", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(treat, ref = FALSE)|year"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
+
+def compare_with_r(
+    r_fml: str, df: pd.DataFrame, py_fml: str | None = None
+) -> tuple[list[str], np.ndarray, list[str], np.ndarray]:
+    """
+    Compare pyfixest and R fixest models.
+
+    Returns (py_names, py_values, r_names, r_values).
+    """
+    py_formula = py_fml if py_fml is not None else r_fml
+    fit_py = feols(py_formula, df)
+    py_names = [normalize_coef_name(str(n)) for n in fit_py._coefnames]
+    py_values = fit_py.coef().values
+
+    fit_r = fixest.feols(ro.Formula(r_fml), df)
+    r_names = get_r_coef_names(fit_r)
+    r_values = get_r_coef_values(fit_r)
+
+    return py_names, py_values, r_names, r_values
+
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+
+@pytest.fixture(scope="module")
+def df_het() -> pd.DataFrame:
+    """Load heterogeneous treatment effects data."""
+    np.random.seed(123)
+    df = pd.read_csv("pyfixest/did/data/df_het.csv")
+    df["X"] = np.random.normal(size=len(df))
+    return df
+
+
+@pytest.fixture(scope="module")
+def df_test() -> pd.DataFrame:
+    """Create test data with various factor types."""
+    np.random.seed(42)
+    n = 200
+
+    return pd.DataFrame(
+        {
+            "Y": np.random.randn(n),
+            "X1": np.random.randn(n),
+            "X2": np.random.randn(n),
+            # String factor
+            "f_str": np.random.choice(["apple", "banana", "cherry"], n),
+            # Integer factor
+            "f_int": np.random.choice([1, 2, 3, 10, 20], n),
+            # Float factor
+            "f_float": np.random.choice([1.0, 2.0, 3.0], n),
+            # Second string factor for interactions
+            "g": np.random.choice(["X", "Y", "Z"], n),
+            # Fixed effects
+            "fe1": np.random.choice(range(10), n),
+            "fe2": np.random.choice(range(5), n),
+        }
     )
 
-    fit_py = feols("dep_var~i(rel_year,ref=1.0) | year", df_het)
-    fit_r = fixest.feols(ro.Formula("dep_var~i(rel_year, ref = c(1))|year"), df_het)
-    np.testing.assert_allclose(
-        fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
+
+# =============================================================================
+# Basic i() Tests (existing)
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "formula,excluded_coef",
+    [
+        ("dep_var ~ i(rel_year, ref=1.0)", "rel_year::1"),
+        ("dep_var ~ i(rel_year, ref=-2.0)", "rel_year::-2"),
+        ("dep_var ~ i(rel_year, treat, ref=1.0)", "rel_year::1:treat"),
+        ("dep_var ~ i(rel_year, treat, ref=-2.0)", "rel_year::-2:treat"),
+    ],
+)
+def test_i_reference_exclusion(df_het, formula, excluded_coef):
+    """Test that reference levels are properly excluded."""
+    fit = feols(formula, df_het)
+    assert excluded_coef not in fit._coefnames, (
+        f"{excluded_coef} should not be in coefficient names"
     )
 
 
@@ -126,12 +186,317 @@ def test_i_vs_fixest():
         "dep_var ~ i(state, year, ref = 1) | state",
     ],
 )
-def test_i_interacted_fixest(fml):
-    df_het = pd.read_csv("pyfixest/did/data/df_het.csv")
-    df_het["X"] = np.random.normal(df_het.shape[0])
+def test_i_vs_fixest(fml):
+    """Test i() coefficients against R fixest for each parametrized formula."""
+    df = pd.read_csv("pyfixest/did/data/df_het.csv")
+    df["X"] = np.random.normal(size=df.shape[0])  # one draw per row, not a scalar
 
-    fit_py = feols(fml, df_het)
-    fit_r = fixest.feols(ro.Formula(fml), df_het)
+    fit_py = feols(fml, df)
+    fit_r = fixest.feols(ro.Formula(fml), df)
     np.testing.assert_allclose(
         fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
     )
+
+
+# =============================================================================
+# Intercept Control Tests (0+, -1, 1+)
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ 0 + i(f_str)",  # No intercept, keep all levels
+        "Y ~ -1 + i(f_str)",  # Same as 0 +
+        "Y ~ i(f_str) - 1",  # Alternative syntax
+    ],
+)
+def test_no_intercept_all_levels(df_test, fml):
+    """Test that without intercept, all levels are kept."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ 0 + i(f_str, ref='apple')",  # No intercept + explicit ref
+        "Y ~ -1 + i(f_str, ref='banana')",  # Same with different ref
+    ],
+)
+def test_no_intercept_with_ref(df_test, fml):
+    """Test no intercept with explicit reference level."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ 1 + i(f_str)",  # With intercept, drop first level
+        "Y ~ i(f_str)",  # Same (intercept implicit)
+    ],
+)
+def test_with_intercept_drop_level(df_test, fml):
+    """Test that with intercept, first level is dropped."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+# =============================================================================
+# Binning Tests
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+def test_binning_simple(df_test):
+    """Test i() with bin parameter."""
+    r_fml = "Y ~ i(f_str, bin=list(fruit=c('apple','banana')))"
+    py_fml = "Y ~ i(f_str, bin={'fruit': ['apple','banana']})"
+    py_names, py_values, r_names, r_values = compare_with_r(r_fml, df_test, py_fml)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+def test_binning_with_ref(df_test):
+    """Test i() with bin and ref parameters."""
+    r_fml = "Y ~ i(f_str, bin=list(fruit=c('apple','banana')), ref='fruit')"
+    py_fml = "Y ~ i(f_str, bin={'fruit': ['apple','banana']}, ref='fruit')"
+    py_names, py_values, r_names, r_values = compare_with_r(r_fml, df_test, py_fml)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+def test_binning_with_continuous(df_test):
+    """Test i() with bin parameter and continuous interaction."""
+    r_fml = "Y ~ i(f_str, X1, bin=list(fruit=c('apple','banana')))"
+    py_fml = "Y ~ i(f_str, X1, bin={'fruit': ['apple','banana']})"
+    py_names, py_values, r_names, r_values = compare_with_r(r_fml, df_test, py_fml)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+# =============================================================================
+# Factor x Factor Tests
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "r_fml,py_fml",
+    [
+        ("Y ~ i(f_str, i.g)", "Y ~ i(f_str, g)"),
+        ("Y ~ i(f_str, i.g, ref='apple')", "Y ~ i(f_str, g, ref='apple')"),
+        (
+            "Y ~ i(f_str, i.g, ref='apple', ref2='X')",
+            "Y ~ i(f_str, g, ref='apple', ref2='X')",
+        ),
+    ],
+)
+def test_factor_x_factor(df_test, r_fml, py_fml):
+    """Test i(factor1, factor2) interactions."""
+    py_names, py_values, r_names, r_values = compare_with_r(r_fml, df_test, py_fml)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "r_fml,py_fml",
+    [
+        ("Y ~ i(f_str, i.g) | fe1", "Y ~ i(f_str, g) | fe1"),
+        (
+            "Y ~ i(f_str, i.g, ref='apple', ref2='X') | fe1",
+            "Y ~ i(f_str, g, ref='apple', ref2='X') | fe1",
+        ),
+    ],
+)
+def test_factor_x_factor_with_fe(df_test, r_fml, py_fml):
+    """Test i(factor1, factor2) with fixed effects."""
+    py_names, py_values, r_names, r_values = compare_with_r(r_fml, df_test, py_fml)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+# =============================================================================
+# Multiple i() Terms
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str) + i(g)",
+        "Y ~ i(f_str, ref='apple') + i(g, ref='X')",
+        "Y ~ X1 + i(f_str) + i(g)",
+    ],
+)
+def test_multiple_i_terms(df_test, fml):
+    """Test multiple i() terms in one formula."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str) + i(g) | fe1",
+        "Y ~ i(f_str, ref='apple') + i(g, ref='X') | fe1",
+    ],
+)
+def test_multiple_i_terms_with_fe(df_test, fml):
+    """Test multiple i() terms with fixed effects."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+# =============================================================================
+# Different Factor Types
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str)",
+        "Y ~ i(f_str, ref='apple')",
+        "Y ~ i(f_int)",
+        "Y ~ i(f_int, ref=1)",
+        "Y ~ i(f_float)",
+        "Y ~ i(f_float, ref=1)",
+    ],
+)
+def test_factor_types(df_test, fml):
+    """Test i() with string, integer, and float factors."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str, X1)",
+        "Y ~ i(f_str, X1, ref='apple')",
+        "Y ~ i(f_int, X1)",
+        "Y ~ i(f_int, X1, ref=1)",
+    ],
+)
+def test_factor_x_continuous(df_test, fml):
+    """Test i(factor, continuous) with different factor types."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+# =============================================================================
+# Edge Cases
+# =============================================================================
+
+
+@pytest.mark.against_r_core
+def test_interacted_fixed_effects(df_test):
+    """Test i() with interacted fixed effects."""
+    fml = "Y ~ i(f_str) | fe1^fe2"
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+def test_i_with_same_var_standalone(df_test):
+    """Test i(f, X) when X is also used standalone."""
+    fml = "Y ~ X1 + i(f_str, X1)"
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_test)
+    assert_models_match(py_names, py_values, r_names, r_values, check_names=False)
+
+
+# =============================================================================
+# Null Value Handling Tests
+# =============================================================================
+
+
+@pytest.fixture(scope="module")
+def df_with_nulls() -> pd.DataFrame:
+    """Create test data with null values in various positions."""
+    np.random.seed(42)
+    n = 100
+
+    df = pd.DataFrame(
+        {
+            "Y": np.random.randn(n),
+            "X1": np.random.randn(n),
+            "X2": np.random.randn(n),
+            "f_str": np.random.choice(["A", "B", "C"], n),
+            "f_int": np.random.choice([1, 2, 3], n),
+            "fe": np.random.choice(range(5), n),
+        }
+    )
+
+    # Introduce nulls in different variables at different positions
+    df.loc[[5, 15, 25, 35, 45], "Y"] = np.nan  # Nulls in dependent variable
+    df.loc[[10, 20, 30], "X1"] = np.nan  # Nulls in continuous variable
+    df.loc[[12, 22, 32], "f_str"] = np.nan  # Nulls in factor variable
+    df.loc[[14, 24], "X2"] = np.nan  # Nulls in another continuous variable
+
+    return df
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str)",  # Simple i() with nulls in Y and f_str
+        "Y ~ i(f_str, X1)",  # i() with continuous, nulls in Y, f_str, X1
+        "Y ~ i(f_str) + X2",  # i() with covariate, nulls in multiple vars
+        "Y ~ i(f_int)",  # i() with integer factor
+        "Y ~ i(f_int, X1)",  # i() with integer factor and continuous
+    ],
+)
+def test_null_handling(df_with_nulls, fml):
+    """Test that null values are handled consistently between pyfixest and fixest."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_with_nulls)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+@pytest.mark.parametrize(
+    "fml",
+    [
+        "Y ~ i(f_str) | fe",  # With fixed effects
+        "Y ~ i(f_str, X1) | fe",  # i() with continuous and FE
+        "Y ~ i(f_str) + X2 | fe",  # i() with covariate and FE
+    ],
+)
+def test_null_handling_with_fe(df_with_nulls, fml):
+    """Test null handling with fixed effects."""
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_with_nulls)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+def test_null_handling_with_ref(df_with_nulls):
+    """Test null handling with explicit reference level."""
+    fml = "Y ~ i(f_str, ref='A')"
+    py_names, py_values, r_names, r_values = compare_with_r(fml, df_with_nulls)
+    assert_models_match(py_names, py_values, r_names, r_values)
+
+
+@pytest.mark.against_r_core
+def test_null_handling_nobs(df_with_nulls):
+    """Test that number of observations matches after null removal."""
+    fml = "Y ~ i(f_str, X1) + X2"
+
+    fit_py = feols(fml, df_with_nulls)
+    fit_r = fixest.feols(ro.Formula(fml), df_with_nulls)
+
+    # Extract number of observations from R
+    ro.globalenv["fit_tmp"] = fit_r
+    r_nobs = int(ro.r("fit_tmp$nobs")[0])
+    ro.r("rm(fit_tmp)")
+
+    # Compare number of observations
+    assert r_nobs == fit_py._N, (
+        f"Number of observations mismatch: py={fit_py._N}, r={r_nobs}"
+    )
diff --git a/tests/test_model_matrix.py b/tests/test_model_matrix.py
deleted file mode 100644
index 43727f841..000000000
--- a/tests/test_model_matrix.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import pytest
-
-import pyfixest as pf
-
-
-# Define the fixture to provide data
-@pytest.fixture
-def data():
-    return pf.get_data()
-
-
-# Parameterize the test function directly with formulas
-@pytest.mark.parametrize(
-    "fml",
-    [
-        "Y ~ i(f1)",
-        "Y ~ i(f1, ref = 1.0)",
-        "Y ~ i(f1, X1)",
-        "Y ~ i(f1, X1, ref = 2.0)",
-        "Y ~ i(f1) + X2",
-        "Y ~ i(f1, ref = 1.0) + X2",
-        "Y ~ i(f1, X1) + X2",
-        "Y ~ i(f1, X1, ref = 2.0) + X2",
-    ],
-)
-def test_get_icovars(data, fml):
-    # Use the data and fml from the fixture and parameterization
-    fit = pf.feols(fml, data=data)
-    assert len(fit._icovars) > 0, "No icovars found"
-    assert "X2" not in fit._icovars, "X2 is found in _icovars"
diff --git a/tests/test_others.py b/tests/test_others.py
index 42aea5f1a..93722c72a 100644
--- a/tests/test_others.py
+++ b/tests/test_others.py
@@ -23,9 +23,9 @@ def test_multicol_overdetermined_iv():
     assert fit._collin_vars_z == ["f1"]
 
     np.testing.assert_allclose(
-        fit._beta_hat, np.array([-0.993607, -0.174227], dtype=float), rtol=1e-5
+        fit._beta_hat, np.array([-0.174227, -0.993607], dtype=float), rtol=1e-5
     )
-    np.testing.assert_allclose(fit._se, np.array([0.104009, 0.018416]), rtol=1e-5)
+    np.testing.assert_allclose(fit._se, np.array([0.018416, 0.104009]), rtol=1e-5)
 
 
 def test_polars_input():
diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
index a76977b75..dfd6ed593 100644
--- a/tests/test_vs_fixest.py
+++ b/tests/test_vs_fixest.py
@@ -179,6 +179,19 @@ def check_relative_diff(x1, x2, tol, msg=None):
     assert np.all(np.abs(x1 - x2) / np.abs(x1) < tol), msg
 
 
+def _get_vcov_diag(py_model, r_model, coefname, is_iv=False):
+    """Return the variance of `coefname` in the Python and R models, as a pair."""
+    py_idx = py_model._coefnames.index(coefname)
+    py_vcov = py_model._vcov[py_idx, py_idx]
+    # pandas2ri strips names from auto-converted arrays, so read them inside R.
+    # An R closure is used so the model is never bound in R's global environment.
+    r_names = list(ro.r("function(m) names(coef(m))")(r_model))
+    r_name = f"fit_{coefname}" if is_iv else coefname  # "fit_" prefix when is_iv
+    r_idx = r_names.index(r_name)
+    r_vcov = np.array(stats.vcov(r_model))[r_idx, r_idx]
+    return py_vcov, r_vcov
+
+
 test_counter_feols = 0
 test_counter_fepois = 0
 test_counter_feiv = 0
@@ -282,7 +295,7 @@ def test_single_fit_feols(
     py_pval = mod.pvalue().xs("X1")
     py_tstat = mod.tstat().xs("X1")
     py_confint = mod.confint().xs("X1").values
-    py_vcov = mod._vcov[0, 0]
+    py_vcov, r_vcov = _get_vcov_diag(mod, r_fixest, "X1")
 
     py_nobs = mod._N
     py_resid = mod.resid()
@@ -296,7 +309,6 @@ def test_single_fit_feols(
     r_pval = df_X1["p.value"]
     r_tstat = df_X1["statistic"]
     r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
-    r_vcov = stats.vcov(r_fixest)[0, 0]
 
     r_nobs = int(stats.nobs(r_fixest)[0])
     r_df_k = int(ro.r('attr(r_fixest$cov.scaled, "df.K")')[0])
@@ -568,7 +580,6 @@ def test_single_fit_fepois(
     py_tstat = mod.tstat().xs("X1")
     py_confint = mod.confint().xs("X1").values
     py_nobs = mod._N
-    py_vcov = mod._vcov[0, 0]
     py_deviance = mod.deviance
     py_resid = mod.resid()
     py_irls_weights = mod._irls_weights.flatten()
@@ -582,6 +593,7 @@ def test_single_fit_fepois(
     df_X1 = _get_r_df(r_fixest)
     ro.globalenv["r_fixest"] = r_fixest
 
+    py_vcov, r_vcov = _get_vcov_diag(mod, r_fixest, "X1")
     r_coef = df_X1["estimate"]
     r_se = df_X1["std.error"]
     r_pval = df_X1["p.value"]
@@ -589,7 +601,6 @@ def test_single_fit_fepois(
     r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
     r_nobs = int(stats.nobs(r_fixest)[0])
     r_resid = stats.residuals(r_fixest)
-    r_vcov = stats.vcov(r_fixest)[0, 0]
     r_deviance = r_fixest.rx2("deviance")
     r_irls_weights = r_fixest.rx2("irls_weights")
     r_df_k = int(ro.r('attr(r_fixest$cov.scaled, "df.K")')[0])
@@ -718,7 +729,7 @@ def test_single_fit_iv(
     py_pval = mod.pvalue().xs("X1")
     py_tstat = mod.tstat().xs("X1")
     py_confint = mod.confint().xs("X1").values
-    py_vcov = mod._vcov[0, 0]
+    py_vcov, r_vcov = _get_vcov_diag(mod, r_fixest, "X1", is_iv=True)
 
     py_nobs = mod._N
     py_resid = mod.resid()
@@ -730,7 +741,6 @@ def test_single_fit_iv(
     r_pval = df_X1["p.value"]
     r_tstat = df_X1["statistic"]
     r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
-    r_vcov = stats.vcov(r_fixest)[0, 0]
 
     r_nobs = int(stats.nobs(r_fixest)[0])
     r_resid = stats.resid(r_fixest)
@@ -904,8 +914,7 @@ def test_glm_vs_fixest(N, seed, dropna, fml, inference, family):
     )
 
     # Compare variance-covariance matrices
-    py_vcov = fit_py._vcov[0, 0]
-    r_vcov = stats.vcov(fit_r)[0, 0]
+    py_vcov, r_vcov = _get_vcov_diag(fit_py, fit_r, "X1")
     check_absolute_diff(
         py_vcov,
         r_vcov,
@@ -955,6 +964,29 @@ def test_glm_vs_fixest(N, seed, dropna, fml, inference, family):
         ("Y + Y2 ~ X1 | csw0(f1,f2)"),
         ("Y + log(Y2) ~ sw(X1, X2) | csw0(f1,f2,f3)"),
         ("Y ~ C(f2):X2 + sw0(X1, f3)"),
+        # Multiple estimation with variable combinations (e.g., f1+f2 as a single step)
+        ("Y ~ X1 | sw0(f1, f1+f2)"),
+        ("Y ~ X1 | csw0(f1, f1+f2)"),
+        ("Y ~ X1 | sw(f1, f1+f2)"),
+        ("Y ~ X1 | csw(f1, f1+f2)"),
+        ("Y ~ sw0(X1, X1+X2)"),
+        ("Y ~ csw0(X1, X1+X2)"),
+        ("Y ~ sw(X1, X1+X2)"),
+        ("Y ~ X1 + sw0(X2, X2+f1)"),
+        ("Y ~ X1 + csw0(X2, X2+f1)"),
+        ("Y ~ X1 | sw0(f1, f1+f2, f1+f2+f3)"),
+        ("Y ~ X1 | csw0(f1, f1+f2, f1+f2+f3)"),
+        ("Y + Y2 ~ X1 | sw0(f1, f1+f2)"),
+        ("Y + Y2 ~ sw0(X1, X1+X2) | f1"),
+        # mvsw() cases - multiverse stepwise (all combinations)
+        ("Y ~ mvsw(X1, X2)"),
+        ("Y ~ mvsw(X1, X2) | f1"),
+        ("Y ~ X1 + mvsw(X2, f1)"),
+        ("Y ~ X1 + mvsw(X2, f1) | f2"),
+        ("Y ~ X1 | mvsw(f1, f2)"),
+        ("Y + Y2 ~ mvsw(X1, X2)"),
+        ("Y + Y2 ~ mvsw(X1, X2) | f1"),
+        ("Y ~ mvsw(X1, X2, f1)"),
         # ("Y ~ i(f1,X2) | csw0(f2)"),
         # ("Y ~ i(f1,X2) | sw0(f2)"),
         # ("Y ~ i(f1,X2) | csw(f2, f3)"),
@@ -1517,8 +1549,6 @@ def test_inf_dropping(fml, weights):
     data = pf.get_data(model="Fepois").dropna()
     data.loc[data.index[0], "Y"] = 0
 
-    # test that two 0's in dependent variable are dropped
-    # and that warning is triggered
     n_zeros = (data.Y == 0).sum()
     with pytest.warns(
         UserWarning,
@@ -1527,7 +1557,6 @@ def test_inf_dropping(fml, weights):
         fit_py = feols(fml=fml, data=data, weights=weights, fixef_rm="none")
 
     assert int(data.shape[0] - n_zeros) == fit_py._N
-    assert np.all(fit_py._na_index == np.where(data.Y == 0)[0].tolist())
 
 
 def _convert_f3(data, f3_type):