x-tabdeveloping · x-tabdeveloping · Nov 24, 2025 · Nov 15, 2025 · Nov 15, 2025 · Nov 15, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,35 +1,30 @@
 name: Tests
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
+
+on: [push]
 
 jobs:
-  pytest:
+  build:
+
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.11"]
-          #
-    # This allows a subsequently queued workflow run to interrupt previous runs
-    concurrency:
-      group: "${{ github.workflow }}-${{ matrix.python-version}}-${{ matrix.os }} @ ${{ github.ref }}"
-      cancel-in-progress: true
 
     steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+      - uses: actions/checkout@v5
+      - name: Set up Python
+        # This is the version of the action for setting up Python, not the Python version.
+        uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          # Semantic version range syntax or exact version of a Python version
+          python-version: '3.11'
+          # Optional - x64 or x86 architecture, defaults to x64
+          architecture: 'x64'
           cache: "pip"
       # You can test your matrix by printing the current Python version
       - name: Display Python version
-        run: python3 -c "import sys; print(sys.version)"
-
-      - name: Install dependencies
-        run: python3 -m pip install --upgrade turftopic[pyro-ppl] pandas pytest plotly igraph datasets pillow
+        run: python -c "import sys; print(sys.version)"
+      - name: Install package
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]
+          pip install pytest
       - name: Run tests
-        run: python3 -m pytest tests/
-
+        run: python -m pytest tests/
diff --git a/docs/SensTopic.md b/docs/SensTopic.md
@@ -0,0 +1,161 @@
+# SensTopic (BETA)
+
+SensTopic is a version of Semantic Signal Separation that only discovers positive signals, while allowing components to be unbounded.
+This is achieved with an algorithm called Semi-nonnegative Matrix Factorization, or SNMF.
+
+> :warning: This model is still in an experimental phase. More documentation and a paper are on their way. :warning:
+
+SensTopic uses a very efficient implementation of the SNMF algorithm, which is available both in raw NumPy and in JAX.
+If you want to enable hardware acceleration and JIT compilation, make sure to install JAX before running the model.
+
+```bash
+pip install jax
+```
+
+Here's an example of running SensTopic on the 20 Newsgroups dataset:
+
+```python
+from sklearn.datasets import fetch_20newsgroups
+from turftopic import SensTopic
+
+corpus = fetch_20newsgroups(
+    subset="all",
+    remove=("headers", "footers", "quotes"),
+).data
+
+model = SensTopic(25)
+model.fit(corpus)
+
+model.print_topics()
+```
+
+
+| Topic ID | Highest Ranking |
+| - | - |
+| | ... |
+| 8 | gospels, mormon, catholics, protestant, mormons, synagogues, seminary, catholic, liturgy, churches |
+| 9 | encryption, encrypt, encrypting, crypt, cryptosystem, cryptography, cryptosystems, decryption, encrypted, spying |
+| 10 | palestinians, israelis, palestinian, israeli, gaza, israel, gazans, palestine, zionist, aviv |
+| 11 | nasa, spacecraft, spaceflight, satellites, interplanetary, astronomy, astronauts, astronomical, orbiting, astronomers |
+| 12 | imagewriter, colormaps, bitmap, bitmaps, pkzip, imagemagick, colormap, formats, adobe, ghostscript |
+| | ... |
+
+## Sparsity
+
+SensTopic has a sparsity hyper-parameter that roughly dictates how many topics will be assigned to a single document: assigning many topics to one document gets penalized.
+This means that the model is a matrix factorization model, but it can also function as a soft clustering model, depending on this parameter.
+Unlike clustering models, however, it may assign multiple topics to documents that have them, and won't force every document to contain only one topic.
+
+Higher values will make your model more like a clustering model, while lower values will make it more like a decomposition model:
+
+??? info "Click to see code"
+    ```python
+    import pandas as pd
+    import numpy as np
+    import plotly.express as px
+    from sentence_transformers import SentenceTransformer
+    from datasets import load_dataset
+
+    from turftopic import SensTopic
+
+    ds = load_dataset("gopalkalpande/bbc-news-summary", split="train")
+    corpus = list(ds["Summaries"])
+
+    encoder = SentenceTransformer("all-MiniLM-L6-v2")
+    embeddings = encoder.encode(corpus, show_progress_bar=True)
+
+    models = []
+    doc_topic_ms = []
+    sparsities = np.array(
+        [
+            0.05,
+            0.1,
+            0.25,
+            0.5,
+            0.75,
+            1.0,
+            2.5,
+            5.0,
+            10.0,
+        ]
+    )
+    for i, sparsity in enumerate(sparsities):
+        model = SensTopic(
+            n_components=3, random_state=42, sparsity=sparsity, encoder=encoder
+        )
+        doc_topic = model.fit_transform(corpus, embeddings=embeddings)
+        doc_topic = (doc_topic.T / doc_topic.sum(axis=1)).T
+        models.append(model)
+        doc_topic_ms.append(doc_topic)
+    a_name, b_name, c_name = models[0].topic_names
+    records = []
+    for i, doc_topic in enumerate(doc_topic_ms):
+        for dt in doc_topic:
+            a, b, c, *_ = dt
+            records.append(
+                {
+                    "sparsity": sparsities[i],
+                    a_name: a,
+                    b_name: b,
+                    c_name: c,
+                    "topic": models[0].topic_names[np.argmax(dt)],
+                }
+            )
+    df = pd.DataFrame.from_records(records)
+    fig = px.scatter_ternary(
+        df, a=a_name, b=b_name, c=c_name, animation_frame="sparsity", color="topic"
+    )
+    fig.show()
+    ```
+
+<figure>
+  <iframe src="../images/ternary_sparsity.html" title="Ternary plot of topics in documents." style="height:800px;width:1050px;padding:0px;border:none;"></iframe>
+  <figcaption> Ternary plot of topic distribution in a 3 topic SensTopic model varying with sparsity. </figcaption>
+</figure>
+
+You can see that as the sparsity increases, topics get clustered much more clearly, and more weight gets allocated to the edges of the graph.
+
+To see how many topics there are in your documents, you can use the `plot_topic_decay()` method, which shows you how topic weights get assigned to documents.
+
+```python
+model.plot_topic_decay()
+```
+
+<figure>
+  <iframe src="../images/topic_decay.html" title="Topic Decay in SensTopic model" style="height:520px;width:1050px;padding:0px;border:none;"></iframe>
+  <figcaption> Topic Decay in a SensTopic Model with sparsity=1. </figcaption>
+</figure>
+
+## Automatic number of topics
+
+SensTopic can learn the number of topics in a given dataset.
+In order to determine this quantity, we use a version of the Bayesian Information Criterion modified for NMF.
+This does not work equally well for all corpora, but it can be a powerful tool when the number of topics is not known a priori.
+
+In this example the model finds 6 topics in the BBC News dataset:
+
+```python
+# pip install datasets
+from datasets import load_dataset
+
+ds = load_dataset("gopalkalpande/bbc-news-summary", split="train")
+corpus = list(ds["Summaries"])
+
+model = SensTopic("auto")
+model.fit(corpus)
+model.print_topics()
+```
+
+| Topic ID | Highest Ranking |
+| - | - |
+| 0 | liverpool, mourinho, chelsea, premiership, arsenal, striker, madrid, midfield, uefa, manchester |
+| 1 | oscar, bafta, oscars, cast, cinema, hollywood, actor, screenplay, actors, films |
+| 2 | mobile, mobiles, broadband, devices, digital, internet, computers, microsoft, phones, telecoms |
+| 3 | tory, blair, minister, ministers, parliamentary, mps, parliament, politicians, constituency, ukip |
+| 4 | tennis, competing, federer, wimbledon, iaaf, olympic, tournament, athlete, rugby, olympics |
+| 5 | gdp, stock, economy, earnings, investments, investment, invest, exports, finance, economies |
+
+
+## API Reference
+
+::: turftopic.models.senstopic.SensTopic
diff --git a/docs/images/ternary_sparsity.html b/docs/images/ternary_sparsity.html
diff --git a/docs/images/topic_decay.html b/docs/images/topic_decay.html
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -25,6 +25,7 @@ nav:
   - Topic Models:
     - Model Overview: model_overview.md
     - Semantic Signal Separation (S³): s3.md
+    - SensTopic (BETA): SensTopic.md
     - KeyNMF: KeyNMF.md
     - Topeax: Topeax.md
     - GMM: GMM.md

diff --git a/pyproject.toml b/pyproject.toml
@@ -33,7 +33,6 @@ dependencies = [
 [project.optional-dependencies]
 pyro-ppl = ["pyro-ppl>=1.8.0,<2.0.0"]
 openai = ["openai>=1.40.0,<2.0.0"]
-opentsne = ["openTSNE>=1.0.0,<2.0.0"]
 datamapplot=["datamapplot>=0.4.2, <1.0.0"]
 jieba = ["jieba>=0.40.0,<1.0.0"]
 spacy = ["spacy>=3.6.0,<4.0.0"]
@@ -52,7 +51,6 @@ docs = [
 dev = [
   "pyro-ppl>=1.8.0,<2.0.0",
   "openai>=1.40.0,<2.0.0",
-  "openTSNE>=1.0.0,<2.0.0",
   "datamapplot>=0.4.2, <1.0.0",
   "jieba>=0.40.0,<1.0.0",
   "snowballstemmer>=2.0.0,<3.0.0",
@@ -65,6 +63,7 @@ dev = [
   "mkdocstrings==0.22.0",
   "mkdocstrings-python==1.8.0",
   "griffe==0.40.0",
+  "datasets>=4.3.0"
 ]
 
 [build-system]

diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -18,6 +18,8 @@
     FASTopic,
     KeyNMF,
     SemanticSignalSeparation,
+    SensTopic,
+    Topeax,
     load_model,
 )
 
@@ -79,6 +81,8 @@ def generate_dates(
     ),
     AutoEncodingTopicModel(3, combined=True),
     FASTopic(3, batch_size=None),
+    SensTopic(),
+    Topeax(),
 ]
 
 dynamic_models = [

diff --git a/turftopic/__init__.py b/turftopic/__init__.py
@@ -3,10 +3,11 @@
 from turftopic.base import ContextualModel
 from turftopic.error import NotInstalled
 from turftopic.models.cluster import BERTopic, ClusteringTopicModel, Top2Vec
-from turftopic.models.decomp import SemanticSignalSeparation
+from turftopic.models.decomp import S3, SemanticSignalSeparation
 from turftopic.models.fastopic import FASTopic
 from turftopic.models.gmm import GMM
 from turftopic.models.keynmf import KeyNMF
+from turftopic.models.senstopic import SensTopic
 from turftopic.models.topeax import Topeax
 from turftopic.serialization import load_model
 
@@ -31,4 +32,6 @@
     "load_model",
     "build_datamapplot",
     "create_concept_browser",
+    "S3",
+    "SensTopic",
 ]