
Commit d0c9dba

jeremymanning and claude committed
Comprehensive documentation enhancement for v0.3.0
Major documentation overhaul including:

## Enhanced Documentation Infrastructure
- Updated package docstring with comprehensive examples and v0.3.0 info
- Enhanced function docstrings across all modules
- Modernized configuration with sentence-transformers models
- Updated installation guide with Python 3.9+ requirements

## Comprehensive Tutorial System
- Enhanced wrangling_basics.ipynb with modern sentence-transformers examples
- Created core.ipynb covering the configuration system
- Created decorators1.ipynb and decorators2.ipynb for decorator patterns
- Created util.ipynb covering utility functions
- Enhanced io.ipynb for file operations
- Created real_world_examples.ipynb with customer feedback analysis
- Started interpolation_and_imputation.ipynb for missing data handling

## User Experience Improvements
- Organized tutorials into logical sections (Getting Started, Core Concepts, Advanced Applications)
- Created comprehensive migration guide for the v0.2 → v0.3 transition
- Added migration guide to main documentation navigation
- Fixed notebook formatting and validation issues

## Modern Examples Throughout
- Multiple sentence-transformers model examples with use-case guidance
- Practical applications including similarity search and clustering
- Real-world case studies with visualization and analysis
- Updated all examples to use v0.3.0 patterns

The documentation now provides comprehensive guidance for both new users and those migrating from v0.2, with practical examples showcasing the full capabilities of the modernized data-wrangler.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 4ac047e commit d0c9dba

18 files changed (+1189, -97 lines)

datawrangler/__init__.py

Lines changed: 35 additions & 1 deletion

@@ -1,4 +1,38 @@
-"""Top-level package for datawrangler."""
+"""
+Data Wrangler: Transform messy data into clean pandas DataFrames
+
+Data Wrangler is a Python package that automatically transforms various data types
+(arrays, text, files, URLs, etc.) into clean, consistent pandas DataFrame format.
+It specializes in text data processing using modern NLP models.
+
+Key Features:
+- Automatic data type detection and conversion
+- Text embedding using sentence-transformers and sklearn models
+- Function decorators for seamless DataFrame integration
+- Support for files, URLs, and mixed data types
+- Configurable processing pipeline
+
+Basic Usage:
+    >>> import datawrangler as dw
+    >>> df = dw.wrangle(your_data)
+
+    # With text data using sentence-transformers
+    >>> text_df = dw.wrangle(["Hello world", "Another text"],
+    ...                      text_kwargs={'model': 'all-MiniLM-L6-v2'})
+
+    # Using the @funnel decorator
+    >>> @dw.funnel
+    ... def your_function(df):
+    ...     return df.mean()
+
+Requirements:
+- Python 3.9+
+- Optional: Install with [hf] extras for sentence-transformers support
+
+    pip install "pydata-wrangler[hf]"
+
+Version: 0.3.0+ (NumPy 2.0+ and pandas 2.0+ compatible)
+"""
 
 __author__ = """Contextual Dynamics Lab"""
 __email__ = '[email protected]'
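The `@dw.funnel` decorator mentioned in the new package docstring can be illustrated with a minimal re-implementation. This is a hypothetical sketch (`funnel_sketch` and `column_means` are made-up names, and the coercion is far simpler than what data-wrangler actually does) showing the general pattern: wrap a function so its input is coerced to a DataFrame before the call.

```python
import functools

import pandas as pd


def funnel_sketch(fn):
    """Hypothetical stand-in for dw.funnel: coerce the first argument
    to a pandas DataFrame before calling the wrapped function."""
    @functools.wraps(fn)
    def wrapper(data, *args, **kwargs):
        if not isinstance(data, pd.DataFrame):
            # Simplistic coercion; the real dw.wrangle handles text,
            # files, URLs, and mixed/nested lists as well
            data = pd.DataFrame(data)
        return fn(data, *args, **kwargs)
    return wrapper


@funnel_sketch
def column_means(df):
    # The wrapped function can assume it always receives a DataFrame
    return df.mean()


# Works on a plain list of rows, not just DataFrames
means = column_means([[1.0, 2.0], [3.0, 4.0]])
```

The design point is that downstream analysis functions stay DataFrame-only, while callers may pass whatever raw data they have.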

datawrangler/core/config.ini

Lines changed: 15 additions & 28 deletions

@@ -71,38 +71,25 @@ n_components = 50
 [TruncatedSVD]
 n_components = 50
 
-[BytePairEmbeddings]
-__model = 'en'
+# Sentence-Transformers Models (Modern NLP)
+[SentenceTransformer]
+__model = 'all-MiniLM-L6-v2'
 
-[ELMoEmbeddings]
-__model = 'original'
+[all-MiniLM-L6-v2]
+# Fast, general-purpose sentence embeddings
+# Good for: similarity search, clustering, information retrieval
 
-[FlairEmbeddings]
-__model = 'mix-forward'
+[all-mpnet-base-v2]
+# High-quality sentence embeddings
+# Good for: semantic similarity, paraphrase detection
 
-[PooledFlairEmbeddings]
-__model = 'mix-forward'
+[paraphrase-MiniLM-L6-v2]
+# Optimized for paraphrase detection
+# Good for: duplicate detection, content deduplication
 
-[TransformerWordEmbeddings]
-__model = 'en'
-
-[WordEmbeddings]
-__model = 'en'
-
-[StackedEmbeddings]
-__model = [embeddings.WordEmbeddings('glove'), embeddings.FlairEmbeddings('mix-forward'), embeddings.FlairEmbeddings('mix-backward')]
-
-[DocumentPoolEmbeddings]
-__model = [embeddings.WordEmbeddings('glove')]
-
-[DocumentRNNEmbeddings]
-__model = [embeddings.WordEmbeddings('glove')]
-
-[TransformerDocumentEmbeddings]
-__model = 'bert-base-uncased'
-
-[SentenceTransformerDocumentEmbeddings]
-__model = 'stsb-mpnet-base-v2'
+[all-distilroberta-v1]
+# Balanced performance and speed
+# Good for: general text understanding tasks
 
 [impute]
 model = 'IterativeImputer'
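Sections like these can be read with Python's standard `configparser`. A minimal sketch, using a fragment that mirrors the new config sections from the diff above (how data-wrangler itself parses this file is not shown here, so the quote-stripping step is an assumption about the stored format):

```python
import configparser

# A fragment mirroring the updated config.ini sections shown above
CONFIG_TEXT = """
[SentenceTransformer]
__model = 'all-MiniLM-L6-v2'

[impute]
model = 'IterativeImputer'
"""

config = configparser.ConfigParser()
config.read_string(CONFIG_TEXT)

# Values are stored as quoted strings in the file, so strip the quotes
default_model = config["SentenceTransformer"]["__model"].strip("'")
impute_model = config["impute"]["model"].strip("'")
```

This makes the default sentence-transformers model a one-line config change rather than a code change.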

datawrangler/zoo/format.py

Lines changed: 24 additions & 16 deletions

@@ -16,30 +16,38 @@
 
 def wrangle(x, return_dtype=False, **kwargs):
     """
-    Turn messy data into clean data
+    Turn messy data into clean pandas DataFrames
+
+    Automatically detects and converts various data types into consistent DataFrame format.
+    Specializes in text processing using modern NLP models and handles mixed data types.
 
     Parameters
     ----------
-    :param x: data in any format (text, numpy arrays, pandas dataframes, or a mixed list (or nested lists) of those
-        types). The following datatypes are supported:
+    :param x: data in any format. Supported datatypes:
       - Numpy Arrays, array-like objects, or paths to files that store array-like objects
-      - Pandas DataFrames, dataframe-like objects, or paths to files that store dataframe-like objects
-      - Images, or paths to files that store images
-      - Text, or paths to plain text files
-      - Mixed lists of the above
-    :param return_dtype: if True, also return the auto-detected datatype(s) of each dataset you wrangle
-    :param kwargs: used to control how data are wrangled (e.g., if you don't want to use the default options for each
-        data type):
-      - array_kwargs: passed to the datawrangler.zoo.array.wrangle_array function to control how arrays are handled
-      - dataframe_kwargs: passed to the datawrangler.zoo.dataframe.wrangle_dataframe function to control how
-        dataframes are handled
-      - image_kwargs: passed to the datawrangler.zoo.image.wrangle_image function to control how images are handled
-      - text_kwargs: passed to the datawrangler.zoo.text.wrangle_text function to control how text data are handled
-      any other keyword arguments are passed to *all* of the wrangle functions.
+      - Pandas DataFrames, dataframe-like objects, or paths to files that store dataframe-like objects
+      - Text strings, lists of strings, or paths to plain text files
+      - Mixed lists or nested lists of the above types
+    :param return_dtype: if True, also return the auto-detected datatype(s) of each dataset. Default: False
+    :param kwargs: control how data are wrangled:
+      - array_kwargs: passed to wrangle_array function to control how arrays are handled
+      - dataframe_kwargs: passed to wrangle_dataframe function to control how dataframes are handled
+      - text_kwargs: passed to wrangle_text function to control how text data are handled
+        Common text_kwargs options:
+        - {'model': 'all-MiniLM-L6-v2'} for sentence-transformers
+        - {'model': ['CountVectorizer', 'LatentDirichletAllocation']} for sklearn pipeline
+      Any other keyword arguments are passed to all wrangle functions.
 
     Returns
     -------
     :return: a DataFrame, or a list of DataFrames, containing the wrangled data
+
+    Examples
+    --------
+    >>> import datawrangler as dw
+    >>> df = dw.wrangle([1, 2, 3])  # Convert array to DataFrame
+    >>> text_df = dw.wrangle(["Hello", "World"], text_kwargs={'model': 'all-MiniLM-L6-v2'})
+    >>> mixed_df, dtypes = dw.wrangle([df, text_df], return_dtype=True)
     """
 
     deep_kwargs = {}
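The auto-detection behavior documented in the revised docstring can be sketched with a simplified dispatcher. All names below (`detect_dtype`, `wrangle_sketch`) are hypothetical; the real `wrangle` also handles images, files, URLs, and nested lists, and delegates to per-type wrangle functions.

```python
import numpy as np
import pandas as pd


def detect_dtype(x):
    """Simplified version of wrangle's type detection (illustrative only)."""
    if isinstance(x, pd.DataFrame):
        return "dataframe"
    if isinstance(x, str) or (
        isinstance(x, list) and x and all(isinstance(i, str) for i in x)
    ):
        return "text"
    if isinstance(x, (list, tuple, np.ndarray)):
        return "array"
    raise TypeError(f"unsupported type: {type(x)}")


def wrangle_sketch(x, return_dtype=False):
    """Coerce x to a DataFrame, mimicking wrangle's return_dtype flag."""
    dtype = detect_dtype(x)
    df = x if dtype == "dataframe" else pd.DataFrame(x)
    return (df, dtype) if return_dtype else df


df, dtype = wrangle_sketch([1, 2, 3], return_dtype=True)
```

The `return_dtype` flag mirrors the documented behavior: the caller gets both the wrangled DataFrame and the label that the detector assigned.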

datawrangler/zoo/text.py

Lines changed: 15 additions & 7 deletions

@@ -101,6 +101,8 @@ def robust_is_hugging_face_model(x):
     """
     Wrapper for is_hugging_face_model that also supports strings-- e.g., the string 'all-MiniLM-L6-v2' will be a valid
     hugging-face model when checked with this function, because it's a sentence-transformers model name.
+
+    Parameters
     ----------
     :param x: a to-be-tested model object or a string
 
@@ -114,19 +116,25 @@ def robust_is_hugging_face_model(x):
 
 def get_text_model(x):
     """
-    Given an valid scikit-learn or hugging-face model, or a string (e.g., 'LatentDirichletAllocation' or
-    'TransformerDocumentEmbeddings') matching the name of a valid scikit-learn or hugging-face model, return
-    a callable function or class constructor for the given model.
+    Given a valid scikit-learn or sentence-transformers model, or a string matching the name of a valid model,
+    return a callable function or class constructor for the given model.
 
     Parameters
     ----------
-    :param x: an object to turn into a valid scikit-learn or hugging-face model (e.g., an already-valid model or a
-        string)
+    :param x: an object to turn into a valid scikit-learn or sentence-transformers model. Can be:
+      - An already-valid model instance
+      - A string matching sklearn model names (e.g., 'LatentDirichletAllocation', 'CountVectorizer')
+      - A string matching sentence-transformers model names (e.g., 'all-MiniLM-L6-v2', 'all-mpnet-base-v2')
 
     Returns
     -------
-    :return: A valid scikit-learn or hugging-face model (or None if no model matching the given description can be
-        found)
+    :return: A valid scikit-learn or sentence-transformers model (or None if no model matching the given
+        description can be found)
+
+    Examples
+    --------
+    >>> get_text_model('LatentDirichletAllocation')  # sklearn model
+    >>> get_text_model('all-MiniLM-L6-v2')  # sentence-transformers model
     """
     if is_sklearn_model(x) or is_hugging_face_model(x):
         return x  # already a valid model
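The sklearn name-resolution path that `get_text_model` documents can be sketched with a small registry. This is a simplified analogue, not the package's actual lookup logic (`get_sklearn_model_sketch` is a made-up name; the real function also resolves sentence-transformers model names), followed by the `['CountVectorizer', 'LatentDirichletAllocation']` pipeline mentioned in the wrangle docstring:

```python
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer


def get_sklearn_model_sketch(name):
    """Resolve a model-name string against a small registry (illustrative;
    the real get_text_model searches sklearn and sentence-transformers)."""
    registry = {
        "CountVectorizer": CountVectorizer,
        "LatentDirichletAllocation": LatentDirichletAllocation,
    }
    return registry.get(name)  # None when nothing matches, like get_text_model


# Chain the resolved constructors into a tiny text-embedding pipeline
docs = ["the cat sat", "the dog ran", "cats and dogs"]
counts = get_sklearn_model_sketch("CountVectorizer")().fit_transform(docs)
topics = get_sklearn_model_sketch("LatentDirichletAllocation")(
    n_components=2, random_state=0
).fit_transform(counts)
```

Returning `None` for unknown names (rather than raising) matches the documented contract of `get_text_model`.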

docs/index.rst

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ Wrangle your messy data into consistent well-organized formats!
 
    readme
    installation
+   migration_guide
    tutorials
    api
    contributing

docs/installation.rst

Lines changed: 28 additions & 0 deletions

@@ -4,16 +4,44 @@
 Installation
 ============
 
+Requirements
+------------
+
+- **Python 3.9+** (v0.3.0+ requires modern Python versions)
+- NumPy 2.0+ and pandas 2.0+ compatible
+- Optional: HuggingFace transformers for advanced text processing
 
 Stable release
 --------------
 
+**Basic Installation**
+
 To install datawrangler, run this command in your terminal:
 
 .. code-block:: console
 
     $ pip install pydata-wrangler
 
+This installs the core functionality including sklearn-based text processing.
+
+**Full Installation with ML Libraries**
+
+For advanced text processing with sentence-transformers models:
+
+.. code-block:: console
+
+    $ pip install "pydata-wrangler[hf]"
+
+This includes sentence-transformers, transformers, and related HuggingFace libraries.
+
+**Upgrade from Previous Versions**
+
+If upgrading from v0.2.x, ensure you have Python 3.9+:
+
+.. code-block:: console
+
+    $ pip install --upgrade "pydata-wrangler[hf]"
+
 This is the preferred method to install datawrangler, as it will always install the most recent stable release.
 
 If you don't have `pip`_ installed, this `Python installation guide`_ can guide
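Whether the optional `[hf]` extras from the install commands above are present can be checked at runtime without importing the heavy libraries. A stdlib-only sketch (the helper names are hypothetical; the package names checked are the ones the installation guide says each install provides):

```python
import importlib.util


def has_hf_extras():
    """True if sentence-transformers (installed by pydata-wrangler[hf])
    is importable; find_spec checks availability without importing it."""
    return importlib.util.find_spec("sentence_transformers") is not None


def has_core_deps():
    """True if the sklearn-based core text-processing stack is importable."""
    return all(
        importlib.util.find_spec(pkg) is not None
        for pkg in ("pandas", "sklearn")
    )
```

Code that wants sentence-transformers models when available, with an sklearn fallback otherwise, can branch on `has_hf_extras()` instead of wrapping imports in try/except.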
