Update fenic session examples with minimal configs

cpard · cpard · commit 3fd5be9e5afa · 2025-09-22T15:46:17.000-07:00
diff --git a/docs/hub/datasets-fenic.md b/docs/hub/datasets-fenic.md
@@ -14,6 +14,16 @@ To get started, pip install `fenic`:
 pip install fenic
 ```
 
+### Create a Session
+
+Instantiate a fenic session with the default configuration (sufficient for reading datasets and other non-semantic operations):
+
+```python
+import fenic as fc
+
+session = fc.Session.get_or_create(fc.SessionConfig())
+```
+
 ## Overview
 
 fenic is an opinionated data processing framework that combines:
@@ -39,7 +49,8 @@ To read a dataset from the Hugging Face Hub:
 ```python
 import fenic as fc
 
-# Assuming session is already created
+session = fc.Session.get_or_create(fc.SessionConfig())
+
 # Read a CSV file from a public dataset
 df = session.read.csv("hf://datasets/datasets-examples/doc-formats-csv-1/data.csv")
 
@@ -112,19 +123,18 @@ Once loaded from Hugging Face, you can use fenic's full DataFrame API:
 ```python
 import fenic as fc
 
-  # Create session
-  session = fc.Session.get_or_create()
+session = fc.Session.get_or_create(fc.SessionConfig())
 
-  # Load IMDB dataset from Hugging Face
-  df = session.read.parquet("hf://datasets/imdb/plain_text/train-*.parquet")
+# Load IMDB dataset from Hugging Face
+df = session.read.parquet("hf://datasets/imdb/plain_text/train-*.parquet")
 
-  # Filter and select
-  positive_reviews = df.filter(fc.col("label") == 1).select("text", "label")
+# Filter and select
+positive_reviews = df.filter(fc.col("label") == 1).select("text", "label")
 
-  # Group by and aggregate
-  label_counts = df.group_by("label").agg(
-      fc.count("*").alias("count")
-  )
+# Group by and aggregate
+label_counts = df.group_by("label").agg(
+    fc.count("*").alias("count")
+)
 ```
 
 ### AI-Powered Operations
@@ -134,61 +144,92 @@ To use semantic and embedding operations, configure language and embedding model
 ```python
 import fenic as fc
 
-  # Create session
-  session = fc.Session.get_or_create()
-
-  # Load a text dataset from Hugging Face
-  df = session.read.parquet("hf://datasets/imdb/plain_text/train-00000-of-00001.parquet")
-
-  # Add embeddings to text columns
-  df_with_embeddings = df.select(
-      "*",
-      fc.semantic.embed(fc.col("text")).alias("embedding")
-  )
-
-  # Apply semantic functions for sentiment analysis
-  df_analyzed = df_with_embeddings.select(
-      "*",
-      fc.semantic.analyze_sentiment(
-          fc.col("text"),
-          model_alias="gpt-4o-mini"  # Optional: specify model
-      ).alias("sentiment")
-  )
+# Requires the OPENAI_API_KEY environment variable to be set for language model and embedding calls
+session = fc.Session.get_or_create(
+    fc.SessionConfig(
+        semantic=fc.SemanticConfig(
+            language_models={
+                "gpt-4o-mini": fc.OpenAILanguageModel(
+                    model_name="gpt-4o-mini",
+                    rpm=60,
+                    tpm=60000,
+                )
+            },
+            embedding_models={
+                "text-embedding-3-small": fc.OpenAIEmbeddingModel(
+                    model_name="text-embedding-3-small",
+                    rpm=60,
+                    tpm=60000,
+                )
+            },
+        )
+    )
+)
+
+# Load a text dataset from Hugging Face
+df = session.read.parquet("hf://datasets/imdb/plain_text/train-00000-of-00001.parquet")
+
+# Add embeddings to text columns
+df_with_embeddings = df.select(
+    "*",
+    fc.semantic.embed(fc.col("text")).alias("embedding")
+)
+
+# Apply semantic functions for sentiment analysis
+df_analyzed = df_with_embeddings.select(
+    "*",
+    fc.semantic.analyze_sentiment(
+        fc.col("text"),
+        model_alias="gpt-4o-mini",  # Optional: specify model
+    ).alias("sentiment")
+)
 ```
 
 ## Example: Analyzing MMLU Dataset
 
 ```python
-  import fenic as fc
-
-  # Create session
-  session = fc.Session.get_or_create()
-
-  # Load MMLU astronomy subset from Hugging Face
-  df = session.read.parquet("hf://datasets/cais/mmlu/astronomy/*.parquet")
-
-  # Process the data
-  processed_df = (df
-      # Filter for specific criteria
-      .filter(fc.col("subject") == "astronomy")
-      # Select relevant columns
-      .select("question", "choices", "answer")
-      # Add difficulty analysis using semantic.map
-      .select(
-          "*",
-          fc.semantic.map(
-              "Rate the difficulty of this question from 1-5: {{question}}",
-              question=fc.col("question"),
-              model_alias="gpt-4o-mini"  # Optional: specify model
-          ).alias("difficulty")
-      )
-  )
-
-  # Show results
-  processed_df.show()
+import fenic as fc
+
+# Requires the OPENAI_API_KEY environment variable to be set for semantic calls
+session = fc.Session.get_or_create(
+    fc.SessionConfig(
+        semantic=fc.SemanticConfig(
+            language_models={
+                "gpt-4o-mini": fc.OpenAILanguageModel(
+                    model_name="gpt-4o-mini",
+                    rpm=60,
+                    tpm=60000,
+                )
+            },
+        )
+    )
+)
+
+# Load MMLU astronomy subset from Hugging Face
+df = session.read.parquet("hf://datasets/cais/mmlu/astronomy/*.parquet")
+
+# Process the data
+processed_df = (df
+    # Filter for specific criteria
+    .filter(fc.col("subject") == "astronomy")
+    # Select relevant columns
+    .select("question", "choices", "answer")
+    # Add difficulty analysis using semantic.map
+    .select(
+        "*",
+        fc.semantic.map(
+            "Rate the difficulty of this question from 1-5: {{question}}",
+            question=fc.col("question"),
+            model_alias="gpt-4o-mini",  # Optional: specify model
+        ).alias("difficulty")
+    )
+)
+
+# Show results
+processed_df.show()
 ```
 
 ## Resources
 
 - [fenic GitHub Repository](https://github.com/typedef-ai/fenic)
-- [fenic Documentation](https://docs.fenic.ai/latest/)
+- [fenic Documentation](https://docs.fenic.ai/latest/)