Skip to content

Commit 3fd5be9

Browse files
committed
Update fenic session examples with minimal configs
1 parent 24b351d commit 3fd5be9

File tree

1 file changed

+100
-59
lines changed

1 file changed

+100
-59
lines changed

docs/hub/datasets-fenic.md

Lines changed: 100 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,16 @@ To get started, pip install `fenic`:
1414
pip install fenic
1515
```
1616

17+
### Create a Session
18+
19+
Instantiate a fenic session with the default configuration (sufficient for reading datasets and other non-semantic operations):
20+
21+
```python
22+
import fenic as fc
23+
24+
session = fc.Session.get_or_create(fc.SessionConfig())
25+
```
26+
1727
## Overview
1828

1929
fenic is an opinionated data processing framework that combines:
@@ -39,7 +49,8 @@ To read a dataset from the Hugging Face Hub:
3949
```python
4050
import fenic as fc
4151

42-
# Assuming session is already created
52+
session = fc.Session.get_or_create(fc.SessionConfig())
53+
4354
# Read a CSV file from a public dataset
4455
df = session.read.csv("hf://datasets/datasets-examples/doc-formats-csv-1/data.csv")
4556

@@ -112,19 +123,18 @@ Once loaded from Hugging Face, you can use fenic's full DataFrame API:
112123
```python
113124
import fenic as fc
114125

115-
# Create session
116-
session = fc.Session.get_or_create()
126+
session = fc.Session.get_or_create(fc.SessionConfig())
117127

118-
# Load IMDB dataset from Hugging Face
119-
df = session.read.parquet("hf://datasets/imdb/plain_text/train-*.parquet")
128+
# Load IMDB dataset from Hugging Face
129+
df = session.read.parquet("hf://datasets/imdb/plain_text/train-*.parquet")
120130

121-
# Filter and select
122-
positive_reviews = df.filter(fc.col("label") == 1).select("text", "label")
131+
# Filter and select
132+
positive_reviews = df.filter(fc.col("label") == 1).select("text", "label")
123133

124-
# Group by and aggregate
125-
label_counts = df.group_by("label").agg(
126-
fc.count("*").alias("count")
127-
)
134+
# Group by and aggregate
135+
label_counts = df.group_by("label").agg(
136+
fc.count("*").alias("count")
137+
)
128138
```
129139

130140
### AI-Powered Operations
@@ -134,61 +144,92 @@ To use semantic and embedding operations, configure language and embedding model
134144
```python
135145
import fenic as fc
136146

137-
# Create session
138-
session = fc.Session.get_or_create()
139-
140-
# Load a text dataset from Hugging Face
141-
df = session.read.parquet("hf://datasets/imdb/plain_text/train-00000-of-00001.parquet")
142-
143-
# Add embeddings to text columns
144-
df_with_embeddings = df.select(
145-
"*",
146-
fc.semantic.embed(fc.col("text")).alias("embedding")
147-
)
148-
149-
# Apply semantic functions for sentiment analysis
150-
df_analyzed = df_with_embeddings.select(
151-
"*",
152-
fc.semantic.analyze_sentiment(
153-
fc.col("text"),
154-
model_alias="gpt-4o-mini" # Optional: specify model
155-
).alias("sentiment")
156-
)
147+
# Requires OPENAI_API_KEY to be set for language and embedding calls
148+
session = fc.Session.get_or_create(
149+
fc.SessionConfig(
150+
semantic=fc.SemanticConfig(
151+
language_models={
152+
"gpt-4o-mini": fc.OpenAILanguageModel(
153+
model_name="gpt-4o-mini",
154+
rpm=60,
155+
tpm=60000,
156+
)
157+
},
158+
embedding_models={
159+
"text-embedding-3-small": fc.OpenAIEmbeddingModel(
160+
model_name="text-embedding-3-small",
161+
rpm=60,
162+
tpm=60000,
163+
)
164+
},
165+
)
166+
)
167+
)
168+
169+
# Load a text dataset from Hugging Face
170+
df = session.read.parquet("hf://datasets/imdb/plain_text/train-00000-of-00001.parquet")
171+
172+
# Add embeddings to text columns
173+
df_with_embeddings = df.select(
174+
"*",
175+
fc.semantic.embed(fc.col("text")).alias("embedding")
176+
)
177+
178+
# Apply semantic functions for sentiment analysis
179+
df_analyzed = df_with_embeddings.select(
180+
"*",
181+
fc.semantic.analyze_sentiment(
182+
fc.col("text"),
183+
model_alias="gpt-4o-mini", # Optional: specify model
184+
).alias("sentiment")
185+
)
157186
```
158187

159188
## Example: Analyzing MMLU Dataset
160189

161190
```python
162-
import fenic as fc
163-
164-
# Create session
165-
session = fc.Session.get_or_create()
166-
167-
# Load MMLU astronomy subset from Hugging Face
168-
df = session.read.parquet("hf://datasets/cais/mmlu/astronomy/*.parquet")
169-
170-
# Process the data
171-
processed_df = (df
172-
# Filter for specific criteria
173-
.filter(fc.col("subject") == "astronomy")
174-
# Select relevant columns
175-
.select("question", "choices", "answer")
176-
# Add difficulty analysis using semantic.map
177-
.select(
178-
"*",
179-
fc.semantic.map(
180-
"Rate the difficulty of this question from 1-5: {{question}}",
181-
question=fc.col("question"),
182-
model_alias="gpt-4o-mini" # Optional: specify model
183-
).alias("difficulty")
184-
)
185-
)
186-
187-
# Show results
188-
processed_df.show()
191+
import fenic as fc
192+
193+
# Requires OPENAI_API_KEY to be set for semantic calls
194+
session = fc.Session.get_or_create(
195+
fc.SessionConfig(
196+
semantic=fc.SemanticConfig(
197+
language_models={
198+
"gpt-4o-mini": fc.OpenAILanguageModel(
199+
model_name="gpt-4o-mini",
200+
rpm=60,
201+
tpm=60000,
202+
)
203+
},
204+
)
205+
)
206+
)
207+
208+
# Load MMLU astronomy subset from Hugging Face
209+
df = session.read.parquet("hf://datasets/cais/mmlu/astronomy/*.parquet")
210+
211+
# Process the data
212+
processed_df = (df
213+
# Filter for specific criteria
214+
.filter(fc.col("subject") == "astronomy")
215+
# Select relevant columns
216+
.select("question", "choices", "answer")
217+
# Add difficulty analysis using semantic.map
218+
.select(
219+
"*",
220+
fc.semantic.map(
221+
"Rate the difficulty of this question from 1-5: {{question}}",
222+
question=fc.col("question"),
223+
model_alias="gpt-4o-mini" # Optional: specify model
224+
).alias("difficulty")
225+
)
226+
)
227+
228+
# Show results
229+
processed_df.show()
189230
```
190231

191232
## Resources
192233

193234
- [fenic GitHub Repository](https://github.com/typedef-ai/fenic)
194-
- [fenic Documentation](https://docs.fenic.ai/latest/)
235+
- [fenic Documentation](https://docs.fenic.ai/latest/)

0 commit comments

Comments
 (0)