Skip to content

Commit 419edbb

Browse files
jjmachan and Jithin James authored
docs: added quickstart (#14)
* docs: quickstart with pre-commit hooks * remove pre-commit hooks * fix mistakes * added langchain --------- Co-authored-by: Jithin James <[email protected]>
1 parent 01b9ba8 commit 419edbb

File tree

3 files changed

+46
-14
lines changed

3 files changed

+46
-14
lines changed

examples/data_prep.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import concurrent.futures as f
2+
3+
from datasets import DatasetDict, load_dataset
4+
from langchain.llms import OpenAI
5+
6+
7+
def format_for_belar(row):
    """Add the column names used by the rest of this script to one example.

    Args:
        row: A single example mapping with ``selftext``, ``title`` and
            ``answers`` keys; ``answers`` must itself have a ``text`` entry.

    Returns:
        The same ``row`` object, updated in place with three extra keys:
        ``context`` (from ``selftext``), ``prompt`` (from ``title``) and
        ``ground_truth`` (from ``answers["text"]``).

    Raises:
        KeyError: If any of the source keys is missing.
    """
    row["context"] = row["selftext"]
    row["prompt"] = row["title"]
    row["ground_truth"] = row["answers"]["text"]
    return row
12+
13+
14+
# Load the ELI5 dataset; the isinstance check below narrows the return type
# for static checkers before a split is indexed by name.
d = load_dataset("eli5")
assert isinstance(d, DatasetDict)

# Add the context/prompt/ground_truth columns row by row, then drop the rest.
ds = d["test_eli5"].map(format_for_belar, batched=False)
ds = ds.select_columns(["context", "prompt", "ground_truth"])

# Fixed-seed shuffle keeps the sample reproducible. Cap the sample at 500 rows
# but never request more indices than the split actually has.
ds = ds.shuffle(seed=42).select(range(min(500, len(ds))))
print(ds.shape, ds.column_names)
21+
22+
23+
llm = OpenAI()  # type: ignore
# Template filled once per example by get_answers via str.format; the
# {context} and {prompt} placeholders are the only substituted fields.
prompt = """
{context}
with the above context explain like I'm five: {prompt}
"""
28+
29+
30+
def get_answers(row):
    """Generate one LLM answer per example in a batch.

    Written for ``Dataset.map(..., batched=True)``: despite its name, ``row``
    is a batch whose ``"prompt"`` and ``"context"`` entries are parallel
    sequences.

    Args:
        row: Batch mapping with ``"prompt"`` and ``"context"`` sequences.

    Returns:
        The same ``row`` object with a ``"generated_answers"`` list added,
        holding one entry per question in input order.
    """
    # Pair each question with its context directly instead of indexing by
    # position.
    filled_prompts = [
        prompt.format(context=context, prompt=question)
        for question, context in zip(row["prompt"], row["context"])
    ]

    # Issue the LLM calls concurrently. executor.map yields results in the
    # order of its inputs, so answers stay aligned with the batch.
    with f.ThreadPoolExecutor(max_workers=10) as executor:
        row["generated_answers"] = list(executor.map(llm, filled_prompts))

    return row
43+
44+
45+
# Run get_answers over the dataset in batches of 10 examples; the mapped
# dataset (with the added "generated_answers" column) replaces ds.
ds = ds.map(get_answers, batched=True, batch_size=10)

pyproject.toml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,6 @@ dependencies = [
1111
]
1212
dynamic = ["version", "readme"]
1313

14-
[project.optional-dependencies]
15-
test = [
16-
# "pytest-cov",
17-
"pytest",
18-
"rich",
19-
]
20-
21-
dev = [
22-
"ruff",
23-
"isort",
24-
"black[jupyter]",
25-
"pyright",
26-
]
27-
2814
[tool.setuptools.dynamic]
2915
readme = {file = ["README.md"], content-type = "text/plain"}
3016

requirements/dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ ruff
33
isort
44
black[jupyter]
55
pyright
6+
langchain

0 commit comments

Comments
 (0)