1+ {
2+ "cells" : [
3+ {
4+ "cell_type" : " markdown" ,
5+ "id" : " 930bc11c" ,
6+ "metadata" : {
7+ "id" : " 930bc11c"
8+ },
9+ "source" : [
10+ " # Meta Synthetic Data Generator (LLaMA 3.2 - 3B)\n " ,
11+ " \n " ,
12+ " This notebook demonstrates how to use Meta's LLaMA 3.2 3B model to generate synthetic data for use in AI training or application prototyping."
13+ ]
14+ },
15+ {
16+ "cell_type" : " markdown" ,
17+ "source" : [
18+ " [](https://colab.research.google.com/github/DhivyaBharathy-web/PraisonAI/blob/main/examples/cookbooks/Meta_LLaMA3_SyntheticData.ipynb)\n "
19+ ],
20+ "metadata" : {
21+ "id" : " KPi7FpbV9J2d"
22+ },
23+ "id" : " KPi7FpbV9J2d"
24+ },
25+ {
26+ "cell_type" : " markdown" ,
27+ "id" : " 80f68ecf" ,
28+ "metadata" : {
29+ "id" : " 80f68ecf"
30+ },
31+ "source" : [
32+ " ## Dependencies"
33+ ]
34+ },
{
"cell_type": "code",
"execution_count": null,
"id": "7eeb508d",
"metadata": {
"id": "7eeb508d"
},
"outputs": [],
"source": [
"# Install inference dependencies.\n",
"# Use %pip (not !pip) so the install targets the running kernel's environment.\n",
"%pip install -q transformers accelerate bitsandbytes"
]
},
47+ {
48+ "cell_type" : " markdown" ,
49+ "id" : " eda75a0c" ,
50+ "metadata" : {
51+ "id" : " eda75a0c"
52+ },
53+ "source" : [
54+ " ## Tools\n " ,
55+ " * `transformers` for model loading and text generation\n " ,
56+ " * `pipeline` for simplified inference\n " ,
57+ " * `AutoTokenizer`, `AutoModelForCausalLM` for LLaMA 3.2"
58+ ]
59+ },
60+ {
61+ "cell_type" : " markdown" ,
62+ "id" : " 6ee96383" ,
63+ "metadata" : {
64+ "id" : " 6ee96383"
65+ },
66+ "source" : [
67+ " ## YAML Prompt"
68+ ]
69+ },
{
"cell_type": "code",
"execution_count": null,
"id": "aa4e56ef",
"metadata": {
"id": "aa4e56ef"
},
"outputs": [],
"source": [
"# The YAML below is configuration text, not Python. Keep it inside a string so\n",
"# this code cell runs cleanly under Restart & Run All — bare YAML in a code\n",
"# cell (`prompt: |` ...) raises a SyntaxError.\n",
"yaml_prompt = \"\"\"\\\n",
"prompt: |\n",
"  task: \"Generate a customer complaint email\"\n",
"  style: \"Professional\"\n",
"\"\"\"\n",
"print(yaml_prompt)"
]
},
84+ {
85+ "cell_type" : " markdown" ,
86+ "id" : " e9fbe400" ,
87+ "metadata" : {
88+ "id" : " e9fbe400"
89+ },
90+ "source" : [
91+ " ## Main"
92+ ]
93+ },
{
"cell_type": "code",
"execution_count": null,
"id": "3e35336f",
"metadata": {
"id": "3e35336f"
},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed\n",
"\n",
"# Seed the sampler so the synthetic output is reproducible across re-runs\n",
"set_seed(42)\n",
"\n",
"# Load tokenizer and model (LLaMA 3.2 - 3B).\n",
"# NOTE: the Llama 3.2 3B instruct checkpoint is published on the Hub as\n",
"# \"meta-llama/Llama-3.2-3B-Instruct\"; \"Meta-Llama-3-3B-Instruct\" does not exist.\n",
"model_id = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = AutoModelForCausalLM.from_pretrained(model_id)\n",
"\n",
"# Create a simple text generation pipeline\n",
"generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n",
"\n",
"# Example synthetic data prompt\n",
"prompt = \"Create a customer support query about a late delivery.\"\n",
"\n",
"# Generate synthetic text. max_new_tokens bounds only the generated\n",
"# continuation; max_length would also count the prompt's tokens and is\n",
"# deprecated for text-generation pipelines.\n",
"output = generator(prompt, max_new_tokens=60, do_sample=True)[0][\"generated_text\"]\n",
"print(\"📝 Synthetic Output:\", output)"
]
},
121+ {
122+ "cell_type" : " markdown" ,
123+ "id" : " ed28cc2e" ,
124+ "metadata" : {
125+ "id" : " ed28cc2e"
126+ },
127+ "source" : [
128+ " ## Output"
129+ ]
130+ },
131+ {
132+ "cell_type" : " markdown" ,
133+ "id" : " 6fc8f19e" ,
134+ "metadata" : {
135+ "id" : " 6fc8f19e"
136+ },
137+ "source" : [
138+ " 🖼️ Output Preview (Text Summary):\n " ,
139+ " \n " ,
140+ " Prompt: \" Create a customer support query about a late delivery.\"\n " ,
141+ " \n " ,
142+ " 📝 Output: The LLaMA model generates a realistic complaint, such as:\n " ,
143+ " \n " ,
144+ " \" Dear Support Team, I placed an order two weeks ago and have yet to receive it...\"\n " ,
145+ " \n " ,
146+ " 🎯 This illustrates how the model can be used to generate realistic synthetic data for tasks like training chatbots or support models.\n "
147+ ]
148+ }
149+ ],
150+ "metadata" : {
151+ "colab" : {
152+ "provenance" : []
153+ }
154+ },
155+ "nbformat" : 4 ,
156+ "nbformat_minor" : 5
157+ }