@@ -76,16 +76,26 @@ toolsgen generate \
7676 --num 500 \
7777 --workers 6 \
7878 --worker-batch-size 4
79+
80+ # Generate and push directly to Hugging Face Hub
81+ export HF_TOKEN="your-hf-token-here"
82+ toolsgen generate \
83+ --tools tools.json \
84+ --out output_dir \
85+ --num 100 \
86+ --push-to-hub \
87+ --repo-id username/dataset-name
7988```
8089
8190### Python API Usage
8291
8392```python
84- import os
8593from pathlib import Path
94+ from dotenv import load_dotenv
95+
8696from toolsgen.core import GenerationConfig, ModelConfig, generate_dataset
8797
88- os.environ["OPENAI_API_KEY"] = "your-api-key-here"
98+ load_dotenv() # Load from .env file
8999
90100# Configuration
91101tools_path = Path("tools.json")
@@ -119,6 +129,50 @@ print(f"Generated {manifest['num_generated']}/{manifest['num_requested']} record
119129print(f"Failed: {manifest['num_failed']} attempts")
120130```
121131
132+ ### Push to Hugging Face Hub
133+
134+ ```python
135+ from pathlib import Path
136+ from dotenv import load_dotenv
137+
138+ from toolsgen import GenerationConfig, ModelConfig, generate_dataset, push_to_hub
139+
140+ load_dotenv() # Load from .env file
141+
142+ tools_path = Path("tools.json")
143+ output_dir = Path("output")
144+
145+ gen_config = GenerationConfig(
146+ num_samples=100,
147+ strategy="random",
148+ seed=42,
149+ train_split=0.9,
150+ )
151+
152+ model_config = ModelConfig(
153+ model="gpt-4o-mini",
154+ temperature=0.7,
155+ )
156+
157+ # Generate dataset
158+ manifest = generate_dataset(
159+ output_dir = output_dir,
160+ gen_config = gen_config,
161+ model_config = model_config,
162+ tools_path = tools_path,
163+ )
164+
165+ # Push to Hub
166+ hub_info = push_to_hub(
167+ output_dir=output_dir,
168+ repo_id="username/dataset-name",
169+ private=False,
170+ )
171+
172+ print(f"Generated: {manifest['num_generated']} records")
173+ print(f"Repository: {hub_info['repo_url']}")
174+ ```
175+
122176See the `examples/` directory for complete working examples.
123177
124178**Note**: The examples in `examples/` use `python-dotenv` for convenience (loading API keys from a `.env` file). Install it with `pip install python-dotenv` if you want to use this approach.
@@ -228,7 +282,7 @@ For detailed information about the system architecture, pipeline, and core compo
228282- [ ] Custom prompt template system
229283- [x] Parallel generation with multiprocessing
230284- [ ] Additional sampling strategies (coverage-based, difficulty-based)
231- - [ ] Integration with Hugging Face Hub for direct dataset uploads
285+ - [x] Integration with Hugging Face Hub for direct dataset uploads
232286- [ ] Support for more LLM providers (Anthropic, Cohere, etc.)
233287- [ ] Web UI for dataset inspection and curation
234288- [ ] Advanced filtering and deduplication
0 commit comments