Skip to content

Commit 5498460

Browse files
committed
feat: implement central dataset
1 parent 7ff3423 commit 5498460

File tree

6 files changed

+92
-23
lines changed

6 files changed

+92
-23
lines changed

neurons/validators/genie_validator.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
SeoChallenge,
2828
BalancedChallenge,
2929
)
30+
from webgenie.datasets import CentralDataset
3031
from webgenie.helpers.htmls import preprocess_html, is_valid_resources
3132
from webgenie.helpers.images import image_debug_str
3233
from webgenie.helpers.llms import set_seed
@@ -57,6 +58,17 @@ def __init__(self, neuron: BaseNeuron):
5758
self.task_generators = [
5859
(ImageTaskGenerator(), 1.0), # currently only image task generator is supported
5960
]
61+
self.init_signature()
62+
63+
def init_signature(self):
    """Publish this validator's wallet credentials on ``CentralDataset``.

    Signs a fixed ownership message with the wallet hotkey and stores the
    hex-encoded signature and the hotkey's ss58 address as class attributes
    of ``CentralDataset``. Any failure is logged and re-raised.
    """
    try:
        hotkey = self.neuron.wallet.hotkey
        signed = hotkey.sign(b"I am the owner of the wallet")
        CentralDataset.SIGNATURE = signed.hex()
        CentralDataset.HOTKEY = hotkey.ss58_address
    except Exception as err:
        bt.logging.error(f"Error initializing signature: {err}")
        raise err
6072

6173
async def query_miners(self):
6274
try:
@@ -250,7 +262,7 @@ async def score(self):
250262
except Exception as e:
251263
bt.logging.error(f"Error storing results to database: {e}")
252264

253-
async def synthensize_task(self):
265+
async def synthensize_task(self, session:int, task_index:int):
254266
try:
255267
with self.lock:
256268
if len(self.synthetic_tasks) > MAX_SYNTHETIC_TASK_SIZE:
@@ -264,7 +276,7 @@ async def synthensize_task(self):
264276
weights=[weight for _, weight in self.task_generators],
265277
)[0]
266278

267-
task, synapse = await task_generator.generate_task()
279+
task, synapse = await task_generator.generate_task(session=session, task_index=task_index)
268280
with self.lock:
269281
self.synthetic_tasks.append((task, synapse))
270282

@@ -295,22 +307,22 @@ async def forward(self):
295307
return
296308

297309
bt.logging.info(f"Forwarding task #{task_index} in session #{session}")
298-
seed = self.get_seed(session, task_index)
310+
# seed = self.get_seed(session, task_index)
299311

300-
bt.logging.info(f"Init random with seed: {seed}")
301-
random.seed(seed)
302-
set_seed(seed)
303-
304-
while True:
305-
try:
306-
await self.synthensize_task()
307-
break
308-
except Exception as e:
309-
bt.logging.error(
310-
f"Error in synthensize_task: {e}"
311-
f"Retrying..."
312-
)
312+
# bt.logging.info(f"Init random with seed: {seed}")
313+
# random.seed(seed)
314+
# set_seed(seed)
313315

316+
try:
317+
task, synapse = await self.synthensize_task(session, task_index)
318+
task.task_id = f"{session}_{task_index}"
319+
synapse.task_id = task.task_id
320+
except Exception as e:
321+
bt.logging.error(
322+
f"Error in synthensize_task: {e}"
323+
f"Retrying..."
324+
)
325+
314326
await self.query_miners()
315327
await self.score()
316328
except Exception as e:

webgenie/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import psutil
44
# Change this value when updating your code base.
55
# Define the version of the webgenie.
6-
__VERSION__ = "1.1.28" # version
6+
__VERSION__ = "1.2.0" # version
77

88
SPEC_VERSION = (
99
(1000 * int(__VERSION__.split(".")[0]))

webgenie/datasets/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .dataset import Dataset, DatasetEntry
22
from .synthetic_dataset import SyntheticDataset
33
from .huggingface_dataset import HuggingfaceDataset
4-
from .random_website_dataset import RandomWebsiteDataset
4+
from .random_website_dataset import RandomWebsiteDataset
5+
from .central_dataset import CentralDataset
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# https://huggingface.co/datasets/SALT-NLP/Design2Code_human_eval_pairwise
2+
3+
import bittensor as bt
4+
import random
5+
import requests
6+
from datasets import load_dataset
7+
8+
from webgenie.datasets.dataset import Dataset, DatasetEntry
9+
10+
11+
class CentralDataset(Dataset):
    """Dataset that fetches ground-truth HTML from the central task server.

    ``HOTKEY`` and ``SIGNATURE`` are class-level values sent as request
    headers. They start as placeholders and must be overwritten with real
    credentials before a request is made.
    """

    # Placeholder credentials, replaced at runtime by whoever owns the wallet.
    HOTKEY = "hotkey"
    SIGNATURE = "signature"

    # Endpoint that returns the HTML for a given (session, task_number) pair.
    TASK_URL = "http://209.126.9.130:18000/api/v1/task/generate"
    # Seconds to wait for the server; without a timeout `requests` can block
    # forever on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        pass

    async def generate_context(self, **kwargs) -> DatasetEntry:
        """Build a ``DatasetEntry`` for the requested session and task.

        Keyword Args:
            session: Session identifier forwarded to the server.
            task_number: Task identifier within the session. ``task_index``
                is accepted as an alias because callers in this code base
                pass the value under that name.

        Returns:
            A ``DatasetEntry`` with ``src="central"`` and the fetched HTML
            as ``ground_truth_html``; ``prompt`` and ``base64_image`` are
            empty strings.

        Raises:
            ValueError: If the session or task identifier is missing.
            Exception: Any error from ``get_html`` is logged and re-raised.
        """
        try:
            bt.logging.info("Generating Central context")
            session = kwargs.get("session")
            task_number = kwargs.get("task_number")
            if task_number is None:
                # Callers pass `task_index`; previously this left task_number
                # as None and `int(None)` raised TypeError in get_html.
                task_number = kwargs.get("task_index")
            if session is None or task_number is None:
                raise ValueError(
                    "generate_context requires 'session' and "
                    "'task_number' (or 'task_index')"
                )

            html = self.get_html(session, task_number)
            return DatasetEntry(
                src="central",
                url=f"central_{session}_{task_number}",
                ground_truth_html=html,
                prompt="",
                base64_image="",
            )
        except Exception as e:
            bt.logging.error(f"Error in generate_context: {e}")
            raise

    def get_html(self, session: int, task_number: int) -> str:
        """Fetch the task HTML from the central server.

        Sends a GET request carrying the class-level ``Signature`` and
        ``Hotkey`` headers, with ``session`` and ``task_number`` as query
        parameters.

        Raises:
            Exception: If the server answers with a non-200 status code.
        """
        bt.logging.info(f"Getting HTML for session {session} and task {task_number}")
        headers = {
            "Signature": CentralDataset.SIGNATURE,
            "Hotkey": CentralDataset.HOTKEY,
        }
        params = {
            "session": int(session),
            "task_number": int(task_number),
        }
        response = requests.get(
            self.TASK_URL,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get HTML: {response.status_code} {response.text}")

        # Decode the body once and reuse it for both logging and the result.
        payload = response.json()
        bt.logging.info(f"HTML: {payload}")
        return payload["html"]

webgenie/tasks/image_task_generator.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,18 @@
2929
RandomWebsiteDataset,
3030
SyntheticDataset,
3131
HuggingfaceDataset,
32+
CentralDataset,
3233
)
3334

34-
3535
class ImageTaskGenerator(TaskGenerator):
3636
def __init__(self):
3737
super().__init__()
3838

3939
self.datasets = [
40+
(CentralDataset(), 1),
4041
#(RandomWebsiteDataset(), 1),
4142
#(SyntheticDataset(), 0.5),
42-
(HuggingfaceDataset(dataset_name="SALT-NLP/Design2Code-hf", split="train", html_column="text"), 1),
43+
#(HuggingfaceDataset(dataset_name="SALT-NLP/Design2Code-hf", split="train", html_column="text"), 1),
4344
]
4445

4546
self.metrics = {
@@ -48,11 +49,11 @@ def __init__(self):
4849
QUALITY_METRIC_NAME: QualityReward(),
4950
}
5051

51-
async def generate_task(self) -> Tuple[Task, bt.Synapse]:
52+
async def generate_task(self, **kwargs) -> Tuple[Task, bt.Synapse]:
5253
bt.logging.info("Generating Image task")
5354

5455
dataset, _ = random.choices(self.datasets, weights=[weight for _, weight in self.datasets])[0]
55-
dataset_entry = await dataset.generate_context()
56+
dataset_entry = await dataset.generate_context(**kwargs)
5657
bt.logging.debug(f"Generated dataset entry: {dataset_entry.url}")
5758

5859
ground_truth_html = preprocess_html(dataset_entry.ground_truth_html)

webgenie/tasks/task_generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class TaskGenerator:
1111
def __init__(self):
1212
self.metrics: dict[str, Reward] = {}
1313

14-
async def generate_task(self) -> Tuple[Task, bt.Synapse]:
14+
async def generate_task(self, **kwargs) -> Tuple[Task, bt.Synapse]:
    """Base-class stub for task generation.

    Accepts arbitrary keyword arguments and does nothing; awaiting it
    yields ``None``.
    """
    return None
1616

1717
async def calculate_scores(self, task: Task, solutions: List[Solution]) -> dict[str, np.ndarray]:

0 commit comments

Comments
 (0)