diff --git a/.env.miner.example b/.env.miner.example new file mode 100644 index 00000000..e9fd95e7 --- /dev/null +++ b/.env.miner.example @@ -0,0 +1,8 @@ +WANDB_API_KEY = your_wandb_api_key +WANDB_ENTITY_NAME = your_wandb_entity_name + +HF_TOKEN = your_huggingface_token + +LLM_API_KEY = your_openai_api_key +LLM_MODEL_ID = your_openai_model_id +LLM_MODEL_URL = your_openai_model_url \ No newline at end of file diff --git a/.env.validator.example b/.env.validator.example new file mode 100644 index 00000000..3e0b781f --- /dev/null +++ b/.env.validator.example @@ -0,0 +1,6 @@ +WANDB_API_KEY = your_wandb_api_key +WANDB_ENTITY_NAME = your_wandb_entity_name + +LLM_API_KEY = your_openai_api_key +LLM_MODEL_ID = your_openai_model_id +LLM_MODEL_URL = your_openai_model_url \ No newline at end of file diff --git a/.gitignore b/.gitignore index 72a70c3a..b064338d 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,29 @@ testing/ # Editors .vscode/settings.json + +.DS_Store +temp.txt + +# env +.env.miner +.env.validator + +# test +test.py + +# Wandb +wandb/ + +# work dir +work/ +work_save/ +*.png + +# scripts +run_miner.sh +run_miner_2.sh +run_validator.sh + +# developer doc +developer_doc.md \ No newline at end of file diff --git a/README.md b/README.md index ba69bdae..dd113af1 100644 --- a/README.md +++ b/README.md @@ -1,213 +1,146 @@ -
+# WebGenieAI Subnet -# **Bittensor Subnet Template** -[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +Welcome to WebGenieAI Subnet, a pioneering Bittensor-based subnet designed to revolutionize project generation through advanced AI models. WebGenieAI aims to transform diverse prompts—ranging from text and voice to images and Figma designs—into fully functional, ready-to-deploy projects. This subnet is tailored for developers, designers, and innovators who seek to accelerate their project development process with high-quality, AI-generated outputs. ---- +## Table of Contents -## The Incentivized Internet +- [Overview](#overview) +- [Features](#features) +- [Incentive Mechanism](#incentive-mechanism-v1) +- [Roadmap](#roadmap) -[Discord](https://discord.gg/bittensor) • [Network](https://taostats.io/) • [Research](https://bittensor.com/whitepaper) -
+## Overview ---- -- [Quickstarter template](#quickstarter-template) -- [Introduction](#introduction) - - [Example](#example) -- [Installation](#installation) - - [Before you proceed](#before-you-proceed) - - [Install](#install) -- [Writing your own incentive mechanism](#writing-your-own-incentive-mechanism) -- [Writing your own subnet API](#writing-your-own-subnet-api) -- [Subnet Links](#subnet-links) -- [License](#license) +WebGenieAI Subnet leverages state-of-the-art AI models to interpret and convert various types of prompts into complete, deployable projects. Whether you're starting with a simple HTML/CSS framework or aiming to develop a complex React application, WebGenieAI can generate the entire codebase, ensuring it meets your specified requirements and is ready for immediate deployment. ---- -## Quickstarter template +### Vision -This template contains all the required installation instructions, scripts, and files and functions for: -- Building Bittensor subnets. -- Creating custom incentive mechanisms and running these mechanisms on the subnets. +WebGenieAI envisions a future where project creation is seamless, automated, and efficient, empowering developers to focus more on innovation and less on repetitive coding tasks. By harnessing the capabilities of the Bittensor network, WebGenieAI fosters a competitive environment that drives continuous improvement in AI-generated outputs. -In order to simplify the building of subnets, this template abstracts away the complexity of the underlying blockchain and other boilerplate code. While the default behavior of the template is sufficient for a simple subnet, you should customize the template in order to meet your specific requirements. ---- +### Purpose -## Introduction +The primary purpose of WebGenieAI is to: -**IMPORTANT**: If you are new to Bittensor subnets, read this section before proceeding to [Installation](#installation) section. +- Automate Project Generation: Provide a platform that can autonomously generate high-quality projects from diverse input prompts. +- Enhance Productivity: Reduce the time and effort required for project development, enabling developers to quickly bring their ideas to life. +- Promote Innovation: Encourage innovative solutions and optimizations in project generation through competitive incentivization. -The Bittensor blockchain hosts multiple self-contained incentive mechanisms called **subnets**. Subnets are playing fields in which: -- Subnet miners who produce value, and -- Subnet validators who produce consensus +## Features -determine together the proper distribution of TAO for the purpose of incentivizing the creation of value, i.e., generating digital commodities, such as intelligence or data. +- **Text Prompt**: Generate projects by describing them in text. +- **Voice Prompt**: Create projects by giving voice commands. +- **Image Prompt**: Upload an image of a website or app, and WebGenieAI will generate a pixel-perfect project. +- **Figma Prompt**: Convert Figma designs into functional projects. +- **Automated Downloads**: Directly download the generated projects as complete folders. -Each subnet consists of: -- Subnet miners and subnet validators. -- A protocol using which the subnet miners and subnet validators interact with one another. This protocol is part of the incentive mechanism. -- The Bittensor API using which the subnet miners and subnet validators interact with Bittensor's onchain consensus engine [Yuma Consensus](https://bittensor.com/documentation/validating/yuma-consensus). 
The Yuma Consensus is designed to drive these actors: subnet validators and subnet miners, into agreement on who is creating value and what that value is worth. +## Incentive Mechanism v1 -This starter template is split into three primary files. To write your own incentive mechanism, you should edit these files. These files are: -1. `template/protocol.py`: Contains the definition of the protocol used by subnet miners and subnet validators. -2. `neurons/miner.py`: Script that defines the subnet miner's behavior, i.e., how the subnet miner responds to requests from subnet validators. -3. `neurons/validator.py`: This script defines the subnet validator's behavior, i.e., how the subnet validator requests information from the subnet miners and determines the scores. +The WebGenieAI subnet incentivizes miners and validators to ensure high-quality outputs. Here’s how it works specifically for this subnet: -### Example +- Task Assignment: Subnet miners are assigned tasks related to generating and improving machine learning models based on various prompts (text and image). +- Performance Evaluation: Validators evaluate the outputs produced by miners. The evaluation criteria include accuracy, efficiency, and innovation. +- Ranking and Rewarding: Validators rank the miners according to their performance. The Bittensor blockchain’s Yuma Consensus mechanism determines the TAO rewards distribution based on these rankings. -The Bittensor Subnet 1 for Text Prompting is built using this template. See [prompting](https://github.com/macrocosm-os/prompting) for how to configure the files and how to add monitoring and telemetry and support multiple miner types. Also see this Subnet 1 in action on [Taostats](https://taostats.io/subnets/netuid-1/) explorer. +## Evaluation Process ---- +1) Image to HTML Model -## Installation +### Automatic evaluation of ImageToHTML task for design-wise +We automatically evaluate generated webpages by calculating the similarity between the original input image and the rendered screenshot of generated webpage. +We break down the evaluation into both high-level visual similarity and low-level element matching. + +#### High-level Visual Similarity + +To evaluate the visual similarity of $I_R$ and $I_G$, we use the similarity of their CLIP embedding, denoted as CLIP($I_R$, $I_G$). Specifically, we extract features by CLIP-ViT-B/32 after resizing screenshots to squares. +To rule out the texts in the screenshots, we use the inpainting algorithm from [Telea](https://docs.opencv.org/4.3.0/df/d3d/tutorial_py_inpainting.html) to mask all detected text boxes using their bounding box coordinates. + +#### Low-level Element Matching + +Metrics like CLIP similarity only capture the similarity of the overall images rather than the matching of all the details like text. Moreover, the metric itself does not offer any fine-grained breakdown to help diagnose model weaknesses. + +To complement that, we introduce a suite of element-matching metrics. Specifically, we consider whether the generated webpages manage to recall all visual elements, and whether the corresponding visual elements in the input image and generated webpages have aligned text content, position, and color. + +Given a reference webpage screenshot $I_R$ and a generated webpage screenshot $I_G$, we use a text detection module to output a set of detected visual element blocks for each: R = { $r_1$, $r_2$, ..., $r_m$ } and G = { $g_1$, $g_2$, ..., $g_n$ }, where each block contains its textual content and bounding box coordinates. 
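+
+Before turning to how the detected blocks are matched, the high-level CLIP similarity described above can be prototyped in a few lines. The snippet below is an illustrative sketch rather than the validator's actual scoring code: it assumes the `transformers` CLIP implementation and OpenCV's Telea inpainting, and the text-box coordinates are expected to come from any off-the-shelf text detector.
+
+```python
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from transformers import CLIPModel, CLIPProcessor
+
+_clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+_clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+def mask_text(image_bgr: np.ndarray, text_boxes) -> np.ndarray:
+    # Inpaint detected text boxes (Telea) so CLIP compares layout and color, not glyphs.
+    mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
+    for x, y, w, h in text_boxes:  # (x, y, width, height) from any text detector
+        mask[y:y + h, x:x + w] = 255
+    return cv2.inpaint(image_bgr, mask, 3, cv2.INPAINT_TELEA)
+
+def clip_similarity(img_ref: Image.Image, img_gen: Image.Image) -> float:
+    # Cosine similarity of CLIP-ViT-B/32 embeddings of the two square-resized screenshots.
+    images = [img_ref.resize((224, 224)), img_gen.resize((224, 224))]
+    inputs = _clip_processor(images=images, return_tensors="pt")
+    with torch.no_grad():
+        features = _clip_model.get_image_features(**inputs)
+    features = features / features.norm(dim=-1, keepdim=True)
+    return float(features[0] @ features[1])
+```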
+
+Based on the two sets of detected blocks, we use the Jonker-Volgenant algorithm to get the optimal matching M between R and G based on text similarity, where (p, q) ∈ M indicates $r_p$ is matched with $g_q$.
+
+Given R, G, and matched pairs in M, we evaluate similarity along the following aspects (a code sketch of these metrics follows the list):
+- **Block-Match**: The first desideratum of the task is that all visual elements from the image should be reproduced in the generated webpage, and the generated webpage should not hallucinate non-existent new elements. We measure this by computing the total sizes of all matched blocks divided by the total sizes of all blocks, including unmatched ones (either because the generated webpages missed them or because the generated webpages contain hallucinated blocks):
+
+![Incentive Mechanism Formula](docs/incentive-fomula.png "WebGenieAI Incentive Formula")

-### Before you proceed
-Before you proceed with the installation of the subnet, note the following:

+where S(·) returns the size of the blocks, and $U_R$ and $U_G$ denote the unmatched blocks in R
+and G. The intuition here is that unmatched blocks lower the score, as they indicate
+missing original blocks or hallucinated generated blocks, and the larger the unmatched
+blocks are, the lower this score is.

-- Use these instructions to run your subnet locally for your development and testing, or on Bittensor testnet or on Bittensor mainnet.
-- **IMPORTANT**: We **strongly recommend** that you first run your subnet locally and complete your development and testing before running the subnet on Bittensor testnet. Furthermore, make sure that you next run your subnet on Bittensor testnet before running it on the Bittensor mainnet.
-- You can run your subnet either as a subnet owner, or as a subnet validator or as a subnet miner.
-- **IMPORTANT:** Make sure you are aware of the minimum compute requirements for your subnet. See the [Minimum compute YAML configuration](./min_compute.yml).
-- Note that installation instructions differ based on your situation: For example, installing for local development and testing will require a few additional steps compared to installing for testnet. Similarly, installation instructions differ for a subnet owner vs a validator or a miner.

+- **Text**: Given two strings from two matched blocks $r_p$ and $g_q$, the text similarity **sim**text($r_p$, $g_q$) is calculated as twice the number of overlapping characters divided by the total number of characters in the two strings (character-level Sørensen-Dice similarity). The overall score is averaged across all matched pairs.

-### Install
-
-- **Running locally**: Follow the step-by-step instructions described in this section: [Running Subnet Locally](./docs/running_on_staging.md).
-- **Running on Bittensor testnet**: Follow the step-by-step instructions described in this section: [Running on the Test Network](./docs/running_on_testnet.md).
-- **Running on Bittensor mainnet**: Follow the step-by-step instructions described in this section: [Running on the Main Network](./docs/running_on_mainnet.md).

+- **Position**: The positioning of the blocks largely impacts the overall layout. For each matched pair (p, q), we calculate the position similarity **sim**pos($r_p$, $g_q$) = 1 − max(abs($x_q$ − $x_p$), abs($y_q$ − $y_p$)), where ($x_p$, $y_p$) and ($x_q$, $y_q$) are the normalized coordinates (in [0, 1]) of the centers of $r_p$ and $g_q$. The overall score is averaged across all matched pairs.

+- **Color**: We use the [CIEDE2000](https://en.wikipedia.org/wiki/Color_difference) color difference formula to assess the perceptual difference between the colors of the generated text in block $g_q$ and the reference text in block $r_p$, denoted as **sim**color($r_p$, $g_q$), where the formula considers the complexities of human color vision. The overall score is averaged across all matched pairs.
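+
+The element-matching metrics above can be prototyped as follows. This is a simplified, illustrative sketch (not the subnet's scoring code): it assumes the blocks have already been extracted by a text detector, uses SciPy's `linear_sum_assignment` (a modified Jonker-Volgenant solver) for the optimal matching, and omits refinements such as discarding weak matches.
+
+```python
+from dataclasses import dataclass
+from typing import Dict, List, Tuple
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+@dataclass
+class Block:
+    text: str
+    x: float     # normalized center x in [0, 1]
+    y: float     # normalized center y in [0, 1]
+    size: float  # block area, used to weight Block-Match
+
+def dice(a: str, b: str) -> float:
+    # Character-level Sørensen-Dice similarity: 2 * overlap / (len(a) + len(b)).
+    if not a and not b:
+        return 1.0
+    overlap = sum(min(a.count(c), b.count(c)) for c in set(a))
+    return 2.0 * overlap / (len(a) + len(b))
+
+def match_blocks(R: List[Block], G: List[Block]) -> List[Tuple[int, int]]:
+    # Optimal matching M that maximizes total text similarity between R and G.
+    cost = np.array([[-dice(r.text, g.text) for g in G] for r in R])
+    rows, cols = linear_sum_assignment(cost)
+    return list(zip(rows, cols))
+
+def element_scores(R: List[Block], G: List[Block]) -> Dict[str, float]:
+    M = match_blocks(R, G)
+    matched_size = sum(R[p].size + G[q].size for p, q in M)
+    total_size = sum(b.size for b in R) + sum(b.size for b in G)
+    block_match = matched_size / total_size if total_size else 0.0
+    text_sim = float(np.mean([dice(R[p].text, G[q].text) for p, q in M])) if M else 0.0
+    pos_sim = float(np.mean([
+        1 - max(abs(G[q].x - R[p].x), abs(G[q].y - R[p].y)) for p, q in M
+    ])) if M else 0.0
+    # Color similarity would additionally compare the matched blocks' text colors in
+    # Lab space with CIEDE2000, e.g. via skimage.color.deltaE_ciede2000.
+    return {"block_match": block_match, "text": text_sim, "position": pos_sim}
+```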
---

-## Writing your own incentive mechanism

+2) Text Prompt to HTML Model
+
+### Unsupervised Evaluation of Model by Round-Trip Correctness
+We draw inspiration from a software testing technique known as property-based testing. It allows defining properties that must hold between the inputs and outputs of a program (e.g., all items in the input list must also appear in the output list). Round-trip correctness is one such property (e.g., compressing and subsequently decompressing data must yield the original data).
+
+Consider two forms of data X and Y, such as a text prompt and HTML, and two (probabilistic) models whose task is to “translate” from one form of data to the other, i.e., a forward model M : X → Y and a backward model $M^{-1}$ : Y → X. These models could be a single LLM prompted differently.
+
+The central idea for unsupervised evaluation is the concept of round-trip correctness (RTC). Intuitively, for a “good” forward and backward model we expect $\hat{x} = M^{-1}(M(x))$ to be semantically equivalent to x. For example, we can describe the HTML code with a text prompt in the forward pass and then generate the code back from that text prompt. To compute RTC we need some function sim(x, $\hat{x}$) that estimates the semantic equivalence between the original x and each predicted sample $\hat{x}$. Such functions may include discrete or continuous metrics such as exact match, BLEU, and so on (a minimal sketch of this check is shown after the Requirements section below).
+
+### Supervised Evaluation of Model by CodeBERTScore
+Let $x$ be the prompt, $y$ the ground-truth HTML, and $\hat{y}$ the generated HTML.
+To evaluate the performance of the model, we can use [CodeBERTScore](https://github.com/neulab/code-bert-score): sim($y$, $\hat{y}$) = bert_score($y$, $\hat{y}$).
+CodeBERTScore is an evaluation metric for code generation, which builds on BERTScore. Instead of encoding only the generated tokens as in [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore), CodeBERTScore also encodes the natural language input preceding the generated code, thus modeling the consistency between the generated code and its given natural language context as well.
+
+![Webgenie Subnet workflow](docs/webgenie-workflow.png "WebGenieAI workflow")
+
+
+### Example Scenario
+
+- Prompt: A miner receives a prompt to create a front-end focused application.
+- Generation: The miner generates the code for the application and submits it.
+- Evaluation: Validators review the submission:
+  - Accuracy: Does the application have all the features mentioned in the prompt?
+  - Efficiency: Is the code optimized for performance?
+  - Innovation: Does the application include any additional features or optimizations not explicitly requested but beneficial?
+- Ranking: Validators rank this submission against others.
+- Rewarding: Based on the ranking, the miner receives TAO rewards.
+
+## Installation

-As described in [Quickstarter template](#quickstarter-template) section above, when you are ready to write your own incentive mechanism, update this template repository by editing the following files. The code in these files contains detailed documentation on how to update the template. Read the documentation in each of the files to understand how to update the template.
There are multiple **TODO**s in each of the files identifying sections you should update. These files are: -- `template/protocol.py`: Contains the definition of the wire-protocol used by miners and validators. -- `neurons/miner.py`: Script that defines the miner's behavior, i.e., how the miner responds to requests from validators. -- `neurons/validator.py`: This script defines the validator's behavior, i.e., how the validator requests information from the miners and determines the scores. -- `template/forward.py`: Contains the definition of the validator's forward pass. -- `template/reward.py`: Contains the definition of how validators reward miner responses. - -In addition to the above files, you should also update the following files: -- `README.md`: This file contains the documentation for your project. Update this file to reflect your project's documentation. -- `CONTRIBUTING.md`: This file contains the instructions for contributing to your project. Update this file to reflect your project's contribution guidelines. -- `template/__init__.py`: This file contains the version of your project. -- `setup.py`: This file contains the metadata about your project. Update this file to reflect your project's metadata. -- `docs/`: This directory contains the documentation for your project. Update this directory to reflect your project's documentation. - -__Note__ -The `template` directory should also be renamed to your project name. ---- - -# Writing your own subnet API -To leverage the abstract `SubnetsAPI` in Bittensor, you can implement a standardized interface. This interface is used to interact with the Bittensor network and can be used by a client to interact with the subnet through its exposed axons. - -What does Bittensor communication entail? Typically two processes, (1) preparing data for transit (creating and filling `synapse`s) and (2), processing the responses received from the `axon`(s). - -This protocol uses a handler registry system to associate bespoke interfaces for subnets by implementing two simple abstract functions: -- `prepare_synapse` -- `process_responses` - -These can be implemented as extensions of the generic `SubnetsAPI` interface. E.g.: - - -This is abstract, generic, and takes(`*args`, `**kwargs`) for flexibility. See the extremely simple base class: -```python -class SubnetsAPI(ABC): - def __init__(self, wallet: "bt.wallet"): - self.wallet = wallet - self.dendrite = bt.dendrite(wallet=wallet) - - async def __call__(self, *args, **kwargs): - return await self.query_api(*args, **kwargs) - - @abstractmethod - def prepare_synapse(self, *args, **kwargs) -> Any: - """ - Prepare the synapse-specific payload. - """ - ... - - @abstractmethod - def process_responses(self, responses: List[Union["bt.Synapse", Any]]) -> Any: - """ - Process the responses from the network. - """ - ... 
- -``` - - -Here is a toy example: - -```python -from bittensor.subnets import SubnetsAPI -from MySubnet import MySynapse - -class MySynapseAPI(SubnetsAPI): - def __init__(self, wallet: "bt.wallet"): - super().__init__(wallet) - self.netuid = 99 - - def prepare_synapse(self, prompt: str) -> MySynapse: - # Do any preparatory work to fill the synapse - data = do_prompt_injection(prompt) - - # Fill the synapse for transit - synapse = StoreUser( - messages=[data], - ) - # Send it along - return synapse - - def process_responses(self, responses: List[Union["bt.Synapse", Any]]) -> str: - # Look through the responses for information required by your application - for response in responses: - if response.dendrite.status_code != 200: - continue - # potentially apply post processing - result_data = postprocess_data_from_response(response) - # return data to the client - return result_data -``` - -You can use a subnet API to the registry by doing the following: -1. Download and install the specific repo you want -1. Import the appropriate API handler from bespoke subnets -1. Make the query given the subnet specific API - - - -# Subnet Links -In order to see real-world examples of subnets in-action, see the `subnet_links.py` document or access them from inside the `template` package by: -```python -import template -template.SUBNET_LINKS -[{'name': 'sn0', 'url': ''}, - {'name': 'sn1', 'url': 'https://github.com/opentensor/prompting/'}, - {'name': 'sn2', 'url': 'https://github.com/bittranslateio/bittranslate/'}, - {'name': 'sn3', 'url': 'https://github.com/gitphantomman/scraping_subnet/'}, - {'name': 'sn4', 'url': 'https://github.com/manifold-inc/targon/'}, -... -] -``` - -## License -This repository is licensed under the MIT License. -```text -# The MIT License (MIT) -# Copyright © 2024 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -``` +- See [Running on Staging](docs/running_on_staging.md) for instructions on how to run the subnet on staging. +- See [Running on Testnet](docs/running_on_testnet.md) for instructions on how to run the subnet on testnet. +- See [Running on Mainnet](docs/running_on_mainnet.md) for instructions on how to run the subnet on mainnet. + +## Requirements + +- Miners can use any port. +- Miners can use OpenAI API key or can use their own model. +- Validators need to use OpenAI API key to generate a task for miners. 
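+
+For reference, the round-trip-correctness check from the evaluation section above can be prototyped as shown below. This is an illustrative sketch only: `describe` and `generate` stand for the forward and backward models (for example, a single LLM prompted in two different ways), and `SequenceMatcher` is a stand-in for the similarity function sim(x, $\hat{x}$); in practice a metric such as BLEU or CodeBERTScore would be used.
+
+```python
+from difflib import SequenceMatcher
+
+def round_trip_correctness(html: str, describe, generate, n_samples: int = 3) -> float:
+    # Forward pass: summarize the HTML into a text prompt (model M: HTML -> text).
+    description = describe(html)
+    # Backward pass: regenerate HTML from that prompt (model M^-1: text -> HTML).
+    samples = [generate(description) for _ in range(n_samples)]
+    # Average similarity between the original HTML and each regenerated sample.
+    return sum(SequenceMatcher(None, html, s).ratio() for s in samples) / n_samples
+```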
+ +## Roadmap + +### Phase 1: Foundation (Q4 2024) +- [x] Launch on testnet (214) +- [x] Launch front-end application v1 (webgenieai.co) + - Enable Text & image inputs +- [x] Incentive mechanism v1 + - Generate pure HTML/CSS web pages from text & image based prompts +- [ ] Begin marketing for brand awareness and interest +- [ ] Launch on mainnet + +### Phase 2: Upgrade (Q1 2025) +- [ ] Build dashboard to track miner performance and progress +- [ ] Upgrade front-end application to v2 + - Enable figma design inputs +- [ ] Upgrade incentive mechanism to v2 + - Generate full framework based on React, Vue, and Next.js projects from text, image, and figma prompts + +### Phase 3: Expand (Q2 2025) +- [ ] Add features to monetize the application + - Add payment gateways + - Automate the downloading of fully functional projects +- [ ] Market and B2B sales expansion +- [ ] Grow the team \ No newline at end of file diff --git a/auto_update.sh b/auto_update.sh new file mode 100644 index 00000000..99968193 --- /dev/null +++ b/auto_update.sh @@ -0,0 +1,32 @@ +#!/bin/bash + + +while true; do + # Log the start of the script execution + echo "$(date): Script started" + + # Save the current HEAD hash + current_head=$(git rev-parse HEAD) + + # Pull the latest changes from the repository + git stash + git pull -f + git reset --hard origin/main + + # Get the new HEAD hash + new_head=$(git rev-parse HEAD) + + # Check if the new HEAD is different from the current HEAD + if [ "$current_head" != "$new_head" ]; then + # The HEAD has changed, meaning there's a new version + echo "$(date): New version detected, installing requirements and restarting the validator." + pip install -e . + pm2 restart webgenie_validator + else + # No new version, no action needed + echo "$(date): No new version detected, no restart needed." + fi + + # Sleep until the beginning of the next hour + sleep 3600 +done diff --git a/contrib/CONTRIBUTING.md b/contrib/CONTRIBUTING.md index ba33ce3c..9e98a31f 100644 --- a/contrib/CONTRIBUTING.md +++ b/contrib/CONTRIBUTING.md @@ -1,6 +1,6 @@ -# Contributing to Bittensor Subnet Development +# Contributing to webgenie -The following is a set of guidelines for contributing to the Bittensor ecosystem. These are **HIGHLY RECOMMENDED** guidelines, but not hard-and-fast rules. Use your best judgment, and feel free to propose changes to this document in a pull request. +The following is a set of guidelines for contributing to the webgenie Subnet. These are **HIGHLY RECOMMENDED** guidelines, but not hard-and-fast rules. Use your best judgment, and feel free to propose changes to this document in a pull request. ## Table Of Contents 1. [How Can I Contribute?](#how-can-i-contribute) @@ -16,10 +16,10 @@ The following is a set of guidelines for contributing to the Bittensor ecosystem ## How Can I Contribute? -TODO(developer): Define your desired contribution procedure. +You can fork this repository and create a PR with your codes and request review from our development team. You can reference [Pull Request Philosophy](#pull-request-philosophy). ## Communication Channels -TODO(developer): Place your communication channels here +Contact us on [Our discord server](https://discord.gg/P2XRwVEJ) > Please follow the Bittensor Subnet [style guide](./STYLE.md) regardless of your contribution type. @@ -99,7 +99,7 @@ After you submit a pull request, it will be reviewed by the maintainers. 
They ma > Note: Be sure to merge the latest from "upstream" before making a pull request: ```bash -git remote add upstream https://github.com/opentensor/bittensor.git # TODO(developer): replace with your repo URL +git remote add upstream https://github.com/webgenie/webgenie.git git fetch upstream git merge upstream/ git push origin diff --git a/docs/incentive-fomula.png b/docs/incentive-fomula.png new file mode 100644 index 00000000..848b6fa9 Binary files /dev/null and b/docs/incentive-fomula.png differ diff --git a/docs/running_on_mainnet.md b/docs/running_on_mainnet.md index 38be00a6..f95f363c 100644 --- a/docs/running_on_mainnet.md +++ b/docs/running_on_mainnet.md @@ -24,20 +24,20 @@ After installing `bittensor`, proceed as below: ## Steps -## 1. Install your subnet template +## 1. Install web-genie-ai **NOTE: Skip this step if** you already did this during local testing and development. In your project directory: ```bash -git clone https://github.com/opentensor/bittensor-subnet-template.git +git clone https://github.com/web-genie-ai/web-genie-ai.git ``` -Next, `cd` into `bittensor-subnet-template` repo directory: +Next, `cd` into `web-genie-ai` repo directory: ```bash -cd bittensor-subnet-template +cd web-genie-ai ``` Install the Bittensor subnet template package: @@ -132,7 +132,7 @@ This step registers your subnet validator and subnet miner keys to the subnet gi Register your miner key to the subnet: ```bash -btcli subnet recycle_register --netuid 1 --subtensor.network finney --wallet.name miner --wallet.hotkey default +btcli subnet recycle_register --netuid [mainnet_netuid] --subtensor.network finney --wallet.name miner --wallet.hotkey default ``` Follow the below prompts: @@ -149,13 +149,13 @@ Follow the below prompts: Next, register your validator key to the subnet: ```bash -btcli subnet recycle_register --netuid 1 --subtensor.network finney --wallet.name validator --wallet.hotkey default +btcli subnet recycle_register --netuid [mainnet_netuid] --subtensor.network finney --wallet.name validator --wallet.hotkey default ``` Follow the below prompts: ```bash ->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created. +>> Enter netuid [1] (1): # Enter netuid [mainnet_netuid] to specify the subnet you just created. >> Continue Registration? hotkey: ... coldkey: ... 
@@ -202,7 +202,7 @@ miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 Run the subnet miner: ```bash -python neurons/miner.py --netuid 1 --wallet.name miner --wallet.hotkey default --logging.debug +python neurons/miner.py --netuid [mainnet_netuid] --wallet.name miner --wallet.hotkey default --logging.debug ``` You will see the below terminal output: @@ -214,7 +214,7 @@ You will see the below terminal output: Run the subnet validator: ```bash -python neurons/validator.py --netuid 1 --wallet.name validator --wallet.hotkey default --logging.debug +python neurons/validator.py --netuid [mainnet_netuid] --wallet.name validator --wallet.hotkey default --logging.debug ``` You will see the below terminal output: diff --git a/docs/running_on_staging.md b/docs/running_on_staging.md index e282dcfc..d93c9986 100644 --- a/docs/running_on_staging.md +++ b/docs/running_on_staging.md @@ -1,6 +1,6 @@ # Running Subnet Locally -This tutorial will guide you through: +This document will guide you through: - Setting up a local blockchain that is not connected to either Bittensor testchain or mainchain - Creating a subnet @@ -92,21 +92,21 @@ BUILD_BINARY=0 ./scripts/localnet.sh **NOTE**: Watch for any build or initialization outputs in this step. If you are building the project for the first time, this step will take a while to finish building, depending on your hardware. -## 6. Install subnet template +## 6. Install web-genie-ai subnet -`cd` to your project directory and clone the bittensor subnet template repository: +`cd` to your project directory and clone the web-genie-ai repository: ```bash -git clone https://github.com/opentensor/bittensor-subnet-template.git +git clone https://github.com/web-genie-ai/web-genie-ai.git ``` Navigate to the cloned repository: ```bash -cd bittensor-subnet-template +cd web-genie-ai ``` -Install the bittensor-subnet-template Python package: +Install the web-genie-ai Python package: ```bash python -m pip install -e . diff --git a/docs/running_on_testnet.md b/docs/running_on_testnet.md index 9203d3a5..be4e8553 100644 --- a/docs/running_on_testnet.md +++ b/docs/running_on_testnet.md @@ -18,23 +18,23 @@ Before proceeding further, make sure that you have installed Bittensor. See the After installing `bittensor`, proceed as below: -## 1. Install Bittensor subnet template +## 1. Install web-genie-ai **NOTE: Skip this step if** you already did this during local testing and development. -`cd` into your project directory and clone the bittensor-subnet-template repo: +`cd` into your project directory and clone the web-genie-ai repo: ```bash -git clone https://github.com/opentensor/bittensor-subnet-template.git +git clone https://github.com/web-genie-ai/web-genie-ai.git ``` -Next, `cd` into bittensor-subnet-template repo directory: +Next, `cd` into web-genie-ai repo directory: ```bash -cd bittensor-subnet-template # Enter the +cd web-genie-ai # Enter the ``` -Install the bittensor-subnet-template package: +Install the web-genie-ai package: ```bash python -m pip install -e . @@ -129,13 +129,13 @@ This step registers your subnet validator and subnet miner keys to the subnet, g Register your miner key to the subnet: ```bash -btcli subnet register --netuid 13 --subtensor.network test --wallet.name miner --wallet.hotkey default +btcli subnet register --netuid 214 --subtensor.network test --wallet.name miner --wallet.hotkey default ``` Follow the below prompts: ```bash ->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created. 
+>> Enter netuid [1] (1): # Enter netuid 214 to specify web-genie-ai test subnet. >> Continue Registration? hotkey: ... coldkey: ... @@ -146,13 +146,13 @@ Follow the below prompts: Next, register your validator key to the subnet: ```bash -btcli subnet register --netuid 13 --subtensor.network test --wallet.name validator --wallet.hotkey default +btcli subnet register --netuid 214 --subtensor.network test --wallet.name validator --wallet.hotkey default ``` Follow the prompts: ```bash ->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created. +>> Enter netuid [1] (1): # Enter netuid 214 to specify web-genie-ai test subnet. >> Continue Registration? hotkey: ... coldkey: ... @@ -201,7 +201,7 @@ miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 Run the subnet miner: ```bash -python neurons/miner.py --netuid 1 --subtensor.network test --wallet.name miner --wallet.hotkey default --logging.debug +python neurons/miner.py --netuid 214 --subtensor.network test --wallet.name miner --wallet.hotkey default --logging.debug ``` You will see the below terminal output: @@ -213,7 +213,7 @@ You will see the below terminal output: Next, run the subnet validator: ```bash -python neurons/validator.py --netuid 1 --subtensor.network test --wallet.name validator --wallet.hotkey default --logging.debug +python neurons/validator.py --netuid 214 --subtensor.network test --wallet.name validator --wallet.hotkey default --logging.debug ``` You will see the below terminal output: diff --git a/docs/stream_tutorial/client.py b/docs/stream_tutorial/client.py index 67e6f05c..4823f715 100644 --- a/docs/stream_tutorial/client.py +++ b/docs/stream_tutorial/client.py @@ -7,7 +7,7 @@ """ This has assumed you have: 1. Registered your miner on the chain (finney/test) -2. Are serving your miner on an open port (e.g. 12345) +2. Are serving your miner on an open(e.g. 12345) Steps: - Instantiate your synapse subclass with the relevant information. E.g. messages, roles, etc. diff --git a/docs/webgenie-workflow.png b/docs/webgenie-workflow.png new file mode 100644 index 00000000..2a74fd92 Binary files /dev/null and b/docs/webgenie-workflow.png differ diff --git a/min_compute.yml b/min_compute.yml index 1da3bb04..3d8e0153 100644 --- a/min_compute.yml +++ b/min_compute.yml @@ -57,8 +57,8 @@ compute_spec: gpu: required: True # Does the application require a GPU? - min_vram: 8 # Minimum GPU VRAM (GB) - recommended_vram: 24 # Recommended GPU VRAM (GB) + min_vram: 80 # Minimum GPU VRAM (GB) + recommended_vram: 100 # Recommended GPU VRAM (GB) cuda_cores: 1024 # Minimum number of CUDA cores (if applicable) min_compute_capability: 6.0 # Minimum CUDA compute capability recommended_compute_capability: 7.0 # Recommended CUDA compute capability @@ -71,8 +71,8 @@ compute_spec: ram_type: "DDR4" # RAM type (e.g., DDR4, DDR3, etc.) 
storage: - min_space: 10 # Minimum free storage space (GB) - recommended_space: 100 # Recommended free storage space (GB) + min_space: 500 # Minimum free storage space (GB) + recommended_space: 1000 # Recommended free storage space (GB) type: "SSD" # Preferred storage type (e.g., SSD, HDD) min_iops: 1000 # Minimum I/O operations per second (if applicable) recommended_iops: 5000 # Recommended I/O operations per second diff --git a/neurons/miners/hf_miner.py b/neurons/miners/hf_miner.py new file mode 100644 index 00000000..93ee3ab7 --- /dev/null +++ b/neurons/miners/hf_miner.py @@ -0,0 +1,49 @@ +import bittensor as bt +import os + +from webgenie.base.neuron import BaseNeuron +from webgenie.protocol import WebgenieTextSynapse, WebgenieImageSynapse +from webgenie.helpers.images import base64_to_image + +from webgenie.utils.gpus import get_gpu_info +total_memory_mb, _, _ = get_gpu_info() + +if total_memory_mb is None: + raise ValueError("No GPU detected. HfMiner requires a GPU.") + +bt.logging.info(f"Total memory: {total_memory_mb}") + +if total_memory_mb < 1024 * 23: + raise ValueError("Insufficient GPU memory. HfMiner requires at least 25GB of GPU memory.") + +from neurons.miners.hf_models.websight_finetuned import generate_html_from_image + +if total_memory_mb > 1024 * 50: + from neurons.miners.hf_models.falcon7b import generate_html_from_text + +class HfMiner: + def __init__(self, neuron: BaseNeuron): + self.neuron = neuron + + async def forward_text(self, synapse: WebgenieTextSynapse) -> WebgenieTextSynapse: + try: + if total_memory_mb > 1024 * 50: + synapse.html = generate_html_from_text(synapse.prompt) + else: + synapse.html = "you don't have enough memory to generate html from text" + return synapse + except Exception as e: + bt.logging.error(f"Error in HfMiner forward_text: {e}") + synapse.html = f"Error in HfMiner forward_text: {e}" + return synapse + + async def forward_image(self, synapse: WebgenieImageSynapse) -> WebgenieImageSynapse: + try: + bt.logging.debug(f"Generating HTML from image") + synapse.html = generate_html_from_image(base64_to_image(synapse.base64_image)) + bt.logging.debug(f"Generated HTML: {synapse.html}") + return synapse + except Exception as e: + bt.logging.error(f"Error in OpenaiMiner forward_image: {e}") + synapse.html = f"Error in OpenaiMiner forward_image: {e}" + return synapse \ No newline at end of file diff --git a/neurons/miners/hf_models/falcon7b.py b/neurons/miners/hf_models/falcon7b.py new file mode 100644 index 00000000..7e47e25d --- /dev/null +++ b/neurons/miners/hf_models/falcon7b.py @@ -0,0 +1,65 @@ +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +# Loading original model +model_name = "ybelkada/falcon-7b-sharded-bf16" + +bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.float16, +) + +model = AutoModelForCausalLM.from_pretrained( + model_name, + quantization_config=bnb_config, + device_map="auto", + trust_remote_code=True, +) + +tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) +tokenizer.pad_token = tokenizer.eos_token +PEFT_MODEL = "PrincySinghal991/falcon-7b-sharded-bf16-finetuned-html-code-generation" +# PEFT_MODEL = "kasperius/falcon-7b-sharded-bf16-finetuned-html-code-generation-the-css-only" + +peft_model = AutoModelForCausalLM.from_pretrained( + PEFT_MODEL, + quantization_config=bnb_config, + device_map="auto", # Let the transformers library handle device placement 
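+    # The fine-tuned checkpoint is loaded with the same NF4 4-bit quantization config
+    # as the base model above; together with fp16 compute and low_cpu_mem_usage this
+    # keeps the 7B model's memory footprint manageable.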
+ trust_remote_code=True, + torch_dtype=torch.float16, # Use mixed precision to reduce memory usage + low_cpu_mem_usage=True +) + +# Load tokenizer +peft_tokenizer = AutoTokenizer.from_pretrained(PEFT_MODEL, trust_remote_code=True) +peft_tokenizer.pad_token = peft_tokenizer.eos_token + +def generate_html_from_text(prompt): + # Tokenize and generate with the PEFT model + peft_encoding = peft_tokenizer(prompt, return_tensors="pt") + peft_outputs = peft_model.generate( + input_ids=peft_encoding["input_ids"].to(peft_model.device), + attention_mask=peft_encoding["attention_mask"].to(peft_model.device), + max_length=2048, + pad_token_id=peft_tokenizer.eos_token_id, + eos_token_id=peft_tokenizer.eos_token_id + ) + peft_model_html = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True) + return peft_model_html[len(prompt):] + +if __name__ == "__main__": + + # Example usage + prompt="create a simple login page with html and css" + print("=================") + import time + start_time = time.time() + print(f"Prompt: {prompt}") + html = generate_html_from_text(prompt) + end_time = time.time() + print(f"Time taken: {end_time - start_time} seconds") + print("=================") + print(html) + diff --git a/neurons/miners/hf_models/websight_finetuned.py b/neurons/miners/hf_models/websight_finetuned.py new file mode 100644 index 00000000..24769dae --- /dev/null +++ b/neurons/miners/hf_models/websight_finetuned.py @@ -0,0 +1,67 @@ +import os +import torch + +from PIL import Image +from transformers import AutoModelForCausalLM, AutoProcessor + +from transformers.image_utils import to_numpy_array, PILImageResampling, ChannelDimension +from transformers.image_transforms import resize, to_channel_dimension_format + +API_TOKEN = os.getenv("HF_TOKEN") +DEVICE = torch.device("cuda") +PROCESSOR = AutoProcessor.from_pretrained( + "HuggingFaceM4/VLM_WebSight_finetuned", + token=API_TOKEN, +) +MODEL = AutoModelForCausalLM.from_pretrained( + "HuggingFaceM4/VLM_WebSight_finetuned", + token=API_TOKEN, + trust_remote_code=True, + torch_dtype=torch.bfloat16, +).to(DEVICE) +image_seq_len = MODEL.config.perceiver_config.resampler_n_latents +BOS_TOKEN = PROCESSOR.tokenizer.bos_token +BAD_WORDS_IDS = PROCESSOR.tokenizer(["", ""], add_special_tokens=False).input_ids + + +def convert_to_rgb(image): + # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background + # for transparent images. 
The call to `alpha_composite` handles this case + if image.mode == "RGB": + return image + + image_rgba = image.convert("RGBA") + background = Image.new("RGBA", image_rgba.size, (255, 255, 255)) + alpha_composite = Image.alpha_composite(background, image_rgba) + alpha_composite = alpha_composite.convert("RGB") + return alpha_composite + +# The processor is the same as the Idefics processor except for the BILINEAR interpolation, +# so this is a hack in order to redefine ONLY the transform method +def custom_transform(x): + x = convert_to_rgb(x) + x = to_numpy_array(x) + x = resize(x, (960, 960), resample=PILImageResampling.BILINEAR) + x = PROCESSOR.image_processor.rescale(x, scale=1 / 255) + x = PROCESSOR.image_processor.normalize( + x, + mean=PROCESSOR.image_processor.image_mean, + std=PROCESSOR.image_processor.image_std + ) + x = to_channel_dimension_format(x, ChannelDimension.FIRST) + x = torch.tensor(x) + return x + +def generate_html_from_image(image): + global MODEL, PROCESSOR, image_seq_len, BOS_TOKEN, BAD_WORDS_IDS, DEVICE + inputs = PROCESSOR.tokenizer( + f"{BOS_TOKEN}{'' * image_seq_len}", + return_tensors="pt", + add_special_tokens=False, + ) + inputs["pixel_values"] = PROCESSOR.image_processor([image], transform=custom_transform) + inputs = {k: v.to(DEVICE) for k, v in inputs.items()} + generated_ids = MODEL.generate(**inputs, bad_words_ids=BAD_WORDS_IDS, max_length=4096) + generated_text = PROCESSOR.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text diff --git a/neurons/miner.py b/neurons/miners/miner.py similarity index 73% rename from neurons/miner.py rename to neurons/miners/miner.py index 5f7b9500..acc01d50 100644 --- a/neurons/miner.py +++ b/neurons/miners/miner.py @@ -1,7 +1,6 @@ # The MIT License (MIT) # Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 +# Copyright © 2024 pycorn0729, Sangar # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -16,17 +15,20 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. +from dotenv import load_dotenv, find_dotenv +load_dotenv(find_dotenv(filename=".env.miner")) import time -import typing -import bittensor as bt -# Bittensor Miner Template: -import template +import typing -# import base miner class which takes care of most of the boilerplate -from template.base.miner import BaseMinerNeuron +import bittensor as bt +from webgenie.base.miner import BaseMinerNeuron +from webgenie.constants import MAX_DEBUG_IMAGE_STRING_LENGTH +from webgenie.helpers.weights import init_wandb +from webgenie.protocol import WebgenieTextSynapse, WebgenieImageSynapse +from neurons.miners.openai_miner import OpenaiMiner class Miner(BaseMinerNeuron): """ @@ -40,30 +42,48 @@ class Miner(BaseMinerNeuron): def __init__(self, config=None): super(Miner, self).__init__(config=config) - # TODO(developer): Anything specific to your use case you can do here - - async def forward( - self, synapse: template.protocol.Dummy - ) -> template.protocol.Dummy: - """ - Processes the incoming 'Dummy' synapse by performing a predefined operation on the input data. - This method should be replaced with actual logic relevant to the miner's purpose. 
- - Args: - synapse (template.protocol.Dummy): The synapse object containing the 'dummy_input' data. + # Attach determiners which functions are called when servicing a request. + bt.logging.info(f"Attaching forward function to miner axon.") + self.axon.attach( + forward_fn=self.forward_text, + blacklist_fn=self.blacklist_text, + priority_fn=self.priority_text, + ).attach( + forward_fn = self.forward_image, + blacklist_fn=self.blacklist_image, + priority_fn=self.priority_image, + ) - Returns: - template.protocol.Dummy: The synapse object with the 'dummy_output' field set to twice the 'dummy_input' value. + self.genie_miner = OpenaiMiner(self) - The 'forward' function is a placeholder and should be overridden with logic that is appropriate for - the miner's intended operation. This method demonstrates a basic transformation of input data. - """ - # TODO(developer): Replace with actual implementation logic. - synapse.dummy_output = synapse.dummy_input * 2 - return synapse + init_wandb(self) + + async def forward_text( + self, synapse: WebgenieTextSynapse + ) -> WebgenieTextSynapse: + bt.logging.debug(f"Miner text forward called with prompt: {synapse.prompt}") + return await self.genie_miner.forward_text(synapse) + + async def forward_image( + self, synapse: WebgenieImageSynapse + ) -> WebgenieImageSynapse: + bt.logging.debug(f"Miner image forward called with image: {synapse.base64_image[:MAX_DEBUG_IMAGE_STRING_LENGTH]}...") + return await self.genie_miner.forward_image(synapse) + + async def blacklist_text(self, synapse: WebgenieTextSynapse) -> typing.Tuple[bool, str]: + return await self.blacklist(synapse) + + async def blacklist_image(self, synapse: WebgenieImageSynapse) -> typing.Tuple[bool, str]: + return await self.blacklist(synapse) + + async def priority_text(self, synapse: WebgenieTextSynapse) -> float: + return await self.priority(synapse) + + async def priority_image(self, synapse: WebgenieImageSynapse) -> float: + return await self.priority(synapse) async def blacklist( - self, synapse: template.protocol.Dummy + self, synapse: bt.Synapse ) -> typing.Tuple[bool, str]: """ Determines whether an incoming request should be blacklisted and thus ignored. Your implementation should @@ -74,7 +94,7 @@ async def blacklist( requests before they are deserialized to avoid wasting resources on requests that will be ignored. Args: - synapse (template.protocol.Dummy): A synapse object constructed from the headers of the incoming request. + synapse (template.protocol.webgenieSynapse): A synapse object constructed from the headers of the incoming request. Returns: Tuple[bool, str]: A tuple containing a boolean indicating whether the synapse's hotkey is blacklisted, @@ -124,7 +144,7 @@ async def blacklist( ) return False, "Hotkey recognized!" - async def priority(self, synapse: template.protocol.Dummy) -> float: + async def priority(self, synapse: bt.Synapse) -> float: """ The priority function determines the order in which requests are handled. More valuable or higher-priority requests are processed before others. You should design your own priority mechanism with care. @@ -132,7 +152,7 @@ async def priority(self, synapse: template.protocol.Dummy) -> float: This implementation assigns priority to incoming requests based on the calling entity's stake in the metagraph. Args: - synapse (template.protocol.Dummy): The synapse object that contains metadata about the incoming request. + synapse (template.protocol.webgenieSynapse): The synapse object that contains metadata about the incoming request. 
Returns: float: A priority score derived from the stake of the calling entity. @@ -148,7 +168,6 @@ async def priority(self, synapse: template.protocol.Dummy) -> float: bt.logging.warning("Received a request without a dendrite or hotkey.") return 0.0 - # TODO(developer): Define how miners should prioritize requests. caller_uid = self.metagraph.hotkeys.index( synapse.dendrite.hotkey ) # Get the caller index. @@ -165,5 +184,4 @@ async def priority(self, synapse: template.protocol.Dummy) -> float: if __name__ == "__main__": with Miner() as miner: while True: - bt.logging.info(f"Miner running... {time.time()}") time.sleep(5) diff --git a/neurons/miners/openai_miner.py b/neurons/miners/openai_miner.py new file mode 100644 index 00000000..daaff6ea --- /dev/null +++ b/neurons/miners/openai_miner.py @@ -0,0 +1,81 @@ +import bittensor as bt +import os + +from langchain_openai import ChatOpenAI +from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.pydantic_v1 import BaseModel, Field + +from webgenie.base.neuron import BaseNeuron +from webgenie.helpers.llms import call_llm +from webgenie.protocol import WebgenieTextSynapse, WebgenieImageSynapse +from webgenie.tasks.solution import Solution + +class HTMLResponse(BaseModel): + html: str = Field(default="", description="The HTML code for the webpage") + +class OpenaiMiner: + def __init__(self, neuron: BaseNeuron): + self.neuron = neuron + + self.html_response_parser = JsonOutputParser(pydantic_object=HTMLResponse) + + async def forward_text(self, synapse: WebgenieTextSynapse) -> WebgenieTextSynapse: + try: + template = [ + ("system", """You are an expert web developer who specializes in HTML and CSS. A user will provide you with the webpage requirements. You need to return a single html file that uses HTML and CSS to satisfy the requirements. + Include all CSS code in the HTML file itself. + If it involves any images, use "rick.jpg" as the placeholder. + Do not hallucinate any dependencies to external files. You do not need to include JavaScript scripts for dynamic interactions. + Pay attention to things like size, text, position, and color of all the elements, as well as the overall layout. + Respond with the content of the HTML+CSS file: + {instructions}"""), + ("user", "{query}"), + ] + + html_response = await call_llm( + template=template, + params={"query": synapse.prompt, "instructions": self.html_response_parser.get_format_instructions()}, + output_parser=self.html_response_parser + ) + + synapse.html = html_response["html"] + return synapse + except Exception as e: + bt.logging.error(f"Error in OpenaiMiner forward_text: {e}") + synapse.html = f"Error in OpenaiMiner forward_text: {e}" + return synapse + + async def forward_image(self, synapse: WebgenieImageSynapse) -> WebgenieImageSynapse: + try: + prompt_messages = [ + SystemMessagePromptTemplate.from_template(""" + You are an expert web developer who specializes in HTML and CSS. + A user will provide you with a screenshot of a webpage, along with all texts that they want to put on the webpage. + You need to return a single html file that uses HTML and CSS to reproduce the given website. + Include all CSS code in the HTML file itself. + If it involves any images, use "rick.jpg" as the placeholder. + Some images on the webpage are replaced with a blue rectangle as the placeholder, use "rick.jpg" for those as well. 
+ Do not hallucinate any dependencies to external files. You do not need to include JavaScript scripts for dynamic interactions. + Pay attention to things like size, text, position, and color of all the elements, as well as the overall layout. + Respond with the content of the HTML+CSS file: + {instructions}"""), + HumanMessagePromptTemplate.from_template( + template=[ + {"type": "image_url", "image_url": {"url": "{image_url}"}}, + ] + ) + ] + + html_response = await call_llm( + template=prompt_messages, + params={"instructions": self.html_response_parser.get_format_instructions(), "image_url": f"data:image/jpeg;base64,{synapse.base64_image}"}, + output_parser=self.html_response_parser + ) + + synapse.html = html_response["html"] + return synapse + except Exception as e: + bt.logging.error(f"Error in OpenaiMiner forward_image: {e}") + synapse.html = f"Error in OpenaiMiner forward_image: {e}" + return synapse \ No newline at end of file diff --git a/neurons/validator.py b/neurons/validator.py deleted file mode 100644 index e28b972c..00000000 --- a/neurons/validator.py +++ /dev/null @@ -1,67 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -import time - -# Bittensor -import bittensor as bt - -# import base validator class which takes care of most of the boilerplate -from template.base.validator import BaseValidatorNeuron -# Bittensor Validator Template: -from template.validator import forward - - -class Validator(BaseValidatorNeuron): - """ - Your validator neuron class. You should use this class to define your validator's behavior. In particular, you should replace the forward function with your own logic. - - This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior. - - This class provides reasonable default behavior for a validator such as keeping a moving average of the scores of the miners and using them to set weights at the end of each epoch. Additionally, the scores are reset for new hotkeys at the end of each epoch. 
- """ - - def __init__(self, config=None): - super(Validator, self).__init__(config=config) - - bt.logging.info("load_state()") - self.load_state() - - # TODO(developer): Anything specific to your use case you can do here - - async def forward(self): - """ - Validator forward pass. Consists of: - - Generating the query - - Querying the miners - - Getting the responses - - Rewarding the miners - - Updating the scores - """ - # TODO(developer): Rewrite this function based on your protocol definition. - return await forward(self) - - -# The main function parses the configuration and runs the validator. -if __name__ == "__main__": - with Validator() as validator: - while True: - bt.logging.info(f"Validator running... {time.time()}") - time.sleep(5) diff --git a/neurons/validators/genie_validator.py b/neurons/validators/genie_validator.py new file mode 100644 index 00000000..d8724956 --- /dev/null +++ b/neurons/validators/genie_validator.py @@ -0,0 +1,162 @@ +import os +import bittensor as bt +import asyncio +import numpy as np +import random +from typing import Union, List + +from webgenie.base.neuron import BaseNeuron +from webgenie.constants import ( + NUM_CONCURRENT_QUERIES, + MAX_SYNTHETIC_HISTORY_SIZE, + MAX_SYNTHETIC_TASK_SIZE, + MAX_DEBUG_IMAGE_STRING_LENGTH, + WORK_DIR +) +from webgenie.helpers.htmls import preprocess_html +from webgenie.protocol import WebgenieImageSynapse, WebgenieTextSynapse +from webgenie.tasks.solution import Solution +from webgenie.tasks.image_task_generator import ImageTaskGenerator +from webgenie.tasks.text_task_generator import TextTaskGenerator +from webgenie.utils.uids import get_random_uids + +class GenieValidator: + def __init__(self, neuron: BaseNeuron): + self.neuron = neuron + self.config = neuron.config + self.synthetic_history = [] + self.synthetic_tasks = [] + + self.task_generators = [ + (TextTaskGenerator(), 0.1), + (ImageTaskGenerator(), 0.9), + ] + + self.make_work_dir() + + def make_work_dir(self): + if not os.path.exists(WORK_DIR): + os.makedirs(WORK_DIR) + bt.logging.info(f"Created work directory at {WORK_DIR}") + + async def query_one_task(self, task, synapse, miner_uids): + try: + async with bt.dendrite(wallet=self.neuron.wallet) as dendrite: + all_synapse_results = await dendrite( + axons = [self.neuron.metagraph.axons[uid] for uid in miner_uids], + synapse=synapse, + timeout=task.timeout + ) + + solutions = [] + + for synapse, miner_uid in zip(all_synapse_results, miner_uids): + processed_synapse = await self.process_synapse(synapse) + if processed_synapse is not None: + solutions.append(Solution(html = processed_synapse.html, miner_uid = miner_uid, process_time = processed_synapse.dendrite.process_time)) + + return task, solutions + except Exception as e: + bt.logging.error(f"Error in query_one_task: {e}") + raise e + + async def query_miners(self): + try: + if len(self.synthetic_history) > MAX_SYNTHETIC_HISTORY_SIZE: + return + + if len(self.synthetic_tasks) < NUM_CONCURRENT_QUERIES: + return + + bt.logging.info("querying miners") + + miner_uids = get_random_uids(self.neuron, k=self.config.neuron.sample_size) + bt.logging.debug(f"Selected miner uids: {miner_uids}") + + query_coroutines = [self.query_one_task(task, synapse, miner_uids) for task, synapse in self.synthetic_tasks] + + self.synthetic_tasks = [] + + results = await asyncio.gather(*query_coroutines, return_exceptions=True) + self.synthetic_history.append(results) + + except Exception as e: + bt.logging.error(f"Error in query_miners: {e}") + raise e + + async def score(self): + 
if not self.synthetic_history:
+            return
+
+        results = self.synthetic_history.pop(0)
+        total_scores = np.zeros(self.neuron.metagraph.n, dtype=np.float32)
+        for result in results:
+            if isinstance(result, Exception):
+                continue
+
+            task, solutions = result
+            if not solutions:
+                continue
+
+            task_generator = task.generator
+            miner_uids = [solution.miner_uid for solution in solutions]
+            rewards = await task_generator.reward(task, solutions)
+            bt.logging.success(f"Rewards for {miner_uids}: {rewards}")
+
+            for i in range(len(miner_uids)):
+                total_scores[miner_uids[i]] += rewards[i]
+
+        total_scores[:] = total_scores[:] ** 3
+        self.neuron.update_scores(total_scores, range(self.neuron.metagraph.n))
+        self.neuron.step += 1
+
+    async def synthesize_task(self):
+        try:
+            if len(self.synthetic_tasks) > MAX_SYNTHETIC_TASK_SIZE:
+                return
+
+            bt.logging.debug("Synthesizing task")
+
+            task_generator, _ = random.choices(
+                self.task_generators,
+                weights=[weight for _, weight in self.task_generators]
+            )[0]
+
+            task, synapse = await task_generator.generate_task()
+            self.synthetic_tasks.append((task, synapse))
+        except Exception as e:
+            bt.logging.error(f"Error in synthesize_task: {e}")
+
+    async def organic_forward(self, synapse: Union[WebgenieTextSynapse, WebgenieImageSynapse]):
+        if isinstance(synapse, WebgenieTextSynapse):
+            bt.logging.debug(f"Organic text forward: {synapse.prompt}")
+        else:
+            bt.logging.debug(f"Organic image forward: {synapse.base64_image[:MAX_DEBUG_IMAGE_STRING_LENGTH]}...")
+
+        best_miner_uid = 3
+        try:
+            axon = self.neuron.metagraph.axons[best_miner_uid]
+            async with bt.dendrite(wallet=self.neuron.wallet) as dendrite:
+                responses = await dendrite(
+                    axons=[axon],
+                    synapse=synapse,
+                    timeout=synapse.timeout,
+                )
+
+            processed_synapse = await self.process_synapse(responses[0])
+            if processed_synapse is None:
+                raise Exception("No valid solution received")
+
+            return processed_synapse
+        except Exception as e:
+            bt.logging.error(f"[forward_organic_synapse] Error querying dendrite: {e}")
+            synapse.html = f"Error: {e}"
+            return synapse
+
+    async def process_synapse(self, synapse: bt.Synapse) -> bt.Synapse:
+        if synapse.dendrite.status_code == 200:
+            synapse.html = preprocess_html(synapse.html)
+            if not synapse.html:
+                return None
+            return synapse
+        return None
diff --git a/neurons/validators/validator.py b/neurons/validators/validator.py
new file mode 100644
index 00000000..c833de85
--- /dev/null
+++ b/neurons/validators/validator.py
@@ -0,0 +1,144 @@
+# The MIT License (MIT)
+# Copyright © 2023 Yuma Rao
+# Copyright © 2024 pycorn, Sangar
+
+import bittensor as bt
+import asyncio
+
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv(filename=".env.validator"))
+
+from typing import Tuple, Union
+
+from webgenie.base.validator import BaseValidatorNeuron
+from webgenie.constants import API_HOTKEY
+from webgenie.protocol import WebgenieTextSynapse, WebgenieImageSynapse
+from neurons.validators.genie_validator import GenieValidator
+
+class Validator(BaseValidatorNeuron):
+    """
+    The WebGenie validator neuron, which defines this subnet's validator behavior.
+
+    This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.
+
+    This class provides reasonable default behavior for a validator, such as keeping a moving average of the scores of the miners and using them to set weights at the end of each epoch. Additionally, the scores are reset for new hotkeys at the end of each epoch.
+    """
+
+    def __init__(self, config=None):
+        super(Validator, self).__init__(config=config)
+        if not self.config.axon_off:
+            self.serve_axon()
+
+        self.genie_validator = GenieValidator(neuron=self)
+
+    async def blacklist_text(self, synapse: WebgenieTextSynapse) -> Tuple[bool, str]:
+        """
+        Only allow the backend owner to send synapses to the validator.
+        """
+        if synapse.dendrite.hotkey == API_HOTKEY:
+            return False, "Backend hotkey"
+        return True, "Blacklisted"
+
+    async def blacklist_image(self, synapse: WebgenieImageSynapse) -> Tuple[bool, str]:
+        """
+        Only allow the backend owner to send synapses to the validator.
+        """
+        if synapse.dendrite.hotkey == API_HOTKEY:
+            return False, "Backend hotkey"
+        return True, "Blacklisted"
+
+    async def organic_forward_text(self, synapse: WebgenieTextSynapse):
+        return await self.genie_validator.organic_forward(synapse)
+
+    async def organic_forward_image(self, synapse: WebgenieImageSynapse):
+        return await self.genie_validator.organic_forward(synapse)
+
+    def serve_axon(self):
+        """Serve axon to enable external connections."""
+        bt.logging.info("serving ip to chain...")
+        try:
+            self.axon = bt.axon(wallet=self.wallet, config=self.config)
+
+            self.axon.attach(
+                forward_fn=self.organic_forward_text,
+                blacklist_fn=self.blacklist_text
+            ).attach(
+                forward_fn=self.organic_forward_image,
+                blacklist_fn=self.blacklist_image
+            )
+
+            self.axon.serve(
+                netuid=self.config.netuid,
+                subtensor=self.subtensor,
+            )
+            self.axon.start()
+            bt.logging.info(f"Validator running in organic mode on port {self.config.neuron.axon_port}")
+        except Exception as e:
+            bt.logging.error(f"Failed to serve Axon with exception: {e}")
+            pass
+
+    async def query_miners(self):
+        return await self.genie_validator.query_miners()
+
+    async def concurrent_query(self):
+        coroutines = [
+            self.query_miners()
+            for _ in range(self.config.neuron.num_concurrent_forwards)
+        ]
+        await asyncio.gather(*coroutines)
+
+    async def query_miners_loop(self):
+        bt.logging.info(f"Validator starting at block: {self.block}")
+        self.sync()
+        while True:
+            try:
+                await self.concurrent_query()
+                self.sync()
+            except Exception as e:
+                bt.logging.error(f"Error during forward loop: {str(e)}")
+            await asyncio.sleep(1)
+
+    async def score_loop(self):
+        bt.logging.info("Scoring loop starting")
+        while True:
+            try:
+                await self.genie_validator.score()
+                self.sync()
+            except Exception as e:
+                bt.logging.error(f"Error during scoring: {str(e)}")
+            await asyncio.sleep(1)
+
+    async def synthesize_task_loop(self):
+        bt.logging.info("Synthesize task loop starting")
+        while True:
+            try:
+                await self.genie_validator.synthesize_task()
+            except Exception as e:
+                bt.logging.error(f"Error during synthesize task: {str(e)}")
+            await asyncio.sleep(1)
+
+    async def __aenter__(self):
+        self.loop.create_task(self.synthesize_task_loop())
+        self.loop.create_task(self.query_miners_loop())
+        self.loop.create_task(self.score_loop())
+        self.is_running = True
+
+        bt.logging.debug("Starting validator in background thread")
+        return self
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        if not self.is_running:
+            return
+
+        self.should_exit = True
+        self.is_running = False
+        bt.logging.debug("Stopping validator in background thread")
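The `__aenter__` and `__aexit__` methods above start the three background loops (task synthesis, miner querying, scoring) on the event loop and signal them to stop through `should_exit`. A minimal, self-contained sketch of that lifecycle pattern is shown below; it uses only the standard library, and the class and loop names are illustrative rather than taken from the subnet code.

```python
# Standalone sketch of the async-context-manager lifecycle used by the validator:
# spawn looping background tasks on enter, signal them to stop on exit.
import asyncio


class DemoRunner:
    def __init__(self):
        self.is_running = False
        self.should_exit = False
        self._tasks = []

    async def tick(self, name: str, interval: float):
        # Each loop checks the stop flag between iterations.
        while not self.should_exit:
            print(f"{name} loop iteration")
            await asyncio.sleep(interval)

    async def __aenter__(self):
        loop = asyncio.get_running_loop()
        self._tasks = [
            loop.create_task(self.tick("synthesize", 0.2)),
            loop.create_task(self.tick("query", 0.3)),
            loop.create_task(self.tick("score", 0.5)),
        ]
        self.is_running = True
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        self.should_exit = True
        self.is_running = False
        # Wait for the loops to notice the flag and finish cleanly.
        await asyncio.gather(*self._tasks, return_exceptions=True)


async def main():
    async with DemoRunner():
        await asyncio.sleep(1)  # stand-in for the validator's keep-alive loop


if __name__ == "__main__":
    asyncio.run(main())
```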
+
+async def main():
+    async with Validator() as validator:
+        while validator.is_running and not validator.should_exit:
+            await asyncio.sleep(15)
+
+# The main function parses the configuration and runs the validator.
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/requirements.txt b/requirements.txt
index f44dfb74..396d10d2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,23 @@
-bittensor>=7
-starlette>=0.30.0
-pydantic>=2
-rich>=13
-pytest>=8
-torch>=2
-numpy>=1
-setuptools>=68
\ No newline at end of file
+ansible-vault==2.1.0
+beautifulsoup4==4.12.3
+bert-score==0.3.13
+bittensor
+colormath==3.0.0
+datasets==3.2.0
+ddt==1.6.0
+einops
+langchain==0.3.11
+langchain-openai==0.2.12
+lxml==5.3.0
+matplotlib-inline==0.1.7
+opencv-python==4.10.0.84
+peft
+pip-chill==1.0.3
+playwright==1.49.1
+python-dotenv==1.0.1
+scikit-learn==1.6.0
+sentence-transformers
+shtab==1.6.5
+tinycss2==1.4.0
+wandb==0.19.0
+git+https://github.com/openai/CLIP.git
\ No newline at end of file
diff --git a/setup.py b/setup.py
index f76ec9b2..a71f5056 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,6 @@
 # The MIT License (MIT)
 # Copyright © 2023 Yuma Rao
-# TODO(developer): Set your name
-# Copyright © 2023
+# Copyright © 2023 Sangar, pycorn0729
 
 # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 # documentation files (the “Software”), to deal in the Software without restriction, including without limitation
@@ -63,18 +62,18 @@ def read_requirements(path):
     version_string = version_match.group(1)
 
 setup(
-    name="bittensor_subnet_template",  # TODO(developer): Change this value to your module subnet name.
+    name="webgenie",
     version=version_string,
-    description="bittensor_subnet_template",  # TODO(developer): Change this value to your module subnet description.
+    description="webgenie aims to transform diverse prompts—ranging from text and voice to images and Figma designs—into fully functional, ready-to-deploy projects.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url="https://github.com/opentensor/bittensor-subnet-template",  # TODO(developer): Change this url to your module subnet github url.
-    author="bittensor.com",  # TODO(developer): Change this value to your module subnet author name.
+    url="https://github.com/web-genie-ai/web-genie-ai",
+    author="Sangar, Dominique Hayes",
     packages=find_packages(),
     include_package_data=True,
-    author_email="",  # TODO(developer): Change this value to your module subnet author email.
+ author_email="sangar.work1028@gmail.com, hayesdominique0729@gmail.com", license="MIT", - python_requires=">=3.8", + python_requires=">=3.12", install_requires=requirements, classifiers=[ "Development Status :: 3 - Alpha", diff --git a/template/protocol.py b/template/protocol.py deleted file mode 100644 index c601e58a..00000000 --- a/template/protocol.py +++ /dev/null @@ -1,76 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import typing -import bittensor as bt - -# TODO(developer): Rewrite with your protocol definition. - -# This is the protocol for the dummy miner and validator. -# It is a simple request-response protocol where the validator sends a request -# to the miner, and the miner responds with a dummy response. - -# ---- miner ---- -# Example usage: -# def dummy( synapse: Dummy ) -> Dummy: -# synapse.dummy_output = synapse.dummy_input + 1 -# return synapse -# axon = bt.axon().attach( dummy ).serve(netuid=...).start() - -# ---- validator --- -# Example usage: -# dendrite = bt.dendrite() -# dummy_output = dendrite.query( Dummy( dummy_input = 1 ) ) -# assert dummy_output == 2 - - -class Dummy(bt.Synapse): - """ - A simple dummy protocol representation which uses bt.Synapse as its base. - This protocol helps in handling dummy request and response communication between - the miner and the validator. - - Attributes: - - dummy_input: An integer value representing the input request sent by the validator. - - dummy_output: An optional integer value which, when filled, represents the response from the miner. - """ - - # Required request input, filled by sending dendrite caller. - dummy_input: int - - # Optional request output, filled by receiving axon. - dummy_output: typing.Optional[int] = None - - def deserialize(self) -> int: - """ - Deserialize the dummy output. This method retrieves the response from - the miner in the form of dummy_output, deserializes it and returns it - as the output of the dendrite.query() call. - - Returns: - - int: The deserialized response, which in this case is the value of dummy_output. 
- - Example: - Assuming a Dummy instance has a dummy_output value of 5: - >>> dummy_instance = Dummy(dummy_input=4) - >>> dummy_instance.dummy_output = 5 - >>> dummy_instance.deserialize() - 5 - """ - return self.dummy_output diff --git a/template/validator/__init__.py b/template/validator/__init__.py deleted file mode 100644 index e43fa856..00000000 --- a/template/validator/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .forward import forward -from .reward import reward diff --git a/template/validator/forward.py b/template/validator/forward.py deleted file mode 100644 index af5e7ee0..00000000 --- a/template/validator/forward.py +++ /dev/null @@ -1,63 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import time -import bittensor as bt - -from template.protocol import Dummy -from template.validator.reward import get_rewards -from template.utils.uids import get_random_uids - - -async def forward(self): - """ - The forward function is called by the validator every time step. - - It is responsible for querying the network and scoring the responses. - - Args: - self (:obj:`bittensor.neuron.Neuron`): The neuron object which contains all the necessary state for the validator. - - """ - # TODO(developer): Define how the validator selects a miner to query, how often, etc. - # get_random_uids is an example method, but you can replace it with your own. - miner_uids = get_random_uids(self, k=self.config.neuron.sample_size) - - # The dendrite client queries the network. - responses = await self.dendrite( - # Send the query to selected miner axons in the network. - axons=[self.metagraph.axons[uid] for uid in miner_uids], - # Construct a dummy query. This simply contains a single integer. - synapse=Dummy(dummy_input=self.step), - # All responses have the deserialize function called on them before returning. - # You are encouraged to define your own deserialization function. - deserialize=True, - ) - - # Log the results for monitoring purposes. - bt.logging.info(f"Received responses: {responses}") - - # TODO(developer): Define how the validator scores responses. - # Adjust the scores based on responses from miners. - rewards = get_rewards(self, query=self.step, responses=responses) - - bt.logging.info(f"Scored responses: {rewards}") - # Update the scores based on the rewards. You may want to define your own update_scores function for custom behavior. 
- self.update_scores(rewards, miner_uids) - time.sleep(5) diff --git a/template/validator/reward.py b/template/validator/reward.py deleted file mode 100644 index 58492183..00000000 --- a/template/validator/reward.py +++ /dev/null @@ -1,55 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -import numpy as np -from typing import List -import bittensor as bt - - -def reward(query: int, response: int) -> float: - """ - Reward the miner response to the dummy request. This method returns a reward - value for the miner, which is used to update the miner's score. - - Returns: - - float: The reward value for the miner. - """ - bt.logging.info(f"In rewards, query val: {query}, response val: {response}, rewards val: {1.0 if response == query * 2 else 0}") - return 1.0 if response == query * 2 else 0 - - -def get_rewards( - self, - query: int, - responses: List[float], -) -> np.ndarray: - """ - Returns an array of rewards for the given query and responses. - - Args: - - query (int): The query sent to the miner. - - responses (List[float]): A list of responses from the miner. - - Returns: - - np.ndarray: An array of rewards for the given query and responses. - """ - # Get all the reward results by iteratively calling your reward() function. 
- - return np.array( - [reward(query, response) for response in responses] - ) diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py index 48e015a9..021318e7 100644 --- a/tests/test_template_validator.py +++ b/tests/test_template_validator.py @@ -23,10 +23,10 @@ import torch from neurons.validator import Validator -from template.base.validator import BaseValidatorNeuron -from template.protocol import Dummy -from template.utils.uids import get_random_uids -from template.validator.reward import get_rewards +from webgenie.base.validator import BaseValidatorNeuron +from webgenie.protocol import Dummy +from webgenie.utils.uids import get_random_uids +from webgenie.validator.reward import get_rewards class TemplateValidatorNeuronTestCase(unittest.TestCase): diff --git a/template/__init__.py b/webgenie/__init__.py similarity index 86% rename from template/__init__.py rename to webgenie/__init__.py index cb07b8c0..5a811b92 100644 --- a/template/__init__.py +++ b/webgenie/__init__.py @@ -1,7 +1,6 @@ # The MIT License (MIT) # Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 +# Copyright © 2023 Sangar, pycorn0729 # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -17,9 +16,9 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -# TODO(developer): Change this value when updating your code base. -# Define the version of the template module. -__version__ = "0.0.0" +# Change this value when updating your code base. +# Define the version of the webgenie. +__version__ = "1.0.0" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) @@ -27,9 +26,10 @@ + (1 * int(version_split[2])) ) +PROJECT_NAME = f"webgenie-{__version__}" + # Import all submodules. from . import protocol from . import base -from . import validator from . 
import api from .subnet_links import SUBNET_LINKS diff --git a/template/api/__init__.py b/webgenie/api/__init__.py similarity index 100% rename from template/api/__init__.py rename to webgenie/api/__init__.py diff --git a/template/api/dummy.py b/webgenie/api/dummy.py similarity index 98% rename from template/api/dummy.py rename to webgenie/api/dummy.py index f6a433f1..2845f6b5 100644 --- a/template/api/dummy.py +++ b/webgenie/api/dummy.py @@ -19,7 +19,7 @@ import bittensor as bt from typing import List, Optional, Union, Any, Dict -from template.protocol import Dummy +from webgenie.protocol import Dummy from bittensor.subnets import SubnetsAPI diff --git a/template/api/get_query_axons.py b/webgenie/api/get_query_axons.py similarity index 100% rename from template/api/get_query_axons.py rename to webgenie/api/get_query_axons.py diff --git a/template/base/__init__.py b/webgenie/base/__init__.py similarity index 100% rename from template/base/__init__.py rename to webgenie/base/__init__.py diff --git a/template/base/miner.py b/webgenie/base/miner.py similarity index 93% rename from template/base/miner.py rename to webgenie/base/miner.py index 1788e24b..4f449b0d 100644 --- a/template/base/miner.py +++ b/webgenie/base/miner.py @@ -23,8 +23,8 @@ import bittensor as bt -from template.base.neuron import BaseNeuron -from template.utils.config import add_miner_args +from webgenie.base.neuron import BaseNeuron +from webgenie.utils.config import add_miner_args from typing import Union @@ -55,21 +55,15 @@ def __init__(self, config=None): # The axon handles request processing, allowing validators to send this miner requests. self.axon = bt.axon(wallet=self.wallet, config=self.config() if callable(self.config) else self.config) - # Attach determiners which functions are called when servicing a request. - bt.logging.info(f"Attaching forward function to miner axon.") - self.axon.attach( - forward_fn=self.forward, - blacklist_fn=self.blacklist, - priority_fn=self.priority, - ) - bt.logging.info(f"Axon created: {self.axon}") - # Instantiate runners self.should_exit: bool = False self.is_running: bool = False self.thread: Union[threading.Thread, None] = None self.lock = asyncio.Lock() + async def forward(self): + pass + def run(self): """ Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors. @@ -186,7 +180,4 @@ def __exit__(self, exc_type, exc_value, traceback): def resync_metagraph(self): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - bt.logging.info("resync_metagraph()") - - # Sync the metagraph. self.metagraph.sync(subtensor=self.subtensor) diff --git a/template/base/neuron.py b/webgenie/base/neuron.py similarity index 91% rename from template/base/neuron.py rename to webgenie/base/neuron.py index 9b2ce7b2..3fffe10b 100644 --- a/template/base/neuron.py +++ b/webgenie/base/neuron.py @@ -23,10 +23,10 @@ from abc import ABC, abstractmethod # Sync calls set weights and also resyncs the metagraph. 
-from template.utils.config import check_config, add_args, config -from template.utils.misc import ttl_get_block -from template import __spec_version__ as spec_version -from template.mock import MockSubtensor, MockMetagraph +from webgenie.utils.config import check_config, add_args, config +from webgenie.utils.misc import ttl_get_block +from webgenie import __spec_version__ as spec_version +from webgenie.mock import MockSubtensor, MockMetagraph class BaseNeuron(ABC): @@ -108,14 +108,6 @@ def __init__(self, config=None): ) self.step = 0 - @abstractmethod - async def forward(self, synapse: bt.Synapse) -> bt.Synapse: - ... - - @abstractmethod - def run(self): - ... - def sync(self): """ Wrapper for synchronizing the state of the network for the given miner or validator. @@ -169,9 +161,8 @@ def should_set_weights(self) -> bool: ) # don't set weights if you're a miner def save_state(self): - bt.logging.warning( - "save_state() not implemented for this neuron. You can implement this function to save model checkpoints or other useful data." - ) + #TODO: Implement this + pass def load_state(self): bt.logging.warning( diff --git a/template/base/utils/__init__.py b/webgenie/base/utils/__init__.py similarity index 100% rename from template/base/utils/__init__.py rename to webgenie/base/utils/__init__.py diff --git a/template/base/utils/weight_utils.py b/webgenie/base/utils/weight_utils.py similarity index 100% rename from template/base/utils/weight_utils.py rename to webgenie/base/utils/weight_utils.py diff --git a/template/base/validator.py b/webgenie/base/validator.py similarity index 56% rename from template/base/validator.py rename to webgenie/base/validator.py index c1ca07ed..89f13319 100644 --- a/template/base/validator.py +++ b/webgenie/base/validator.py @@ -1,7 +1,6 @@ # The MIT License (MIT) # Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 +# Copyright © 2024 Sangar, pycorn0729 # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -25,17 +24,17 @@ import threading import bittensor as bt -from typing import List, Union +from typing import List, Union, Tuple from traceback import print_exception -from template.base.neuron import BaseNeuron -from template.base.utils.weight_utils import ( +from webgenie.base.neuron import BaseNeuron +from webgenie.base.utils.weight_utils import ( process_weights_for_netuid, convert_weights_and_uids_for_emit, ) # TODO: Replace when bittensor switches to numpy -from template.mock import MockDendrite -from template.utils.config import add_validator_args - +from webgenie.helpers.weights import init_wandb +from webgenie.mock import MockDendrite +from webgenie.utils.config import add_validator_args class BaseValidatorNeuron(BaseNeuron): """ @@ -50,11 +49,9 @@ def add_args(cls, parser: argparse.ArgumentParser): add_validator_args(cls, parser) def __init__(self, config=None): - super().__init__(config=config) - - # Save a copy of the hotkeys to local memory. - self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) - + super().__init__(config=config) + init_wandb(self) + # Dendrite lets us send messages to other nodes (axons) in the network. 
if self.config.mock: self.dendrite = MockDendrite(wallet=self.wallet) @@ -62,19 +59,12 @@ def __init__(self, config=None): self.dendrite = bt.dendrite(wallet=self.wallet) bt.logging.info(f"Dendrite: {self.dendrite}") - # Set up initial scoring weights for validation - bt.logging.info("Building validation weights.") - self.scores = np.zeros(self.metagraph.n, dtype=np.float32) - + bt.logging.info("load_state()") + self.load_state() + # Init sync with the network. Updates the metagraph. self.sync() - # Serve axon to enable external connections. - if not self.config.neuron.axon_off: - self.serve_axon() - else: - bt.logging.warning("axon off, not serving ip to chain.") - # Create asyncio event loop to manage async tasks. self.loop = asyncio.get_event_loop() @@ -84,136 +74,6 @@ def __init__(self, config=None): self.thread: Union[threading.Thread, None] = None self.lock = asyncio.Lock() - def serve_axon(self): - """Serve axon to enable external connections.""" - - bt.logging.info("serving ip to chain...") - try: - self.axon = bt.axon(wallet=self.wallet, config=self.config) - - try: - self.subtensor.serve_axon( - netuid=self.config.netuid, - axon=self.axon, - ) - bt.logging.info( - f"Running validator {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}" - ) - except Exception as e: - bt.logging.error(f"Failed to serve Axon with exception: {e}") - pass - - except Exception as e: - bt.logging.error(f"Failed to create Axon initialize with exception: {e}") - pass - - async def concurrent_forward(self): - coroutines = [ - self.forward() for _ in range(self.config.neuron.num_concurrent_forwards) - ] - await asyncio.gather(*coroutines) - - def run(self): - """ - Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors. - - This function performs the following primary tasks: - 1. Check for registration on the Bittensor network. - 2. Continuously forwards queries to the miners on the network, rewarding their responses and updating the scores accordingly. - 3. Periodically resynchronizes with the chain; updating the metagraph with the latest network state and setting weights. - - The essence of the validator's operations is in the forward function, which is called every step. The forward function is responsible for querying the network and scoring the responses. - - Note: - - The function leverages the global configurations set during the initialization of the miner. - - The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests. - - Raises: - KeyboardInterrupt: If the miner is stopped by a manual interruption. - Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis. - """ - - # Check that validator is registered on the network. - self.sync() - - bt.logging.info(f"Validator starting at block: {self.block}") - - # This loop maintains the validator's operations until intentionally stopped. - try: - while True: - bt.logging.info(f"step({self.step}) block({self.block})") - - # Run multiple forwards concurrently. - self.loop.run_until_complete(self.concurrent_forward()) - - # Check if we should exit. - if self.should_exit: - break - - # Sync metagraph and potentially set weights. - self.sync() - - self.step += 1 - - # If someone intentionally stops the validator, it'll safely terminate operations. 
- except KeyboardInterrupt: - self.axon.stop() - bt.logging.success("Validator killed by keyboard interrupt.") - exit() - - # In case of unforeseen errors, the validator will log the error and continue operations. - except Exception as err: - bt.logging.error(f"Error during validation: {str(err)}") - bt.logging.debug(str(print_exception(type(err), err, err.__traceback__))) - - def run_in_background_thread(self): - """ - Starts the validator's operations in a background thread upon entering the context. - This method facilitates the use of the validator in a 'with' statement. - """ - if not self.is_running: - bt.logging.debug("Starting validator in background thread.") - self.should_exit = False - self.thread = threading.Thread(target=self.run, daemon=True) - self.thread.start() - self.is_running = True - bt.logging.debug("Started") - - def stop_run_thread(self): - """ - Stops the validator's operations that are running in the background thread. - """ - if self.is_running: - bt.logging.debug("Stopping validator in background thread.") - self.should_exit = True - self.thread.join(5) - self.is_running = False - bt.logging.debug("Stopped") - - def __enter__(self): - self.run_in_background_thread() - return self - - def __exit__(self, exc_type, exc_value, traceback): - """ - Stops the validator's background operations upon exiting the context. - This method facilitates the use of the validator in a 'with' statement. - - Args: - exc_type: The type of the exception that caused the context to be exited. - None if the context was exited without an exception. - exc_value: The instance of the exception that caused the context to be exited. - None if the context was exited without an exception. - traceback: A traceback object encoding the stack trace. - None if the context was exited without an exception. - """ - if self.is_running: - bt.logging.debug("Stopping validator in background thread.") - self.should_exit = True - self.thread.join(5) - self.is_running = False - bt.logging.debug("Stopped") - def set_weights(self): """ Sets the validator weights to the metagraph hotkeys based on the scores it has received from the miners. The weights determine the trust and incentive level the validator assigns to miner nodes on the network. @@ -280,8 +140,6 @@ def set_weights(self): def resync_metagraph(self): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - bt.logging.info("resync_metagraph()") - # Copies state of metagraph before syncing. previous_metagraph = copy.deepcopy(self.metagraph) @@ -303,7 +161,6 @@ def resync_metagraph(self): # Check to see if the metagraph has changed size. # If so, we need to add new hotkeys and moving averages. if len(self.hotkeys) < len(self.metagraph.hotkeys): - # Update the size of the moving average scores. new_moving_average = np.zeros((self.metagraph.n)) min_len = min(len(self.hotkeys), len(self.scores)) new_moving_average[:min_len] = self.scores[:min_len] @@ -318,7 +175,6 @@ def update_scores(self, rewards: np.ndarray, uids: List[int]): # Check if rewards contains NaN values. if np.isnan(rewards).any(): bt.logging.warning(f"NaN values detected in rewards: {rewards}") - # Replace any NaN values in rewards with 0. rewards = np.nan_to_num(rewards, nan=0) # Ensure rewards is a numpy array. 
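The hunks above mostly trim comments and log lines around `resync_metagraph` and `update_scores`; the reward handling itself is unchanged: NaN rewards are replaced with zero and the rewards are then scattered onto a per-UID score vector before smoothing. A small self-contained sketch of that sanitizing step follows; the vector size, UIDs, and `alpha` smoothing factor are illustrative assumptions, not values from the subnet configuration.

```python
import numpy as np

n_uids = 8                                   # illustrative metagraph size
scores = np.zeros(n_uids, dtype=np.float32)  # running per-UID scores

uids = np.array([1, 4, 6])                   # UIDs that were queried
rewards = np.array([0.9, np.nan, 0.4], dtype=np.float32)

rewards = np.nan_to_num(rewards, nan=0.0)    # replace NaN rewards with 0
scattered = scores.copy()
scattered[uids] = rewards                    # place each reward at its UID

alpha = 0.1                                  # assumed smoothing factor
scores = alpha * scattered + (1 - alpha) * scores
print(scores)
```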
@@ -359,7 +215,6 @@ def update_scores(self, rewards: np.ndarray, uids: List[int]): def save_state(self): """Saves the state of the validator to a file.""" - bt.logging.info("Saving validator state.") # Save the state of the validator to file. np.savez( @@ -368,13 +223,20 @@ def save_state(self): scores=self.scores, hotkeys=self.hotkeys, ) - + def load_state(self): """Loads the state of the validator from a file.""" bt.logging.info("Loading validator state.") # Load the state of the validator from file. - state = np.load(self.config.neuron.full_path + "/state.npz") - self.step = state["step"] - self.scores = state["scores"] - self.hotkeys = state["hotkeys"] + try: + state = np.load(self.config.neuron.full_path + "/state.npz") + self.step = state["step"] + self.scores = state["scores"] + self.hotkeys = state["hotkeys"] + except Exception as e: + self.step = 0 + self.scores = np.zeros(self.metagraph.n, dtype=np.float32) + self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) + + bt.logging.debug(f"Loaded state: step={self.step}, scores={self.scores}") diff --git a/webgenie/constants.py b/webgenie/constants.py new file mode 100644 index 00000000..0eb9312a --- /dev/null +++ b/webgenie/constants.py @@ -0,0 +1,33 @@ +# backend api hotkey +API_HOTKEY = "5DXDCYTuPfLqQXbxfvvnarG31SdTDtaubqpQrzjrcMgoP9dp" + +# image task timeout +IMAGE_TASK_TIMEOUT = 100 + +# text task timeout +TEXT_TASK_TIMEOUT = 100 + +# max synthetic history size +MAX_SYNTHETIC_HISTORY_SIZE = 30 + +# max synthensize task size +MAX_SYNTHETIC_TASK_SIZE = 30 + +# the number of concurrent queries +NUM_CONCURRENT_QUERIES = 10 + +# max debug image string length +MAX_DEBUG_IMAGE_STRING_LENGTH = 20 + +# place holder image url +PLACE_HOLDER_IMAGE_URL = "https://picsum.photos/seed/picsum/800/600" + +# python command +PYTHON_CMD = "python" + +# screenshot script path +SCREENSHOT_SCRIPT_PATH = "webgenie/rewards/metrics/screenshot_single.py" + +# work dir +WORK_DIR = "work" + diff --git a/webgenie/datasets/__init__.py b/webgenie/datasets/__init__.py new file mode 100644 index 00000000..0e4c0f25 --- /dev/null +++ b/webgenie/datasets/__init__.py @@ -0,0 +1,2 @@ +from .dataset import Dataset, DatasetEntry +from .synthetic_dataset import SyntheticDataset diff --git a/webgenie/datasets/dataset.py b/webgenie/datasets/dataset.py new file mode 100644 index 00000000..8b8cf31b --- /dev/null +++ b/webgenie/datasets/dataset.py @@ -0,0 +1,12 @@ +from pydantic import Field, BaseModel + +class DatasetEntry(BaseModel): + src: str = Field(default="", description="The source of the dataset entry") + topic: str = Field(default="", description="The topic of the dataset entry") + ground_truth_html: str = Field(default="", description="The ground truth html") + prompt: str = Field(default="", description="The prompt for the text task") + base64_image: str = Field(default="", description="The base64 encoded image") + +class Dataset: + async def generate_context(self)->DatasetEntry: + pass diff --git a/webgenie/datasets/huggingface_dataset.py b/webgenie/datasets/huggingface_dataset.py new file mode 100644 index 00000000..febd94f7 --- /dev/null +++ b/webgenie/datasets/huggingface_dataset.py @@ -0,0 +1,57 @@ +# https://huggingface.co/datasets/SALT-NLP/Design2Code_human_eval_pairwise + +import bittensor as bt +import os +import random +from datasets import load_dataset + +from langchain_openai import ChatOpenAI +from langchain.prompts import ChatPromptTemplate +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.pydantic_v1 import BaseModel, 
Field + +from webgenie.datasets.dataset import Dataset, DatasetEntry +from webgenie.helpers.llms import call_llm +from webgenie.prompts import PROMPT_MAKE_HTML_COMPLEX + +class HTMLResponse(BaseModel): + complex_html: str = Field(description="the complex html code") + +class HuggingfaceDataset(Dataset): + def __init__(self , **kwargs): + dataset_name = kwargs["dataset_name"] + html_column = kwargs["html_column"] + split = kwargs["split"] + + self.dataset = load_dataset(dataset_name, split=split) + self.html_column = html_column + self.output_parser = JsonOutputParser(pydantic_object=HTMLResponse) + + async def _make_html_complex(self, html: str)->str: + bt.logging.info("Making HTML complex") + response = await call_llm( + template=[ + ("system", PROMPT_MAKE_HTML_COMPLEX), + ], + params={"html": html, "instructions": self.output_parser.get_format_instructions()}, + output_parser=self.output_parser + ) + return response["complex_html"] + + async def generate_context(self)->DatasetEntry: + try: + bt.logging.info("Generating Huggingface context") + random_index = random.randint(0, len(self.dataset) - 1) + html = self.dataset[random_index][self.html_column] + complex_html = await self._make_html_complex(html) + return DatasetEntry( + src="huggingface", + topic="design2code", + ground_truth_html=complex_html, + prompt="", + base64_image="" + ) + except Exception as e: + bt.logging.error(f"Error in generate_context: {e}") + raise e + diff --git a/webgenie/datasets/synthetic_dataset.py b/webgenie/datasets/synthetic_dataset.py new file mode 100644 index 00000000..5de18660 --- /dev/null +++ b/webgenie/datasets/synthetic_dataset.py @@ -0,0 +1,70 @@ +# The paper [Unlocking the conversion of Web Screenshots into HTML Code with the WebSight Dataset] +# (https://arxiv.org/pdf/2403.09029v1#bib.bib5) is our inspiration. +# The paper suggests using Mistral-7B-Instruct to generate concepts and use Deepseek-Coder-33b-instruct +# to generate html, but now we are using openai models here. 
We are going to use that models on the mainnet + +import bittensor as bt +import os +from typing import List + +from langchain_openai import ChatOpenAI +from langchain.prompts import ChatPromptTemplate +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.pydantic_v1 import BaseModel, Field + +from webgenie.datasets.dataset import Dataset, DatasetEntry +from webgenie.helpers.llms import call_llm +from webgenie.prompts import PROMPT_GEN_CONCEPT, PROMPT_GEN_HTML + +class ConceptResponse(BaseModel): + concepts: List[str] = Field(description="The concept of the website") + +class HTMLResponse(BaseModel): + html: str = Field(description="The html code of the website") + +class SyntheticDataset(Dataset): + def __init__(self, has_ground_truth_html: bool = True): + self.has_ground_truth_html = has_ground_truth_html + self.concept_parser = JsonOutputParser(pydantic_object=ConceptResponse) + self.html_parser = JsonOutputParser(pydantic_object=HTMLResponse) + self.concepts = [] + + async def _generate_concepts(self): + bt.logging.info("Generating concepts") + response = await call_llm( + template=[ + ("system", PROMPT_GEN_CONCEPT), + ], + params={"instructions": self.concept_parser.get_format_instructions()}, + output_parser=self.concept_parser + ) + return response["concepts"] + + async def _generate_html(self, concept: str): + bt.logging.info("Generating HTML from concept") + response = await call_llm( + template=[ + ("system", PROMPT_GEN_HTML), + ], + params={"concept": concept, "instructions": self.html_parser.get_format_instructions()}, + output_parser=self.html_parser + ) + return response["html"] + + async def generate_context(self)->DatasetEntry: + bt.logging.info("Generating Synthetic context") + if not self.concepts: + self.concepts = await self._generate_concepts() + + concept = self.concepts.pop(0) + + if self.has_ground_truth_html == True: + ground_truth_html = await self._generate_html(concept) + else: + ground_truth_html = "" + + return DatasetEntry( + src="synthetic", + prompt=concept, + ground_truth_html=ground_truth_html, + ) diff --git a/webgenie/helpers/htmls.py b/webgenie/helpers/htmls.py new file mode 100644 index 00000000..616d721f --- /dev/null +++ b/webgenie/helpers/htmls.py @@ -0,0 +1,134 @@ +import bittensor as bt +import os +from bs4 import BeautifulSoup +from lxml import etree +import time +import re +import uuid +from webgenie.constants import ( + SCREENSHOT_SCRIPT_PATH, + WORK_DIR, + PLACE_HOLDER_IMAGE_URL, + PYTHON_CMD +) +from webgenie.helpers.images import image_to_base64 + +def is_valid_html(html: str): + try: + soup = BeautifulSoup(html, 'html.parser') + return True + except Exception as e: + bt.logging.debug(f"Error during HTML parsing: {e}") + return False + +def seperate_html_css(html_content: str): + soup = BeautifulSoup(html_content, 'html.parser') + + css = '' + for style_tag in soup.find_all('style'): + css += style_tag.get_text() + for style_tag in soup.find_all('style'): + style_tag.decompose() + + head = soup.head + if not head: + head = soup.new_tag('head') + soup.html.insert(0, head) + + link_tag = soup.new_tag('link', rel='stylesheet', href='styles.css') + head.append(link_tag) + cleaned_html = str(soup) + return cleaned_html, css + +def html_to_screenshot(html: str) -> str: + html_path = f"{WORK_DIR}/screenshot_{uuid.uuid4()}.html" + with open(html_path, "w") as f: + f.write(html) + png_path = f"{WORK_DIR}/screenshot_{uuid.uuid4()}.png" + os.system(f"{PYTHON_CMD} {SCREENSHOT_SCRIPT_PATH} --html {html_path} --png {png_path}") 
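+    # The render step runs out of process: SCREENSHOT_SCRIPT_PATH loads the
+    # temporary HTML file and writes a PNG, which is read back below as a
+    # base64 string before both temporary files are removed.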
+
+    time.sleep(0.1)
+    base64_image = image_to_base64(png_path)
+
+    time.sleep(0.1)
+    os.remove(html_path)
+    os.remove(png_path)
+    return base64_image
+
+def beautify_html(html: str) -> str:
+    soup = BeautifulSoup(html, 'html.parser')
+    return soup.prettify()
+
+import re
+from bs4 import BeautifulSoup
+
+def replace_image_sources(html_content, new_url=PLACE_HOLDER_IMAGE_URL):
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Replace the 'src' attribute in <img> tags
+    for img_tag in soup.find_all('img'):
+        img_tag['src'] = new_url
+
+    # Replace the 'srcset' attribute in <source> tags
+    for source_tag in soup.find_all('source'):
+        if 'srcset' in source_tag.attrs:
+            source_tag['srcset'] = new_url
+
+    # Replace URLs in inline styles (background-image) on elements with a style attribute
+    for tag in soup.find_all(style=True):
+        style = tag['style']
+        # Match both the background-image and shorthand background properties
+        updated_style = re.sub(r'background\s*:\s*[^;]*url\([^)]+\)', f'background: url({new_url})', style)
+        updated_style = re.sub(r'background-image\s*:\s*url\([^)]+\)', f'background-image: url({new_url})', updated_style)
+        tag['style'] = updated_style
+
+    # Replace URLs in