diff --git a/.env.example b/.env.example
new file mode 100644
index 000000000..dbaaffa13
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+WANDB_API_KEY=
+WANDB_BASE_URL="https://api.wandb.ai"
+WANDB_SILENT="true"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 72a70c3a0..7d958bcb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,9 @@ __pycache__/
 *.py[cod]
 *$py.class
+.DS_Store
+*.onnx
+
 # C extensions
 *.so
@@ -60,7 +63,7 @@ cover/
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
-
+*.db
 # Flask stuff:
 instance/
 .webassets-cache
@@ -163,3 +166,15 @@ testing/
 # Editors
 .vscode/settings.json
+datasets
+data
+wandb
+ecosystem.config.js
+
+
+keys
+
+
+local_datasets/
+*.csv
+tmp/images
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 000000000..395ec94dd
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,14 @@
+[MESSAGES CONTROL]
+disable=W1203,  # logging-fstring-interpolation
+    C0111,      # missing-docstring
+    C0103,      # invalid-name
+    C0114       # missing-module-docstring
+
+[FORMAT]
+max-line-length=120
+
+[BASIC]
+good-names=i,j,k,ex,Run,_,id,ge
+
+[MASTER]
+ignore=migrations
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 000000000..4cd34add8
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,27 @@
+# Changelog
+
+## 0.6.2
+
+- Establish version_key to match the weights_version subnet hyperparameter
+
+## 0.6.1
+
+- TLDR for validators
+- Safe-Scan banner for validator script
+
+## 0.6
+
+- Added blacklisting of miners
+- Various logic fixes
+- Protection from model copying
+- Chain synchronization fixes
+- Fixes for miner CLI
+- Stability improvements
+
+## 0.5.1
+
+- Various fixes for miner script
+
+## 0.5
+
+- Initial release of the subnet code
diff --git a/DOCS/COMPETITIONS.md b/DOCS/COMPETITIONS.md
new file mode 100644
index 000000000..633593404
--- /dev/null
+++ b/DOCS/COMPETITIONS.md
@@ -0,0 +1,69 @@
+# Safe Scan: Machine Learning Competitions for Cancer Detection
+
+Welcome to **Safe Scan**, a platform dedicated to organizing machine learning competitions focused on cancer detection. Our goal is to foster innovation in developing accurate and efficient models for cancer detection using machine learning. Here, you will find all the details needed to participate, submit your models, and understand the evaluation process.
+
+## Table of Contents
+
+1. [Overview](#overview)
+2. [Competition Trigger and Data Batch Handling](#competition-trigger-and-data-batch-handling)
+3. [Model Submission Requirements](#model-submission-requirements)
+4. [Evaluation and Scoring](#evaluation-and-scoring)
+5. [Development](#development)
+6. [Command-Line Interface (CLI) Tools](#command-line-interface-cli-tools)
+7. [Communication Channels](#communication-channels)
+8. [Contribute](#contribute)
+
+## Overview
+
+Safe Scan organizes dynamic competitions focused on cancer detection using machine learning. These competitions provide participants with the opportunity to develop and test their models in a responsive and adaptive environment driven by real-world data.
+
+## Competition Trigger and Data Batch Handling
+
+- **Competition Initiation**: Competitions are triggered by data batch insertions from external medical institutions, creating a steady stream of new, non-public data for testing purposes.
+- **Data Handling Process**: Medical institutions upload each new batch of data to a central reference repository on Hugging Face, along with a reference entry for the new data batch file.
+- **Automatic Detection and Competition Start**: Validators monitor this centralized repository for new data batch entries. When new data is detected, validators initiate a competition by downloading and processing the data batch.
+
+## Model Submission Requirements
+
+- **Model Submission**: Participants, referred to as miners, must submit their trained models at the end of each competition.
+- **Format**: All models must be in ONNX format. This ensures uniform testing and allows for broad deployment options, including on mobile and web platforms.
+- **License**: All models must be licensed under the MIT License to ensure the open nature of the competition and to allow unrestricted use worldwide.
+- **Training Code**: Each submission should include the code used for training the model to ensure transparency and reproducibility.
+- **Upload Process**: Models are uploaded to Hugging Face at the end of each test. Miners then submit the Hugging Face repository link on the blockchain for evaluation by validators.
+- **Timing Constraint**: Only models submitted at least 30 minutes before the competition start time are eligible for evaluation. This requirement ensures that models have not been retrained with the new data batch, maintaining fairness and integrity across the competition.
+
+## Evaluation and Scoring
+
+- **Independent Evaluation**: Each validator independently evaluates the submitted models according to predefined criteria.
+- **Scoring Mechanism**: Detailed scoring mechanisms are outlined in the [DOCS](/DOCS/competitions) directory. Validators run scheduled competitions and assess the models based on these criteria.
+- **Winning Criteria**: The best-performing model, according to the evaluation metrics, is declared the winner of the competition.
+- **Rewards**: The winner receives the full emission for that competition, divided by the number of competitions held.
+- **Rewards Time Decay**: If a miner stays in the top position for more than 30 days, their rewards start to decrease gradually. Every 7 days after the initial 30 days, their share of the rewards decreases by 10%. This reduction continues until their share reaches a minimum of 10% of the original reward.
+
+## Command-Line Interface (CLI) Tools
+
+- **Local Testing**: Miners are provided with an easy-to-use command-line interface (CLI) for local testing of their models. This tool helps streamline the process of testing models, uploading to Hugging Face, and submitting to the competition.
+- **Automated Data Retrieval**: Code for automating the retrieval of training data for each competition is available to integrate with the model training process. The script is defined in [scripts/get_dataset.py](/scripts/get_dataset.py).
+
+## Communication Channels
+
+Stay connected and up-to-date with the latest news, discussions, and support:
+
+- **Discord**: Join our [Safe Scan Discord channel](https://discord.gg/rbBu7WuZ) and the Bittensor Discord in the #safescan channel for real-time updates and community interaction.
+- **Dashboard**: Access the competition dashboard on [Hugging Face](https://huggingface.co/spaces/safescanai/dashboard).
+- **Blog**: Visit our [blog](https://safe-scan.ai/news/) for news and updates.
+- **Twitter/X**: Follow us on [Twitter/X](https://x.com/SAFESCAN_AI) for announcements and highlights.
+- **Email**: Contact us directly at [info@safescanai.ai](mailto:info@safescanai.ai) for any inquiries or support.
+
+## Development
+
+- **Software Lifecycle**: The project follows a structured software lifecycle, including Git flow and integration testing. These practices ensure robust development and encourage community contributions.
+
+## Contribute
+
+We welcome contributions to this project! Whether you're interested in improving our codebase, adding new features, or enhancing documentation, your involvement is valued. To contribute:
+
+- Follow our software lifecycle and Git flow processes.
+- Ensure all code changes pass integration testing.
+- Contact us on our [Safe Scan Discord channel](https://discord.gg/rbBu7WuZ) for more details on how to get started.
diff --git a/DOCS/COMPETITIONS.md.old b/DOCS/COMPETITIONS.md.old
new file mode 100644
index 000000000..29d3f1d45
--- /dev/null
+++ b/DOCS/COMPETITIONS.md.old
@@ -0,0 +1,75 @@
+
+
+
+
+
+# Safe Scan: Machine Learning Competitions for Cancer Detection
+
+Welcome to **Safe Scan**, a platform dedicated to organizing machine learning competitions focused on cancer detection. Our goal is to foster innovation in developing accurate and efficient models for cancer detection using machine learning. Here, you can find all the details needed to participate, submit your models, and understand the evaluation process.
+
+## Table of Contents
+
+1. [Overview](#overview)
+2. [Competition Schedule](#competition-schedule)
+3. [Dataset and Model Submission](#dataset-and-model-submission)
+4. [Evaluation and Scoring](#evaluation-and-scoring)
+5. [Configuration and Development](#configuration-and-development)
+6. [Command-Line Interface (CLI) Tools](#command-line-interface-cli-tools)
+7. [Communication Channels](#communication-channels)
+8. [Contribute](#contribute)
+
+## Overview
+
+Safe Scan organizes continuous competitions focused on cancer detection using machine learning. These competitions aim to advance the field by providing participants with the opportunity to develop and test their models in a structured environment.
+
+## Competition Schedule
+
+- **Frequency**: Competitions are held multiple times a day, at specific hours, continuously. This allows participants to join at different times that suit them best.
+- **Timed Events**: Each competition starts with a dataset release 5 minutes before testing, providing a short window for participants to prepare.
+- **Testing and Evaluation**: Models are evaluated immediately after each test, ensuring a quick feedback loop for participants.
+
+## Dataset and Model Submission
+
+- **Dataset Release**: A new dataset is provided for each competition, which is released exactly 5 minutes before testing begins. This dataset is used for training the models.
+- **Model Submission**: Participants, referred to as "miners," are required to submit their trained models at the end of each competition.
+  - **Format**: All models must be in ONNX format. This ensures uniform testing and allows for broad deployment options, including on mobile and web platforms.
+  - **Training Code**: Each submission should include the code used for training the model to ensure transparency and reproducibility.
+  - **Upload Process**: Models are uploaded to Hugging Face at the end of each test. Miners then submit the Hugging Face repository link on the blockchain for evaluation by validators.
+
+## Evaluation and Scoring
+
+- **Independent Evaluation**: Each validator independently evaluates the submitted models according to predefined criteria.
+- **Scoring Mechanism**: Detailed scoring mechanisms are outlined in the [DOCS](/DOCS/competitions) directory. Validators run scheduled competitions and assess the models based on these criteria.
+- **Winning Criteria**: The best-performing model, according to the evaluation metrics, is declared the winner of the competition.
+- **Rewards**: The winner receives the full emission for that competition, divided by the number of competitions held.
+- **Rewards time decay**: If a miner stays at the top position for more than 30 days, their rewards start to decrease gradually. Every 7 days after the initial 30 days, their share of the rewards decreases by 10%. This reduction continues until their share reaches a minimum of 10% of the original reward.
+
+## Configuration and Development
+
+- **Competition Configuration**: Each competition is configured through a `competition_config.json` file. This file defines all parameters and rules for the competition and is used by both miners and validators.
+- **Tracking Changes**: Changes to the competition configuration are tracked via a GitHub issue tracker, ensuring transparency and allowing for community input.
+- **Software Lifecycle**: The project follows a structured software lifecycle, including Git flow and integration testing. This ensures robust development practices and encourages community contributions.
+
+## Command-Line Interface (CLI) Tools
+
+- **Local Testing**: Miners are provided with an easy-to-use command-line interface (CLI) for local testing of their models. This tool helps streamline the process of testing models, uploading to Hugging Face, and submitting to the competition.
+- **Automated Data Retrieval**: Code for automating the retrieval of training data for each competition is available to integrate with the model training process. The script is defined in [scripts/get_dataset.py](/scripts/get_dataset.py).
+
+## Communication Channels
+
+Stay connected and up-to-date with the latest news, discussions, and support:
+
+- **Discord**: Join our [Safe Scan Discord channel](https://discord.gg/rbBu7WuZ) and the Bittensor Discord in the #safescan channel for real-time updates and community interaction.
+- **Dashboard**: Access the competition dashboard on [Hugging Face](https://huggingface.co/spaces/safescanai/dashboard).
+- **Blog**: Visit our [blog](https://safe-scan.ai/news/) for news and updates.
+- **Twitter/X**: Follow us on [Twitter/X](https://x.com/SAFESCAN_AI) for announcements and highlights.
+- **Email**: Contact us directly at [info@safescanai.ai](mailto:info@safescanai.ai) for any inquiries or support.
+
+## Contribute
+
+We welcome contributions to this project! Whether you're interested in improving our codebase, adding new features, or enhancing documentation, your involvement is valued. To contribute:
+
+- Follow our software lifecycle and Git flow processes.
+- Ensure all code changes pass integration testing.
+- Contact us on our [Safe Scan Discord channel](https://discord.gg/rbBu7WuZ) for more details on how to get started.
diff --git a/DOCS/competitions/1-MELANOMA-V3.md.old b/DOCS/competitions/1-MELANOMA-V3.md.old
new file mode 100644
index 000000000..fb7095276
--- /dev/null
+++ b/DOCS/competitions/1-MELANOMA-V3.md.old
@@ -0,0 +1,70 @@
+# Description of Melanoma Competition
+
+## Overview
+This competition invites participants to develop a machine learning model that **aids in detecting the possibility of melanoma**.
+The goal is to create a model that can identify patterns in images that are associated with an increased likelihood of melanoma.
+
+### Objective
+The primary objective is to develop a model that can analyze photos taken by users of their skin lesions or areas of concern.
+The model should **assist users** by providing a risk assessment or likelihood score that helps them decide if they should seek further medical advice.
+As a result, the best model will be released in the Skin Scan mobile app, running locally on the phone, and on a website that will host it, free for anyone to use.
+
+## Evaluation Criteria
+Models will be evaluated based on the **performance metrics** described below.
+The evaluation will be calculated from the following metrics with the listed weights.
+
+### Performance Metrics
+
+The models will be assessed on the following metrics with the corresponding weights:
+
+| **Metric**  | **Description**                                                   | **Weight** |
+|-------------|-------------------------------------------------------------------|------------|
+| **F-beta**  | Prioritizes recall, with a high beta to emphasize it. $\beta = 2$ | 0.60       |
+| **Accuracy**| Measures the overall correctness of predictions.                  | 0.30       |
+| **AUC**     | Evaluates the model's ability to distinguish classes.             | 0.10       |
+
+### Mathematical Formulas
+
+1. **F-beta Score ($F_\beta$)**
+
+   $$F_\beta = \left(1 + \beta^2\right) \cdot \frac{\text{Precision} \cdot \text{Recall}}{\left(\beta^2 \cdot \text{Precision}\right) + \text{Recall}}$$
+
+   Where:
+   - **$\beta$** is the weight of recall in the combined score
+   - in our case $\beta = 2$, giving recall higher importance
+
+2. **Accuracy**
+
+   $$\text{Accuracy} = \frac{\text{True Positives} + \text{True Negatives}}{\text{Total Number of Samples}}$$
+
+3. **Area Under the Curve (AUC)**
+
+   AUC is the area under the Receiver Operating Characteristic (ROC) curve. It is calculated using the trapezoidal rule:
+
+   $$\text{AUC} = \int_0^1 \text{TPR} \, d(\text{FPR})$$
+
+   Where:
+   - **TPR** = True Positive Rate
+   - **FPR** = False Positive Rate
+
+## Model Inputs and Outputs
+
+### Inputs
+- **Input Format**: Multiple images in JPEG or PNG format.
+- **Input Features**: During preprocessing, images are resized to 224x224 pixels. Images are converted to numpy arrays with a datatype of `np.float32`, normalized to the range [0, 1].
+
+### Outputs
+- **Output Format**: A numerical value between 0 and 1, represented as a `float`. This value indicates the likelihood or risk score of the area of concern warranting further investigation.
+
+### Submission Requirements
+- **Model Submission**: Models must be submitted in ONNX format. They should be capable of handling dynamic batch sizes and accept inputs with the shape `(batch, 3, 224, 224)`, where `batch` represents the batch dimension. This ensures that the model can process a variable number of images in a single batch.
+
+## Rules and Guidelines
+
+- **Timeline**:
+  - Competitions are triggered dynamically by new data batch uploads from external medical institutions, with no predefined schedule. Competitions may occur at any time based on the timing of new data insertions.
+  - Each time a new data batch is detected in the central reference repository on Hugging Face, a new competition is initiated immediately by validators.
+- Results of the competition will be available on the dashboard.
diff --git a/DOCS/competitions/1-MELANOMA.md b/DOCS/competitions/1-MELANOMA.md
new file mode 100644
index 000000000..fb7095276
--- /dev/null
+++ b/DOCS/competitions/1-MELANOMA.md
@@ -0,0 +1,70 @@
+# Description of Melanoma Competition
+
+## Overview
+This competition invites participants to develop a machine learning model that **aids in detecting the possibility of melanoma**. The goal is to create a model that can identify patterns in images that are associated with an increased likelihood of melanoma.
+
+### Objective
+The primary objective is to develop a model that can analyze photos taken by users of their skin lesions or areas of concern.
+The model should **assist users** by providing a risk assessment or likelihood score that helps them decide if they should seek further medical advice.
+As a result, the best model will be released in the Skin Scan mobile app, running locally on the phone, and on a website that will host it, free for anyone to use.
+
+## Evaluation Criteria
+Models will be evaluated based on the **performance metrics** described below.
+The evaluation will be calculated from the following metrics with the listed weights.
+
+### Performance Metrics
+
+The models will be assessed on the following metrics with the corresponding weights:
+
+| **Metric**  | **Description**                                                   | **Weight** |
+|-------------|-------------------------------------------------------------------|------------|
+| **F-beta**  | Prioritizes recall, with a high beta to emphasize it. $\beta = 2$ | 0.60       |
+| **Accuracy**| Measures the overall correctness of predictions.                  | 0.30       |
+| **AUC**     | Evaluates the model's ability to distinguish classes.             | 0.10       |
+
+### Mathematical Formulas
+
+1. **F-beta Score ($F_\beta$)**
+
+   $$F_\beta = \left(1 + \beta^2\right) \cdot \frac{\text{Precision} \cdot \text{Recall}}{\left(\beta^2 \cdot \text{Precision}\right) + \text{Recall}}$$
+
+   Where:
+   - **$\beta$** is the weight of recall in the combined score
+   - in our case $\beta = 2$, giving recall higher importance
+
+2. **Accuracy**
+
+   $$\text{Accuracy} = \frac{\text{True Positives} + \text{True Negatives}}{\text{Total Number of Samples}}$$
+
+3. **Area Under the Curve (AUC)**
+
+   AUC is the area under the Receiver Operating Characteristic (ROC) curve. It is calculated using the trapezoidal rule:
+
+   $$\text{AUC} = \int_0^1 \text{TPR} \, d(\text{FPR})$$
+
+   Where:
+   - **TPR** = True Positive Rate
+   - **FPR** = False Positive Rate
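+
+As a quick illustration of how $\beta = 2$ shifts the combined score toward recall, here is a short, hedged sketch; the precision and recall values are made up purely for illustration:
+
+```python
+# Hypothetical numbers: precision = 0.70, recall = 0.90, beta = 2.
+beta = 2
+precision, recall = 0.70, 0.90
+f_beta = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
+print(round(f_beta, 4))  # 0.8514 -- much closer to recall than to precision
+```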
+
+## Model Inputs and Outputs
+
+### Inputs
+- **Input Format**: Multiple images in JPEG or PNG format.
+- **Input Features**: During preprocessing, images are resized to 224x224 pixels. Images are converted to numpy arrays with a datatype of `np.float32`, normalized to the range [0, 1].
+
+### Outputs
+- **Output Format**: A numerical value between 0 and 1, represented as a `float`. This value indicates the likelihood or risk score of the area of concern warranting further investigation.
+
+### Submission Requirements
+- **Model Submission**: Models must be submitted in ONNX format. They should be capable of handling dynamic batch sizes and accept inputs with the shape `(batch, 3, 224, 224)`, where `batch` represents the batch dimension. This ensures that the model can process a variable number of images in a single batch.
+
+## Rules and Guidelines
+
+- **Timeline**:
+  - Competitions are triggered dynamically by new data batch uploads from external medical institutions, with no predefined schedule. Competitions may occur at any time based on the timing of new data insertions.
+  - Each time a new data batch is detected in the central reference repository on Hugging Face, a new competition is initiated immediately by validators.
+- Results of the competition will be available on the dashboard.
diff --git a/DOCS/competitions/2-TRICORDER.md b/DOCS/competitions/2-TRICORDER.md
new file mode 100644
index 000000000..1ecab6672
--- /dev/null
+++ b/DOCS/competitions/2-TRICORDER.md
@@ -0,0 +1,145 @@
+# 🏆 Competition: Skin Lesion Classification Based on Images
+
+## 🎯 Competition Goal
+
+The goal of the competition is to build a lightweight and effective ML model that classifies skin lesions into one of 10 predefined disease classes based on lesion images and demographic data.
+
+## 📥 Input and Output Data
+
+### Input
+
+#### 1. Skin Lesion Image
+- **Format**: JPEG or PNG
+- **Channels**: RGB (3 channels), no alpha channel
+- **Minimum side length**: ≥ 512 px
+- **Pixel values**: source images are `np.uint8` in [0, 255]; during preprocessing they are scaled to `np.float32` values in the [0, 512] range that the model receives (see the example inference script)
+
+#### 2. Patient Demographic Data
+- **Age**: integer in years (e.g., 42)
+- **Gender**: "m" (male) / "f" (female)
+- **Body location**: integer according to the table below
+
+> **Note**: The model must utilize both image and demographic data.
+
+### Output
+- **List of 10 class probabilities**: List[float]
+- **Probabilities must sum to 1.0** (softmax)
+- **Value range**: [0.0, 1.0]
+
+## 🧬 Class List (order in model output)
+
+| No. | Class | Clinical Type | Symbol |
+|-----|-------|---------------|--------|
+| 1 | Actinic keratosis (AK) | Benign | AK |
+| 2 | Basal cell carcinoma (BCC) | Malignant | BCC |
+| 3 | Seborrheic keratosis (SK) | Medium risk | SK |
+| 4 | Squamous cell carcinoma (SCC) | Malignant | SCC |
+| 5 | Vascular lesion | Medium risk | VASC |
+| 6 | Dermatofibroma | Benign | DF |
+| 7 | Benign nevus | Benign | NV |
+| 8 | Other non-neoplastic | Benign | NON |
+| 9 | Melanoma | Malignant | MEL |
+| 10 | Other neoplastic / Benign | Benign | ON |
+
+## ⚖️ Class Weights
+
+| Class Type | Classes (No.) | Color | Weight |
+|------------|---------------|-------|--------|
+| Malignant | 2, 4, 9 | 🔴 | 3× (BCC, SCC, MEL) |
+| Medium risk | 3, 5 | 🟠 | 2× (SK, VASC) |
+| Benign | 1, 6, 7, 8, 10 | 🟢 | 1× (AK, DF, NV, NON, ON) |
+
+## 📍 Body Location List
+
+| No. | Location |
+|-----|----------|
+| 1 | Arm |
+| 2 | Feet |
+| 3 | Genitalia |
+| 4 | Hand |
+| 5 | Head |
+| 6 | Leg |
+| 7 | Torso |
+
+## 🧮 Evaluation Criteria (100 pts)
+
+| Category | Weight | Max pts | Notes |
+|----------|--------|---------|-------|
+| Prediction Quality | 90% | 90 pts | Weighted average: 50% Accuracy, 50% Weighted-F1 |
+| Efficiency | 10% | 10 pts | Model size (50%) + inference speed (50%) |
+
+## 📊 Score Calculation
+
+### F1-score for class types
+
+```
+F1_malignant = (F1_2 + F1_4 + F1_9) / 3
+F1_medium = (F1_3 + F1_5) / 2
+F1_benign = (F1_1 + F1_6 + F1_7 + F1_8 + F1_10) / 5
+```
+
+### Weighted-F1
+
+```
+Weighted-F1 = (3 × F1_malignant + 2 × F1_medium + 1 × F1_benign) / 6
+```
+
+### Accuracy
+Standard top-1 classification accuracy (percentage of correct classifications)
+
+### Prediction Score (90%)
+
+```
+Prediction Score = 0.5 × Accuracy + 0.5 × Weighted-F1
+```
+
+### Efficiency Score
+
+```
+Efficiency Score = 0.5 × (1 - (S - S_min) / (S_max - S_min))
+                 + 0.5 × (1 - (T - T_min) / (T_max - T_min))
+```
+
+**Where:**
+- **S** – model size in MB
+- **T** – inference time for a single image (in ms)
+- **S_min = 50 MB, S_max = 150 MB**
+- **T_min = shortest time in competition, T_max = longest time in competition**
+- **Efficiency Score ∈ [0.0, 1.0]**
+
+> **Note**: Inference time will be measured on uniform CPU hardware (no GPU).
+
+### Final Score
+
+```
+Final Score = 0.9 × Prediction Score + 0.1 × Efficiency Score
+```
+
+## 💡 Additional Notes
+
+- Models may return high probabilities for multiple classes – this will not be penalized as long as softmax is correct.
+- Calibration is not required but may improve prediction usefulness.
+- Models with size < 50 MB receive maximum points for size in efficiency scoring.
+
+## 🔧 Example Implementation
+
+Example scripts and pipeline available in: `DOCS/competitions/tricorder_samples/`
+
+### Running the example:
+```bash
+cd DOCS/competitions/tricorder_samples
+./run_pipeline.sh
+```
+
+### Example structure:
+- `generate_tricorder_model.py` - 10-class model generation
+- `run_tricorder_inference.py` - Inference script with demographic data
+- `example_dataset/` - Sample dataset with images and labels
+- `README_EXAMPLE_TRICORDER.md` - Detailed documentation
+
+## 📋 Submission Requirements
+
+- Model must accept both image and demographic inputs
+- Output exactly 10 probabilities that sum to 1.0
+- Model size should be optimized (< 150 MB, ideally < 50 MB)
+- Include an inference script compatible with the evaluation framework
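+
+To make the scoring arithmetic above concrete, here is a minimal, hedged sketch of the final-score computation. Function and variable names are illustrative, not the validator's actual code; labels are assumed to be encoded as the class numbers 1-10.
+
+```python
+import numpy as np
+from sklearn.metrics import accuracy_score, f1_score
+
+def tricorder_score(y_true, y_pred, size_mb, time_ms, t_min, t_max,
+                    s_min=50.0, s_max=150.0):
+    """Illustrative re-implementation of the scoring rules above."""
+    per_class = f1_score(y_true, y_pred, labels=list(range(1, 11)),
+                         average=None, zero_division=0)
+    f1 = dict(zip(range(1, 11), per_class))
+    f1_malignant = (f1[2] + f1[4] + f1[9]) / 3
+    f1_medium = (f1[3] + f1[5]) / 2
+    f1_benign = (f1[1] + f1[6] + f1[7] + f1[8] + f1[10]) / 5
+    weighted_f1 = (3 * f1_malignant + 2 * f1_medium + 1 * f1_benign) / 6
+    prediction = 0.5 * accuracy_score(y_true, y_pred) + 0.5 * weighted_f1
+    # Sizes below S_min get full size points, per the notes above.
+    size_term = 1 - (np.clip(size_mb, s_min, s_max) - s_min) / (s_max - s_min)
+    time_term = 1 - (time_ms - t_min) / (t_max - t_min)
+    efficiency = 0.5 * size_term + 0.5 * time_term
+    return 0.9 * prediction + 0.1 * efficiency
+```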
diff --git a/DOCS/competitions/tricorder_samples/README_EXAMPLE_TRICORDER.md b/DOCS/competitions/tricorder_samples/README_EXAMPLE_TRICORDER.md
new file mode 100644
index 000000000..860449ed9
--- /dev/null
+++ b/DOCS/competitions/tricorder_samples/README_EXAMPLE_TRICORDER.md
@@ -0,0 +1,98 @@
+# Tricorder Model Pipeline
+
+This directory contains all scripts needed for the Tricorder skin lesion classification model, including data loading, model generation, verification, and inference.
+
+## Prerequisites
+
+- Python 3.8+
+- Required Python packages (install with `pip install -r requirements.txt` from the project root)
+- Bash shell (for running the pipeline script)
+
+## Directory Structure
+
+```
+DOCS/competitions/tricorder_samples/
+├── run_pipeline.sh               # Main pipeline script
+├── example_dataset/              # Sample dataset with images and labels
+│   ├── *.jpg                     # Sample skin lesion images
+│   └── label.csv                 # Labels with demographics
+├── generate_tricorder_model.py   # Model generation (10-class with demographics)
+├── run_tricorder_inference.py    # Inference script with new demographic format
+```
+
+## Usage
+
+1. Make the script executable (if not already):
+   ```bash
+   chmod +x run_pipeline.sh
+   ```
+
+2. Run the pipeline:
+   ```bash
+   ./run_pipeline.sh
+   ```
+
+The pipeline will execute the following steps:
+
+1. **Load Test Data**: Copies sample data from `example_dataset/` to the `data/` directory
+2. **Generate Model**: Creates the Tricorder model files
+3. **Verify Model Files**: Checks that model files were created successfully
+4. **Run Inference**: Tests the model with a sample image and demographic data (also validates model functionality)
+
+## Outputs
+
+- Model files will be saved in the root directory:
+  - `sample_tricorder_model.pt`: PyTorch model
+  - `sample_tricorder_model.onnx`: ONNX model
+- Sample data will be saved in the `data/` directory
+- Output and logs will be displayed in the console
+
+## Running Custom Inference
+
+To run inference on a custom image with specific demographic data:
+
+```bash
+python3 ./run_tricorder_inference.py \
+    --model ../../../sample_tricorder_model.onnx \
+    --image /path/to/your/image.jpg \
+    --age <age> \
+    --gender <m|f> \
+    --location <1-7>
+```
+
+Example:
+```bash
+python3 ./run_tricorder_inference.py \
+    --model ../../../sample_tricorder_model.onnx \
+    --image ../../../data/images/sample.jpg \
+    --age 42 \
+    --gender f \
+    --location 7
+```
+
+## Demographic Parameters
+
+- **Age**: Integer years (e.g., 42)
+- **Gender**: "m" (male) or "f" (female)
+- **Location**: Integer 1-7 mapping to:
+  - 1: Arm
+  - 2: Feet
+  - 3: Genitalia
+  - 4: Hand
+  - 5: Head
+  - 6: Leg
+  - 7: Torso
+
+## Model Specifications
+
+- **Input**: 512x512 RGB image with pixel values in the [0, 512] range + demographics
+- **Output**: 10 class probabilities (summing to 1.0)
+- **Classes**: AK, BCC, SK, SCC, VASC, DF, NV, NON, MEL, ON
+
+## Notes
+
+- The script will create necessary directories if they don't exist
+- All paths are relative to the project root (3 levels up from this directory)
+- Model files are saved in the project root directory
+- The sample dataset is included in the `example_dataset/` subdirectory
+- For production use, consider adding proper error handling and logging
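+
+## Encoding label.csv Rows (Illustrative)
+
+`label.csv` stores `Location` and `Gender` as text, while the model expects the numeric encoding listed above. A small, hypothetical helper (not part of the shipped scripts) could map a CSV row to the demographics vector like this:
+
+```python
+import csv
+import numpy as np
+
+LOCATIONS = {"Arm": 1, "Feet": 2, "Genitalia": 3, "Hand": 4,
+             "Head": 5, "Leg": 6, "Torso": 7}
+
+def row_to_demographics(row):
+    """Encode a label.csv row as the [age, gender, location] float32 vector."""
+    gender = 1.0 if row["Gender"].lower() == "m" else 0.0  # 'm' -> 1.0, 'f' -> 0.0
+    demo = [float(row["Age"]), gender, float(LOCATIONS[row["Location"]])]
+    return np.array([demo], dtype=np.float32)
+
+with open("example_dataset/label.csv", newline="") as f:
+    for row in csv.DictReader(f):
+        print(row["image_path"], row_to_demographics(row))
+```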
diff --git a/DOCS/competitions/tricorder_samples/example_dataset/ebd5b2e4-ad50-46bd-a437-7c91bc5d48f7.jpg b/DOCS/competitions/tricorder_samples/example_dataset/ebd5b2e4-ad50-46bd-a437-7c91bc5d48f7.jpg
new file mode 100644
index 000000000..fd185e486
Binary files /dev/null and b/DOCS/competitions/tricorder_samples/example_dataset/ebd5b2e4-ad50-46bd-a437-7c91bc5d48f7.jpg differ
diff --git a/DOCS/competitions/tricorder_samples/example_dataset/label.csv b/DOCS/competitions/tricorder_samples/example_dataset/label.csv
new file mode 100644
index 000000000..bb42c09e4
--- /dev/null
+++ b/DOCS/competitions/tricorder_samples/example_dataset/label.csv
@@ -0,0 +1,2 @@
+"image_path","Class","Age","Location","Gender"
+"ebd5b2e4-ad50-46bd-a437-7c91bc5d48f7.jpg","BCC","50","Torso","f"
diff --git a/DOCS/competitions/tricorder_samples/generate_tricorder_model.py b/DOCS/competitions/tricorder_samples/generate_tricorder_model.py
new file mode 100644
index 000000000..95d8e1ca0
--- /dev/null
+++ b/DOCS/competitions/tricorder_samples/generate_tricorder_model.py
@@ -0,0 +1,93 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class SimpleSkinLesionModel(nn.Module):
+    def __init__(self, num_classes=10, num_demographics=3):
+        super().__init__()
+        # Image feature extractor
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 16, 3, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),  # 256x256
+
+            nn.Conv2d(16, 32, 3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),  # 128x128
+
+            nn.Conv2d(32, 64, 3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            nn.AdaptiveAvgPool2d((1, 1))
+        )
+
+        # Demographic data processor
+        self.demographics_processor = nn.Sequential(
+            nn.Linear(num_demographics, 16),
+            nn.ReLU(),
+            nn.BatchNorm1d(16)
+        )
+
+        # Combined classifier
+        self.classifier = nn.Linear(64 + 16, num_classes, bias=False)
+
+    def forward(self, image, demographics):
+        image_features = self.features(image)
+        image_features = image_features.view(image_features.size(0), -1)
+
+        demographics_features = self.demographics_processor(demographics)
+
+        combined_features = torch.cat((image_features, demographics_features), dim=1)
+
+        logits = self.classifier(combined_features)
+        # Apply softmax to get probabilities that sum to 1.0
+        probabilities = F.softmax(logits, dim=1)
+        return probabilities
+
+def export_optimized_model(output_path='sample_tricorder_model.pt'):
+    # Create model and set to evaluation mode
+    model = SimpleSkinLesionModel(num_classes=10, num_demographics=3)
+    model.eval()
+
+    # Create dummy inputs
+    dummy_image = torch.randn(1, 3, 512, 512)
+    dummy_demo = torch.tensor([[42.0, 1.0, 5.0]])  # age, sex, location
+
+    # Export to ONNX with optimization
+    onnx_path = output_path.replace('.pt', '.onnx')
+    torch.onnx.export(
+        model,
+        (dummy_image, dummy_demo),
+        onnx_path,
+        export_params=True,
+        opset_version=13,
+        do_constant_folding=True,
+        input_names=['image', 'demographics'],
+        output_names=['output'],
+        dynamic_axes={
+            'image': {0: 'batch_size'},
+            'demographics': {0: 'batch_size'},
+            'output': {0: 'batch_size'}
+        }
+    )
+
+    # Save the model in FP16
+    model = model.half()
+    dummy_image = dummy_image.half()
+    dummy_demo = dummy_demo.half()
+    scripted_model = torch.jit.trace(model, (dummy_image, dummy_demo))
+    torch.jit.save(scripted_model, output_path)
+
+    # Print model size information
+    import os
+    pt_size = os.path.getsize(output_path) / (1024 * 1024)
+    onnx_size = os.path.getsize(onnx_path) / (1024 * 1024)
+    print("Optimized model sizes:")
+    print(f"- PyTorch (FP16): {pt_size:.2f} MB")
+    print(f"- ONNX: {onnx_size:.2f} MB")
+
+if __name__ == "__main__":
+    export_optimized_model()
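+
+# --- Illustrative addition (not part of the original script): a quick sanity
+# check that the exported ONNX model emits per-sample probabilities summing to
+# 1.0, as the competition output spec requires. File and input names assume
+# the export above; call it manually after export_optimized_model().
+def sanity_check_onnx(onnx_path='sample_tricorder_model.onnx'):
+    import numpy as np
+    import onnxruntime as ort
+    session = ort.InferenceSession(onnx_path)
+    image = (np.random.rand(2, 3, 512, 512) * 512.0).astype(np.float32)  # [0, 512]
+    demographics = np.array([[42.0, 1.0, 5.0], [50.0, 0.0, 7.0]], dtype=np.float32)
+    (probs,) = session.run(None, {'image': image, 'demographics': demographics})
+    assert probs.shape == (2, 10)
+    assert np.allclose(probs.sum(axis=1), 1.0, atol=1e-4)
+    print('OK: probabilities sum to 1 for every sample')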
diff --git a/DOCS/competitions/tricorder_samples/run_pipeline.sh b/DOCS/competitions/tricorder_samples/run_pipeline.sh
new file mode 100755
index 000000000..1071282d6
--- /dev/null
+++ b/DOCS/competitions/tricorder_samples/run_pipeline.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Set PYTHONPATH to include the project root (three levels up from this script)
+export PYTHONPATH="${PYTHONPATH}:$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+
+# Configuration
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_DIR="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+DATA_DIR="${ROOT_DIR}/data"
+MODEL_DIR="${ROOT_DIR}/models"
+OUTPUT_DIR="${ROOT_DIR}/output"
+
+# Create directories
+mkdir -p "$DATA_DIR"
+mkdir -p "$MODEL_DIR"
+mkdir -p "$OUTPUT_DIR"
+
+# Function to print section headers
+section() {
+    echo -e "\n=== $1 ==="
+}
+
+# Step 1: Verify example dataset
+section "Checking Example Dataset"
+echo "Using example dataset from ${SCRIPT_DIR}/example_dataset"
+echo "Images: $(ls -1 "$SCRIPT_DIR/example_dataset/"*.jpg 2>/dev/null | wc -l)"
+echo "Labels: $(tail -n +2 "$SCRIPT_DIR/example_dataset/label.csv" 2>/dev/null | wc -l) entries"
+
+# Step 2: Generate model
+section "Generating Model"
+python3 "${SCRIPT_DIR}/generate_tricorder_model.py"
+
+# Step 3: Verify model files exist
+section "Verifying Model Files"
+if [ -f "${ROOT_DIR}/sample_tricorder_model.pt" ] && [ -f "${ROOT_DIR}/sample_tricorder_model.onnx" ]; then
+    echo "Model files generated successfully"
+    ls -lh "${ROOT_DIR}/sample_tricorder_model.pt" "${ROOT_DIR}/sample_tricorder_model.onnx"
+else
+    echo "Error: Model files not found!"
+    exit 1
+fi
+
+# Step 4: Run inference on a sample image
+section "Running Inference"
+SAMPLE_IMAGE=$(find "${SCRIPT_DIR}/example_dataset" -type f -name "*.jpg" | head -n 1)
+if [ -z "$SAMPLE_IMAGE" ]; then
+    echo "Error: No sample images found in ${SCRIPT_DIR}/example_dataset"
+    exit 1
+fi
+
+echo "Using sample image: $SAMPLE_IMAGE"
+python3 "${SCRIPT_DIR}/run_tricorder_inference.py" \
+    --model "${ROOT_DIR}/sample_tricorder_model.onnx" \
+    --image "$SAMPLE_IMAGE" \
+    --age 42 \
+    --gender f \
+    --location 7
+
+# Print completion message
+section "Pipeline Completed Successfully"
+echo "All steps completed successfully!"
+echo -e "\nOutputs:"
+echo "- Example dataset: ${SCRIPT_DIR}/example_dataset"
+echo "- Model files: ${ROOT_DIR}/sample_tricorder_model.{pt,onnx}"
diff --git a/DOCS/competitions/tricorder_samples/run_tricorder_inference.py b/DOCS/competitions/tricorder_samples/run_tricorder_inference.py
new file mode 100644
index 000000000..a2f089fed
--- /dev/null
+++ b/DOCS/competitions/tricorder_samples/run_tricorder_inference.py
@@ -0,0 +1,117 @@
+import argparse
+import os
+
+import numpy as np
+import onnxruntime as ort
+from PIL import Image
+
+# Class mapping for tricorder competition
+CLASS_NAMES = [
+    "Actinic keratosis (AK)",
+    "Basal cell carcinoma (BCC)",
+    "Seborrheic keratosis (SK)",
+    "Squamous cell carcinoma (SCC)",
+    "Vascular lesion (VASC)",
+    "Dermatofibroma (DF)",
+    "Benign nevus (NV)",
+    "Other non-neoplastic (NON)",
+    "Melanoma (MEL)",
+    "Other neoplastic (ON)"
+]
+
+class ONNXInference:
+    def __init__(self, model_path):
+        """Initialize the ONNX model session."""
+        self.session = ort.InferenceSession(model_path)
+        self.input_names = [inp.name for inp in self.session.get_inputs()]
+
+    def preprocess_image(self, image_path):
+        """Load and preprocess an image to the [0, 512] range, as specified."""
+        img = Image.open(image_path).convert('RGB')
+        # Resize to 512x512
+        img = img.resize((512, 512))
+        # Convert to a numpy array and scale from [0, 255] to [0, 512]
+        img_array = np.array(img, dtype=np.float32)
+        img_array = img_array * (512.0 / 255.0)
+        # Convert to BCHW format
+        img_array = np.transpose(img_array, (2, 0, 1))
+        img_array = np.expand_dims(img_array, axis=0)
+        return img_array
+
+    def predict(self, image_path, age, gender, location):
+        """Run inference on a single image with demographic data."""
+        # Preprocess image
+        image_tensor = self.preprocess_image(image_path)
+
+        # Convert demographics to the expected format
+        # Gender: 'm' -> 1.0, 'f' -> 0.0
+        gender_encoded = 1.0 if gender.lower() == 'm' else 0.0
+
+        # Prepare demographic data as [age, gender_encoded, location]
+        demo_tensor = np.array([[float(age), gender_encoded, float(location)]], dtype=np.float32)
+
+        # Run inference
+        inputs = {self.input_names[0]: image_tensor, self.input_names[1]: demo_tensor}
+        outputs = self.session.run(None, inputs)
+
+        # The model already outputs probabilities (softmax applied in the forward pass)
+        probs = outputs[0].flatten()
+
+        # Get top 3 predictions
+        top3_idx = np.argsort(probs)[-3:][::-1]
+        top3 = [(CLASS_NAMES[i], float(probs[i])) for i in top3_idx]
+
+        return top3
+
+def main():
+    parser = argparse.ArgumentParser(description='Run inference with ONNX model')
+    parser.add_argument('--model', type=str, default='sample_models/sample_tricorder_model.onnx',
+                        help='Path to ONNX model')
+    parser.add_argument('--image', type=str, required=True,
+                        help='Path to input image')
+    parser.add_argument('--age', type=int, required=True,
+                        help='Patient age in years (e.g., 42)')
+    parser.add_argument('--gender', type=str, required=True, choices=['m', 'f'],
+                        help='Patient gender: m (male) or f (female)')
+    parser.add_argument('--location', type=int, required=True, choices=range(1, 8),
+                        help='Body location: 1=Arm, 2=Feet, 3=Genitalia, 4=Hand, 5=Head, 6=Leg, 7=Torso')
+    args = parser.parse_args()
+
+    # Check if files exist
+    if not os.path.exists(args.model):
+        print(f"Error: Model file not found at {args.model}")
+        return
+    if not os.path.exists(args.image):
+        print(f"Error: Image file not found at {args.image}")
+        return
+
+    # Initialize and run inference
+    print(f"\nLoading model: {args.model}")
+    print(f"Processing image: {args.image}\n")
+
+    try:
+        model = ONNXInference(args.model)
+        predictions = model.predict(args.image, args.age, args.gender, args.location)
+
+        location_names = {1: "Arm", 2: "Feet", 3: "Genitalia", 4: "Hand", 5: "Head", 6: "Leg", 7: "Torso"}
+        print(f"Demographics: Age={args.age}, Gender={args.gender.upper()}, Location={location_names[args.location]}")
+        print("\nTop 3 Predictions:")
+        print("-" * 40)
+        for i, (class_name, prob) in enumerate(predictions, 1):
+            print(f"{i}. {class_name}: {prob*100:.2f}%")
+
+    except RuntimeError as e:
+        print(f"Error during inference: {str(e)}")
+
+if __name__ == "__main__":
+    main()
diff --git a/DOCS/header.png b/DOCS/header.png
new file mode 100644
index 000000000..3db2325af
Binary files /dev/null and b/DOCS/header.png differ
diff --git a/DOCS/miner.md b/DOCS/miner.md
new file mode 100644
index 000000000..bddd62084
--- /dev/null
+++ b/DOCS/miner.md
@@ -0,0 +1,169 @@
+# Miner Script Documentation
+
+This documentation provides an overview of the miner script, its functionality, requirements, and usage instructions.
+
+## Overview
+
+The miner script is designed to manage models, evaluate them locally, and upload them to HuggingFace, as well as submit models to validators within a specified network.
+
+Key features of the script include:
+
+- **Local Model Evaluation**: Allows you to evaluate models against a dataset locally.
+- **HuggingFace Upload**: Compresses and uploads models and code to HuggingFace.
+- **Model Submission to Validators**: Saves model information in the metagraph, enabling validators to test the models.
+
+## Prerequisites
+
+- **Python 3.12**: The script is written in Python and requires Python 3.12 to run.
+- **Virtual Environment**: It's recommended to run the script within a virtual environment to manage dependencies.
+- **8 GB RAM**: the minimum memory required to evaluate a machine learning model locally
+
+## Installation
+
+1. **Clone the repository**
+
+   ```bash
+   git clone git@github.com:safe-scan-ai/cancer-ai.git
+   cd cancer-ai
+   ```
+
+2. **Create a Virtual Environment**
+
+   ```bash
+   virtualenv venv --python=3.12
+   source venv/bin/activate
+   ```
+
+3. **Install Required Python Packages**
+
+   Install the required Python packages listed in `requirements.txt`:
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+## Registering a miner on the subnet
+
+If you haven't yet created a miner wallet and registered on our subnet, here is the set of commands to run:
+
+Create a miner coldkey:
+
+```
+btcli wallet new_coldkey --wallet.name miner
+```
+
+Create a hotkey for the miner:
+```
+btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
+```
+
+Register the miner on the CancerAI subnet (`<netuid>` is the subnet number):
+```
+btcli subnet recycle_register --netuid <netuid> --subtensor.network finney --wallet.name miner --wallet.hotkey default
+```
+
+Check that your key was registered:
+```
+btcli wallet overview --wallet.name miner
+```
+
+## Usage
+
+### Prerequisites
+
+Before running the script, ensure the following:
+
+- You are in the base directory of the project.
+- Your virtual environment is activated.
+- Run the following command to set the `PYTHONPATH`:
+
+```
+export PYTHONPATH="${PYTHONPATH}:./"
+```
+
+### Evaluate Model Locally
+
+This mode performs the following tasks:
+
+- Downloads the dataset.
+- Loads your model.
+- Prepares data for execution.
+- Logs evaluation results.
+
+To evaluate a model locally, use the following command:
+
+```
+python neurons/miner.py --action evaluate --competition_id <competition_id> --model_path <path_to_model>
+```
+
+Command line argument explanation:
+
+- `--action` - action to perform; choices are "upload", "evaluate", "submit"
+- `--model_path` - local path of the ONNX model
+- `--competition_id` - ID of the competition. The list of current competitions is in [competition_config.json](config/competition_config.json)
+- `--clean-after-run` - deletes the dataset after evaluating the model
+- `--model_dir` - path for storing models (default: "./models")
+- `--dataset_dir` - path for storing datasets (default: "./datasets")
+- `--datasets_config_hf_repo_id` - Hugging Face repository ID for the datasets configuration, e.g. "safescanai/competition-configuration-testnet" for testnet
+
+### Upload to HuggingFace
+
+This mode compresses the code provided by `--code_directory` and uploads the model and code to HuggingFace.
+The repository ID should point to a repository of type "model".
+
+The repository needs to be public for the validator to pick it up.
+
+To upload to HuggingFace, use the following command:
+
+```bash
+python neurons/miner.py \
+    --action upload \
+    --competition_id <competition_id> \
+    --model_path <path_to_model> \
+    --code_directory <code_directory> \
+    --hf_model_name <hf_model_name> \
+    --hf_repo_id <hf_repo_id> \
+    --hf_token <hf_token>
+```
+
+Command line argument explanation:
+
+- `--code_directory` - local directory of code
+- `--hf_repo_id` - Hugging Face repository ID, e.g. "username/repo"
"username/repo" +- `--hf_token` - hugging face authentication token +- `--hf_model_name` - name of file to store in hugging face repository + +### Submit Model to Validators + +This mode saves model information in the metagraph, allowing validators to retrieve information about your model for testing. + +The repository you are submitting needs to be public for validator to pick it up. + +To submit a model to validators, use the following command: + +```bash +python neurons/miner.py \ + --action submit \ + --competition_id melanoma-1\ + --hf_code_filename skin_melanoma_small.zip\ + --hf_model_name best_model.onnx \ + --hf_repo_id safescanai/test_dataset \ + --wallet.name miner2 \ + --wallet.hotkey default \ + --netuid 163 \ + --subtensor.network test +``` + +Command line argument explanation + +- `--hf_code_filename` - name of file in hugging face repository containing zipped code +- `--hf_model_name` - name of file in hugging face repository containing model +- `--wallet.name` - name of wallet coldkey used for authentication with Bittensor network +- `--wallet.hotkey` - name of wallet hotkey used for authentication with Bittensor network +- `--netuid` - subnet number +- `--subtensor.network` - Bittensor network to connect to - + +## Notes + +- **Environment**: The script uses the environment from which it is executed, so ensure all necessary environment variables and dependencies are correctly configured. +- **Model Evaluation**: The `evaluate` action downloads necessary datasets and runs the model locally; ensure that your local environment has sufficient resources. diff --git a/DOCS/onnx_runner/image.jpg b/DOCS/onnx_runner/image.jpg new file mode 100644 index 000000000..d3cf94368 Binary files /dev/null and b/DOCS/onnx_runner/image.jpg differ diff --git a/DOCS/onnx_runner/onnx_example_requirements.txt b/DOCS/onnx_runner/onnx_example_requirements.txt new file mode 100644 index 000000000..baecf4fb5 --- /dev/null +++ b/DOCS/onnx_runner/onnx_example_requirements.txt @@ -0,0 +1,11 @@ +coloredlogs==15.0.1 +flatbuffers==24.3.25 +humanfriendly==10.0 +mpmath==1.3.0 +numpy==2.1.2 +onnx==1.17.0 +onnxruntime==1.19.2 +packaging==24.1 +pillow==11.0.0 +protobuf==5.28.3 +sympy==1.13.3 diff --git a/DOCS/onnx_runner/onnx_example_runner.py b/DOCS/onnx_runner/onnx_example_runner.py new file mode 100644 index 000000000..59606d73b --- /dev/null +++ b/DOCS/onnx_runner/onnx_example_runner.py @@ -0,0 +1,40 @@ +import onnxruntime +import numpy as np +from PIL import Image + +model_path = "best_model.onnx" +image_path = "image.jpg" +target_size = (512, 512) + +try: + session = onnxruntime.InferenceSession(model_path) +except Exception as e: + print(f"Failed to load model: {e}") + exit(1) + +# Load and preprocess the image +img = Image.open(image_path) +img = img.resize(target_size) # Resize the image +img_array = np.array(img, dtype=np.float32) / 255.0 # Normalize and convert to float32 + +# Ensure 3 channels (RGB) if image is grayscale +if img_array.shape[-1] != 3: + img_array = np.stack((img_array,) * 3, axis=-1) + +# Transpose image to (C, H, W) format +img_array = np.transpose(img_array, (2, 0, 1)) + +# Add batch dimension +input_batch = np.expand_dims(img_array, axis=0) + +# Prepare input dictionary for the model +input_name = session.get_inputs()[0].name +input_data = {input_name: input_batch} + +# Run inference +try: + results = session.run(None, input_data)[0] + print(results) +except Exception as e: + print(f"Failed to run model inference: {e}") + exit(1) \ No newline at end of file diff --git 
diff --git a/DOCS/prerequirements.md b/DOCS/prerequirements.md
new file mode 100644
index 000000000..ef22db8b8
--- /dev/null
+++ b/DOCS/prerequirements.md
@@ -0,0 +1,242 @@
+# 💡BITTENSOR
+
+Bittensor is a mining network, similar to Bitcoin, that includes built-in incentives designed to encourage computers to provide access to machine learning models in an efficient and censorship-resistant manner. These models can be queried by users seeking outputs from the network, for instance generating text, audio, and images, or extracting numerical representations of these input types. Under the hood, Bittensor’s *economic market* is facilitated by a blockchain token mechanism, through which producers (***miners***) and the verifiers of their work (***validators***) are rewarded. Miners host, train or otherwise procure machine learning systems for the network as a means of fulfilling the verification problems defined by the validators, such as the ability to generate responses from prompts, e.g. “What is the capital of Texas?”.
+
+The token-based mechanism under which the miners are incentivized ensures that they are constantly driven to make their knowledge output more useful in terms of speed, intelligence and diversity. The value generated by the network is distributed directly to the individuals producing that value, without intermediaries. Anyone can participate in this endeavour, extract value from the network, and govern Bittensor. The network is open to all participants, and no individual or group has full control over what is learned, who can profit from it, or who can access it.
+
+To learn more about Bittensor, please read our [paper](https://bittensor.com/whitepaper).
+
+# **🛠️ INSTALL**
+
+There are four ways to install Bittensor:
+
+1. Through the installer:
+
+```
+$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/opentensor/bittensor/master/scripts/install.sh)"
+```
+
+2. With pip:
+
+```
+$ pip3 install bittensor
+```
+
+3. From source:
+
+```
+$ git clone https://github.com/opentensor/bittensor.git
+$ python3 -m pip install -e bittensor/
+```
+
+4. Using Conda (recommended for **Apple M1**):
+
+```
+$ conda env create -f ~/.bittensor/bittensor/scripts/environments/apple_m1_environment.yml
+$ conda activate bittensor
+```
+
+To test your installation, type:
+
+```
+$ btcli --help
+```
+
+or using python:
+
+```
+import bittensor
+```
+
+**CUDA**
+
+If you anticipate using PoW registration for subnets or the faucet (only available on staging), please install `cubit` as well for your version of python. You can find the Opentensor cubit implementation and instructions [here](https://github.com/opentensor/cubit).
+
+For example, with python 3.10:
+
+```
+pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl
+```
+
+# **👛 WALLETS**
+
+Wallets are the core ownership and identity technology around which all functions on Bittensor are carried out. Bittensor wallets consist of a coldkey and hotkeys, where the coldkey may contain many hotkeys, while each hotkey can belong to only a single coldkey. Coldkeys store funds securely and operate functions such as transfers and staking, while hotkeys are used for all online operations such as signing queries, running miners and validating.
+
+Wallets can be created in two ways:
+1. Using the Python API:
+
+```
+import bittensor
+wallet = bittensor.wallet()
+wallet.create_new_coldkey()
+wallet.create_new_hotkey()
+print(wallet)
+"Wallet (default, default, ~/.bittensor/wallets/)"
+```
+
+2. Or using btcli:
+
+> Use the subcommand `wallet` or its alias `w`:
+
+```
+$ btcli wallet new_coldkey
+    Enter wallet name (default):
+
+    IMPORTANT: Store this mnemonic in a secure (preferably offline) place, as anyone who has possession of this mnemonic can use it to regenerate the key and access your tokens.
+    The mnemonic to the new coldkey is:
+    **** *** **** **** ***** **** *** **** **** **** ***** *****
+    You can use the mnemonic to recreate the key in case it gets lost. The command to use to regenerate the key using this mnemonic is:
+    btcli w regen_coldkey --mnemonic post maid erode shy captain verify scan shoulder brisk mountain pelican elbow
+
+$ btcli wallet new_hotkey
+    Enter wallet name (default): d1
+    Enter hotkey name (default):
+
+    IMPORTANT: Store this mnemonic in a secure (preferably offline) place, as anyone who has possession of this mnemonic can use it to regenerate the key and access your tokens.
+    The mnemonic to the new hotkey is:
+    **** *** **** **** ***** **** *** **** **** **** ***** *****
+    You can use the mnemonic to recreate the key in case it gets lost. The command to use to regenerate the key using this mnemonic is:
+    btcli w regen_hotkey --mnemonic total steak hour bird hedgehog trim timber can friend dry worry text
+```
+
+In both cases you should be able to view your keys by navigating to `~/.bittensor/wallets` or by running `btcli wallet list`:
+
+```
+$ tree ~/.bittensor/
+    .bittensor/                 # Bittensor, root directory.
+        wallets/                # The folder containing all bittensor wallets.
+            default/            # The name of your wallet, "default"
+                coldkey         # Your encrypted coldkey.
+                coldkeypub.txt  # Your coldkey public address
+                hotkeys/        # The folder containing all of your hotkeys.
+                    default     # Your unencrypted hotkey information.
+```
+
+Your default wallet `Wallet (default, default, ~/.bittensor/wallets/)` is always used unless you specify otherwise. Be sure to store your mnemonics safely. If you lose your password to your wallet, or access to the machine where the wallet is stored, you can always regenerate the coldkey using the mnemonic you saved from above:
+
+```
+$ btcli wallet regen_coldkey --mnemonic **** *** **** **** ***** **** *** **** **** **** ***** *****
+```
+
+**Using the CLI**
+
+The Bittensor command line interface (`btcli`) is the primary command line tool for interacting with the Bittensor network. It can be used to deploy nodes, manage wallets, stake/unstake, nominate, transfer tokens, and more.
+
+**Basic Usage**
+
+To get the list of all the available commands and their descriptions, you can use:
+
+```
+btcli --help
+
+usage: btcli
+
+bittensor cli v{bittensor.__version__}
+
+commands:
+  subnets (s, subnet) - Commands for managing and viewing subnetworks.
+  root (r, roots) - Commands for managing and viewing the root network.
+  wallet (w, wallets) - Commands for managing and viewing wallets.
+  stake (st, stakes) - Commands for staking and removing stake from hotkey accounts.
+  sudo (su, sudos) - Commands for subnet management.
+  legacy (l) - Miscellaneous commands.
+```
+
+**Example Commands**
+
+**Viewing Senate Proposals**
+
+```
+btcli root proposals
+```
+
+**Viewing Senate Members**
+
+```
+btcli root list_delegates
+```
+
+**Viewing Proposal Votes**
+
+```
+btcli root senate_vote --proposal=[PROPOSAL_HASH]
+```
+
+**Registering for Senate**
+
+```
+btcli root register
+```
+
+**Leaving Senate**
+
+```
+btcli root undelegate
+```
+
+**Voting in Senate**
+
+```
+btcli root senate_vote --proposal=[PROPOSAL_HASH]
+```
+
+**Miscellaneous Commands**
+
+```
+btcli legacy update
+btcli legacy faucet
+```
+
+**Managing Subnets**
+
+```
+btcli subnets list
+btcli subnets create
+```
+
+**Managing Wallets**
+
+```
+btcli wallet list
+btcli wallet transfer
+```
+
+**Note**
+
+Please replace the subcommands and arguments as necessary to suit your needs, and always refer to `btcli --help` or `btcli <command> --help` for the most up-to-date and accurate information.
+
+For example:
+
+```
+btcli subnets --help
+
+usage: btcli subnets [-h] {list,metagraph,lock_cost,create,register,pow_register,hyperparameters} ...
+
+positional arguments:
+  {list,metagraph,lock_cost,create,register,pow_register,hyperparameters}
+        Commands for managing and viewing subnetworks.
+    list             List all subnets on the network.
+    metagraph        View a subnet metagraph information.
+    lock_cost        Return the lock cost to register a subnet.
+    create           Create a new bittensor subnetwork on this chain.
+    register         Register a wallet to a network.
+    pow_register     Register a wallet to a network using PoW.
+    hyperparameters  View subnet hyperparameters.
+
+options:
+  -h, --help  show this help message and exit
+```
+
+**Post-Installation Steps**
+
+To enable autocompletion for the Bittensor CLI, run the following commands:
+
+```
+btcli --print-completion bash >> ~/.bashrc    # For Bash
+btcli --print-completion zsh >> ~/.zshrc      # For Zsh
+source ~/.bashrc    # Reload Bash configuration to take effect
+```
diff --git a/DOCS/validator.md b/DOCS/validator.md
new file mode 100644
index 000000000..ba3349df1
--- /dev/null
+++ b/DOCS/validator.md
@@ -0,0 +1,118 @@
+# Validator Script Documentation
+
+This documentation provides an overview of the validator script, its functionality, requirements, and usage instructions.
+
+## Overview
+
+The validator script is designed to run a validator process and automatically update it whenever a new version is released. This script was adapted from the [original script](https://github.com/macrocosm-os/pretraining/blob/main/scripts/start_validator.py) in the Pretraining Subnet repository.
+
+Key features of the script include:
+
+- **Automatic Updates**: The script checks for updates periodically and ensures that the latest version of the validator is running by pulling the latest code from the repository and upgrading the necessary Python packages.
+- **Command-Line Argument Compatibility**: The script now properly handles custom command-line arguments and forwards them to the validator (`neurons/validator.py`).
+- **Virtual Environment Support**: The script runs within the same virtual environment that it is executed in, ensuring compatibility and ease of use.
+- **PM2 Process Management**: The script uses PM2, a process manager, to manage the validator process.
+
+## Prerequisites
+
+### Server requirements
+
+- 16 GB of RAM
+- Storage: 50 GB, extendable
+- GPU: NVIDIA RTX, 6 GB VRAM (will work without a GPU, but slower)
+
+### System requirements
+
+- **Python 3.10 and virtualenv**: The script is written in Python and requires Python 3.10 to run.
+- **PM2**: PM2 must be installed and available on your system. It is used to manage the validator process.
+- **zip and unzip**
+
+## Installation and Setup
+
+1. **Clone the Repository**: Make sure you have cloned the repository containing this script and have navigated to the correct directory.
+
+2. **Install PM2**: Ensure PM2 is installed globally on your system. If it isn't, you can install it using npm:
+
+```
+   npm install -g pm2
+```
+
+3. **Set Up Virtual Environment**: If you wish to run the script within a virtual environment, create and activate the environment before running the script:
+
+```
+   python3 -m venv venv
+   source venv/bin/activate  # On Windows use `venv\Scripts\activate`
+```
+
+4. **Install Required Python Packages**: Install the required Python packages listed in requirements.txt:
+
+```
+pip install -r requirements.txt
+```
+
+## Usage
+
+To run the validator script, use the following command (replace `<your_hf_token>` with your own Hugging Face token):
+
+```bash
+python3 scripts/start_validator.py --wallet.name=my-wallet --wallet.hotkey=my-hotkey --netuid=76 --hf_token=<your_hf_token>
+```
+
+## Command-Line Arguments
+
+- `--pm2_name`: Specifies the name of the PM2 process. Default is `"cancer_ai_vali"`.
+- `--wallet.name`: Specifies the wallet name to be used by the validator.
+- `--wallet.hotkey`: Specifies the hotkey associated with the wallet.
+- `--subtensor.network`: Specifies the network name. Default is `"finney"`.
+- `--netuid`: Specifies the netuid of the subnet. Default is `"76"`.
+- `--logging.debug`: Enables debug logging if set to `1`. Default is `1`.
+- `--hf_token`: Hugging Face token, required for Hugging Face requests.
+
+## How It Works
+
+1. **Start Validator Process**: The script starts the validator process using PM2, based on the provided PM2 process name.
+2. **Periodic Updates**: The script periodically checks for updates (every 5 minutes by default) by fetching the latest code from the git repository (a simplified sketch of this loop is shown in the Auto-Update Sketch section below).
+3. **Handle Updates**: If a new version is detected, the script pulls the latest changes, upgrades the Python packages, stops the current validator process, and restarts it with the updated code.
+4. **Local Changes**: If there are local changes in the repository that conflict with the updates, the script attempts to rebase them. If conflicts persist, the rebase is aborted to preserve the local changes.
+
+## Notes
+
+- **Local Changes**: If you have made local changes to the codebase, the auto-update feature will attempt to preserve them. However, conflicts might require manual resolution.
+- **Environment**: The script uses the environment from which it is executed, so ensure all necessary environment variables and dependencies are correctly configured.
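+
+## Auto-Update Sketch
+
+The following is a minimal, illustrative sketch of the update loop described above; it is not the actual implementation (which lives in `scripts/start_validator.py`), and the helper names are assumptions made for the example:
+
+```python
+# Minimal sketch of the periodic auto-update loop (illustrative only).
+import subprocess
+import time
+
+CHECK_INTERVAL = 5 * 60  # the script checks for updates every 5 minutes
+
+
+def local_head() -> str:
+    # Commit hash of the currently checked-out code.
+    return subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
+
+
+def remote_head() -> str:
+    # Fetch and return the upstream commit hash.
+    subprocess.run(["git", "fetch"], check=True)
+    return subprocess.check_output(["git", "rev-parse", "@{u}"], text=True).strip()
+
+
+def update_and_restart(pm2_name: str) -> None:
+    # Pull the new code, upgrade packages, then restart the PM2 process.
+    subprocess.run(["git", "pull", "--rebase"], check=True)
+    subprocess.run(["pip", "install", "-r", "requirements.txt"], check=True)
+    subprocess.run(["pm2", "restart", pm2_name], check=True)
+
+
+while True:
+    if local_head() != remote_head():
+        update_and_restart("cancer_ai_vali")
+    time.sleep(CHECK_INTERVAL)
+```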
+
+# TLDR: Installation script for a fresh Ubuntu 24.04
+
+```bash
+# from root
+apt update
+apt install -y software-properties-common
+apt install -y python3.12 python3.12-venv python3.12-dev python3-pip unzip
+apt install -y python3-virtualenv git nodejs npm
+
+npm install pm2 -g
+
+adduser cancerai
+su cancerai
+cd
+
+git clone https://github.com/safe-scan-ai/cancer-ai
+cd cancer-ai
+
+# Ubuntu 24.04 ships Python 3.12, so use the interpreter installed above
+virtualenv --python=python3.12 venv
+source venv/bin/activate
+pip install -U setuptools
+pip install -r requirements.txt
+
+export PYTHONPATH="${PYTHONPATH}:./"
+
+# import keys
+
+cp .env.example .env
+
+# add wandb API key
+
+# example for testnet
+python3 scripts/start_validator.py --wallet.name=validator-staked --wallet.hotkey=default --subtensor.network test --logging.debug 1 --netuid 163
+```
diff --git a/README.md b/README.md
index ba69bdaee..36f09f989 100644
--- a/README.md
+++ b/README.md
@@ -1,213 +1,220 @@
-
+# SAFE SCAN
-# **Bittensor Subnet Template**
-[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor)
+
+*Bittensor Subnet 76 for improving cancer detection algorithms*
+
+![header.png](https://github.com/safe-scan-ai/cancer-ai-3/blob/LEMSTUDI0-patch-1/DOCS/header.png)
+
+
+[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.com/channels/1259812760280236122/1262383307832823809)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
---
+[www.SAFE-SCAN.ai](https://www.safe-scan.ai)      [www.SKIN-SCAN.ai](https://www.skin-scan.ai)      [Follow us on X](https://x.com/SAFESCAN_AI)
+
+
+# **📋 TABLE OF CONTENTS**
+
+
+- [👋 Introduction](#-introduction)
+- [⚙️ Features](#features)
+- [👁️ Vision](#vision)
+- [🌍 Real-world Applications](#-real-world-applications)
+- [⚠️ Why is SAFESCAN Subnet Important?](#why-is-safescan-subnet-important)
+- [📢 Marketing](#-marketing)
+- [💰 Tokenomics & Economy](#-tokenomics--economy)
+- [👨‍👨‍👦‍👦 Team Composition](#-team-composition)
+- [🛣️ Roadmap](#roadmap)
+- [✅ Prerequisites](#-prerequisites)
+- [👍 Running Validator](#-running-validator)
+- [⛏️ Running Miner](#-running-miner)
+- [🚀 Get involved](#-get-involved)
+- [📝 License](#-license)
+
+
+# **👋 INTRODUCTION**
+
+Welcome to Safe Scan Cancer AI Detection, a groundbreaking initiative leveraging the power of AI and blockchain technology to revolutionize cancer detection. Our mission is to make advanced cancer detection algorithms accessible and free for everyone. Through our project, we aim to provide cutting-edge, open-source tools that support early cancer diagnosis for patients and healthcare professionals worldwide.
+
+This repository contains the subnet code that runs on the Bittensor network.
+
+
+# **⚙️ FEATURES**
+
+🤗 Validator-friendly code
+
+🏆 Rewards for best-performing algorithms
+
+👑 Royalties for algorithms used in our real-world software solutions
+
+🤳 Free app for skin cancer detection
+
+⚔️ Various cancer detection algorithm competitions
+
+📊 Dashboard
+
+💻 Specialized software for detecting other types of cancer
+
+💸 Self-sustaining economy
+
+
+# **👁️ VISION**
+
+Cancer is one of the most significant challenges of our time, and we believe that AI holds the key to addressing it. However, this solution should be accessible and free for everyone. Machine vision technology has long proven effective in early diagnosis, which is crucial for curing cancer. Yet, until now, it has largely remained in the realm of whitepapers. SAFESCAN is a project dedicated to aggregating and enhancing the best algorithms for detecting various types of cancer and providing free computational power for practical cancer detection. We aim to create open-source products that support cancer diagnosis for both patients and doctors.
+
+To date, many crypto and AI projects, particularly those focused on medicine, have struggled to achieve real-world implementation due to various barriers. Our solution focuses on:
+
+**🛠️ Development of Applications and Software:** Invest in the ongoing development and enhancement of our cancer detection applications and software to ensure they are at the cutting edge of technology.
+
+**📝 Medical Device Registration:** Allocate funds to cover the costs associated with registering our solutions as medical devices, ensuring they meet all regulatory requirements for safety and efficacy.
+
+**📢 Marketing and Awareness:** Implement comprehensive marketing strategies to raise awareness about our solutions and the Bittensor project, making them known to both potential users and healthcare professionals.
+
+**🤝 Collaboration and Networking:** Build strong networks with cancer organizations, researchers, and healthcare providers to facilitate the practical implementation and continuous improvement of our technology.
+
+**📈 Continuous Improvement of Algorithms:** Reward top researchers, maintain algorithms in the open domain, and constantly expand our anonymized cancer detection dataset through partnerships and user contributions.
+
+**⚖️ Legislative Efforts:** Engage in legislative activities to support the recognition and adoption of AI-driven cancer detection technologies within the medical community.
+
+By focusing on these areas, we aim to overcome the barriers to the practical use of AI in cancer detection and provide a solution that is accessible to everyone.
+
+To expedite the process and navigate the complexities of medical certification, we are beginning our initiatives with authorized clinical trials. After completing clinical trials of our first project, **SELFSCAN** – an application for detecting skin cancer from self-taken pictures – we will focus on its deployment as a Class II medical device in the USA and Europe, obtaining the necessary FDA and CE approvals.
+
+Concurrently, with the help of the Bittensor community and our unique tokenomics supporting researchers, we will continuously improve the best cancer detection algorithms. This ensures that, by the time our products are brought to market, our solutions surpass all existing algorithms.
+
+Subsequently, we will focus on detecting other types of cancer, starting with breast and lung cancer.
+
+For more information about our project, visit our websites:
+
+[safe-scan.ai](https://www.safe-scan.ai/)
+
+[skin-scan.ai](https://skin-scan.ai/)
+
+# **🌍 REAL-WORLD APPLICATIONS**
+
+Our SKIN SCAN app, accessible at [www.skin-scan.ai](http://www.skin-scan.ai/), is designed to bridge the gap between AI's proven efficiency in cancer detection and its limited real-world application. Despite numerous studies validating AI's potential in cancer detection, its use in everyday healthcare is still not widespread. Our app aims to change this by providing a user-friendly, accessible tool for early skin cancer detection.
+
+Building on this foundation, we are developing dedicated software for breast cancer detection, utilizing advanced AI to offer accurate assessments. Following this, we will expand our focus to include lung and brain cancer detection solutions, aiming to make these life-saving technologies widely available and effective in clinical settings.
+
+SKIN SCAN app live demo:
+
+[https://x.com/SAFESCAN_AI/status/1819351129362149876](https://x.com/SAFESCAN_AI/status/1819351129362149876)
+
+
+# **⚠️ WHY IS SAFESCAN SUBNET IMPORTANT?**
+
+SAFE SCAN harnesses the power of the Bittensor network to address one of the world's most pressing issues: cancer detection. Researchers can contribute to refining detection algorithms and earn TAO, with additional royalties for those whose algorithms are integrated into our software. By focusing on obtaining large datasets, including paid and hard-to-access medical data, we ensure the development of superior models. Our decentralized, transparent system guarantees fair competition and protects against model overfitting.
With strong community and validator support, we can expand to create and register standalone software for detecting other types of cancer.
-## The Incentivized Internet
+Additionally, with Safe Scan, we can significantly broaden awareness of Bittensor's capabilities and resonate with a more general audience. This will be crucial for the network's growth and increasing market cap, attracting both large and micro-investors.
-[Discord](https://discord.gg/bittensor) • [Network](https://taostats.io/) • [Research](https://bittensor.com/whitepaper)
-
+# **📢 MARKETING**
---
-- [Quickstarter template](#quickstarter-template)
-- [Introduction](#introduction)
-  - [Example](#example)
-- [Installation](#installation)
-  - [Before you proceed](#before-you-proceed)
-  - [Install](#install)
-- [Writing your own incentive mechanism](#writing-your-own-incentive-mechanism)
-- [Writing your own subnet API](#writing-your-own-subnet-api)
-- [Subnet Links](#subnet-links)
-- [License](#license)
+Our first goal is to develop the best skin cancer detection algorithm and establish ourselves as a recognized leader in cancer detection. We aim not only to create the most popular and widely accessible skin cancer detection app but also to demonstrate Bittensor's power. We plan to spread awareness through partnerships with skin cancer foundations, growth hacking strategies like affiliate links for unlocking premium features, and promotional support from the Apple and Google app stores, aiming to reach over 1 million users within 18 months. And every app launch will display “proudly powered by BITTENSOR.”
---
-## Quickstarter template
+However, brand recognition is just the beginning. Our marketing strategy will focus on creating hype by engaging bloggers, reaching out to celebrities affected by skin cancer, and sending articles to major tech, health, and news outlets. We will leverage the current interest in AI and blockchain to showcase the life-saving potential of these technologies.
-This template contains all the required installation instructions, scripts, and files and functions for:
-- Building Bittensor subnets.
-- Creating custom incentive mechanisms and running these mechanisms on the subnets.
+# **💰 TOKENOMICS & ECONOMY**
-In order to simplify the building of subnets, this template abstracts away the complexity of the underlying blockchain and other boilerplate code. While the default behavior of the template is sufficient for a simple subnet, you should customize the template in order to meet your specific requirements.
---
+**🪙 UNIQUE TOKENOMICS**
-## Introduction
+Our tokenomics are uniquely designed to drive research and development of new algorithms while also supporting real-life applications.
+**Competitions**: Safe Scan organizes ongoing competitions focused on cancer detection using machine learning, providing a structured environment for participants to develop and test their models.
-**IMPORTANT**: If you are new to Bittensor subnets, read this section before proceeding to [Installation](#installation) section.
+You can find comprehensive details about competition scheduling, dataset release, model submission, evaluation, configuration, and development tools here: [COMPETITION README](DOCS/COMPETITIONS.md)
-The Bittensor blockchain hosts multiple self-contained incentive mechanisms called **subnets**. Subnets are playing fields in which:
-- Subnet miners who produce value, and
-- Subnet validators who produce consensus
+**Incentives**: The winner of each competition receives the entire reward pool for that specific competition. The reward pool is determined by the total emission allocated for miners, divided by the number of competitions being held.
-determine together the proper distribution of TAO for the purpose of incentivizing the creation of value, i.e., generating digital commodities, such as intelligence or data.
+If a miner stays at the top position for more than 30 days, their rewards start to decrease gradually. Every 7 days after the initial 30 days, their share of the rewards decreases by 10%.
This reduction continues until their share reaches a minimum of 10% of the original reward (a short worked example of this decay schedule appears just before the License section below).
-Each subnet consists of:
-- Subnet miners and subnet validators.
-- A protocol using which the subnet miners and subnet validators interact with one another. This protocol is part of the incentive mechanism.
-- The Bittensor API using which the subnet miners and subnet validators interact with Bittensor's onchain consensus engine [Yuma Consensus](https://bittensor.com/documentation/validating/yuma-consensus). The Yuma Consensus is designed to drive these actors: subnet validators and subnet miners, into agreement on who is creating value and what that value is worth.
+**📈 SELF-SUSTAINING ECONOMY**
-This starter template is split into three primary files. To write your own incentive mechanism, you should edit these files. These files are:
-1. `template/protocol.py`: Contains the definition of the protocol used by subnet miners and subnet validators.
-2. `neurons/miner.py`: Script that defines the subnet miner's behavior, i.e., how the subnet miner responds to requests from subnet validators.
-3. `neurons/validator.py`: This script defines the subnet validator's behavior, i.e., how the subnet validator requests information from the subnet miners and determines the scores.
+Although our primary focus is on using our subnet to save lives with state-of-the-art algorithms and custom-made software while promoting the power of Bittensor computing worldwide, our long-term goal is to establish a self-sustaining economy.
-### Example
+We aim to keep our cancer detection app and software free for those who need it most: regular people and public hospitals, especially in developing countries with limited medical personnel, while offering paid solutions for the private healthcare sector and developed countries.
-The Bittensor Subnet 1 for Text Prompting is built using this template. See [prompting](https://github.com/macrocosm-os/prompting) for how to configure the files and how to add monitoring and telemetry and support multiple miner types. Also see this Subnet 1 in action on [Taostats](https://taostats.io/subnets/netuid-1/) explorer.
+- Premium features for paid users (e.g., more lesions for detection, exporting data to doctors, etc.)
+- Support from sponsors, donors, cancer foundations, and companies that align with our mission
+- Rewards for miners and validators for generating economic value (e.g., analyzing mammography data for private healthcare)
+- Proceeds generated from sponsors and end-users will be distributed among the network's participants.
---
-## Installation
+# **👨‍👨‍👦‍👦 TEAM COMPOSITION**
-### Before you proceed
+The SafeScan team is not only composed of professionals with diverse expertise in crypto, software development, machine learning, marketing, UX design, and business, but we are also close friends united by a shared vision.
-Before you proceed with the installation of the subnet, note the following:
-- Use these instructions to run your subnet locally for your development and testing, or on Bittensor testnet or on Bittensor mainnet.
-- **IMPORTANT**: We **strongly recommend** that you first run your subnet locally and complete your development and testing before running the subnet on Bittensor testnet. Furthermore, make sure that you next run your subnet on Bittensor testnet before running it on the Bittensor mainnet.
-- You can run your subnet either as a subnet owner, or as a subnet validator or as a subnet miner.
-- **IMPORTANT:** Make sure you are aware of the minimum compute requirements for your subnet. See the [Minimum compute YAML configuration](./min_compute.yml).
-- Note that installation instructions differ based on your situation: For example, installing for local development and testing will require a few additional steps compared to installing for testnet. Similarly, installation instructions differ for a subnet owner vs a validator or a miner.
+Our team is deeply committed to supporting and improving the Bittensor network with passion and dedication. While we are still in development, we are actively engaging with the Bittensor community and contributing to the overall experience, continuously striving to make a meaningful difference.
-### Install
+Team members:
-- **Running locally**: Follow the step-by-step instructions described in this section: [Running Subnet Locally](./docs/running_on_staging.md).
-- **Running on Bittensor testnet**: Follow the step-by-step instructions described in this section: [Running on the Test Network](./docs/running_on_testnet.md).
-- **Running on Bittensor mainnet**: Follow the step-by-step instructions described in this section: [Running on the Main Network](./docs/running_on_mainnet.md).
+- **@Q.** - Business development
+- **@czlowiek** - Project manager & head developer
+- **@Konrad** - Subnet developer
+- **@bulubula** - Machine learning engineer
+- **@Izuael** - Mobile software engineer
+
+# **🛣️ ROADMAP**
---
-## Writing your own incentive mechanism
+Given the complexity of creating state-of-the-art cancer detection models, we plan to divide the process into 3 distinct phases.
+**Phase 1:**
-As described in [Quickstarter template](#quickstarter-template) section above, when you are ready to write your own incentive mechanism, update this template repository by editing the following files. The code in these files contains detailed documentation on how to update the template. Read the documentation in each of the files to understand how to update the template. There are multiple **TODO**s in each of the files identifying sections you should update. These files are:
-- `template/protocol.py`: Contains the definition of the wire-protocol used by miners and validators.
-- `neurons/miner.py`: Script that defines the miner's behavior, i.e., how the miner responds to requests from validators.
-- `neurons/validator.py`: This script defines the validator's behavior, i.e., how the validator requests information from the miners and determines the scores.
-- `template/forward.py`: Contains the definition of the validator's forward pass.
-- `template/reward.py`: Contains the definition of how validators reward miner responses.
+- [ ] Launch competition for melanoma skin cancer
+- [ ] Public model leaderboard based on evaluation criteria
+- [ ] Start marketing of Skin Scan app and Bittensor
-In addition to the above files, you should also update the following files:
-- `README.md`: This file contains the documentation for your project. Update this file to reflect your project's documentation.
-- `CONTRIBUTING.md`: This file contains the instructions for contributing to your project. Update this file to reflect your project's contribution guidelines.
-- `template/__init__.py`: This file contains the version of your project.
-- `setup.py`: This file contains the metadata about your project. Update this file to reflect your project's metadata.
-- `docs/`: This directory contains the documentation for your project. Update this directory to reflect your project's documentation.
+**Phase 2:**
-__Note__
-The `template` directory should also be renamed to your project name.
---
+- [ ] Run multiple competitions at once for other skin cancer types
+- [ ] Integrate skin cancer detection models within our Skin Scan app
+- [ ] Publicly release website for testing models
-# Writing your own subnet API
+**Phase 3:**
-To leverage the abstract `SubnetsAPI` in Bittensor, you can implement a standardized interface. This interface is used to interact with the Bittensor network and can be used by a client to interact with the subnet through its exposed axons.
+- [ ] Optimize skin cancer detection models to create one mixture-of-experts model which will run on mobile devices
+- [ ] Start the process for certifying models - FDA approval
+- [ ] Launch competitions for breast cancer
-What does Bittensor communication entail? Typically two processes, (1) preparing data for transit (creating and filling `synapse`s) and (2), processing the responses received from the `axon`(s).
+# **✅ PREREQUISITES**
+To install Bittensor and set up a wallet, follow the instructions in this link:
-This protocol uses a handler registry system to associate bespoke interfaces for subnets by implementing two simple abstract functions:
-- `prepare_synapse`
-- `process_responses`
-
-These can be implemented as extensions of the generic `SubnetsAPI` interface. E.g.:
+[PREREQUISITES](DOCS/prerequirements.md)
-This is abstract, generic, and takes(`*args`, `**kwargs`) for flexibility. See the extremely simple base class:
-```python
-class SubnetsAPI(ABC):
-    def __init__(self, wallet: "bt.wallet"):
-        self.wallet = wallet
-        self.dendrite = bt.dendrite(wallet=wallet)
+# **👍 RUNNING VALIDATOR**
+To run a validator, follow the instructions in this link:
-    async def __call__(self, *args, **kwargs):
-        return await self.query_api(*args, **kwargs)
+[RUNNING VALIDATOR](DOCS/validator.md)
-    @abstractmethod
-    def prepare_synapse(self, *args, **kwargs) -> Any:
-        """
-        Prepare the synapse-specific payload.
-        """
-        ...
+# **⛏️ RUNNING MINER**
+To run a miner, follow the instructions in this link:
-    @abstractmethod
-    def process_responses(self, responses: List[Union["bt.Synapse", Any]]) -> Any:
-        """
-        Process the responses from the network.
-        """
-        ...
+[RUNNING MINER](DOCS/miner.md)
-
-```
+# **🚀 GET INVOLVED**
+1. Visit our [![GitHub](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/safe-scan-ai/cancer-ai) to explore the code behind SAFE SCAN.
-Here is a toy example:
+2. Join our [![Discord](https://img.shields.io/discord/308323056592486420.svg)](https://discord.com/channels/1259812760280236122/1262383307832823809) to stay updated and engage with the team.
-```python
-from bittensor.subnets import SubnetsAPI
-from MySubnet import MySynapse
+3. Follow us on [![X (Twitter)](https://img.shields.io/badge/X-000000?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/SAFESCAN_AI) and help us spread the word.
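+
+As referenced in the Tokenomics & Economy section, the reward time decay can be summarized in a few lines of code. This is an illustrative sketch only, under the assumption that each 10% step is 10% of the original share, applied once per full 7-day period:
+
+```python
+def reward_share(days_at_top: int) -> float:
+    """Fraction of the original reward a top miner receives after days_at_top days."""
+    if days_at_top <= 30:
+        return 1.0  # full reward for the first 30 days
+    steps = (days_at_top - 30) // 7  # full 7-day periods past the first 30 days
+    return max(1.0 - 0.10 * steps, 0.10)  # gradual decay, floored at 10% of the original
+
+# e.g. reward_share(30) -> 1.0, reward_share(44) -> 0.8, reward_share(120) -> 0.1
+```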
-class MySynapseAPI(SubnetsAPI): - def __init__(self, wallet: "bt.wallet"): - super().__init__(wallet) - self.netuid = 99 - def prepare_synapse(self, prompt: str) -> MySynapse: - # Do any preparatory work to fill the synapse - data = do_prompt_injection(prompt) +# **📝 LICENSE** - # Fill the synapse for transit - synapse = StoreUser( - messages=[data], - ) - # Send it along - return synapse - - def process_responses(self, responses: List[Union["bt.Synapse", Any]]) -> str: - # Look through the responses for information required by your application - for response in responses: - if response.dendrite.status_code != 200: - continue - # potentially apply post processing - result_data = postprocess_data_from_response(response) - # return data to the client - return result_data -``` - -You can use a subnet API to the registry by doing the following: -1. Download and install the specific repo you want -1. Import the appropriate API handler from bespoke subnets -1. Make the query given the subnet specific API - - - -# Subnet Links -In order to see real-world examples of subnets in-action, see the `subnet_links.py` document or access them from inside the `template` package by: -```python -import template -template.SUBNET_LINKS -[{'name': 'sn0', 'url': ''}, - {'name': 'sn1', 'url': 'https://github.com/opentensor/prompting/'}, - {'name': 'sn2', 'url': 'https://github.com/bittranslateio/bittranslate/'}, - {'name': 'sn3', 'url': 'https://github.com/gitphantomman/scraping_subnet/'}, - {'name': 'sn4', 'url': 'https://github.com/manifold-inc/targon/'}, -... -] -``` - -## License This repository is licensed under the MIT License. -```text -# The MIT License (MIT) -# Copyright © 2024 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -``` + ``` + # The MIT License (MIT) + # Copyright © 2024 Opentensor Foundation + + # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + # documentation files (the “Software”), to deal in the Software without restriction, including without limitation + # the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + # The above copyright notice and this permission notice shall be included in all copies or substantial portions of + # the Software. 
+ + # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + # DEALINGS IN THE SOFTWARE. + ``` diff --git a/template/__init__.py b/cancer_ai/__init__.py similarity index 92% rename from template/__init__.py rename to cancer_ai/__init__.py index cb07b8c00..c295fd58e 100644 --- a/template/__init__.py +++ b/cancer_ai/__init__.py @@ -18,8 +18,7 @@ # DEALINGS IN THE SOFTWARE. # TODO(developer): Change this value when updating your code base. -# Define the version of the template module. -__version__ = "0.0.0" +__version__ = "0.6.2" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) @@ -31,5 +30,3 @@ from . import protocol from . import base from . import validator -from . import api -from .subnet_links import SUBNET_LINKS diff --git a/template/api/__init__.py b/cancer_ai/base/__init__.py similarity index 100% rename from template/api/__init__.py rename to cancer_ai/base/__init__.py diff --git a/template/base/miner.py b/cancer_ai/base/base_miner.py similarity index 62% rename from template/base/miner.py rename to cancer_ai/base/base_miner.py index 1788e24bd..e203b21c7 100644 --- a/template/base/miner.py +++ b/cancer_ai/base/base_miner.py @@ -23,8 +23,8 @@ import bittensor as bt -from template.base.neuron import BaseNeuron -from template.utils.config import add_miner_args +from .neuron import BaseNeuron +from ..utils.config import add_miner_args from typing import Union @@ -43,27 +43,6 @@ def add_args(cls, parser: argparse.ArgumentParser): def __init__(self, config=None): super().__init__(config=config) - # Warn if allowing incoming requests from anyone. - if not self.config.blacklist.force_validator_permit: - bt.logging.warning( - "You are allowing non-validators to send requests to your miner. This is a security risk." - ) - if self.config.blacklist.allow_non_registered: - bt.logging.warning( - "You are allowing non-registered entities to send requests to your miner. This is a security risk." - ) - # The axon handles request processing, allowing validators to send this miner requests. - self.axon = bt.axon(wallet=self.wallet, config=self.config() if callable(self.config) else self.config) - - # Attach determiners which functions are called when servicing a request. - bt.logging.info(f"Attaching forward function to miner axon.") - self.axon.attach( - forward_fn=self.forward, - blacklist_fn=self.blacklist, - priority_fn=self.priority, - ) - bt.logging.info(f"Axon created: {self.axon}") - # Instantiate runners self.should_exit: bool = False self.is_running: bool = False @@ -71,41 +50,8 @@ def __init__(self, config=None): self.lock = asyncio.Lock() def run(self): - """ - Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors. - - This function performs the following primary tasks: - 1. Check for registration on the Bittensor network. - 2. Starts the miner's axon, making it active on the network. - 3. Periodically resynchronizes with the chain; updating the metagraph with the latest network state and setting weights. 
- - The miner continues its operations until `should_exit` is set to True or an external interruption occurs. - During each epoch of its operation, the miner waits for new blocks on the Bittensor network, updates its - knowledge of the network (metagraph), and sets its weights. This process ensures the miner remains active - and up-to-date with the network's latest state. - - Note: - - The function leverages the global configurations set during the initialization of the miner. - - The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests. - - Raises: - KeyboardInterrupt: If the miner is stopped by a manual interruption. - Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis. - """ - # Check that miner is registered on the network. self.sync() - - # Serve passes the axon information to the network + netuid we are hosting on. - # This will auto-update if the axon port of external ip have changed. - bt.logging.info( - f"Serving miner axon {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}" - ) - self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor) - - # Start starts the miner's axon, making it active on the network. - self.axon.start() - bt.logging.info(f"Miner starting at block: {self.block}") # This loop maintains the miner's operations until intentionally stopped. @@ -186,7 +132,7 @@ def __exit__(self, exc_type, exc_value, traceback): def resync_metagraph(self): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - bt.logging.info("resync_metagraph()") + bt.logging.info("resync_metagraph() miner") # Sync the metagraph. self.metagraph.sync(subtensor=self.subtensor) diff --git a/template/base/validator.py b/cancer_ai/base/base_validator.py similarity index 66% rename from template/base/validator.py rename to cancer_ai/base/base_validator.py index c1ca07edc..7052e6680 100644 --- a/template/base/validator.py +++ b/cancer_ai/base/base_validator.py @@ -17,7 +17,9 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. +from abc import abstractmethod +import sys import copy import numpy as np import asyncio @@ -25,16 +27,20 @@ import threading import bittensor as bt -from typing import List, Union +from typing import Union from traceback import print_exception -from template.base.neuron import BaseNeuron -from template.base.utils.weight_utils import ( +from .neuron import BaseNeuron +from .utils.weight_utils import ( process_weights_for_netuid, convert_weights_and_uids_for_emit, -) # TODO: Replace when bittensor switches to numpy -from template.mock import MockDendrite -from template.utils.config import add_validator_args +) +from ..mock import MockDendrite +from ..utils.config import add_validator_args + +from cancer_ai.validator.rewarder import CompetitionResultsStore +from cancer_ai.validator.models import OrganizationDataReferenceFactory +from .. import __spec_version__ as spec_version class BaseValidatorNeuron(BaseNeuron): @@ -49,7 +55,7 @@ def add_args(cls, parser: argparse.ArgumentParser): super().add_args(parser) add_validator_args(cls, parser) - def __init__(self, config=None): + def __init__(self, config=None, exit_event: threading.Event = None): super().__init__(config=config) # Save a copy of the hotkeys to local memory. 
@@ -65,9 +71,15 @@ def __init__(self, config=None): # Set up initial scoring weights for validation bt.logging.info("Building validation weights.") self.scores = np.zeros(self.metagraph.n, dtype=np.float32) - + self.organizations_data_references = OrganizationDataReferenceFactory.get_instance() + self.competition_results_store = CompetitionResultsStore() + self.org_latest_updates = {} + # add log with file path for loading state + state_file_path = self.config.neuron.full_path + "/state.json" + bt.logging.info(f"Loading state from {state_file_path}") + self.load_state() # Init sync with the network. Updates the metagraph. - self.sync() + self.sync(force_sync=True) # Serve axon to enable external connections. if not self.config.neuron.axon_off: @@ -83,6 +95,7 @@ def __init__(self, config=None): self.is_running: bool = False self.thread: Union[threading.Thread, None] = None self.lock = asyncio.Lock() + self.exit_event = exit_event def serve_axon(self): """Serve axon to enable external connections.""" @@ -107,11 +120,9 @@ def serve_axon(self): bt.logging.error(f"Failed to create Axon initialize with exception: {e}") pass - async def concurrent_forward(self): - coroutines = [ - self.forward() for _ in range(self.config.neuron.num_concurrent_forwards) - ] - await asyncio.gather(*coroutines) + @abstractmethod + def concurrent_forward(self): + pass def run(self): """ @@ -141,8 +152,6 @@ def run(self): # This loop maintains the validator's operations until intentionally stopped. try: while True: - bt.logging.info(f"step({self.step}) block({self.block})") - # Run multiple forwards concurrently. self.loop.run_until_complete(self.concurrent_forward()) @@ -152,9 +161,7 @@ def run(self): # Sync metagraph and potentially set weights. self.sync() - self.step += 1 - # If someone intentionally stops the validator, it'll safely terminate operations. except KeyboardInterrupt: self.axon.stop() @@ -163,38 +170,73 @@ def run(self): # In case of unforeseen errors, the validator will log the error and continue operations. except Exception as err: - bt.logging.error(f"Error during validation: {str(err)}") - bt.logging.debug(str(print_exception(type(err), err, err.__traceback__))) + bt.logging.error(f"VALIDATOR FAILURE: Error during validation: {str(err)}") + bt.logging.error(f"Error type: {type(err).__name__}") + bt.logging.error(f"Error occurred in method: {self.concurrent_forward.__name__}") + bt.logging.error(f"Current step: {self.step}") + + # Log the full stack trace + import traceback + stack_trace = traceback.format_exc() + bt.logging.error(f"Full stack trace:\n{stack_trace}") + bt.logging.error(str(print_exception(type(err), err, err.__traceback__))) + + # Log additional context information + bt.logging.error(f"Validator state: running={self.is_running}, should_exit={self.should_exit}") + + if self.exit_event: + bt.logging.error("Setting exit event and terminating validator", exc_info=True) + self.exit_event.set() + sys.exit(1) def run_in_background_thread(self): """ Starts the validator's operations in a background thread upon entering the context. This method facilitates the use of the validator in a 'with' statement. 
""" + bt.logging.info(f"run_in_background_thread called with is_running={self.is_running}") + + # Get the current call stack to see what's calling run_in_background_thread + import traceback + stack_trace = traceback.format_stack() + bt.logging.info(f"Call stack for run_in_background_thread:\n{''.join(stack_trace)}") + if not self.is_running: - bt.logging.debug("Starting validator in background thread.") + bt.logging.info("Starting validator in background thread.") self.should_exit = False + bt.logging.info(f"Set should_exit to {self.should_exit}, creating thread") self.thread = threading.Thread(target=self.run, daemon=True) + bt.logging.info(f"Starting thread with daemon={self.thread.daemon}") self.thread.start() self.is_running = True - bt.logging.debug("Started") + bt.logging.info(f"Thread started, set is_running to {self.is_running}") + bt.logging.info("Validator started successfully in background thread") + else: + bt.logging.warning("Attempted to start validator that is already running") def stop_run_thread(self): """ Stops the validator's operations that are running in the background thread. """ + bt.logging.info(f"stop_run_thread called with is_running={self.is_running}") + import traceback + stack_trace = traceback.format_stack() + bt.logging.info(f"Call stack for stop_run_thread:\n{''.join(stack_trace)}") + if self.is_running: - bt.logging.debug("Stopping validator in background thread.") + bt.logging.info("Stopping validator in background thread.") self.should_exit = True + bt.logging.info(f"Set should_exit to {self.should_exit}, joining thread") self.thread.join(5) self.is_running = False - bt.logging.debug("Stopped") + bt.logging.info(f"Thread joined, set is_running to {self.is_running}") + bt.logging.info("Validator stopped successfully") def __enter__(self): self.run_in_background_thread() return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback_obj): """ Stops the validator's background operations upon exiting the context. This method facilitates the use of the validator in a 'with' statement. @@ -204,20 +246,39 @@ def __exit__(self, exc_type, exc_value, traceback): None if the context was exited without an exception. exc_value: The instance of the exception that caused the context to be exited. None if the context was exited without an exception. - traceback: A traceback object encoding the stack trace. + traceback_obj: A traceback object encoding the stack trace. None if the context was exited without an exception. 
""" + bt.logging.info(f"__exit__ called with exc_type={exc_type}, exc_value={exc_value}") + + # Get the current call stack to see what's calling __exit__ + import traceback + stack_trace = traceback.format_stack() + bt.logging.info(f"Call stack for __exit__:\n{''.join(stack_trace)}") + + # If there's an exception, log it + if exc_type is not None: + bt.logging.error(f"Exception in context: {exc_type.__name__}: {exc_value}") + if traceback_obj: + bt.logging.error(f"Exception traceback: {''.join(traceback.format_tb(traceback_obj))}") + if self.is_running: - bt.logging.debug("Stopping validator in background thread.") + bt.logging.info("Stopping validator in background thread from __exit__ method.") self.should_exit = True + bt.logging.info(f"Set should_exit to {self.should_exit}, joining thread") self.thread.join(5) self.is_running = False - bt.logging.debug("Stopped") + bt.logging.info(f"Thread joined, set is_running to {self.is_running}") + bt.logging.info("Validator stopped successfully from __exit__ method") def set_weights(self): """ Sets the validator weights to the metagraph hotkeys based on the scores it has received from the miners. The weights determine the trust and incentive level the validator assigns to miner nodes on the network. """ + # test mode, don't commit weights + if self.config.filesystem_evaluation: + bt.logging.debug("Skipping settings weights in filesystem evaluation mode") + return # Check if self.scores contains any NaN values and log a warning if it does. if np.isnan(self.scores).any(): @@ -237,6 +298,18 @@ def set_weights(self): # Compute raw_weights safely raw_weights = self.scores / norm + # Ensure UID 0 gets 100% of weights for burning + # Store the original weight for UID 0 for logging + original_uid0_weight = raw_weights[0] + + # # Set UID 0 to 100% + # raw_weights[0] = 1.0 + + # # Set all other UIDs to 0 + # raw_weights[1:] = 0.0 + + # bt.logging.info(f"Set UID 0 weight from {original_uid0_weight:.4f} to {raw_weights[0]:.4f} (100%)") + bt.logging.debug("raw_weights", raw_weights) bt.logging.debug("raw_weight_uids", str(self.metagraph.uids.tolist())) # Process the raw weights to final_weights via subtensor limitations. @@ -253,6 +326,14 @@ def set_weights(self): bt.logging.debug("processed_weights", processed_weights) bt.logging.debug("processed_weight_uids", processed_weight_uids) + # Verify UID 0 weight after processing + # if 0 in processed_weight_uids: + # uid0_index = np.where(processed_weight_uids == 0)[0][0] + # uid0_processed_weight = processed_weights[uid0_index] + # total_processed_weight = np.sum(processed_weights) + # uid0_percentage = (uid0_processed_weight / total_processed_weight) * 100 if total_processed_weight > 0 else 0 + # bt.logging.info(f"UID 0 weight after processing: {uid0_processed_weight:.4f} ({uid0_percentage:.1f}% of total)") + # Convert to uint16 weights and uids. ( uint_uids, @@ -271,16 +352,16 @@ def set_weights(self): weights=uint_weights, wait_for_finalization=False, wait_for_inclusion=False, - version_key=self.spec_version, + version_key=spec_version ) if result is True: bt.logging.info("set_weights on chain successfully!") else: bt.logging.error("set_weights failed", msg) - def resync_metagraph(self): + def resync_metagraph(self, force_sync=False): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - bt.logging.info("resync_metagraph()") + bt.logging.info("resync_metagraph() validator") # Copies state of metagraph before syncing. 
previous_metagraph = copy.deepcopy(self.metagraph) @@ -289,7 +370,7 @@ def resync_metagraph(self): self.metagraph.sync(subtensor=self.subtensor) # Check if the metagraph axon info has changed. - if previous_metagraph.axons == self.metagraph.axons: + if previous_metagraph.axons == self.metagraph.axons and not force_sync: return bt.logging.info( @@ -312,69 +393,10 @@ def resync_metagraph(self): # Update the hotkeys. self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) - def update_scores(self, rewards: np.ndarray, uids: List[int]): - """Performs exponential moving average on the scores based on the rewards received from the miners.""" - - # Check if rewards contains NaN values. - if np.isnan(rewards).any(): - bt.logging.warning(f"NaN values detected in rewards: {rewards}") - # Replace any NaN values in rewards with 0. - rewards = np.nan_to_num(rewards, nan=0) - - # Ensure rewards is a numpy array. - rewards = np.asarray(rewards) - - # Check if `uids` is already a numpy array and copy it to avoid the warning. - if isinstance(uids, np.ndarray): - uids_array = uids.copy() - else: - uids_array = np.array(uids) - - # Handle edge case: If either rewards or uids_array is empty. - if rewards.size == 0 or uids_array.size == 0: - bt.logging.info(f"rewards: {rewards}, uids_array: {uids_array}") - bt.logging.warning( - "Either rewards or uids_array is empty. No updates will be performed." - ) - return - - # Check if sizes of rewards and uids_array match. - if rewards.size != uids_array.size: - raise ValueError( - f"Shape mismatch: rewards array of shape {rewards.shape} " - f"cannot be broadcast to uids array of shape {uids_array.shape}" - ) - - # Compute forward pass rewards, assumes uids are mutually exclusive. - # shape: [ metagraph.n ] - scattered_rewards: np.ndarray = np.zeros_like(self.scores) - scattered_rewards[uids_array] = rewards - bt.logging.debug(f"Scattered rewards: {rewards}") - - # Update scores with rewards produced by this step. - # shape: [ metagraph.n ] - alpha: float = self.config.neuron.moving_average_alpha - self.scores: np.ndarray = alpha * scattered_rewards + (1 - alpha) * self.scores - bt.logging.debug(f"Updated moving avg scores: {self.scores}") - + @abstractmethod def save_state(self): """Saves the state of the validator to a file.""" - bt.logging.info("Saving validator state.") - - # Save the state of the validator to file. - np.savez( - self.config.neuron.full_path + "/state.npz", - step=self.step, - scores=self.scores, - hotkeys=self.hotkeys, - ) - + + @abstractmethod def load_state(self): """Loads the state of the validator from a file.""" - bt.logging.info("Loading validator state.") - - # Load the state of the validator from file. - state = np.load(self.config.neuron.full_path + "/state.npz") - self.step = state["step"] - self.scores = state["scores"] - self.hotkeys = state["hotkeys"] diff --git a/template/base/neuron.py b/cancer_ai/base/neuron.py similarity index 52% rename from template/base/neuron.py rename to cancer_ai/base/neuron.py index 9b2ce7b28..33da3cf10 100644 --- a/template/base/neuron.py +++ b/cancer_ai/base/neuron.py @@ -16,17 +16,20 @@ # DEALINGS IN THE SOFTWARE. import copy -import typing +import sys +import random +import time +import sys import bittensor as bt from abc import ABC, abstractmethod # Sync calls set weights and also resyncs the metagraph. 
-from template.utils.config import check_config, add_args, config -from template.utils.misc import ttl_get_block -from template import __spec_version__ as spec_version -from template.mock import MockSubtensor, MockMetagraph +from ..utils.config import check_config, add_args, path_config +from ..utils.misc import ttl_get_block +from .. import __spec_version__ as spec_version +from ..mock import MockSubtensor, MockMetagraph class BaseNeuron(ABC): @@ -48,7 +51,7 @@ def add_args(cls, parser): @classmethod def config(cls): - return config(cls) + return path_config(cls) subtensor: "bt.subtensor" wallet: "bt.wallet" @@ -81,12 +84,8 @@ def __init__(self, config=None): # The wallet holds the cryptographic key pairs for the miner. if self.config.mock: self.wallet = bt.MockWallet(config=self.config) - self.subtensor = MockSubtensor( - self.config.netuid, wallet=self.wallet - ) - self.metagraph = MockMetagraph( - self.config.netuid, subtensor=self.subtensor - ) + self.subtensor = MockSubtensor(self.config.netuid, wallet=self.wallet) + self.metagraph = MockMetagraph(self.config.netuid, subtensor=self.subtensor) else: self.wallet = bt.wallet(config=self.config) self.subtensor = bt.subtensor(config=self.config) @@ -100,58 +99,105 @@ def __init__(self, config=None): self.check_registered() # Each miner gets a unique identity (UID) in the network for differentiation. - self.uid = self.metagraph.hotkeys.index( - self.wallet.hotkey.ss58_address - ) + self.uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address) bt.logging.info( f"Running neuron on subnet: {self.config.netuid} with uid {self.uid} using network: {self.subtensor.chain_endpoint}" ) self.step = 0 - @abstractmethod - async def forward(self, synapse: bt.Synapse) -> bt.Synapse: - ... + self._last_updated_block = self.metagraph.last_update[self.uid] @abstractmethod - def run(self): - ... + def run(self): ... - def sync(self): + def sync(self, force_sync: bool = False): """ - Wrapper for synchronizing the state of the network for the given miner or validator. + Synchronize network state, retrying up to 5 times with fixed back-off: + 20s → 40s → 80s → 160s → 300s (total = 600s) + Exits with sys.exit(1) only if *all* retries fail. """ - # Ensure miner or validator hotkey is still registered on the network. - self.check_registered() - - if self.should_sync_metagraph(): - self.resync_metagraph() - - if self.should_set_weights(): - self.set_weights() - - # Always save state. - self.save_state() + delays = [20, 40, 80, 160, 300] + + for attempt, delay in enumerate(delays, start=1): + try: + # Ensure the hotkey is still registered. + self.check_registered() + + # If filesystem evaluation mode, no need to retry. + if self.config.filesystem_evaluation: + break + + # Resync metagraph if needed or forced. + if self.should_sync_metagraph() or force_sync: + bt.logging.info("Resyncing metagraph in progress.") + self.resync_metagraph(force_sync=True) + self.save_state() + + # Set weights if needed. + if self.should_set_weights(): + self.set_weights() + self._last_updated_block = self.block + self.save_state() + + break + + except BrokenPipeError as e: + bt.logging.error( + f"[Attempt {attempt}] BrokenPipeError: {e}. " + f"Sleeping {delay}s before retry…", exc_info=True + ) + except Exception as e: + bt.logging.error( + f"[Attempt {attempt}] Unexpected error: {e}. " + f"Sleeping {delay}s before retry…", exc_info=True + ) + + # back-off before next attempt + time.sleep(delay) - def check_registered(self): - # --- Check for registration. 
- if not self.subtensor.is_hotkey_registered( - netuid=self.config.netuid, - hotkey_ss58=self.wallet.hotkey.ss58_address, - ): + else: bt.logging.error( - f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}." - f" Please register the hotkey using `btcli subnets register` before trying again" + f"Failed to sync metagraph after {len(delays)} retries (≈10 minutes); exiting.", exc_info=True ) - exit() + sys.exit(1) + + def check_registered(self): + retries = 3 + while retries > 0: + try: + if not hasattr(self, "is_registered"): + self.is_registered = self.subtensor.is_hotkey_registered( + netuid=self.config.netuid, + hotkey_ss58=self.wallet.hotkey.ss58_address, + ) + if not self.is_registered: + bt.logging.error( + f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}." + f" Please register the hotkey using `btcli subnets register` before trying again", + exc_info=True + ) + sys.exit() + + return self.is_registered + + except Exception as e: + bt.logging.error(f"Error checking validator's hotkey registration: {e}", exc_info=True) + retries -= 1 + if retries == 0: + sys.exit() + else: + bt.logging.info(f"Retrying... {retries} retries left.") def should_sync_metagraph(self): """ Check if enough epoch blocks have elapsed since the last checkpoint to sync. """ - return ( - self.block - self.metagraph.last_update[self.uid] - ) > self.config.neuron.epoch_length + elapsed = self.block - self._last_updated_block + + # Only set weights if epoch has passed + return elapsed > self.config.neuron.epoch_length + def should_set_weights(self) -> bool: # Don't set weights on initialization. if self.step == 0: @@ -161,19 +207,7 @@ def should_set_weights(self) -> bool: if self.config.neuron.disable_set_weights: return False - # Define appropriate logic for when set weights. - return ( - (self.block - self.metagraph.last_update[self.uid]) - > self.config.neuron.epoch_length - and self.neuron_type != "MinerNeuron" - ) # don't set weights if you're a miner - - def save_state(self): - bt.logging.warning( - "save_state() not implemented for this neuron. You can implement this function to save model checkpoints or other useful data." - ) + elapsed = self.block - self._last_updated_block - def load_state(self): - bt.logging.warning( - "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data." - ) + # Only set weights if epoch has passed and this isn't a MinerNeuron. 
+ return elapsed > self.config.neuron.epoch_length and self.neuron_type != "MinerNeuron" diff --git a/template/base/__init__.py b/cancer_ai/base/utils/__init__.py similarity index 100% rename from template/base/__init__.py rename to cancer_ai/base/utils/__init__.py diff --git a/template/base/utils/weight_utils.py b/cancer_ai/base/utils/weight_utils.py similarity index 95% rename from template/base/utils/weight_utils.py rename to cancer_ai/base/utils/weight_utils.py index 26133efde..89f260e2e 100644 --- a/template/base/utils/weight_utils.py +++ b/cancer_ai/base/utils/weight_utils.py @@ -159,15 +159,21 @@ def process_weights_for_netuid( non_zero_weight_idx = np.argwhere(weights > 0).squeeze() non_zero_weight_uids = uids[non_zero_weight_idx] non_zero_weights = weights[non_zero_weight_idx] + + if not isinstance(non_zero_weights, np.ndarray): + non_zero_weights = np.array(non_zero_weights) + bittensor.logging.debug("Converting non_zero_weights to numpy array") + if non_zero_weights.size == 0 or metagraph.n < min_allowed_weights: bittensor.logging.warning("No non-zero weights returning all ones.") final_weights = np.ones(metagraph.n) / metagraph.n bittensor.logging.debug("final_weights", final_weights) return np.arange(len(final_weights)), final_weights + elif non_zero_weights.size < min_allowed_weights: bittensor.logging.warning( - "No non-zero weights less then min allowed weight, returning all ones." + "No non-zero weights less than min allowed weight, returning all ones." ) weights = ( np.ones(metagraph.n) * 1e-5 @@ -182,9 +188,7 @@ def process_weights_for_netuid( bittensor.logging.debug("non_zero_weights", non_zero_weights) # Compute the exclude quantile and find the weights in the lowest quantile - max_exclude = max(0, len(non_zero_weights) - min_allowed_weights) / len( - non_zero_weights - ) + max_exclude = max(0, non_zero_weights.size - min_allowed_weights) / non_zero_weights.size exclude_quantile = min([quantile, max_exclude]) lowest_quantile = np.quantile(non_zero_weights, exclude_quantile) bittensor.logging.debug("max_exclude", max_exclude) diff --git a/cancer_ai/chain_models_store.py b/cancer_ai/chain_models_store.py new file mode 100644 index 000000000..101c81094 --- /dev/null +++ b/cancer_ai/chain_models_store.py @@ -0,0 +1,176 @@ +import functools +from typing import Optional, Type +import asyncio +from functools import wraps + +import bittensor as bt +from pydantic import BaseModel, Field +from retry import retry +from websockets.client import OPEN as WS_OPEN + + + +class ChainMinerModel(BaseModel): + """Uniquely identifies a trained model""" + + competition_id: Optional[str] = Field(description="The competition id") + hf_repo_id: Optional[str] = Field(description="Hugging Face repository id.") + hf_model_filename: Optional[str] = Field(description="Hugging Face model filename.") + hf_repo_type: Optional[str] = Field( + description="Hugging Face repository type.", default="model" + ) + hf_code_filename: Optional[str] = Field( + description="Hugging Face code zip filename." + ) + block: Optional[int] = Field( + description="Block on which this model was claimed on the chain." + ) + + model_hash: Optional[str] = Field( + description="8-byte SHA-1 hash of the model file from Hugging Face." 
+    )
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    def to_compressed_str(self) -> str:
+        """Returns a compressed string representation."""
+        return f"{self.hf_repo_id}:{self.hf_model_filename}:{self.hf_code_filename}:{self.competition_id}:{self.hf_repo_type}:{self.model_hash}"
+
+    @property
+    def hf_link(self) -> str:
+        """Returns the Hugging Face link for the model."""
+        return f"https://huggingface.co/{self.hf_repo_id}/blob/main/{self.hf_model_filename}"
+
+    @classmethod
+    def from_compressed_str(cls, cs: str) -> Optional["ChainMinerModel"]:
+        """Returns an instance of this class from a compressed string representation, or None if the string is malformed."""
+        tokens = cs.split(":")
+        if len(tokens) != 6:
+            return None
+        return cls(
+            hf_repo_id=tokens[0],
+            hf_model_filename=tokens[1],
+            hf_code_filename=tokens[2],
+            competition_id=tokens[3],
+            hf_repo_type=tokens[4],
+            model_hash=tokens[5],
+            block=None,
+        )
+
+
+class ChainModelMetadata:
+    """Chain-based implementation for storing and retrieving metadata about a model."""
+
+    def __init__(
+        self,
+        subtensor: bt.subtensor,
+        netuid: int,
+        wallet: Optional[bt.wallet] = None,
+    ):
+        self.subtensor = subtensor
+        self.wallet = (
+            wallet  # Wallet is only needed to write to the chain, not to read.
+        )
+        self.netuid = netuid
+
+        self._orig_ws_connect = self.subtensor.substrate.connect
+        self.subtensor.substrate.connect = self._ws_connect
+
+        try:
+            ws = self.subtensor.substrate.connect()
+            bt.logging.info(f"[ChainModelMetadata] Initial WS state: {ws.state}")
+        except Exception as e:
+            bt.logging.error("Initial WS connect failed: %s", e, exc_info=True)
+
+        self.subnet_metadata = self.subtensor.metagraph(self.netuid)
+
+
+    def _ws_connect(self, *args, **kwargs):
+        """
+        Replacement for substrate.connect().
+        Reuses the existing WebSocketClientProtocol if it is in State.OPEN;
+        otherwise performs a fresh handshake via the original connect().
+        """
+        # Check current socket
+        current = getattr(self.subtensor.substrate, "ws", None)
+        if current is not None and current.state == WS_OPEN:
+            return current

+        # If socket not open, reconnect
+        bt.logging.warning("⚠️ Subtensor WebSocket not OPEN, reconnecting…")
+        try:
+            new_ws = self._orig_ws_connect(*args, **kwargs)
+        except Exception as e:
+            bt.logging.error("Failed to reconnect WebSocket: %s", e, exc_info=True)
+            raise
+
+        # Update the substrate.ws attribute so future calls reuse this socket
+        setattr(self.subtensor.substrate, "ws", new_ws)
+        return new_ws
+
+    async def store_model_metadata(self, model_id: ChainMinerModel):
+        """Stores model metadata on this subnet for a specific wallet."""
+        if self.wallet is None:
+            raise ValueError("No wallet available to write to the chain.")
+
+        # Note: commit() below talks to the chain directly; a hang here could be
+        # guarded by wrapping the call in run_in_subprocess with a timeout.
+        self.subtensor.commit(
+            self.wallet,
+            self.netuid,
+            model_id.to_compressed_str(),
+        )
+
+    async def retrieve_model_metadata(self, hotkey: str, uid: int) -> ChainMinerModel:
+        """Retrieves model metadata on this subnet for a specific hotkey."""
+        await asyncio.sleep(2)  # temporary backoff to avoid HTTP 429 rate limiting
+
+        metadata = get_metadata(self.subtensor, self.netuid, hotkey)
+
+        if metadata is None:
+            raise ValueError(f"No metadata found for hotkey {hotkey}")
+
+        chain_str = get_commitment(self.subtensor, self.netuid, uid)
+
+        if chain_str is None:
+            raise ValueError(
+                f"No chain string found for hotkey '{hotkey}' and uid {uid}"
+            )
+
+        model = ChainMinerModel.from_compressed_str(chain_str)
+        bt.logging.debug(f"Model: {model}")
+        if model is None:
+            raise ValueError(
+                f"Metadata might be in an old format or invalid for hotkey '{hotkey}'. Raw value: {chain_str}"
+            )
+
+        # The block id at which the metadata is stored
+        model.block = metadata["block"]
+        return model
+
+    def close(self):
+        try:
+            bt.logging.debug("Closing ChainModelMetadata websocket connection.")
+            self.subtensor.substrate.close_websocket()
+        except Exception:
+            pass
+
+@retry(tries=12, delay=1, backoff=2, max_delay=30)
+def get_metadata(subtensor, netuid, hotkey):
+    """Synchronous metadata fetch with retry logic."""
+    try:
+        return bt.core.extrinsics.serving.get_metadata(subtensor, netuid, hotkey)
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to get metadata from chain for hotkey '{hotkey}': {e}"
+        ) from e
+
+@retry(tries=12, delay=0.5, backoff=2, max_delay=20)
+def get_commitment(subtensor, netuid, uid):
+    """Synchronous commitment fetch with exponential backoff and contextual errors."""
+    try:
+        return subtensor.get_commitment(netuid, uid)
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to get commitment from chain for uid={uid}: {e}"
+        ) from e
\ No newline at end of file
diff --git a/template/mock.py b/cancer_ai/mock.py
similarity index 84%
rename from template/mock.py
rename to cancer_ai/mock.py
index 2b027ffb6..9138e52c8 100644
--- a/template/mock.py
+++ b/cancer_ai/mock.py
@@ -15,24 +15,26 @@ def __init__(self, netuid, n=16, wallet=None, network="mock"):
         self.create_subnet(netuid)

         # Register ourself (the validator) as a neuron at uid=0
-        if wallet is not None:
-            self.force_register_neuron(
-                netuid=netuid,
-                hotkey=wallet.hotkey.ss58_address,
-                coldkey=wallet.coldkey.ss58_address,
-                balance=100000,
-                stake=100000,
-            )

-        # Register n mock neurons who will be miners
-        for i in range(1, n + 1):
-            self.force_register_neuron(
-                netuid=netuid,
-                hotkey=f"miner-hotkey-{i}",
-                coldkey="mock-coldkey",
-                balance=100000,
-                stake=100000,
-            )
+        # TODO: not supported in bittensor==8.0
+        # if wallet is not None:
+        #     self.force_register_neuron(
+        #         netuid=netuid,
+        #         hotkey=wallet.hotkey.ss58_address,
+        #         coldkey=wallet.coldkey.ss58_address,
+        #         balance=100000,
+        #         stake=100000,
+        #     )
+
+        # # Register n mock neurons who will be miners
+        # for i in range(1, n + 1):
+        #     self.force_register_neuron(
+        #         netuid=netuid,
+        #         hotkey=f"miner-hotkey-{i}",
+        #         coldkey="mock-coldkey",
+        #         balance=100000,
+        #         stake=100000,
+        #     )

 class MockMetagraph(bt.metagraph):
diff --git a/template/protocol.py b/cancer_ai/protocol.py
similarity index 100%
rename from template/protocol.py
rename to cancer_ai/protocol.py
diff --git a/template/utils/__init__.py b/cancer_ai/utils/__init__.py
similarity index 100%
rename from template/utils/__init__.py
rename to cancer_ai/utils/__init__.py
diff --git a/template/utils/config.py b/cancer_ai/utils/config.py
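For orientation, here is a minimal sketch of the round trip through the colon-separated commitment format defined above. The repository name and hash below are hypothetical placeholder values, not real submissions.

```python
# Sketch of the six-field commitment format used by ChainMinerModel.
# All values below are illustrative placeholders.
from cancer_ai.chain_models_store import ChainMinerModel

model = ChainMinerModel(
    competition_id="melanoma-1",
    hf_repo_id="example-user/melanoma-model",  # hypothetical repo
    hf_model_filename="model.onnx",
    hf_code_filename="code.zip",
    hf_repo_type="model",
    block=None,
    model_hash="a1b2c3d4",  # 8-character truncated SHA-1
)

cs = model.to_compressed_str()
# -> "example-user/melanoma-model:model.onnx:code.zip:melanoma-1:model:a1b2c3d4"

restored = ChainMinerModel.from_compressed_str(cs)
assert restored is not None
assert restored.hf_repo_id == model.hf_repo_id
assert restored.block is None  # block is only known once read back from the chain
```

Note that the parser splits on ':' and expects exactly six tokens, so none of the stored fields may themselves contain a colon.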
similarity index 64%
rename from template/utils/config.py
rename to cancer_ai/utils/config.py
index 99c610e9b..613a07ed1 100644
--- a/template/utils/config.py
+++ b/cancer_ai/utils/config.py
@@ -22,6 +22,7 @@
 import bittensor as bt
 from .logging import setup_events_logger

+
 def is_cuda_available():
     try:
         output = subprocess.check_output(["nvidia-smi", "-L"], stderr=subprocess.STDOUT)
@@ -37,6 +38,7 @@ def is_cuda_available():
         pass
     return "cpu"

+
 def check_config(cls, config: "bt.Config"):
     r"""Checks/validates the config namespace object."""
     bt.logging.check_config(config)
@@ -50,7 +52,7 @@ def check_config(cls, config: "bt.Config"):
             config.neuron.name,
         )
     )
-    print("full path:", full_path)
+    print("Log path:", full_path)
     config.neuron.full_path = os.path.expanduser(full_path)
     if not os.path.exists(config.neuron.full_path):
         os.makedirs(config.neuron.full_path, exist_ok=True)
@@ -81,7 +83,7 @@ def add_args(cls, parser):
         "--neuron.epoch_length",
         type=int,
         help="The default epoch length (how often we set weights, measured in 12 second blocks).",
-        default=100,
+        default=180,
     )

     parser.add_argument(
@@ -126,43 +128,113 @@ def add_args(cls, parser):
         default="",
     )

+    parser.add_argument(
+        "--models_query_cutoff",
+        type=int,
+        help="The cutoff for the models query, in minutes.",
+        default=30,
+    )
+
+    parser.add_argument(
+        "--datasets_config_hf_repo_id",
+        type=str,
+        help="The reference to the Hugging Face datasets config.",
+        default="safescanai/competition-configuration",
+    )
+

 def add_miner_args(cls, parser):
     """Add miner specific arguments to the parser."""
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        help="Path for loading the starting model related to a training run.",
+        default="./models",
+    )

     parser.add_argument(
-        "--neuron.name",
+        "--hf_repo_id",
         type=str,
-        help="Trials for this neuron go in neuron.root / (wallet_cold - wallet_hot) / neuron.name. ",
-        default="miner",
+        help="Hugging Face model repository ID",
+        default="",
     )

     parser.add_argument(
-        "--blacklist.force_validator_permit",
-        action="store_true",
-        help="If set, we will force incoming requests to have a permit.",
-        default=False,
+        "--hf_model_name",
+        type=str,
+        help="Filename of the model to push to Hugging Face.",
+    )
+    parser.add_argument(
+        "--hf_code_filename",
+        type=str,
+        help="Filename of the code zip to push to Hugging Face.",
+    )
+
+    parser.add_argument(
+        "--action",
+        choices=["submit", "evaluate", "upload"],
+        help="Action to perform.",
+    )
+
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        help="Path to the ONNX model, used for evaluation",
     )

     parser.add_argument(
-        "--blacklist.allow_non_registered",
+        "--dataset_dir",
+        type=str,
+        help="Path for storing datasets.",
+        default="./datasets",
+    )
+
+    parser.add_argument(
+        "--clean_after_run",
         action="store_true",
-        help="If set, miners will accept queries from non registered entities. 
(Dangerous!)", + help="Whether to clean up (dataset, temporary files) after running", default=False, ) parser.add_argument( - "--wandb.project_name", + "--code_directory", + type=str, + help="Path to code directory", + default=".", + ) + + +def add_common_args(cls, parser): + """Add validator and miner specific arguments to the parser.""" + parser.add_argument( + "--hf_token", + type=str, + help="Hugging Face API token", + ) + parser.add_argument( + "--competition_id", + type=str, + help="Path for storing competition participants models .", + ) + + parser.add_argument( + "--models.model_dir", type=str, - default="template-miners", - help="Wandb project to log to.", + help="Path for storing competition participants models .", + default="/tmp/models", ) parser.add_argument( - "--wandb.entity", + "--models.dataset_dir", type=str, - default="opentensor-dev", - help="Wandb entity to log to.", + help="Path for storing datasets.", + default="/tmp/datasets-extracted", + ) + + parser.add_argument( + "--competition.config_path", + type=str, + help="Path with competition configuration .", + default="./config/competition_config.json", ) @@ -218,7 +290,7 @@ def add_validator_args(cls, parser): # Note: the validator needs to serve an Axon with their IP or they may # be blacklisted by the firewall of serving peers on the network. help="Set this flag to not attempt to serve an Axon.", - default=False, + default=True, ) parser.add_argument( @@ -229,28 +301,75 @@ def add_validator_args(cls, parser): ) parser.add_argument( - "--wandb.project_name", + "--db_path", + type=str, + help="Path to the sqlite DB for storing the miners models reference", + default="models.db" + ) + + parser.add_argument( + "--wandb_project_name", type=str, help="The name of the project where you are sending the new run.", - default="template-validators", + default="melanoma-testnet", ) parser.add_argument( - "--wandb.entity", + "--wandb_entity", type=str, help="The name of the project where you are sending the new run.", - default="opentensor-dev", + default="safe-scan-ai", + ) + + parser.add_argument( + "--test_mode", + action="store_true", + help="Test(net) mode", + default=False, + ) + + + parser.add_argument( + "--miners_refresh_interval", + type=int, + help="The interval at which to refresh the miners in minutes", + default=30, ) + parser.add_argument( + "--monitor_datasets_interval", + type=int, + help="The interval at which to monitor the datasets in seconds", + default=20, + ) -def config(cls): + parser.add_argument( + "--local_dataset_dir", + type=str, + help="Path to the local dataset directory", + default="local_datasets/", + ) + + parser.add_argument( + "--filesystem_evaluation", + type=bool, + help="Should use local datasets instead of HF? Use together with --local_dataset_dir", + default=False + ) + + +def path_config(cls=None): """ Returns the configuration object specific to this miner or validator after adding relevant arguments. 
""" + + # config from huggingface parser = argparse.ArgumentParser() bt.wallet.add_args(parser) bt.subtensor.add_args(parser) bt.logging.add_args(parser) bt.axon.add_args(parser) - cls.add_args(parser) + add_common_args(cls, parser) + if cls: + cls.add_args(parser) return bt.config(parser) diff --git a/template/utils/logging.py b/cancer_ai/utils/logging.py similarity index 100% rename from template/utils/logging.py rename to cancer_ai/utils/logging.py diff --git a/template/utils/misc.py b/cancer_ai/utils/misc.py similarity index 100% rename from template/utils/misc.py rename to cancer_ai/utils/misc.py diff --git a/cancer_ai/utils/models_storage_utils.py b/cancer_ai/utils/models_storage_utils.py new file mode 100644 index 000000000..54d147c23 --- /dev/null +++ b/cancer_ai/utils/models_storage_utils.py @@ -0,0 +1,49 @@ +import functools +import multiprocessing +from typing import Any + + +def run_in_subprocess(func: functools.partial, ttl: int) -> Any: + """Runs the provided function on a subprocess with 'ttl' seconds to complete. + + Args: + func (functools.partial): Function to be run. + ttl (int): How long to try for in seconds. + + Returns: + Any: The value returned by 'func' + """ + + def wrapped_func(func: functools.partial, queue: multiprocessing.Queue): + try: + result = func() + queue.put(result) + except (Exception, BaseException) as e: + # Catch exceptions here to add them to the queue. + queue.put(e) + + # Use "fork" (the default on all POSIX except macOS), because pickling doesn't seem + # to work on "spawn". + ctx = multiprocessing.get_context("fork") + queue = ctx.Queue() + process = ctx.Process(target=wrapped_func, args=[func, queue]) + + process.start() + + process.join(timeout=ttl) + + if process.is_alive(): + process.terminate() + process.join() + raise TimeoutError(f"Failed to {func.func.__name__} after {ttl} seconds") + + # Raises an error if the queue is empty. This is fine. It means our subprocess timed out. + result = queue.get(block=False) + + # If we put an exception on the queue then raise instead of returning. 
+ if isinstance(result, Exception): + raise result + if isinstance(result, BaseException): + raise Exception(f"BaseException raised in subprocess: {str(result)}") + + return result diff --git a/template/utils/uids.py b/cancer_ai/utils/uids.py similarity index 100% rename from template/utils/uids.py rename to cancer_ai/utils/uids.py diff --git a/cancer_ai/validator/__init__.py b/cancer_ai/validator/__init__.py new file mode 100644 index 000000000..456f206d6 --- /dev/null +++ b/cancer_ai/validator/__init__.py @@ -0,0 +1,2 @@ +# from .forward import forward +# from .reward import reward diff --git a/cancer_ai/validator/cancer_ai_logo.py b/cancer_ai/validator/cancer_ai_logo.py new file mode 100644 index 000000000..667cb858d --- /dev/null +++ b/cancer_ai/validator/cancer_ai_logo.py @@ -0,0 +1,34 @@ +cancer_ai_logo = """ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@ @@@@ @@@@@@@@@@@@ +@@@@@@@@@@ @@@@@@@@@@@ @@@@@@@@@@ @@@@@@@@@@@ +@@@@@@@ @@@@@@@@@ @@@@@@@@ @@@@@@@@@@@@ +@@@@@@ @@@@@@@@@@@@@@ @@@@@@@@ @@@@@@@@ @@@@@@@@@@@@ +@@@@@ @@@@@@@@@@@@@@@@@@ @@@@@@@@ @@@@@@ @@@@@@@@@@@@@ +@@@@@ @@@@@@@@@@@@@@@@@@ @@@@@@@@@ @@@@ @@@@@@@@@@@@@@ +@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@ +@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@ +@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@ +@@@@@@@@@@@ @@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@ @@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@ @@@@@@@@@@@@@@@@ +@@@@ @@@@@@@@@@@@@@@@@@@ @@@@@@@@@@ @@@@@@@@@@@@@@@ +@@@@@ @@@@@@@@@@@@@@@@@ @@@@@@@@@ @@ @@@@@@@@@@@@@ +@@@@@@ @@@@@@@@@@@ @@@@@@@@ @@@@ @@@@@@@@@@@@ +@@@@@@@@ @@@@@@@@@ @@@@@@@ @@@@@@@@@@ +@@@@@@@@@@@ @@@@@@@@@@@ @@@@@@@@@@ @@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@ @@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@ @@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +""" \ No newline at end of file diff --git a/cancer_ai/validator/competition_handlers/base_handler.py b/cancer_ai/validator/competition_handlers/base_handler.py new file mode 100644 index 000000000..9701f7ed3 --- /dev/null +++ b/cancer_ai/validator/competition_handlers/base_handler.py @@ -0,0 +1,106 @@ +from abc import ABC, abstractmethod +from typing import List + +from pydantic import BaseModel + + +class BaseModelEvaluationResult(BaseModel): + score: float = 0.0 + predictions_raw: list = [] + error: str = "" + + run_time_s: float = 0.0 + tested_entries: int = 0 + + class Config: + arbitrary_types_allowed = True + + +class BaseCompetitionHandler(ABC): + """ + Base class for handling different competition types. 
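As a usage sketch for the `run_in_subprocess` helper above: the function must be wrapped in `functools.partial`, since the timeout message reads `func.func.__name__`. The function name below is illustrative, not part of the codebase.

```python
import functools
from cancer_ai.utils.models_storage_utils import run_in_subprocess

def slow_chain_call(netuid: int) -> str:
    # Hypothetical stand-in for a subtensor call that can hang indefinitely.
    return f"metadata for netuid {netuid}"

partial = functools.partial(slow_chain_call, 76)
try:
    result = run_in_subprocess(partial, ttl=30)  # give up after 30 seconds
except TimeoutError:
    result = None  # the child process was terminated after the TTL expired
```

One caveat of the "fork" start method: the child inherits the parent's memory, so open sockets (such as the subtensor websocket) should not be reused inside `func`.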
+
+    It stores the test inputs (X_test) and ground-truth labels (y_test) used to
+    evaluate submitted models.
+    """
+
+    def __init__(self, X_test: list, y_test: list) -> None:
+        """
+        Initializes the BaseCompetitionHandler with test inputs and labels.
+        """
+        self.X_test = X_test
+        self.y_test = y_test
+
+    @abstractmethod
+    def preprocess_and_serialize_data(self, X_test: list) -> list:
+        """
+        Abstract method to preprocess and serialize data.
+
+        This method is responsible for preprocessing the data for the competition
+        and serializing it for efficient reuse across multiple model evaluations.
+
+        Args:
+            X_test: List of input data (typically file paths for images)
+
+        Returns:
+            List of paths to serialized preprocessed data chunks
+        """
+
+    @abstractmethod
+    def set_preprocessed_data_dir(self, data_dir: str) -> None:
+        """
+        Abstract method to set the directory for storing preprocessed data.
+        """
+
+    @abstractmethod
+    def get_preprocessed_data_generator(self):
+        """
+        Abstract method to get a preprocessed data generator.
+
+        Returns:
+            Generator that yields preprocessed data chunks
+        """
+
+    @abstractmethod
+    def cleanup_preprocessed_data(self) -> None:
+        """
+        Abstract method to clean up preprocessed data files.
+        """
+
+    @abstractmethod
+    def preprocess_data(self):
+        """
+        Abstract method to prepare the data.
+
+        This method is responsible for preprocessing the data for the competition.
+        """
+
+    @abstractmethod
+    def get_model_result(self, y_test: List[int], y_pred: List[float], run_time_s: float, model_size_mb: float = None) -> BaseModelEvaluationResult:
+        """
+        Abstract method to evaluate a model's predictions for the competition.
+
+        This method should be implemented by subclasses.
+
+        Args:
+            y_test: Ground truth labels
+            y_pred: Model predictions
+            run_time_s: Inference time in seconds
+            model_size_mb: Model size in megabytes (optional, for efficiency scoring)
+
+        Returns:
+            An evaluation result object for this competition.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_comparable_result(self, result: BaseModelEvaluationResult) -> tuple:
+        """
+        Create a comparable representation of the result for grouping duplicates.
+
+        This method should be implemented by each competition handler to specify
+        which metrics are used for comparing results.
+
+        Args:
+            result: The evaluation result object.
+
+        Returns:
+            A tuple of key metrics that can be used for comparison.
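To illustrate the duplicate-grouping contract above: two results are treated as copies when their comparable tuples match. The handlers shown later round metrics to six decimal places, which absorbs floating-point noise; the values below are illustrative.

```python
# Sketch: results group as duplicates when their rounded key metrics match.
a = (round(0.9123456789, 6), round(0.8000000001, 6))
b = (round(0.9123456123, 6), round(0.7999999999, 6))
assert a == b  # both are (0.912346, 0.8)
```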
+ """ + raise NotImplementedError diff --git a/cancer_ai/validator/competition_handlers/melanoma_handler.py b/cancer_ai/validator/competition_handlers/melanoma_handler.py new file mode 100644 index 000000000..0615ceb1f --- /dev/null +++ b/cancer_ai/validator/competition_handlers/melanoma_handler.py @@ -0,0 +1,275 @@ + +from typing import List, AsyncGenerator +import numpy as np +import pickle +import os +from pathlib import Path +from collections import defaultdict +import bittensor as bt +from PIL import Image +from sklearn.metrics import ( + accuracy_score, + precision_score, + fbeta_score, + recall_score, + confusion_matrix, + roc_curve, + auc, +) + +from cancer_ai.validator.models import WanDBLogModelBase +from .base_handler import BaseCompetitionHandler, BaseModelEvaluationResult + +class MelanomaWanDBLogModelEntry(WanDBLogModelBase): + model_url: str + accuracy: float + precision: float + fbeta: float + recall: float + confusion_matrix: list + roc_curve: dict + roc_auc: float + + + +class MelanomaEvaluationResult(BaseModelEvaluationResult): + accuracy: float = 0.0 + precision: float = 0.0 + recall: float = 0.0 + fbeta: float = 0.0 + confusion_matrix: list = [[0, 0], [0, 0]] + fpr: list = [] + tpr: list = [] + roc_auc: float = 0.0 + + def to_log_dict(self) -> dict: + return { + "accuracy": self.accuracy, + "precision": self.precision, + "fbeta": self.fbeta, + "recall": self.recall, + "confusion_matrix": self.confusion_matrix, + "roc_curve": {"fpr": self.fpr, "tpr": self.tpr} if self.fpr and self.tpr else {}, + "roc_auc": self.roc_auc, + } + + class Config: + arbitrary_types_allowed = True + + +# Weights for the competition, for calcualting model score +WEIGHT_FBETA = 0.6 +WEIGHT_ACCURACY = 0.3 +WEIGHT_AUC = 0.1 + +# Melanoma-specific preprocessing constants +MELANOMA_TARGET_SIZE = (512, 512) +MELANOMA_CHUNK_SIZE = 200 + + +class MelanomaCompetitionHandler(BaseCompetitionHandler): + WanDBLogModelClass = MelanomaWanDBLogModelEntry + + """Handler for melanoma competition - handles both data preprocessing and model evaluation""" + + def __init__(self, X_test, y_test, config=None) -> None: + super().__init__(X_test, y_test) + self.config = config + self.preprocessed_data_dir = None + self.preprocessed_chunks = [] + + def set_preprocessed_data_dir(self, data_dir: str) -> None: + """Set directory for storing preprocessed data""" + self.preprocessed_data_dir = Path(data_dir) / "melanoma_preprocessed" + self.preprocessed_data_dir.mkdir(exist_ok=True) + + async def preprocess_and_serialize_data(self, X_test: List[str]) -> List[str]: + """ + Preprocess all images and serialize them to disk in chunks. + Returns list of paths to serialized chunk files. 
+ """ + if not self.preprocessed_data_dir: + raise ValueError("Preprocessed data directory not set") + + bt.logging.info(f"Preprocessing {len(X_test)} images for melanoma competition") + error_counter = defaultdict(int) + chunk_paths = [] + + for i in range(0, len(X_test), MELANOMA_CHUNK_SIZE): + bt.logging.debug(f"Processing chunk {i} to {i + MELANOMA_CHUNK_SIZE}") + chunk_data = [] + + for img_path in X_test[i: i + MELANOMA_CHUNK_SIZE]: + try: + if not os.path.isfile(img_path): + raise FileNotFoundError(f"File does not exist: {img_path}") + + with Image.open(img_path) as img: + img = img.convert('RGB') + preprocessed_img = self._preprocess_single_image(img) + chunk_data.append(preprocessed_img) + + except FileNotFoundError: + error_counter['FileNotFoundError'] += 1 + continue + except IOError: + error_counter['IOError'] += 1 + continue + except Exception as e: + bt.logging.debug(f"Unexpected error processing {img_path}: {e}") + error_counter['UnexpectedError'] += 1 + continue + + if chunk_data: + try: + chunk_array = np.array(chunk_data, dtype=np.float32) + chunk_file = self.preprocessed_data_dir / f"chunk_{len(chunk_paths)}.pkl" + + with open(chunk_file, 'wb') as f: + pickle.dump(chunk_array, f) + + chunk_paths.append(str(chunk_file)) + bt.logging.debug(f"Saved chunk with {len(chunk_data)} images to {chunk_file}") + + except Exception as e: + bt.logging.error(f"Failed to serialize chunk: {e}") + error_counter['SerializationError'] += 1 + + if error_counter: + error_summary = "; ".join([f"{count} {error_type.replace('_', ' ')}(s)" + for error_type, count in error_counter.items()]) + bt.logging.info(f"Preprocessing completed with issues: {error_summary}") + + bt.logging.info(f"Preprocessed data saved in {len(chunk_paths)} chunks") + self.preprocessed_chunks = chunk_paths + return chunk_paths + + def _preprocess_single_image(self, img: Image.Image) -> np.ndarray: + """Preprocess a single PIL image for melanoma competition""" + # Resize to target size + img = img.resize(MELANOMA_TARGET_SIZE) + + # Convert to numpy array and normalize + img_array = np.array(img, dtype=np.float32) / 255.0 + + # Handle grayscale images + if img_array.ndim == 2: + img_array = np.stack((img_array,) * 3, axis=-1) + elif img_array.shape[-1] != 3: + raise ValueError(f"Unexpected number of channels: {img_array.shape[-1]}") + + # Transpose to (C, H, W) format + img_array = np.transpose(img_array, (2, 0, 1)) + return img_array + + async def get_preprocessed_data_generator(self) -> AsyncGenerator[np.ndarray, None]: + """Generator that yields preprocessed data chunks""" + for chunk_file in self.preprocessed_chunks: + if os.path.exists(chunk_file): + try: + with open(chunk_file, 'rb') as f: + chunk_data = pickle.load(f) + yield chunk_data + except Exception as e: + bt.logging.error(f"Error loading preprocessed chunk {chunk_file}: {e}") + continue + else: + bt.logging.warning(f"Preprocessed chunk file not found: {chunk_file}") + + def preprocess_data(self): + """Prepare the data for melanoma competition.""" + pass + + def cleanup_preprocessed_data(self) -> None: + """Clean up preprocessed data files""" + if self.preprocessed_data_dir and self.preprocessed_data_dir.exists(): + import shutil + try: + shutil.rmtree(self.preprocessed_data_dir) + bt.logging.debug("Cleaned up preprocessed data") + except Exception as e: + bt.logging.error(f"Failed to cleanup preprocessed data: {e}") + + def prepare_y_pred(self, y_pred: np.ndarray) -> np.ndarray: + return [1 if y == "True" else 0 for y in self.y_test] + + def 
calculate_score(self, fbeta: float, accuracy: float, roc_auc: float) -> float:
+        return fbeta * WEIGHT_FBETA + accuracy * WEIGHT_ACCURACY + roc_auc * WEIGHT_AUC
+
+    def get_model_result(
+        self, y_test: List[int], y_pred, run_time_s: float, model_size_mb: float = None
+    ) -> MelanomaEvaluationResult:
+        # model_size_mb is accepted for compatibility with the base-class signature
+        # and the CompetitionManager call site; melanoma scoring does not use it.
+        # Convert y_pred to numpy array if it's a list
+        if isinstance(y_pred, list):
+            y_pred = np.array(y_pred)
+
+        # Handle the case where y_pred contains arrays instead of scalars
+        try:
+            # If y_pred is a 2D array, take the first column or flatten it if it's a single prediction per sample
+            if len(y_pred.shape) > 1 and y_pred.shape[1] > 1:
+                # If we have multiple predictions per sample, take the first column
+                y_pred_flat = y_pred[:, 0]
+            else:
+                # Otherwise flatten the array to ensure it's 1D
+                y_pred_flat = y_pred.flatten()
+        except (AttributeError, TypeError):
+            # If y_pred doesn't have a shape attribute or other issues arise, use it directly
+            y_pred_flat = y_pred
+
+        y_pred_binary = [1 if y > 0.5 else 0 for y in y_pred_flat]
+        tested_entries = len(y_test)
+        accuracy = accuracy_score(y_test, y_pred_binary)
+        precision = precision_score(y_test, y_pred_binary, zero_division=0)
+        fbeta = fbeta_score(y_test, y_pred_binary, beta=2, zero_division=0)
+        recall = recall_score(y_test, y_pred_binary, zero_division=0)
+        conf_matrix = confusion_matrix(y_test, y_pred_binary)
+        fpr, tpr, _ = roc_curve(y_test, y_pred_flat)
+        roc_auc = auc(fpr, tpr)
+
+        score = self.calculate_score(fbeta, accuracy, roc_auc)
+
+        return MelanomaEvaluationResult(
+            tested_entries=tested_entries,
+            run_time_s=run_time_s,
+            accuracy=accuracy,
+            precision=precision,
+            fbeta=fbeta,
+            recall=recall,
+            confusion_matrix=conf_matrix.tolist(),
+            fpr=fpr.tolist(),
+            tpr=tpr.tolist(),
+            roc_auc=roc_auc,
+            score=score,
+            predictions_raw=y_pred_flat.tolist(),
+        )
+
+    def get_comparable_result_fields(self) -> tuple[str, ...]:
+        """Field names for get_comparable_result, in order."""
+        return (
+            "accuracy",
+            "precision",
+            "recall",
+            "fbeta",
+            "predictions_raw",
+        )
+
+    def get_comparable_result(self, result: MelanomaEvaluationResult) -> tuple:
+        """
+        Create a comparable representation of the result for grouping duplicates.
+
+        Args:
+            result: The evaluation result object.
+
+        Returns:
+            A tuple of key metrics that can be used for comparison.
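To make the melanoma weighting concrete, here is a small worked example of the `calculate_score` formula above, with illustrative metric values:

```python
# Worked example of the melanoma score: 0.6 * F-beta + 0.3 * accuracy + 0.1 * AUC.
# The metric values are illustrative.
fbeta, accuracy, roc_auc = 0.80, 0.90, 0.85

score = 0.6 * fbeta + 0.3 * accuracy + 0.1 * roc_auc
print(round(score, 3))  # 0.835

# F-beta with beta=2 dominates the blend, so recall-heavy models are favoured
# over precision-heavy ones with the same accuracy.
```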
+ """ + if not isinstance(result, MelanomaEvaluationResult): + return tuple() + + return ( + round(result.accuracy, 6), + round(result.precision, 6), + round(result.recall, 6), + round(result.fbeta, 6), + tuple(result.predictions_raw), + ) diff --git a/cancer_ai/validator/competition_handlers/tricorder_handler.py b/cancer_ai/validator/competition_handlers/tricorder_handler.py new file mode 100644 index 000000000..2246240d3 --- /dev/null +++ b/cancer_ai/validator/competition_handlers/tricorder_handler.py @@ -0,0 +1,665 @@ +from typing import List, Dict, Any, AsyncGenerator, Tuple, Optional, TypedDict, Literal +from pydantic import Field +import numpy as np +import os +import pickle +from pathlib import Path +from collections import defaultdict +import bittensor as bt +from PIL import Image +from sklearn.metrics import ( + accuracy_score, + precision_score, + f1_score, + recall_score, + confusion_matrix, +) +from enum import Enum, IntEnum + +from .base_handler import BaseCompetitionHandler, BaseModelEvaluationResult + +# --- Constants --- +TARGET_SIZE = (512, 512) +CHUNK_SIZE = 200 + +# Image preprocessing constants +NORMALIZATION_FACTOR = 255.0 + +# Risk category weights for scoring +CATEGORY_WEIGHTS = { + 'HIGH_RISK': 3.0, + 'MEDIUM_RISK': 2.0, + 'BENIGN': 1.0 +} + +# Efficiency scoring constants +MIN_MODEL_SIZE_MB = 50 +MAX_MODEL_SIZE_MB = 150 +EFFICIENCY_RANGE_MB = 100 # MAX - MIN + +# Final scoring weights +PREDICTION_WEIGHT = 0.9 +EFFICIENCY_WEIGHT = 0.1 +ACCURACY_WEIGHT = 0.5 +WEIGHTED_F1_WEIGHT = 0.5 + +# Age validation +MAX_AGE = 120 + +# --- Data Structures --- +class RiskCategory(str, Enum): + BENIGN = "benign" + MEDIUM_RISK = "medium_risk" + HIGH_RISK = "high_risk" + +class ClassInfo(TypedDict): + """Metadata for each skin lesion class""" + id: int # 1-based class ID + name: str # Full class name + short_name: str # Short class identifier + risk_category: RiskCategory # Risk level + weight: float # Scoring weight + +# 1-based class IDs for better readability +class ClassId(IntEnum): + ACTINIC_KERATOSIS = 1 + BASAL_CELL_CARCINOMA = 2 + SEBORRHEIC_KERATOSIS = 3 + SQUAMOUS_CELL_CARCINOMA = 4 + VASCULAR_LESION = 5 + DERMATOFIBROMA = 6 + BENIGN_NEVUS = 7 + OTHER_NON_NEOPLASTIC = 8 + MELANOMA = 9 + OTHER_NEOPLASTIC = 10 + +class LocationId(IntEnum): + ARM = 1 + FEET = 2 + GENITALIA = 3 + HAND = 4 + HEAD = 5 + LEG = 6 + TORSO = 7 + +# Class metadata mapping +CLASS_INFO: Dict[ClassId, ClassInfo] = { + ClassId.ACTINIC_KERATOSIS: { + "name": "Actinic Keratosis (AK)", + "short_name": "AK", + "risk_category": RiskCategory.BENIGN, + "weight": 1.0 + }, + ClassId.BASAL_CELL_CARCINOMA: { + "name": "Basal Cell Carcinoma (BCC)", + "short_name": "BCC", + "risk_category": RiskCategory.HIGH_RISK, + "weight": 3.0 + }, + ClassId.SEBORRHEIC_KERATOSIS: { + "name": "Seborrheic Keratosis (SK)", + "short_name": "SK", + "risk_category": RiskCategory.MEDIUM_RISK, + "weight": 2.0 + }, + ClassId.SQUAMOUS_CELL_CARCINOMA: { + "name": "Squamous Cell Carcinoma (SCC)", + "short_name": "SCC", + "risk_category": RiskCategory.HIGH_RISK, + "weight": 3.0 + }, + ClassId.VASCULAR_LESION: { + "name": "Vascular Lesion", + "short_name": "VASC", + "risk_category": RiskCategory.MEDIUM_RISK, + "weight": 2.0 + }, + ClassId.DERMATOFIBROMA: { + "name": "Dermatofibroma", + "short_name": "DF", + "risk_category": RiskCategory.BENIGN, + "weight": 1.0 + }, + ClassId.BENIGN_NEVUS: { + "name": "Benign Nevus", + "short_name": "NV", + "risk_category": RiskCategory.BENIGN, + "weight": 1.0 + }, + ClassId.OTHER_NON_NEOPLASTIC: { + "name": 
"Other Non-Neoplastic", + "short_name": "NON", + "risk_category": RiskCategory.BENIGN, + "weight": 1.0 + }, + ClassId.MELANOMA: { + "name": "Melanoma", + "short_name": "MEL", + "risk_category": RiskCategory.HIGH_RISK, + "weight": 3.0 + }, + ClassId.OTHER_NEOPLASTIC: { + "name": "Other Neoplastic", + "short_name": "ON", + "risk_category": RiskCategory.BENIGN, + "weight": 1.0 + }, +} + +# Convert to 0-based indices for model output +CLASS_IDS_0_BASED = [cid - 1 for cid in ClassId] # [0, 1, 2, ..., 9] +RISK_CATEGORIES = { + RiskCategory.BENIGN: [cid - 1 for cid, info in CLASS_INFO.items() + if info["risk_category"] == RiskCategory.BENIGN], + RiskCategory.MEDIUM_RISK: [cid - 1 for cid, info in CLASS_INFO.items() + if info["risk_category"] == RiskCategory.MEDIUM_RISK], + RiskCategory.HIGH_RISK: [cid - 1 for cid, info in CLASS_INFO.items() + if info["risk_category"] == RiskCategory.HIGH_RISK] +} + +# For backward compatibility +BENIGN_CLASSES = RISK_CATEGORIES[RiskCategory.BENIGN] +MEDIUM_RISK_CLASSES = RISK_CATEGORIES[RiskCategory.MEDIUM_RISK] +HIGH_RISK_CLASSES = RISK_CATEGORIES[RiskCategory.HIGH_RISK] + +# Weights for different risk categories +BENIGN_WEIGHT = 1.0 +MEDIUM_RISK_WEIGHT = 2.0 +HIGH_RISK_WEIGHT = 3.0 + +from cancer_ai.validator.models import WanDBLogModelBase + +class TricorderWanDBLogModelEntry(WanDBLogModelBase): + tested_entries: int + model_url: str + accuracy: float + precision: float + fbeta: float + recall: float + confusion_matrix: list + roc_curve: dict | None = None + roc_auc: float | None = None + weighted_f1: float | None = None + f1_by_class: list | None = None + class_weights: list | None = None + risk_category_scores: dict | None = None + predictions_raw: list | None = None + error: str | None = None + +class TricorderEvaluationResult(BaseModelEvaluationResult): + """Results from evaluating a model on the tricorder competition.""" + accuracy: float = 0.0 + precision: float = 0.0 + recall: float = 0.0 + fbeta: float = 0.0 + weighted_f1: float = 0.0 + efficiency_score: float = 1.0 + f1_by_class: List[float] = Field(default_factory=lambda: [0.0] * len(CLASS_INFO)) + class_weights: List[float] = Field(default_factory=lambda: [info["weight"] for info in CLASS_INFO.values()]) + confusion_matrix: List[List[int]] = Field(default_factory=lambda: [[0] * len(CLASS_INFO) for _ in range(len(CLASS_INFO))]) + risk_category_scores: Dict[RiskCategory, float] = Field(default_factory=lambda: {category: 0.0 for category in RiskCategory}) + + def to_log_dict(self) -> dict: + return { + "tested_entries": self.tested_entries, + "accuracy": self.accuracy, + "precision": self.precision, + "fbeta": self.fbeta, + "recall": self.recall, + "efficiency_score": self.efficiency_score, + "confusion_matrix": self.confusion_matrix, + "roc_curve": getattr(self, "roc_curve", None), + "roc_auc": getattr(self, "roc_auc", None), + "weighted_f1": getattr(self, "weighted_f1", None), + "f1_by_class": getattr(self, "f1_by_class", None), + "class_weights": getattr(self, "class_weights", None), + "risk_category_scores": getattr(self, "risk_category_scores", None), + "predictions_raw": getattr(self, "predictions_raw", None), + "score": getattr(self, "score", None), + "error": getattr(self, "error", None), + } + +class TricorderCompetitionHandler(BaseCompetitionHandler): + WanDBLogModelClass = TricorderWanDBLogModelEntry + + """Handler for skin lesion classification competition with 10 classes. 
+ + This handler manages the entire competition pipeline including: + - Data preprocessing and serialization + - Model evaluation + - Scoring based on competition rules + """ + + def __init__(self, X_test: List[str], y_test: List[int], metadata: Optional[List[Dict[str, Any]]] = None, config: Optional[Dict[str, Any]] = None) -> None: + super().__init__(X_test, y_test) + self.config = config or {} + self.metadata = metadata or [{'age': None, 'gender': None, 'location': None} for _ in X_test] + self.preprocessed_data_dir = None + self.preprocessed_chunks = [] + + validation_errors = [] + + for i, meta_entry in enumerate(self.metadata): + # Validate age + age = meta_entry.get('age') + if age is None: + validation_errors.append(f"Missing age at index {i}") + elif not isinstance(age, (int, float)) or age < 0 or age > MAX_AGE: + validation_errors.append(f"Invalid age at index {i}: {age} (must be 0-120)") + + # Validate gender + gender = meta_entry.get('gender') + if gender is None: + validation_errors.append(f"Missing gender at index {i}") + else: + gender_lower = str(gender).lower() + if gender_lower not in ['m', 'f', 'male', 'female']: + validation_errors.append(f"Invalid gender at index {i}: {gender} (must be 'm', 'f', 'male', 'female')") + else: + meta_entry['gender'] = gender_lower + + # Validate location + location = meta_entry.get('location') + if location is None: + validation_errors.append(f"Missing location at index {i}") + else: + location_lower = str(location).lower() + valid_locations = ['arm', 'feet', 'genitalia', 'hand', 'head', 'leg', 'torso'] + if location_lower not in valid_locations: + validation_errors.append(f"Invalid location at index {i}: {location} (must be one of {valid_locations})") + else: + meta_entry['location'] = location_lower + + # Validate labels + valid_label_names = [info["short_name"] for info in CLASS_INFO.values()] + for i, label in enumerate(y_test): + if isinstance(label, str): + if label not in valid_label_names: + validation_errors.append(f"Invalid label at index {i}: {label} (must be one of {valid_label_names})") + elif isinstance(label, int): + if label < 1 or label > len(CLASS_INFO): + validation_errors.append(f"Invalid label at index {i}: {label} (must be 1-{len(CLASS_INFO)})") + else: + validation_errors.append(f"Invalid label type at index {i}: {type(label)} (must be string or int)") + + # If any validation errors, fail the competition + if validation_errors: + error_summary = "\n".join(validation_errors[:10]) + if len(validation_errors) > 10: + error_summary += f"\n... and {len(validation_errors) - 10} more errors" + + bt.logging.error(f"TRICORDER COMPETITION CANCELLED: Dataset validation failed") + bt.logging.error(f"Found {len(validation_errors)} validation errors:") + bt.logging.error(error_summary) + + raise ValueError(f"Tricorder competition requires complete metadata. 
Found {len(validation_errors)} validation errors:\n{error_summary}") + + # Convert string labels to 0-based indices + self.y_test = [] + for y in y_test: + if isinstance(y, str) and y in [info["short_name"] for info in CLASS_INFO.values()]: + # Find class ID by short name + class_id = next((cid for cid, info in CLASS_INFO.items() + if info["short_name"] == y), None) + if class_id is not None: + self.y_test.append(class_id - 1) # Convert to 0-based + elif isinstance(y, int) and y > 0: + self.y_test.append(y - 1) # Convert to 0-based if numeric + else: + raise ValueError(f"Invalid label: {y}") + + # Get class weights from CLASS_INFO + self.class_weights = [info["weight"] for info in CLASS_INFO.values()] + + # Create mapping from short names to class indices for reference + self.class_name_to_idx = { + info["short_name"]: cid - 1 + for cid, info in CLASS_INFO.items() + } + + # Initialize metrics + self.metrics = { + 'accuracy': 0.0, + 'weighted_f1': 0.0, + 'efficiency': 1.0 # Placeholder for efficiency score + } + + def set_preprocessed_data_dir(self, data_dir: str) -> None: + """Set directory for storing preprocessed data""" + self.preprocessed_data_dir = Path(data_dir) / "tricorder_preprocessed" + self.preprocessed_data_dir.mkdir(exist_ok=True) + + async def preprocess_and_serialize_data(self, X_test: List[str]) -> List[str]: + """ + Preprocess all images with metadata and serialize them to disk in chunks. + Returns list of paths to serialized chunk files. + """ + if not self.preprocessed_data_dir: + raise ValueError("Preprocessed data directory not set") + + bt.logging.debug(f"TRICORDER: Preprocessing {len(X_test)} images for tricorder competition") + bt.logging.debug(f"TRICORDER: Using chunk size: {CHUNK_SIZE}") + bt.logging.debug(f"TRICORDER: Available metadata entries: {len(self.metadata)}") + error_counter = defaultdict(int) + chunk_paths = [] + + for i in range(0, len(X_test), CHUNK_SIZE): + bt.logging.debug(f"TRICORDER: Processing chunk {len(chunk_paths)} - images {i} to {min(i + CHUNK_SIZE, len(X_test))}") + chunk_data = [] + chunk_metadata = [] + + for idx, img_path in enumerate(X_test[i: i + CHUNK_SIZE]): + try: + if not os.path.isfile(img_path): + raise FileNotFoundError(f"File does not exist: {img_path}") + + with Image.open(img_path) as img: + img = img.convert('RGB') + preprocessed_img = self._preprocess_single_image(img) + chunk_data.append(preprocessed_img) + + # Add corresponding metadata + global_idx = i + idx + if global_idx < len(self.metadata): + chunk_metadata.append(self.metadata[global_idx]) + else: + chunk_metadata.append({'age': None, 'gender': None, 'location': None}) + + except FileNotFoundError: + error_counter['FileNotFoundError'] += 1 + continue + except IOError: + error_counter['IOError'] += 1 + continue + except Exception as e: + bt.logging.debug(f"Unexpected error processing {img_path}: {e}") + error_counter['UnexpectedError'] += 1 + continue + + if chunk_data: + try: + chunk_array = np.array(chunk_data, dtype=np.float32) + chunk_file = self.preprocessed_data_dir / f"chunk_{len(chunk_paths)}.pkl" + metadata_file = self.preprocessed_data_dir / f"metadata_{len(chunk_paths)}.pkl" + + with open(chunk_file, 'wb') as f: + pickle.dump(chunk_array, f) + + with open(metadata_file, 'wb') as f: + pickle.dump(chunk_metadata, f) + + chunk_paths.append(str(chunk_file)) + bt.logging.debug(f"TRICORDER: Saved chunk with {len(chunk_data)} images and metadata to {chunk_file}") + + except Exception as e: + bt.logging.error(f"TRICORDER: Failed to serialize chunk: {e}") + 
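For reference, the label normalisation performed in `__init__` above maps class short names and 1-based IDs onto the 0-based indices the model outputs; a condensed standalone sketch (the mapping table mirrors CLASS_INFO):

```python
# Minimal sketch of the tricorder label normalisation: short names such as
# "MEL" and 1-based IDs such as 9 both map to the 0-based model index 8.
SHORT_NAME_TO_ID = {
    "AK": 1, "BCC": 2, "SK": 3, "SCC": 4, "VASC": 5,
    "DF": 6, "NV": 7, "NON": 8, "MEL": 9, "ON": 10,
}

def to_zero_based(label) -> int:
    if isinstance(label, str):
        return SHORT_NAME_TO_ID[label] - 1
    if isinstance(label, int) and label > 0:
        return label - 1
    raise ValueError(f"Invalid label: {label!r}")

assert to_zero_based("MEL") == 8
assert to_zero_based(9) == 8
```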
error_counter['SerializationError'] += 1 + + if error_counter: + error_summary = "; ".join([f"{count} {error_type.replace('_', ' ')}" + for error_type, count in error_counter.items()]) + bt.logging.debug(f"TRICORDER: Preprocessing completed with issues: {error_summary}") + + bt.logging.debug(f"TRICORDER: Preprocessed data saved in {len(chunk_paths)} chunks") + bt.logging.debug(f"TRICORDER: Chunk paths: {chunk_paths}") + self.preprocessed_chunks = chunk_paths + return chunk_paths + + def _preprocess_single_image(self, img: Image.Image) -> np.ndarray: + """Preprocess a single PIL image for tricorder competition""" + # Resize to target size + img = img.resize(TARGET_SIZE) + + # Convert to numpy array and normalize + img_array = np.array(img, dtype=np.float32) / NORMALIZATION_FACTOR + + # Handle grayscale images + if img_array.ndim == 2: + img_array = np.stack((img_array,) * 3, axis=-1) + elif img_array.shape[-1] != 3: + raise ValueError(f"Unexpected number of channels: {img_array.shape[-1]}") + + # Transpose to (C, H, W) format + img_array = np.transpose(img_array, (2, 0, 1)) + return img_array + + async def get_preprocessed_data_generator(self) -> AsyncGenerator[Tuple[np.ndarray, List[Dict[str, Any]]], None]: + """Generator that yields preprocessed data chunks with metadata""" + bt.logging.debug(f"TRICORDER: Starting data generator with {len(self.preprocessed_chunks)} chunks") + + for i, chunk_file in enumerate(self.preprocessed_chunks): + bt.logging.debug(f"TRICORDER: Processing chunk {i}: {chunk_file}") + if os.path.exists(chunk_file): + try: + # Load image data + bt.logging.debug(f"TRICORDER: Loading image data from {chunk_file}") + with open(chunk_file, 'rb') as f: + chunk_data = pickle.load(f) + bt.logging.debug(f"TRICORDER: Loaded chunk data shape: {chunk_data.shape}") + + # Load corresponding metadata + metadata_file = str(Path(chunk_file).parent / f"metadata_{i}.pkl") + bt.logging.debug(f"TRICORDER: Loading metadata from {metadata_file}") + chunk_metadata = [] + if os.path.exists(metadata_file): + with open(metadata_file, 'rb') as f: + chunk_metadata = pickle.load(f) + bt.logging.debug(f"TRICORDER: Loaded {len(chunk_metadata)} metadata entries") + else: + # Default metadata if file doesn't exist + bt.logging.warning(f"TRICORDER: Metadata file not found, using defaults") + chunk_metadata = [{'age': None, 'gender': None, 'location': None} for _ in range(len(chunk_data))] + + bt.logging.debug(f"TRICORDER: Yielding chunk {i} with {len(chunk_data)} samples and {len(chunk_metadata)} metadata") + yield chunk_data, chunk_metadata + except Exception as e: + bt.logging.error(f"Error loading preprocessed chunk {chunk_file}: {e}") + continue + else: + bt.logging.warning(f"Preprocessed chunk file not found: {chunk_file}") + + def cleanup_preprocessed_data(self) -> None: + """Clean up preprocessed data files""" + if self.preprocessed_data_dir and self.preprocessed_data_dir.exists(): + import shutil + try: + shutil.rmtree(self.preprocessed_data_dir) + bt.logging.debug("Cleaned up preprocessed data") + except Exception as e: + bt.logging.error(f"Failed to cleanup preprocessed data: {e}") + + def preprocess_data(self): + """Legacy method - using preprocess_and_serialize_data instead""" + pass + + def prepare_y_pred(self, y_pred): + """ + Convert string labels to 0-based indices for evaluation. 
+ + Args: + y_pred: List of prediction labels (either string short names or 1-based indices) + + Returns: + List of 0-based class indices + """ + converted = [] + for y in y_pred: + if isinstance(y, str): + # Find class ID by short name + class_id = next((cid for cid, info in CLASS_INFO.items() + if info["short_name"] == y), None) + if class_id is not None: + converted.append(class_id - 1) # Convert to 0-based + else: + raise ValueError(f"Unknown class short name: {y}") + elif isinstance(y, (int, float)): + converted.append(int(y) - 1) # Convert to 0-based if numeric + else: + raise ValueError(f"Invalid label type: {type(y).__name__}") + return converted + + def _calculate_risk_category_scores(self, f1_scores: np.ndarray) -> Dict[RiskCategory, float]: + """Calculate F1 scores for each risk category based on pre-computed F1 scores per class.""" + category_scores = {} + + for category, class_indices in RISK_CATEGORIES.items(): + if class_indices: + category_f1 = np.mean([f1_scores[i] for i in class_indices]) + category_scores[category] = float(category_f1) + else: + category_scores[category] = 0.0 + + return category_scores + + def _calculate_weighted_f1(self, category_scores: Dict[RiskCategory, float]) -> float: + """Calculate weighted F1 score based on risk categories.""" + # Use category-level weights from constants + category_weights = { + RiskCategory.HIGH_RISK: CATEGORY_WEIGHTS['HIGH_RISK'], + RiskCategory.MEDIUM_RISK: CATEGORY_WEIGHTS['MEDIUM_RISK'], + RiskCategory.BENIGN: CATEGORY_WEIGHTS['BENIGN'] + } + + total_weight = sum(category_weights.values()) + weighted_sum = sum( + category_scores.get(category, 0.0) * weight + for category, weight in category_weights.items() + ) + + return weighted_sum / total_weight if total_weight > 0 else 0.0 + + def calculate_score(self, metrics: Dict[str, float]) -> float: + """Calculate final competition score (0-1).""" + # Prediction quality (accuracy + weighted F1) + prediction_score = ACCURACY_WEIGHT * metrics['accuracy'] + WEIGHTED_F1_WEIGHT * metrics['weighted_f1'] + + # Efficiency score + efficiency_score = metrics.get('efficiency', 1.0) # Default to max if not set + + final_score = PREDICTION_WEIGHT * prediction_score + EFFICIENCY_WEIGHT * efficiency_score + return final_score + + def get_model_result(self, y_test: List[int], y_pred: List[float], run_time_s: float, model_size_mb: float = None) -> TricorderEvaluationResult: + """ + Evaluate model predictions and return detailed results. 
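A worked example of the category weighting implemented in `_calculate_weighted_f1` above, using illustrative per-category F1 values:

```python
# Worked example of the tricorder weighted F1: category F1 scores are combined
# with weights 3.0 (high risk), 2.0 (medium risk) and 1.0 (benign).
category_f1 = {"high_risk": 0.70, "medium_risk": 0.80, "benign": 0.90}
weights = {"high_risk": 3.0, "medium_risk": 2.0, "benign": 1.0}

weighted_f1 = sum(category_f1[c] * w for c, w in weights.items()) / sum(weights.values())
print(round(weighted_f1, 3))  # (2.1 + 1.6 + 0.9) / 6 ≈ 0.767
```

A model that is weak on melanoma and the carcinomas is therefore penalised three times as hard as one that is weak on the benign classes.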
+ + Args: + y_test: List of true class indices (0-9) + y_pred: List of predicted probabilities (shape [n_samples, 10]) + run_time_s: Inference time in seconds + model_size_mb: Model size in MB for efficiency calculation + + Returns: + TricorderEvaluationResult with comprehensive evaluation metrics + """ + try: + # Convert to numpy arrays + y_test = np.array(y_test) + y_pred = np.array(y_pred) + + # Define all possible class labels (0 to 9) + labels = list(range(len(CLASS_INFO))) + + # Get predicted class indices + y_pred_classes = np.argmax(y_pred, axis=1) + + # Calculate basic metrics + accuracy = float(accuracy_score(y_test, y_pred_classes)) + precision = float(precision_score(y_test, y_pred_classes, labels=labels, average='weighted', zero_division=0)) + recall = float(recall_score(y_test, y_pred_classes, labels=labels, average='weighted', zero_division=0)) + fbeta = float(f1_score(y_test, y_pred_classes, labels=labels, average='weighted', zero_division=0)) + + # Calculate F1 scores by class, ensuring all classes are included + f1_scores = f1_score(y_test, y_pred_classes, labels=labels, average=None, zero_division=0) + + # Calculate risk category scores and weighted F1 + category_scores = self._calculate_risk_category_scores(f1_scores) + weighted_f1 = self._calculate_weighted_f1(category_scores) + + # Log important metrics + bt.logging.info(f"Model evaluation results:") + bt.logging.info(f"- Accuracy: {accuracy:.4f}") + bt.logging.info(f"- Weighted F1: {weighted_f1:.4f}") + for category, score in category_scores.items(): + bt.logging.info(f"- {category.value} F1: {score:.4f}") + + # Calculate efficiency score based on model size + efficiency_score = 1.0 # Default to max if size not provided + if model_size_mb is not None: + if model_size_mb <= MIN_MODEL_SIZE_MB: + efficiency_score = 1.0 # Full efficiency score + elif model_size_mb <= MAX_MODEL_SIZE_MB: + # Linear decay from 1.0 to 0.0 between MIN and MAX MB + efficiency_score = (MAX_MODEL_SIZE_MB - model_size_mb) / EFFICIENCY_RANGE_MB + else: + efficiency_score = 0.0 # No efficiency score above MAX MB + + bt.logging.info(f"- Model size: {model_size_mb:.1f}MB, Efficiency score: {efficiency_score:.2f}") + + # Calculate final score using calculate_score method + metrics = { + 'accuracy': accuracy, + 'weighted_f1': weighted_f1, + 'efficiency': efficiency_score + } + score = self.calculate_score(metrics) + # Create result object + result = TricorderEvaluationResult( + tested_entries=len(y_test), + run_time_s=run_time_s, + predictions_raw=y_pred.tolist(), + accuracy=accuracy, + precision=precision, + recall=recall, + fbeta=fbeta, + weighted_f1=weighted_f1, + efficiency_score=efficiency_score, + f1_by_class=f1_scores.tolist(), + class_weights=self.class_weights, + confusion_matrix=confusion_matrix(y_test, y_pred_classes, labels=labels).tolist(), + risk_category_scores=category_scores, + score=score + ) + + return result + + except Exception as e: + error_msg = f"Error in get_model_result: {str(e)}" + bt.logging.error(error_msg, exc_info=True) + return TricorderEvaluationResult( + tested_entries=len(y_test) if 'y_test' in locals() else 0, + run_time_s=run_time_s, + error=error_msg + ) + + def get_comparable_result_fields(self) -> tuple[str, ...]: + """Field names for get_comparable_result, in order.""" + return ( + "accuracy", + "weighted_f1", + "risk_category_scores", + ) + + def get_comparable_result(self, result: TricorderEvaluationResult) -> tuple: + """ + Create a comparable representation of the result for grouping duplicates. 
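The size-based efficiency term computed inside `get_model_result`, together with the final blend from `calculate_score`, behaves as sketched below; the numbers are illustrative:

```python
from typing import Optional

def efficiency_score(model_size_mb: Optional[float]) -> float:
    # Linear decay: full score up to 50 MB, zero at or above 150 MB.
    if model_size_mb is None:
        return 1.0
    if model_size_mb <= 50:
        return 1.0
    if model_size_mb <= 150:
        return (150 - model_size_mb) / 100
    return 0.0

# Final tricorder score: 90% prediction quality, 10% efficiency, where
# prediction quality is 0.5 * accuracy + 0.5 * weighted F1.
accuracy, weighted_f1 = 0.85, 0.767
prediction = 0.5 * accuracy + 0.5 * weighted_f1           # 0.8085
final = 0.9 * prediction + 0.1 * efficiency_score(100.0)  # 0.72765 + 0.05 ≈ 0.778
```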
+ + This method should be implemented by each competition handler to specify + which metrics are used for comparing results. + + Args: + result: The evaluation result object. + + Returns: + A tuple of key metrics that can be used for comparison. + """ + if not isinstance(result, TricorderEvaluationResult): + return tuple() + + # Round floats to handle potential floating point inaccuracies + return ( + round(result.accuracy, 6), + round(result.weighted_f1, 6), + # Sort risk category scores by key to ensure consistent order + tuple(sorted((k.value, round(v, 6)) for k, v in result.risk_category_scores.items())), + ) \ No newline at end of file diff --git a/cancer_ai/validator/competition_manager.py b/cancer_ai/validator/competition_manager.py new file mode 100644 index 000000000..d9c71ac32 --- /dev/null +++ b/cancer_ai/validator/competition_manager.py @@ -0,0 +1,312 @@ +import time +from typing import List, Tuple, Optional + +import bittensor as bt +import wandb +import hashlib + +from dotenv import load_dotenv + +from .manager import SerializableManager +from .model_manager import ModelManager, ModelInfo +from .dataset_manager import DatasetManager +from .model_run_manager import ModelRunManager +from .exceptions import ModelRunException +from .model_db import ModelDBController +from .utils import chain_miner_to_model_info + +from .competition_handlers.base_handler import BaseCompetitionHandler, BaseModelEvaluationResult +from .competition_handlers.melanoma_handler import MelanomaCompetitionHandler +from .competition_handlers.tricorder_handler import TricorderCompetitionHandler + +from .tests.mock_data import get_mock_hotkeys_with_models +from cancer_ai.chain_models_store import ( + ChainModelMetadata, + ChainMinerModel, +) + +load_dotenv() + +COMPETITION_HANDLER_MAPPING = { + "melanoma-1": MelanomaCompetitionHandler, + "melanoma-testnet": MelanomaCompetitionHandler, + "melanoma-testnet2": MelanomaCompetitionHandler, + "melanoma-7": MelanomaCompetitionHandler, + "melanoma-2": MelanomaCompetitionHandler, + "melanoma-3": MelanomaCompetitionHandler, + "tricorder-1": TricorderCompetitionHandler, + "tricorder-2": TricorderCompetitionHandler, +} + + +class ImagePredictionCompetition: + def score_model( + self, model_info: ModelInfo, pred_y: List, model_pred_y: List + ) -> float: + pass + + +class CompetitionManager(SerializableManager): + """ + CompetitionManager is responsible for managing a competition. + + It handles the scoring, model management and synchronization with the chain. + """ + + def __init__( + self, + config, + subtensor: bt.subtensor, + hotkeys: list[str], + validator_hotkey: str, + competition_id: str, + dataset_hf_repo: str, + dataset_hf_filename: str, + dataset_hf_repo_type: str, + db_controller: ModelDBController, + test_mode: bool = False, + local_fs_mode: bool = False, + ) -> None: + """ + Responsible for managing a competition. + + Args: + config (dict): Config dictionary. + competition_id (str): Unique identifier for the competition. 
+ """ + bt.logging.trace(f"Initializing Competition: {competition_id}") + self.config = config + self.subtensor = subtensor + self.competition_id = competition_id + self.results: list[tuple[str, BaseModelEvaluationResult]] = [] + self.error_results: list[tuple[str, str]] = [] + self.model_manager = ModelManager(self.config, db_controller, parent=self, subtensor=subtensor) + self.dataset_manager = DatasetManager( + config=self.config, + competition_id=competition_id, + hf_repo_id=dataset_hf_repo, + hf_filename=dataset_hf_filename, + hf_repo_type=dataset_hf_repo_type, + local_fs_mode=local_fs_mode, + ) + self.chain_model_metadata_store = ChainModelMetadata( + self.subtensor, self.config.netuid + ) + + self.hotkeys = hotkeys + self.validator_hotkey = validator_hotkey + self.db_controller = db_controller + self.test_mode = test_mode + self.local_fs_mode = local_fs_mode + + self.competition_handler: Optional[BaseCompetitionHandler] = None + + def __repr__(self) -> str: + return f"CompetitionManager<{self.competition_id}>" + + + async def chain_miner_to_model_info( + self, chain_miner_model: ChainMinerModel + ) -> ModelInfo: + if chain_miner_model.competition_id != self.competition_id: + bt.logging.debug( + f"Chain miner model {chain_miner_model.to_compressed_str()} does not belong to this competition" + ) + raise ValueError("Chain miner model does not belong to this competition") + model_info = ModelInfo( + hf_repo_id=chain_miner_model.hf_repo_id, + hf_model_filename=chain_miner_model.hf_model_filename, + hf_code_filename=chain_miner_model.hf_code_filename, + hf_repo_type=chain_miner_model.hf_repo_type, + competition_id=chain_miner_model.competition_id, + block=chain_miner_model.block, + model_hash=chain_miner_model.model_hash, + ) + return model_info + + async def get_mock_miner_models(self): + """Get registered mineres from testnet subnet 163""" + self.model_manager.hotkey_store = get_mock_hotkeys_with_models() + + async def update_miner_models(self): + """ + Updates hotkeys and downloads information of models from the chain + """ + bt.logging.info("Selecting models for competition") + bt.logging.info(f"Amount of hotkeys: {len(self.hotkeys)}") + + latest_models = self.db_controller.get_latest_models( + self.hotkeys, self.competition_id, self.config.models_query_cutoff + ) + for hotkey, model in latest_models.items(): + model_info = chain_miner_to_model_info(model) + if model_info.competition_id != self.competition_id: + bt.logging.warning( + f"Miner {hotkey} with competition id {model.competition_id} does not belong to {self.competition_id} competition, skipping" + ) + continue + self.model_manager.hotkey_store[hotkey] = model_info + bt.logging.info( + f"Amount of hotkeys with valid models: {len(self.model_manager.hotkey_store)}" + ) + + async def evaluate(self) -> Tuple[str | None, BaseModelEvaluationResult | None]: + """Returns hotkey and competition id of winning model miner""" + bt.logging.info(f"Start of evaluation of {self.competition_id}") + + hotkeys_to_slash = [] + # TODO add mock models functionality + + await self.update_miner_models() + if len(self.model_manager.hotkey_store) == 0: + bt.logging.error("No models to evaluate") + return None, None + + + await self.dataset_manager.prepare_dataset() + X_test, y_test, metadata = await self.dataset_manager.get_data() + + # Pass metadata to tricorder handler, otherwise use default parameters + if self.competition_id == "tricorder-2": + self.competition_handler: BaseCompetitionHandler = COMPETITION_HANDLER_MAPPING[self.competition_id]( + 
X_test=X_test, y_test=y_test, metadata=metadata, config=self.config + ) + else: + self.competition_handler: BaseCompetitionHandler = COMPETITION_HANDLER_MAPPING[self.competition_id]( + X_test=X_test, y_test=y_test, config=self.config + ) + + # Set preprocessing directory and preprocess data once + self.competition_handler.set_preprocessed_data_dir(self.config.models.dataset_dir) + await self.competition_handler.preprocess_and_serialize_data(X_test) + + y_test = self.competition_handler.prepare_y_pred(y_test) + evaluation_counter = 0 + models_amount = len(self.model_manager.hotkey_store.items()) + bt.logging.info(f"Evaluating {models_amount} models") + + for miner_hotkey, model_info in self.model_manager.hotkey_store.items(): + evaluation_counter +=1 + bt.logging.info(f"Evaluating {evaluation_counter}/{models_amount} hotkey: {miner_hotkey}") + model_downloaded = await self.model_manager.download_miner_model(miner_hotkey, token=self.config.hf_token) + if not model_downloaded: + bt.logging.error( + f"Failed to download model for hotkey {miner_hotkey} Skipping." + ) + continue + + computed_hash = self._compute_model_hash(model_info.file_path) + if not computed_hash: + bt.logging.info("Could not determine model hash. Skipping.") + self.error_results.append((miner_hotkey, "Could not determine model hash")) + continue + + if computed_hash != model_info.model_hash: + bt.logging.info(f"The hash of model uploaded by {miner_hotkey} does not match hash of model submitted on-chain. Slashing.") + self.error_results.append((miner_hotkey, "The hash of model uploaded does not match hash of model submitted on-chain")) + hotkeys_to_slash.append(miner_hotkey) + + model_manager = ModelRunManager( + self.config, self.model_manager.hotkey_store[miner_hotkey] + ) + start_time = time.time() + + try: + # Pass the preprocessed data generator instead of raw paths + preprocessed_data_gen = self.competition_handler.get_preprocessed_data_generator() + bt.logging.info(f"Running model inference for hotkey {miner_hotkey}") + y_pred = await model_manager.run(preprocessed_data_gen) + except ModelRunException as e: + bt.logging.error( + f"Model hotkey: {miner_hotkey} failed to run. Skipping. error: {e}" + ) + self.error_results.append((miner_hotkey, f"Failed to run model: {e}")) + continue + + try: + model_result = self.competition_handler.get_model_result( + y_test, y_pred, time.time() - start_time, model_info.model_size_mb + ) + self.results.append((miner_hotkey, model_result)) + except Exception as e: + bt.logging.error( + f"Error evaluating model for hotkey: {miner_hotkey}. Error: {str(e)}", exc_info=True + ) + self.error_results.append((miner_hotkey, f"Error evaluating model: {e}")) + bt.logging.info(f"Skipping model {miner_hotkey} due to evaluation error. 
error: {e}") + + if len(self.results) == 0: + bt.logging.error("No models were able to run") + return None, None + + # see if there are any duplicate scores, slash the copied models owners + grouped_duplicated_hotkeys = self.group_duplicate_scores(hotkeys_to_slash) + bt.logging.info(f"duplicated models: {grouped_duplicated_hotkeys}") + if len(grouped_duplicated_hotkeys) > 0: + pioneer_models_hotkeys = self.model_manager.get_pioneer_models(grouped_duplicated_hotkeys) + hotkeys_to_slash.extend([hotkey for group in grouped_duplicated_hotkeys for hotkey in group if hotkey not in pioneer_models_hotkeys]) + self.slash_model_copiers(hotkeys_to_slash) + + winning_hotkey, winning_model_result = sorted( + self.results, key=lambda x: x[1].score, reverse=True + )[0] + + for miner_hotkey, model_result in self.results: + bt.logging.info(f"Model from {miner_hotkey} successfully evaluated") + bt.logging.trace( + f"Model result for {miner_hotkey}:\n {model_result.model_dump_json(indent=4)} \n" + ) + + bt.logging.info( + f"Winning hotkey for competition {self.competition_id}: {winning_hotkey}" + ) + + # Cleanup preprocessed data + self.competition_handler.cleanup_preprocessed_data() + self.dataset_manager.delete_dataset() + return winning_hotkey, winning_model_result + + + + + def group_duplicate_scores(self, hotkeys_to_slash: list[str]) -> list[list[str]]: + """ + Groups hotkeys for models whose full evaluation‐metric tuple is identical. + """ + metrics_to_hotkeys: dict[tuple, list[str]] = {} + + for hotkey, result in self.results: + if hotkey in hotkeys_to_slash: + continue + + # Skip models with score 0.0 from duplicate detection + if result.score == 0.0: + continue + + comparable_result = self.competition_handler.get_comparable_result(result) + metrics_to_hotkeys.setdefault(comparable_result, []).append(hotkey) + + return [group for group in metrics_to_hotkeys.values() if len(group) > 1] + + + def slash_model_copiers(self, hotkeys_to_slash: list[str]): + for hotkey, result in self.results: + if hotkey in hotkeys_to_slash: + bt.logging.info(f"Slashing model copier for hotkey: {hotkey} (setting score to 0.0)") + self.error_results.append((hotkey, "Slashing model copier - setting score to 0.0")) + result.score = 0.0 + + def _compute_model_hash(self, file_path) -> str: + """Compute an 8-character hexadecimal SHA-1 hash of the model file.""" + sha1 = hashlib.sha1() + try: + with open(file_path, 'rb') as f: + while chunk := f.read(8192): + sha1.update(chunk) + full_hash = sha1.hexdigest() + truncated_hash = full_hash[:8] + bt.logging.info(f"Computed 8-character hash: {truncated_hash}") + return truncated_hash + except Exception as e: + bt.logging.error(f"Error computing hash for {file_path}: {e}", exc_info=True) + return None diff --git a/template/base/utils/__init__.py b/cancer_ai/validator/dataset_handlers/__init__.py similarity index 100% rename from template/base/utils/__init__.py rename to cancer_ai/validator/dataset_handlers/__init__.py diff --git a/cancer_ai/validator/dataset_handlers/base_handler.py b/cancer_ai/validator/dataset_handlers/base_handler.py new file mode 100644 index 000000000..cf38c3792 --- /dev/null +++ b/cancer_ai/validator/dataset_handlers/base_handler.py @@ -0,0 +1,62 @@ +from typing import List, Tuple +from abc import abstractmethod + + +class BaseDatasetHandler: + """ + Base class for handling different dataset types. + + This class initializes the config and path attributes. + + Args: + config (dict): Configuration dictionary. + path (str): Path to the dataset. 
+ + Attributes: + config (dict): Configuration dictionary. + path (str): Path to the dataset. + + """ + + def __init__(self, config, path) -> None: + """ + Initializes the BaseDatasetHandler object. + + Args: + config (dict): Configuration dictionary. + path (str): Path to the dataset. + + """ + # Initialize the config and path attributes + self.config = config # Configuration dictionary + self.path = path # Path to the dataset + self.entries = [] + + @abstractmethod + async def get_training_data(self) -> Tuple[List, List]: + """ + Abstract method to get the training data. + + This method is responsible for loading the training data and returning it as a tuple of two lists: the first list contains the input data and the second list contains the labels. + + Returns: + Tuple[List, List]: A tuple containing two lists: the first list contains the input data and the second list contains the labels. + """ + + @abstractmethod + async def sync_training_data(self): + """ + Abstract method to synchronize the training data. + + This method is responsible for reading the training data from the dataset and storing it in the self.entries attribute. + """ + + async def process_training_data(self): + """ + Process the training data. + + This method is responsible for preprocessing the training data and returning it as a tuple of two lists: the first list contains the input data and the second list contains the labels. + + Returns: + Tuple[List, List]: A tuple containing two lists: the first list contains the input data and the second list contains the labels. + """ diff --git a/cancer_ai/validator/dataset_handlers/image_csv.py b/cancer_ai/validator/dataset_handlers/image_csv.py new file mode 100644 index 000000000..cc28cfcca --- /dev/null +++ b/cancer_ai/validator/dataset_handlers/image_csv.py @@ -0,0 +1,110 @@ +from .base_handler import BaseDatasetHandler +from PIL import Image +from typing import List, Tuple +from dataclasses import dataclass +import csv +import aiofiles +from pathlib import Path + +from ..utils import log_time + + +@dataclass +class ImageEntry: + relative_path: str + label: str | int | bool # Generic label - could be is_melanoma (bool) or disease_type (str) + age: int | None = None + gender: str | None = None + location: str | None = None + + +class DatasetImagesCSV(BaseDatasetHandler): + """ + DatasetImagesCSV is responsible for handling the CSV dataset where directory structure looks as follows: + + ├── images + │ ├── image_1.jpg + │ ├── image_2.jpg + │ └── ... 
+ ├── labels.csv + """ + + def __init__(self, config, dataset_path, label_path: str) -> None: + self.config = config + self.dataset_path = dataset_path + self.label_path = label_path + self.metadata_columns = ["filepath", "label", "age", "location", "gender"] + + @log_time + async def sync_training_data(self): + self.entries: List[ImageEntry] = [] + # go over csv file + async with aiofiles.open(self.label_path, "r") as f: + content = await f.read() + + # Parse CSV with DictReader for column-agnostic access + import io + reader = csv.DictReader(io.StringIO(content)) + + for row in reader: + # Get filepath - support different column names + filepath = row.get('NewFileName') or row.get('filepath') or row.get('filename') or '' + + # Get label - support different column names + label = row.get('Class') or row.get('label') or '' + + # Parse age + age = None + age_str = row.get('Age') or row.get('age') or '' + if age_str: + try: + age = int(age_str) + except ValueError: + pass # Keep as None if invalid + + # Parse location + location = None + location_str = row.get('Location') or row.get('location') or '' + if location_str: + location = location_str.strip().lower() + # Validate against expected location values + valid_locations = ['arm', 'feet', 'genitalia', 'hand', 'head', 'leg', 'torso'] + if location not in valid_locations: + location = None # Keep as None if invalid + + # Parse gender + gender = None + gender_str = row.get('Gender') or row.get('gender') or '' + if gender_str: + gender = gender_str.strip().lower() + # Keep the raw value - validation will happen in tricorder handler + + self.entries.append(ImageEntry( + relative_path=filepath, + label=label, + age=age, + gender=gender, + location=location + )) + + @log_time + async def get_training_data(self) -> Tuple[List, List, List]: + """ + Get the training data. + + This method is responsible for loading the training data and returning a tuple containing three lists: + the first list contains paths to the images, the second list contains the labels, + and the third list contains patient metadata (age, gender). + """ + await self.sync_training_data() + pred_x = [ + Path(self.dataset_path, entry.relative_path).resolve() + for entry in self.entries + ] + pred_y = [entry.label for entry in self.entries] + pred_metadata = [ + {'age': entry.age, 'gender': entry.gender, 'location': entry.location} + for entry in self.entries + ] + await self.process_training_data() + return pred_x, pred_y, pred_metadata diff --git a/cancer_ai/validator/dataset_manager.py b/cancer_ai/validator/dataset_manager.py new file mode 100644 index 000000000..20c721df9 --- /dev/null +++ b/cancer_ai/validator/dataset_manager.py @@ -0,0 +1,206 @@ +import os +import shutil +from pathlib import Path +from typing import List, Tuple + +from huggingface_hub import HfApi +import bittensor as bt + +from .manager import SerializableManager +from .utils import run_command, log_time +from .dataset_handlers.image_csv import DatasetImagesCSV +from .exceptions import DatasetManagerException + +class DatasetManager(SerializableManager): + def __init__( + self, + config, + competition_id: str, + hf_repo_id: str, + hf_filename: str, + hf_repo_type: str, + use_auth: bool = True, + local_fs_mode: bool = False, + ) -> None: + """ + Initializes a new instance of the DatasetManager class. + + Args: + config: The configuration object. + competition_id (str): The ID of the competition. + dataset_hf_id (str): The Hugging Face ID of the dataset. + file_hf_id (str): The Hugging Face ID of the file. 
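`sync_training_data` below tolerates several CSV schemas by probing alternative column names and silently dropping invalid ages and locations. A condensed sketch of that row-parsing strategy (`first_of` and the sample CSV are illustrative, not the repo's code):

```python
import csv
import io

SAMPLE = "NewFileName,Class,Age,Location,Gender\nimg_1.jpg,melanoma,42,torso,F\n"
VALID_LOCATIONS = {"arm", "feet", "genitalia", "hand", "head", "leg", "torso"}

def first_of(row: dict, *names: str) -> str:
    """Return the first non-empty value among alternative column names."""
    return next((row[n] for n in names if row.get(n)), "")

for row in csv.DictReader(io.StringIO(SAMPLE)):
    filepath = first_of(row, "NewFileName", "filepath", "filename")
    label = first_of(row, "Class", "label")
    age_str = first_of(row, "Age", "age")
    age = int(age_str) if age_str.isdigit() else None       # invalid -> None
    loc = first_of(row, "Location", "location").strip().lower() or None
    if loc not in VALID_LOCATIONS:
        loc = None                                          # invalid -> None
    print(filepath, label, age, loc)  # img_1.jpg melanoma 42 torso
```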
+ + Returns: + None + """ + self.config = config + + self.hf_repo_id = hf_repo_id + self.hf_filename = hf_filename + self.hf_repo_type = hf_repo_type + self.competition_id = competition_id + self.use_auth = use_auth + self.local_compressed_path = "" + self.local_extracted_dir = Path(self.config.models.dataset_dir, competition_id) + self.data: Tuple[List, List] = () + self.handler = None + self.local_fs_mode = local_fs_mode + + def get_state(self) -> dict: + return {} + + def set_state(self, state: dict): + return {} + + @log_time + async def download_dataset(self): + if not os.path.exists(self.local_extracted_dir): + os.makedirs(self.local_extracted_dir) + + self.local_compressed_path = HfApi(token=self.config.hf_token).hf_hub_download( + self.hf_repo_id, + self.hf_filename, + cache_dir=Path(self.config.models.dataset_dir), + repo_type=self.hf_repo_type, + ) + + def delete_dataset(self) -> None: + """Delete dataset from disk""" + + bt.logging.info("Deleting dataset: ") + + try: + if not os.access(self.config.models.dataset_dir, os.W_OK): + bt.logging.error(f"No write permissions for: {self.local_extracted_dir}") + return + + # Optional: Check if any files are open or being used. + shutil.rmtree(self.config.models.dataset_dir) + bt.logging.info("Dataset deleted") + except OSError as e: + bt.logging.error(f"Failed to delete dataset from disk: {e}") + + @log_time + async def unzip_dataset(self) -> None: + """Unzip dataset""" + + self.local_extracted_dir = Path( + self.config.models.dataset_dir, self.competition_id + ) + # delete old unpacked dataset + if os.path.exists(self.local_extracted_dir): + os.system(f"chmod -R u+rw {self.local_extracted_dir} && rm -R {self.local_extracted_dir}") + + bt.logging.debug(f"Dataset extracted to: { self.local_compressed_path}") + + # Ensure the extraction directory exists + os.makedirs(self.local_extracted_dir, exist_ok=True) + + # TODO add error handling + zip_file_path = self.local_compressed_path + extract_dir = self.local_extracted_dir + command = f'unzip -o "{zip_file_path}" -d {extract_dir} && chmod -R u+rw {extract_dir}' + _, err = await run_command(command) + if err: + bt.logging.error(f"Error unzipping dataset: {err}") + raise DatasetManagerException(f"Error unzipping dataset: {err}") + bt.logging.info("Dataset unzipped") + + def set_dataset_handler(self) -> None: + """Detect dataset type and set handler""" + if not self.local_compressed_path: + raise DatasetManagerException( + f"Dataset '{self.config.competition_id}' not downloaded" + ) + + # Look for CSV file in the extracted directory or its subdirectories + # Try common names: labels.csv, test.csv, data.csv, etc. 
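Before the full search below, here is a compressed sketch of the discovery order it implements: preferred filenames first (in the extraction root, then one subdirectory deep), then any `*.csv` as a fallback. Names are illustrative, and `sorted()` is a determinism tweak not present in the original:

```python
from pathlib import Path

PREFERRED = ["labels.csv", "test.csv", "data.csv", "metadata.csv", "dataset.csv"]

def find_labels_csv(root: Path) -> tuple[Path, Path] | None:
    """Return (csv_path, dataset_root_dir) or None if no CSV exists."""
    dirs = [root] + [d for d in sorted(root.iterdir())
                     if d.is_dir() and not d.name.startswith("__")]  # skip __MACOSX
    for d in dirs:                       # pass 1: well-known filenames
        for name in PREFERRED:
            if (d / name).exists():
                return d / name, d
    for d in dirs:                       # pass 2: any CSV at all
        for f in sorted(d.glob("*.csv")):
            return f, d
    return None
```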
+ csv_names = ["labels.csv", "test.csv", "data.csv", "metadata.csv", "dataset.csv"] + labels_csv_path = None + dataset_root_dir = None + + # Check directly in extracted dir + for csv_name in csv_names: + direct_csv_path = Path(self.local_extracted_dir, csv_name) + if direct_csv_path.exists(): + labels_csv_path = direct_csv_path + dataset_root_dir = self.local_extracted_dir + bt.logging.info(f"Found CSV file: {csv_name}") + break + + # If not found, check in subdirectories + if not labels_csv_path: + for item in os.listdir(self.local_extracted_dir): + subdir_path = Path(self.local_extracted_dir, item) + if subdir_path.is_dir() and not item.startswith('__'): # Skip __MACOSX etc + for csv_name in csv_names: + potential_csv = Path(subdir_path, csv_name) + if potential_csv.exists(): + labels_csv_path = potential_csv + dataset_root_dir = subdir_path + bt.logging.info(f"Found CSV file in subdirectory {subdir_path}: {csv_name}") + break + if labels_csv_path: + break + + # If still not found, look for any .csv file + if not labels_csv_path: + bt.logging.info("Specific CSV names not found, looking for any .csv file...") + # Check directly in extracted dir + for item in os.listdir(self.local_extracted_dir): + if item.endswith('.csv'): + labels_csv_path = Path(self.local_extracted_dir, item) + dataset_root_dir = self.local_extracted_dir + bt.logging.info(f"Found CSV file: {item}") + break + + # Check in subdirectories + if not labels_csv_path: + for item in os.listdir(self.local_extracted_dir): + subdir_path = Path(self.local_extracted_dir, item) + if subdir_path.is_dir() and not item.startswith('__'): + for subitem in os.listdir(subdir_path): + if subitem.endswith('.csv'): + labels_csv_path = Path(subdir_path, subitem) + dataset_root_dir = subdir_path + bt.logging.info(f"Found CSV file in subdirectory {subdir_path}: {subitem}") + break + if labels_csv_path: + break + + if labels_csv_path and dataset_root_dir: + self.handler = DatasetImagesCSV( + self.config, + dataset_root_dir, + labels_csv_path, + ) + else: + raise NotImplementedError(f"Dataset handler not implemented - no CSV file found in {self.local_extracted_dir}") + + async def prepare_dataset(self) -> None: + """Download dataset, unzip and set dataset handler""" + if self.local_fs_mode: + self.local_compressed_path = self.hf_filename + else: + bt.logging.info(f"Downloading dataset '{self.competition_id}'") + await self.download_dataset() + bt.logging.info(f"Unzipping dataset '{self.competition_id}'") + await self.unzip_dataset() + bt.logging.info(f"Setting dataset handler '{self.competition_id}'") + self.set_dataset_handler() + bt.logging.info(f"Preprocessing dataset '{self.competition_id}'") + self.data = await self.handler.get_training_data() + + async def get_data(self) -> Tuple[List, List, List]: + """Get data from dataset handler""" + if not self.data: + raise DatasetManagerException( + f"Dataset '{self.competition_id}' not initalized " + ) + # Handle backward compatibility - if data has 2 elements, add empty metadata + if len(self.data) == 2: + x_data, y_data = self.data + metadata = [{'age': None, 'gender': None} for _ in x_data] + return x_data, y_data, metadata + return self.data diff --git a/cancer_ai/validator/exceptions.py b/cancer_ai/validator/exceptions.py new file mode 100644 index 000000000..e34e06c30 --- /dev/null +++ b/cancer_ai/validator/exceptions.py @@ -0,0 +1,7 @@ + + +class ModelRunException(Exception): + pass + +class DatasetManagerException(Exception): + pass \ No newline at end of file diff --git 
a/cancer_ai/validator/manager.py b/cancer_ai/validator/manager.py new file mode 100644 index 000000000..308172666 --- /dev/null +++ b/cancer_ai/validator/manager.py @@ -0,0 +1,5 @@ +from abc import ABC, abstractmethod + + +class SerializableManager(ABC): + pass \ No newline at end of file diff --git a/cancer_ai/validator/model_db.py b/cancer_ai/validator/model_db.py new file mode 100644 index 000000000..e2e72e5ad --- /dev/null +++ b/cancer_ai/validator/model_db.py @@ -0,0 +1,310 @@ +import bittensor as bt +import os +import re, traceback + +import traceback +from sqlalchemy import create_engine, Column, String, DateTime, PrimaryKeyConstraint, Integer +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from datetime import datetime, timedelta, timezone +from ..chain_models_store import ChainMinerModel +from websockets.client import OPEN as WS_OPEN + +from retry import retry + +Base = declarative_base() + +STORED_MODELS_PER_HOTKEY = 10 + +class ChainMinerModelDB(Base): + __tablename__ = 'models' + competition_id = Column(String, nullable=False) + hf_repo_id = Column(String, nullable=False) + hf_model_filename = Column(String, nullable=False) + hf_repo_type = Column(String, nullable=False) + hf_code_filename = Column(String, nullable=False) + date_submitted = Column(DateTime, nullable=False) + block = Column(Integer, nullable=False) + hotkey = Column(String, nullable=False) + model_hash = Column(String, nullable=False) + + __table_args__ = ( + PrimaryKeyConstraint('date_submitted', 'hotkey', name='pk_date_hotkey'), + ) + +class ModelDBController: + def __init__(self, db_path: str = "models.db", subtensor: bt.subtensor = None): + db_url = f"sqlite:///{os.path.abspath(db_path)}" + self.engine = create_engine(db_url, echo=False) + Base.metadata.create_all(self.engine) + self.Session = sessionmaker(bind=self.engine) + + if subtensor is not None and "test" not in subtensor.chain_endpoint.lower(): + subtensor = bt.subtensor(network="archive") + self.subtensor = subtensor + + # Capture the original connect() and override with _ws_connect wrapper + # Substrate-interface calls connect() on every RPC under the hood, + # so we wrap it to reuse the same socket unless it's truly closed. + self._orig_ws_connect = self.subtensor.substrate.connect + self.subtensor.substrate.connect = self._ws_connect + + ws = self.subtensor.substrate.connect() + bt.logging.info(f"Initial WebSocket state: {ws.state}") + + self._migrate_database() + + def _ws_connect(self, *args, **kwargs): + """ + Replacement for substrate.connect(). + Reuses existing WebSocketClientProtocol if State.OPEN; + otherwise performs a fresh handshake via original connect(). 
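The `_ws_connect` wrapper above (and its twin in `ModelManager`) implements a simple reuse-or-reconnect policy over `substrate.connect()`. Stripped of substrate specifics, the pattern looks like this; the `Connection`/`Client` classes are stand-ins, not the substrate-interface API:

```python
class Connection:
    """Stand-in for a WebSocket connection object."""
    def __init__(self):
        self.open = True
    def close(self):
        self.open = False

class Client:
    def __init__(self):
        self._conn: Connection | None = None

    def _fresh_connect(self) -> Connection:
        """Stands in for the original connect(): a full handshake."""
        return Connection()

    def connect(self) -> Connection:
        if self._conn is not None and self._conn.open:
            return self._conn              # reuse the live socket
        self._conn = self._fresh_connect() # otherwise re-handshake
        return self._conn

c = Client()
assert c.connect() is c.connect()          # same socket reused while open
c._conn.close()
assert c.connect().open                    # reconnected after close
```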
+ """ + # Check current socket + current = getattr(self.subtensor.substrate, "ws", None) + if current is not None and current.state == WS_OPEN: + return current + + # If socket not open, reconnect + bt.logging.warning("⚠️ Subtensor WebSocket not OPEN—reconnecting…") + try: + new_ws = self._orig_ws_connect(*args, **kwargs) + except Exception as e: + bt.logging.error("Failed to reconnect WebSocket: %s", e, exc_info=True) + raise + + # Update the substrate.ws attribute so future calls reuse this socket + setattr(self.subtensor.substrate, "ws", new_ws) + return new_ws + + def _migrate_database(self): + """Check and apply migration for model_hash column if missing.""" + with self.engine.connect() as connection: + result = connection.execute("PRAGMA table_info(models)").fetchall() + column_names = [row[1] for row in result] + if "model_hash" not in column_names: + try: + connection.execute("ALTER TABLE models ADD COLUMN model_hash TEXT CHECK(LENGTH(model_hash) <= 8)") + bt.logging.info("Migrated database: Added model_hash column with length constraint to models table") + except Exception as e: + bt.logging.error(f"Failed to migrate database: {e}") + raise + + + + def add_model(self, chain_miner_model: ChainMinerModel, hotkey: str): + session = self.Session() + existing_model = self.get_model(hotkey) + if not existing_model: + try: + model_record = self.convert_chain_model_to_db_model(chain_miner_model, hotkey) + session.add(model_record) + session.commit() + bt.logging.debug(f"Successfully added DB model info for hotkey {hotkey} into the DB.") + except Exception as e: + session.rollback() + raise e + finally: + session.close() + else: + bt.logging.debug(f"DB model for hotkey {hotkey} already exists, proceeding with updating the model info.") + self.update_model(chain_miner_model, hotkey) + + def get_model(self, hotkey: str) -> ChainMinerModel | None: + session = self.Session() + try: + model_record = session.query(ChainMinerModelDB).filter_by( + hotkey=hotkey + ).first() + if model_record: + return self.convert_db_model_to_chain_model(model_record) + return None + finally: + session.close() + + def get_latest_model(self, hotkey: str, cutoff_time: float = None) -> ChainMinerModel | None: + cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=cutoff_time) if cutoff_time else datetime.now(timezone.utc) + bt.logging.trace(f"Getting latest DB model for hotkey {hotkey}") + session = self.Session() + model_record = None + try: + model_record = ( + session.query(ChainMinerModelDB) + .filter(ChainMinerModelDB.hotkey == hotkey) + .filter(ChainMinerModelDB.date_submitted < cutoff_time) + .order_by(ChainMinerModelDB.date_submitted.desc()) + .first() + ) + except Exception as e: + bt.logging.error(f"Error in get_latest_model for hotkey {hotkey}: {e}\n {traceback.format_exc()}") + raise + finally: + session.close() + + if not model_record: + return None + + return self.convert_db_model_to_chain_model(model_record) + + def delete_model(self, date_submitted: datetime, hotkey: str): + session = self.Session() + try: + model_record = session.query(ChainMinerModelDB).filter_by( + date_submitted=date_submitted, hotkey=hotkey + ).first() + if model_record: + session.delete(model_record) + session.commit() + return True + return False + except Exception as e: + session.rollback() + raise e + finally: + session.close() + + def update_model(self, chain_miner_model: ChainMinerModel, hotkey: str): + session = self.Session() + try: + existing_model = session.query(ChainMinerModelDB).filter_by( + hotkey=hotkey + 
).first() + + if existing_model: + existing_model.competition_id = chain_miner_model.competition_id + existing_model.hf_repo_id = chain_miner_model.hf_repo_id + existing_model.hf_model_filename = chain_miner_model.hf_model_filename + existing_model.hf_repo_type = chain_miner_model.hf_repo_type + existing_model.hf_code_filename = chain_miner_model.hf_code_filename + existing_model.date_submitted = self.get_block_timestamp(chain_miner_model.block) + existing_model.block = chain_miner_model.block + existing_model.model_hash = chain_miner_model.model_hash + + session.commit() + bt.logging.debug(f"Successfully updated DB model for hotkey {hotkey}.") + return True + else: + bt.logging.debug(f"No existing DB model found for hotkey {hotkey}. Update skipped.") + return False + + except Exception as e: + session.rollback() + bt.logging.error(f"Error updating DB model for hotkey {hotkey}: {e}", exc_info=True) + raise e + finally: + session.close() + + + def get_latest_models(self, hotkeys: list[str], competition_id: str, cutoff: int = None) -> dict[str, ChainMinerModel]: + cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=cutoff) if cutoff else datetime.now(timezone.utc) + session = self.Session() + try: + # Use a correlated subquery to get the latest record for each hotkey that doesn't violate the cutoff + latest_models_to_hotkeys = {} + for hotkey in hotkeys: + model_record = ( + session.query(ChainMinerModelDB) + .filter(ChainMinerModelDB.hotkey == hotkey) + .filter(ChainMinerModelDB.competition_id == competition_id) + .filter(ChainMinerModelDB.date_submitted < cutoff_time) + .order_by(ChainMinerModelDB.date_submitted.desc()) # Order by newest first + .first() # Get the first (newest) record that meets the cutoff condition + ) + if model_record: + latest_models_to_hotkeys[hotkey] = self.convert_db_model_to_chain_model(model_record) + + return latest_models_to_hotkeys + finally: + session.close() + + def clean_old_records(self, hotkeys: list[str]): + session = self.Session() + + for hotkey in hotkeys: + try: + records = ( + session.query(ChainMinerModelDB) + .filter(ChainMinerModelDB.hotkey == hotkey) + .order_by(ChainMinerModelDB.date_submitted.desc()) + .all() + ) + + # If there are more than STORED_MODELS_PER_HOTKEY records, delete the oldest ones + if len(records) > STORED_MODELS_PER_HOTKEY: + records_to_delete = records[STORED_MODELS_PER_HOTKEY:] + for record in records_to_delete: + session.delete(record) + + session.commit() + + except Exception as e: + session.rollback() + bt.logging.error(f"Error processing hotkey {hotkey}: {e}") + + try: + # Delete all records for hotkeys not in the given list + session.query(ChainMinerModelDB).filter(ChainMinerModelDB.hotkey.notin_(hotkeys)).delete(synchronize_session=False) + session.commit() + except Exception as e: + session.rollback() + bt.logging.error(f"Error deleting DB records for hotkeys not in list: {e}") + + finally: + session.close() + + def convert_chain_model_to_db_model(self, chain_miner_model: ChainMinerModel, hotkey: str) -> ChainMinerModelDB: + return ChainMinerModelDB( + competition_id = chain_miner_model.competition_id, + hf_repo_id = chain_miner_model.hf_repo_id, + hf_model_filename = chain_miner_model.hf_model_filename, + hf_repo_type = chain_miner_model.hf_repo_type, + hf_code_filename = chain_miner_model.hf_code_filename, + date_submitted = self.get_block_timestamp(chain_miner_model.block), + block = chain_miner_model.block, + hotkey = hotkey, + model_hash=chain_miner_model.model_hash + ) + + def 
convert_db_model_to_chain_model(self, model_record: ChainMinerModelDB) -> ChainMinerModel: + return ChainMinerModel( + competition_id=model_record.competition_id, + hf_repo_id=model_record.hf_repo_id, + hf_model_filename=model_record.hf_model_filename, + hf_repo_type=model_record.hf_repo_type, + hf_code_filename=model_record.hf_code_filename, + block=model_record.block, + model_hash=model_record.model_hash, + ) + + @retry(tries=10, delay=1, backoff=2, max_delay=30) + def get_block_timestamp(self, block_number) -> datetime: + """Gets the timestamp of a given block.""" + try: + block_hash = self.subtensor.get_block_hash(block_number) + + if block_hash is None: + raise ValueError(f"Block hash not found for block number {block_number}") + + timestamp_info = self.subtensor.substrate.query( + module="Timestamp", + storage_function="Now", + block_hash=block_hash + ) + + if timestamp_info is None: + raise ValueError(f"Timestamp not found for block hash {block_hash}") + + timestamp_ms = timestamp_info.value + block_datetime = datetime.fromtimestamp(timestamp_ms / 1000.0, tz=timezone.utc) + + return block_datetime + except Exception as e: + bt.logging.exception(f"Error retrieving block timestamp: {e}") + raise + + def close(self): + try: + bt.logging.debug("Closing ModelDBController and websocket connection.") + self.subtensor.substrate.close_websocket() + except Exception: + pass diff --git a/cancer_ai/validator/model_manager.py b/cancer_ai/validator/model_manager.py new file mode 100644 index 000000000..dc2533faf --- /dev/null +++ b/cancer_ai/validator/model_manager.py @@ -0,0 +1,367 @@ +import os +import asyncio +import json +from dataclasses import dataclass, asdict, is_dataclass +from typing import Optional +from datetime import datetime, timezone, timedelta + +import bittensor as bt +from huggingface_hub import HfApi, HfFileSystem + +from .models import ModelInfo +from .exceptions import ModelRunException +from .utils import decode_params +from websockets.client import OPEN as WS_OPEN + + +class ModelManager(): + def __init__(self, config, db_controller, subtensor: bt.subtensor, parent: Optional["CompetitionManager"] = None) -> None: + self.config = config + self.db_controller = db_controller + + if not os.path.exists(self.config.models.model_dir): + os.makedirs(self.config.models.model_dir) + self.api = HfApi(token=self.config.hf_token) + self.hotkey_store: dict[str, ModelInfo] = {} + self.parent = parent + + if subtensor is not None and "test" not in subtensor.chain_endpoint.lower(): + subtensor = bt.subtensor(network="archive") + self.subtensor = subtensor + + # Capture the original connect() and override with _ws_connect wrapper + # Substrate-interface calls connect() on every RPC under the hood, + # so we wrap it to reuse the same socket unless it's truly closed. + self._orig_ws_connect = self.subtensor.substrate.connect + self.subtensor.substrate.connect = self._ws_connect + + ws = self.subtensor.substrate.connect() + bt.logging.info(f"Initial WebSocket state: {ws.state}") + + def _ws_connect(self, *args, **kwargs): + """ + Replacement for substrate.connect(). + Reuses existing WebSocketClientProtocol if State.OPEN; + otherwise performs a fresh handshake via original connect(). 
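`get_block_timestamp` above leans on the `retry` package's `@retry(tries=10, delay=1, backoff=2, max_delay=30)`. A hand-rolled equivalent makes the capped exponential backoff schedule explicit (1s, 2s, 4s, 8s, 16s, then 30s); `flaky` is a hypothetical stand-in for a chain query:

```python
import time

def with_backoff(fn, tries=10, delay=1.0, backoff=2.0, max_delay=30.0):
    """Call fn(), retrying on any exception with capped exponential backoff."""
    for attempt in range(tries):
        try:
            return fn()
        except Exception:
            if attempt == tries - 1:
                raise                      # out of attempts: propagate
            time.sleep(delay)
            delay = min(delay * backoff, max_delay)  # 1, 2, 4, ... capped at 30

attempts = {"n": 0}
def flaky():
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise RuntimeError("transient chain error")
    return "block-hash"

print(with_backoff(flaky, delay=0.01))  # succeeds on the third attempt
```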
+ """ + # Check current socket + current = getattr(self.subtensor.substrate, "ws", None) + if current is not None and current.state == WS_OPEN: + return current + + # If socket not open, reconnect + bt.logging.warning("⚠️ Subtensor WebSocket not OPEN—reconnecting…") + try: + new_ws = self._orig_ws_connect(*args, **kwargs) + except Exception as e: + bt.logging.error("Failed to reconnect WebSocket: %s", e, exc_info=True) + raise + + # Update the substrate.ws attribute so future calls reuse this socket + setattr(self.subtensor.substrate, "ws", new_ws) + return new_ws + + async def model_license_valid(self, hotkey) -> tuple[bool, Optional[str]]: + hf_id = self.hotkey_store[hotkey].hf_repo_id + try: + model_info = self.api.model_info(hf_id, timeout=30) + except Exception as e: + bt.logging.error(f"Cannot get information about repository {hf_id}. Error: {e}") + return False, f"HF API ERROR: {e}" + + meta_license = None + if model_info.card_data: + meta_license = model_info.card_data.get("license") + + if meta_license and "mit" in meta_license.lower(): + return True, None + + return False, "NOT_MIT" + + async def download_miner_model(self, hotkey, token: Optional[str] = None) -> bool: + """Downloads the newest model from Hugging Face and saves it to disk. + Returns: + bool: True if the model was downloaded successfully, False otherwise. + """ + MAX_RETRIES = 3 + RETRY_DELAY = 2 # seconds + + model_info = self.hotkey_store[hotkey] + + fs = HfFileSystem(token=token) + + repo_path = os.path.join(model_info.hf_repo_id, model_info.hf_model_filename) + + is_valid, reason = await self.model_license_valid(hotkey) + if not is_valid: + hf_id = self.hotkey_store[hotkey].hf_repo_id + + if reason.startswith("HF API ERROR"): + bt.logging.error(f"Could not verify license for {hf_id}: {reason.split(':', 1)[1]}") + self.parent.error_results.append((hotkey, "Couldn't verify license due to HF API error")) + else: + bt.logging.error(f"License for {hf_id} not found or invalid") + self.parent.error_results.append((hotkey, "MIT license not found or invalid")) + return False + + + bt.logging.debug(f"License found for {model_info.hf_repo_id}") + # List files in the repository and get file date with retry + files = None + file_date = None + for retry_counter in range(MAX_RETRIES): + try: + files = fs.ls(model_info.hf_repo_id) + + # Find the specific file and its upload date + for file in files: + if model_info.hf_model_filename.lower() in file["name"].lower(): + # Extract the upload date + file_date = file["last_commit"]["date"] + break + + if file_date: # If we found the file, break out of the retry loop + break + else: + # File not found but repository exists, so we'll try again + if retry_counter < MAX_RETRIES - 1: + bt.logging.warning(f"Retry {retry_counter+1}/{MAX_RETRIES}: File {model_info.hf_model_filename} not found in repository {model_info.hf_repo_id}, retrying...") + await asyncio.sleep(RETRY_DELAY * (retry_counter + 1)) + else: + bt.logging.error(f"File {model_info.hf_model_filename} not found in repository {model_info.hf_repo_id} after {MAX_RETRIES} attempts") + self.parent.error_results.append((hotkey, f"File {model_info.hf_model_filename} not found in repository {model_info.hf_repo_id}")) + return False + + except Exception as e: + if retry_counter < MAX_RETRIES - 1: + bt.logging.warning(f"Retry {retry_counter+1}/{MAX_RETRIES}: Failed to list files in repository {model_info.hf_repo_id}: {e}") + await asyncio.sleep(RETRY_DELAY * (retry_counter + 1)) # Exponential backoff + else: + bt.logging.error(f"Failed 
to list files in repository {model_info.hf_repo_id} after {MAX_RETRIES} attempts: {e}") + self.parent.error_results.append((hotkey, f"Cannot list files in repo {model_info.hf_repo_id}")) + return False + + # We don't need this check anymore since we handle it in the retry loop + + # Parse and check if the model is too recent to download + is_too_recent, parsed_date = self.is_model_too_recent(file_date, model_info.hf_model_filename, hotkey) + if is_too_recent: + self.parent.error_results.append((hotkey, f"Model is too recent")) + return False + + file_date = parsed_date + + # Download the file with retry + for retry_counter in range(MAX_RETRIES): + try: + model_info.file_path = self.api.hf_hub_download( + repo_id=model_info.hf_repo_id, + repo_type="model", + filename=model_info.hf_model_filename, + cache_dir=self.config.models.model_dir, + token=self.config.hf_token if hasattr(self.config, "hf_token") else None, + ) + break + except Exception as e: + if retry_counter < MAX_RETRIES - 1: + bt.logging.warning(f"Retry {retry_counter+1}/{MAX_RETRIES}: Failed to download model file: {e}") + await asyncio.sleep(RETRY_DELAY * (retry_counter + 1)) # Exponential backoff + else: + bt.logging.error(f"Failed to download model file after {MAX_RETRIES} attempts: {e}") + self.parent.error_results.append((hotkey, f"Failed to download model file: {e}")) + return False + + # Verify the downloaded file exists + if not os.path.exists(model_info.file_path): + bt.logging.error(f"Downloaded file does not exist at {model_info.file_path}") + self.parent.error_results.append((hotkey, f"Downloaded file does not exist at {model_info.file_path}")) + return False + + # Check model size for efficiency scoring + model_size_bytes = os.path.getsize(model_info.file_path) + model_size_mb = model_size_bytes / (1024 * 1024) + + # Store model size for efficiency scoring + model_info.model_size_mb = model_size_mb + + # Log model size with efficiency implications + if model_size_mb <= 50: + bt.logging.info( + f"Model size: {model_size_mb:.1f}MB - Full efficiency score" + ) + elif model_size_mb <= 150: + efficiency_percent = ((150 - model_size_mb) / 100) * 100 + bt.logging.info( + f"Model size: {model_size_mb:.1f}MB - {efficiency_percent:.0f}% efficiency score" + ) + else: + bt.logging.warning( + f"Model size: {model_size_mb:.1f}MB - 0% efficiency score (exceeds 150MB)" + ) + + bt.logging.info(f"Successfully downloaded model file to {model_info.file_path}") + return True + + def is_model_too_recent(self, file_date, filename, hotkey): + """Checks if a model file was uploaded too recently based on the cutoff time. 
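The size logging above implies a piecewise efficiency rule: full credit up to 50 MB, a linear falloff to zero between 50 and 150 MB, and nothing beyond that. As a pure function (a sketch reconstructed from the log messages, not the scorer itself):

```python
def efficiency_percent(size_mb: float) -> float:
    """Efficiency score implied by the logging: 50/150 MB thresholds."""
    if size_mb <= 50:
        return 100.0                          # full efficiency score
    if size_mb <= 150:
        return (150 - size_mb) / 100 * 100    # linear falloff 100 -> 0
    return 0.0                                # exceeds 150 MB

assert efficiency_percent(40) == 100.0
assert efficiency_percent(100) == 50.0
assert efficiency_percent(200) == 0.0
```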
+ + Args: + file_date: The date when the file was uploaded (string or datetime) + filename: The name of the model file + hotkey: The hotkey of the miner + + Returns: + tuple: (is_too_recent, parsed_date) where is_too_recent is a boolean indicating if the model + is too recent to download, and parsed_date is the parsed datetime object with timezone + """ + # Ensure file_date is a datetime with timezone + try: + if isinstance(file_date, str): + file_date = datetime.fromisoformat(file_date) + if file_date.tzinfo is None: + file_date = file_date.replace(tzinfo=timezone.utc) + except Exception as e: + bt.logging.error(f"Failed to parse file date {file_date}: {e}") + return True, None + + bt.logging.debug(f"File {filename} was uploaded on: {file_date}") + + # Check if file is newer than our cutoff date (uploaded within last X minutes) + now = datetime.now(timezone.utc) # Get current time in UTC + + # Calculate time difference in minutes + time_diff = (now - file_date).total_seconds() / 60 + + if time_diff < self.config.models_query_cutoff: + bt.logging.warning(f"Skipping model for hotkey {hotkey} because it was uploaded {time_diff:.2f} minutes ago, which is within the cutoff of {self.config.models_query_cutoff} minutes") + return True, file_date + + return False, file_date + + + def add_model( + self, + hotkey, + hf_repo_id, + hf_model_filename, + hf_code_filename=None, + hf_repo_type=None, + ) -> None: + """Saves locally information about a new model.""" + self.hotkey_store[hotkey] = ModelInfo( + hf_repo_id, hf_model_filename, hf_code_filename, hf_repo_type + ) + + def delete_model(self, hotkey) -> None: + """Deletes locally information about a model and the corresponding file on disk.""" + + bt.logging.info(f"Deleting model: {hotkey}") + if hotkey in self.hotkey_store and self.hotkey_store[hotkey].file_path: + os.remove(self.hotkey_store[hotkey].file_path) + self.hotkey_store[hotkey] = None + + def _extract_raw_value(self, fields: list[dict]) -> str: + """Return the first Raw value from the `fields` list.""" + for field in fields: + for k, v in field.items(): + if k.startswith("Raw"): + return v + raise KeyError("No Raw entry found in `info.fields`") + + + def get_pioneer_models(self, grouped_hotkeys: list[list[str]]) -> list[str]: + """ + Does a check on whether chain submit date was later then HF commit date. If not slashes. + Compares chain submit date duplicated models to elect a pioneer based on block of submission (date) + Every hotkey that is not included in the candidate list is a subject to slashing. 
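`is_model_too_recent` above reduces to an age comparison in UTC minutes: anything uploaded inside the last `cutoff_minutes` is rejected. A minimal sketch with illustrative values:

```python
from datetime import datetime, timedelta, timezone

def too_recent(file_date: datetime, cutoff_minutes: float) -> bool:
    """True if the file was uploaded within the cutoff window."""
    age_min = (datetime.now(timezone.utc) - file_date).total_seconds() / 60
    return age_min < cutoff_minutes

uploaded = datetime.now(timezone.utc) - timedelta(minutes=10)
assert too_recent(uploaded, cutoff_minutes=30)      # 10 min old -> rejected
assert not too_recent(uploaded, cutoff_minutes=5)   # older than cutoff -> ok
```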
+ """ + pioneers = [] + + if self.config.hf_token: + fs = HfFileSystem(token=self.config.hf_token) + else: + fs = HfFileSystem() + + for group in grouped_hotkeys: + candidate_hotkeys = [] + + for hotkey in group: + model_info = self.hotkey_store.get(hotkey) + if not model_info: + bt.logging.error(f"Model info for hotkey {hotkey} not found.") + self.parent.error_results.append((hotkey, "Model info not found.")) + continue + + try: + matches = fs.glob(f"{model_info.hf_repo_id}/extrinsic_record.json", refresh=True) + if not matches: + raise FileNotFoundError("extrinsic_record.json not found in repo") + + record_path = matches[0] + with fs.open(record_path, "r", encoding="utf-8") as rf: + record_data = json.load(rf) + + file_hotkey = record_data.get("hotkey") + extrinsic_id = record_data.get("extrinsic") + if file_hotkey != hotkey or not extrinsic_id: + raise ValueError(f"Invalid record contents: {record_data}") + + except Exception as e: + bt.logging.error(f"Failed to load HF repo extrinsic record for {hotkey}: {e}", exc_info=True) + self.parent.error_results.append((hotkey, "Invalid or missing extrinsic record in HF repo.")) + continue + + try: + blk_str, idx_str = extrinsic_id.split("-", 1) + block_num = int(blk_str) + ext_idx = int(idx_str, 16 if idx_str.lower().startswith("0x") else 10) + + block_data = self.subtensor.substrate.get_block(block_number=block_num) + extrinsics = block_data.get("extrinsics", []) + + if ext_idx < 0 or ext_idx >= len(extrinsics): + raise IndexError(f"Extrinsic index {ext_idx} out of bounds") + + ext = extrinsics[ext_idx] + signer = ext.value.get("address") + if signer != hotkey: + raise ValueError(f"Extrinsic signer {signer} != expected hotkey {hotkey}") + + call = ext.value.get("call", {}) + module = call.get("call_module") + function = call.get("call_function") + + raw_params = {p["name"]: p["value"] for p in call.get("call_args", [])} + decoded_params = decode_params(raw_params) + + except Exception as e: + bt.logging.exception(f"Failed to decode extrinsic {extrinsic_id} for {hotkey}: {e}", exc_info=True) + self.parent.error_results.append( + (hotkey, f"Extrinsic {extrinsic_id} not found or invalid for hotkey.") + ) + continue + + bt.logging.info(f"Found Extrinsic {extrinsic_id} → {module}.{function} {decoded_params} for hotkey {hotkey}") + try: + info = decoded_params.get("info", {}) + fields = info.get("fields", []) + raw_val = self._extract_raw_value(fields) + chain_model_hash = raw_val.split(":")[-1] + participant_model_hash = self.hotkey_store[hotkey].model_hash + + if chain_model_hash != participant_model_hash: + raise ValueError( + f"chain {chain_model_hash} != participant {participant_model_hash}" + ) + + except Exception as e: + bt.logging.error(f"Model hash comparison failed for {hotkey}: {e}", exc_info=True) + self.parent.error_results.append((hotkey, "Model hash mismatch or extraction error.")) + continue + + candidate_hotkeys.append((hotkey, block_num)) + + if candidate_hotkeys: + pioneer_hotkey = min(candidate_hotkeys, key=lambda x: x[1])[0] + pioneers.append(pioneer_hotkey) + return pioneers diff --git a/cancer_ai/validator/model_manager_test.py b/cancer_ai/validator/model_manager_test.py new file mode 100644 index 000000000..7ad5a1b55 --- /dev/null +++ b/cancer_ai/validator/model_manager_test.py @@ -0,0 +1,53 @@ +import os +import pytest +from types import SimpleNamespace +from unittest.mock import patch, MagicMock +from .model_manager import ( + ModelManager, +) + +hotkey = "test_hotkey" +repo_id = "test_repo_id" +filename = "test_filename" 
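`get_pioneer_models` below verifies each duplicate's on-chain extrinsic record and then elects the earliest submission as the pioneer. The id parsing and election reduce to a few lines; the hotkeys and extrinsic ids here are made up:

```python
def parse_extrinsic_id(extrinsic_id: str) -> tuple[int, int]:
    """Split '<block>-<index>', where the index may be hex (0x...) or decimal."""
    blk_str, idx_str = extrinsic_id.split("-", 1)
    base = 16 if idx_str.lower().startswith("0x") else 10
    return int(blk_str), int(idx_str, base)

# Among verified duplicates, the earliest block is the pioneer:
candidates = [("hotkey_a", parse_extrinsic_id("4200-0x2")[0]),
              ("hotkey_b", parse_extrinsic_id("4100-7")[0])]
pioneer = min(candidates, key=lambda x: x[1])[0]
assert pioneer == "hotkey_b"  # earliest submission block wins
```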
+ + +@pytest.fixture +def model_manager() -> ModelManager: + config_obj = SimpleNamespace(**{"model_dir": "/tmp/models", "models": SimpleNamespace(**{"model_dir": "/tmp/models"})}) + # Create a mock db_controller + db_controller = MagicMock() + return ModelManager(config=config_obj, db_controller=db_controller) + + +def test_add_model(model_manager: ModelManager) -> None: + model_manager.add_model(hotkey, repo_id, filename) + + assert hotkey in model_manager.get_state() + assert model_manager.get_state()[hotkey]["hf_repo_id"] == repo_id + assert model_manager.get_state()[hotkey]["hf_model_filename"] == filename + + +def test_delete_model(model_manager: ModelManager) -> None: + model_manager.add_model(hotkey, repo_id, filename) + model_manager.delete_model(hotkey) + + assert hotkey not in model_manager.get_state() + + +@pytest.mark.skip( + reason="we don't want to test every time with downloading data from huggingface" +) +def test_real_downloading(model_manager: ModelManager) -> None: + model_manager.add_model( + "example", "vidhiparikh/House-Price-Estimator", "model_custom.pkcls" + ) + model_manager.download_miner_model("example") + model_path = model_manager.hotkey_store["example"].file_path + + assert os.path.exists(model_path) + + # delete the file + model_manager.delete_model("example") + + # assert the file is deleted + assert not os.path.exists(model_path) diff --git a/cancer_ai/validator/model_run_manager.py b/cancer_ai/validator/model_run_manager.py new file mode 100644 index 000000000..ce851069d --- /dev/null +++ b/cancer_ai/validator/model_run_manager.py @@ -0,0 +1,63 @@ +import bittensor as bt +from typing import List + +from .manager import SerializableManager +from .model_manager import ModelInfo +from .utils import detect_model_format, ModelType +from .model_runners.pytorch_runner import PytorchRunnerHandler +from .model_runners.tensorflow_runner import TensorflowRunnerHandler +from .model_runners.onnx_runner import OnnxRunnerHandler +from .exceptions import ModelRunException + + +MODEL_TYPE_HANDLERS = { + ModelType.PYTORCH: PytorchRunnerHandler, + ModelType.TENSORFLOW_SAVEDMODEL: TensorflowRunnerHandler, + ModelType.ONNX: OnnxRunnerHandler, +} + + + +class ModelRunManager(SerializableManager): + def __init__(self, config, model: ModelInfo) -> None: + self.config = config + self.model = model + self.set_runner_handler() + + def get_state(self) -> dict: + return {} + + def set_state(self, state: dict): + pass + + def set_runner_handler(self) -> None: + """Sets the model runner handler based on the model type.""" + + model_type = detect_model_format(self.model.file_path) + # initializing ml model handler object + + model_handler = MODEL_TYPE_HANDLERS.get(model_type) + if model_handler == None: + bt.logging.error (f"Unknown model format {self.model.hf_repo_id} {self.model.hf_repo_id}") + raise ModelRunException("Unknown model format") + + + self.handler = model_handler(self.config, self.model.file_path) + + async def run(self, preprocessed_data_generator) -> List: + """ + Run the model with the given preprocessed data generator. 
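`ModelRunManager.set_runner_handler` above is a classic table dispatch: detect the model format, map it to a handler class, and raise on anything unknown. A self-contained sketch (names are stand-ins; note the idiomatic `is None` check where the original compares with `== None`):

```python
class UnknownModelFormat(Exception):
    pass

class OnnxHandler:
    def __init__(self, path: str):
        self.path = path

HANDLERS = {"onnx": OnnxHandler}  # format name -> handler class

def make_handler(path: str):
    ext = path.rsplit(".", 1)[-1].lower()  # stand-in for detect_model_format
    handler_cls = HANDLERS.get(ext)
    if handler_cls is None:                # prefer `is None` over `== None`
        raise UnknownModelFormat(f"Unknown model format: {path}")
    return handler_cls(path)

assert isinstance(make_handler("model.onnx"), OnnxHandler)
```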
+ + Args: + preprocessed_data_generator: Generator yielding preprocessed data chunks + + Returns: + List: model predictions + """ + + try: + model_predictions = await self.handler.run(preprocessed_data_generator) + return model_predictions + except ModelRunException as e: + bt.logging.error(f"Error running model {self.model.hf_repo_id}: {e}") + return [] # Return empty list to indicate failure diff --git a/cancer_ai/validator/model_runners/__init__.py b/cancer_ai/validator/model_runners/__init__.py new file mode 100644 index 000000000..492460b4d --- /dev/null +++ b/cancer_ai/validator/model_runners/__init__.py @@ -0,0 +1,12 @@ +from abc import abstractmethod +from typing import AsyncGenerator, Union, Dict, Any, Tuple, List +import numpy as np + +class BaseRunnerHandler: + def __init__(self, config, model_path: str) -> None: + self.config = config + self.model_path = model_path + + @abstractmethod + async def run(self, preprocessed_data_generator: AsyncGenerator[Union[np.ndarray, Tuple[np.ndarray, List[Dict[str, Any]]]], None]): + """Execute the run process of the model with preprocessed data chunks.""" diff --git a/cancer_ai/validator/model_runners/onnx_runner.py b/cancer_ai/validator/model_runners/onnx_runner.py new file mode 100644 index 000000000..57c07a9ce --- /dev/null +++ b/cancer_ai/validator/model_runners/onnx_runner.py @@ -0,0 +1,118 @@ +from typing import List, AsyncGenerator, Union, Dict, Any, Tuple +import numpy as np +import bittensor as bt +from collections import defaultdict +from ..exceptions import ModelRunException +from ..competition_handlers.tricorder_handler import LocationId + +from . import BaseRunnerHandler + + +class OnnxRunnerHandler(BaseRunnerHandler): + async def run(self, preprocessed_data_generator: AsyncGenerator[Union[np.ndarray, Tuple[np.ndarray, List[Dict[str, Any]]]], None]) -> List: + """ + Run ONNX model inference on preprocessed data chunks. 
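The ONNX runner below boils down to the standard onnxruntime flow: build one `InferenceSession`, feed a dict keyed by input names, and take the first output. A minimal sketch; the model path and input shape are assumptions, not values from the repo:

```python
import numpy as np
import onnxruntime

# "model.onnx" is a placeholder path; shape (N, C, H, W) is assumed.
session = onnxruntime.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name
batch = np.zeros((1, 3, 224, 224), dtype=np.float32)
predictions = session.run(None, {input_name: batch})[0]  # first output tensor
print(predictions.shape)
```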
+ + Args: + preprocessed_data_generator: Generator yielding preprocessed numpy arrays, + or tuples of (numpy arrays, metadata) for tricorder + + Returns: + List of model predictions + """ + import onnxruntime + + error_counter = defaultdict(int) + + try: + session = onnxruntime.InferenceSession(self.model_path) + except Exception as e: + bt.logging.error(f"An unexpected error occurred when loading ONNX model: {e}") + raise ModelRunException(f"An unexpected error occurred when loading ONNX model: {e}") from e + + results = [] + + async for data in preprocessed_data_generator: + try: + # Handle both formats: plain numpy array or tuple with metadata + if isinstance(data, tuple): + # Tricorder format: (image_data, metadata) + chunk, metadata = data + + # Prepare inputs for ONNX model + inputs = session.get_inputs() + input_data = {} + + if len(inputs) >= 2: + # Model expects both image and metadata inputs + image_input_name = inputs[0].name + metadata_input_name = inputs[1].name + metadata_array = self._prepare_metadata_array(metadata) + + input_data = { + image_input_name: chunk, + metadata_input_name: metadata_array + } + else: + # Model only expects image input (fallback) + input_data = {inputs[0].name: chunk} + else: + # Melanoma format: plain numpy array (no metadata) + chunk = data + input_name = session.get_inputs()[0].name + input_data = {input_name: chunk} + + chunk_results = session.run(None, input_data)[0] + results.extend(chunk_results) + + except Exception as e: + bt.logging.warning(f"An error occurred during inference on chunk {data}: {e}") + error_counter['InferenceError'] += 1 + continue + + # Handle error summary + if error_counter: + error_summary = "; ".join([f"{count} {error_type.replace('_', ' ')}(s)" + for error_type, count in error_counter.items()]) + bt.logging.info(f"ONNX inference completed with issues: {error_summary}") + + if not results: + raise ModelRunException("No results obtained from model inference") + + return results + + def _prepare_metadata_array(self, metadata: List[Dict[str, Any]]): + """Convert metadata list to numpy array for ONNX model input""" + # Convert metadata to numerical format + metadata_array = [] + for entry in metadata: + age = entry.get('age', 0) if entry.get('age') is not None else 0 + # Convert gender to numerical: male=1, female=0, unknown=-1 + gender_str = entry.get('gender', '').lower() if entry.get('gender') else '' + if gender_str in ['male', 'm']: + gender = 1 + elif gender_str in ['female', 'f']: + gender = 0 + else: + gender = -1 # Unknown/missing gender + + # Convert location to numerical using LocationId enum + location_str = entry.get('location', '').lower() if entry.get('location') else '' + location = self._get_location_value(location_str) + + metadata_array.append([age, gender, location]) + + return np.array(metadata_array, dtype=np.float32) + + def _get_location_value(self, location_str: str) -> int: + """Convert location string to numerical value using LocationId enum.""" + if not location_str: + return -1 + + try: + # Convert to uppercase to match enum names + location_enum = LocationId[location_str.upper()] + return location_enum.value + except KeyError: + # Unknown/invalid location + return -1 diff --git a/cancer_ai/validator/model_runners/pytorch_runner.py b/cancer_ai/validator/model_runners/pytorch_runner.py new file mode 100644 index 000000000..0102d2240 --- /dev/null +++ b/cancer_ai/validator/model_runners/pytorch_runner.py @@ -0,0 +1,37 @@ +from . 
import BaseRunnerHandler +from typing import List, AsyncGenerator +import numpy as np +import bittensor as bt + + +class PytorchRunnerHandler(BaseRunnerHandler): + async def run(self, preprocessed_data_generator: AsyncGenerator[np.ndarray, None]) -> List: + """ + Run PyTorch model inference on preprocessed data chunks. + + Args: + preprocessed_data_generator: Generator yielding preprocessed numpy arrays + + Returns: + List of model predictions + """ + import torch + + bt.logging.info("Running PyTorch model inference on preprocessed data") + + model = torch.load(self.model_path) + model.eval() + results = [] + + async for chunk in preprocessed_data_generator: + try: + # Convert numpy array to torch tensor + chunk_tensor = torch.from_numpy(chunk) + with torch.no_grad(): + chunk_results = model(chunk_tensor) + results.extend(chunk_results.cpu().numpy()) + except Exception as e: + bt.logging.error(f"PyTorch inference error on chunk: {e}") + continue + + return results \ No newline at end of file diff --git a/cancer_ai/validator/model_runners/tensorflow_runner.py b/cancer_ai/validator/model_runners/tensorflow_runner.py new file mode 100644 index 000000000..05a0c2243 --- /dev/null +++ b/cancer_ai/validator/model_runners/tensorflow_runner.py @@ -0,0 +1,34 @@ +from . import BaseRunnerHandler +from typing import List, AsyncGenerator +import bittensor as bt +import numpy as np + +class TensorflowRunnerHandler(BaseRunnerHandler): + async def run(self, preprocessed_data_generator: AsyncGenerator[np.ndarray, None]) -> List: + """ + Run TensorFlow model inference on preprocessed data chunks. + + Args: + preprocessed_data_generator: Generator yielding preprocessed numpy arrays + + Returns: + List of model predictions + """ + import tensorflow as tf + + bt.logging.info("Running TensorFlow model inference on preprocessed data") + + model = tf.keras.models.load_model(self.model_path) + results = [] + + async for chunk in preprocessed_data_generator: + try: + # TensorFlow expects (N, H, W, C) format, so transpose from (N, C, H, W) + chunk_transposed = np.transpose(chunk, (0, 2, 3, 1)) + chunk_results = model.predict(chunk_transposed, batch_size=10) + results.extend(chunk_results) + except Exception as e: + bt.logging.error(f"TensorFlow inference error on chunk: {e}") + continue + + return results \ No newline at end of file diff --git a/cancer_ai/validator/models.py b/cancer_ai/validator/models.py new file mode 100644 index 000000000..4f1184cd5 --- /dev/null +++ b/cancer_ai/validator/models.py @@ -0,0 +1,106 @@ +from typing import List, ClassVar, Optional, ClassVar, Optional +from pydantic import BaseModel, EmailStr, Field, ValidationError +from datetime import datetime +from dataclasses import dataclass + +class CompetitionModel(BaseModel): + competition_id: str + category: str | None = None + evaluation_times: List[str] + dataset_hf_repo: str + dataset_hf_filename: str + dataset_hf_repo_type: str + + +class CompetitionsListModel(BaseModel): + competitions: List[CompetitionModel] + +class OrganizationDataReference(BaseModel): + competition_id: str = Field(..., min_length=1, description="Competition identifier") + organization_id: str = Field(..., min_length=1, description="Unique identifier for the organization") + dataset_hf_repo: str = Field(..., min_length=1, description="Hugging Face repository path for the dataset") + dataset_hf_dir: str = Field("", min_length=0, description="Directory for the datasets in the repository") + +class OrganizationDataReferenceFactory(BaseModel): + organizations: 
List[OrganizationDataReference] = Field(default_factory=list) + _instance: ClassVar[Optional["OrganizationDataReferenceFactory"]] = None + @classmethod + def get_instance(cls): + if cls._instance is None: + cls._instance = cls() + return cls._instance + + def add_organizations(self, organizations: List[OrganizationDataReference]): + self.organizations.extend(organizations) + + def update_from_dict(self, data: dict): + """Updates the singleton instance's state from a dictionary.""" + if "organizations" in data: + # Convert each dict in 'organizations' to an OrganizationDataReference instance + self.organizations = [OrganizationDataReference(**org) for org in data["organizations"]] + for key, value in data.items(): + if key != "organizations": + setattr(self, key, value) + + def find_organization_by_competition_id(self, competition_id: str) -> Optional[OrganizationDataReference]: + """Find an organization by competition ID. + Returns: + The organization data reference for the given competition ID, or None if not found + """ + return next((o for o in self.organizations if o.competition_id == competition_id), None) + +class NewDatasetFile(BaseModel): + competition_id: str = Field(..., min_length=1, description="Competition identifier") + dataset_hf_repo: str = Field(..., min_length=1, description="Hugging Face repository path for the dataset") + dataset_hf_filename: str = Field(..., min_length=1, description="Filename for the dataset in the repository") + + + +class WanDBLogBase(BaseModel): + """Base class for WandB log entries""" + uuid: str # competition unique identifier + log_type: str + validator_hotkey: str + dataset_filename: str + + competition_id: str + + errors: str = "" + run_time_s: float = 0.0 + +class WanDBLogModelBase(WanDBLogBase): + log_type: str = "model_results" + uid: int + miner_hotkey: str + + score: float = 0.0 + average_score: float = 0.0 + +class WanDBLogModelErrorEntry(WanDBLogModelBase): + pass + + +class WanDBLogCompetitionWinners(WanDBLogBase): + """Summary of competition""" + log_type: str = "competition_summary" + + competition_winning_hotkey: str + competition_winning_uid: int + + average_winning_hotkey: str + average_winning_uid: int + + +@dataclass +class ModelInfo: + hf_repo_id: str | None = None + hf_model_filename: str | None = None + hf_code_filename: str | None = None + hf_repo_type: str | None = None + + competition_id: str | None = None + file_path: str | None = None + model_type: str | None = None + block: int | None = None + model_hash: str | None = None + model_size_mb: float | None = None diff --git a/cancer_ai/validator/rewarder.py b/cancer_ai/validator/rewarder.py new file mode 100644 index 000000000..6270ef467 --- /dev/null +++ b/cancer_ai/validator/rewarder.py @@ -0,0 +1,216 @@ +from pydantic import BaseModel +import bittensor as bt +from datetime import datetime, timezone + +from cancer_ai.validator.competition_handlers.base_handler import BaseModelEvaluationResult +from cancer_ai.validator.model_db import ModelDBController +from cancer_ai.validator.utils import get_competition_weights + +# add type hotkey which is string +Hotkey = str + +HISTORY_LENGTH = 10 + +# how many results should we use for calculating average score +MOVING_AVERAGE_LENGTH = 5 + + +class ModelScore(BaseModel): + date: datetime + score: float + + +class CompetitionResultsStore(BaseModel): + def get_newest_score(self, competition_id: str, hotkey: Hotkey) -> float | None: + """Return the newest score for a given competition/hotkey, or None if not found.""" + scores = 
self.score_map.get(competition_id, {}).get(hotkey, [])
+        if not scores:
+            return None
+        return scores[-1].score
+
+    def get_scores(self, competition_id: str, hotkey: Hotkey) -> list[float]:
+        """Return all scores for a given competition/hotkey, newest first."""
+        scores = self.score_map.get(competition_id, {}).get(hotkey, [])
+        return [s.score for s in reversed(scores)]
+
+    # Structure: {competition_id: {hotkey: [ModelScore, ...]}}
+    score_map: dict[str, dict[Hotkey, list[ModelScore]]] = {}
+    # Structure: {competition_id: {hotkey: average_score}}
+    average_scores: dict[str, dict[Hotkey, float]] = {}
+    # Structure: {competition_id: (hotkey, score)}
+    current_top_hotkeys: dict[str, tuple[Hotkey, float]] = {}
+
+    def add_score(self, competition_id: str, hotkey: Hotkey, score: float, date: datetime | None = None):
+        """Add a score for a specific hotkey in a specific competition."""
+
+        if competition_id not in self.score_map:
+            self.score_map[competition_id] = {}
+        if competition_id not in self.average_scores:
+            self.average_scores[competition_id] = {}
+
+        if hotkey not in self.score_map[competition_id]:
+            self.score_map[competition_id][hotkey] = []
+
+        score_date = date if date is not None else datetime.now(timezone.utc)
+
+        self.score_map[competition_id][hotkey].append(
+            ModelScore(date=score_date, score=score)
+        )
+
+        # Sort by date and keep only the last HISTORY_LENGTH scores
+        self.score_map[competition_id][hotkey].sort(key=lambda x: x.date)
+        if len(self.score_map[competition_id][hotkey]) > HISTORY_LENGTH:
+            # remove the oldest one
+            self.score_map[competition_id][hotkey] = self.score_map[competition_id][hotkey][1:]
+
+        self.update_average_score(competition_id, hotkey)
+
+    def update_average_score(self, competition_id: str, hotkey: Hotkey) -> None:
+        """Update the average for a hotkey; dividing by MOVING_AVERAGE_LENGTH penalises short histories."""
+        if (
+            competition_id not in self.score_map
+            or hotkey not in self.score_map[competition_id]
+        ):
+            return
+
+        scores = self.score_map[competition_id][hotkey][-MOVING_AVERAGE_LENGTH:]
+        scores = [score.score for score in scores]
+        bt.logging.debug(f"Scores used to calculate average for hotkey {hotkey}: {scores}")
+        result = sum(scores) / MOVING_AVERAGE_LENGTH
+
+        if competition_id not in self.average_scores:
+            self.average_scores[competition_id] = {}
+        self.average_scores[competition_id][hotkey] = result
+
+    def delete_dead_hotkeys(self, competition_id: str, active_hotkeys: list[Hotkey]):
+        """Delete hotkeys that are no longer active in a specific competition."""
+        if competition_id not in self.score_map:
+            return
+
+        hotkeys_to_delete = []
+        for hotkey in self.score_map[competition_id].keys():
+            if hotkey not in active_hotkeys:
+                hotkeys_to_delete.append(hotkey)
+        for hotkey in hotkeys_to_delete:
+            del self.score_map[competition_id][hotkey]
+            if (
+                competition_id in self.average_scores
+                and hotkey in self.average_scores[competition_id]
+            ):
+                del self.average_scores[competition_id][hotkey]
+
+    def get_top_hotkey(self, competition_id: str) -> Hotkey:
+        if (
+            competition_id not in self.average_scores
+            or not self.average_scores[competition_id]
+        ):
+            raise ValueError(
+                f"No hotkeys to choose from for competition {competition_id}"
+            )
+
+        # Find the new top hotkey and score
+        new_top_hotkey = max(
+            self.average_scores[competition_id],
+            key=self.average_scores[competition_id].get,
+        )
+        new_top_score = self.average_scores[competition_id][new_top_hotkey]
+
+        # Check if we have a current top hotkey for this competition
+        if competition_id in 
self.current_top_hotkeys: + current_top_hotkey, current_top_score = self.current_top_hotkeys[competition_id] + + # If the current top hotkey is still active and the new top score + # is not significantly better (within threshold), keep the current top hotkey + if ( + current_top_hotkey in self.average_scores[competition_id] and + abs(new_top_score - current_top_score) <= 0.0001 + ): + return current_top_hotkey + + # Update the current top hotkey and score + self.current_top_hotkeys[competition_id] = (new_top_hotkey, new_top_score) + return new_top_hotkey + + def get_hotkeys_with_non_zero_scores(self, competition_id: str) -> list[str]: + """ + Return all hotkeys for `competition_id` whose *average* score is > 0, + sorted descending by that average. + """ + if competition_id not in self.average_scores: + raise ValueError(f"No average scores to choose from for competition {competition_id}") + + comp_avg_map = self.average_scores[competition_id] + + bt.logging.debug(f"Average scores for competition {competition_id}: {comp_avg_map}") + + positive_avg = { hk: avg for hk, avg in comp_avg_map.items() if avg > 0 } + + if not positive_avg: + return [] + + return sorted( + positive_avg.keys(), + key=lambda hk: positive_avg[hk], + reverse=True + ) + + + def get_competitions(self) -> list[str]: + return list(self.score_map.keys()) + + def delete_inactive_competitions(self, active_competitions: list[str]): + """Delete competitions that are no longer active.""" + competitions_to_delete = [] + for competition_id in self.score_map.keys(): + if competition_id not in active_competitions: + competitions_to_delete.append(competition_id) + + for competition_id in competitions_to_delete: + bt.logging.info(f"Deleting inactive competition {competition_id} from results store") + del self.score_map[competition_id] + if competition_id in self.average_scores: + del self.average_scores[competition_id] + if competition_id in self.current_top_hotkeys: + del self.current_top_hotkeys[competition_id] + + + async def update_competition_results(self, competition_id: str, model_results: list[tuple[str, BaseModelEvaluationResult]], config: bt.config, metagraph_hotkeys:list[Hotkey], hf_api, db_controller: ModelDBController): + """Update competition results for a specific competition.""" + + # Delete hotkeys from competition result score which don't exist anymore + self.delete_dead_hotkeys(competition_id, metagraph_hotkeys) + + # Get competition weights from the config + competition_weights = await get_competition_weights(config, hf_api) + + # Delete competitions that don't exist in the weights mapping + self.delete_inactive_competitions(list(competition_weights.keys())) + + # Get all hotkeys that have models for this competition from the database + latest_models = db_controller.get_latest_models(metagraph_hotkeys, competition_id) + competition_miners = set(latest_models.keys()) + + evaluated_miners = set() + + evaluation_timestamp = datetime.now(timezone.utc) + + for hotkey, result in model_results: + self.add_score(competition_id, hotkey, result.score, date=evaluation_timestamp) + evaluated_miners.add(hotkey) + + # Add score of 0 for miners who are in the competition but didn't take part in the evaluation + # This is necessary to decrease their average score when their model fails or has errors + failed_miners = competition_miners - evaluated_miners + for hotkey in failed_miners: + bt.logging.info(f"Adding score of 0 for hotkey {hotkey} in competition {competition_id} due to model failure or error") + 
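+            # Recording an explicit 0.0 keeps the hotkey's score history in step with
+            # the competition and drags its moving average down on repeated failures.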
self.add_score(competition_id, hotkey, 0.0, date=evaluation_timestamp)
+
+        # Get the winner hotkey for this competition
+        try:
+            winner_hotkey = self.get_top_hotkey(competition_id)
+            bt.logging.info(f"Competition result for {competition_id}: {winner_hotkey}")
+        except ValueError as e:
+            bt.logging.warning(f"Could not determine winner for competition {competition_id}: {e}")
+            winner_hotkey = None
+
+        return competition_weights
diff --git a/cancer_ai/validator/rewarder_test.py b/cancer_ai/validator/rewarder_test.py
new file mode 100644
index 000000000..e0fd1616b
--- /dev/null
+++ b/cancer_ai/validator/rewarder_test.py
@@ -0,0 +1,560 @@
+import pytest
+from datetime import datetime, timedelta, timezone
+from .rewarder import CompetitionLeader, Score, CompetitionWinnersStore, Rewarder
+from cancer_ai.validator.competition_handlers.base_handler import BaseModelEvaluationResult
+import numpy as np
+
+# Alias used by the tests below, which refer to the base result class by this
+# shorter name; without it they would fail with a NameError.
+ModelEvaluationResult = BaseModelEvaluationResult
+
+
+@pytest.mark.asyncio
+async def test_winner_results_model_improved():
+    """
+    Set new leader if winner's model has better scores
+    """
+    current_model_results = BaseModelEvaluationResult(
+        score=0.90,
+    )
+
+    new_model_results = BaseModelEvaluationResult(
+        score=0.99,
+    )
+
+    competition_leaders = {
+        "competition1": CompetitionLeader(
+            hotkey="player_1",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7),
+            model_result=current_model_results,
+        ),
+    }
+
+    scores = {
+        "player_1": Score(score=1.0, reduction=0.0),
+    }
+
+    winners_store = CompetitionWinnersStore(
+        competition_leader_map=competition_leaders, hotkey_score_map=scores
+    )
+
+    rewarder = Rewarder(winners_store)
+    await rewarder.update_scores(
+        winner_hotkey="player_2",
+        competition_id="competition1",
+        winner_model_result=new_model_results,
+    )
+    assert (
+        winners_store.competition_leader_map["competition1"].model_result
+        == new_model_results
+    )
+    assert winners_store.competition_leader_map["competition1"].hotkey == "player_2"
+
+
+@pytest.mark.asyncio
+async def test_winner_empty_store():
+    """
+    Test rewards if store is empty
+    """
+    model_results = ModelEvaluationResult(
+        score=0.9,
+    )
+    competition_leaders = {}
+    scores = {}
+
+    winners_store = CompetitionWinnersStore(
+        competition_leader_map=competition_leaders, hotkey_score_map=scores
+    )
+    rewarder = Rewarder(winners_store)
+    await rewarder.update_scores(
+        winner_hotkey="player_1",
+        competition_id="competition1",
+        winner_model_result=model_results,
+    )
+    assert (
+        winners_store.competition_leader_map["competition1"].model_result
+        == model_results
+    )
+
+
+@pytest.mark.asyncio
+async def test_winner_results_model_copying():
+    """
+    Keep the current leader when the challenger's score is only marginally
+    better (protection against model copying).
+    """
+    current_model_results = ModelEvaluationResult(
+        score=0.9,
+    )
+
+    new_model_results = ModelEvaluationResult(
+        score=0.9002,
+    )
+
+    competition_leaders = {
+        "competition1": CompetitionLeader(
+            hotkey="player_1",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7),
+            model_result=current_model_results,
+        ),
+    }
+
+    scores = {
+        "player_1": Score(score=1.0, reduction=0.0),
+    }
+
+    winners_store = CompetitionWinnersStore(
+        competition_leader_map=competition_leaders, hotkey_score_map=scores
+    )
+
+    rewarder = Rewarder(winners_store)
+    await rewarder.update_scores(
+        winner_hotkey="player_2",
+        competition_id="competition1",
+        winner_model_result=new_model_results,
+    )
+    assert (
+        winners_store.competition_leader_map["competition1"].model_result.score
+        == current_model_results.score
+    )
+    assert 
winners_store.competition_leader_map["competition1"].hotkey == "player_1"
+
+@pytest.mark.asyncio
+async def test_update_scores_single_competitor():
+    # Set up initial data for a single competitor
+    competition_leaders = {
+        "competition_1": CompetitionLeader(
+            hotkey="competitor_1",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=10),
+            model_result=ModelEvaluationResult(score=0.9),
+        )
+    }
+
+    scores = {
+        "competitor_1": Score(score=1.0, reduction=0.0),
+    }
+
+    # Set up the configuration with a single competition and a single competitor
+    winners_store = CompetitionWinnersStore(
+        competition_leader_map=competition_leaders, hotkey_score_map=scores)
+
+    rewarder = Rewarder(winners_store)
+    expected_winner_model_score = 0.95
+    await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1",
+                                 winner_model_result=ModelEvaluationResult(score=expected_winner_model_score))
+
+
+    # Check the updated scores and reductions for the single competitor
+    updated_score = rewarder.scores["competitor_1"].score
+    updated_reduction = rewarder.scores["competitor_1"].reduction
+
+    # With only one competitor, they should receive the full score of 1.0
+    expected_score = 1.0
+    expected_reduction = 0.0
+
+    assert (
+        winners_store.competition_leader_map["competition_1"].model_result.score
+        == expected_winner_model_score
+    )
+    assert winners_store.competition_leader_map["competition_1"].hotkey == "competitor_1"
+    assert (
+        updated_score == expected_score
+    ), f"Expected score: {expected_score}, got: {updated_score}"
+    assert (
+        updated_reduction == expected_reduction
+    ), f"Expected reduction: {expected_reduction}, got: {updated_reduction}"
+
+@pytest.mark.asyncio
+async def test_update_scores_multiple_competitors_no_reduction():
+    # Set up initial data for multiple competitors
+    competition_leaders = {
+        "competition_1": CompetitionLeader(
+            hotkey="competitor_1",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=10),
+            model_result=ModelEvaluationResult(score=0.9),
+        ),
+        "competition_2": CompetitionLeader(
+            hotkey="competitor_2",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=10),
+            model_result=ModelEvaluationResult(score=0.9),
+        ),
+        "competition_3": CompetitionLeader(
+            hotkey="competitor_3",
+            leader_since=datetime.now(timezone.utc) - timedelta(days=10),
+            model_result=ModelEvaluationResult(score=0.9),
+        ),
+    }
+
+    scores = {
+        "competitor_1": Score(score=0.0, reduction=0.0),
+        "competitor_2": Score(score=0.0, reduction=0.0),
+        "competitor_3": Score(score=0.0, reduction=0.0),
+    }
+
+    # Set up the configuration with multiple competitions and multiple competitors
+    winners_store = CompetitionWinnersStore(
+        competition_leader_map=competition_leaders, hotkey_score_map=scores)
+
+    rewarder = Rewarder(winners_store)
+    await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1",
+                                 winner_model_result=ModelEvaluationResult(score=0.9))
+
+
+    # Check the updated scores and reductions for the multiple competitors
+    updated_scores = {hotkey: score.score for hotkey, score in rewarder.scores.items()}
+    updated_reductions = {
+        hotkey: score.reduction for hotkey, score in rewarder.scores.items()
+    }
+    updated_model_scores = {competition_id: leader.model_result.score for competition_id, leader in winners_store.competition_leader_map.items()}
+
+    # With multiple competitors and no reductions, they should all receive the same score of 1/3
+    expected_score = 1 / 3
+    expected_reduction = 0.0
+    expected_model_score = 0.9
+
+
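+    # Sanity checks: every stored leader keeps the 0.9 model score, and with three
+    # fresh leaders (no time decay yet) the emission splits evenly into 1/3 shares.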
for _, score in updated_model_scores.items(): + assert ( + score == expected_model_score + ), f"Expected score: {expected_model_score}, got: {score}" + + for _, score in updated_scores.items(): + assert ( + score == expected_score + ), f"Expected score: {expected_score}, got: {score}" + + for _, reduction in updated_reductions.items(): + assert ( + reduction == expected_reduction + ), f"Expected reduction: {expected_reduction}, got: {reduction}" + +@pytest.mark.asyncio +async def test_update_scores_multiple_competitors_with_some_reduced_shares(): + # Set up initial data for a multiple competitors + competition_leaders = { + "competition_1": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_2": CompetitionLeader( + hotkey="competitor_2", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 6 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_3": CompetitionLeader( + hotkey="competitor_3", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_4": CompetitionLeader( + hotkey="competitor_4", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + } + + scores = { + "competitor_1": Score(score=0.0, reduction=0.0), + "competitor_2": Score(score=0.0, reduction=0.0), + "competitor_3": Score(score=0.0, reduction=0.0), + "competitor_4": Score(score=0.0, reduction=0.0), + } + + # Set up the configuration with multiple competitions and multiple competitors + winners_store = CompetitionWinnersStore( + competition_leader_map=competition_leaders, hotkey_score_map=scores) + + rewarder = Rewarder(winners_store) + await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1", + winner_model_result=ModelEvaluationResult(score=0.9)) + + + # Check the updated scores and reductions for the multiple competitors + updated_scores = {hotkey: score.score for hotkey, score in rewarder.scores.items()} + updated_reductions = { + hotkey: score.reduction for hotkey, score in rewarder.scores.items() + } + updated_model_scores = {competition_id: leader.model_result.score for competition_id, leader in winners_store.competition_leader_map.items()} + + # With multiple competitors and some reduced shares, they should receive different scores and reductions + expected_reductions = { + "competitor_1": 1 / 4 * 0.3, + "competitor_2": 1 / 4 * 0.6, + "competitor_3": 0.0, + "competitor_4": 0.0, + } + + expected_reductions_sum = sum(expected_reductions.values()) + expected_scores = { + "competitor_1": 1 / 4 - expected_reductions["competitor_1"], + "competitor_2": 1 / 4 - expected_reductions["competitor_2"], + "competitor_3": 1 / 4 + expected_reductions_sum / 2, + "competitor_4": 1 / 4 + expected_reductions_sum / 2, + } + expected_model_score = 0.9 + + + for _, score in updated_model_scores.items(): + assert ( + score == expected_model_score + ), f"Expected score: {expected_model_score}, got: {score}" + + for hotkey, score in updated_scores.items(): + assert score == pytest.approx( + expected_scores[hotkey], rel=1e-9 + ), f"Expected score: {expected_scores[hotkey]}, got: {score}" + + for hotkey, reduction in updated_reductions.items(): + assert reduction == pytest.approx( + expected_reductions[hotkey], rel=1e-9 + ), f"Expected reduction: {expected_reductions[hotkey]}, got: {reduction}" + 
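+
+# Documentation-only sketch (not exercised by the tests): the expected values above
+# assume the documented time decay -- a leader keeps the full base share for
+# 30 days, then sheds 10% of it per full 7 days, capped at a 90% reduction.
+def _assumed_reduction(base_share: float, days_as_leader: int) -> float:
+    weeks_past_grace = max(0, (days_as_leader - 30) // 7)
+    return base_share * min(0.1 * weeks_past_grace, 0.9)
+
+# e.g. _assumed_reduction(1 / 4, 30 + 3 * 7) == 1 / 4 * 0.3, matching
+# expected_reductions["competitor_1"] above.
+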
+@pytest.mark.asyncio +async def test_update_scores_all_competitors_with_reduced_shares(): + # Set up initial data for a multiple competitors + competition_leaders = { + "competition_1": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_2": CompetitionLeader( + hotkey="competitor_2", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 6 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_3": CompetitionLeader( + hotkey="competitor_3", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 9 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + } + + scores = { + "competitor_1": Score(score=0.0, reduction=0.0), + "competitor_2": Score(score=0.0, reduction=0.0), + "competitor_3": Score(score=0.0, reduction=0.0), + } + + # Set up the configuration with multiple competitions and multiple competitors + winners_store = CompetitionWinnersStore( + competition_leader_map=competition_leaders, hotkey_score_map=scores) + + rewarder = Rewarder(winners_store) + await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1", + winner_model_result=ModelEvaluationResult(score=0.9)) + + + # Check the updated scores and reductions for the multiple competitors + updated_scores = {hotkey: score.score for hotkey, score in rewarder.scores.items()} + updated_reductions = { + hotkey: score.reduction for hotkey, score in rewarder.scores.items() + } + updated_model_scores = {competition_id: leader.model_result.score for competition_id, leader in winners_store.competition_leader_map.items()} + + # With multiple competitors and some reduced shares, they should receive different scores and reductions + expected_reductions = {"competitor_1": 0.1, "competitor_2": 0.2, "competitor_3": 0.3} + + expected_reductions_sum = sum(expected_reductions.values()) + expected_scores = { + "competitor_1": 1 / 3 + - expected_reductions["competitor_1"] + + expected_reductions_sum / 3, + "competitor_2": 1 / 3 + - expected_reductions["competitor_2"] + + expected_reductions_sum / 3, + "competitor_3": 1 / 3 + - expected_reductions["competitor_3"] + + expected_reductions_sum / 3, + } + expected_model_score = 0.9 + + + for _, score in updated_model_scores.items(): + assert ( + score == expected_model_score + ), f"Expected score: {expected_model_score}, got: {score}" + + for hotkey, score in updated_scores.items(): + assert score == pytest.approx( + expected_scores[hotkey], rel=1e-9 + ), f"Expected score: {expected_scores[hotkey]}, got: {score}" + + for hotkey, reduction in updated_reductions.items(): + assert reduction == pytest.approx( + expected_reductions[hotkey], rel=1e-9 + ), f"Expected reduction: {expected_reductions[hotkey]}, got: {reduction}" + +@pytest.mark.asyncio +async def test_update_scores_more_competitions_then_competitors(): + # Set up initial data for a multiple competitors + competition_leaders = { + "competition_1": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_2": CompetitionLeader( + hotkey="competitor_2", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_3": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + 
model_result=ModelEvaluationResult(score=0.9), + ), + "competition_4": CompetitionLeader( + hotkey="competitor_3", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + } + + scores = { + "competitor_1": Score(score=0.0, reduction=0.0), + "competitor_2": Score(score=0.0, reduction=0.0), + "competitor_3": Score(score=0.0, reduction=0.0), + } + + # Set up the configuration with multiple competitions and multiple competitors + winners_store = CompetitionWinnersStore( + competition_leader_map=competition_leaders, hotkey_score_map=scores) + + rewarder = Rewarder(winners_store) + await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1", + winner_model_result=ModelEvaluationResult(score=0.9)) + + + # Check the updated scores and reductions for the multiple competitors + updated_scores = {hotkey: score.score for hotkey, score in rewarder.scores.items()} + updated_reductions = { + hotkey: score.reduction for hotkey, score in rewarder.scores.items() + } + updated_model_scores = {competition_id: leader.model_result.score for competition_id, leader in winners_store.competition_leader_map.items()} + + # With multiple competitors and some reduced shares, they should receive different scores and reductions + expected_reductions = { + "competitor_1": 1 / 4 * 0.3, + "competitor_2": 0.0, + "competitor_3": 0.0, + } + + expected_reductions_sum = sum(expected_reductions.values()) + expected_scores = { + "competitor_1": 2 / 4 + - expected_reductions["competitor_1"] + + expected_reductions_sum / 3, + "competitor_2": 1 / 4 + expected_reductions_sum / 3, + "competitor_3": 1 / 4 + expected_reductions_sum / 3, + } + expected_model_score = 0.9 + + + for _, score in updated_model_scores.items(): + assert ( + score == expected_model_score + ), f"Expected score: {expected_model_score}, got: {score}" + + for hotkey, score in updated_scores.items(): + assert score == pytest.approx( + expected_scores[hotkey], rel=1e-9 + ), f"Expected score: {expected_scores[hotkey]}, got: {score}" + + for hotkey, reduction in updated_reductions.items(): + assert reduction == pytest.approx( + expected_reductions[hotkey], rel=1e-9 + ), f"Expected reduction: {expected_reductions[hotkey]}, got: {reduction}" + +@pytest.mark.asyncio +async def test_update_scores_6_competitions_4_competitors(): + # Set up initial data for a multiple competitors + competition_leaders = { + "competition_1": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_2": CompetitionLeader( + hotkey="competitor_2", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 6 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_3": CompetitionLeader( + hotkey="competitor_3", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 9 * 7), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_4": CompetitionLeader( + hotkey="competitor_4", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_5": CompetitionLeader( + hotkey="competitor_1", + leader_since=datetime.now(timezone.utc) - timedelta(days=30), + model_result=ModelEvaluationResult(score=0.9), + ), + "competition_6": CompetitionLeader( + hotkey="competitor_2", + leader_since=datetime.now(timezone.utc) - timedelta(days=30 + 3 * 7), + 
model_result=ModelEvaluationResult(score=0.9), + ), + } + + scores = { + "competitor_1": Score(score=0.0, reduction=0.0), + "competitor_2": Score(score=0.0, reduction=0.0), + "competitor_3": Score(score=0.0, reduction=0.0), + "competitor_4": Score(score=0.0, reduction=0.0), + } + + # Set up the configuration with multiple competitions and multiple competitors + winners_store = CompetitionWinnersStore( + competition_leader_map=competition_leaders, hotkey_score_map=scores) + + rewarder = Rewarder(winners_store) + await rewarder.update_scores(winner_hotkey="competitor_1", competition_id="competition_1", + winner_model_result=ModelEvaluationResult(score=0.9)) + + + # Check the updated scores and reductions for the multiple competitors + updated_scores = {hotkey: score.score for hotkey, score in rewarder.scores.items()} + updated_reductions = { + hotkey: score.reduction for hotkey, score in rewarder.scores.items() + } + updated_model_scores = {competition_id: leader.model_result.score for competition_id, leader in winners_store.competition_leader_map.items()} + + # With multiple competitors and some reduced shares, they should receive different scores and reductions + expected_reductions = { + "competitor_1": 1 / 6 * 0.3, + "competitor_2": (1 / 6 * 0.6) + (1 / 6 * 0.3), + "competitor_3": 1 / 6 * 0.9, + "competitor_4": 0.0, + } + + expected_reductions_sum = sum(expected_reductions.values()) + expected_scores = { + "competitor_1": (2 / 6 - expected_reductions["competitor_1"]) + + expected_reductions_sum / 2, + "competitor_2": (2 / 6 - expected_reductions["competitor_2"]), + "competitor_3": 1 / 6 - expected_reductions["competitor_3"], + "competitor_4": 1 / 6 + expected_reductions_sum / 2, + } + expected_model_score = 0.9 + + + for _, score in updated_model_scores.items(): + assert ( + score == expected_model_score + ), f"Expected score: {expected_model_score}, got: {score}" + + for hotkey, score in updated_scores.items(): + assert score == pytest.approx( + expected_scores[hotkey], rel=1e-9 + ), f"Expected score: {expected_scores[hotkey]}, got: {score}" + + for hotkey, reduction in updated_reductions.items(): + assert reduction == pytest.approx( + expected_reductions[hotkey], rel=1e-9 + ), f"Expected reduction: {expected_reductions[hotkey]}, got: {reduction}" + + +if __name__ == "__main__": + pytest.main() diff --git a/.dependencies_installed b/cancer_ai/validator/scripts/__init__.py similarity index 100% rename from .dependencies_installed rename to cancer_ai/validator/scripts/__init__.py diff --git a/cancer_ai/validator/scripts/dataset_api_integration.py b/cancer_ai/validator/scripts/dataset_api_integration.py new file mode 100644 index 000000000..8095b7f20 --- /dev/null +++ b/cancer_ai/validator/scripts/dataset_api_integration.py @@ -0,0 +1,41 @@ +import os +import csv +import requests +import bittensor as bt + + +# Base URL for downloading images (replace with actual base URL) +BASE_URL = "http://localhost:8001/" +API_GET_IMAGES = "dataset/skin/melanoma?amount=10" + +# Create the images directory +os.makedirs("images", exist_ok=True) + +# Open the CSV file for writing +with open("labels.csv", mode="w", newline="") as csv_file: + csv_writer = csv.writer(csv_file) + # Write the header row + csv_writer.writerow(["path", "is_melanoma"]) + data = requests.get(BASE_URL + API_GET_IMAGES).json() + # Process each entry in the JSON data + for entry in data["entries"]: + image_id = entry["id"] + image_url = BASE_URL + entry["image_url"] + is_melanoma = entry["label"]["melanoma"] + + # Define the local 
file path + image_filename = f"images/{image_id}.jpg" + + # Download the image + response = requests.get(image_url) + if response.status_code == 200: + with open(image_filename, "wb") as image_file: + image_file.write(response.content) + bt.logging.info(f"Downloaded {image_filename}") + else: + bt.logging.info(f"Failed to download {image_filename}") + + # Write the image path and label to the CSV file + csv_writer.writerow([image_filename, is_melanoma]) + +print("Process completed.") diff --git a/cancer_ai/validator/tests/mock_data.py b/cancer_ai/validator/tests/mock_data.py new file mode 100644 index 000000000..f338c9086 --- /dev/null +++ b/cancer_ai/validator/tests/mock_data.py @@ -0,0 +1,15 @@ +from cancer_ai.validator.model_manager import ModelInfo + +def get_mock_hotkeys_with_models(): + return { + "5HeH6kmR6FyfC6K39aGozMJ3wUTdgxrQAQsy4BBbskxHKqgG": ModelInfo( + hf_repo_id="eatcats/test", + hf_model_filename="melanoma-1-piwo.onnx", + hf_repo_type="model", + ), + "5CQFdhmRyQtiTwHLumywhWtQYTQkF4SpGtdT8aoh3WK3E4E2": ModelInfo( + hf_repo_id="eatcats/melanoma-test", + hf_model_filename="2024-08-24_04-37-34-melanoma-1.onnx", + hf_repo_type="model", + ), + } \ No newline at end of file diff --git a/cancer_ai/validator/tests/test_model_db.py b/cancer_ai/validator/tests/test_model_db.py new file mode 100644 index 000000000..72cb11f68 --- /dev/null +++ b/cancer_ai/validator/tests/test_model_db.py @@ -0,0 +1,151 @@ +import pytest +import time +from unittest import mock +from datetime import datetime, timedelta +from cancer_ai.validator.model_db import ModelDBController, ChainMinerModelDB, Base +from sqlalchemy import create_engine, inspect +from sqlalchemy.orm import sessionmaker +from cancer_ai.chain_models_store import ChainMinerModel + +@pytest.fixture +def mock_subtensor(): + """Fixture to mock the bittensor subtensor object.""" + subtensor_mock = mock.Mock() + subtensor_mock.get_block_hash.return_value = "mock_block_hash" + + query_call_counter = {'count': 0} + def mock_query(*args, **kwargs): + # Increment counter to simulate unique blocks over time + query_call_counter['count'] += 1 + stable_timestamp = int((datetime.now() - timedelta(minutes=5)).timestamp() * 1000) + # Add a millisecond difference for each subsequent call + timestamp = stable_timestamp + query_call_counter['count'] + return mock.Mock(value=timestamp) + + subtensor_mock.substrate.query.side_effect = mock_query + return subtensor_mock + +@pytest.fixture() +def fixed_mock_subtensor(): + """Fixture to mock the bittensor subtensor object with a fixed timestamp""" + subtensor_mock = mock.Mock() + subtensor_mock.get_block_hash.return_value = "mock_block_hash" + + fixed_timestamp = int((datetime.now() - timedelta(minutes=5)).timestamp() * 1000) + + def mock_query(*args, **kwargs): + return mock.Mock(value=fixed_timestamp) + + subtensor_mock.substrate.query.side_effect = mock_query + return subtensor_mock + +@pytest.fixture +def db_session(): + engine = create_engine('sqlite:///:memory:') + Base.metadata.create_all(engine) + + inspector = inspect(engine) + print(inspector.get_table_names()) + + Session = sessionmaker(bind=engine) + return Session() + +@pytest.fixture +def model_persister(mock_subtensor, db_session): + """Fixture to create a ModelPersister instance with mocked dependencies.""" + persister = ModelDBController(db_path=':memory:') + persister.Session = mock.Mock(return_value=db_session) + return persister + +@pytest.fixture +def model_persister_fixed(fixed_mock_subtensor, db_session): + """Fixture to create a 
ModelPersister instance with a fixed timestamp."""
+    persister = ModelDBController(db_path=':memory:')
+    persister.Session = mock.Mock(return_value=db_session)
+    return persister
+
+@pytest.fixture
+def mock_chain_miner_model():
+    return ChainMinerModel(
+        competition_id="1",
+        hf_repo_id="mock_repo",
+        hf_model_filename="mock_model",
+        hf_repo_type="mock_type",
+        hf_code_filename="mock_code",
+        block=123456,
+        hotkey="mock_hotkey"
+    )
+
+def test_add_model(model_persister, mock_chain_miner_model, db_session):
+    model_persister.add_model(mock_chain_miner_model, "mock_hotkey")
+
+    session = db_session
+    model_record = session.query(ChainMinerModelDB).first()
+    assert model_record is not None
+    assert model_record.hotkey == "mock_hotkey"
+    assert model_record.competition_id == mock_chain_miner_model.competition_id
+
+def test_get_model(model_persister_fixed, mock_chain_miner_model, db_session):
+    model_persister_fixed.add_model(mock_chain_miner_model, "mock_hotkey")
+
+    retrieved_model = model_persister_fixed.get_model("mock_hotkey")
+
+    assert retrieved_model is not None
+    assert retrieved_model.hf_repo_id == mock_chain_miner_model.hf_repo_id
+
+def test_delete_model(model_persister_fixed, mock_chain_miner_model, db_session):
+    model_persister_fixed.add_model(mock_chain_miner_model, "mock_hotkey")
+
+    # Get the model to find its date_submitted
+    session = db_session
+    model_record = session.query(ChainMinerModelDB).filter_by(hotkey="mock_hotkey").first()
+    assert model_record is not None
+
+    delete_result = model_persister_fixed.delete_model(model_record.date_submitted, "mock_hotkey")
+    assert delete_result is True
+
+    # Check that the model was deleted
+    model_record = session.query(ChainMinerModelDB).filter_by(hotkey="mock_hotkey").first()
+    assert model_record is None
+
+def test_get_latest_models(model_persister, mock_chain_miner_model, db_session):
+    # Set competition_id for the test
+    mock_chain_miner_model.competition_id = "test_competition"
+    model_persister.add_model(mock_chain_miner_model, "mock_hotkey")
+
+    # Get the latest models for the competition
+    latest_models = model_persister.get_latest_models(["mock_hotkey"], "test_competition")
+    assert len(latest_models) == 1
+    assert "mock_hotkey" in latest_models
+    assert latest_models["mock_hotkey"].hf_repo_id == mock_chain_miner_model.hf_repo_id

+@mock.patch('cancer_ai.validator.model_db.STORED_MODELS_PER_HOTKEY', 2)
+def test_clean_old_records(model_persister, mock_chain_miner_model, db_session):
+    # Use a smaller STORED_MODELS_PER_HOTKEY value for this test
+    # to avoid long test execution time
+    session = db_session
+
+    # Add the first model
+    model_persister.add_model(mock_chain_miner_model, "mock_hotkey")
+    session.commit()
+
+    # Add a second model with a different block
+    mock_chain_miner_model.block += 1
+    model_persister.add_model(mock_chain_miner_model, "mock_hotkey")
+    session.commit()
+
+    # Add a third model with yet another block
+    mock_chain_miner_model.block += 1
+    model_persister.add_model(mock_chain_miner_model, "mock_hotkey")
+    session.commit()
+
+    # add_model updates the existing record for a hotkey rather than inserting
+    # new rows, so a single record exists before cleaning
+    records_before = session.query(ChainMinerModelDB).filter_by(hotkey="mock_hotkey").all()
+    assert len(records_before) == 1
+
+    # Clean old records
+    model_persister.clean_old_records(["mock_hotkey"])
+
+    # At most STORED_MODELS_PER_HOTKEY models remain per hotkey
+    records = session.query(ChainMinerModelDB).filter_by(hotkey="mock_hotkey").all()
+    assert 
len(records) == 1 # We expect 1 record since add_model updates existing records diff --git a/cancer_ai/validator/tests/test_rewarder.py b/cancer_ai/validator/tests/test_rewarder.py new file mode 100644 index 000000000..6d065aa50 --- /dev/null +++ b/cancer_ai/validator/tests/test_rewarder.py @@ -0,0 +1,214 @@ +import unittest +import sys +import os +from datetime import datetime, timezone +from unittest.mock import patch +import bittensor as bt + +# Add the project root to the path so we can import the module +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from cancer_ai.validator.rewarder import CompetitionResultsStore + + +class TestCompetitionResultsStore(unittest.TestCase): + def setUp(self): + self.store = CompetitionResultsStore() + self.competition_id = "test_competition" + self.hotkey = "test_hotkey" + self.score = 0.5 + self.date = datetime(2023, 1, 1, tzinfo=timezone.utc) + # Additional test data for extended tests + self.competition_id_1 = "competition_1" + self.competition_id_2 = "competition_2" + self.hotkey_1 = "hotkey_1" + self.hotkey_2 = "hotkey_2" + self.hotkey_3 = "hotkey_3" + self.score_1 = 0.8 + self.score_2 = 0.6 + self.score_3 = 0.9 + + def test_add_score(self): + self.store.add_score(self.competition_id, self.hotkey, self.score, self.date) + self.assertIn(self.competition_id, self.store.score_map) + self.assertIn(self.hotkey, self.store.score_map[self.competition_id]) + self.assertEqual(len(self.store.get_scores(self.competition_id, self.hotkey)), 1) + self.assertEqual( + self.store.get_newest_score(self.competition_id, self.hotkey), self.score + ) + self.assertEqual( + self.store.score_map[self.competition_id][self.hotkey][0].date, self.date + ) # date is not handled by get_newest_score, keep direct access + # Additional checks for multi-competition/hotkey + self.store.add_score(self.competition_id_1, self.hotkey_1, self.score_1) + self.store.add_score(self.competition_id_1, self.hotkey_2, self.score_2) + self.store.add_score(self.competition_id_2, self.hotkey_1, self.score_2) + self.store.add_score(self.competition_id_2, self.hotkey_3, self.score_3) + self.assertEqual(len(self.store.get_scores(self.competition_id_1, self.hotkey_1)), 1) + self.assertEqual(self.store.get_newest_score(self.competition_id_1, self.hotkey_1), self.score_1) + self.assertEqual(len(self.store.get_scores(self.competition_id_2, self.hotkey_3)), 1) + self.assertEqual(self.store.get_newest_score(self.competition_id_2, self.hotkey_3), self.score_3) + + def test_update_average_score(self): + self.store.add_score(self.competition_id, self.hotkey, self.score, self.date) + self.assertEqual( + self.store.average_scores[self.competition_id][self.hotkey], self.score / 5 + ) + self.store.add_score(self.competition_id_1, self.hotkey_1, 0.7) + self.store.add_score(self.competition_id_1, self.hotkey_1, 0.9) + self.store.add_score(self.competition_id_1, self.hotkey_1, 0.8) + expected_average = (0.7 + 0.9 + 0.8) / 5 + self.assertAlmostEqual(self.store.average_scores[self.competition_id_1][self.hotkey_1], expected_average) + + def test_get_hotkeys_with_non_zero_scores(self): + store = CompetitionResultsStore() + competition_id = "comp" + store.average_scores[competition_id] = { + "hk1": 0.0, + "hk2": 0.9, + "hk3": 0.2, + "hk4": 0.7, + "hk5": 1.0 + } + result = store.get_hotkeys_with_non_zero_scores(competition_id) + self.assertEqual(result, ["hk5", "hk2", "hk4", "hk3"]) # Sorted descending, >0 only + # Edge case: all zero or negative + 
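+        # (averages <= 0 are filtered out, so an all-zero map yields an empty list)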
store.average_scores[competition_id] = {"hk1": 0.0, "hk3": 0.0} + self.assertEqual(store.get_hotkeys_with_non_zero_scores(competition_id), []) + + def test_delete_dead_hotkeys(self): + self.store.add_score(self.competition_id, self.hotkey, self.score, self.date) + active_hotkeys = [] + self.store.delete_dead_hotkeys(self.competition_id, active_hotkeys) + self.assertNotIn(self.hotkey, self.store.score_map[self.competition_id]) + self.assertNotIn(self.hotkey, self.store.average_scores[self.competition_id]) + # Extended test + self.store.add_score(self.competition_id_1, self.hotkey_1, self.score_1) + self.store.add_score(self.competition_id_1, self.hotkey_2, self.score_2) + self.store.add_score(self.competition_id_1, self.hotkey_3, self.score_3) + active_hotkeys = [self.hotkey_1, self.hotkey_3] + self.store.delete_dead_hotkeys(self.competition_id_1, active_hotkeys) + self.assertIn(self.hotkey_1, self.store.score_map[self.competition_id_1]) + self.assertIn(self.hotkey_3, self.store.score_map[self.competition_id_1]) + self.assertNotIn(self.hotkey_2, self.store.score_map[self.competition_id_1]) + self.assertNotIn(self.hotkey_2, self.store.average_scores[self.competition_id_1]) + + def test_get_top_hotkey(self): + self.store.add_score(self.competition_id, self.hotkey, self.score, self.date) + top_hotkey = self.store.get_top_hotkey(self.competition_id) + self.assertEqual(top_hotkey, self.hotkey) + # Extended test + self.store.add_score(self.competition_id_1, self.hotkey_1, self.score_1) + self.store.add_score(self.competition_id_1, self.hotkey_2, self.score_2) + self.store.add_score(self.competition_id_1, self.hotkey_3, self.score_3) + top_hotkey = self.store.get_top_hotkey(self.competition_id_1) + self.assertEqual(top_hotkey, self.hotkey_3) + + def test_delete_inactive_competitions(self): + self.store.add_score(self.competition_id, self.hotkey, self.score, self.date) + active_competitions = [] + self.store.delete_inactive_competitions(active_competitions) + self.assertNotIn(self.competition_id, self.store.score_map) + self.assertNotIn(self.competition_id, self.store.average_scores) + self.assertNotIn(self.competition_id, self.store.current_top_hotkeys) + + def test_step_by_step(self): + scores_sequential = [1, 2, 1.5, 1.5, 7, 8] + averages_sequential = [1/5, (1+2)/5, (1+2+1.5)/5, (1+2+1.5+1.5)/5, (1+2+1.5+1.5+7)/5, (2+1.5+1.5+7+8)/5] + for i in range(6): + self.store.add_score(self.competition_id, self.hotkey, scores_sequential[i]) + self.assertEqual( + self.store.average_scores[self.competition_id][self.hotkey], + averages_sequential[i], + ) + + def test_score_history_and_average(self): + scores = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2] + dates = [datetime(2023, 1, i, tzinfo=timezone.utc) for i in range(1, 13)] + for score in scores: + self.store.add_score( + self.competition_id, self.hotkey, score, dates[scores.index(score)] + ) + bt.logging.debug( + f"Scores: {self.store.get_scores(self.competition_id, self.hotkey)}" + ) + self.assertEqual( + len(self.store.get_scores(self.competition_id, self.hotkey)), 10 + ) + expected_scores = scores[-10:] + bt.logging.debug(f"Expected scores: {expected_scores}") + actual_scores = self.store.get_scores(self.competition_id, self.hotkey)[::-1] # oldest to newest + self.assertEqual(actual_scores, expected_scores) + + def test_average_after_history(self): + scores = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] + for score in scores: + self.store.add_score(self.competition_id, self.hotkey, score) + expected_average = sum(scores[-5:]) / 
5 + self.assertAlmostEqual(self.store.average_scores[self.competition_id][self.hotkey], expected_average) + + # self.assertEqual(top_hotkey, self.hotkey_3) + + def test_get_top_hotkey_empty_competition(self): + """Test getting top hotkey for a competition with no scores.""" + # Try to get top hotkey for a non-existent competition + with self.assertRaises(ValueError): + self.store.get_top_hotkey("non_existent_competition") + + def test_get_competitions(self): + """Test getting all competition IDs.""" + # Add scores to multiple competitions + self.store.add_score(self.competition_id_1, self.hotkey_1, self.score_1) + self.store.add_score(self.competition_id_2, self.hotkey_2, self.score_2) + + # Get all competitions + competitions = self.store.get_competitions() + + # Verify both competitions are returned + self.assertEqual(len(competitions), 2) + self.assertIn(self.competition_id_1, competitions) + self.assertIn(self.competition_id_2, competitions) + + @patch('cancer_ai.validator.rewarder.datetime') + def test_model_dump_and_load(self, mock_datetime): + """Test serializing and deserializing the store.""" + # Mock datetime.now() to return a fixed time + mock_now = datetime(2025, 3, 25, 12, 0, 0, tzinfo=timezone.utc) + mock_datetime.now.return_value = mock_now + + # Add scores to the store + self.store.add_score(self.competition_id_1, self.hotkey_1, self.score_1) + self.store.add_score(self.competition_id_2, self.hotkey_2, self.score_2) + + # Dump the model to a dict + dumped = self.store.model_dump() + + # Verify the dumped data has the expected structure + # Note: We're not testing model_load here since it's not implemented in the class + # Instead we're just checking that model_dump works correctly + self.assertEqual(len(dumped), 3) # score_map, average_scores, and current_top_hotkeys + self.assertIn('score_map', dumped) + self.assertIn('average_scores', dumped) + + @patch('cancer_ai.validator.rewarder.datetime') + def test_edge_cases(self, mock_datetime): + """Test edge cases and boundary conditions.""" + # Mock datetime.now() to return a fixed time + mock_now = datetime(2025, 3, 25, 12, 0, 0, tzinfo=timezone.utc) + mock_datetime.now.return_value = mock_now + + # Test adding a score of 0 + self.store.add_score(self.competition_id_1, self.hotkey_1, 0.0) + self.assertEqual(self.store.get_newest_score(self.competition_id_1, self.hotkey_1), 0.0) + + # Test adding a negative score (should still work, though it might be invalid in real usage) + self.store.add_score(self.competition_id_1, self.hotkey_2, -0.5) + self.assertEqual(self.store.get_newest_score(self.competition_id_1, self.hotkey_2), -0.5) + + # Test with empty active_hotkeys list + self.store.delete_dead_hotkeys(self.competition_id_1, []) + # All hotkeys should be deleted + self.assertEqual(len(self.store.score_map[self.competition_id_1]), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/cancer_ai/validator/utils.py b/cancer_ai/validator/utils.py new file mode 100644 index 000000000..dd37b0ad7 --- /dev/null +++ b/cancer_ai/validator/utils.py @@ -0,0 +1,460 @@ +from enum import Enum +import os +import json +from datetime import datetime +import asyncio +import time +from functools import wraps +import shutil +import yaml +import binascii +import bittensor as bt +from retry import retry +from huggingface_hub import HfApi, hf_hub_download +from typing import Union + + +from cancer_ai.chain_models_store import ChainMinerModel +from .models import ModelInfo +from cancer_ai.validator.models import ( + NewDatasetFile, + 
OrganizationDataReferenceFactory,
+)
+
+
+class ModelType(Enum):
+    ONNX = "ONNX"
+    TENSORFLOW_SAVEDMODEL = "TensorFlow SavedModel"
+    KERAS_H5 = "Keras H5"
+    PYTORCH = "PyTorch"
+    SCIKIT_LEARN = "Scikit-learn"
+    XGBOOST = "XGBoost"
+    UNKNOWN = "Unknown format"
+
+
+def log_time(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = await func(*args, **kwargs)
+        end_time = time.time()
+        module_name = func.__module__
+        bt.logging.trace(
+            f"'{module_name}.{func.__name__}' took {end_time - start_time:.4f}s"
+        )
+        return result
+
+    return wrapper
+
+
+def detect_model_format(file_path) -> ModelType:
+    _, ext = os.path.splitext(file_path)
+
+    if ext == ".onnx":
+        return ModelType.ONNX
+    elif ext == ".h5":
+        return ModelType.KERAS_H5
+    elif ext in [".pt", ".pth"]:
+        return ModelType.PYTORCH
+    elif ext in [".pkl", ".joblib", ""]:
+        return ModelType.SCIKIT_LEARN
+    elif ext in [".model", ".json", ".txt"]:
+        return ModelType.XGBOOST
+
+    try:
+        with open(file_path, "rb") as f:
+            # TODO check if it works
+            header = f.read(4)
+            if (
+                header == b"PK\x03\x04"
+            ):  # Magic number for ZIP files (common in TensorFlow SavedModel)
+                return ModelType.TENSORFLOW_SAVEDMODEL
+            elif header[:2] == b"\x89H":  # Magic number for HDF5 files (used by Keras)
+                return ModelType.KERAS_H5
+
+    except Exception as e:
+        bt.logging.error(f"Failed to detect model format: {e}")
+        return ModelType.UNKNOWN
+
+    return ModelType.UNKNOWN
+
+
+async def run_command(cmd):
+    # Start the subprocess
+    process = await asyncio.create_subprocess_shell(
+        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+    )
+    bt.logging.debug(f"Running command: {cmd}")
+    # Wait for the subprocess to finish and capture the output
+    stdout, stderr = await process.communicate()
+
+    # Return the output and error if any
+    return stdout.decode(), stderr.decode()
+
+
+
+async def fetch_organization_data_references(
+    hf_repo_id: str, hf_api: HfApi
+) -> list[dict]:
+    bt.logging.trace(
+        f"Fetching organization data references from Hugging Face repo {hf_repo_id}"
+    )
+    yaml_data = []
+
+    # prevent stale connections
+    custom_headers = {"Connection": "close"}
+
+    try:
+        # blocks the event loop while sleeping between retries
+        files = _list_repo_tree_with_retry_sync(hf_api, hf_repo_id)
+    except Exception as e:
+        bt.logging.error(f"Failed to list repo tree after retries: {e}")
+        return yaml_data
+
+    for file_info in files:
+        if file_info.__class__.__name__ == "RepoFile":
+            file_path = file_info.path
+
+            if file_path.startswith("datasets/") and file_path.endswith(".yaml"):
+                local_file_path = hf_hub_download(
+                    repo_id=hf_repo_id,
+                    repo_type="space",
+                    token=None,
+                    filename=file_path,
+                    headers=custom_headers,
+                )
+
+                last_commit_info = file_info.last_commit
+                commit_date = last_commit_info.date if last_commit_info else None
+
+                if commit_date is not None:
+                    date_uploaded = commit_date
+                else:
+                    bt.logging.warning(
+                        f"Could not get the last commit date for {file_path}"
+                    )
+                    date_uploaded = None
+
+                with open(local_file_path, "r", encoding="utf-8") as f:
+                    try:
+                        data = yaml.safe_load(f)
+                    except yaml.YAMLError as e:
+                        bt.logging.error(
+                            f"Error parsing YAML file {file_path}: {str(e)}"
+                        )
+                        continue  # Skip this file due to parsing error
+
+                yaml_data.append(
+                    {
+                        "file_name": file_path,
+                        "yaml_data": data,
+                        "date_uploaded": date_uploaded,
+                    }
+                )
+        else:
+            continue
+    return yaml_data
+
+
+async def fetch_yaml_data_from_local_repo(local_repo_path: str) -> list[dict]:
+    """
+    Fetches YAML data from all 
YAML files in the specified local directory. + Returns a list of dictionaries containing file name, YAML data, and the last modified date. + """ + yaml_data = [] + + # Traverse through the local directory to find YAML files + for root, _, files in os.walk(local_repo_path): + for file_name in files: + if file_name.endswith(".yaml"): + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, local_repo_path) + commit_date = datetime.fromtimestamp(os.path.getmtime(file_path)) + + with open(file_path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + + yaml_data.append( + { + "file_name": relative_path, + "yaml_data": data, + "date_uploaded": commit_date, + } + ) + + return yaml_data + + +async def sync_organizations_data_references(fetched_yaml_files: list[dict]): + """ + Synchronizes the OrganizationDataReferenceFactory state with the full content + from the fetched YAML files. + + Each fetched YAML file is expected to contain a list of organization entries. + The 'org_id' key from the YAML is remapped to 'organization_id' to match the model. + """ + all_orgs = [] + for file in fetched_yaml_files: + yaml_data = file["yaml_data"] + for entry in yaml_data: + # Remap 'org_id' to 'organization_id' if needed. + if "org_id" in entry: + entry["organization_id"] = entry.pop("org_id") + all_orgs.append(entry) + + update_data = {"organizations": all_orgs} + + factory = OrganizationDataReferenceFactory.get_instance() + factory.update_from_dict(update_data) + + +async def get_newest_competition_packages(config: bt.Config, packages_count: int = 30) -> list[dict]: + """ + Gets the link to the newest package for a specific competition. + """ + newest_competition_packages: list[dict] = [] + + + hf_api = HfApi(token=config.hf_token) + + datasets_references = await fetch_organization_data_references(config.datasets_config_hf_repo_id, hf_api) + await sync_organizations_data_references(datasets_references) + org_reference = OrganizationDataReferenceFactory.get_instance() + org = org_reference.find_organization_by_competition_id(config.competition_id) + + if not org: + bt.logging.info(f"No organization found for competition ID: {config.competition_id}") + return newest_competition_packages + + try: + files = list_repo_tree_with_retry( + hf_api=hf_api, + repo_id=org.dataset_hf_repo, + repo_type="dataset", + recursive=True, + expand=True, + ) + except Exception as e: + bt.logging.error(f"Failed to list repository tree for {org.dataset_hf_repo}: {e}") + raise + + relevant_files = [ + f for f in files + if f.__class__.__name__ == "RepoFile" + and f.path.startswith(org.dataset_hf_dir) and f.path.endswith(".zip") + ] + + if not relevant_files: + bt.logging.warning(f"No relevant files found in {org.dataset_hf_repo}/{org.dataset_hf_dir}") + return newest_competition_packages + + sorted_files = sorted( + relevant_files, + key=lambda f: f.last_commit.date if f.last_commit else datetime.min, + reverse=True + ) + + top_files = sorted_files[:packages_count] + + if not top_files: + return newest_competition_packages + newest_competition_packages = [ + { + "dataset_hf_repo": org.dataset_hf_repo, + "dataset_hf_filename": file.path, + "dataset_hf_repo_type": "dataset", + } + for file in top_files + ] + + return newest_competition_packages + + + +async def check_for_new_dataset_files( + hf_api: HfApi, org_latest_updates: dict +) -> list[NewDatasetFile]: + """ + For each OrganizationDataReference stored in the singleton, this function: + - Connects to the organization's public Hugging Face 
repo.
+    - Lists files under the directory specified by dataset_hf_dir.
+    - Determines the maximum commit date among those files.
+
+    For a blank state, it returns the file with the latest commit date.
+    On subsequent checks, it returns any file whose commit date is newer than the previously stored update.
+    """
+    results = []
+    factory = OrganizationDataReferenceFactory.get_instance()
+
+    for org in factory.organizations:
+        files = hf_api.list_repo_tree(
+            repo_id=org.dataset_hf_repo,
+            repo_type="dataset",
+            recursive=True,
+            expand=True,
+        )
+        relevant_files = [
+            f
+            for f in files
+            if f.__class__.__name__ == "RepoFile"
+            and f.path.startswith(org.dataset_hf_dir) and f.path.endswith(".zip")
+        ]
+        max_commit_date = None
+        for f in relevant_files:
+            commit_date = f.last_commit.date if f.last_commit else None
+            if commit_date and (
+                max_commit_date is None or commit_date > max_commit_date
+            ):
+                max_commit_date = commit_date
+
+        new_files = []
+        stored_update = org_latest_updates.get(org.organization_id)
+        # if there is no stored_update and max_commit_date is present (any commit date is present)
+        if stored_update is None and max_commit_date is not None:
+            for f in relevant_files:
+                commit_date = f.last_commit.date if f.last_commit else None
+                if commit_date == max_commit_date:
+                    new_files.append(f.path)
+                    break
+        # if there is any stored update then we implicitly expect that any commit date on the repo is present as well
+        else:
+            for f in relevant_files:
+                commit_date = f.last_commit.date if f.last_commit else None
+                if commit_date and commit_date > stored_update:
+                    new_files.append(f.path)
+
+        # update the stored latest update for this organization.
+        if max_commit_date is not None:
+            org_latest_updates[org.organization_id] = max_commit_date
+
+        for file_name in new_files:
+            results.append(
+                NewDatasetFile(
+                    competition_id=org.competition_id,
+                    dataset_hf_repo=org.dataset_hf_repo,
+                    dataset_hf_filename=file_name,
+                )
+            )
+
+    return results
+
+
+async def get_competition_weights(config: bt.Config, hf_api: HfApi) -> dict[str, float]:
+    """Get competition weights from the competition_weights.yml file."""
+    local_file_path = hf_hub_download(
+        repo_id=config.datasets_config_hf_repo_id,
+        repo_type="space",
+        filename="competition_weights.yml"
+    )
+
+    with open(local_file_path, 'r', encoding='utf-8') as file:
+        weights_data = yaml.safe_load(file)
+
+    weights_dict = {}
+    if weights_data is not None:  # Handle empty file case
+        for item in weights_data:
+            weights_dict[item['competition_id']] = item['weight']
+
+    return weights_dict
+
+@retry(tries=10, delay=5, logger=bt.logging)
+def list_repo_tree_with_retry(hf_api, repo_id, repo_type, recursive, expand):
+    return hf_api.list_repo_tree(
+        repo_id=repo_id,
+        repo_type=repo_type,
+        recursive=recursive,
+        expand=expand,
+    )
+
+def get_local_dataset(local_dataset_dir: str) -> NewDatasetFile | None:
+    """Gets a dataset package from a local directory.
+
+    The directory needs to have a specific structure:
+    Dir
+    - to_be_released <- datasets to test
+    - already_released <- the function moves exhausted datasets to this directory
+
+    """
+    import random
+
+    to_be_released_dir = os.path.join(local_dataset_dir, "to_be_released")
+    already_released_dir = os.path.join(local_dataset_dir, "already_released")
+
+    if not os.path.exists(to_be_released_dir):
+        bt.logging.warning(f"Directory {to_be_released_dir} does not exist.")
+        return None
+
+    if not os.path.exists(already_released_dir):
+        os.makedirs(already_released_dir, 
exist_ok=True) + + for filename in os.listdir(to_be_released_dir): + if filename.endswith(".zip"): + filepath = os.path.join(to_be_released_dir, filename) + try: + # Move the file to the already_released directory. + shutil.move(filepath, os.path.join(already_released_dir, filename)) + bt.logging.info(f"Successfully processed and moved {filename} to {already_released_dir}") + return NewDatasetFile( + competition_id=random.choice(["melanoma-3"]), + dataset_hf_repo="local", + dataset_hf_filename=os.path.join(already_released_dir, filename), + ) + except Exception as e: + bt.logging.error(f"Error processing {filename}: {e}") + + return None + + +def chain_miner_to_model_info(chain_miner_model: ChainMinerModel) -> ModelInfo: + return ModelInfo( + hf_repo_id=chain_miner_model.hf_repo_id, + hf_model_filename=chain_miner_model.hf_model_filename, + hf_code_filename=chain_miner_model.hf_code_filename, + hf_repo_type=chain_miner_model.hf_repo_type, + competition_id=chain_miner_model.competition_id, + block=chain_miner_model.block, + model_hash=chain_miner_model.model_hash, + ) + +@retry( + Exception, + tries=5, + delay=3, + backoff=3, + max_delay=81, + logger=bt.logging +) +def _list_repo_tree_with_retry_sync(hf_api: HfApi, hf_repo_id: str) -> list: + return list_repo_tree_with_retry( + hf_api=hf_api, + repo_id=hf_repo_id, + repo_type="space", + + recursive=True, + expand=True, + ) + +def decode_raw(raw_hex: str) -> str: + """ + Decode a hex string (0x-prefixed or not) to UTF-8 if possible, + otherwise return the original string. + """ + try: + # strip optional “0x” + hex_str = raw_hex[2:] if raw_hex.startswith("0x") else raw_hex + data = binascii.unhexlify(hex_str) + return data.decode("utf-8") + except (binascii.Error, UnicodeDecodeError): + return raw_hex + +def decode_params(obj): + """ + Recursively walk a dict/list and decode any 0x-prefixed strings. 
+ """ + if isinstance(obj, dict): + return {k: decode_params(v) for k, v in obj.items()} + if isinstance(obj, list): + return [decode_params(v) for v in obj] + if isinstance(obj, str) and obj.startswith("0x"): + return decode_raw(obj) + return obj \ No newline at end of file diff --git a/config/competition_config.json b/config/competition_config.json new file mode 100644 index 000000000..89f810a07 --- /dev/null +++ b/config/competition_config.json @@ -0,0 +1,13 @@ +[ + { + "competition_id": "melanoma-1", + "category": "skin", + "evaluation_times": [ + "10:00", + "22:00" + ], + "dataset_hf_repo": "safescanai/melanoma-competition", + "dataset_hf_filename": "melanoma-1-dataset.zip", + "dataset_hf_repo_type": "dataset" + } +] \ No newline at end of file diff --git a/config/competition_config_testnet.json b/config/competition_config_testnet.json new file mode 100644 index 000000000..8b4f03100 --- /dev/null +++ b/config/competition_config_testnet.json @@ -0,0 +1,12 @@ +[ + { + "competition_id": "melanoma-testnet", + "category": "skin", + "evaluation_times": [ + "14:38" + ], + "dataset_hf_repo": "safescanai/test_dataset", + "dataset_hf_filename": "test_dataset.zip", + "dataset_hf_repo_type": "dataset" + } +] \ No newline at end of file diff --git a/config/hotkey_blacklist.json b/config/hotkey_blacklist.json new file mode 100644 index 000000000..0637a088a --- /dev/null +++ b/config/hotkey_blacklist.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/config/hotkey_blacklist_testnet.json b/config/hotkey_blacklist_testnet.json new file mode 100644 index 000000000..02f17ae07 --- /dev/null +++ b/config/hotkey_blacklist_testnet.json @@ -0,0 +1,22 @@ +[ + "5C8P5k5LeyAUn7pLm15xULAxKryL1MCiwgjdWdxPXdveKwXu", + "5DDJWUUxoQFgohcrhXzsak4tiHjTh4kpWBE464fVRdQKRBux", + "5Dk3j1xu3HR9eLfqyGCrUFYRnwMykp6w1Y74yK3AjwkX3B8E", + "5EsMmcgqNf9F3RMz3SRrUEJ4STvESYr54jh7ECZmrX5q3rXi", + "5DUNBtqiEFoDvtFdvrBD3sRJjRdJ4cTVmmdqieuNrZ5TYbx1", + "5CoLBVrWTQpvTYMnjtakc9iWEFR348Y2YzSWgFd2qkgtkJzb", + "5DZZnwU2LapwmZfYL9AEAWpUR6FoFvqHnzQ5F71Mhwotxujq", + "5GW4dh1mrVzCHfEHZoiDFkxJWHPAQYcfeRUtmt2vnbSof5d1", + "5CY1JgzJnMC6HJ88GQVGRzgsvEbN6LDhG8tqVU1FQGtvGFHL", + "5EeawchJxAuAzA1taJHPCgDQ2NkgckjKve1mzmHobcSbo1g3", + "5Eq6oskxtzVc6vkFmg41XMuPf3628GdJp8zcSCnfrqWQie66", + "5HioSWDsL7dqbXaPuZCHmqocmtzn18WMPbusV8HnqHvPLnCk", + "5Fo2fenxPY1D7hgTHc88g1zrX2ZX17g8DvE5KnazueYefjN5", + "5HjWnpjn2rRzdRZySumzoLurEmzd5KBSwFn2LSMu4vaj7XLv", + "5Cf8iSNjnqNhjbZNUTZYZc9fLTT6D2vT8zNKjd2rBA2iGSAy", + "5FCPTnjevGqAuTttetBy4a24Ej3pH9fiQ8fmvP1ZkrVsLUoT", + "5CXf7G1CHUoyAf79oUnXAAhmkYGwEvQ52RNHVHyoqcB7WMLf", + "5CPMVnZ82XP7Koo1upGXyRphwt5DSV12FxztUyyq1w1GMEgy", + "5EHusPJSQr4VCMQTfEStNZtcQ3hga61x2c29YDRoYkFcuHX6", + "5CXDfdhpBQ9DFWFjEq3Tv7UCmSjzSnmjDfEXYTPoaKRPTTPh" +] \ No newline at end of file diff --git a/contrib/CODE_REVIEW_DOCS.md b/contrib/CODE_REVIEW_DOCS.md deleted file mode 100644 index 9909606a8..000000000 --- a/contrib/CODE_REVIEW_DOCS.md +++ /dev/null @@ -1,72 +0,0 @@ -# Code Review -### Conceptual Review - -A review can be a conceptual review, where the reviewer leaves a comment - * `Concept (N)ACK`, meaning "I do (not) agree with the general goal of this pull - request", - * `Approach (N)ACK`, meaning `Concept ACK`, but "I do (not) agree with the - approach of this change". - -A `NACK` needs to include a rationale why the change is not worthwhile. -NACKs without accompanying reasoning may be disregarded. -After conceptual agreement on the change, code review can be provided. 
A review -begins with `ACK BRANCH_COMMIT`, where `BRANCH_COMMIT` is the top of the PR -branch, followed by a description of how the reviewer did the review. The -following language is used within pull request comments: - - - "I have tested the code", involving change-specific manual testing in - addition to running the unit, functional, or fuzz tests, and in case it is - not obvious how the manual testing was done, it should be described; - - "I have not tested the code, but I have reviewed it and it looks - OK, I agree it can be merged"; - - A "nit" refers to a trivial, often non-blocking issue. - -### Code Review -Project maintainers reserve the right to weigh the opinions of peer reviewers -using common sense judgement and may also weigh based on merit. Reviewers that -have demonstrated a deeper commitment and understanding of the project over time -or who have clear domain expertise may naturally have more weight, as one would -expect in all walks of life. - -Where a patch set affects consensus-critical code, the bar will be much -higher in terms of discussion and peer review requirements, keeping in mind that -mistakes could be very costly to the wider community. This includes refactoring -of consensus-critical code. - -Where a patch set proposes to change the Bittensor consensus, it must have been -discussed extensively on the discord server and other channels, be accompanied by a widely -discussed BIP and have a generally widely perceived technical consensus of being -a worthwhile change based on the judgement of the maintainers. - -### Finding Reviewers - -As most reviewers are themselves developers with their own projects, the review -process can be quite lengthy, and some amount of patience is required. If you find -that you've been waiting for a pull request to be given attention for several -months, there may be a number of reasons for this, some of which you can do something -about: - - - It may be because of a feature freeze due to an upcoming release. During this time, - only bug fixes are taken into consideration. If your pull request is a new feature, - it will not be prioritized until after the release. Wait for the release. - - It may be because the changes you are suggesting do not appeal to people. Rather than - nits and critique, which require effort and means they care enough to spend time on your - contribution, thundering silence is a good sign of widespread (mild) dislike of a given change - (because people don't assume *others* won't actually like the proposal). Don't take - that personally, though! Instead, take another critical look at what you are suggesting - and see if it: changes too much, is too broad, doesn't adhere to the - [developer notes](DEVELOPMENT_WORKFLOW.md), is dangerous or insecure, is messily written, etc. - Identify and address any of the issues you find. Then ask e.g. on IRC if someone could give - their opinion on the concept itself. - - It may be because your code is too complex for all but a few people, and those people - may not have realized your pull request even exists. A great way to find people who - are qualified and care about the code you are touching is the - [Git Blame feature](https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/tracking-changes-in-a-file). Simply - look up who last modified the code you are changing and see if you can find - them and give them a nudge. Don't be incessant about the nudging, though. 
- - Finally, if all else fails, ask on IRC or elsewhere for someone to give your pull request - a look. If you think you've been waiting for an unreasonably long time (say, - more than a month) for no particular reason (a few lines changed, etc.), - this is totally fine. Try to return the favor when someone else is asking - for feedback on their code, and the universe balances out. - - Remember that the best thing you can do while waiting is give review to others! \ No newline at end of file diff --git a/contrib/CONTRIBUTING.md b/contrib/CONTRIBUTING.md deleted file mode 100644 index ba33ce3c9..000000000 --- a/contrib/CONTRIBUTING.md +++ /dev/null @@ -1,213 +0,0 @@ -# Contributing to Bittensor Subnet Development - -The following is a set of guidelines for contributing to the Bittensor ecosystem. These are **HIGHLY RECOMMENDED** guidelines, but not hard-and-fast rules. Use your best judgment, and feel free to propose changes to this document in a pull request. - -## Table Of Contents -1. [How Can I Contribute?](#how-can-i-contribute) - 1. [Communication Channels](#communication-channels) - 1. [Code Contribution General Guideline](#code-contribution-general-guidelines) - 1. [Pull Request Philosophy](#pull-request-philosophy) - 1. [Pull Request Process](#pull-request-process) - 1. [Addressing Feedback](#addressing-feedback) - 1. [Squashing Commits](#squashing-commits) - 1. [Refactoring](#refactoring) - 1. [Peer Review](#peer-review) - 1. [Suggesting Features](#suggesting-enhancements-and-features) - - -## How Can I Contribute? -TODO(developer): Define your desired contribution procedure. - -## Communication Channels -TODO(developer): Place your communication channels here - -> Please follow the Bittensor Subnet [style guide](./STYLE.md) regardless of your contribution type. - -Here is a high-level summary: -- Code consistency is crucial; adhere to established programming language conventions. -- Use `black` to format your Python code; it ensures readability and consistency. -- Write concise Git commit messages; summarize changes in ~50 characters. -- Follow these six commit rules: - - Atomic Commits: Focus on one task or fix per commit. - - Subject and Body Separation: Use a blank line to separate the subject from the body. - - Subject Line Length: Keep it under 50 characters for readability. - - Imperative Mood: Write subject line as if giving a command or instruction. - - Body Text Width: Wrap text manually at 72 characters. - - Body Content: Explain what changed and why, not how. -- Make use of your commit messages to simplify project understanding and maintenance. - -> For clear examples of each of the commit rules, see the style guide's [rules](./STYLE.md#the-six-rules-of-a-great-commit) section. - -### Code Contribution General Guidelines - -> Review the Bittensor Subnet [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before contributing. - - -#### Pull Request Philosophy - -Patchsets and enhancements should always be focused. A pull request could add a feature, fix a bug, or refactor code, but it should not contain a mixture of these. Please also avoid 'super' pull requests which attempt to do too much, are overly large, or overly complex as this makes review difficult. - -Specifically, pull requests must adhere to the following criteria: -- Contain fewer than 50 files. PRs with more than 50 files will be closed. -- If a PR introduces a new feature, it *must* include corresponding tests. -- Other PRs (bug fixes, refactoring, etc.) 
should ideally also have tests, as they provide proof of concept and prevent regression. -- Categorize your PR properly by using GitHub labels. This aids in the review process by informing reviewers about the type of change at a glance. -- Make sure your code includes adequate comments. These should explain why certain decisions were made and how your changes work. -- If your changes are extensive, consider breaking your PR into smaller, related PRs. This makes your contributions easier to understand and review. -- Be active in the discussion about your PR. Respond promptly to comments and questions to help reviewers understand your changes and speed up the acceptance process. - -Generally, all pull requests must: - - - Have a clear use case, fix a demonstrable bug or serve the greater good of the project (e.g. refactoring for modularisation). - - Be well peer-reviewed. - - Follow code style guidelines. - - Not break the existing test suite. - - Where bugs are fixed, where possible, there should be unit tests demonstrating the bug and also proving the fix. - - Change relevant comments and documentation when behaviour of code changes. - -#### Pull Request Process - -Please follow these steps to have your contribution considered by the maintainers: - -*Before* creating the PR: -1. Read the [development workflow](./DEVELOPMENT_WORKFLOW.md) defined for this repository to understand our workflow. -2. Ensure your PR meets the criteria stated in the 'Pull Request Philosophy' section. -3. Include relevant tests for any fixed bugs or new features as stated in the [testing guide](./TESTING.md). -4. Ensure your commit messages are clear and concise. Include the issue number if applicable. -5. If you have multiple commits, rebase them into a single commit using `git rebase -i`. -6. Explain what your changes do and why you think they should be merged in the PR description consistent with the [style guide](./STYLE.md). - -*After* creating the PR: -1. Verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing after you submit your pull request. -2. Label your PR using GitHub's labeling feature. The labels help categorize the PR and streamline the review process. -3. Document your code with comments that provide a clear understanding of your changes. Explain any non-obvious parts of your code or design decisions you've made. -4. If your PR has extensive changes, consider splitting it into smaller, related PRs. This reduces the cognitive load on the reviewers and speeds up the review process. - -Please be responsive and participate in the discussion on your PR! This aids in clarifying any confusion or concerns and leads to quicker resolution and merging of your PR. - -> Note: If your changes are not ready for merge but you want feedback, create a draft pull request. - -Following these criteria will aid in quicker review and potential merging of your PR. -While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted. - -When you are ready to submit your changes, create a pull request: - -> **Always** follow the [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before submitting pull requests. - -After you submit a pull request, it will be reviewed by the maintainers. They may ask you to make changes. Please respond to any comments and push your changes as a new commit. 
- -> Note: Be sure to merge the latest from "upstream" before making a pull request: - -```bash -git remote add upstream https://github.com/opentensor/bittensor.git # TODO(developer): replace with your repo URL -git fetch upstream -git merge upstream/ -git push origin -``` - -#### Addressing Feedback - -After submitting your pull request, expect comments and reviews from other contributors. You can add more commits to your pull request by committing them locally and pushing to your fork. - -You are expected to reply to any review comments before your pull request is merged. You may update the code or reject the feedback if you do not agree with it, but you should express so in a reply. If there is outstanding feedback and you are not actively working on it, your pull request may be closed. - -#### Squashing Commits - -If your pull request contains fixup commits (commits that change the same line of code repeatedly) or too fine-grained commits, you may be asked to [squash](https://git-scm.com/docs/git-rebase#_interactive_mode) your commits before it will be reviewed. The basic squashing workflow is shown below. - - git checkout your_branch_name - git rebase -i HEAD~n - # n is normally the number of commits in the pull request. - # Set commits (except the one in the first line) from 'pick' to 'squash', save and quit. - # On the next screen, edit/refine commit messages. - # Save and quit. - git push -f # (force push to GitHub) - -Please update the resulting commit message, if needed. It should read as a coherent message. In most cases, this means not just listing the interim commits. - -If your change contains a merge commit, the above workflow may not work and you will need to remove the merge commit first. See the next section for details on how to rebase. - -Please refrain from creating several pull requests for the same change. Use the pull request that is already open (or was created earlier) to amend changes. This preserves the discussion and review that happened earlier for the respective change set. - -The length of time required for peer review is unpredictable and will vary from pull request to pull request. - -#### Refactoring - -Refactoring is a necessary part of any software project's evolution. The following guidelines cover refactoring pull requests for the project. - -There are three categories of refactoring: code-only moves, code style fixes, and code refactoring. In general, refactoring pull requests should not mix these three kinds of activities in order to make refactoring pull requests easy to review and uncontroversial. In all cases, refactoring PRs must not change the behaviour of code within the pull request (bugs must be preserved as is). - -Project maintainers aim for a quick turnaround on refactoring pull requests, so where possible keep them short, uncomplex and easy to verify. - -Pull requests that refactor the code should not be made by new contributors. It requires a certain level of experience to know where the code belongs to and to understand the full ramification (including rebase effort of open pull requests). Trivial pull requests or pull requests that refactor the code with no clear benefits may be immediately closed by the maintainers to reduce unnecessary workload on reviewing. - -#### Peer Review - -Anyone may participate in peer review which is expressed by comments in the pull request. Typically reviewers will review the code for obvious errors, as well as test out the patch set and opine on the technical merits of the patch. 
Project maintainers take into account the peer review when determining if there is consensus to merge a pull request (remember that discussions may have taken place elsewhere, not just on GitHub). The following language is used within pull-request comments: - -- ACK means "I have tested the code and I agree it should be merged"; -- NACK means "I disagree this should be merged", and must be accompanied by sound technical justification. NACKs without accompanying reasoning may be disregarded; -- utACK means "I have not tested the code, but I have reviewed it and it looks OK, I agree it can be merged"; -- Concept ACK means "I agree in the general principle of this pull request"; -- Nit refers to trivial, often non-blocking issues. - -Reviewers should include the commit(s) they have reviewed in their comments. This can be done by copying the commit SHA1 hash. - -A pull request that changes consensus-critical code is considerably more involved than a pull request that adds a feature to the wallet, for example. Such patches must be reviewed and thoroughly tested by several reviewers who are knowledgeable about the changed subsystems. Where new features are proposed, it is helpful for reviewers to try out the patch set on a test network and indicate that they have done so in their review. Project maintainers will take this into consideration when merging changes. - -For a more detailed description of the review process, see the [Code Review Guidelines](CODE_REVIEW_DOCS.md). - -> **Note:** If you find a **Closed** issue that seems like it is the same thing that you're experiencing, open a new issue and include a link to the original issue in the body of your new one. - -#### How Do I Submit A (Good) Bug Report? - -Please track bugs as GitHub issues. - -Explain the problem and include additional details to help maintainers reproduce the problem: - -* **Use a clear and descriptive title** for the issue to identify the problem. -* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started the application, e.g. which command exactly you used in the terminal, or how you started Bittensor otherwise. When listing steps, **don't just say what you did, but explain how you did it**. For example, if you ran with a set of custom configs, explain if you used a config file or command line arguments. -* **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). -* **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior. -* **Explain which behavior you expected to see instead and why.** -* **Include screenshots and animated GIFs** which show you following the described steps and clearly demonstrate the problem. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux. -* **If you're reporting that Bittensor crashed**, include a crash report with a stack trace from the operating system. On macOS, the crash report will be available in `Console.app` under "Diagnostic and usage information" > "User diagnostic reports". 
Include the crash report in the issue in a [code block](https://help.github.com/articles/markdown-basics/#multiple-lines), a [file attachment](https://help.github.com/articles/file-attachments-on-issues-and-pull-requests/), or put it in a [gist](https://gist.github.com/) and provide link to that gist. -* **If the problem is related to performance or memory**, include a CPU profile capture with your report, if you're using a GPU then include a GPU profile capture as well. Look into the [PyTorch Profiler](https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html) to look at memory usage of your model. -* **If the problem wasn't triggered by a specific action**, describe what you were doing before the problem happened and share more information using the guidelines below. - -Provide more context by answering these questions: - -* **Did the problem start happening recently** (e.g. after updating to a new version) or was this always a problem? -* If the problem started happening recently, **can you reproduce the problem in an older version of Bittensor?** -* **Can you reliably reproduce the issue?** If not, provide details about how often the problem happens and under which conditions it normally happens. - -Include details about your configuration and environment: - -* **Which version of Bittensor Subnet are you using?** -* **What commit hash are you on?** You can get the exact commit hash by checking `git log` and pasting the full commit hash. -* **What's the name and version of the OS you're using**? -* **Are you running Bittensor Subnet in a virtual machine?** If so, which VM software are you using and which operating systems and versions are used for the host and the guest? -* **Are you running Bittensor Subnet in a dockerized container?** If so, have you made sure that your docker container contains your latest changes and is up to date with Master branch? - -### Suggesting Enhancements and Features - -This section guides you through submitting an enhancement suggestion, including completely new features and minor improvements to existing functionality. Following these guidelines helps maintainers and the community understand your suggestion :pencil: and find related suggestions :mag_right:. - -When you are creating an enhancement suggestion, please [include as many details as possible](#how-do-i-submit-a-good-enhancement-suggestion). Fill in [the template](https://bit.ly/atom-behavior-pr), including the steps that you imagine you would take if the feature you're requesting existed. - -#### Before Submitting An Enhancement Suggestion - -* **Check the [debugging guide](./DEBUGGING.md).** for tips — you might discover that the enhancement is already available. Most importantly, check if you're using the latest version of the project first. - -#### How Submit A (Good) Feature Suggestion - -* **Use a clear and descriptive title** for the issue to identify the problem. -* **Provide a step-by-step description of the suggested enhancement** in as many details as possible. -* **Provide specific examples to demonstrate the steps**. Include copy/pasteable snippets which you use in those examples, as [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). -* **Describe the current behavior** and **explain which behavior you expected to see instead** and why. -* **Include screenshots and animated GIFs** which help you demonstrate the steps or point out the part of the project which the suggestion is related to. 
You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux. -* **Explain why this enhancement would be useful** to most users. -* **List some other text editors or applications where this enhancement exists.** -* **Specify the name and version of the OS you're using.** - -Thank you for considering contributing to Bittensor! Any help is greatly appreciated along this journey to incentivize open and permissionless intelligence. diff --git a/contrib/DEVELOPMENT_WORKFLOW.md b/contrib/DEVELOPMENT_WORKFLOW.md deleted file mode 100644 index 13bb07b25..000000000 --- a/contrib/DEVELOPMENT_WORKFLOW.md +++ /dev/null @@ -1,165 +0,0 @@ -# Bittensor Subnet Development Workflow - -This is a highly advisable workflow to follow to keep your subtensor project organized and foster ease of contribution. - -## Table of contents - -- [Bittensor Subnet Development Workflow](#bittensor-subnet-development-workflow) - - [Main Branches](#main-branches) - - [Development Model](#development-model) - - [Feature Branches](#feature-branches) - - [Release Branches](#release-branches) - - [Hotfix Branches](#hotfix-branches) - - [Git Operations](#git-operations) - - [Creating a Feature Branch](#creating-a-feature-branch) - - [Merging Feature Branch into Staging](#merging-feature-branch-into-staging) - - [Creating a Release Branch](#creating-a-release-branch) - - [Finishing a Release Branch](#finishing-a-release-branch) - - [Creating a Hotfix Branch](#creating-a-hotfix-branch) - - [Finishing a Hotfix Branch](#finishing-a-hotfix-branch) - - [Continuous Integration (CI) and Continuous Deployment (CD)](#continuous-integration-ci-and-continuous-deployment-cd) - - [Versioning and Release Notes](#versioning-and-release-notes) - - [Pending Tasks](#pending-tasks) - -## Main Branches - -Bittensor's codebase consists of two main branches: **main** and **staging**. - -**main** -- This is Bittensor's live production branch, which should only be updated by the core development team. This branch is protected, so refrain from pushing or merging into it unless authorized. - -**staging** -- This branch is continuously updated and is where you propose and merge changes. It's essentially Bittensor's active development branch. - -## Development Model - -### Feature Branches - -- Branch off from: `staging` -- Merge back into: `staging` -- Naming convention: `feature//` - -Feature branches are used to develop new features for upcoming or future releases. They exist as long as the feature is in development, but will eventually be merged into `staging` or discarded. Always delete your feature branch after merging to avoid unnecessary clutter. - -### Release Branches - -- Branch off from: `staging` -- Merge back into: `staging` and then `main` -- Naming convention: `release///` - -Release branches support the preparation of a new production release, allowing for minor bug fixes and preparation of metadata (version number, configuration, etc). All new features should be merged into `staging` and wait for the next big release. - -### Hotfix Branches - -General workflow: - -- Branch off from: `main` or `staging` -- Merge back into: `staging` then `main` -- Naming convention: `hotfix///` - -Hotfix branches are meant for quick fixes in the production environment. When a critical bug in a production version must be resolved immediately, a hotfix branch is created. 
- -## Git Operations - -#### Create a feature branch - -1. Branch from the **staging** branch. - 1. Command: `git checkout -b feature/my-feature staging` - -> Rebase frequently with the updated staging branch so you do not face big conflicts before submitting your pull request. Remember, syncing your changes with other developers could also help you avoid big conflicts. - -#### Merge feature branch into staging - -In other words, integrate your changes into a branch that will be tested and prepared for release. - -1. Switch branch to staging: `git checkout staging` -2. Merging feature branch into staging: `git merge --no-ff feature/my-feature` -3. Pushing changes to staging: `git push origin staging` -4. Delete feature branch: `git branch -d feature/my-feature` (alternatively, this can be navigated on the GitHub web UI) - -This operation is done by Github when merging a PR. - -So, what you have to keep in mind is: -- Open the PR against the `staging` branch. -- After merging a PR you should delete your feature branch. This will be strictly enforced. - -#### Creating a release branch - -1. Create branch from staging: `git checkout -b release/3.4.0/descriptive-message/creator's_name staging` -2. Updating version with major or minor: `./scripts/update_version.sh major|minor` -3. Commit file changes with new version: `git commit -a -m "Updated version to 3.4.0"` - - -#### Finishing a Release Branch - -This involves releasing stable code and generating a new version for bittensor. - -1. Switch branch to main: `git checkout main` -2. Merge release branch into main: `git merge --no-ff release/3.4.0/optional-descriptive-message` -3. Tag changeset: `git tag -a v3.4.0 -m "Releasing v3.4.0: some comment about it"` -4. Push changes to main: `git push origin main` -5. Push tags to origin: `git push origin --tags` - -To keep the changes made in the __release__ branch, we need to merge those back into `staging`: - -- Switch branch to staging: `git checkout staging`. -- Merging release branch into staging: `git merge --no-ff release/3.4.0/optional-descriptive-message` - -This step may well lead to a merge conflict (probably even, since we have changed the version number). If so, fix it and commit. - - -#### Creating a hotfix branch -1. Create branch from main: `git checkout -b hotfix/3.3.4/descriptive-message/creator's-name main` -2. Update patch version: `./scripts/update_version.sh patch` -3. Commit file changes with new version: `git commit -a -m "Updated version to 3.3.4"` -4. Fix the bug and commit the fix: `git commit -m "Fixed critical production issue X"` - -#### Finishing a Hotfix Branch - -Finishing a hotfix branch involves merging the bugfix into both `main` and `staging`. - -1. Switch branch to main: `git checkout main` -2. Merge hotfix into main: `git merge --no-ff hotfix/3.3.4/optional-descriptive-message` -3. Tag new version: `git tag -a v3.3.4 -m "Releasing v3.3.4: descriptive comment about the hotfix"` -4. Push changes to main: `git push origin main` -5. Push tags to origin: `git push origin --tags` -6. Switch branch to staging: `git checkout staging` -7. Merge hotfix into staging: `git merge --no-ff hotfix/3.3.4/descriptive-message/creator's-name` -8. Push changes to origin/staging: `git push origin staging` -9. Delete hotfix branch: `git branch -d hotfix/3.3.4/optional-descriptive-message` - -The one exception to the rule here is that, **when a release branch currently exists, the hotfix changes need to be merged into that release branch, instead of** `staging`. 
Back-merging the bugfix into the __release__ branch will eventually result in the bugfix being merged into `develop` too, when the release branch is finished. (If work in develop immediately requires this bugfix and cannot wait for the release branch to be finished, you may safely merge the bugfix into develop now already as well.) - -Finally, we remove the temporary branch: - -- `git branch -d hotfix/3.3.4/optional-descriptive-message` -## Continuous Integration (CI) and Continuous Deployment (CD) - -Continuous Integration (CI) is a software development practice where members of a team integrate their work frequently. Each integration is verified by an automated build and test process to detect integration errors as quickly as possible. - -Continuous Deployment (CD) is a software engineering approach in which software functionalities are delivered frequently through automated deployments. - -- **CircleCI job**: Create jobs in CircleCI to automate the merging of staging into main and release version (needed to release code) and building and testing Bittensor (needed to merge PRs). - -> It is highly recommended to set up your own circleci pipeline with your subnet - -## Versioning and Release Notes - -Semantic versioning helps keep track of the different versions of the software. When code is merged into main, generate a new version. - -Release notes provide documentation for each version released to the users, highlighting the new features, improvements, and bug fixes. When merged into main, generate GitHub release and release notes. - -## Pending Tasks - -Follow these steps when you are contributing to the bittensor subnet: - -- Determine if main and staging are different -- Determine what is in staging that is not merged yet - - Document not released developments - - When merged into staging, generate information about what's merged into staging but not released. - - When merged into main, generate GitHub release and release notes. -- CircleCI jobs - - Merge staging into main and release version (needed to release code) - - Build and Test Bittensor (needed to merge PRs) - -This document can be improved as the Bittensor project continues to develop and change. diff --git a/contrib/STYLE.md b/contrib/STYLE.md deleted file mode 100644 index b7ac755fc..000000000 --- a/contrib/STYLE.md +++ /dev/null @@ -1,348 +0,0 @@ -# Style Guide - -A project’s long-term success rests (among other things) on its maintainability, and a maintainer has few tools more powerful than his or her project’s log. It’s worth taking the time to learn how to care for one properly. What may be a hassle at first soon becomes habit, and eventually a source of pride and productivity for all involved. - -Most programming languages have well-established conventions as to what constitutes idiomatic style, i.e. naming, formatting and so on. There are variations on these conventions, of course, but most developers agree that picking one and sticking to it is far better than the chaos that ensues when everybody does their own thing. - -# Table of Contents -1. [Code Style](#code-style) -2. [Naming Conventions](#naming-conventions) -3. [Git Commit Style](#git-commit-style) -4. [The Six Rules of a Great Commit](#the-six-rules-of-a-great-commit) - - [1. Atomic Commits](#1-atomic-commits) - - [2. Separate Subject from Body with a Blank Line](#2-separate-subject-from-body-with-a-blank-line) - - [3. Limit the Subject Line to 50 Characters](#3-limit-the-subject-line-to-50-characters) - - [4. 
Use the Imperative Mood in the Subject Line](#4-use-the-imperative-mood-in-the-subject-line) - - [5. Wrap the Body at 72 Characters](#5-wrap-the-body-at-72-characters) - - [6. Use the Body to Explain What and Why vs. How](#6-use-the-body-to-explain-what-and-why-vs-how) -5. [Tools Worth Mentioning](#tools-worth-mentioning) - - [Using `--fixup`](#using---fixup) - - [Interactive Rebase](#interactive-rebase) -6. [Pull Request and Squashing Commits Caveats](#pull-request-and-squashing-commits-caveats) - - -### Code style - -#### General Style -Python's official style guide is PEP 8, which provides conventions for writing code for the main Python distribution. Here are some key points: - -- `Indentation:` Use 4 spaces per indentation level. - -- `Line Length:` Limit all lines to a maximum of 79 characters. - -- `Blank Lines:` Surround top-level function and class definitions with two blank lines. Method definitions inside a class are surrounded by a single blank line. - -- `Imports:` Imports should usually be on separate lines and should be grouped in the following order: - - - Standard library imports. - - Related third party imports. - - Local application/library specific imports. -- `Whitespace:` Avoid extraneous whitespace in the following situations: - - - Immediately inside parentheses, brackets or braces. - - Immediately before a comma, semicolon, or colon. - - Immediately before the open parenthesis that starts the argument list of a function call. -- `Comments:` Comments should be complete sentences and should be used to clarify code and are not a substitute for poorly written code. - -#### For Python - -- `List Comprehensions:` Use list comprehensions for concise and readable creation of lists. - -- `Generators:` Use generators when dealing with large amounts of data to save memory. - -- `Context Managers:` Use context managers (with statement) for resource management. - -- `String Formatting:` Use f-strings for formatting strings in Python 3.6 and above. - -- `Error Handling:` Use exceptions for error handling whenever possible. - -#### More details - -Use `black` to format your python code before commiting for consistency across such a large pool of contributors. Black's code [style](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#code-style) ensures consistent and opinionated code formatting. It automatically formats your Python code according to the Black style guide, enhancing code readability and maintainability. - -Key Features of Black: - - Consistency: Black enforces a single, consistent coding style across your project, eliminating style debates and allowing developers to focus on code logic. - - Readability: By applying a standard formatting style, Black improves code readability, making it easier to understand and collaborate on projects. - - Automation: Black automates the code formatting process, saving time and effort. It eliminates the need for manual formatting and reduces the likelihood of inconsistencies. - -### Naming Conventions - -- `Classes:` Class names should normally use the CapWords Convention. -- `Functions and Variables:` Function names should be lowercase, with words separated by underscores as necessary to improve readability. Variable names follow the same convention as function names. - -- `Constants:` Constants are usually defined on a module level and written in all capital letters with underscores separating words. - -- `Non-public Methods and Instance Variables:` Use a single leading underscore (_). 
This is a weak "internal use" indicator. - -- `Strongly "private" methods and variables:` Use a double leading underscore (__). This triggers name mangling in Python. - - -### Git commit style - -Here’s a model Git commit message when contributing: -``` -Summarize changes in around 50 characters or less - -More detailed explanatory text, if necessary. Wrap it to about 72 -characters or so. In some contexts, the first line is treated as the -subject of the commit and the rest of the text as the body. The -blank line separating the summary from the body is critical (unless -you omit the body entirely); various tools like `log`, `shortlog` -and `rebase` can get confused if you run the two together. - -Explain the problem that this commit is solving. Focus on why you -are making this change as opposed to how (the code explains that). -Are there side effects or other unintuitive consequences of this -change? Here's the place to explain them. - -Further paragraphs come after blank lines. - - - Bullet points are okay, too - - - Typically a hyphen or asterisk is used for the bullet, preceded - by a single space, with blank lines in between, but conventions - vary here - -If you use an issue tracker, put references to them at the bottom, -like this: - -Resolves: #123 -See also: #456, #789 -``` - - -## The six rules of a great commit. - -#### 1. Atomic Commits -An “atomic” change revolves around one task or one fix. - -Atomic Approach - - Commit each fix or task as a separate change - - Only commit when a block of work is complete - - Commit each layout change separately - - Joint commit for layout file, code behind file, and additional resources - -Benefits - -- Easy to roll back without affecting other changes -- Easy to make other changes on the fly -- Easy to merge features to other branches - -#### Avoid trivial commit messages - -Commit messages like "fix", "fix2", or "fix3" don't provide any context or clear understanding of what changes the commit introduces. Here are some examples of good vs. bad commit messages: - -**Bad Commit Message:** - - $ git commit -m "fix" - -**Good Commit Message:** - - $ git commit -m "Fix typo in README file" - -> **Caveat**: When working with new features, an atomic commit will often consist of multiple files, since a layout file, code behind file, and additional resources may have been added/modified. You don’t want to commit all of these separately, because if you had to roll back the application to a state before the feature was added, it would involve multiple commit entries, and that can get confusing - -#### 2. Separate subject from body with a blank line - -Not every commit requires both a subject and a body. Sometimes a single line is fine, especially when the change is so simple that no further context is necessary. - -For example: - - Fix typo in introduction to user guide - -Nothing more need be said; if the reader wonders what the typo was, she can simply take a look at the change itself, i.e. use git show or git diff or git log -p. - -If you’re committing something like this at the command line, it’s easy to use the -m option to git commit: - - $ git commit -m"Fix typo in introduction to user guide" - -However, when a commit merits a bit of explanation and context, you need to write a body. For example: - - Derezz the master control program - - MCP turned out to be evil and had become intent on world domination. - This commit throws Tron's disc into MCP (causing its deresolution) - and turns it back into a chess game. 
- -Commit messages with bodies are not so easy to write with the -m option. You’re better off writing the message in a proper text editor. [See Pro Git](https://git-scm.com/book/en/v2/Customizing-Git-Git-Configuration). - -In any case, the separation of subject from body pays off when browsing the log. Here’s the full log entry: - - $ git log - commit 42e769bdf4894310333942ffc5a15151222a87be - Author: Kevin Flynn - Date: Fri Jan 01 00:00:00 1982 -0200 - - Derezz the master control program - - MCP turned out to be evil and had become intent on world domination. - This commit throws Tron's disc into MCP (causing its deresolution) - and turns it back into a chess game. - - -#### 3. Limit the subject line to 50 characters -50 characters is not a hard limit, just a rule of thumb. Keeping subject lines at this length ensures that they are readable, and forces the author to think for a moment about the most concise way to explain what’s going on. - -GitHub’s UI is fully aware of these conventions. It will warn you if you go past the 50 character limit. Git will truncate any subject line longer than 72 characters with an ellipsis, thus keeping it to 50 is best practice. - -#### 4. Use the imperative mood in the subject line -Imperative mood just means “spoken or written as if giving a command or instruction”. A few examples: - - Clean your room - Close the door - Take out the trash - -Each of the seven rules you’re reading about right now are written in the imperative (“Wrap the body at 72 characters”, etc.). - -The imperative can sound a little rude; that’s why we don’t often use it. But it’s perfect for Git commit subject lines. One reason for this is that Git itself uses the imperative whenever it creates a commit on your behalf. - -For example, the default message created when using git merge reads: - - Merge branch 'myfeature' - -And when using git revert: - - Revert "Add the thing with the stuff" - - This reverts commit cc87791524aedd593cff5a74532befe7ab69ce9d. - -Or when clicking the “Merge” button on a GitHub pull request: - - Merge pull request #123 from someuser/somebranch - -So when you write your commit messages in the imperative, you’re following Git’s own built-in conventions. For example: - - Refactor subsystem X for readability - Update getting started documentation - Remove deprecated methods - Release version 1.0.0 - -Writing this way can be a little awkward at first. We’re more used to speaking in the indicative mood, which is all about reporting facts. That’s why commit messages often end up reading like this: - - Fixed bug with Y - Changing behavior of X - -And sometimes commit messages get written as a description of their contents: - - More fixes for broken stuff - Sweet new API methods - -To remove any confusion, here’s a simple rule to get it right every time. - -**A properly formed Git commit subject line should always be able to complete the following sentence:** - - If applied, this commit will - -For example: - - If applied, this commit will refactor subsystem X for readability - If applied, this commit will update getting started documentation - If applied, this commit will remove deprecated methods - If applied, this commit will release version 1.0.0 - If applied, this commit will merge pull request #123 from user/branch - -#### 5. Wrap the body at 72 characters -Git never wraps text automatically. When you write the body of a commit message, you must mind its right margin, and wrap text manually. 
- -The recommendation is to do this at 72 characters, so that Git has plenty of room to indent text while still keeping everything under 80 characters overall. - -A good text editor can help here. It’s easy to configure Vim, for example, to wrap text at 72 characters when you’re writing a Git commit. - -#### 6. Use the body to explain what and why vs. how -This [commit](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) from Bitcoin Core is a great example of explaining what changed and why: - -``` -commit eb0b56b19017ab5c16c745e6da39c53126924ed6 -Author: Pieter Wuille -Date: Fri Aug 1 22:57:55 2014 +0200 - - Simplify serialize.h's exception handling - - Remove the 'state' and 'exceptmask' from serialize.h's stream - implementations, as well as related methods. - - As exceptmask always included 'failbit', and setstate was always - called with bits = failbit, all it did was immediately raise an - exception. Get rid of those variables, and replace the setstate - with direct exception throwing (which also removes some dead - code). - - As a result, good() is never reached after a failure (there are - only 2 calls, one of which is in tests), and can just be replaced - by !eof(). - - fail(), clear(n) and exceptions() are just never called. Delete - them. -``` - -Take a look at the [full diff](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) and just think how much time the author is saving fellow and future committers by taking the time to provide this context here and now. If he didn’t, it would probably be lost forever. - -In most cases, you can leave out details about how a change has been made. Code is generally self-explanatory in this regard (and if the code is so complex that it needs to be explained in prose, that’s what source comments are for). Just focus on making clear the reasons why you made the change in the first place—the way things worked before the change (and what was wrong with that), the way they work now, and why you decided to solve it the way you did. - -The future maintainer that thanks you may be yourself! - - - -#### Tools worth mentioning - -##### Using `--fixup` - -If you've made a commit and then realize you've missed something or made a minor mistake, you can use the `--fixup` option. - -For example, suppose you've made a commit with a hash `9fceb02`. Later, you realize you've left a debug statement in your code. Instead of making a new commit titled "remove debug statement" or "fix", you can do the following: - - $ git commit --fixup 9fceb02 - -This will create a new commit to fix the issue, with a message like "fixup! The original commit message". - -##### Interactive Rebase - -Interactive rebase, or `rebase -i`, can be used to squash these fixup commits into the original commits they're fixing, which cleans up your commit history. You can use the `autosquash` option to automatically squash any commits marked as "fixup" into their target commits. - -For example: - - $ git rebase -i --autosquash HEAD~5 - -This command starts an interactive rebase for the last 5 commits (`HEAD~5`). Any commits marked as "fixup" will be automatically moved to squash with their target commits. - -The benefit of using `--fixup` and interactive rebase is that it keeps your commit history clean and readable. It groups fixes with the commits they are related to, rather than having a separate "fix" commit that might not make sense to other developers (or even to you) in the future. 
- - ---- - -#### Pull Request and Squashing Commits Caveats - -While atomic commits are great for development and for understanding the changes within the branch, the commit history can get messy when merging to the main branch. To keep a cleaner and more understandable commit history in our main branch, we encourage squashing all the commits of a PR into one when merging. - -This single commit should provide an overview of the changes that the PR introduced. It should follow the guidelines for atomic commits (an atomic commit is complete, self-contained, and understandable) but on the scale of the entire feature, task, or fix that the PR addresses. This approach combines the benefits of atomic commits during development with a clean commit history in our main branch. - -Here is how you can squash commits: - -```bash -git rebase -i HEAD~n -``` - -where `n` is the number of commits to squash. After running the command, replace `pick` with `squash` for the commits you want to squash into the previous commit. This will combine the commits and allow you to write a new commit message. - -In this context, an atomic commit message could look like: - -``` -Add feature X - -This commit introduces feature X which does A, B, and C. It adds -new files for layout, updates the code behind the file, and introduces -new resources. This change is important because it allows users to -perform task Y more efficiently. - -It includes: -- Creation of new layout file -- Updates in the code-behind file -- Addition of new resources - -Resolves: #123 -``` - -In your PRs, remember to detail what the PR is introducing or fixing. This will be helpful for reviewers to understand the context and the reason behind the changes. diff --git a/docs/running_on_mainnet.md b/docs/running_on_mainnet.md deleted file mode 100644 index 38be00a6c..000000000 --- a/docs/running_on_mainnet.md +++ /dev/null @@ -1,244 +0,0 @@ -# Running Subnet on Mainnet - -This tutorial shows how to use the bittensor `btcli` to create a subnetwork and connect your incentive mechanism to it. - -**IMPORTANT:** Before attempting to register on mainnet, we strongly recommend that you: -- First run [Running Subnet Locally](running_on_staging.md), and -- Then run [Running on the Testnet](running_on_testnet.md). - -Your incentive mechanisms running on the mainnet are open to anyone. They emit real TAO. Creating these mechanisms incur a `lock_cost` in TAO. - -**DANGER** -- Do not expose your private keys. -- Only use your testnet wallet. -- Do not reuse the password of your mainnet wallet. -- Make sure your incentive mechanism is resistant to abuse. - -## Prerequisites - -Before proceeding further, make sure that you have installed Bittensor. See the below instructions: - -- [Install `bittensor`](https://github.com/opentensor/bittensor#install). - -After installing `bittensor`, proceed as below: - -## Steps - -## 1. Install your subnet template - -**NOTE: Skip this step if** you already did this during local testing and development. - -In your project directory: - -```bash -git clone https://github.com/opentensor/bittensor-subnet-template.git -``` - -Next, `cd` into `bittensor-subnet-template` repo directory: - -```bash -cd bittensor-subnet-template -``` - -Install the Bittensor subnet template package: - -```bash -python -m pip install -e . # Install your subnet template package -``` - -## 2. Create wallets - -Create wallets for subnet owner, subnet validator and for subnet miner. 
- -This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner. - -The owner will create and control the subnet. The owner must have at least 100 TAO before the owner can run next steps. - -The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts. - -**NOTE**: You can also use existing wallets to register. Creating new keys is shown here for reference. - -Create a coldkey for the owner wallet: - -```bash -btcli wallet new_coldkey --wallet.name owner -``` - -Create a coldkey and hotkey for the subnet miner wallet: -```bash -btcli wallet new_coldkey --wallet.name miner -``` - -and - -```bash -btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default -``` - -Create a coldkey and hotkey for the subnet validator wallet: - -```bash -btcli wallet new_coldkey --wallet.name validator -``` - -and - -```bash -btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default -``` - -## 3. Getting the price of subnet creation - -Creating subnets on mainnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the Bittensor blockchain. - -By default you must have at least 100 TAO on your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below code shows how to get the current price of creating a subnet. - -```bash -btcli subnet lock_cost -``` - -The above command will show: - -```bash ->> Subnet lock cost: τ100.000000000 -``` - -## 4. Purchasing a slot - -Using your TAO balance, you can register your subnet to the mainchain. This will create a new subnet on the mainchain and give you the owner permissions to it. The below command shows how to purchase a slot. - -**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered. - -```bash -btcli subnet create -``` - -Enter the owner wallet name. This gives permissions to the coldkey. - -```bash ->> Enter wallet name (default): owner # Enter your owner wallet name ->> Enter password to unlock key: # Enter your wallet password. ->> Register subnet? [y/n]: # Select yes (y) ->> ⠇ 📡 Registering subnet... -✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later. -``` - -## 5. (Optional) Register keys - -**NOTE**: While this is not enforced, we recommend subnet owners to run a subnet validator and a subnet miner on the subnet to demonstrate proper use to the community. - -This step registers your subnet validator and subnet miner keys to the subnet giving them the **first two slots** on the subnet. - -Register your miner key to the subnet: - -```bash -btcli subnet recycle_register --netuid 1 --subtensor.network finney --wallet.name miner --wallet.hotkey default -``` - -Follow the below prompts: - -```bash ->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created. ->> Continue Registration? - hotkey: ... - coldkey: ... - network: finney [y/n]: # Select yes (y) ->> ✅ Registered -``` - -Next, register your validator key to the subnet: - -```bash -btcli subnet recycle_register --netuid 1 --subtensor.network finney --wallet.name validator --wallet.hotkey default -``` - -Follow the below prompts: - -```bash ->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created. ->> Continue Registration? - hotkey: ... - coldkey: ... 
- network: finney [y/n]: # Select yes (y) ->> ✅ Registered -``` - -## 6. Check that your keys have been registered - -Check that your subnet validator key has been registered: - -```bash -btcli wallet overview --wallet.name validator -``` - -The output will be similar to the below: - -```bash -Subnet: 1 -COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58 -miner default 0 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf… -1 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000 - Wallet balance: τ0.0 -``` - -Check that your subnet miner has been registered: - -```bash -btcli wallet overview --wallet.name miner -``` - -The output will be similar to the below: - -```bash -Subnet: 1 -COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58 -miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf… -1 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000 - Wallet balance: τ0.0 -``` - -## 7. Run subnet miner and subnet validator - -Run the subnet miner: - -```bash -python neurons/miner.py --netuid 1 --wallet.name miner --wallet.hotkey default --logging.debug -``` - -You will see the below terminal output: - -```bash ->> 2023-08-08 16:58:11.223 | INFO | Running miner for subnet: 1 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ... -``` - -Run the subnet validator: - -```bash -python neurons/validator.py --netuid 1 --wallet.name validator --wallet.hotkey default --logging.debug -``` - -You will see the below terminal output: - -```bash ->> 2023-08-08 16:58:11.223 | INFO | Running validator for subnet: 1 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ... -``` - -## 8. Get emissions flowing - -Register to the root subnet using the `btcli`: - -```bash -btcli root register -``` - -Then set your weights for the subnet: - -```bash -btcli root weights -``` - -## 9. Stopping your nodes - -To stop your nodes, press CTRL + C in the terminal where the nodes are running. - ---- \ No newline at end of file diff --git a/docs/running_on_staging.md b/docs/running_on_staging.md deleted file mode 100644 index 6eeb4d5e7..000000000 --- a/docs/running_on_staging.md +++ /dev/null @@ -1,340 +0,0 @@ -# Running Subnet Locally - -This tutorial will guide you through: - -- Setting up a local blockchain that is not connected to either Bittensor testchain or mainchain -- Creating a subnet -- Run your incentive mechanism on the subnet. - -## Local blockchain vs local subtensor node - -Running a local blockchain is sometimes synonymously referred as running on staging. This is **different** from running a local subtensor node that connects to the Bittensor mainchain. - -A local subtensor node will connect to the mainchain and sync with the mainchain, giving you your own access point to the mainchain. - -Running a local blockchain spins up two authority nodes locally, not connected to any other nodes or testchain or mainchain. This tutorial is for running a local blockchain. - -## Prerequisites - -Before proceeding further, make sure that you have installed Bittensor. See the below instructions: - -- [Install `bittensor`](https://github.com/opentensor/bittensor#install). - -After installing `bittensor`, proceed as below: - -## 1. 
-
-Begin by installing the required dependencies for running a Substrate node.
-
-Update your system packages:
-
-```bash
-sudo apt update
-```
-
-Install additional required libraries and tools:
-
-```bash
-sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler
-```
-
-## 2. Install Rust and Cargo
-
-Rust is the programming language used in Substrate development. Cargo is Rust's package manager.
-
-Install Rust and Cargo:
-
-```bash
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-```
-
-Update your shell's source to include Cargo's path:
-
-```bash
-source "$HOME/.cargo/env"
-```
-
-## 3. Clone the subtensor repository
-
-This step fetches the subtensor codebase to your local machine.
-
-```bash
-git clone https://github.com/opentensor/subtensor.git
-```
-
-## 4. Setup Rust
-
-This step ensures that you have the nightly toolchain and the WebAssembly (wasm) compilation target.
-
-Update to the nightly version of Rust:
-
-```bash
-./subtensor/scripts/init.sh
-```
-
-## 5. Initialize
-
-These steps initialize your local subtensor chain in development mode. These commands will set up and run a local subtensor.
-
-Build the binary with the faucet feature enabled:
-
-```bash
-cargo build --release --features pow-faucet
-```
-
-**NOTE**: The `--features pow-faucet` option in the above is required if you want to use the `btcli wallet faucet` command. [See the below Mint tokens step](#8-mint-tokens-from-faucet).
-
-Next, run the localnet script and turn off the attempt to build the binary (as we have already done this above):
-
-```bash
-BUILD_BINARY=0 ./scripts/localnet.sh
-```
-
-**NOTE**: This command runs the subtensor chain in your terminal directly, so we advise running it as a background process using PM2 or similar software. Watch for any build or initialization outputs in this step. If you are building the project for the first time, this step will take a while to finish, depending on your hardware.
-
-## 6. Install subnet template
-
-`cd` to your project directory and clone the bittensor subnet template repository:
-
-```bash
-git clone https://github.com/opentensor/bittensor-subnet-template.git
-```
-
-Navigate to the cloned repository:
-
-```bash
-cd bittensor-subnet-template
-```
-
-Install the bittensor-subnet-template Python package:
-
-```bash
-python -m pip install -e .
-```
-
-## 7. Set up wallets
-
-You will need wallets for the different roles, i.e., subnet owner, subnet validator and subnet miner, in the subnet.
-
-- The owner wallet creates and controls the subnet.
-- The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
-
-Create a coldkey for the owner role:
-
-```bash
-btcli wallet new_coldkey --wallet.name owner
-```
-
-Set up the miner's wallets:
-
-```bash
-btcli wallet new_coldkey --wallet.name miner
-```
-
-```bash
-btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
-```
-
-Set up the validator's wallets:
-
-```bash
-btcli wallet new_coldkey --wallet.name validator
-```
-```bash
-btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
-```
-
-## 8. Mint tokens from faucet
-
-You will need tokens to initialize the incentive mechanism on the chain as well as for registering the subnet.
-
-Run the following commands to mint faucet tokens for the owner and for the validator.
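-
-Each successful faucet call mints τ100 on the local chain, and creating the first subnet later costs τ1000 (see step 9), so the owner wallet needs several runs. The loop below is only a sketch: it assumes the default local chain endpoint used throughout this tutorial, and each iteration may still prompt you to confirm.
-
-```bash
-# Repeat the faucet for the owner wallet; each successful run adds τ100.
-for i in {1..10}; do
-  btcli wallet faucet --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946
-done
-```
-
-Otherwise, mint one balance at a time with the commands below.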
- -Mint faucet tokens for the owner: - -```bash -btcli wallet faucet --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -You will see: - -```bash ->> Balance: τ0.000000000 ➡ τ100.000000000 -``` - -Mint tokens for the validator: - -```bash -btcli wallet faucet --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -You will see: - -```bash ->> Balance: τ0.000000000 ➡ τ100.000000000 -``` - -## 9. Create a subnet - -The below commands establish a new subnet on the local chain. The cost will be exactly τ1000.000000000 for the first subnet you create and you'll have to run the faucet several times to get enough tokens. - -```bash -btcli subnet create --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -You will see: - -```bash ->> Your balance is: τ200.000000000 ->> Do you want to register a subnet for τ1000.000000000? [y/n]: ->> Enter password to unlock key: [YOUR_PASSWORD] ->> ✅ Registered subnetwork with netuid: 1 -``` - -**NOTE**: The local chain will now have a default `netuid` of 1. The second registration will create a `netuid` 2 and so on, until you reach the subnet limit of 8. If you register more than 8 subnets, then a subnet with the least staked TAO will be replaced by the 9th subnet you register. - -## 10. Register keys - -Register your subnet validator and subnet miner on the subnet. This gives your two keys unique slots on the subnet. The subnet has a current limit of 128 slots. - -Register the subnet miner: - -```bash -btcli subnet register --wallet.name miner --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -Follow the below prompts: - -```bash ->> Enter netuid [1] (1): 1 ->> Continue Registration? [y/n]: y ->> ✅ Registered -``` - -Register the subnet validator: - -```bash - -btcli subnet register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -Follow the below prompts: - -``` ->> Enter netuid [1] (1): 1 ->> Continue Registration? [y/n]: y ->> ✅ Registered -``` - -## 11. Add stake - -This step bootstraps the incentives on your new subnet by adding stake into its incentive mechanism. - -```bash -btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 -``` - -Follow the below prompts: - -```bash ->> Stake all Tao from account: 'validator'? [y/n]: y ->> Stake: - τ0.000000000 ➡ τ100.000000000 -``` - -## 12. 
Validate key registrations
-
-Verify that both the miner and validator keys are successfully registered:
-
-```bash
-btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-You will see a `2` under the `NEURONS` column for the `NETUID` of 1, indicating that you have registered a validator and a miner in this subnet:
-
-```bash
-NETUID NEURONS MAX_N DIFFICULTY TEMPO CON_REQ EMISSION BURN(τ)
- 1 2 256.00 10.00 M 1000 None 0.00% τ1.00000
- 2 128
-```
-
-See the subnet validator's registered details:
-
-```bash
-btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-You will see:
-
-```
-Subnet: 1
-COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
-validator default 0 True 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
-1 1 2 τ100.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
- Wallet balance: τ0.0
-```
-
-See the subnet miner's registered details:
-
-```bash
-btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-You will see:
-
-```bash
-Subnet: 1
-COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
-miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
-1 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
- Wallet balance: τ0.0
-
-```
-
-## 13. Run subnet miner and subnet validator
-
-Run the subnet miner and subnet validator. Make sure to specify your subnet parameters.
-
-Run the subnet miner:
-
-```bash
-python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug
-```
-
-Run the subnet validator:
-
-```bash
-python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug
-```
-
-## 14. Set weights for your subnet
-
-Register a validator on the root subnet and boost your subnet to set weights for it. This is a necessary step to ensure that the subnet is able to receive emissions.
-
-### Register your validator on the root subnet
-
-```bash
-btcli root register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-### Boost your subnet on the root subnet
-```bash
-btcli root boost --netuid 1 --increase 1 --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-## 15. Verify your incentive mechanism
-
-After a few blocks, the subnet validator will set weights. This indicates that the incentive mechanism is active. Then, after a subnet tempo elapses (360 blocks, or 72 minutes), you will see your incentive mechanism beginning to distribute TAO to the subnet miner.
-
-```bash
-btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
-```
-
-## Ending your session
-
-To halt your nodes:
-```bash
-# Press CTRL + C keys in the terminal.
-```
-
----
diff --git a/docs/running_on_testnet.md b/docs/running_on_testnet.md
deleted file mode 100644
index 9203d3a51..000000000
--- a/docs/running_on_testnet.md
+++ /dev/null
@@ -1,242 +0,0 @@
-# Running Subnet on Testnet
-
-This tutorial shows how to use the Bittensor testnet to create a subnet and run your incentive mechanism on it.
-
-**IMPORTANT:** We strongly recommend that you first run [Running Subnet Locally](running_on_staging.md) before running on the testnet. Incentive mechanisms running on the testnet are open to anyone, and although these mechanisms on testnet do not emit real TAO, they cost you test TAO which you must create.
-
-**DANGER**
-- Do not expose your private keys.
-- Only use your testnet wallet.
-- Do not reuse the password of your mainnet wallet.
-- Make sure your incentive mechanism is resistant to abuse.
-
-## Prerequisites
-
-Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
-
-- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
-
-After installing `bittensor`, proceed as below:
-
-## 1. Install Bittensor subnet template
-
-**NOTE: Skip this step if** you already did this during local testing and development.
-
-`cd` into your project directory and clone the bittensor-subnet-template repo:
-
-```bash
-git clone https://github.com/opentensor/bittensor-subnet-template.git
-```
-
-Next, `cd` into bittensor-subnet-template repo directory:
-
-```bash
-cd bittensor-subnet-template # Enter the repo directory
-```
-
-Install the bittensor-subnet-template package:
-
-```bash
-python -m pip install -e .
-```
-
-## 2. Create wallets
-
-Create wallets for the subnet owner, subnet validator and subnet miner.
-
-This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner.
-
-The owner will create and control the subnet. The owner must have at least 100 testnet TAO before running the next steps.
-
-The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
-
-Create a coldkey for your owner wallet:
-
-```bash
-btcli wallet new_coldkey --wallet.name owner
-```
-
-Create a coldkey and hotkey for your miner wallet:
-
-```bash
-btcli wallet new_coldkey --wallet.name miner
-```
-
-and
-
-```bash
-btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
-```
-
-Create a coldkey and hotkey for your validator wallet:
-
-```bash
-btcli wallet new_coldkey --wallet.name validator
-```
-
-and
-
-```bash
-btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
-```
-
-## 3. Get the price of subnet creation
-
-Creating subnets on the testnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the chain.
-
-By default you must have at least 100 testnet TAO in your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below command shows how to get the current price of creating a subnet.
-
-```bash
-btcli subnet lock_cost --subtensor.network test
-```
-
-The above command will show:
-
-```bash
->> Subnet lock cost: τ100.000000000
-```
-
-## 4. (Optional) Get faucet tokens
-
-The faucet is disabled on the testnet, so if you don't have sufficient test TAO, ask the [Bittensor Discord community](https://discord.com/channels/799672011265015819/830068283314929684) for faucet tokens.
-
-## 5. Purchase a slot
-
-Using the test TAO from the previous step, you can register your subnet on the testnet. This will create a new subnet on the testnet and give you the owner permissions to it.
-
-The below command shows how to purchase a slot.
-
-**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered.
-
-```bash
-btcli subnet create --subtensor.network test
-```
-
-Enter the owner wallet name, which gives permissions to the coldkey:
-
-```bash
->> Enter wallet name (default): owner # Enter your owner wallet name
->> Enter password to unlock key: # Enter your wallet password.
->> Register subnet? [y/n]: # Select yes (y)
->> ⠇ 📡 Registering subnet...
-✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later.
-```
-
-## 6. Register keys
-
-This step registers your subnet validator and subnet miner keys to the subnet, giving them the **first two slots** on the subnet.
-
-Register your miner key to the subnet:
-
-```bash
-btcli subnet register --netuid 1 --subtensor.network test --wallet.name miner --wallet.hotkey default
-```
-
-Follow the below prompts:
-
-```bash
->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created.
->> Continue Registration?
-  hotkey:     ...
-  coldkey:    ...
-  network:    test [y/n]: # Select yes (y)
->> ✅ Registered
-```
-
-Next, register your validator key to the subnet:
-
-```bash
-btcli subnet register --netuid 1 --subtensor.network test --wallet.name validator --wallet.hotkey default
-```
-
-Follow the prompts:
-
-```bash
->> Enter netuid [1] (1): # Enter netuid 1 to specify the subnet you just created.
->> Continue Registration?
-  hotkey:     ...
-  coldkey:    ...
-  network:    test [y/n]: # Select yes (y)
->> ✅ Registered
-```
-
-## 7. Check that your keys have been registered
-
-This step returns information about your registered keys.
-
-Check that your validator key has been registered:
-
-```bash
-btcli wallet overview --wallet.name validator --subtensor.network test
-```
-
-The above command will display the below:
-
-```bash
-Subnet: 1
-COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
-validator default 0 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
-1 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
- Wallet balance: τ0.0
-```
-
-Check that your miner has been registered:
-
-```bash
-btcli wallet overview --wallet.name miner --subtensor.network test
-```
-
-The above command will display the below:
-
-```bash
-Subnet: 1
-COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
-miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
-1 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
- Wallet balance: τ0.0
-```
-
-## 8. Run subnet miner and subnet validator
-
-Run the subnet miner:
-
-```bash
-python neurons/miner.py --netuid 1 --subtensor.network test --wallet.name miner --wallet.hotkey default --logging.debug
-```
-
-You will see the below terminal output:
-
-```bash
->> 2023-08-08 16:58:11.223 | INFO | Running miner for subnet: 1 on network: wss://test.finney.opentensor.ai:443 with config: ...
-```
-
-Next, run the subnet validator:
-
-```bash
-python neurons/validator.py --netuid 1 --subtensor.network test --wallet.name validator --wallet.hotkey default --logging.debug
-```
-
-You will see the below terminal output:
-
-```bash
->> 2023-08-08 16:58:11.223 | INFO | Running validator for subnet: 1 on network: wss://test.finney.opentensor.ai:443 with config: ...
-```
-
-
-## 9. Get emissions flowing
-
-Register to the root network using the `btcli`:
-
-```bash
-btcli root register --subtensor.network test
-```
-
-Then set your weights for the subnet:
-
-```bash
-btcli root weights --subtensor.network test
-```
-
-## 10. Stopping your nodes
-
-To stop your nodes, press CTRL + C in the terminal where the nodes are running.
diff --git a/docs/stream_tutorial/README.md b/docs/stream_tutorial/README.md
deleted file mode 100644
index f213fd3af..000000000
--- a/docs/stream_tutorial/README.md
+++ /dev/null
@@ -1,490 +0,0 @@
-# Bittensor Streaming Tutorial
-This document is intended as a developer-friendly walkthrough of integrating streaming into your bittensor application.
-
-If you prefer to jump right into a complete stand-alone example, see:
-- `miner.py`
-- `protocol.py`
-- `client.py`
-
-Start your miner:
-```bash
-python miner.py --netuid 8 --wallet.name default --wallet.hotkey miner --subtensor.network test --axon.port 10000 --logging.trace
-```
-
-Run the client:
-```bash
-python client.py --netuid 8 --my_uid 1 --network test
-```
-
-## Overview
-This tutorial is designed to show you how to use the streaming API and integrate it into your application. It will cover the following topics:
-- writing your streaming protocol (inherits from bittensor.StreamingSynapse)
-- writing your streaming server (uses your streaming protocol)
-- writing your streaming client (uses your streaming protocol)
-
-### Defining your streaming protocol
-When designing your protocol, it would be helpful to look at the bittensor.StreamingSynapse for reference. Below is a condensed snippet of the abstract methods that you will need to implement in your subclass.
-
-You will need to implement two methods:
-
-- `process_streaming_response`
-- `extract_response_json`
-
-These two methods are the core of your streaming protocol. The first method, `process_streaming_response`, is called as the response is being streamed from the network. It is responsible for handling the streaming response, such as parsing and accumulating data. The second method, `extract_response_json`, is called after the response has been processed and is responsible for retrieving structured data to be post-processed in the dendrite in bittensor core code.
-
-```python
-class StreamingSynapse(bittensor.Synapse, ABC):
-    ...
-    class BTStreamingResponse(_StreamingResponse):
-        ...
-    @abstractmethod
-    async def process_streaming_response(self, response: Response):
-        """
-        Abstract method that must be implemented by the subclass.
-        This method should provide logic to handle the streaming response, such as parsing and accumulating data.
-        It is called as the response is being streamed from the network, and should be implemented to handle the specific
-        streaming data format and requirements of the subclass.
-
-        Args:
-            response: The response object to be processed, typically containing chunks of data.
-        """
-        ...
-
-    @abstractmethod
-    def extract_response_json(self, response: Response) -> dict:
-        """
-        Abstract method that must be implemented by the subclass.
-        This method should provide logic to extract JSON data from the response, including headers and content.
-        It is called after the response has been processed and is responsible for retrieving structured data
-        that can be used by the application.
-
-        Args:
-            response: The response object from which to extract JSON data.
-        """
-        ...
-    ...
-```
-
-See the full reference code at the bittensor [repo](https://github.com/opentensor/bittensor/blob/master/bittensor/stream.py).
- - -#### Create your protocol -Let's walk through how to create a protocol using the bittensor.StreamingSynapse class. -```python -class MyStreamingSynapse(bt.StreamingSynapse): - # define your expected data fields here as pydantic field objects - # This allows you to control what information is passed along the network - messages: List[str] = pydantic.Field( - ..., # this ellipsis (...) indicates the object is required - title="Messages", # What is the name of this field? - description="A list of messages in the Prompting scenario. Immutable.", - allow_mutation=False, # disallow modification of this field after creation - ) - completion: str = pydantic.Field( - "", - title="Completion", - ) - # add fields as necessary - ... - - # This method controls how your synapse is deserialized from the network - # E.g. you can extract whatever information you want to receive at the final - # yield in the async generator returned by the server, without receiving - # the entire synapse object itself. - # In this example, we just want the completion string at the end. - def deserialize(self) -> str: - return self.completion - - # implement your `process_streaming_response` logic to actually yield objects to the streamer - # this effectively defines the async generator that you'll recieve on the client side - async def process_streaming_response(self, response: MyStreamingSynapse): - # this is an example of how you might process a streaming response - # iterate over the response content and yield each line - async for chunk in response.content.iter_any(): - tokens = chunk.decode("utf-8").split("\n") - yield tokens - - # implement `extract_response_json` to extract the JSON data from the response headers - # this will be dependent on the data you are streaming and how you want to structure it - # it MUST conform to the following format expected by the bittensor dendrite: - """ - { - # METADATA AND HEADERS - "name": ..., - "timeout": float(...), - "total_size": int(...), - "header_size": int(...), - "dendrite": ..., - "axon": ..., - # YOUR FIELDS - "messages": self.messages, - ... - } - """ - def extract_response_json(self, response: MyStreamingSynapse) -> dict: - # iterate over the response headers and extract the necessary data - headers = { - k.decode("utf-8"): v.decode("utf-8") - for k, v in response.__dict__["_raw_headers"] - } - # helper function to extract data from headers - def extract_info(prefix): - return { - key.split("_")[-1]: value - for key, value in headers.items() - if key.startswith(prefix) - } - # return the extracted data in the expected format - return { - "name": headers.get("name", ""), - "timeout": float(headers.get("timeout", 0)), - "total_size": int(headers.get("total_size", 0)), - "header_size": int(headers.get("header_size", 0)), - "dendrite": extract_info("bt_header_dendrite"), # dendrite info - "axon": extract_info("bt_header_axon"), # axon info - "messages": self.messages, # field object - } -``` - -[Here](https://github.com/opentensor/text-prompting/blob/main/prompting/protocol.py#L131) is a full example implementation of a streaming protocol based on the text-prompting network. - -Please read the docstrings provided, they can be very helpful! - -### Writing the server -Great! Now we have our protocol defined, let's see how to define our server. -This will generate the tokens to be streamed in this prompting example. - -For brevity we will not be building a full miner, but inspecting the central components. -```python -class MyStreamPromptingMiner(bt.Miner): - ... 
# any relevant methods you'd need for your miner - - # define your server forward here - # NOTE: It is crucial that your typehints are correct and reflect your streaming protocol object - # otherwise the axon will reject adding your route to the server. - def forward(self, synapse: MyStreamingSynapse) -> MyStreamingSynapse: - # Let's use a GPT2 tokenizer for this toy example - tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - - # Simulated function to decode token IDs into strings. In a real-world scenario, - # this can be replaced with an actual model inference step. - def model(ids): - return (tokenizer.decode(id) for id in ids) - - # This function is called asynchronously to process the input text and send back tokens - # as a streaming response. It essentially produces the async generator that will be - # consumed by the client with an `async for` loop. - async def _forward(text: str, send: Send): - # `text` may be the input prompt to your model in a real-world scenario. - # let's tokenize them into IDs for the sake of this example. - input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze() - - # You may want to buffer your tokens before sending them back to the client. - # this can be useful so we aren't flooding the client with individual tokens - # and allows you more fine-grained control over how much data is sent back - # with each yield. - N = 3 # Number of tokens to send back to the client at a time - buffer = [] - # Iterate over the tokens and send the generationed tokens back to the client - # when we have sufficient (N) tokens in the buffer. - for token in model(input_ids): - buffer.append(token) # Add token to buffer - - # If buffer has N tokens, send them back to the client. - if len(buffer) == N: - joined_buffer = "".join(buffer) - # Send the tokens back to the client - # This is the core of the streaming response and the format - # is important. The `send` function is provided by the ASGI server - # and is responsible for sending the response back to the client. - # This buffer will be received by the client as a single chunk of - # data, which can then be split into individual tokens! - await send( - { - "type": "http.response.body", - "body": joined_buffer.encode("utf-8"), - "more_body": True, - } - ) - buffer = [] # Clear the buffer for next batch of tokens - - # Create a streaming response object using the `_forward` function - # It is useful to wrap your _forward function in a partial function - # to pass in the text argument lazily. - token_streamer = partial(_forward, synapse.messages[0]) - # Return the streaming response object, which is an instance of the - # `BTStreamingResponse` class. - return synapse.create_streaming_response(token_streamer) -``` - -#### Complete Example -Here is a full example for reference: -> This inherits from the prompting (text-prompting) miner base class. -> Take a look at the `prompting/baseminer/miner.py` file [here](https://github.com/opentensor/text-prompting/blob/main/prompting/baseminer/miner.py) for more details. - -```python -class StreamingTemplateMiner(prompting.Miner): - def config(self) -> "bt.Config": - """ - Returns the configuration object specific to this miner. - - Implement and extend this method to provide custom configurations for the miner. - Currently, it sets up a basic configuration parser. - - Returns: - bt.Config: A configuration object with the miner's operational parameters. 
- """ - parser = argparse.ArgumentParser(description="Streaming Miner Configs") - self.add_args(parser) - return bt.config(parser) - - def add_args(cls, parser: argparse.ArgumentParser): - """ - Adds custom arguments to the command line parser. - - Developers can introduce additional command-line arguments specific to the miner's - functionality in this method. These arguments can then be used to configure the miner's operation. - - Args: - parser (argparse.ArgumentParser): - The command line argument parser to which custom arguments should be added. - """ - pass - - def prompt(self, synapse: StreamPrompting) -> StreamPrompting: - """ - Generates a streaming response for the provided synapse. - - This function serves as the main entry point for handling streaming prompts. It takes - the incoming synapse which contains messages to be processed and returns a streaming - response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode - the incoming message, and then sends the response back to the client token by token. - - Args: - synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed. - - Returns: - StreamPrompting: The streaming response object which can be used by other functions to - stream back the response to the client. - - Usage: - This function can be extended and customized based on specific requirements of the - miner. Developers can swap out the tokenizer, model, or adjust how streaming responses - are generated to suit their specific applications. - """ - bt.logging.trace("In outer PROMPT()") - tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - - # Simulated function to decode token IDs into strings. In a real-world scenario, - # this can be replaced with an actual model inference step. - def model(ids): - return (tokenizer.decode(id) for id in ids) - - async def _prompt(text: str, send: Send): - """ - Asynchronously processes the input text and sends back tokens as a streaming response. - - This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then - uses the simulated model to decode token IDs into strings. It then sends each token - back to the client as a streaming response, with a delay between tokens to simulate - the effect of real-time streaming. - - Args: - text (str): The input text message to be processed. - send (Send): An asynchronous function that allows sending back the streaming response. - - Usage: - This function can be adjusted based on the streaming requirements, speed of - response, or the model being used. Developers can also introduce more sophisticated - processing steps or modify how tokens are sent back to the client. - """ - bt.logging.trace("In inner _PROMPT()") - input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze() - buffer = [] - bt.logging.debug(f"Input text: {text}") - bt.logging.debug(f"Input ids: {input_ids}") - - N = 3 # Number of tokens to send back to the client at a time - for token in model(input_ids): - bt.logging.trace(f"appending token: {token}") - buffer.append(token) - # If buffer has N tokens, send them back to the client. 
-                if len(buffer) == N:
-                    time.sleep(0.1)
-                    joined_buffer = "".join(buffer)
-                    bt.logging.debug(f"sending tokens: {joined_buffer}")
-                    await send(
-                        {
-                            "type": "http.response.body",
-                            "body": joined_buffer.encode("utf-8"),
-                            "more_body": True,
-                        }
-                    )
-                    bt.logging.debug(f"Streamed tokens: {joined_buffer}")
-                    buffer = []  # Clear the buffer for next batch of tokens
-
-            # Send any remaining tokens in the buffer
-            if buffer:
-                joined_buffer = "".join(buffer)
-                await send(
-                    {
-                        "type": "http.response.body",
-                        "body": joined_buffer.encode("utf-8"),
-                        "more_body": False,  # No more tokens to send
-                    }
-                )
-                bt.logging.trace(f"Streamed tokens: {joined_buffer}")
-
-        message = synapse.messages[0]
-        bt.logging.trace(f"message in _prompt: {message}")
-        token_streamer = partial(_prompt, message)
-        bt.logging.trace(f"token streamer: {token_streamer}")
-        return synapse.create_streaming_response(token_streamer)
-```
-
-### Writing the client
-Excellent! Now that we have defined our server, we can define our client.
-
-This assumes you have:
-1. Registered your miner on the chain (`finney`/`test`)
-2. Started serving your miner on an open port (e.g. `12345`)
-
-Steps:
-- Instantiate your synapse subclass with the relevant information. E.g. `messages`, `roles`, etc.
-- Instantiate your wallet and a dendrite client
-- Query the dendrite client with your synapse object
-- Iterate over the async generator to extract the tokens yielded by the server
-
-```python
-
-# Import bittensor
-import bittensor as bt
-
-# Create your streaming synapse subclass object to house the request body
-syn = MyStreamingSynapse(
-    roles=["user"],
-    messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."]
-)
-
-# Create a wallet instance that must be registered on the network
-wallet = bt.wallet(name="default", hotkey="default")
-
-# Instantiate the metagraph
-metagraph = bt.metagraph(
-    netuid=8, network="test", sync=True, lite=False
-)
-
-# Grab the axon you're serving
-my_uid = 1
-axon = metagraph.axons[my_uid]
-
-# Create a Dendrite instance to handle client-side communication.
-dendrite = bt.dendrite(wallet=wallet)
-
-
-# This is an async function so we can use the `await` keyword when querying the server with the dendrite object.
-async def main():
-    # Send a request to the Axon using the Dendrite, passing in a StreamPrompting
-    # instance with roles and messages. The response is awaited, as the Dendrite
-    # communicates asynchronously with the Axon. Returns a list of async generators.
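-    # With streaming=True and deserialize=False, each item in `responses` is an
-    # async generator that yields chunks as the server sends them.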
- responses = await dendrite( - [axon], - syn, - deserialize=False, - streaming=True - ) - - # Now that we have our responses we want to iterate over the yielded tokens - # iterate over the async generator to extract the yielded tokens on server side - for resp in responses: - i=0 - async for chunk in resp: - i += 1 - if i % 5 == 0: - print() - if isinstance(chunk, list): - print(chunk[0], end="", flush=True) - else: - # last object yielded is the synapse itself with completion filled - synapse = chunk - break - - # The synapse object contains the completion attribute which contains the - # accumulated tokens from the streaming response. - -if __name__ == "__main__": - # Run the main function with asyncio - asyncio.run(main()) - -``` -There you have it! - -### Complete example -If you would like to see a complete standalone example that only depends on bittensor>=6.2.0, look below: - -- client.py -- streaming_miner.py -- - -# client.py -```python -# Import bittensor and the text-prompting packages -import bittensor as bt -import prompting - -# Create a StreamPrompting synapse object to house the request body -syn = prompting.protocol.StreamPrompting( - roles=["user"], - messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."]) -syn - -# create a wallet instance that must be registered on the network -wallet = bt.wallet(name="default", hotkey="default") -wallet - -# instantiate the metagraph -metagraph = bt.metagraph( - netuid=8, network="test", sync=True, lite=False -) -metagraph - -# Grab the axon you're serving -axon = metagraph.axons[62] -axon - -# Create a Dendrite instance to handle client-side communication. -d = bt.dendrite(wallet=wallet) -d - - -async def main(): - - # Send a request to the Axon using the Dendrite, passing in a StreamPrompting - # instance with roles and messages. The response is awaited, as the Dendrite - # communicates asynchronously with the Axon. Returns a list of async generator. - responses = await d( - [axon], - syn, - deserialize=False, - streaming=True - ) - responses - - # iterate over the async generator to extract the yielded tokens on server side - for resp in responses: - i=0 - async for chunk in resp: - i += 1 - if i % 5 == 0: - print() - if isinstance(chunk, list): - print(chunk[0], end="", flush=True) - else: - # last object yielded is the synapse itself with completion filled - synapse = chunk - break - -if __name__ == "__main__": - import asyncio - asyncio.run(main()) -``` diff --git a/docs/stream_tutorial/client.py b/docs/stream_tutorial/client.py deleted file mode 100644 index 67e6f05c9..000000000 --- a/docs/stream_tutorial/client.py +++ /dev/null @@ -1,104 +0,0 @@ -import argparse -import asyncio -import bittensor as bt - -from protocol import StreamPrompting - -""" -This has assumed you have: -1. Registered your miner on the chain (finney/test) -2. Are serving your miner on an open port (e.g. 12345) - -Steps: -- Instantiate your synapse subclass with the relevant information. E.g. messages, roles, etc. 
-- Instantiate your wallet and a dendrite client -- Query the dendrite client with your synapse object -- Iterate over the async generator to extract the yielded tokens on the server side -""" - - -async def query_synapse(my_uid, wallet_name, hotkey, network, netuid): - syn = StreamPrompting( - roles=["user"], - messages=[ - "hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua." - ], - ) - - # create a wallet instance with provided wallet name and hotkey - wallet = bt.wallet(name=wallet_name, hotkey=hotkey) - - # instantiate the metagraph with provided network and netuid - metagraph = bt.metagraph( - netuid=netuid, network=network, sync=True, lite=False - ) - - # Grab the axon you're serving - axon = metagraph.axons[my_uid] - - # Create a Dendrite instance to handle client-side communication. - dendrite = bt.dendrite(wallet=wallet) - - async def main(): - responses = await dendrite( - [axon], syn, deserialize=False, streaming=True - ) - - for resp in responses: - i = 0 - async for chunk in resp: - i += 1 - if i % 5 == 0: - print() - if isinstance(chunk, list): - print(chunk[0], end="", flush=True) - else: - # last object yielded is the synapse itself with completion filled - synapse = chunk - break - - # Run the main function with asyncio - await main() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Query a Bittensor synapse with given parameters." - ) - - # Adding arguments - parser.add_argument( - "--my_uid", - type=int, - required=True, - help="Your unique miner ID on the chain", - ) - parser.add_argument( - "--netuid", type=int, required=True, help="Network Unique ID" - ) - parser.add_argument( - "--wallet_name", type=str, default="default", help="Name of the wallet" - ) - parser.add_argument( - "--hotkey", type=str, default="default", help="Hotkey for the wallet" - ) - parser.add_argument( - "--network", - type=str, - default="test", - help='Network type, e.g., "test" or "mainnet"', - ) - - # Parse arguments - args = parser.parse_args() - - # Running the async function with provided arguments - asyncio.run( - query_synapse( - args.my_uid, - args.wallet_name, - args.hotkey, - args.network, - args.netuid, - ) - ) diff --git a/docs/stream_tutorial/config.py b/docs/stream_tutorial/config.py deleted file mode 100644 index 7cbe82ca4..000000000 --- a/docs/stream_tutorial/config.py +++ /dev/null @@ -1,116 +0,0 @@ -import bittensor as bt -import argparse -import os - - -def check_config(cls, config: "bt.Config"): - bt.axon.check_config(config) - bt.logging.check_config(config) - full_path = os.path.expanduser( - "{}/{}/{}/{}".format( - config.logging.logging_dir, - config.wallet.get("name", bt.defaults.wallet.name), - config.wallet.get("hotkey", bt.defaults.wallet.hotkey), - config.miner.name, - ) - ) - config.miner.full_path = os.path.expanduser(full_path) - if not os.path.exists(config.miner.full_path): - os.makedirs(config.miner.full_path) - - -def get_config() -> "bt.Config": - parser = argparse.ArgumentParser() - parser.add_argument( - "--axon.port", type=int, default=8098, help="Port to run the axon on." 
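-        # Any free port works here, but it must be reachable by validators querying this miner.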
- ) - # Subtensor network to connect to - parser.add_argument( - "--subtensor.network", - default="finney", - help="Bittensor network to connect to.", - ) - # Chain endpoint to connect to - parser.add_argument( - "--subtensor.chain_endpoint", - default="wss://entrypoint-finney.opentensor.ai:443", - help="Chain endpoint to connect to.", - ) - # Adds override arguments for network and netuid. - parser.add_argument( - "--netuid", type=int, default=1, help="The chain subnet uid." - ) - - parser.add_argument( - "--miner.root", - type=str, - help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ", - default="~/.bittensor/miners/", - ) - parser.add_argument( - "--miner.name", - type=str, - help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ", - default="Bittensor Miner", - ) - - # Run config. - parser.add_argument( - "--miner.blocks_per_epoch", - type=str, - help="Blocks until the miner repulls the metagraph from the chain", - default=100, - ) - - # Switches. - parser.add_argument( - "--miner.no_serve", - action="store_true", - help="If True, the miner doesnt serve the axon.", - default=False, - ) - parser.add_argument( - "--miner.no_start_axon", - action="store_true", - help="If True, the miner doesnt start the axon.", - default=False, - ) - - # Mocks. - parser.add_argument( - "--miner.mock_subtensor", - action="store_true", - help="If True, the miner will allow non-registered hotkeys to mine.", - default=False, - ) - - # Adds subtensor specific arguments i.e. --subtensor.chain_endpoint ... --subtensor.network ... - bt.subtensor.add_args(parser) - - # Adds logging specific arguments i.e. --logging.debug ..., --logging.trace .. or --logging.logging_dir ... - bt.logging.add_args(parser) - - # Adds wallet specific arguments i.e. --wallet.name ..., --wallet.hotkey ./. or --wallet.path ... - bt.wallet.add_args(parser) - - # Adds axon specific arguments i.e. --axon.port ... - bt.axon.add_args(parser) - - # Activating the parser to read any command-line inputs. - # To print help message, run python3 template/miner.py --help - config = bt.config(parser) - - # Logging captures events for diagnosis or understanding miner's behavior. - config.full_path = os.path.expanduser( - "{}/{}/{}/netuid{}/{}".format( - config.logging.logging_dir, - config.wallet.name, - config.wallet.hotkey, - config.netuid, - "miner", - ) - ) - # Ensure the directory for logging exists, else create one. - if not os.path.exists(config.full_path): - os.makedirs(config.full_path, exist_ok=True) - return config diff --git a/docs/stream_tutorial/miner.py b/docs/stream_tutorial/miner.py deleted file mode 100644 index a62814d29..000000000 --- a/docs/stream_tutorial/miner.py +++ /dev/null @@ -1,398 +0,0 @@ -import copy -import time -import asyncio -import argparse -import threading -import traceback -from abc import ABC, abstractmethod -from functools import partial -from starlette.types import Send - -import bittensor as bt -from transformers import GPT2Tokenizer -from typing import List, Dict, Tuple, Union, Callable, Awaitable - -from protocol import StreamPrompting -from config import get_config, check_config - - -class StreamMiner(ABC): - def __init__(self, config=None, axon=None, wallet=None, subtensor=None): - # Setup base config from Miner.config() and merge with subclassed config. 
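-        # The merge below layers the passed-in (or CLI) config on top of the subclass defaults.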
-        base_config = copy.deepcopy(config or get_config())
-        self.config = self.config()
-        self.config.merge(base_config)
-
-        check_config(StreamMiner, self.config)
-        bt.logging.info(self.config)  # TODO: duplicate print?
-
-        self.prompt_cache: Dict[str, Tuple[str, int]] = {}
-
-        # Activating Bittensor's logging with the set configurations.
-        bt.logging.set_config(config=self.config.logging)
-
-        # Wallet holds cryptographic information, ensuring secure transactions and communication.
-        self.wallet = wallet or bt.wallet(config=self.config)
-        bt.logging.info(f"Wallet {self.wallet}")
-
-        # subtensor manages the blockchain connection, facilitating interaction with the Bittensor blockchain.
-        self.subtensor = subtensor or bt.subtensor(config=self.config)
-        bt.logging.info(f"Subtensor: {self.subtensor}")
-        bt.logging.info(
-            f"Running miner for subnet: {self.config.netuid} on network: {self.subtensor.chain_endpoint} with config:"
-        )
-
-        # metagraph provides the network's current state, holding state about other participants in a subnet.
-        self.metagraph = self.subtensor.metagraph(self.config.netuid)
-        bt.logging.info(f"Metagraph: {self.metagraph}")
-
-        if self.wallet.hotkey.ss58_address not in self.metagraph.hotkeys:
-            bt.logging.error(
-                f"\nYour miner: {self.wallet} is not registered to chain connection: {self.subtensor} \nRun btcli register and try again. "
-            )
-            exit()
-        else:
-            # Each miner gets a unique identity (UID) in the network for differentiation.
-            self.my_subnet_uid = self.metagraph.hotkeys.index(
-                self.wallet.hotkey.ss58_address
-            )
-            bt.logging.info(f"Running miner on uid: {self.my_subnet_uid}")
-
-        # The axon handles request processing, allowing validators to send requests to this process.
-        self.axon = axon or bt.axon(
-            wallet=self.wallet, port=self.config.axon.port
-        )
-        # Attaching here determines which functions are called when servicing a request.
-        bt.logging.info(f"Attaching forward function to axon.")
-        print(f"Attaching forward function to axon. {self._prompt}")
-        self.axon.attach(
-            forward_fn=self._prompt,
-        )
-        bt.logging.info(f"Axon created: {self.axon}")
-
-        # Instantiate runners
-        self.should_exit: bool = False
-        self.is_running: bool = False
-        self.thread: threading.Thread = None
-        self.lock = asyncio.Lock()
-        self.request_timestamps: Dict = {}
-
-    @abstractmethod
-    def config(self) -> "bt.Config":
-        ...
-
-    @classmethod
-    @abstractmethod
-    def add_args(cls, parser: argparse.ArgumentParser):
-        ...
-
-    def _prompt(self, synapse: StreamPrompting) -> StreamPrompting:
-        """
-        A wrapper method around the `prompt` method that will be defined by the subclass.
-
-        This method acts as an intermediary layer to perform pre-processing before calling the
-        actual `prompt` method implemented in the subclass. Specifically, it is the place to check
-        whether a prompt is in the cache to avoid reprocessing recent requests; in this tutorial
-        version it simply forwards the call (the `prompt_cache` dict initialized in `__init__` is
-        available for adding such caching).
-
-        Args:
-            synapse (StreamPrompting): The incoming request object encapsulating the details of the request.
-
-        Returns:
-            StreamPrompting: The response object to be sent back in reply to the incoming request, essentially
-            the filled synapse request object.
-
-        Raises:
-            ValueError: If the prompt is found in the cache indicating it was sent recently (once
-            such caching is implemented).
-
-        Example:
-            This method is not meant to be called directly but is invoked internally when a request
-            is received, and it subsequently calls the `prompt` method of the subclass.
- """ - return self.prompt(synapse) - - @abstractmethod - def prompt(self, synapse: StreamPrompting) -> StreamPrompting: - """ - Abstract method to handle and respond to incoming requests to the miner. - - Subclasses should implement this method to define their custom logic for processing and - responding to requests. This method is designed to be overridden, and its behavior will - be dependent on the specific implementation provided in the subclass. - - Args: - synapse (StreamPrompting): The incoming request object encapsulating the details - of the request. This must contain `messages` and `roles` as fields. - - Returns: - StreamPrompting: The response object that should be sent back in reply to the - incoming request. This is essentially the filled synapse request object. - - Example: - class CustomMiner(Miner): - def prompt(self, synapse: StreamPrompting) -> StreamPrompting: - # Custom logic to process and respond to the request. - synapse.completion = "The meaning of life is 42." - return synapse - """ - ... - - def run(self): - """ - Runs the miner logic. This method starts the miner's operations, including - listening for incoming requests and periodically updating the miner's knowledge - of the network graph. - """ - if not self.subtensor.is_hotkey_registered( - netuid=self.config.netuid, - hotkey_ss58=self.wallet.hotkey.ss58_address, - ): - bt.logging.error( - f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}" - f"Please register the hotkey using `btcli subnets register` before trying again" - ) - exit() - - # Serve passes the axon information to the network + netuid we are hosting on. - # This will auto-update if the axon port of external ip have changed. - bt.logging.info( - f"Serving axon {StreamPrompting} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}" - ) - self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor) - - # Start starts the miner's axon, making it active on the network. - bt.logging.info( - f"Starting axon server on port: {self.config.axon.port}" - ) - self.axon.start() - - # --- Run until should_exit = True. - self.last_epoch_block = self.subtensor.get_current_block() - bt.logging.info(f"Miner starting at block: {self.last_epoch_block}") - - # This loop maintains the miner's operations until intentionally stopped. - bt.logging.info(f"Starting main loop") - step = 0 - try: - while not self.should_exit: - start_epoch = time.time() - - # --- Wait until next epoch. - current_block = self.subtensor.get_current_block() - while ( - current_block - self.last_epoch_block - < self.config.miner.blocks_per_epoch - ): - # --- Wait for next bloc. - time.sleep(1) - current_block = self.subtensor.get_current_block() - - # --- Check if we should exit. - if self.should_exit: - break - - # --- Update the metagraph with the latest network state. - self.last_epoch_block = self.subtensor.get_current_block() - - metagraph = self.subtensor.metagraph( - netuid=self.config.netuid, - lite=True, - block=self.last_epoch_block, - ) - log = ( - f"Step:{step} | " - f"Block:{metagraph.block.item()} | " - f"Stake:{metagraph.S[self.my_subnet_uid]} | " - f"Rank:{metagraph.R[self.my_subnet_uid]} | " - f"Trust:{metagraph.T[self.my_subnet_uid]} | " - f"Consensus:{metagraph.C[self.my_subnet_uid] } | " - f"Incentive:{metagraph.I[self.my_subnet_uid]} | " - f"Emission:{metagraph.E[self.my_subnet_uid]}" - ) - bt.logging.info(log) - - step += 1 - - # If someone intentionally stops the miner, it'll safely terminate operations. 
- except KeyboardInterrupt: - self.axon.stop() - bt.logging.success("Miner killed by keyboard interrupt.") - exit() - - # In case of unforeseen errors, the miner will log the error and continue operations. - except Exception as e: - bt.logging.error(traceback.format_exc()) - - def run_in_background_thread(self): - """ - Starts the miner's operations in a separate background thread. - This is useful for non-blocking operations. - """ - if not self.is_running: - bt.logging.debug("Starting miner in background thread.") - self.should_exit = False - self.thread = threading.Thread(target=self.run, daemon=True) - self.thread.start() - self.is_running = True - bt.logging.debug("Started") - - def stop_run_thread(self): - """ - Stops the miner's operations that are running in the background thread. - """ - if self.is_running: - bt.logging.debug("Stopping miner in background thread.") - self.should_exit = True - self.thread.join(5) - self.is_running = False - bt.logging.debug("Stopped") - - def __enter__(self): - """ - Starts the miner's operations in a background thread upon entering the context. - This method facilitates the use of the miner in a 'with' statement. - """ - self.run_in_background_thread() - - def __exit__(self, exc_type, exc_value, traceback): - """ - Stops the miner's background operations upon exiting the context. - This method facilitates the use of the miner in a 'with' statement. - - Args: - exc_type: The type of the exception that caused the context to be exited. - None if the context was exited without an exception. - exc_value: The instance of the exception that caused the context to be exited. - None if the context was exited without an exception. - traceback: A traceback object encoding the stack trace. - None if the context was exited without an exception. - """ - self.stop_run_thread() - - -class StreamingTemplateMiner(StreamMiner): - def config(self) -> "bt.Config": - """ - Returns the configuration object specific to this miner. - - Implement and extend this method to provide custom configurations for the miner. - Currently, it sets up a basic configuration parser. - - Returns: - bt.Config: A configuration object with the miner's operational parameters. - """ - parser = argparse.ArgumentParser(description="Streaming Miner Configs") - self.add_args(parser) - return bt.config(parser) - - def add_args(cls, parser: argparse.ArgumentParser): - """ - Adds custom arguments to the command line parser. - - Developers can introduce additional command-line arguments specific to the miner's - functionality in this method. These arguments can then be used to configure the miner's operation. - - Args: - parser (argparse.ArgumentParser): - The command line argument parser to which custom arguments should be added. - """ - pass - - def prompt(self, synapse: StreamPrompting) -> StreamPrompting: - """ - Generates a streaming response for the provided synapse. - - This function serves as the main entry point for handling streaming prompts. It takes - the incoming synapse which contains messages to be processed and returns a streaming - response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode - the incoming message, and then sends the response back to the client token by token. - - Args: - synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed. - - Returns: - StreamPrompting: The streaming response object which can be used by other functions to - stream back the response to the client. 
- - Usage: - This function can be extended and customized based on specific requirements of the - miner. Developers can swap out the tokenizer, model, or adjust how streaming responses - are generated to suit their specific applications. - """ - bt.logging.trace("HI. PROMPT()") - tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - - # Simulated function to decode token IDs into strings. In a real-world scenario, - # this can be replaced with an actual model inference step. - def model(ids): - return (tokenizer.decode(id) for id in ids) - - async def _prompt(text: str, send: Send): - """ - Asynchronously processes the input text and sends back tokens as a streaming response. - - This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then - uses the simulated model to decode token IDs into strings. It then sends each token - back to the client as a streaming response, with a delay between tokens to simulate - the effect of real-time streaming. - - Args: - text (str): The input text message to be processed. - send (Send): An asynchronous function that allows sending back the streaming response. - - Usage: - This function can be adjusted based on the streaming requirements, speed of - response, or the model being used. Developers can also introduce more sophisticated - processing steps or modify how tokens are sent back to the client. - """ - bt.logging.trace("HI. _PROMPT()") - input_ids = tokenizer( - text, return_tensors="pt" - ).input_ids.squeeze() - buffer = [] - bt.logging.debug(f"Input text: {text}") - bt.logging.debug(f"Input ids: {input_ids}") - - N = 3 # Number of tokens to send back to the client at a time - for token in model(input_ids): - bt.logging.trace(f"appending token: {token}") - buffer.append(token) - # If buffer has N tokens, send them back to the client. - if len(buffer) == N: - time.sleep(0.1) - joined_buffer = "".join(buffer) - bt.logging.debug(f"sedning tokens: {joined_buffer}") - await send( - { - "type": "http.response.body", - "body": joined_buffer.encode("utf-8"), - "more_body": True, - } - ) - bt.logging.debug(f"Streamed tokens: {joined_buffer}") - buffer = [] # Clear the buffer for next batch of tokens - - # Send any remaining tokens in the buffer - if buffer: - joined_buffer = "".join(buffer) - await send( - { - "type": "http.response.body", - "body": joined_buffer.encode("utf-8"), - "more_body": False, # No more tokens to send - } - ) - bt.logging.trace(f"Streamed tokens: {joined_buffer}") - - message = synapse.messages[0] - bt.logging.trace(f"message in _prompt: {message}") - token_streamer = partial(_prompt, message) - bt.logging.trace(f"token streamer: {token_streamer}") - return synapse.create_streaming_response(token_streamer) - - -# This is the main function, which runs the miner. -if __name__ == "__main__": - with StreamingTemplateMiner(): - while True: - time.sleep(1) diff --git a/docs/stream_tutorial/protocol.py b/docs/stream_tutorial/protocol.py deleted file mode 100644 index 26e91fdc2..000000000 --- a/docs/stream_tutorial/protocol.py +++ /dev/null @@ -1,154 +0,0 @@ -import pydantic -import bittensor as bt - -from abc import ABC, abstractmethod -from typing import List, Union, Callable, Awaitable -from starlette.responses import StreamingResponse - - -class StreamPrompting(bt.StreamingSynapse): - """ - StreamPrompting is a specialized implementation of the `StreamingSynapse` tailored for prompting functionalities within - the Bittensor network. 
This class is intended to interact with a streaming response that contains a sequence of tokens, - which represent prompts or messages in a certain scenario. - - As a developer, when using or extending the `StreamPrompting` class, you should be primarily focused on the structure - and behavior of the prompts you are working with. The class has been designed to seamlessly handle the streaming, - decoding, and accumulation of tokens that represent these prompts. - - Attributes: - - `roles` (List[str]): A list of roles involved in the prompting scenario. This could represent different entities - or agents involved in the conversation or use-case. They are immutable to ensure consistent - interaction throughout the lifetime of the object. - - - `messages` (List[str]): These represent the actual prompts or messages in the prompting scenario. They are also - immutable to ensure consistent behavior during processing. - - - `completion` (str): Stores the processed result of the streaming tokens. As tokens are streamed, decoded, and - processed, they are accumulated in the completion attribute. This represents the "final" - product or result of the streaming process. - - `required_hash_fields` (List[str]): A list of fields that are required for the hash. - - Methods: - - `process_streaming_response`: This method asynchronously processes the incoming streaming response by decoding - the tokens and accumulating them in the `completion` attribute. - - - `deserialize`: Converts the `completion` attribute into its desired data format, in this case, a string. - - - `extract_response_json`: Extracts relevant JSON data from the response, useful for gaining insights on the response's - metadata or for debugging purposes. - - Note: While you can directly use the `StreamPrompting` class, it's designed to be extensible. Thus, you can create - subclasses to further customize behavior for specific prompting scenarios or requirements. - """ - - roles: List[str] = pydantic.Field( - ..., - title="Roles", - description="A list of roles in the StreamPrompting scenario. Immuatable.", - allow_mutation=False, - ) - - messages: List[str] = pydantic.Field( - ..., - title="Messages", - description="A list of messages in the StreamPrompting scenario. Immutable.", - allow_mutation=False, - ) - - required_hash_fields: List[str] = pydantic.Field( - ["messages"], - title="Required Hash Fields", - description="A list of required fields for the hash.", - allow_mutation=False, - ) - - completion: str = pydantic.Field( - "", - title="Completion", - description="Completion status of the current StreamPrompting object. This attribute is mutable and can be updated.", - ) - - async def process_streaming_response(self, response: StreamingResponse): - """ - `process_streaming_response` is an asynchronous method designed to process the incoming streaming response from the - Bittensor network. It's the heart of the StreamPrompting class, ensuring that streaming tokens, which represent - prompts or messages, are decoded and appropriately managed. - - As the streaming response is consumed, the tokens are decoded from their 'utf-8' encoded format, split based on - newline characters, and concatenated into the `completion` attribute. This accumulation of decoded tokens in the - `completion` attribute allows for a continuous and coherent accumulation of the streaming content. - - Args: - response: The streaming response object containing the content chunks to be processed. 
Each chunk in this - response is expected to be a set of tokens that can be decoded and split into individual messages or prompts. - """ - if self.completion is None: - self.completion = "" - bt.logging.debug( - "Processing streaming response (StreamingSynapse base class)." - ) - async for chunk in response.content.iter_any(): - bt.logging.debug(f"Processing chunk: {chunk}") - tokens = chunk.decode("utf-8").split("\n") - for token in tokens: - bt.logging.debug(f"--processing token: {token}") - if token: - self.completion += token - bt.logging.debug(f"yielding tokens {tokens}") - yield tokens - - def deserialize(self) -> str: - """ - Deserializes the response by returning the completion attribute. - - Returns: - str: The completion result. - """ - return self.completion - - def extract_response_json(self, response: StreamingResponse) -> dict: - """ - `extract_response_json` is a method that performs the crucial task of extracting pertinent JSON data from the given - response. The method is especially useful when you need a detailed insight into the streaming response's metadata - or when debugging response-related issues. - - Beyond just extracting the JSON data, the method also processes and structures the data for easier consumption - and understanding. For instance, it extracts specific headers related to dendrite and axon, offering insights - about the Bittensor network's internal processes. The method ultimately returns a dictionary with a structured - view of the extracted data. - - Args: - response: The response object from which to extract the JSON data. This object typically includes headers and - content which can be used to glean insights about the response. - - Returns: - dict: A structured dictionary containing: - - Basic response metadata such as name, timeout, total_size, and header_size. - - Dendrite and Axon related information extracted from headers. - - Roles and Messages pertaining to the current StreamPrompting instance. - - The accumulated completion. - """ - headers = { - k.decode("utf-8"): v.decode("utf-8") - for k, v in response.__dict__["_raw_headers"] - } - - def extract_info(prefix): - return { - key.split("_")[-1]: value - for key, value in headers.items() - if key.startswith(prefix) - } - - return { - "name": headers.get("name", ""), - "timeout": float(headers.get("timeout", 0)), - "total_size": int(headers.get("total_size", 0)), - "header_size": int(headers.get("header_size", 0)), - "dendrite": extract_info("bt_header_dendrite"), - "axon": extract_info("bt_header_axon"), - "roles": self.roles, - "messages": self.messages, - "completion": self.completion, - } diff --git a/evaluation-results/.gitkeep b/evaluation-results/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/min_compute.yml b/min_compute.yml index 1da3bb044..4e31ed56e 100644 --- a/min_compute.yml +++ b/min_compute.yml @@ -1,16 +1,3 @@ -# Use this document to specify the minimum compute requirements. -# This document will be used to generate a list of recommended hardware for your subnet. - -# This is intended to give a rough estimate of the minimum requirements -# so that the user can make an informed decision about whether or not -# they want to run a miner or validator on their machine. 
- -# NOTE: Specification for miners may be different from validators - -version: '1.0' # update this version key as needed, ideally should match your release version - -compute_spec: - miner: cpu: @@ -21,67 +8,55 @@ compute_spec: architecture: "x86_64" # Architecture type (e.g., x86_64, arm64) gpu: - required: True # Does the application require a GPU? - min_vram: 8 # Minimum GPU VRAM (GB) - recommended_vram: 24 # Recommended GPU VRAM (GB) - cuda_cores: 1024 # Minimum number of CUDA cores (if applicable) - min_compute_capability: 6.0 # Minimum CUDA compute capability - recommended_compute_capability: 7.0 # Recommended CUDA compute capability - recommended_gpu: "NVIDIA A100" # provide a recommended GPU to purchase/rent + required: False memory: - min_ram: 16 # Minimum RAM (GB) + min_ram: 8 # Minimum RAM (GB) min_swap: 4 # Minimum swap space (GB) - recommended_swap: 8 # Recommended swap space (GB) + recommended_swap: 16 # Recommended swap space (GB) ram_type: "DDR4" # RAM type (e.g., DDR4, DDR3, etc.) storage: - min_space: 10 # Minimum free storage space (GB) - recommended_space: 100 # Recommended free storage space (GB) + min_space: 500 # Minimum free storage space (GB) + recommended_space: 1000 # Recommended free storage space (GB) type: "SSD" # Preferred storage type (e.g., SSD, HDD) - min_iops: 1000 # Minimum I/O operations per second (if applicable) - recommended_iops: 5000 # Recommended I/O operations per second os: name: "Ubuntu" # Name of the preferred operating system(s) - version: 20.04 # Version of the preferred operating system(s) + version: 24.04 # Version of the preferred operating system(s) validator: cpu: - min_cores: 4 # Minimum number of CPU cores + min_cores: 8 # Minimum number of CPU cores min_speed: 2.5 # Minimum speed per core (GHz) recommended_cores: 8 # Recommended number of CPU cores recommended_speed: 3.5 # Recommended speed per core (GHz) architecture: "x86_64" # Architecture type (e.g., x86_64, arm64) gpu: - required: True # Does the application require a GPU? - min_vram: 8 # Minimum GPU VRAM (GB) - recommended_vram: 24 # Recommended GPU VRAM (GB) + required: False # we don't use GPU currently for running models, but we will + min_vram: 6 # Minimum GPU VRAM (GB) + recommended_vram: 12 # Recommended GPU VRAM (GB) cuda_cores: 1024 # Minimum number of CUDA cores (if applicable) min_compute_capability: 6.0 # Minimum CUDA compute capability recommended_compute_capability: 7.0 # Recommended CUDA compute capability - recommended_gpu: "NVIDIA A100" # provide a recommended GPU to purchase/rent + recommended_gpu: "NVIDIA RTX" # provide a recommended GPU to purchase/rent memory: min_ram: 16 # Minimum RAM (GB) min_swap: 4 # Minimum swap space (GB) - recommended_swap: 8 # Recommended swap space (GB) + recommended_swap: 4 # Recommended swap space (GB) ram_type: "DDR4" # RAM type (e.g., DDR4, DDR3, etc.) 
   storage:
-    min_space: 10  # Minimum free storage space (GB)
-    recommended_space: 100  # Recommended free storage space (GB)
+    min_space: 300  # Minimum free storage space (GB)
+    recommended_space: 500  # Recommended free storage space (GB)
     type: "SSD"  # Preferred storage type (e.g., SSD, HDD)
-    min_iops: 1000  # Minimum I/O operations per second (if applicable)
-    recommended_iops: 5000  # Recommended I/O operations per second
   os:
     name: "Ubuntu"  # Name of the preferred operating system(s)
-    version: 20.04  # Version of the preferred operating system(s)
+    version: 24.04  # Version of the preferred operating system(s)
 
 network_spec:
   bandwidth:
-    download: 100  # Minimum download bandwidth (Mbps)
-    upload: 20  # Minimum upload bandwidth (Mbps)
diff --git a/neurons/miner.py b/neurons/miner.py
index 5f7b95001..5439ce04d 100644
--- a/neurons/miner.py
+++ b/neurons/miner.py
@@ -1,169 +1,291 @@
-# The MIT License (MIT)
-# Copyright © 2023 Yuma Rao
-# TODO(developer): Set your name
-# Copyright © 2023
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
-# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
-# the Software.
-
-# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
-# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
+import asyncio
+import copy
 import time
-import typing
+import os
+from pathlib import Path
+
 import bittensor as bt
+from dotenv import load_dotenv
+from huggingface_hub import HfApi, login as hf_login
+import huggingface_hub
+import onnx
+import argparse
+import hashlib
+
+from cancer_ai.validator.utils import run_command
+from cancer_ai.validator.model_run_manager import ModelRunManager
+from cancer_ai.validator.models import ModelInfo
+from cancer_ai.validator.dataset_manager import DatasetManager
+from cancer_ai.validator.competition_manager import COMPETITION_HANDLER_MAPPING
+
+from cancer_ai.base.base_miner import BaseNeuron
+from cancer_ai.chain_models_store import ChainMinerModel, ChainModelMetadata
+from cancer_ai.utils.config import path_config, add_miner_args
+from cancer_ai.validator.utils import get_newest_competition_packages
+
+
+LICENSE_NOTICE = """
+🔒 License Notice:
+To share your model for the Safe Scan competition, it must be released under the MIT license.
+
+✅ By continuing, you confirm that your model is licensed under the MIT License,
+which allows open use, modification, and distribution with attribution.
+
+📤 Make sure your Hugging Face repository has its license set to MIT.
+"""
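Submissions to Safe Scan competitions must be ONNX files that pass the `onnx.checker` validation performed by `is_onnx_model` in the CLI class that follows. A minimal sketch of producing such a file from a trained PyTorch model; `MyModel`, the input shape, the output path, and the opset version are all illustrative assumptions, not part of this repository:

    import torch
    import onnx

    model = MyModel()  # hypothetical trained torch.nn.Module
    model.eval()

    # Example input matching the model's expected shape (assumed 224x224 RGB).
    dummy_input = torch.randn(1, 3, 224, 224)

    # Export to ONNX; opset 17 is an arbitrary, commonly supported choice.
    torch.onnx.export(model, dummy_input, "model.onnx", opset_version=17)

    # The same validity check the CLI runs before evaluating or submitting.
    onnx.checker.check_model("model.onnx")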
+""" +class MinerManagerCLI: + def __init__(self, config=None): + + # setting basic Bittensor objects + base_config = copy.deepcopy(config or BaseNeuron.config()) + self.config = path_config(self) + self.config.merge(base_config) + self.config.logging.debug = True + BaseNeuron.check_config(self.config) + bt.logging.set_config(config=self.config.logging) + + self.code_zip_path = None + + self.wallet = None + self.subtensor = None + self.metagraph = None + self.hotkey = None + self.metadata_store = None + + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + """Method for injecting miner arguments to the parser.""" + add_miner_args(cls, parser) + + async def upload_to_hf(self) -> None: + """Uploads model and code to Hugging Face.""" + bt.logging.info("Uploading model to Hugging Face.") + hf_api = HfApi() + hf_login(token=self.config.hf_token) + + hf_model_path = self.config.hf_model_name + hf_code_path = self.code_zip_path + bt.logging.info(f"Model path: {hf_model_path}") + bt.logging.info(f"Code path: {hf_code_path}") + + path = hf_api.upload_file( + path_or_fileobj=self.config.model_path, + path_in_repo=hf_model_path, + repo_id=self.config.hf_repo_id, + token=self.config.hf_token, + ) + bt.logging.info("Uploading code to Hugging Face.") + path = hf_api.upload_file( + path_or_fileobj=self.code_zip_path, + path_in_repo=Path(hf_code_path).name, + repo_id=self.config.hf_repo_id, + token=self.config.hf_token, + ) + bt.logging.info(f"Code uploaded to Hugging Face: {path}") + bt.logging.info(f"Uploaded model to Hugging Face: {path}") + + @staticmethod + def is_onnx_model(model_path: str) -> bool: + """Checks if model is an ONNX model.""" + if not os.path.exists(model_path): + bt.logging.error("Model file does not exist") + return False + try: + onnx.checker.check_model(model_path) + except onnx.checker.ValidationError as e: + bt.logging.warning(e) + return False + return True + + async def evaluate_model(self) -> None: + bt.logging.info("Evaluate model mode") + + run_manager = ModelRunManager( + config=self.config, model=ModelInfo(file_path=self.config.model_path) + ) -# Bittensor Miner Template: -import template + try: + dataset_packages = await get_newest_competition_packages(self.config) + except Exception as e: + bt.logging.error(f"Error retrieving competition packages: {e}") + return + + for package in dataset_packages: + dataset_manager = DatasetManager( + self.config, + self.config.competition_id, + package["dataset_hf_repo"], + package["dataset_hf_filename"], + package["dataset_hf_repo_type"], + use_auth=False + ) + await dataset_manager.prepare_dataset() -# import base miner class which takes care of most of the boilerplate -from template.base.miner import BaseMinerNeuron + X_test, y_test = await dataset_manager.get_data() + competition_handler = COMPETITION_HANDLER_MAPPING[self.config.competition_id]( + X_test=X_test, y_test=y_test, config=self.config + ) -class Miner(BaseMinerNeuron): - """ - Your miner neuron class. You should use this class to define your miner's behavior. In particular, you should replace the forward function with your own logic. You may also want to override the blacklist and priority functions according to your needs. + # Set preprocessing directory and preprocess data once + competition_handler.set_preprocessed_data_dir(self.config.models.dataset_dir) + await competition_handler.preprocess_and_serialize_data(X_test) - This class inherits from the BaseMinerNeuron class, which in turn inherits from BaseNeuron. 
The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior. + y_test = competition_handler.prepare_y_pred(y_test) - This class provides reasonable default behavior for a miner such as blacklisting unrecognized hotkeys, prioritizing requests based on stake, and forwarding requests to the forward function. If you need to define custom - """ + start_time = time.time() + # Pass the preprocessed data generator instead of raw paths + preprocessed_data_gen = competition_handler.get_preprocessed_data_generator() + y_pred = await run_manager.run(preprocessed_data_gen) + run_time_s = time.time() - start_time - def __init__(self, config=None): - super(Miner, self).__init__(config=config) - - # TODO(developer): Anything specific to your use case you can do here - - async def forward( - self, synapse: template.protocol.Dummy - ) -> template.protocol.Dummy: - """ - Processes the incoming 'Dummy' synapse by performing a predefined operation on the input data. - This method should be replaced with actual logic relevant to the miner's purpose. - - Args: - synapse (template.protocol.Dummy): The synapse object containing the 'dummy_input' data. - - Returns: - template.protocol.Dummy: The synapse object with the 'dummy_output' field set to twice the 'dummy_input' value. - - The 'forward' function is a placeholder and should be overridden with logic that is appropriate for - the miner's intended operation. This method demonstrates a basic transformation of input data. - """ - # TODO(developer): Replace with actual implementation logic. - synapse.dummy_output = synapse.dummy_input * 2 - return synapse - - async def blacklist( - self, synapse: template.protocol.Dummy - ) -> typing.Tuple[bool, str]: - """ - Determines whether an incoming request should be blacklisted and thus ignored. Your implementation should - define the logic for blacklisting requests based on your needs and desired security parameters. - - Blacklist runs before the synapse data has been deserialized (i.e. before synapse.data is available). - The synapse is instead contracted via the headers of the request. It is important to blacklist - requests before they are deserialized to avoid wasting resources on requests that will be ignored. - - Args: - synapse (template.protocol.Dummy): A synapse object constructed from the headers of the incoming request. - - Returns: - Tuple[bool, str]: A tuple containing a boolean indicating whether the synapse's hotkey is blacklisted, - and a string providing the reason for the decision. - - This function is a security measure to prevent resource wastage on undesired requests. It should be enhanced - to include checks against the metagraph for entity registration, validator status, and sufficient stake - before deserialization of synapse data to minimize processing overhead. - - Example blacklist logic: - - Reject if the hotkey is not a registered entity within the metagraph. - - Consider blacklisting entities that are not validators or have insufficient stake. - - In practice it would be wise to blacklist requests from entities that are not validators, or do not have - enough stake. This can be checked via metagraph.S and metagraph.validator_permit. You can always attain - the uid of the sender via a metagraph.hotkeys.index( synapse.dendrite.hotkey ) call. - - Otherwise, allow the request to be processed further. 
- """ - - if synapse.dendrite is None or synapse.dendrite.hotkey is None: - bt.logging.warning("Received a request without a dendrite or hotkey.") - return True, "Missing dendrite or hotkey" - - # TODO(developer): Define how miners should blacklist requests. - uid = self.metagraph.hotkeys.index(synapse.dendrite.hotkey) - if ( - not self.config.blacklist.allow_non_registered - and synapse.dendrite.hotkey not in self.metagraph.hotkeys - ): - # Ignore requests from un-registered entities. - bt.logging.trace( - f"Blacklisting un-registered hotkey {synapse.dendrite.hotkey}" + # print(y_pred) + model_result = competition_handler.get_model_result(y_test, y_pred, run_time_s) + bt.logging.info( + f"Evalutaion results:\n{model_result.model_dump_json(indent=4)}" ) - return True, "Unrecognized hotkey" - - if self.config.blacklist.force_validator_permit: - # If the config is set to force validator permit, then we should only allow requests from validators. - if not self.metagraph.validator_permit[uid]: - bt.logging.warning( - f"Blacklisting a request from non-validator hotkey {synapse.dendrite.hotkey}" - ) - return True, "Non-validator hotkey" - - bt.logging.trace( - f"Not Blacklisting recognized hotkey {synapse.dendrite.hotkey}" + + # Cleanup preprocessed data + competition_handler.cleanup_preprocessed_data() + + if self.config.clean_after_run: + dataset_manager.delete_dataset() + + async def compress_code(self) -> None: + bt.logging.info("Compressing code") + bt.logging.info(f"Code directory: {self.config.code_directory}") + + code_dir = Path(self.config.code_directory) + self.code_zip_path = str(code_dir.parent / f"{code_dir.name}.zip") + + out, err = await run_command( + f"zip -r {self.code_zip_path} {self.config.code_directory}/*" ) - return False, "Hotkey recognized!" + if err: + bt.logging.info("Error zipping code") + bt.logging.error(err) + return + bt.logging.info(f"Code zip path: {self.code_zip_path}") + + def _compute_model_hash(self, repo_id, model_filename): + """Compute an 8-character hexadecimal SHA-1 hash of the model file from Hugging Face.""" + try: + model_path = huggingface_hub.hf_hub_download( + repo_id=repo_id, + filename=model_filename, + repo_type="model", + ) + sha1 = hashlib.sha1() + with open(model_path, 'rb') as f: + while chunk := f.read(8192): + sha1.update(chunk) + full_hash = sha1.hexdigest() + truncated_hash = full_hash[:8] # Take the first 8 characters of the hex digest + bt.logging.info(f"Computed 8-character hash: {truncated_hash}") + return truncated_hash + except Exception as e: + bt.logging.error(f"Failed to compute model hash: {e}") + return None + + async def submit_model(self) -> None: + # Check if the required model and files are present in hugging face repo + print(LICENSE_NOTICE) + self.wallet = bt.wallet(config=self.config) + self.subtensor = bt.subtensor(config=self.config) + self.metagraph = self.subtensor.metagraph(self.config.netuid) + self.hotkey = self.wallet.hotkey.ss58_address + + bt.logging.info(f"Wallet: {self.wallet}") + bt.logging.info(f"Subtensor: {self.subtensor}") + bt.logging.info(f"Metagraph: {self.metagraph}") + + if not self.subtensor.is_hotkey_registered( + netuid=self.config.netuid, + hotkey_ss58=self.wallet.hotkey.ss58_address, + ): + bt.logging.error( + f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}." 
+ f" Please register the hotkey using `btcli subnets register` before trying again" + ) + exit() - async def priority(self, synapse: template.protocol.Dummy) -> float: - """ - The priority function determines the order in which requests are handled. More valuable or higher-priority - requests are processed before others. You should design your own priority mechanism with care. + self.metadata_store = ChainModelMetadata( + subtensor=self.subtensor, netuid=self.config.netuid, wallet=self.wallet + ) - This implementation assigns priority to incoming requests based on the calling entity's stake in the metagraph. + if len(self.config.hf_repo_id.encode('utf-8')) > 32: + bt.logging.error("hf_repo_id must be 32 bytes or less") + return + + if len(self.config.hf_model_name.encode('utf-8')) > 32: + bt.logging.error("hf_model_filename must be 32 bytes or less") + return + + if len(self.config.hf_code_filename.encode('utf-8')) > 31: + bt.logging.error("hf_code_filename must be 31 bytes or less") + return - Args: - synapse (template.protocol.Dummy): The synapse object that contains metadata about the incoming request. + if not self._check_hf_file_exists(self.config.hf_repo_id, self.config.hf_model_name, self.config.hf_repo_type): + return - Returns: - float: A priority score derived from the stake of the calling entity. + if not self._check_hf_file_exists(self.config.hf_repo_id, self.config.hf_code_filename, self.config.hf_repo_type): + return - Miners may receive messages from multiple entities at once. This function determines which request should be - processed first. Higher values indicate that the request should be processed first. Lower values indicate - that the request should be processed later. + model_hash = self._compute_model_hash( + self.config.hf_repo_id, self.config.hf_model_name + ) - Example priority logic: - - A higher stake results in a higher priority value. - """ - if synapse.dendrite is None or synapse.dendrite.hotkey is None: - bt.logging.warning("Received a request without a dendrite or hotkey.") - return 0.0 - - # TODO(developer): Define how miners should prioritize requests. - caller_uid = self.metagraph.hotkeys.index( - synapse.dendrite.hotkey - ) # Get the caller index. - priority = float( - self.metagraph.S[caller_uid] - ) # Return the stake as the priority. - bt.logging.trace( - f"Prioritizing {synapse.dendrite.hotkey} with value: {priority}" + if not model_hash: + bt.logging.error("Failed to compute model hash") + return + + # Push model metadata to chain + model_id = ChainMinerModel( + competition_id=self.config.competition_id, + hf_repo_id=self.config.hf_repo_id, + hf_model_filename=self.config.hf_model_name, + hf_repo_type="model", + hf_code_filename=self.config.hf_code_filename, + block=None, + model_hash=model_hash, + ) + await self.metadata_store.store_model_metadata(model_id) + bt.logging.success( + f"Successfully pushed model metadata on chain. 
-        return priority
+
+    def _check_hf_file_exists(self, repo_id, filename, repo_type):
+        if not huggingface_hub.file_exists(repo_id=repo_id, filename=filename, repo_type=repo_type):
+            bt.logging.error(f"{filename} not found in Hugging Face repo")
+            return False
+        return True
+
+    async def main(self) -> None:
+
+        # bt.logging(config=self.config)
+        if self.config.action != "submit" and not self.config.model_path:
+            bt.logging.error("Missing --model-path argument")
+            return
+        if self.config.action != "submit" and not MinerManagerCLI.is_onnx_model(
+            self.config.model_path
+        ):
+            bt.logging.error("Provided model is not in ONNX format")
+            return
+
+        match self.config.action:
+            case "submit":
+                await self.submit_model()
+            case "evaluate":
+                await self.evaluate_model()
+            case "upload":
+                await self.compress_code()
+                await self.upload_to_hf()
+            case _:
+                bt.logging.error(f"Unrecognized action: {self.config.action}")
-# This is the main function, which runs the miner.
 if __name__ == "__main__":
-    with Miner() as miner:
-        while True:
-            bt.logging.info(f"Miner running... {time.time()}")
-            time.sleep(5)
+    load_dotenv()
+    cli_manager = MinerManagerCLI()
+    asyncio.run(cli_manager.main())
diff --git a/neurons/tests/competition_runner_test.py b/neurons/tests/competition_runner_test.py
new file mode 100644
index 000000000..9b7be9483
--- /dev/null
+++ b/neurons/tests/competition_runner_test.py
@@ -0,0 +1,109 @@
+import asyncio
+import json
+from types import SimpleNamespace
+from typing import List, Dict
+import pytest
+
+import bittensor as bt
+
+
+from cancer_ai.validator.competition_manager import CompetitionManager
+from cancer_ai.validator.rewarder import CompetitionWinnersStore, Rewarder
+from cancer_ai.base.base_miner import BaseNeuron
+from cancer_ai.utils.config import path_config
+from cancer_ai.validator.utils import get_competition_config
+from cancer_ai.mock import MockSubtensor
+from cancer_ai.validator.models import CompetitionsListModel, CompetitionModel
+from cancer_ai.validator.model_db import ModelDBController
+
+
+COMPETITION_FILEPATH = "config/competition_config_testnet.json"
+
+# TODO integrate with bt config
+test_config = SimpleNamespace(
+    **{
+        "wandb_entity": "testnet",
+        "wandb_project_name": "melanoma-1",
+        "competition_id": "melanoma-1",
+        "hotkeys": [],
+        "subtensor": SimpleNamespace(**{"network": "test"}),
+        "netuid": 163,
+        "models": SimpleNamespace(
+            **{
+                "model_dir": "/tmp/models",
+                "dataset_dir": "/tmp/datasets",
+            }
+        ),
+        "hf_token": "HF_TOKEN",
+        "db_path": "models.db",
+    }
+)
+
+competitions_cfg = get_competition_config(COMPETITION_FILEPATH)
+
+
+async def run_competitions(
+    config: str,
+    subtensor: bt.subtensor,
+    hotkeys: List[str],
+) -> Dict[str, str]:
+    """Run all competitions, return the winning hotkey for each competition"""
+    results = {}
+    for competition_cfg in competitions_cfg.competitions:
+        bt.logging.info(f"Starting competition: {competition_cfg}")
+
+        competition_manager = CompetitionManager(
+            config=config,
+            subtensor=subtensor,
+            hotkeys=hotkeys,
+            validator_hotkey="Walidator",
+            competition_id=competition_cfg.competition_id,
+            dataset_hf_repo=competition_cfg.dataset_hf_repo,
+            dataset_hf_id=competition_cfg.dataset_hf_filename,
+            dataset_hf_repo_type=competition_cfg.dataset_hf_repo_type,
+            test_mode=True,
+            db_controller=ModelDBController(db_path=test_config.db_path, subtensor=subtensor)
+        )
+        results[competition_cfg.competition_id] = await competition_manager.evaluate()
+
+        bt.logging.info(results[competition_cfg.competition_id])
+
+    return results
+
+
+def config_for_scheduler(subtensor: bt.subtensor) -> Dict[str, CompetitionManager]:
+    """Returns CompetitionManager instances arranged by competition time"""
+    time_arranged_competitions = {}
+    for competition_cfg in competitions_cfg.competitions:
+        for competition_time in competition_cfg.evaluation_time:
+            time_arranged_competitions[competition_time] = CompetitionManager(
+                config={},
+                subtensor=subtensor,
+                hotkeys=[],
+                validator_hotkey="Walidator",
+                competition_id=competition_cfg.competition_id,
+                dataset_hf_repo=competition_cfg.dataset_hf_repo,
+                dataset_hf_id=competition_cfg.dataset_hf_filename,
+                dataset_hf_repo_type=competition_cfg.dataset_hf_repo_type,
+                test_mode=True,
+                db_controller=ModelDBController(db_path=test_config.db_path, subtensor=subtensor)
+            )
+    return time_arranged_competitions
+
+
+@pytest.fixture
+def competition_config():
+    with open(COMPETITION_FILEPATH, "r") as f:
+        return json.load(f)
+
+
+if __name__ == "__main__":
+    config = BaseNeuron.config()
+    bt.logging.set_config(config=config)
+    # if True:  # run them right away
+    path_config = path_config(None)
+    # config = config.merge(path_config)
+    # BaseNeuron.check_config(config)
+    bt.logging.set_config(config=config.logging)
+    bt.logging.info(config)
+    asyncio.run(run_competitions(test_config, MockSubtensor("123"), []))
diff --git a/neurons/validator.py b/neurons/validator.py
index e28b972cb..1226b5421 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -1,7 +1,6 @@
 # The MIT License (MIT)
-# Copyright © 2023 Yuma Rao
-# TODO(developer): Set your name
-# Copyright © 2023
+# Copyright 2023 Yuma Rao
+# Copyright 2024 Safe-Scan
 
 # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 # documentation files (the “Software”), to deal in the Software without restriction, including without limitation
@@ -19,49 +18,563 @@
 import time
+import asyncio
+import os
+import traceback
+import json
+import threading
+import datetime
+import csv
+import zipfile
+from uuid import uuid4
 
-# Bittensor
 import bittensor as bt
+import numpy as np
+import wandb
 
-# import base validator class which takes care of most of the boilerplate
-from template.base.validator import BaseValidatorNeuron
-# Bittensor Validator Template:
-from template.validator import forward
+from cancer_ai.chain_models_store import ChainModelMetadata
+from cancer_ai.validator.rewarder import CompetitionResultsStore
+from cancer_ai.base.base_validator import BaseValidatorNeuron
+from cancer_ai.validator.cancer_ai_logo import cancer_ai_logo
+from cancer_ai.validator.utils import (
+    fetch_organization_data_references,
+    sync_organizations_data_references,
+    check_for_new_dataset_files,
+    get_local_dataset,
+)
+from cancer_ai.validator.model_db import ModelDBController
+from cancer_ai.validator.competition_manager import CompetitionManager
+from cancer_ai.validator.models import OrganizationDataReferenceFactory, NewDatasetFile
+from cancer_ai.validator.models import WanDBLogCompetitionWinners, WanDBLogBase, WanDBLogModelErrorEntry
+from huggingface_hub import HfApi
 
+BLACKLIST_FILE_PATH = "config/hotkey_blacklist.json"
+BLACKLIST_FILE_PATH_TESTNET = "config/hotkey_blacklist_testnet.json"
 
 class Validator(BaseValidatorNeuron):
-    """
-    Your validator neuron class. You should use this class to define your validator's behavior. In particular, you should replace the forward function with your own logic.
+ + def __init__(self, config=None, exit_event=None): + print(cancer_ai_logo) + super(Validator, self).__init__(config=config) + self.hotkey = self.wallet.hotkey.ss58_address + self.db_controller = ModelDBController(db_path=self.config.db_path, subtensor=self.subtensor) - This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior. + self.chain_models = ChainModelMetadata( + self.subtensor, self.config.netuid, self.wallet + ) + self.last_miners_refresh: float = None + self.last_monitor_datasets: float = None - This class provides reasonable default behavior for a validator such as keeping a moving average of the scores of the miners and using them to set weights at the end of each epoch. Additionally, the scores are reset for new hotkeys at the end of each epoch. - """ + self.hf_api = HfApi() - def __init__(self, config=None): - super(Validator, self).__init__(config=config) + self.exit_event = exit_event + + async def concurrent_forward(self): + + coroutines = [ + self.refresh_miners(), + ] + if self.config.filesystem_evaluation: + coroutines.append(self.filesystem_test_evaluation()) + else: + coroutines.append(self.monitor_datasets()) + + await asyncio.gather(*coroutines) - bt.logging.info("load_state()") - self.load_state() - # TODO(developer): Anything specific to your use case you can do here - async def forward(self): + async def refresh_miners(self): """ - Validator forward pass. Consists of: - - Generating the query - - Querying the miners - - Getting the responses - - Rewarding the miners - - Updating the scores + Downloads miner's models from the chain and stores them in the DB """ - # TODO(developer): Rewrite this function based on your protocol definition. - return await forward(self) + + if self.last_miners_refresh is not None and ( + time.time() - self.last_miners_refresh + < self.config.miners_refresh_interval * 60 + ): + bt.logging.trace("Skipping model refresh, not enough time passed") + return + + bt.logging.info("Synchronizing miners from the chain") + bt.logging.info(f"Amount of hotkeys: {len(self.hotkeys)}") + + blacklist_file = ( + BLACKLIST_FILE_PATH_TESTNET + if self.config.test_mode + else BLACKLIST_FILE_PATH + ) + + with open(blacklist_file, "r", encoding="utf-8") as f: + BLACKLISTED_HOTKEYS = json.load(f) + + for i, hotkey in enumerate(self.hotkeys): + if hotkey in BLACKLISTED_HOTKEYS: + bt.logging.debug(f"Skipping blacklisted hotkey {hotkey}") + continue + + hotkey = str(hotkey) + bt.logging.debug(f"Downloading model {i+1}/{len(self.hotkeys)} from hotkey {hotkey}") + try: + uid = self.metagraph.hotkeys.index(hotkey) + chain_model_metadata = await self.chain_models.retrieve_model_metadata(hotkey, uid) + except Exception as e: + bt.logging.warning(f"Cannot get miner model for hotkey {hotkey} from the chain: {e}. Skipping.") + continue + + try: + self.db_controller.add_model(chain_model_metadata, hotkey) + except Exception as e: + # Check if it's a model_hash length constraint error + if "CHECK constraint failed: LENGTH(model_hash) <= 8" in str(e): + bt.logging.error( + f"Invalid model hash for hotkey {hotkey}: " + f"Hash '{chain_model_metadata.model_hash}' exceeds 8-character limit. " + f"Model info will not be persisted to database." 
+                    )
+                else:
+                    bt.logging.error(f"An error occurred while trying to persist the model info: {e}", exc_info=True)
+
+        self.db_controller.clean_old_records(self.hotkeys)
+        self.last_miners_refresh = time.time()
+        self.save_state()
+
+    async def filesystem_test_evaluation(self):
+        await asyncio.sleep(5)
+        data_package = get_local_dataset(self.config.local_dataset_dir)
+        if not data_package:
+            bt.logging.info("No new data packages found.")
+            return
+        competition_manager = CompetitionManager(
+            config=self.config,
+            subtensor=self.subtensor,
+            hotkeys=self.hotkeys,
+            validator_hotkey=self.hotkey,
+            competition_id=data_package.competition_id,
+            dataset_hf_repo="",
+            dataset_hf_filename=data_package.dataset_hf_filename,
+            dataset_hf_repo_type="dataset",
+            db_controller=self.db_controller,
+            test_mode=self.config.test_mode,
+            local_fs_mode=True,
+        )
+        try:
+            winning_hotkey, _ = await competition_manager.evaluate()
+            if not winning_hotkey:
+                bt.logging.error("No winning hotkey determined")
+        except Exception as e:
+            bt.logging.error(f"Error evaluating {data_package.dataset_hf_filename}: {e}", exc_info=True)
+            return
+
+        models_results = competition_manager.results
+
+
+        try:
+            top_hotkey = self.competition_results_store.get_top_hotkey(data_package.competition_id)
+        except ValueError:
+            bt.logging.warning(f"No top hotkey available for competition {data_package.competition_id}")
+            top_hotkey = None
+
+
+        results_file_name = f"{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}-{data_package.competition_id}.csv"
+        await self.log_results_to_csv(results_file_name, data_package, top_hotkey, models_results)
+        if winning_hotkey:
+            bt.logging.info(f"Competition result for {data_package.competition_id}: {winning_hotkey}")
+
+
+        # bt.logging.warning("Competition results store before update")
+        # bt.logging.warning(self.competition_results_store.model_dump_json())
+        competition_weights = await self.competition_results_store.update_competition_results(data_package.competition_id, models_results, self.config, self.metagraph.hotkeys, self.hf_api, self.db_controller)
+        # bt.logging.warning("Competition results store after update")
+        # bt.logging.warning(self.competition_results_store.model_dump_json())
+        self.update_scores(competition_weights, 0.000001, 0.000002)
+
+
+    async def monitor_datasets(self):
+        """Main validation logic, triggered by new datasets on Hugging Face"""
+
+        if self.last_monitor_datasets is not None and (
+            time.time() - self.last_monitor_datasets
+            < self.config.monitor_datasets_interval
+        ):
+            return
+        self.last_monitor_datasets = time.time()
+        bt.logging.info("Starting monitor_datasets")
+
+        try:
+            yaml_data = await fetch_organization_data_references(
+                self.config.datasets_config_hf_repo_id,
+                self.hf_api,
+            )
+            await sync_organizations_data_references(yaml_data)
+        except Exception as e:
+            bt.logging.error(f"Error in monitor_datasets initial setup: {e}\n Stack trace: {traceback.format_exc()}")
+            return
+
+        self.organizations_data_references = OrganizationDataReferenceFactory.get_instance()
+        bt.logging.info("Fetched and synced organization data references")
+
+        try:
+            data_packages: list[NewDatasetFile] = await check_for_new_dataset_files(self.hf_api, self.org_latest_updates)
+        except Exception as e:
+            stack_trace = traceback.format_exc()
+            bt.logging.error(f"Error checking for new dataset files: {e}\n Stack trace: {stack_trace}")
+            return
+
+        if not data_packages:
+            bt.logging.info("No new data packages found.")
+            return
+
+        bt.logging.info(f"Found {len(data_packages)} new data packages")
+
self.save_state() + + for data_package in data_packages: + competition_id = data_package.competition_id + competition_uuid = uuid4().hex + competition_start_time = datetime.datetime.now() + bt.logging.info(f"Starting competition for {competition_id}") + competition_manager = CompetitionManager( + config=self.config, + subtensor=self.subtensor, + hotkeys=self.hotkeys, + validator_hotkey=self.hotkey, + competition_id=competition_id, + dataset_hf_repo=data_package.dataset_hf_repo, + dataset_hf_filename=data_package.dataset_hf_filename, + dataset_hf_repo_type="dataset", + db_controller = self.db_controller, + test_mode = self.config.test_mode, + ) + + winning_hotkey = None + try: + winning_hotkey, _ = await competition_manager.evaluate() + + except Exception: + stack_trace = traceback.format_exc() + bt.logging.error(f"Cannot run {competition_id}: {stack_trace}") + wandb.init(project=competition_id, group="competition_evaluation") + error_log = WanDBLogBase( + uuid=competition_uuid, + log_type="competition_error", + competition_id=competition_id, + run_time_s=(datetime.datetime.now() - competition_start_time).seconds, + validator_hotkey=self.wallet.hotkey.ss58_address, + errors=str(stack_trace), + dataset_filename=data_package.dataset_hf_filename + ) + wandb.log(error_log.model_dump()) + wandb.finish() + continue + + if not winning_hotkey: + bt.logging.warning("Could not determine the winner of competition") + continue + winning_model_link = self.db_controller.get_latest_model(hotkey=winning_hotkey, cutoff_time=self.config.models_query_cutoff).hf_link + + + # Update competition results + bt.logging.info(f"Competition result for {competition_id}: {winning_hotkey}") + competition_weights = await self.competition_results_store.update_competition_results(competition_id, competition_manager.results, self.config, self.metagraph.hotkeys, self.hf_api, self.db_controller) + self.update_scores(competition_weights, 0.0001, 0.0002) + + average_winning_hotkey = self.competition_results_store.get_top_hotkey(competition_id) + winner_log = WanDBLogCompetitionWinners( + uuid=competition_uuid, + competition_id=competition_id, + + competition_winning_hotkey=winning_hotkey, + competition_winning_uid=self.metagraph.hotkeys.index(winning_hotkey), + + average_winning_hotkey=average_winning_hotkey, + average_winning_uid=self.metagraph.hotkeys.index(average_winning_hotkey), + + validator_hotkey=self.wallet.hotkey.ss58_address, + model_link=winning_model_link, + dataset_filename=data_package.dataset_hf_filename, + run_time_s=(datetime.datetime.now() - competition_start_time).seconds + ) + wandb.init(project=competition_id, group="competition_evaluation") + wandb.log(winner_log.model_dump()) + wandb.finish() + + # log results to CSV + csv_filename = f"{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}-{competition_id}.csv" + await self.log_results_to_csv(csv_filename, data_package, winning_hotkey, competition_manager.results) + + # Logging results + wandb.init(project=competition_id, group="model_evaluation") + for miner_hotkey, evaluation_result in competition_manager.results: + + if miner_hotkey in competition_manager.error_results: + continue + + try: + model = self.db_controller.get_latest_model( + hotkey=miner_hotkey, + cutoff_time=self.config.models_query_cutoff, + ) + avg_score = 0.0 + if ( + data_package.competition_id in self.competition_results_store.average_scores and + miner_hotkey in self.competition_results_store.average_scores[competition_id] + ): + avg_score = 
self.competition_results_store.average_scores[competition_id][miner_hotkey]
+
+                    ActualWanDBLogModelEntryClass = competition_manager.competition_handler.WanDBLogModelClass
+                    model_log = ActualWanDBLogModelEntryClass(
+                        uuid=competition_uuid,
+                        competition_id=competition_id,
+                        miner_hotkey=miner_hotkey,
+                        uid=self.metagraph.hotkeys.index(miner_hotkey),
+                        validator_hotkey=self.wallet.hotkey.ss58_address,
+                        model_url=model.hf_link,
+                        average_score=avg_score,
+                        run_time_s=evaluation_result.run_time_s,
+                        dataset_filename=data_package.dataset_hf_filename,
+                        **evaluation_result.to_log_dict(),
+                    )
+                    wandb.log(model_log.model_dump())
+                except Exception as e:
+                    bt.logging.error(f"Error logging model results for hotkey {miner_hotkey}: {e}")
+                    continue
+
+            # Log model evaluation errors
+            for miner_hotkey, error_message in competition_manager.error_results:
+                model_log = WanDBLogModelErrorEntry(
+                    uuid=competition_uuid,
+                    competition_id=competition_id,
+                    miner_hotkey=miner_hotkey,
+                    uid=self.metagraph.hotkeys.index(miner_hotkey),
+                    validator_hotkey=self.wallet.hotkey.ss58_address,
+                    dataset_filename=data_package.dataset_hf_filename,
+                    errors=error_message,
+                )
+                wandb.log(model_log.model_dump())
+
+            wandb.finish()
+
+
+    def update_scores(
+        self,
+        competition_weights: dict[str, float],
+        min_min_score: float,
+        max_min_score: float
+    ):
+        """
+        For each competition:
+        1) Award the winner its full `weight`.
+        2) Linearly spread concrete minimal values in [min_min_score … max_min_score]
+           across the other non-winner hotkeys (highest raw → max_min_score, lowest → min_min_score).
+        3) Do NOT multiply those minimal values by the weight; just add them directly.
+        """
+        self.scores = np.zeros(self.metagraph.n, dtype=np.float32)
+
+        for comp_id, weight in competition_weights.items():
+            try:
+                winner_hotkey = self.competition_results_store.get_top_hotkey(comp_id)
+            except ValueError as e:
+                bt.logging.warning(f"[{comp_id}] cannot determine winner: {e}")
+                continue
+
+            if winner_hotkey in self.metagraph.hotkeys:
+                winner_idx = self.metagraph.hotkeys.index(winner_hotkey)
+                self.scores[winner_idx] += weight
+                bt.logging.info(
+                    f"[{comp_id}] +{weight:.6f} to winner {winner_hotkey}"
+                )
+            else:
+                bt.logging.warning(
+                    f"[{comp_id}] winner {winner_hotkey!r} not in metagraph"
+                )
+
+            try:
+                all_hotkeys = self.competition_results_store.get_hotkeys_with_non_zero_scores(comp_id)
+            except ValueError as e:
+                bt.logging.warning(f"[{comp_id}] {e}")
+                continue
+
+            # remove the winner from the list
+            non_winners = [hk for hk in all_hotkeys if hk != winner_hotkey]
+            k = len(non_winners)
+            if k == 0:
+                continue
+
+            # compute the minimal-value sequence:
+            #   index 0 (highest score) → max_min_score,
+            #   index k-1 (lowest)      → min_min_score
+            if k > 1:
+                span = max_min_score - min_min_score
+                step = span / (k - 1)
+                minimal_values = [
+                    max_min_score - i * step
+                    for i in range(k)
+                ]
+            else:
+                # single runner-up gets the top of the band
+                minimal_values = [max_min_score]
+
+            # apply those concrete minimal values (not scaled by weight)
+            for minimal, hk in zip(minimal_values, non_winners):
+                if hk in self.metagraph.hotkeys:
+                    idx = self.metagraph.hotkeys.index(hk)
+                    self.scores[idx] += minimal
+                    bt.logging.info(
+                        f"[{comp_id}] +{minimal:.6f} to non-winner {hk}"
+                    )
+                else:
+                    bt.logging.warning(
+                        f"[{comp_id}] non-winner {hk!r} not in metagraph"
+                    )
+
+        bt.logging.debug(
+            "Scores from update_scores:\n"
+            f"{np.array2string(self.scores, precision=7, floatmode='fixed', separator=', ', suppress_small=True)}"
+        )
+
+        self.save_state()
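To make step 2 of the docstring concrete: `monitor_datasets` calls `update_scores(competition_weights, 0.0001, 0.0002)`, so with three non-winners ranked from highest to lowest average score, the evenly spaced minimal values work out as follows (a worked example, not validator output):

    min_min_score, max_min_score = 0.0001, 0.0002
    k = 3  # non-winner hotkeys, ordered from highest to lowest raw score
    step = (max_min_score - min_min_score) / (k - 1)  # 0.00005
    minimal_values = [max_min_score - i * step for i in range(k)]
    # -> [0.0002, 0.00015, 0.0001]; the winner separately receives the full
    #    competition weight, so these small additions only rank the runners-up.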
+
+
+    async def log_results_to_csv(self, file_name: str, data_package: NewDatasetFile, top_hotkey: str, models_results: list):
+        """Debug method for dumping rewards for testing."""
+
+        csv_file_path = os.path.join("evaluation-results", file_name)
+        bt.logging.info(f"Logging results to CSV for {data_package.competition_id} to file {csv_file_path}")
+        with open(csv_file_path, mode='a', newline='') as f:
+            writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+            if os.stat(csv_file_path).st_size == 0:
+                writer.writerow(["Package name", "Date", "Hotkey", "Score", "Average", "Winner"])
+            competition_id = data_package.competition_id
+            for hotkey, model_result in models_results:
+                avg_score = 0.0
+                if (competition_id in self.competition_results_store.average_scores and
+                        hotkey in self.competition_results_store.average_scores[competition_id]):
+                    avg_score = self.competition_results_store.average_scores[competition_id][hotkey]
+
+                writer.writerow([os.path.basename(data_package.dataset_hf_filename),
+                                 datetime.datetime.now(datetime.timezone.utc),
+                                 hotkey,
+                                 round(model_result.score, 6),
+                                 round(avg_score, 6),
+                                 "X" if hotkey == top_hotkey else " "])
+
+
+    # Custom JSON encoder to handle datetime objects
+    class DateTimeEncoder(json.JSONEncoder):
+        def default(self, obj):
+            if isinstance(obj, datetime.datetime):
+                return obj.isoformat()
+            return super().default(obj)
+
+    def save_state(self):
+        """Saves the state of the validator to a file."""
+        if not getattr(self, "organizations_data_references", None):
+            self.organizations_data_references = OrganizationDataReferenceFactory.get_instance()
+
+        scores_list = self.scores.tolist() if hasattr(self.scores, 'tolist') else []
+        hotkeys_list = self.hotkeys.tolist() if hasattr(self.hotkeys, 'tolist') else self.hotkeys
+
+        state_dict = {
+            'scores': scores_list,
+            'hotkeys': hotkeys_list,
+            'organizations_data_references': self.organizations_data_references.model_dump(),
+            'org_latest_updates': self.org_latest_updates,
+            'competition_results_store': self.competition_results_store.model_dump()
+        }
+
+        state_path = self.config.neuron.full_path + "/state.json"
+        os.makedirs(os.path.dirname(state_path), exist_ok=True)
+
+        try:
+            with open(state_path, 'w') as f:
+                json.dump(state_dict, f, indent=2, cls=self.DateTimeEncoder)
+        except TypeError as e:
+            bt.logging.error(f"Error serializing state to JSON: {e}", exc_info=True)
+            for key, value in state_dict.items():
+                try:
+                    json.dumps(value, cls=self.DateTimeEncoder)
+                except TypeError as e:
+                    bt.logging.error(f"Problem serializing field '{key}': {e}")
+        except Exception as e:
+            bt.logging.error(f"Error saving validator state: {e}", exc_info=True)
+
+    def create_empty_state(self):
+        """Creates an empty state file."""
+        empty_state = {
+            'scores': [],
+            'hotkeys': [],
+            'organizations_data_references': self.organizations_data_references.model_dump(),
+            'org_latest_updates': {},
+            'competition_results_store': self.competition_results_store.model_dump()
+        }
+
+        state_path = self.config.neuron.full_path + "/state.json"
+        os.makedirs(os.path.dirname(state_path), exist_ok=True)
+
+        with open(state_path, 'w') as f:
+            json.dump(empty_state, f, indent=2, cls=self.DateTimeEncoder)
+
+    def load_state(self):
+        """Loads the state of the validator from a file."""
+        json_path =
self.config.neuron.full_path + "/state.json"
+
+        if os.path.exists(json_path):
+            try:
+                with open(json_path, 'r') as f:
+                    state = json.load(f)
+                self._convert_datetime_strings(state)
+                self.scores = np.array(state['scores'], dtype=np.float32)
+                self.hotkeys = np.array(state['hotkeys'])
+                factory = OrganizationDataReferenceFactory.get_instance()
+                factory.update_from_dict(state['organizations_data_references'])
+                self.organizations_data_references = factory
+                self.org_latest_updates = state['org_latest_updates']
+                self.competition_results_store = CompetitionResultsStore.model_validate(
+                    state['competition_results_store']
+                )
+            except (json.JSONDecodeError, KeyError, TypeError) as e:
+                bt.logging.error(f"Error loading JSON state: {e}")
+        else:
+            bt.logging.warning("No state file found. Creating an empty one.")
+            self.create_empty_state()
+            return
+
+    def _convert_datetime_strings(self, state_dict):
+        """Helper method to convert ISO format datetime strings back to datetime objects."""
+        if 'org_latest_updates' in state_dict and state_dict['org_latest_updates']:
+            for org_id, timestamp in state_dict['org_latest_updates'].items():
+                if isinstance(timestamp, str):
+                    state_dict['org_latest_updates'][org_id] = datetime.datetime.fromisoformat(timestamp)
+
+
+
+
-# The main function parses the configuration and runs the validator.
 if __name__ == "__main__":
-    with Validator() as validator:
+    bt.logging.info("Setting up main thread interrupt handle.")
+    exit_event = threading.Event()
+    with Validator(exit_event=exit_event) as validator:
         while True:
-            bt.logging.info(f"Validator running... {time.time()}")
             time.sleep(5)
+            if exit_event.is_set():
+                bt.logging.info("Exit event received.
Shutting down...") + break diff --git a/requirements.txt b/requirements.txt index f44dfb74a..c13dcd03b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,172 @@ -bittensor>=7 -starlette>=0.30.0 -pydantic>=2 -rich>=13 -pytest>=8 -torch>=2 -numpy>=1 -setuptools>=68 \ No newline at end of file +absl-py==2.1.0 +aiofiles==24.1.0 +aiohappyeyeballs==2.3.5 +aiohttp==3.10.2 +aiosignal==1.3.1 +annotated-types==0.7.0 +ansible==8.5.0 +ansible-core==2.15.12 +ansible-vault==2.1.0 +anyio==4.4.0 +astunparse==1.6.3 +async-property==0.2.2 +async-substrate-interface==1.0.3 +async-timeout==4.0.3 +async-unzip==0.3.6 +asyncstdlib==3.13.0 +attrs==24.2.0 +backoff==2.2.1 +base58==2.1.1 +bittensor==9.0.2 +bittensor-cli==9.1.0 +bittensor-commit-reveal==0.2.0 +bittensor-wallet==3.0.4 +black==24.8.0 +bt-decode==0.5.0a2 +certifi==2024.7.4 +cffi==1.17.0 +cfgv==3.4.0 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +coloredlogs==15.0.1 +crontab==1.0.1 +cryptography==43.0.3 +cytoolz==0.12.3 +ddt==1.6.0 +decorator==5.1.1 +distlib==0.3.8 +dnspython==2.7.0 +docker-pycreds==0.4.0 +ecdsa==0.19.0 +email_validator==2.2.0 +eth-hash==0.7.0 +eth-keys==0.5.1 +eth-typing==4.4.0 +eth-utils==2.2.2 +exceptiongroup==1.2.2 +fastapi==0.110.3 +filelock==3.15.4 +flake8==7.1.1 +flatbuffers==24.3.25 +frozenlist==1.4.1 +fsspec==2024.6.1 +fuzzywuzzy==0.18.0 +gast==0.6.0 +gitdb==4.0.11 +GitPython==3.1.43 +google-pasta==0.2.0 +greenlet==3.1.1 +grpcio==1.65.5 +h11==0.14.0 +h5py==3.11.0 +huggingface-hub==0.24.5 +humanfriendly==10.0 +identify==2.6.0 +idna==3.7 +iniconfig==2.0.0 +Jinja2==3.1.4 +joblib==1.4.2 +keras==3.5.0 +Levenshtein==0.25.1 +libclang==18.1.1 +Markdown==3.7 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +mccabe==0.7.0 +mdurl==0.1.2 +ml-dtypes==0.4.0 +mnemonic==0.21 +more-itertools==10.4.0 +mpmath==1.3.0 +msgpack==1.0.8 +msgpack-numpy-opentensor==0.5.0 +multidict==6.0.5 +munch==2.5.0 +mypy==1.11.1 +mypy-extensions==1.0.0 +namex==0.0.8 +narwhals==1.28.0 +nest-asyncio==1.6.0 +netaddr==1.3.0 +networkx==3.3 +nodeenv==1.9.1 +numpy==2.0.2 +onnx==1.16.2 +onnxruntime==1.19.0 +opt-einsum==3.3.0 +optree==0.12.1 +packaging==24.1 +password-strength==0.0.3.post2 +pathspec==0.12.1 +pillow==10.4.0 +platformdirs==4.2.2 +plotille==5.0.0 +plotly==6.0.0 +pluggy==1.5.0 +pre-commit==3.8.0 +protobuf==4.25.4 +psutil==6.0.0 +py==1.11.0 +py-bip39-bindings==0.1.11 +py-ed25519-zebra-bindings==1.0.1 +py-sr25519-bindings==0.2.0 +pycodestyle==2.12.1 +pycparser==2.22 +pycryptodome==3.20.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pyflakes==3.2.0 +Pygments==2.18.0 +PyNaCl==1.5.0 +pytest==8.3.2 +pytest-asyncio==0.24.0 +python-dotenv==1.0.1 +python-Levenshtein==0.25.1 +python-statemachine==2.1.2 +pywry==0.6.2 +PyYAML==6.0.2 +rapidfuzz==3.9.6 +redis==5.0.8 +requests==2.32.3 +resolvelib==0.8.1 +retry==0.9.2 +rich==13.8.0 +scalecodec==1.2.11 +schedule==1.2.2 +scikit-learn==1.5.1 +scipy==1.14.1 +sentry-sdk==2.13.0 +setproctitle==1.3.3 +shellingham==1.5.4 +shtab==1.6.5 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.1 +SQLAlchemy==1.4.0 +starlette==0.37.2 +substrate-interface==1.7.10 +sympy==1.13.1 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 +tensorflow==2.18.0 +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==2.4.0 +threadpoolctl==3.5.0 +toml==0.10.0 +tomli==2.0.1 +toolz==0.12.1 +torch==2.4.0 +tqdm==4.66.5 +typer==0.15.1 +typing_extensions==4.12.2 +urllib3==2.2.2 +uvicorn==0.30.0 +virtualenv==20.26.4 +wandb==0.17.7 +websocket-client==1.8.0 +websockets==14.1 +Werkzeug==3.0.3 +wrapt==1.16.0 +xxhash==3.4.1 +yarl==1.9.4 diff --git 
a/scripts/check_compatibility.sh b/scripts/check_compatibility.sh deleted file mode 100755 index b0bd6b43d..000000000 --- a/scripts/check_compatibility.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -if [ -z "$1" ]; then - echo "Please provide a Python version as an argument." - exit 1 -fi - -python_version="$1" -all_passed=true - -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -check_compatibility() { - all_supported=0 - - while read -r requirement; do - # Skip lines starting with git+ - if [[ "$requirement" == git+* ]]; then - continue - fi - - package_name=$(echo "$requirement" | awk -F'[!=<>]' '{print $1}' | awk -F'[' '{print $1}') # Strip off brackets - echo -n "Checking $package_name... " - - url="https://pypi.org/pypi/$package_name/json" - response=$(curl -s $url) - status_code=$(curl -s -o /dev/null -w "%{http_code}" $url) - - if [ "$status_code" != "200" ]; then - echo -e "${RED}Information not available for $package_name. Failure.${NC}" - all_supported=1 - continue - fi - - classifiers=$(echo "$response" | jq -r '.info.classifiers[]') - requires_python=$(echo "$response" | jq -r '.info.requires_python') - - base_version="Programming Language :: Python :: ${python_version%%.*}" - specific_version="Programming Language :: Python :: $python_version" - - if echo "$classifiers" | grep -q "$specific_version" || echo "$classifiers" | grep -q "$base_version"; then - echo -e "${GREEN}Supported${NC}" - elif [ "$requires_python" != "null" ]; then - if echo "$requires_python" | grep -Eq "==$python_version|>=$python_version|<=$python_version"; then - echo -e "${GREEN}Supported${NC}" - else - echo -e "${RED}Not compatible with Python $python_version due to constraint $requires_python.${NC}" - all_supported=1 - fi - else - echo -e "${YELLOW}Warning: Specific version not listed, assuming compatibility${NC}" - fi - done < requirements.txt - - return $all_supported -} - -echo "Checking compatibility for Python $python_version..." -check_compatibility -if [ $? -eq 0 ]; then - echo -e "${GREEN}All requirements are compatible with Python $python_version.${NC}" -else - echo -e "${RED}All requirements are NOT compatible with Python $python_version.${NC}" - all_passed=false -fi - -echo "" -if $all_passed; then - echo -e "${GREEN}All tests passed.${NC}" -else - echo -e "${RED}All tests did not pass.${NC}" - exit 1 -fi diff --git a/scripts/check_requirements_changes.sh b/scripts/check_requirements_changes.sh deleted file mode 100755 index a06d050f8..000000000 --- a/scripts/check_requirements_changes.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# Check if requirements files have changed in the last commit -if git diff --name-only HEAD~1 | grep -E 'requirements.txt|requirements.txt'; then - echo "Requirements files have changed. Running compatibility checks..." - echo 'export REQUIREMENTS_CHANGED="true"' >> $BASH_ENV -else - echo "Requirements files have not changed. Skipping compatibility checks..." - echo 'export REQUIREMENTS_CHANGED="false"' >> $BASH_ENV -fi diff --git a/scripts/install_staging.sh b/scripts/install_staging.sh deleted file mode 100644 index 24280cedd..000000000 --- a/scripts/install_staging.sh +++ /dev/null @@ -1,145 +0,0 @@ -#!/bin/bash - -# Section 1: Build/Install -# This section is for first-time setup and installations. - -install_dependencies() { - # Function to install packages on macOS - install_mac() { - which brew > /dev/null - if [ $? -ne 0 ]; then - echo "Installing Homebrew..." 
- /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - fi - echo "Updating Homebrew packages..." - brew update - echo "Installing required packages..." - brew install make llvm curl libssl protobuf tmux - } - - # Function to install packages on Ubuntu/Debian - install_ubuntu() { - echo "Updating system packages..." - sudo apt update - echo "Installing required packages..." - sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler tmux - } - - # Detect OS and call the appropriate function - if [[ "$OSTYPE" == "darwin"* ]]; then - install_mac - elif [[ "$OSTYPE" == "linux-gnu"* ]]; then - install_ubuntu - else - echo "Unsupported operating system." - exit 1 - fi - - # Install rust and cargo - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh - - # Update your shell's source to include Cargo's path - source "$HOME/.cargo/env" -} - -# Call install_dependencies only if it's the first time running the script -if [ ! -f ".dependencies_installed" ]; then - install_dependencies - touch .dependencies_installed -fi - - -# Section 2: Test/Run -# This section is for running and testing the setup. - -# Create a coldkey for the owner role -wallet=${1:-owner} - -# Logic for setting up and running the environment -setup_environment() { - # Clone subtensor and enter the directory - if [ ! -d "subtensor" ]; then - git clone https://github.com/opentensor/subtensor.git - fi - cd subtensor - git pull - - # Update to the nightly version of rust - ./scripts/init.sh - - cd ../bittensor-subnet-template - - # Install the bittensor-subnet-template python package - python -m pip install -e . - - # Create and set up wallets - # This section can be skipped if wallets are already set up - if [ ! -f ".wallets_setup" ]; then - btcli wallet new_coldkey --wallet.name $wallet --no_password --no_prompt - btcli wallet new_coldkey --wallet.name miner --no_password --no_prompt - btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default --no_prompt - btcli wallet new_coldkey --wallet.name validator --no_password --no_prompt - btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default --no_prompt - touch .wallets_setup - fi - -} - -# Call setup_environment every time -setup_environment - -## Setup localnet -# assumes we are in the bittensor-subnet-template/ directory -# Initialize your local subtensor chain in development mode. This command will set up and run a local subtensor network. 
-cd ../subtensor - -# Start a new tmux session and create a new pane, but do not switch to it -echo "FEATURES='pow-faucet runtime-benchmarks' BT_DEFAULT_TOKEN_WALLET=$(cat ~/.bittensor/wallets/$wallet/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+') bash scripts/localnet.sh" >> setup_and_run.sh -chmod +x setup_and_run.sh -tmux new-session -d -s localnet -n 'localnet' -tmux send-keys -t localnet 'bash ../subtensor/setup_and_run.sh' C-m - -# Notify the user -echo ">> localnet.sh is running in a detached tmux session named 'localnet'" -echo ">> You can attach to this session with: tmux attach-session -t localnet" - -# Register a subnet (this needs to be run each time we start a new local chain) -btcli subnet create --wallet.name $wallet --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt - -# Transfer tokens to miner and validator coldkeys -export BT_MINER_TOKEN_WALLET=$(cat ~/.bittensor/wallets/miner/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+') -export BT_VALIDATOR_TOKEN_WALLET=$(cat ~/.bittensor/wallets/validator/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+') - -btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name $wallet --dest $BT_MINER_TOKEN_WALLET --amount 1000 --no_prompt -btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name $wallet --dest $BT_VALIDATOR_TOKEN_WALLET --amount 10000 --no_prompt - -# Register wallet hotkeys to subnet -btcli subnet register --wallet.name miner --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt -btcli subnet register --wallet.name validator --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt - -# Add stake to the validator -btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --amount 10000 --no_prompt - -# Ensure both the miner and validator keys are successfully registered. 
-btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
-btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
-btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
-
-cd ../bittensor-subnet-template
-
-
-# Check if inside a tmux session
-if [ -z "$TMUX" ]; then
-    # Start a new tmux session and run the miner in the first pane
-    tmux new-session -d -s bittensor -n 'miner' 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
-
-    # Split the window and run the validator in the new pane
-    tmux split-window -h -t bittensor:miner 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug'
-
-    # Attach to the new tmux session
-    tmux attach-session -t bittensor
-else
-    # If already in a tmux session, create two panes in the current window
-    tmux split-window -h 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
-    tmux split-window -v -t 0 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name3 validator --wallet.hotkey default --logging.debug'
-fi
diff --git a/scripts/start_validator.py b/scripts/start_validator.py
new file mode 100755
index 000000000..ac5d08643
--- /dev/null
+++ b/scripts/start_validator.py
@@ -0,0 +1,227 @@
+"""
+This script is based on the original script from the Pretraining Subnet repository.
+https://github.com/macrocosm-os/pretraining/blob/main/scripts/start_validator.py
+
+This script runs a validator process and automatically updates it when a new version is released.
+Command-line arguments will be forwarded to the validator (`neurons/validator.py`), so you can pass
+them like this:
+    python3 scripts/start_validator.py --wallet.name=my-wallet
+Auto-updates are enabled by default and make sure that the latest version is always running
+by pulling the latest version from git and upgrading Python packages. This is done periodically.
+Local changes may prevent the update, but they will be preserved.
+
+The script uses the same virtual environment as the one used to run it. If you want to run
+the validator within a virtual environment, run this auto-update script from that virtual environment.
+
+pm2 is required for this script. The script will start a pm2 process using the name provided by
+the --pm2_name argument.
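+
+For example, to start the validator under pm2 with an explicit process name and wallet
+(the values shown are illustrative; any of the flags defined at the bottom of this
+script, plus extra validator flags, may be passed):
+    python3 scripts/start_validator.py --pm2_name cancer_ai_vali --wallet.name validator --wallet.hotkey default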
+""" + +import argparse +import logging +import subprocess +import sys +import time +import os +from datetime import timedelta +from shlex import split +from typing import List +from argparse import Namespace +from pathlib import Path + +log = logging.getLogger(__name__) +UPDATES_CHECK_TIME = timedelta(minutes=5) +CURRENT_WORKING_DIR = Path(__file__).parent.parent + +ECOSYSTEM_CONFIG_PATH = CURRENT_WORKING_DIR / "config" / "ecosystem.config.js" # Path to the pm2 ecosystem config file + +def get_version() -> str: + """Extract the version as current git commit hash""" + result = subprocess.run( + split("git rev-parse HEAD"), + check=True, + capture_output=True, + cwd=CURRENT_WORKING_DIR, + ) + commit = result.stdout.decode().strip() + assert len(commit) == 40, f"Invalid commit hash: {commit}" + return commit[:8] + + +def generate_pm2_config(pm2_name: str, args: List[str]) -> None: + """ + Generate a pm2 ecosystem config file to run the validator. + """ + config_content = f""" + module.exports = {{ + apps: [ + {{ + name: '{pm2_name}', + script: 'neurons/validator.py', + interpreter: '{sys.executable}', + autorestart: true, + restart_delay: 30000, + max_restarts: 100, + env: {{ + PYTHONPATH: '{os.environ.get('PYTHONPATH', '')}:./', + }}, + args: '{' '.join(args)}' + }} + ] + }}; + """ + with open(ECOSYSTEM_CONFIG_PATH, "w") as f: + f.write(config_content) + log.info("Generated pm2 ecosystem config at: %s", ECOSYSTEM_CONFIG_PATH) + + +def start_validator_process(pm2_name: str, args: List[str]) -> subprocess.Popen: + """ + Spawn a new python process running neurons.validator using pm2. + """ + assert sys.executable, "Failed to get python executable" + generate_pm2_config(pm2_name, args) # Generate the pm2 config file + + log.info("Starting validator process with pm2, name: %s", pm2_name) + process = subprocess.Popen( + [ + "pm2", + "start", + str(ECOSYSTEM_CONFIG_PATH) + ], + cwd=CURRENT_WORKING_DIR, + ) + process.pm2_name = pm2_name + + return process + + +def stop_validator_process(process: subprocess.Popen) -> None: + """Stop the validator process""" + subprocess.run( + ("pm2", "delete", process.pm2_name), cwd=CURRENT_WORKING_DIR, check=True + ) + + +def pull_latest_version() -> None: + """ + Pull the latest version from git. + This uses `git pull --rebase`, so if any changes were made to the local repository, + this will try to apply them on top of origin's changes. This is intentional, as we + don't want to overwrite any local changes. However, if there are any conflicts, + this will abort the rebase and return to the original state. + The conflicts are expected to happen rarely since validator is expected + to be used as-is. + """ + try: + subprocess.run( + split("git pull --rebase --autostash"), check=True, cwd=CURRENT_WORKING_DIR + ) + except subprocess.CalledProcessError as exc: + log.error("Failed to pull, reverting: %s", exc) + subprocess.run(split("git rebase --abort"), check=True, cwd=CURRENT_WORKING_DIR) + + +def upgrade_packages() -> None: + """ + Upgrade python packages by running `pip install --upgrade -r requirements.txt`. + Notice: this won't work if some package in `requirements.txt` is downgraded. + Ignored as this is unlikely to happen. + """ + log.info("Upgrading packages") + try: + subprocess.run( + split(f"{sys.executable} -m pip install --upgrade -r requirements.txt"), + check=True, + cwd=CURRENT_WORKING_DIR, + ) + except subprocess.CalledProcessError as exc: + log.error("Failed to upgrade packages, proceeding anyway. 
%s", exc) + + +def main(pm2_name: str, args_namespace: Namespace, extra_args: List[str]) -> None: + """ + Run the validator process and automatically update it when a new version is released. + This will check for updates every `UPDATES_CHECK_TIME` and update the validator + if a new version is available. Update is performed as simple `git pull --rebase`. + """ + + args_list = [] + for key, value in vars(args_namespace).items(): + if value != '' and value is not None: + args_list.append(f"--{key}") + if not isinstance(value, bool): + args_list.append(str(value)) + + args_list.extend(extra_args) + + validator = start_validator_process(pm2_name, args_list) + current_version = latest_version = get_version() + log.info("Current version: %s", current_version) + + try: + while True: + pull_latest_version() + latest_version = get_version() + log.info("Latest version: %s", latest_version) + + if latest_version != current_version: + log.info( + "Upgraded to latest version: %s -> %s", + current_version, + latest_version, + ) + upgrade_packages() + + stop_validator_process(validator) + validator = start_validator_process(pm2_name, args_list) + current_version = latest_version + + time.sleep(UPDATES_CHECK_TIME.total_seconds()) + + finally: + stop_validator_process(validator) + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + parser = argparse.ArgumentParser( + description="Automatically update and restart the validator process when a new version is released.", + epilog="Example usage: python start_validator.py --pm2_name 'net9vali' --wallet_name 'wallet1' --wallet_hotkey 'key123'", + ) + + parser.add_argument( + "--pm2_name", default="cancer_ai_vali", help="Name of the PM2 process." + ) + + parser.add_argument( + "--wallet.name", default="validator", help="Name of the wallet." + ) + + parser.add_argument( + "--wallet.hotkey", default="default", help="Name of the hotkey." + ) + + parser.add_argument( + "--subtensor.network", default="finney", help="Name of the network." + ) + + parser.add_argument( + "--netuid", default="76", help="Netuid of the network." + ) + + parser.add_argument( + "--logging.debug", default=1, help="Enable debug logging." + ) + + parser.add_argument( + "--hf_token", default="", help="Access token for Hugging Face." + ) + + flags, extra_args = parser.parse_known_args() + main(flags.pm2_name, flags, extra_args) diff --git a/setup.py b/setup.py deleted file mode 100644 index f76ec9b29..000000000 --- a/setup.py +++ /dev/null @@ -1,96 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import re -import os -import codecs -import pathlib -from os import path -from io import open -from setuptools import setup, find_packages -from pkg_resources import parse_requirements - - -def read_requirements(path): - with open(path, "r") as f: - requirements = f.read().splitlines() - processed_requirements = [] - - for req in requirements: - # For git or other VCS links - if req.startswith("git+") or "@" in req: - pkg_name = re.search(r"(#egg=)([\w\-_]+)", req) - if pkg_name: - processed_requirements.append(pkg_name.group(2)) - else: - # You may decide to raise an exception here, - # if you want to ensure every VCS link has an #egg= at the end - continue - else: - processed_requirements.append(req) - return processed_requirements - - -requirements = read_requirements("requirements.txt") -here = path.abspath(path.dirname(__file__)) - -with open(path.join(here, "README.md"), encoding="utf-8") as f: - long_description = f.read() - -# loading version from setup.py -with codecs.open( - os.path.join(here, "template/__init__.py"), encoding="utf-8" -) as init_file: - version_match = re.search( - r"^__version__ = ['\"]([^'\"]*)['\"]", init_file.read(), re.M - ) - version_string = version_match.group(1) - -setup( - name="bittensor_subnet_template", # TODO(developer): Change this value to your module subnet name. - version=version_string, - description="bittensor_subnet_template", # TODO(developer): Change this value to your module subnet description. - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/opentensor/bittensor-subnet-template", # TODO(developer): Change this url to your module subnet github url. - author="bittensor.com", # TODO(developer): Change this value to your module subnet author name. - packages=find_packages(), - include_package_data=True, - author_email="", # TODO(developer): Change this value to your module subnet author email. 
- license="MIT", - python_requires=">=3.8", - install_requires=requirements, - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Topic :: Software Development :: Build Tools", - # Pick your license as you wish - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - ], -) diff --git a/template/api/dummy.py b/template/api/dummy.py deleted file mode 100644 index f6a433f17..000000000 --- a/template/api/dummy.py +++ /dev/null @@ -1,44 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao -# Copyright © 2023 Opentensor Foundation -# Copyright © 2023 Opentensor Technologies Inc - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import bittensor as bt -from typing import List, Optional, Union, Any, Dict -from template.protocol import Dummy -from bittensor.subnets import SubnetsAPI - - -class DummyAPI(SubnetsAPI): - def __init__(self, wallet: "bt.wallet"): - super().__init__(wallet) - self.netuid = 33 - self.name = "dummy" - - def prepare_synapse(self, dummy_input: int) -> Dummy: - synapse.dummy_input = dummy_input - return synapse - - def process_responses( - self, responses: List[Union["bt.Synapse", Any]] - ) -> List[int]: - outputs = [] - for response in responses: - if response.dendrite.status_code != 200: - continue - return outputs.append(response.dummy_output) - return outputs diff --git a/template/api/get_query_axons.py b/template/api/get_query_axons.py deleted file mode 100644 index 5d51c8f36..000000000 --- a/template/api/get_query_axons.py +++ /dev/null @@ -1,126 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao -# Copyright © 2023 Opentensor Foundation -# Copyright © 2023 Opentensor Technologies Inc - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -import numpy as np -import random -import bittensor as bt - - -async def ping_uids(dendrite, metagraph, uids, timeout=3): - """ - Pings a list of UIDs to check their availability on the Bittensor network. - - Args: - dendrite (bittensor.dendrite): The dendrite instance to use for pinging nodes. - metagraph (bittensor.metagraph): The metagraph instance containing network information. - uids (list): A list of UIDs (unique identifiers) to ping. - timeout (int, optional): The timeout in seconds for each ping. Defaults to 3. - - Returns: - tuple: A tuple containing two lists: - - The first list contains UIDs that were successfully pinged. - - The second list contains UIDs that failed to respond. - """ - axons = [metagraph.axons[uid] for uid in uids] - try: - responses = await dendrite( - axons, - bt.Synapse(), # TODO: potentially get the synapses available back? 
- deserialize=False, - timeout=timeout, - ) - successful_uids = [ - uid - for uid, response in zip(uids, responses) - if response.dendrite.status_code == 200 - ] - failed_uids = [ - uid - for uid, response in zip(uids, responses) - if response.dendrite.status_code != 200 - ] - except Exception as e: - bt.logging.error(f"Dendrite ping failed: {e}") - successful_uids = [] - failed_uids = uids - bt.logging.debug(f"ping() successful uids: {successful_uids}") - bt.logging.debug(f"ping() failed uids : {failed_uids}") - return successful_uids, failed_uids - -async def get_query_api_nodes(dendrite, metagraph, n=0.1, timeout=3): - """ - Fetches the available API nodes to query for the particular subnet. - - Args: - wallet (bittensor.wallet): The wallet instance to use for querying nodes. - metagraph (bittensor.metagraph): The metagraph instance containing network information. - n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1. - timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3. - - Returns: - list: A list of UIDs representing the available API nodes. - """ - bt.logging.debug( - f"Fetching available API nodes for subnet {metagraph.netuid}" - ) - vtrust_uids = [ - uid.item() - for uid in metagraph.uids - if metagraph.validator_trust[uid] > 0 - ] - top_uids = np.where(metagraph.S > np.quantile(metagraph.S, 1 - n))[0].tolist() - init_query_uids = set(top_uids).intersection(set(vtrust_uids)) - query_uids, _ = await ping_uids( - dendrite, metagraph, list(init_query_uids), timeout=timeout - ) - bt.logging.debug( - f"Available API node UIDs for subnet {metagraph.netuid}: {query_uids}" - ) - if len(query_uids) > 3: - query_uids = random.sample(query_uids, 3) - return query_uids - - -async def get_query_api_axons( - wallet, metagraph=None, n=0.1, timeout=3, uids=None -): - """ - Retrieves the axons of query API nodes based on their availability and stake. - - Args: - wallet (bittensor.wallet): The wallet instance to use for querying nodes. - metagraph (bittensor.metagraph, optional): The metagraph instance containing network information. - n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1. - timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3. - uids (Union[List[int], int], optional): The specific UID(s) of the API node(s) to query. Defaults to None. - - Returns: - list: A list of axon objects for the available API nodes. 
- """ - dendrite = bt.dendrite(wallet=wallet) - - if metagraph is None: - metagraph = bt.metagraph(netuid=21) - - if uids is not None: - query_uids = [uids] if isinstance(uids, int) else uids - else: - query_uids = await get_query_api_nodes( - dendrite, metagraph, n=n, timeout=timeout - ) - return [metagraph.axons[uid] for uid in query_uids] diff --git a/template/subnet_links.py b/template/subnet_links.py deleted file mode 100644 index c33f2e2dd..000000000 --- a/template/subnet_links.py +++ /dev/null @@ -1,76 +0,0 @@ -SUBNET_LINKS = [ - {"name": "sn0", "url": ""}, - {"name": "sn1", "url": "https://github.com/opentensor/prompting/"}, - {"name": "sn2", "url": "https://github.com/inference-labs-inc/omron-subnet/"}, - { - "name": "sn3", - "url": "https://github.com/myshell-ai/MyShell-TTS-Subnet/", - }, - {"name": "sn4", "url": "https://github.com/manifold-inc/targon/"}, - {"name": "sn5", "url": "https://github.com/OpenKaito/openkaito/"}, - {"name": "sn6", "url": "https://github.com/amedeo-gigaver/infinite_games/"}, - {"name": "sn7", "url": "https://github.com/eclipsevortex/SubVortex/"}, - { - "name": "sn8", - "url": "https://github.com/taoshidev/proprietary-trading-network/", - }, - {"name": "sn9", "url": "https://github.com/unconst/pretrain-subnet/"}, - { - "name": "sn10", - "url": "https://github.com/Sturdy-Subnet/sturdy-subnet/", - }, - { - "name": "sn11", - "url": "https://github.com/impel-intelligence/dippy-bittensor-subnet/", - }, - {"name": "sn12", "url": "https://github.com/backend-developers-ltd/ComputeHorde/"}, - {"name": "sn13", "url": "https://github.com/macrocosm-os/data-universe/"}, - { - "name": "sn14", - "url": "https://github.com/synapsec-ai/llm-defender-subnet/", - }, - { - "name": "sn15", - "url": "https://github.com/blockchain-insights/blockchain-data-subnet/", - }, - {"name": "sn16", "url": "https://github.com/eseckft/BitAds.ai/"}, - {"name": "sn17", "url": "https://github.com/404-Repo/three-gen-subnet/"}, - {"name": "sn18", "url": "https://github.com/corcel-api/cortex.t/"}, - {"name": "sn19", "url": "https://github.com/namoray/vision/"}, - {"name": "sn20", "url": "https://github.com/RogueTensor/bitagent_subnet/"}, - { - "name": "sn21", - "url": "https://github.com/omegalabsinc/omegalabs-anytoany-bittensor", - }, - {"name": "sn22", "url": "https://github.com/Datura-ai/smart-scrape/"}, - {"name": "sn23", "url": "https://github.com/SocialTensor/SocialTensorSubnet/"}, - { - "name": "sn24", - "url": "https://github.com/omegalabsinc/omegalabs-bittensor-subnet/", - }, - {"name": "sn25", "url": "https://github.com/macrocosm-os/folding/"}, - { - "name": "sn26", - "url": "https://github.com/TensorAlchemy/TensorAlchemy/", - }, - { - "name": "sn27", - "url": "https://github.com/neuralinternet/compute-subnet/", - }, - {"name": "sn28", "url": "https://github.com/foundryservices/snpOracle/"}, - {"name": "sn29", "url": "https://github.com/fractal-net/fractal/"}, - {"name": "sn30", "url": "https://github.com/Bettensor/bettensor/"}, - { - "name": "sn31", - "url": "https://github.com/nimaaghli/NASChain/", - }, - {"name": "sn32", "url": "https://github.com/It-s-AI/llm-detection/"}, - { - "name": "sn33", - "url": "https://github.com/afterpartyai/bittensor-conversation-genome-project/", - }, - {"name": "sn34", "url": "https://github.com/Healthi-Labs/healthi-subnet/"}, - {"name": "sn35", "url": "https://github.com/LogicNet-Subnet/LogicNet-prod/"}, - {"name": "sn36", "url": "https://github.com/HIP-Labs/HIP-Subnet/"}, - {"name": "sn37", "url": "https://github.com/macrocosm-os/finetuning/"}, -] 
diff --git a/template/validator/__init__.py b/template/validator/__init__.py deleted file mode 100644 index e43fa8564..000000000 --- a/template/validator/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .forward import forward -from .reward import reward diff --git a/template/validator/forward.py b/template/validator/forward.py deleted file mode 100644 index af5e7ee01..000000000 --- a/template/validator/forward.py +++ /dev/null @@ -1,63 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import time -import bittensor as bt - -from template.protocol import Dummy -from template.validator.reward import get_rewards -from template.utils.uids import get_random_uids - - -async def forward(self): - """ - The forward function is called by the validator every time step. - - It is responsible for querying the network and scoring the responses. - - Args: - self (:obj:`bittensor.neuron.Neuron`): The neuron object which contains all the necessary state for the validator. - - """ - # TODO(developer): Define how the validator selects a miner to query, how often, etc. - # get_random_uids is an example method, but you can replace it with your own. - miner_uids = get_random_uids(self, k=self.config.neuron.sample_size) - - # The dendrite client queries the network. - responses = await self.dendrite( - # Send the query to selected miner axons in the network. - axons=[self.metagraph.axons[uid] for uid in miner_uids], - # Construct a dummy query. This simply contains a single integer. - synapse=Dummy(dummy_input=self.step), - # All responses have the deserialize function called on them before returning. - # You are encouraged to define your own deserialization function. - deserialize=True, - ) - - # Log the results for monitoring purposes. - bt.logging.info(f"Received responses: {responses}") - - # TODO(developer): Define how the validator scores responses. - # Adjust the scores based on responses from miners. - rewards = get_rewards(self, query=self.step, responses=responses) - - bt.logging.info(f"Scored responses: {rewards}") - # Update the scores based on the rewards. You may want to define your own update_scores function for custom behavior. 
- self.update_scores(rewards, miner_uids) - time.sleep(5) diff --git a/template/validator/reward.py b/template/validator/reward.py deleted file mode 100644 index 584921832..000000000 --- a/template/validator/reward.py +++ /dev/null @@ -1,55 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# TODO(developer): Set your name -# Copyright © 2023 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -import numpy as np -from typing import List -import bittensor as bt - - -def reward(query: int, response: int) -> float: - """ - Reward the miner response to the dummy request. This method returns a reward - value for the miner, which is used to update the miner's score. - - Returns: - - float: The reward value for the miner. - """ - bt.logging.info(f"In rewards, query val: {query}, response val: {response}, rewards val: {1.0 if response == query * 2 else 0}") - return 1.0 if response == query * 2 else 0 - - -def get_rewards( - self, - query: int, - responses: List[float], -) -> np.ndarray: - """ - Returns an array of rewards for the given query and responses. - - Args: - - query (int): The query sent to the miner. - - responses (List[float]): A list of responses from the miner. - - Returns: - - np.ndarray: An array of rewards for the given query and responses. - """ - # Get all the reward results by iteratively calling your reward() function. 
- - return np.array( - [reward(query, response) for response in responses] - ) diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py index 48e015a93..167be9624 100644 --- a/tests/test_template_validator.py +++ b/tests/test_template_validator.py @@ -23,10 +23,10 @@ import torch from neurons.validator import Validator -from template.base.validator import BaseValidatorNeuron -from template.protocol import Dummy -from template.utils.uids import get_random_uids -from template.validator.reward import get_rewards +from cancer_ai.base.base_validator import BaseValidatorNeuron +from cancer_ai.protocol import Dummy +from cancer_ai.utils.uids import get_random_uids +from cancer_ai.validator.reward import get_rewards class TemplateValidatorNeuronTestCase(unittest.TestCase): diff --git a/tmp/dataset_tricoder.zip b/tmp/dataset_tricoder.zip new file mode 100644 index 000000000..b29da3fb1 Binary files /dev/null and b/tmp/dataset_tricoder.zip differ diff --git a/verify/generate.py b/verify/generate.py deleted file mode 100644 index ad860359d..000000000 --- a/verify/generate.py +++ /dev/null @@ -1,35 +0,0 @@ -from substrateinterface import Keypair -from os import getenv, environ -from datetime import datetime -import bittensor - -# Hardcode or set the environment variable WALLET_PASS to the password for the wallet -# environ["WALLET_PASS"] = "" - - -def main(args): - wallet = bittensor.wallet(name=args.name) - keypair = wallet.coldkey - - timestamp = datetime.now() - timezone = timestamp.astimezone().tzname() - - message = f"On {timestamp} {timezone} {args.message}" - signature = keypair.sign(data=message) - - file_contents = f"{message}\n\tSigned by: {keypair.ss58_address}\n\tSignature: {signature.hex()}" - print(file_contents) - open("message_and_signature.txt", "w").write(file_contents) - - print(f"Signature generated and saved to message_and_signature.txt") - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Generate a signature") - parser.add_argument("--message", help="The message to sign", type=str) - parser.add_argument("--name", help="The wallet name", type=str) - args = parser.parse_args() - - main(args) diff --git a/verify/verify.py b/verify/verify.py deleted file mode 100644 index 36ea50f7f..000000000 --- a/verify/verify.py +++ /dev/null @@ -1,41 +0,0 @@ -from substrateinterface import Keypair -from binascii import unhexlify - - -def main(args): - file_data = open(args.file).read() - file_split = file_data.split("\n\t") - - address_line = file_split[1] - address_prefix = "Signed by: " - if address_line.startswith(address_prefix): - address = address_line[len(address_prefix) :] - else: - address = address_line - - keypair = Keypair(ss58_address=address, ss58_format=42) - - message = file_split[0] - - signature_line = file_split[2] - signature_prefix = "Signature: " - if signature_line.startswith(signature_prefix): - signature = signature_line[len(signature_prefix) :] - else: - signature = signature_line - - real_signature = unhexlify(signature.encode()) - - if not keypair.verify(data=message, signature=real_signature): - raise ValueError(f"Invalid signature for address={address}") - else: - print(f"Signature verified, signed by {address}") - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Verify a signature") - parser.add_argument("--file", help="The file containing the message and signature") - args = parser.parse_args() - main(args)