diff --git a/CSV_DATA_FEATURE.md b/CSV_DATA_FEATURE.md
new file mode 100644
index 000000000..ad9d9a8a2
--- /dev/null
+++ b/CSV_DATA_FEATURE.md
@@ -0,0 +1,299 @@
+# CSV Data Feature for Jesse Trading Framework
+
+This feature adds support for loading custom data from CSV files for backtesting and hyperparameter optimization in Jesse.
+
+## Overview
+
+The CSV data feature allows you to:
+- Load tick data from CSV files
+- Aggregate tick data into OHLCV candles
+- Use custom data sources for backtesting
+- Import CSV data into the Jesse database
+- Access CSV data through REST API endpoints
+
+## Features
+
+### 1. CSV Data Provider (`jesse/services/csv_data_provider.py`)
+- Loads tick data from CSV files
+- Aggregates tick data into various timeframes (1m, 5m, 1h, etc.)
+- Supports data caching for performance
+- Handles large CSV files efficiently
+
+### 2. CSV Parser (`jesse/services/csv_parser.py`)
+- Parses various CSV formats
+- Auto-detects column names
+- Converts timestamps to Jesse's format
+- Supports different timestamp formats
+
+### 3. API Endpoints (`jesse/controllers/csv_controller.py`)
+- `/csv/symbols` - Get available symbols
+- `/csv/symbols/{symbol}/info` - Get symbol information
+- `/csv/symbols/{symbol}/timeframes` - Get available timeframes
+- `/csv/import` - Import a symbol into the database
+- `/csv/candles` - Get candles from CSV data
+- `/csv/preview/{symbol}` - Preview CSV data
+- `/csv/clear-cache` - Clear the data cache
+
+## Supported CSV Format
+
+The feature supports CSV files with the following format:
+```csv
+t,p,v
+1672444800000,0.005288,0.0
+1672444800001,0.005288,0.0
+1672444800002,0.005288,0.0
+```
+
+Where:
+- `t` - timestamp in milliseconds
+- `p` - price
+- `v` - volume
+
+## Usage
+
+### 1. Prepare Your Data
+
+Place your CSV files in the following structure:
+```
+/Users/alxy/Downloads/Fond/KucoinData/
+├── SYMBOL1/
+│   └── price.csv
+├── SYMBOL2/
+│   └── price.csv
+└── ...
+```
+
+### 2. Start the Jesse Server
+
+```bash
+jesse run
+```
+
+### 3. Access the CSV Endpoints
+
+The CSV endpoints are available at `http://localhost:9000/csv/`.
+
+### 4. Import Data for Backtesting
+
+#### Using the API:
+
+```bash
+# Get available symbols
+curl -X GET "http://localhost:9000/csv/symbols" \
+  -H "Authorization: Bearer YOUR_TOKEN"
+
+# Import a symbol into the database
+curl -X POST "http://localhost:9000/csv/import" \
+  -H "Authorization: Bearer YOUR_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "symbol": "ACH",
+    "timeframe": "1m",
+    "exchange": "custom"
+  }'
+
+# Get candles
+curl -X GET "http://localhost:9000/csv/candles?symbol=ACH&timeframe=1m" \
+  -H "Authorization: Bearer YOUR_TOKEN"
+```
+
+#### Using Python:
+
+```python
+from jesse.services.csv_data_provider import csv_data_provider
+
+# Get available symbols
+symbols = csv_data_provider.get_available_symbols()
+print(f"Available symbols: {symbols}")
+
+# Get candles for a symbol
+candles = csv_data_provider.get_candles(
+    symbol="ACH",
+    timeframe="1m",
+    start_date=1672444800000,   # Optional
+    finish_date=1672531200000   # Optional
+)
+
+# Import to the database
+success = csv_data_provider.save_candles_to_database(
+    symbol="ACH",
+    timeframe="1m",
+    exchange="custom"
+)
+```
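+The exact return type depends on the provider implementation; assuming `get_candles` yields rows in Jesse's `[timestamp, open, close, high, low, volume]` order, a quick sanity check might look like the following sketch (pandas is used here only for inspection):
+
+```python
+import pandas as pd
+
+# Wrap the returned candles in a DataFrame for a quick look
+df = pd.DataFrame(candles, columns=["timestamp", "open", "close", "high", "low", "volume"])
+df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
+print(df.head())
+print(f"{len(df)} candles from {df['timestamp'].min()} to {df['timestamp'].max()}")
+```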
+### 5. Use in Backtesting
+
+Once the data is imported, you can use it in backtesting by setting the exchange to "custom":
+
+```python
+# In your backtest configuration
+routes = [
+    {
+        "exchange": "custom",
+        "symbol": "ACH",
+        "timeframe": "1m",
+        "strategy": "YourStrategy"
+    }
+]
+```
+
+## Configuration
+
+### Data Directory
+
+By default, the CSV data provider looks for data in `/Users/alxy/Downloads/Fond/KucoinData/`. You can change this by passing a different `data_directory` when constructing the provider in `csv_data_provider.py`:
+
+```python
+csv_data_provider = CSVDataProvider(data_directory="/path/to/your/data")
+```
+
+### Supported Timeframes
+
+The feature supports all standard Jesse timeframes:
+- 1m, 3m, 5m, 15m, 30m, 45m
+- 1h, 2h, 3h, 4h, 6h, 8h, 12h
+- 1d, 3d, 1w, 1M
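+To illustrate how `t,p,v` tick rows roll up into OHLCV bars, here is a minimal, self-contained pandas sketch; it demonstrates the aggregation idea and is not the provider's actual implementation:
+
+```python
+import pandas as pd
+
+# Hypothetical ticks in the t,p,v format described above
+ticks = pd.DataFrame({
+    "t": [1672444800000, 1672444815000, 1672444860000],
+    "p": [0.005288, 0.005290, 0.005285],
+    "v": [10.0, 5.0, 7.5],
+})
+ticks.index = pd.to_datetime(ticks["t"], unit="ms")
+
+# Resample the price into 1-minute OHLC bars and sum volume per bar
+candles = ticks["p"].resample("1min").ohlc()
+candles["volume"] = ticks["v"].resample("1min").sum()
+print(candles)
+```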
+## Performance Considerations
+
+- Large CSV files are processed efficiently using pandas
+- Data is cached in memory for repeated access
+- Use `clear_cache()` to free memory when needed
+- Consider using smaller date ranges for very large datasets
+
+## Error Handling
+
+The feature includes comprehensive error handling for:
+- File not found errors
+- Invalid CSV format errors
+- Memory errors for very large files
+- Database connection errors
+
+## Testing
+
+Run the test script to verify functionality:
+
+```bash
+python test_csv_simple.py
+```
+
+This will test:
+- Data directory structure
+- CSV file reading
+- Data aggregation
+- Basic functionality
+
+## API Reference
+
+### GET /csv/symbols
+Get the list of available symbols.
+
+**Response:**
+```json
+{
+  "symbols": ["ACH", "BTC", "ETH", ...]
+}
+```
+
+### GET /csv/symbols/{symbol}/info
+Get information about a specific symbol.
+
+**Response:**
+```json
+{
+  "info": {
+    "symbol": "ACH",
+    "start_time": 1672444800000,
+    "end_time": 1758585540003,
+    "start_date": "2023-01-01",
+    "end_date": "2025-09-22",
+    "file_path": "/path/to/file.csv",
+    "file_size": 178916630
+  }
+}
+```
+
+### POST /csv/import
+Import a symbol into the Jesse database.
+
+**Request:**
+```json
+{
+  "symbol": "ACH",
+  "timeframe": "1m",
+  "exchange": "custom",
+  "start_date": "2023-01-01",
+  "finish_date": "2023-12-31"
+}
+```
+
+**Response:**
+```json
+{
+  "message": "Successfully imported ACH to database",
+  "symbol": "ACH",
+  "timeframe": "1m",
+  "exchange": "custom"
+}
+```
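+The same request from Python, as a short sketch using `requests` (the token, host, and payload values are placeholders):
+
+```python
+import requests
+
+resp = requests.post(
+    "http://localhost:9000/csv/import",
+    headers={"Authorization": "Bearer YOUR_TOKEN"},
+    json={"symbol": "ACH", "timeframe": "1m", "exchange": "custom"},
+)
+resp.raise_for_status()
+print(resp.json())
+```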
+### GET /csv/candles
+Get candles from CSV data.
+
+**Parameters:**
+- `symbol` - Symbol name
+- `timeframe` - Timeframe (default: 1m)
+- `exchange` - Exchange name (default: custom)
+- `start_date` - Start date (optional)
+- `finish_date` - Finish date (optional)
+
+**Response:**
+```json
+{
+  "candles": [
+    {
+      "time": 1672444800,
+      "open": 0.005288,
+      "close": 0.005288,
+      "high": 0.005288,
+      "low": 0.005288,
+      "volume": 0.0
+    }
+  ],
+  "count": 1426275,
+  "symbol": "ACH",
+  "timeframe": "1m",
+  "exchange": "custom"
+}
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **File not found**: Make sure the CSV files are in the correct directory structure
+2. **Memory errors**: Use smaller date ranges or clear the cache
+3. **Invalid format**: Ensure the CSV files use the expected format (t,p,v)
+4. **Database errors**: Check the database connection and permissions
+
+### Debug Mode
+
+Enable debug logging to see detailed information:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Contributing
+
+When contributing to this feature:
+1. Follow the existing code style
+2. Add tests for new functionality
+3. Update the documentation
+4. Test with various CSV formats
+
+## License
+
+This feature is part of the Jesse trading framework and follows the same license terms.
diff --git a/CSV_LOADER_README.md b/CSV_LOADER_README.md
new file mode 100644
index 000000000..ac7aa61f9
--- /dev/null
+++ b/CSV_LOADER_README.md
@@ -0,0 +1,257 @@
+# CSV Data Loader - Usage Guide
+
+## 🚀 Quick Start
+
+### 1. Start the Jesse server
+```bash
+# In the first terminal
+cd /Users/alxy/Desktop/1PROJ/JesseLocal/jesse
+jesse run
+```
+
+### 2. Quick test
+```bash
+# In a second terminal
+cd /Users/alxy/Desktop/1PROJ/JesseLocal/jesse
+python quick_test.py
+```
+
+## 📋 Available Scripts
+
+### 1. `quick_test.py` - Quick test
+Verifies basic functionality:
+- Fetching the list of symbols
+- Symbol information
+- Data preview
+- Importing a single symbol
+
+```bash
+python quick_test.py
+```
+
+### 2. `csv_ticks_to_db.py` - Main loader
+A full-featured loader with several modes:
+
+```python
+# In Jupyter or a Python script
+from jesse.research.external_data.csv_ticks_to_db import *
+
+# Quick preview
+quick_preview()
+
+# Load a data sample (10 symbols)
+load_sample_data()
+
+# Load all data
+load_all_data_full()
+
+# Load specific symbols
+load_specific_symbols(["ACH", "CAS", "DOGS"])
+
+# Load data for a date range
+load_custom_date_range()
+```
+
+### 3. `batch_csv_loader.py` - Batch loading with a progress bar
+```bash
+python batch_csv_loader.py
+```
+
+## 🔧 Configuration
+
+### Changing settings in `csv_ticks_to_db.py`:
+
+```python
+# Connection settings
+AUTHORIZATION = "your_authorization_token"
+BASE_URL = "http://localhost:9000"
+
+# Loading settings
+def load_all_data(
+    timeframe: str = "1m",       # Timeframe
+    max_symbols: int = None,     # Maximum number of symbols
+    start_date: str = None,      # Start date, e.g. "2023-01-01"
+    finish_date: str = None,     # Finish date, e.g. "2023-12-31"
+    preview_only: bool = False   # Preview only
+):
+```
+
+### Changing settings in `batch_csv_loader.py`:
+
+```python
+settings = {
+    'timeframe': '1m',      # Timeframe
+    'max_symbols': 50,      # Maximum number of symbols
+    'start_date': None,     # Start date
+    'finish_date': None,    # Finish date
+    'batch_size': 5,        # Batch size
+    'delay': 0.2            # Delay between requests
+}
+```
+
+## 📊 Monitoring the Load
+
+### Real-time statistics
+- Number of symbols processed
+- Successful/failed imports
+- Total number of candles
+- Loading speed
+
+### Statistics files
+- `batch_loader_stats.json` - Detailed loading statistics
+
+## 🎯 Usage Examples
+
+### Load all data
+```python
+from jesse.research.external_data.csv_ticks_to_db import load_all_data_full
+load_all_data_full()
+```
+
+### Load data for 2023
+```python
+from jesse.research.external_data.csv_ticks_to_db import load_all_data
+load_all_data(
+    start_date="2023-01-01",
+    finish_date="2023-12-31"
+)
+```
+
+### Load only the top 10 symbols
+```python
+from jesse.research.external_data.csv_ticks_to_db import load_all_data
+load_all_data(max_symbols=10)
+```
+
+### Load specific symbols
+```python
+from jesse.research.external_data.csv_ticks_to_db import load_specific_symbols
+load_specific_symbols(["ACH", "CAS", "DOGS", "READY"])
+```
+
+## 🔍 Debugging
+
+### Check the connection
+```python
+from jesse.research.external_data.csv_ticks_to_db import CSVDataLoader, BASE_URL, AUTHORIZATION
+
+loader = CSVDataLoader(BASE_URL, AUTHORIZATION)
+symbols = loader.get_available_symbols()
+print(f"Available symbols: {len(symbols)}")
+```
+
+### Check a specific symbol
+```python
+symbol = "ACH"
+info = loader.get_symbol_info(symbol)
+print(f"Info for {symbol}: {info}")
+
+preview = loader.preview_data(symbol, limit=5)
+print(f"Preview: {preview}")
+```
+
+### Check the loaded data
+```python
+candles = loader.get_candles("ACH", "1m", limit=10)
+print(f"Candles loaded: {candles['count']}")
+```
+
+## ⚠️ Troubleshooting
+
+### Connection error
+```
+❌ Failed to fetch symbols: Connection refused
+```
+**Solution:** Make sure the Jesse server is running at `http://localhost:9000`
+
+### Authorization error
+```
+❌ Failed to fetch symbols: 401 Unauthorized
+```
+**Solution:** Update the authorization token in the script
+
+### Import error
+```
+❌ Import failed for ACH: 500 Internal Server Error
+```
+**Solution:** Check the Jesse server logs; there may be a database problem
+
+### Out of memory
+```
+❌ MemoryError
+```
+**Solution:**
+- Reduce `max_symbols`
+- Increase the `delay` between requests
+- Use `start_date` and `finish_date` to limit the amount of data
+
+## 📈 Performance Optimization
+
+### For large volumes of data:
+1. Use `batch_csv_loader.py` with settings such as:
+   ```python
+   'batch_size': 3,    # Smaller batches
+   'delay': 0.5,       # Longer delay
+   'max_symbols': 100  # Limit the number of symbols
+   ```
+
+2. Load data period by period:
+   ```python
+   # Year 2023
+   load_all_data(start_date="2023-01-01", finish_date="2023-12-31")
+
+   # Year 2024
+   load_all_data(start_date="2024-01-01", finish_date="2024-12-31")
+   ```
+
+3. Use different timeframes:
+   ```python
+   # 1m for the main symbols first
+   load_specific_symbols(["ACH", "CAS"], "1m")
+
+   # Then 5m for the rest
+   load_all_data(timeframe="5m", max_symbols=50)
+   ```
+
+## 🎉 After Loading
+
+### Use in a backtest:
+```python
+# In the backtest configuration
+routes = [
+    {
+        "exchange": "custom",
+        "symbol": "ACH",
+        "timeframe": "1m",
+        "strategy": "YourStrategy"
+    }
+]
+```
+
+### Check the loaded data:
+```python
+from jesse.services.csv_data_provider import csv_data_provider
+
+# Get candles
+candles = csv_data_provider.get_candles("ACH", "1m")
+print(f"Loaded {len(candles)} candles for ACH")
+```
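+To confirm the candles actually landed in the database (rather than only in the provider's in-memory cache), Jesse's research helper can be queried as well. A sketch, assuming the standard `jesse.research.get_candles(exchange, symbol, timeframe, start_timestamp, finish_timestamp)` signature and an illustrative date range:
+
+```python
+from jesse import research
+
+# Timestamps are in milliseconds
+candles = research.get_candles("custom", "ACH", "1m", 1672444800000, 1672531200000)
+print(f"Candles in the database: {len(candles)}")
+```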
+""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +from jesse.research.external_data.csv_ticks_to_db import CSVDataLoader, BASE_URL, AUTHORIZATION +import time +from tqdm import tqdm +import json +from datetime import datetime + +class BatchCSVLoader: + """Пакетный загрузчик CSV данных с прогресс-баром""" + + def __init__(self): + self.loader = CSVDataLoader(BASE_URL, AUTHORIZATION) + self.stats = { + 'total_symbols': 0, + 'successful': 0, + 'failed': 0, + 'total_candles': 0, + 'start_time': None, + 'end_time': None, + 'errors': [] + } + + def load_with_progress(self, + timeframe: str = "1m", + max_symbols: int = None, + start_date: str = None, + finish_date: str = None, + batch_size: int = 10, + delay: float = 0.1): + """ + Загрузить данные с прогресс-баром + + Args: + timeframe: Таймфрейм + max_symbols: Максимальное количество символов + start_date: Начальная дата + finish_date: Конечная дата + batch_size: Размер батча для обработки + delay: Задержка между запросами + """ + + print("🚀 Пакетная загрузка CSV данных в Jesse...") + print(f"📊 Таймфрейм: {timeframe}") + if start_date: + print(f"📅 Начальная дата: {start_date}") + if finish_date: + print(f"📅 Конечная дата: {finish_date}") + print("-" * 60) + + # Получение списка символов + print("📋 Получаем список символов...") + symbols = self.loader.get_available_symbols() + + if not symbols: + print("❌ Символы не найдены!") + return + + # Ограничение количества + if max_symbols and max_symbols < len(symbols): + symbols = symbols[:max_symbols] + print(f"🔄 Ограничиваем до {max_symbols} символов") + + self.stats['total_symbols'] = len(symbols) + self.stats['start_time'] = time.time() + + print(f"✅ Найдено {len(symbols)} символов для загрузки") + print(f"📦 Размер батча: {batch_size}") + print() + + # Создание прогресс-бара + with tqdm(total=len(symbols), desc="Загрузка данных", unit="символ") as pbar: + for i in range(0, len(symbols), batch_size): + batch = symbols[i:i + batch_size] + + # Обработка батча + self._process_batch(batch, timeframe, start_date, finish_date, delay) + + # Обновление прогресс-бара + pbar.update(len(batch)) + + # Обновление описания + pbar.set_postfix({ + 'Успешно': self.stats['successful'], + 'Ошибок': self.stats['failed'], + 'Свечей': f"{self.stats['total_candles']:,}" + }) + + # Завершение + self.stats['end_time'] = time.time() + self._print_final_stats() + + def _process_batch(self, batch, timeframe, start_date, finish_date, delay): + """Обработать батч символов""" + for symbol in batch: + try: + # Импорт символа + success = self.loader.import_symbol( + symbol=symbol, + timeframe=timeframe, + exchange="custom", + start_date=start_date, + finish_date=finish_date + ) + + if success: + self.stats['successful'] += 1 + + # Получение количества свечей + candles_data = self.loader.get_candles(symbol, timeframe, limit=1) + if candles_data: + candle_count = candles_data.get('count', 0) + self.stats['total_candles'] += candle_count + else: + self.stats['failed'] += 1 + self.stats['errors'].append(f"Ошибка импорта {symbol}") + + # Задержка между запросами + if delay > 0: + time.sleep(delay) + + except Exception as e: + self.stats['failed'] += 1 + self.stats['errors'].append(f"Исключение для {symbol}: {str(e)}") + + def _print_final_stats(self): + """Вывести итоговую статистику""" + duration = self.stats['end_time'] - self.stats['start_time'] + + print("\n" + "=" * 60) + print("📊 ИТОГОВАЯ СТАТИСТИКА ЗАГРУЗКИ") + print("=" * 60) + print(f"📈 Всего символов: 
+    def _print_final_stats(self):
+        """Print the final statistics"""
+        duration = self.stats['end_time'] - self.stats['start_time']
+
+        print("\n" + "=" * 60)
+        print("📊 FINAL LOADING STATISTICS")
+        print("=" * 60)
+        print(f"📈 Total symbols: {self.stats['total_symbols']}")
+        print(f"✅ Successfully loaded: {self.stats['successful']}")
+        print(f"❌ Errors: {self.stats['failed']}")
+        print(f"📊 Total candles: {self.stats['total_candles']:,}")
+        print(f"⏱️ Elapsed time: {duration:.2f} seconds")
+
+        if self.stats['successful'] > 0:
+            print(f"⚡ Speed: {self.stats['successful']/duration:.2f} symbols/sec")
+            print(f"📈 Average candles per symbol: {self.stats['total_candles']/self.stats['successful']:,.0f}")
+
+        # Print errors, if any
+        if self.stats['errors']:
+            print(f"\n❌ Errors ({len(self.stats['errors'])}):")
+            for error in self.stats['errors'][:10]:  # Show the first 10
+                print(f"  • {error}")
+            if len(self.stats['errors']) > 10:
+                print(f"  ... and {len(self.stats['errors']) - 10} more errors")
+
+        # Save the statistics
+        self._save_stats()
+
+        print(f"\n🎉 Loading finished!")
+        print(f"💾 Statistics saved to batch_loader_stats.json")
+
+    def _save_stats(self):
+        """Save the statistics to a JSON file"""
+        stats_data = {
+            'timestamp': datetime.now().isoformat(),
+            'stats': self.stats,
+            'summary': {
+                'success_rate': self.stats['successful'] / self.stats['total_symbols'] * 100,
+                'avg_candles_per_symbol': self.stats['total_candles'] / max(self.stats['successful'], 1),
+                'duration_seconds': self.stats['end_time'] - self.stats['start_time']
+            }
+        }
+
+        with open('batch_loader_stats.json', 'w', encoding='utf-8') as f:
+            json.dump(stats_data, f, indent=2, ensure_ascii=False)
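+
+# Example of programmatic use without the interactive prompt below
+# (the settings shown are illustrative):
+#
+#   loader = BatchCSVLoader()
+#   loader.load_with_progress(timeframe="1m", max_symbols=10, batch_size=5, delay=0.2)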
(y/N): ").strip().lower() + if response not in ['y', 'yes', 'да']: + print("❌ Загрузка отменена") + return + + # Запуск загрузки + try: + loader.load_with_progress(**settings) + except KeyboardInterrupt: + print("\n⏹️ Загрузка прервана пользователем") + except Exception as e: + print(f"\n❌ Критическая ошибка: {e}") + + +if __name__ == "__main__": + main() diff --git a/jesse/__init__.py b/jesse/__init__.py index a74eca941..6b7dc354e 100644 --- a/jesse/__init__.py +++ b/jesse/__init__.py @@ -264,6 +264,7 @@ def shutdown_event(): from jesse.controllers.notification_controller import router as notification_router from jesse.controllers.system_controller import router as system_router from jesse.controllers.file_controller import router as file_router +from jesse.controllers.csv_controller import router as csv_router # register routers fastapi_app.include_router(websocket_router) @@ -277,6 +278,7 @@ def shutdown_event(): fastapi_app.include_router(notification_router) fastapi_app.include_router(system_router) fastapi_app.include_router(file_router) +fastapi_app.include_router(csv_router) # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/jesse/config.py b/jesse/config.py index 71a81fafa..4c645a5a8 100644 --- a/jesse/config.py +++ b/jesse/config.py @@ -64,6 +64,8 @@ 'warmup_candles_num': 240, 'generate_candles_from_1m': False, 'persistency': True, + # Fill missing candle data with empty candles instead of raising errors + 'fill_missing_candles': True, }, }, @@ -130,6 +132,9 @@ def set_config(conf: dict) -> None: config['env']['data']['warmup_candles_num'] = int(conf['warm_up_candles']) # logs config['env']['logging'] = conf['logging'] + # fill missing candles option + if 'fill_missing_candles' in conf: + config['env']['data']['fill_missing_candles'] = conf['fill_missing_candles'] # exchanges for key, e in conf['exchanges'].items(): if not jh.is_live() and e['type']: diff --git a/jesse/controllers/csv_controller.py b/jesse/controllers/csv_controller.py new file mode 100644 index 000000000..28242b79c --- /dev/null +++ b/jesse/controllers/csv_controller.py @@ -0,0 +1,224 @@ +""" +CSV Data Controller for Jesse trading framework. +Handles API endpoints for managing CSV data sources. +""" + +from typing import Optional, List, Dict +from fastapi import APIRouter, Header, Query, HTTPException +from fastapi.responses import JSONResponse +from pydantic import BaseModel + +from jesse.services import auth as authenticator +from jesse.modes.data_provider import ( + get_available_csv_symbols, + import_csv_symbol_to_database, + get_csv_candles +) +from jesse.services.csv_data_provider import csv_data_provider +import jesse.helpers as jh + + +router = APIRouter(prefix="/csv", tags=["CSV Data"]) + + +class CSVImportRequest(BaseModel): + symbol: str + timeframe: str = "1m" + exchange: str = "custom" + start_date: Optional[str] = None + finish_date: Optional[str] = None + + +class CSVSymbolInfo(BaseModel): + symbol: str + start_time: int + end_time: int + start_date: str + end_date: str + file_path: str + file_size: int + + +@router.get("/symbols") +def get_symbols(authorization: Optional[str] = Header(None)): + """ + Get list of available CSV symbols. 
+ """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + symbols = get_available_csv_symbols() + return JSONResponse({'symbols': symbols}, status_code=200) + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.get("/symbols/{symbol}/info") +def get_symbol_info(symbol: str, authorization: Optional[str] = Header(None)): + """ + Get information about a specific CSV symbol. + """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + info = csv_data_provider.get_symbol_info(symbol) + if info is None: + return JSONResponse({'error': f'Symbol {symbol} not found'}, status_code=404) + + return JSONResponse({'info': info}, status_code=200) + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.get("/symbols/{symbol}/timeframes") +def get_available_timeframes(symbol: str, authorization: Optional[str] = Header(None)): + """ + Get available timeframes for a CSV symbol. + """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + timeframes = csv_data_provider.get_available_timeframes(symbol) + return JSONResponse({'timeframes': timeframes}, status_code=200) + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.post("/import") +def import_symbol(request: CSVImportRequest, authorization: Optional[str] = Header(None)): + """ + Import a CSV symbol to Jesse database. + """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + # Convert date strings to timestamps if provided + start_date = None + finish_date = None + + if request.start_date: + start_date = jh.date_to_timestamp(request.start_date) + if request.finish_date: + finish_date = jh.date_to_timestamp(request.finish_date) + + # Import symbol to database + success = import_csv_symbol_to_database( + symbol=request.symbol, + timeframe=request.timeframe, + exchange=request.exchange, + start_date=start_date, + finish_date=finish_date + ) + + if success: + return JSONResponse({ + 'message': f'Successfully imported {request.symbol} to database', + 'symbol': request.symbol, + 'timeframe': request.timeframe, + 'exchange': request.exchange + }, status_code=200) + else: + return JSONResponse({ + 'error': f'Failed to import {request.symbol} to database' + }, status_code=500) + + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.get("/candles") +def get_candles( + symbol: str, + timeframe: str = "1m", + exchange: str = "custom", + start_date: Optional[str] = Query(None), + finish_date: Optional[str] = Query(None), + authorization: Optional[str] = Header(None) +): + """ + Get candles from CSV data source. 
+ """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + # Convert date strings to timestamps if provided + start_timestamp = None + finish_timestamp = None + + if start_date: + start_timestamp = jh.date_to_timestamp(start_date) + if finish_date: + finish_timestamp = jh.date_to_timestamp(finish_date) + + # Get candles + candles = get_csv_candles( + exchange=exchange, + symbol=symbol, + timeframe=timeframe, + start_date=start_timestamp, + finish_date=finish_timestamp + ) + + return JSONResponse({ + 'candles': candles, + 'count': len(candles), + 'symbol': symbol, + 'timeframe': timeframe, + 'exchange': exchange + }, status_code=200) + + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.post("/clear-cache") +def clear_cache(authorization: Optional[str] = Header(None)): + """ + Clear CSV data cache. + """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + csv_data_provider.clear_cache() + return JSONResponse({'message': 'Cache cleared successfully'}, status_code=200) + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) + + +@router.get("/preview/{symbol}") +def preview_data( + symbol: str, + limit: int = Query(100, ge=1, le=1000), + authorization: Optional[str] = Header(None) +): + """ + Preview CSV data for a symbol (first N rows). + """ + if not authenticator.is_valid_token(authorization): + return authenticator.unauthorized_response() + + try: + # Load tick data + tick_data = csv_data_provider.load_tick_data(symbol) + + if tick_data is None: + return JSONResponse({'error': f'No data found for symbol {symbol}'}, status_code=404) + + # Get preview data + preview = tick_data.head(limit).to_dict('records') + + return JSONResponse({ + 'preview': preview, + 'total_rows': len(tick_data), + 'symbol': symbol, + 'limit': limit + }, status_code=200) + + except Exception as e: + return JSONResponse({'error': str(e)}, status_code=500) diff --git a/jesse/enums/__init__.py b/jesse/enums/__init__.py index be5a65df2..a83725d95 100644 --- a/jesse/enums/__init__.py +++ b/jesse/enums/__init__.py @@ -93,6 +93,11 @@ class exchanges: GATE_SPOT = 'Gate Spot' HYPERLIQUID_PERPETUAL = 'Hyperliquid Perpetual' HYPERLIQUID_PERPETUAL_TESTNET = 'Hyperliquid Perpetual Testnet' + KUCOIN_SPOT = 'KuCoin Spot' + KUCOIN_FUTURES = 'KuCoin Futures' + KUCOIN_FUTURES_TESTNET = 'KuCoin Futures Testnet' + CUSTOM_CSV = 'CustomCSV' # CSV with database import in jesse format + CUSTOM_CSV_RAW = 'CustomCSVRaw' # RAW CSV without database import in jesse format @dataclass diff --git a/jesse/info.py b/jesse/info.py index 7832bd202..a3c80b455 100644 --- a/jesse/info.py +++ b/jesse/info.py @@ -23,6 +23,8 @@ timeframes.MINUTE_30, timeframes.HOUR_1, timeframes.HOUR_4, timeframes.DAY_1] HYPERLIQUID_TIMEFRAMES = [timeframes.MINUTE_1, timeframes.MINUTE_3, timeframes.MINUTE_5, timeframes.MINUTE_15, timeframes.MINUTE_30, timeframes.HOUR_1, timeframes.HOUR_2, timeframes.HOUR_4, timeframes.HOUR_8, timeframes.HOUR_12, timeframes.DAY_1] +KUCOIN_TIMEFRAMES = [timeframes.MINUTE_1, timeframes.MINUTE_3, timeframes.MINUTE_5, timeframes.MINUTE_15, + timeframes.MINUTE_30, timeframes.HOUR_1, timeframes.HOUR_2, timeframes.HOUR_4, timeframes.HOUR_6, timeframes.HOUR_8, timeframes.HOUR_12, timeframes.DAY_1, timeframes.WEEK_1, timeframes.MONTH_1] exchange_info = { # BYBIT_USDT_PERPETUAL @@ -425,6 +427,61 @@ }, "required_live_plan": "free", }, + # KUCOIN_SPOT + 
exchanges_enums.KUCOIN_SPOT: { + "name": exchanges_enums.KUCOIN_SPOT, + "url": "https://www.kucoin.com/trade", + "fee": 0.001, + "type": "spot", + "supported_leverage_modes": ["cross"], + "supported_timeframes": KUCOIN_TIMEFRAMES, + "modes": { + "backtesting": True, + "live_trading": True, + }, + "required_live_plan": "premium", + }, + # KUCOIN_FUTURES + exchanges_enums.KUCOIN_FUTURES: { + "name": exchanges_enums.KUCOIN_FUTURES, + "url": "https://futures.kucoin.com/trade", + "fee": 0.0006, + "type": "futures", + "supported_leverage_modes": ["cross", "isolated"], + "supported_timeframes": KUCOIN_TIMEFRAMES, + "modes": { + "backtesting": True, + "live_trading": True, + }, + "required_live_plan": "premium", + }, + # KUCOIN_FUTURES_TESTNET + exchanges_enums.KUCOIN_FUTURES_TESTNET: { + "name": exchanges_enums.KUCOIN_FUTURES_TESTNET, + "url": "https://sandbox-futures.kucoin.com/trade", + "fee": 0.0006, + "type": "futures", + "supported_leverage_modes": ["cross", "isolated"], + "supported_timeframes": KUCOIN_TIMEFRAMES, + "modes": { + "backtesting": False, + "live_trading": True, + }, + "required_live_plan": "free", + }, + exchanges_enums.CUSTOM_CSV: { + "name": exchanges_enums.CUSTOM_CSV, + "url": "https://jesse.trade", + "fee": 0.0, + "type": "spot", + "supported_leverage_modes": [], + "supported_timeframes": [timeframes.MINUTE_1, timeframes.MINUTE_5, timeframes.MINUTE_15, timeframes.MINUTE_30, timeframes.HOUR_1, timeframes.HOUR_4, timeframes.DAY_1], + "modes": { + "backtesting": True, + "live_trading": False, + }, + "required_live_plan": "free", + }, } # list of supported exchanges for backtesting diff --git a/jesse/modes/backtest_mode.py b/jesse/modes/backtest_mode.py index 4ca66cd26..c0e9f284a 100644 --- a/jesse/modes/backtest_mode.py +++ b/jesse/modes/backtest_mode.py @@ -496,8 +496,12 @@ def _step_simulator( progressbar = Progressbar(length, step=420) last_update_time = None for i in range(length): - # update time - store.app.time = first_candles_set[i][0] + 60_000 + # update time - ensure we don't go out of bounds + if i < len(first_candles_set): + store.app.time = first_candles_set[i][0] + 60_000 + else: + # If we're out of bounds, use the last available candle + store.app.time = first_candles_set[-1][0] + 60_000 # add candles for j in candles: @@ -528,9 +532,11 @@ def _step_simulator( # until = count - ((i + 1) % count) if (i + 1) % count == 0: + start_idx = max(0, i - (count - 1)) + end_idx = min(i + 1, len(candles[j]['candles'])) generated_candle = generate_candle_from_one_minutes( timeframe, - candles[j]['candles'][(i - (count - 1)):(i + 1)] + candles[j]['candles'][start_idx:end_idx] ) store.candles.add_candle(generated_candle, exchange, symbol, timeframe, with_execution=False, @@ -595,6 +601,8 @@ def _step_simulator( def _simulation_minutes_length(candles: dict) -> int: key = f"{config['app']['considering_candles'][0][0]}-{config['app']['considering_candles'][0][1]}" first_candles_set = candles[key]["candles"] + if len(first_candles_set) == 0: + raise ValueError(f"No candles available for {key}") return len(first_candles_set) @@ -608,7 +616,7 @@ def _prepare_times_before_simulation(candles: dict) -> None: try: store.app.starting_time = first_candles_set[0][0] except IndexError: - raise IndexError('Check your "warm_up_candles" config value') + raise IndexError(f'Check your "warm_up_candles" config value. No candles available for {key}. 
Array size: {len(first_candles_set)}') store.app.time = first_candles_set[0][0] @@ -688,10 +696,17 @@ def _prepare_routes(hyperparameters: dict = None, def get_candles_from_pipeline(candles_pipeline: Optional[BaseCandlesPipeline], candles: np.ndarray, i: int, candles_step: int = -1) -> np.ndarray: if candles_pipeline is None: if candles_step == -1: + # Ensure index is within bounds + if i >= len(candles): + raise IndexError(f"Index {i} is out of bounds for candles array of size {len(candles)}") return candles[i] else: - return candles[i: i+candles_step] - return candles_pipeline.get_candles(candles[i: i + candles_pipeline._batch_size], i, candles_step) + # Ensure slice is within bounds + end_idx = min(i + candles_step, len(candles)) + return candles[i: end_idx] + # Ensure batch slice is within bounds + batch_end = min(i + candles_pipeline._batch_size, len(candles)) + return candles_pipeline.get_candles(candles[i: batch_end], i, candles_step) def _update_progress_bar( @@ -902,7 +917,8 @@ def _skip_simulator( candles_step = _calculate_minimum_candle_step() progressbar = Progressbar(length, step=candles_step) last_update_time = None - for i in range(0, length, candles_step): + # Ensure we don't go beyond the available candles + for i in range(0, min(length, len(list(candles.values())[0]['candles'])), candles_step): # update time moved to _simulate_price_change_effect__multiple_candles # store.app.time = first_candles_set[i][0] + (60_000 * candles_step) _simulate_new_candles(candles, candles_pipelines, i, candles_step) @@ -986,9 +1002,22 @@ def _simulate_new_candles(candles: dict, candles_pipelines: Dict[str, BaseCandle # add candles for j in candles: candles_pipeline = candles_pipelines[j] - short_candles = get_candles_from_pipeline(candles_pipeline, candles[j]['candles'], i, candles_step) - candles[j]['candles'][i:i+candles_step] = short_candles - if i != 0: + # Ensure we don't request candles beyond array bounds + max_available_step = min(candles_step, len(candles[j]['candles']) - i) + if max_available_step <= 0: + continue # Skip if no candles available from this index + + short_candles = get_candles_from_pipeline(candles_pipeline, candles[j]['candles'], i, max_available_step) + # Ensure we don't exceed the array bounds + actual_step = min(max_available_step, len(short_candles)) + end_idx = min(i + actual_step, len(candles[j]['candles'])) + + # Only assign if we have valid bounds + if i < len(candles[j]['candles']) and end_idx <= len(candles[j]['candles']) and actual_step > 0: + candles[j]['candles'][i:end_idx] = short_candles[:actual_step] + + # Fix jumped candles only if we have candles and previous candle exists + if i != 0 and len(short_candles) > 0 and i - 1 < len(candles[j]["candles"]): previous_short_candles = candles[j]["candles"][i - 1] # work the same, the fix needs to be done only on the gap of 1m edge candles. 
short_candles[0] = _get_fixed_jumped_candle( @@ -1010,10 +1039,11 @@ def _simulate_new_candles(candles: dict, candles_pipelines: Dict[str, BaseCandle count = TIMEFRAME_TO_ONE_MINUTES[timeframe] if (i + candles_step) % count == 0: + start_idx = max(0, i - count + candles_step) + end_idx = min(i + candles_step, len(candles[j]["candles"])) generated_candle = generate_candle_from_one_minutes( timeframe, - candles[j]["candles"][ - i - count + candles_step: i + candles_step], + candles[j]["candles"][start_idx:end_idx], ) store.candles.add_candle( @@ -1029,6 +1059,10 @@ def _simulate_new_candles(candles: dict, candles_pipelines: Dict[str, BaseCandle def _simulate_price_change_effect_multiple_candles( short_timeframes_candles: np.ndarray, exchange: str, symbol: str ) -> None: + # Check if we have any candles to process + if len(short_timeframes_candles) == 0: + return + real_candle = np.array( [ short_timeframes_candles[0][0], @@ -1141,7 +1175,10 @@ def _update_all_routes_a_partial_candle( continue tf_minutes = TIMEFRAME_TO_ONE_MINUTES[timeframe] number_of_needed_candles = int(storable_temp_candle[0] % (tf_minutes * 60_000) // 60000) + 1 - candles_1m = store.candles.get_candles(exchange, symbol, '1m')[-number_of_needed_candles:] + all_candles_1m = store.candles.get_candles(exchange, symbol, '1m') + # Ensure we don't request more candles than available + number_of_needed_candles = min(number_of_needed_candles, len(all_candles_1m)) + candles_1m = all_candles_1m[-number_of_needed_candles:] generated_candle = generate_candle_from_one_minutes( timeframe, candles_1m, diff --git a/jesse/modes/data_provider.py b/jesse/modes/data_provider.py index 806d02430..9388031e5 100644 --- a/jesse/modes/data_provider.py +++ b/jesse/modes/data_provider.py @@ -5,6 +5,7 @@ from fastapi.responses import FileResponse import jesse.helpers as jh from jesse.info import live_trading_exchanges, backtesting_exchanges +from jesse.services.csv_data_provider import csv_data_provider def get_candles(exchange: str, symbol: str, timeframe: str): @@ -80,6 +81,77 @@ def get_candles(exchange: str, symbol: str, timeframe: str): ] +def get_csv_candles(exchange: str, symbol: str, timeframe: str, start_date: int = None, finish_date: int = None): + """ + Get candles from CSV data source. + + Args: + exchange: Exchange name (should be 'custom' for CSV data) + symbol: Symbol name + timeframe: Timeframe + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + List of candle dictionaries or empty list if failed + """ + try: + # Get candles from CSV data provider + candles = csv_data_provider.get_candles(symbol, timeframe, start_date, finish_date) + + if candles is None or len(candles) == 0: + return [] + + # Convert to Jesse format + return [ + { + 'time': int(c[0] / 1000), + 'open': c[1], + 'close': c[2], + 'high': c[3], + 'low': c[4], + 'volume': c[5], + } for c in candles + ] + + except Exception as e: + from jesse.services import logger + logger.error(f"Error getting CSV candles for {symbol}: {e}") + return [] + + +def get_available_csv_symbols(): + """ + Get list of available symbols from CSV data. + + Returns: + List of symbol names + """ + return csv_data_provider.get_available_symbols() + + +def import_csv_symbol_to_database(symbol: str, timeframe: str = "1m", + exchange: str = "custom", + start_date: int = None, + finish_date: int = None): + """ + Import a CSV symbol to Jesse database. 
+ + Args: + symbol: Symbol name + timeframe: Timeframe + exchange: Exchange name + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + bool: True if imported successfully, False otherwise + """ + return csv_data_provider.save_candles_to_database( + symbol, timeframe, exchange, start_date, finish_date + ) + + def get_config(client_config: dict, has_live=False) -> dict: from jesse.services.db import database database.open_connection() diff --git a/jesse/modes/import_candles_mode/__init__.py b/jesse/modes/import_candles_mode/__init__.py index ba896c7db..a553d4076 100644 --- a/jesse/modes/import_candles_mode/__init__.py +++ b/jesse/modes/import_candles_mode/__init__.py @@ -25,8 +25,9 @@ def run( exchange: str, symbol: str, start_date_str: str, + end_date_str: str = None, mode: str = 'candles', - running_via_dashboard: bool = True, + running_via_dashboard: bool = False, show_progressbar: bool = False, ): if running_via_dashboard: @@ -51,7 +52,7 @@ def handle_time(): try: start_timestamp = jh.arrow_to_timestamp(arrow.get(start_date_str, 'YYYY-MM-DD')) - except: + except Exception: raise ValueError( f'start_date must be a string representing a date before today. ex: 2020-01-17. You entered: {start_date_str}') @@ -62,12 +63,31 @@ def handle_time(): elif start_timestamp > today: raise ValueError("Future's date is not accepted. start_date must be a string a representing date BEFORE today.") + # end_date validations + end_timestamp = None + if end_date_str is not None: + try: + end_timestamp = jh.arrow_to_timestamp(arrow.get(end_date_str, 'YYYY-MM-DD')) + except Exception: + raise ValueError( + f'end_date must be a string representing a date. ex: 2023-12-31. You entered: {end_date_str}') + + if end_timestamp < start_timestamp: + raise ValueError("end_date must be after start_date.") + elif end_timestamp > today: + raise ValueError("end_date cannot be in the future.") + # We just call this to throw a exception in case of a symbol without dash jh.quote_asset(symbol) symbol = symbol.upper() - until_date = arrow.utcnow().floor('day') + # Use end_date if provided, otherwise use today + if end_timestamp is not None: + until_date = arrow.get(end_timestamp / 1000).floor('day') + else: + until_date = arrow.utcnow().floor('day') + start_date = arrow.get(start_timestamp / 1000) days_count = jh.date_diff_in_days(start_date, until_date) candles_count = days_count * 1440 @@ -92,6 +112,10 @@ def handle_time(): # to make sure it won't try to import candles from the future! 
LOL if temp_start_timestamp > jh.now_to_timestamp(): break + + # stop if we've reached the end_date + if end_timestamp is not None and temp_start_timestamp > end_timestamp: + break # prevent duplicates calls to boost performance count = Candle.select().where( @@ -146,7 +170,7 @@ def handle_time(): }) else: print(msg) - run(client_id, exchange, symbol, jh.timestamp_to_time(first_existing_timestamp)[:10], mode, + run(client_id, exchange, symbol, jh.timestamp_to_time(first_existing_timestamp)[:10], end_date_str, mode, running_via_dashboard, show_progressbar) return @@ -192,8 +216,9 @@ def handle_time(): skipped_days = round(skipped_minutes / 1440, 1) imported_days = round(imported_minutes / 1440, 1) + end_date_display = jh.timestamp_to_date(end_timestamp) if end_timestamp else "today" success_text = ( - f'Successfully imported candles since "{jh.timestamp_to_date(start_timestamp)}" until today ' + f'Successfully imported candles since "{jh.timestamp_to_date(start_timestamp)}" until "{end_date_display}" ' f'({imported_days} days imported, {skipped_days} days already existed in the database). ' ) diff --git a/jesse/modes/import_candles_mode/drivers/Custom/CustomCSV.py b/jesse/modes/import_candles_mode/drivers/Custom/CustomCSV.py new file mode 100644 index 000000000..f2a65c811 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/CustomCSV.py @@ -0,0 +1,431 @@ +from jesse.modes.import_candles_mode.drivers.interface import CandleExchange +from jesse.services.csv_data_provider import CSVDataProvider +from .csv_parsers import CSVParserFactory +import jesse.helpers as jh +import os +from typing import Optional + + +class CustomCSV(CandleExchange): + def __init__(self, data_directory: Optional[str] = None, parser_type: Optional[str] = None, max_candles: int = 1000): + """ + Initialize CustomCSV driver for local CSV files. + + Args: + data_directory: Path to directory containing CSV data files. + If None, uses default KucoinData directory. + parser_type: Specific CSV parser type to use (optional). + If None, auto-detects format. + max_candles: Maximum number of candles to fetch (default 1000). + Set to 0 or None for unlimited. 
+ """ + super().__init__( + name='CustomCSV', + count=max_candles if max_candles else 1000000, # Large number for unlimited + rate_limit_per_second=1, + backup_exchange_class=None + ) + + # Set data directory + if data_directory is None: + # Try to get from environment variable first + self.data_directory = os.getenv('CSV_DATA_DIR', "CSVDirectory") + else: + self.data_directory = data_directory + + # Validate data directory exists + if not os.path.exists(self.data_directory): + raise FileNotFoundError(f"Data directory not found: {self.data_directory}") + + # Initialize CSV parser using factory + self.csv_parser = CSVParserFactory.create_parser(self.data_directory, parser_type) + + # Initialize CSV data provider with custom directory (for backward compatibility) + self.csv_provider = CSVDataProvider(data_directory=self.data_directory) + + # Cache for symbol info to avoid repeated file system calls + self._symbol_cache = {} + self._available_symbols_cache = None + + def fetch(self, symbol: str, start_timestamp: int, timeframe: str) -> list: + """ + Fetch candles from CSV data provider + + Args: + symbol: Trading symbol (e.g., 'ACH' or 'ACH-USDT') + start_timestamp: Start timestamp in milliseconds + timeframe: Timeframe (e.g., '1m') + + Returns: + List of candles in Jesse format + """ + try: + # Remove common suffixes from symbol for CSV lookup + csv_symbol = self._normalize_symbol(symbol) + + # Validate symbol exists + if not self._symbol_exists(csv_symbol): + raise FileNotFoundError(f'Symbol {symbol} not found in data directory: {self.data_directory}') + + # Calculate end timestamp based on timeframe + end_timestamp = self._calculate_end_timestamp(start_timestamp, timeframe) + + # Get candles from CSV parser + candles = self.csv_parser.get_candles( + symbol=csv_symbol, + timeframe=timeframe, + start_date=start_timestamp, + finish_date=end_timestamp + ) + + if candles is None or len(candles) == 0: + # Get symbol info to provide more context + symbol_info = self.csv_parser.get_symbol_info(csv_symbol) + if symbol_info: + start_time_data = symbol_info.get('start_time', 0) + end_time = symbol_info.get('end_time', 0) + end_date_str = jh.timestamp_to_time(end_time) if end_time else 'Unknown' + start_date_str = jh.timestamp_to_time(start_time_data) if start_time_data else 'Unknown' + + # Determine if data hasn't started yet or has ended + if start_timestamp < start_time_data: + warning_msg = ( + f"⚠️ WARNING: No candles found for {symbol} in CSV data for timeframe {timeframe}. " + f"Data hasn't started yet. Available data starts: {start_date_str}. " + f"Requested start: {jh.timestamp_to_time(start_timestamp)}" + ) + else: + warning_msg = ( + f"⚠️ WARNING: No candles found for {symbol} in CSV data for timeframe {timeframe}. " + f"Data may have ended. Last available data: {end_date_str}. 
" + f"Requested start: {jh.timestamp_to_time(start_timestamp)}" + ) + raise Exception(warning_msg) + else: + raise Exception(f'No candles found for {symbol} in CSV data for timeframe {timeframe}') + + # Convert to Jesse format (list of dictionaries) + jesse_candles = [] + for candle in candles: + jesse_candles.append({ + 'id': jh.generate_unique_id(), # id + 'timestamp': int(candle[0]), # timestamp + 'open': float(candle[1]), # open + 'close': float(candle[2]), # close + 'high': float(candle[3]), # high + 'low': float(candle[4]), # low + 'volume': float(candle[5]), # volume + 'symbol': symbol, # symbol + 'exchange': 'CustomCSV', # exchange + 'timeframe': timeframe # timeframe + }) + + return jesse_candles + + except FileNotFoundError as e: + raise e + except Exception as e: + raise Exception(f'Error fetching candles from CSV for {symbol}: {e}') + + def get_starting_time(self, symbol: str) -> int: + """ + Get starting time for a symbol + + Args: + symbol: Trading symbol (e.g., 'ACH' or 'ACH-USDT') + + Returns: + Starting timestamp in milliseconds + """ + try: + # Normalize symbol for CSV lookup + csv_symbol = self._normalize_symbol(symbol) + + # Check cache first + if csv_symbol in self._symbol_cache: + return self._symbol_cache[csv_symbol]['start_time'] + + # Get symbol info from CSV parser + symbol_info = self.csv_parser.get_symbol_info(csv_symbol) + if symbol_info is None: + raise FileNotFoundError(f'Symbol {symbol} not found in CSV data directory: {self.data_directory}') + + # Cache the symbol info + self._symbol_cache[csv_symbol] = symbol_info + + return symbol_info['start_time'] + except FileNotFoundError as e: + raise e + except Exception as e: + raise Exception(f'Error getting starting time for {symbol}: {e}') + + def get_candles(self, symbol: str, start_date: int, finish_date: int) -> list: + """ + Get candles from CSV data provider + + Args: + symbol: Trading symbol (e.g., 'ACH' or 'ACH-USDT') + start_date: Start timestamp in milliseconds + finish_date: Finish timestamp in milliseconds + + Returns: + List of candles in Jesse format + """ + try: + # Normalize symbol for CSV lookup + csv_symbol = self._normalize_symbol(symbol) + + # Validate symbol exists + if not self._symbol_exists(csv_symbol): + raise FileNotFoundError(f'Symbol {symbol} not found in data directory: {self.data_directory}') + + # Get candles from CSV parser + candles = self.csv_parser.get_candles( + symbol=csv_symbol, + timeframe='1m', + start_date=start_date, + finish_date=finish_date + ) + + if candles is None or len(candles) == 0: + # Get symbol info to provide more context + symbol_info = self.csv_parser.get_symbol_info(csv_symbol) + if symbol_info: + end_time = symbol_info.get('end_time', 0) + end_date_str = jh.timestamp_to_time(end_time) if end_time else 'Unknown' + warning_msg = ( + f"⚠️ WARNING: No candles found for {symbol} in CSV data between " + f"{jh.timestamp_to_time(start_date)} and {jh.timestamp_to_time(finish_date)}. " + f"Data may have ended. 
Last available data: {end_date_str}" + ) + raise Exception(warning_msg) + else: + raise Exception(f'No candles found for {symbol} in CSV data between {start_date} and {finish_date}') + + # Convert to Jesse format (list of dictionaries) + jesse_candles = [] + for candle in candles: + jesse_candles.append({ + 'id': jh.generate_unique_id(), # id + 'timestamp': int(candle[0]), # timestamp + 'open': float(candle[1]), # open + 'close': float(candle[2]), # close + 'high': float(candle[3]), # high + 'low': float(candle[4]), # low + 'volume': float(candle[5]), # volume + 'symbol': symbol, # symbol + 'exchange': 'CustomCSV', # exchange + 'timeframe': '1m' # timeframe (hardcoded for get_candles) + }) + + return jesse_candles + + except FileNotFoundError as e: + raise e + except Exception as e: + raise Exception(f'Error getting candles from CSV for {symbol}: {e}') + + def get_available_symbols(self) -> list: + """ + Get available symbols from CSV data in SYMBOL-USDT format + + Returns: + List of available symbols in SYMBOL-USDT format + """ + try: + # Use cache if available + if self._available_symbols_cache is not None: + return self._available_symbols_cache + + # Get symbols from CSV parser (already in SYMBOL-USDT format) + symbols = self.csv_parser.get_available_symbols() + + # Cache the result + self._available_symbols_cache = symbols + + return symbols + except Exception as e: + raise Exception(f'Error getting symbols from CSV: {e}') + + def get_exchange_info(self, symbol: str) -> dict: + """ + Get exchange info for a symbol + + Args: + symbol: Trading symbol + + Returns: + Dictionary with exchange info + """ + try: + # Normalize symbol for CSV lookup + csv_symbol = self._normalize_symbol(symbol) + + # Check cache first + if csv_symbol in self._symbol_cache: + symbol_info = self._symbol_cache[csv_symbol] + else: + symbol_info = self.csv_parser.get_symbol_info(csv_symbol) + if symbol_info is None: + raise FileNotFoundError(f'Symbol {symbol} not found in CSV data directory: {self.data_directory}') + # Cache the symbol info + self._symbol_cache[csv_symbol] = symbol_info + + return { + 'symbol': symbol, + 'base_asset': csv_symbol, + 'quote_asset': 'USDT', + 'min_qty': 0.001, + 'max_qty': 1000000, + 'step_size': 0.001, + 'tick_size': 0.00001, + 'min_notional': 10.0, + 'price_precision': 5, + 'qty_precision': 3, + 'start_time': symbol_info.get('start_time', 0), + 'end_time': symbol_info.get('end_time', 0) + } + except FileNotFoundError as e: + raise e + except Exception as e: + raise Exception(f'Error getting exchange info for {symbol}: {e}') + + def _normalize_symbol(self, symbol: str) -> str: + """ + Normalize symbol by removing common suffixes for CSV lookup + + Args: + symbol: Trading symbol (e.g., 'ACH-USDT') + + Returns: + Normalized symbol for CSV lookup (e.g., 'ACH') + """ + if symbol.endswith('-USDT'): + return symbol.replace('-USDT', '') + elif symbol.endswith('-USDC'): + return symbol.replace('-USDC', '') + elif symbol.endswith('-BTC'): + return symbol.replace('-BTC', '') + elif symbol.endswith('-ETH'): + return symbol.replace('-ETH', '') + else: + return symbol + + def _symbol_exists(self, symbol: str) -> bool: + """ + Check if symbol exists in data directory + + Args: + symbol: Symbol name (without suffix) + + Returns: + True if symbol exists, False otherwise + """ + symbol_path = os.path.join(self.data_directory, symbol) + price_file = os.path.join(symbol_path, "price.csv") + return os.path.exists(price_file) + + def _calculate_end_timestamp(self, start_timestamp: int, timeframe: str) -> 
int: + """ + Calculate end timestamp based on timeframe and count + + Args: + start_timestamp: Start timestamp in milliseconds + timeframe: Timeframe (e.g., '1m', '5m', '1h') + + Returns: + End timestamp in milliseconds + """ + # Convert timeframe to milliseconds + timeframe_ms = self._timeframe_to_ms(timeframe) + + # Calculate end timestamp + return start_timestamp + (self.count - 1) * timeframe_ms + + def _timeframe_to_ms(self, timeframe: str) -> int: + """ + Convert timeframe string to milliseconds + + Args: + timeframe: Timeframe string (e.g., '1m', '5m', '1h', '1d') + + Returns: + Timeframe in milliseconds + """ + timeframe_map = { + '1m': 60 * 1000, # 1 minute + '3m': 3 * 60 * 1000, # 3 minutes + '5m': 5 * 60 * 1000, # 5 minutes + '15m': 15 * 60 * 1000, # 15 minutes + '30m': 30 * 60 * 1000, # 30 minutes + '1h': 60 * 60 * 1000, # 1 hour + '2h': 2 * 60 * 60 * 1000, # 2 hours + '4h': 4 * 60 * 60 * 1000, # 4 hours + '6h': 6 * 60 * 60 * 1000, # 6 hours + '8h': 8 * 60 * 60 * 1000, # 8 hours + '12h': 12 * 60 * 60 * 1000, # 12 hours + '1d': 24 * 60 * 60 * 1000, # 1 day + } + + return timeframe_map.get(timeframe, 60 * 1000) # Default to 1 minute + + def clear_cache(self): + """ + Clear all caches + """ + self._symbol_cache.clear() + self._available_symbols_cache = None + self.csv_parser.clear_cache() + self.csv_provider.clear_cache() + + def get_data_directory(self) -> str: + """ + Get the current data directory path + + Returns: + Path to data directory + """ + return self.data_directory + + def set_data_directory(self, data_directory: str): + """ + Set a new data directory and reinitialize provider + + Args: + data_directory: New path to data directory + """ + if not os.path.exists(data_directory): + raise FileNotFoundError(f"Data directory not found: {data_directory}") + + self.data_directory = data_directory + self.csv_parser = CSVParserFactory.create_parser(self.data_directory) + self.csv_provider = CSVDataProvider(data_directory=self.data_directory) + self.clear_cache() + + def get_parser_info(self) -> dict: + """ + Get information about the current CSV parser + + Returns: + Dictionary with parser information + """ + return self.csv_parser.get_parser_info() + + def get_available_parsers(self) -> dict: + """ + Get list of available CSV parsers + + Returns: + Dictionary mapping parser names to descriptions + """ + return CSVParserFactory.get_available_parsers() + + def set_parser_type(self, parser_type: str): + """ + Set a specific parser type + + Args: + parser_type: Parser type name + """ + self.csv_parser = CSVParserFactory.create_parser(self.data_directory, parser_type) + self.clear_cache() diff --git a/jesse/modes/import_candles_mode/drivers/Custom/__init__.py b/jesse/modes/import_candles_mode/drivers/Custom/__init__.py new file mode 100644 index 000000000..2c0ff3bb1 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/__init__.py @@ -0,0 +1 @@ +# CustomCSV driver for Jesse diff --git a/jesse/modes/import_candles_mode/drivers/Custom/batch_csv_loader.py b/jesse/modes/import_candles_mode/drivers/Custom/batch_csv_loader.py new file mode 100644 index 000000000..0c658ca23 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/batch_csv_loader.py @@ -0,0 +1,741 @@ +""" +Batch CSV Loader for CustomCSV driver. + +This module provides functionality for batch loading all symbols from a directory +with detailed progress reporting and statistics. 
+""" + +import os +import time +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed +import jesse.helpers as jh +from jesse.services import logger + +from .CustomCSV import CustomCSV +from .csv_parsers import CSVParserFactory + +# Database imports +try: + from jesse.services.db import database + from jesse.models.Candle import Candle, store_candles_into_db + DATABASE_AVAILABLE = True +except ImportError: + DATABASE_AVAILABLE = False + logger.warning("Database modules not available. Database saving will be disabled.") + + +@dataclass +class SymbolLoadResult: + """Result of loading a single symbol""" + symbol: str + success: bool + candles_count: int = 0 + start_time: Optional[datetime] = None + end_time: Optional[datetime] = None + error_message: Optional[str] = None + load_duration: float = 0.0 + saved_to_db: bool = False + db_save_duration: float = 0.0 + db_error_message: Optional[str] = None + timeframe: str = "1m" + + +@dataclass +class BatchLoadReport: + """Report of batch loading operation""" + total_symbols: int + successful_loads: int + failed_loads: int + total_candles: int + total_duration: float + start_time: datetime + end_time: datetime + results: List[SymbolLoadResult] + errors: List[str] + saved_to_db: int = 0 + db_save_failures: int = 0 + total_db_save_duration: float = 0.0 + + @property + def success_rate(self) -> float: + """Calculate success rate percentage""" + if self.total_symbols == 0: + return 0.0 + return (self.successful_loads / self.total_symbols) * 100 + + @property + def db_save_rate(self) -> float: + """Calculate database save rate percentage""" + if self.successful_loads == 0: + return 0.0 + return (self.saved_to_db / self.successful_loads) * 100 + + @property + def average_candles_per_symbol(self) -> float: + """Calculate average candles per successful symbol""" + if self.successful_loads == 0: + return 0.0 + return self.total_candles / self.successful_loads + + +class BatchCSVLoader: + """ + Batch loader for CSV data from directory. + + Provides functionality to load all available symbols from a directory + with progress reporting and detailed statistics. + """ + + def __init__(self, data_directory: Optional[str] = None, parser_type: Optional[str] = None): + """ + Initialize batch CSV loader. + + Args: + data_directory: Path to directory containing CSV data files + parser_type: Specific CSV parser type to use + """ + self.data_directory = data_directory or os.getenv('CSV_DATA_DIR', "/Users/alxy/Downloads/Fond/KucoinData") + self.parser_type = parser_type + + # Initialize CSV driver with unlimited candles + self.csv_driver = CustomCSV(data_directory=self.data_directory, parser_type=parser_type, max_candles=0) + + # Statistics + self.stats = { + 'total_symbols': 0, + 'successful_loads': 0, + 'failed_loads': 0, + 'total_candles': 0, + 'start_time': None, + 'end_time': None + } + + def get_available_symbols(self) -> List[str]: + """ + Get list of available symbols in the directory. + + Returns: + List of available symbols + """ + return self.csv_driver.get_available_symbols() + + def load_single_symbol(self, symbol: str, timeframe: str = "1m", + max_candles: int = 1000) -> SymbolLoadResult: + """ + Load data for a single symbol. 
+
+        Args:
+            symbol: Symbol to load
+            timeframe: Timeframe for candles
+            max_candles: Maximum number of candles to keep (0 = unlimited)
+
+        Returns:
+            SymbolLoadResult with loading details
+        """
+        start_time = time.time()
+        result = SymbolLoadResult(
+            symbol=symbol,
+            success=False,
+            timeframe=timeframe
+        )
+
+        try:
+            # Get symbol info
+            symbol_info = self.csv_driver.get_exchange_info(symbol)
+            if symbol_info:
+                # Convert timestamps to datetime objects (datetime is imported at module level)
+                result.start_time = datetime.fromtimestamp(symbol_info['start_time'] / 1000)
+                result.end_time = datetime.fromtimestamp(symbol_info['end_time'] / 1000)
+
+            # Load candles
+            start_timestamp = symbol_info['start_time'] if symbol_info else int(time.time() * 1000)
+            candles = self.csv_driver.fetch(symbol, start_timestamp, timeframe)
+
+            # Honor the documented max_candles limit (0 means unlimited)
+            if max_candles and len(candles) > max_candles:
+                candles = candles[:max_candles]
+
+            result.success = True
+            result.candles_count = len(candles)
+
+        except Exception as e:
+            result.error_message = str(e)
+            logger.error(f"Failed to load {symbol}: {e}")
+
+        result.load_duration = time.time() - start_time
+        return result
+
+    def load_all_symbols(self, timeframe: str = "1m", max_candles: int = 1000,
+                         max_workers: int = 4, progress_callback: Optional[callable] = None) -> BatchLoadReport:
+        """
+        Load all available symbols from directory.
+
+        Args:
+            timeframe: Timeframe for candles
+            max_candles: Maximum number of candles per symbol
+            max_workers: Maximum number of concurrent workers
+            progress_callback: Optional callback for progress updates
+
+        Returns:
+            BatchLoadReport with detailed results
+        """
+        logger.info("Starting batch CSV loading...")
+
+        # Get available symbols
+        symbols = self.get_available_symbols()
+        total_symbols = len(symbols)
+
+        if total_symbols == 0:
+            logger.warning("No symbols found in directory")
+            return BatchLoadReport(
+                total_symbols=0,
+                successful_loads=0,
+                failed_loads=0,
+                total_candles=0,
+                total_duration=0.0,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+                results=[],
+                errors=[]
+            )
+
+        logger.info(f"Found {total_symbols} symbols to load")
+
+        # Initialize statistics
+        self.stats = {
+            'total_symbols': total_symbols,
+            'successful_loads': 0,
+            'failed_loads': 0,
+            'total_candles': 0,
+            'start_time': datetime.now(),
+            'end_time': None
+        }
+
+        results = []
+        errors = []
+
+        # Load symbols
+        if max_workers == 1:
+            # Sequential loading
+            for i, symbol in enumerate(symbols):
+                logger.info(f"Loading {symbol} ({i+1}/{total_symbols})")
+                result = self.load_single_symbol(symbol, timeframe, max_candles)
+                results.append(result)
+
+                # Update statistics
+                if result.success:
+                    self.stats['successful_loads'] += 1
+                    self.stats['total_candles'] += result.candles_count
+                else:
+                    self.stats['failed_loads'] += 1
+                    if result.error_message:
+                        errors.append(f"{symbol}: {result.error_message}")
+
+                # Progress callback
+                if progress_callback:
+                    progress_callback(i + 1, total_symbols, result)
+        else:
+            # Parallel loading
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                # Submit all tasks
+                future_to_symbol = {
+                    executor.submit(self.load_single_symbol, symbol, timeframe, max_candles): symbol
+                    for symbol in symbols
+                }
+
+                # Process completed tasks
+                completed = 0
+                for future in as_completed(future_to_symbol):
+                    symbol = future_to_symbol[future]
+                    completed += 1
+
+                    try:
+                        result = future.result()
+                        results.append(result)
+
+                        # Update statistics
+                        if result.success:
+                            self.stats['successful_loads'] += 1
+                            self.stats['total_candles'] += result.candles_count
+                        else:
+                            self.stats['failed_loads'] += 1
+                            if result.error_message:
+                                
errors.append(f"{symbol}: {result.error_message}") + + logger.info(f"Completed {symbol} ({completed}/{total_symbols})") + + # Progress callback + if progress_callback: + progress_callback(completed, total_symbols, result) + + except Exception as e: + logger.error(f"Error processing {symbol}: {e}") + self.stats['failed_loads'] += 1 + errors.append(f"{symbol}: {e}") + + # Finalize statistics + self.stats['end_time'] = datetime.now() + total_duration = (self.stats['end_time'] - self.stats['start_time']).total_seconds() + + # Create report + report = BatchLoadReport( + total_symbols=total_symbols, + successful_loads=self.stats['successful_loads'], + failed_loads=self.stats['failed_loads'], + total_candles=self.stats['total_candles'], + total_duration=total_duration, + start_time=self.stats['start_time'], + end_time=self.stats['end_time'], + results=results, + errors=errors + ) + + logger.info(f"Batch loading completed: {report.success_rate:.1f}% success rate") + return report + + def load_symbols_by_pattern(self, pattern: str, timeframe: str = "1m", + max_candles: int = 1000) -> BatchLoadReport: + """ + Load symbols matching a specific pattern. + + Args: + pattern: Pattern to match symbol names (case-insensitive) + timeframe: Timeframe for candles + max_candles: Maximum number of candles per symbol + + Returns: + BatchLoadReport with detailed results + """ + all_symbols = self.get_available_symbols() + matching_symbols = [s for s in all_symbols if pattern.lower() in s.lower()] + + logger.info(f"Found {len(matching_symbols)} symbols matching pattern '{pattern}'") + + # Temporarily replace the driver's symbol list + original_symbols = self.csv_driver.get_available_symbols() + self.csv_driver._available_symbols_cache = matching_symbols + + try: + report = self.load_all_symbols(timeframe, max_candles, max_workers=1) + finally: + # Restore original symbol list + self.csv_driver._available_symbols_cache = original_symbols + + return report + + def generate_report(self, report: BatchLoadReport, save_to_file: Optional[str] = None) -> str: + """ + Generate a detailed text report from batch loading results. 
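+
+        The formatted text is returned and, when save_to_file is given, also
+        written to that path.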
+
+        Args:
+            report: BatchLoadReport to generate report from
+            save_to_file: Optional file path to save report
+
+        Returns:
+            Formatted report string
+        """
+        lines = []
+        lines.append("=" * 80)
+        lines.append("BATCH CSV LOADING REPORT")
+        lines.append("=" * 80)
+        lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        lines.append(f"Data Directory: {self.data_directory}")
+        lines.append(f"Parser Type: {self.parser_type or 'Auto-detected'}")
+        lines.append("")
+
+        # Summary statistics
+        lines.append("SUMMARY STATISTICS")
+        lines.append("-" * 40)
+        lines.append(f"Total Symbols: {report.total_symbols}")
+        lines.append(f"Successful Loads: {report.successful_loads}")
+        lines.append(f"Failed Loads: {report.failed_loads}")
+        lines.append(f"Success Rate: {report.success_rate:.1f}%")
+        lines.append(f"Total Candles: {report.total_candles:,}")
+        lines.append(f"Average Candles per Symbol: {report.average_candles_per_symbol:.1f}")
+        lines.append(f"Total Duration: {report.total_duration:.2f} seconds")
+        # Guard against empty reports to avoid a ZeroDivisionError
+        if report.total_symbols > 0:
+            lines.append(f"Average Time per Symbol: {report.total_duration / report.total_symbols:.2f} seconds")
+
+        # Database save statistics (saved_to_db is a dataclass field, so no hasattr() check is needed)
+        if report.saved_to_db > 0:
+            lines.append("")
+            lines.append("DATABASE SAVE STATISTICS")
+            lines.append("-" * 40)
+            lines.append(f"Saved to Database: {report.saved_to_db}")
+            lines.append(f"Database Save Failures: {report.db_save_failures}")
+            lines.append(f"Database Save Rate: {report.db_save_rate:.1f}%")
+            if report.total_db_save_duration > 0:
+                lines.append(f"Total DB Save Duration: {report.total_db_save_duration:.2f} seconds")
+                lines.append(f"Average DB Save Time: {report.total_db_save_duration / report.saved_to_db:.2f} seconds per symbol")
+
+        lines.append("")
+
+        # Detailed results
+        lines.append("DETAILED RESULTS")
+        lines.append("-" * 40)
+        for result in report.results:
+            status = "✅ SUCCESS" if result.success else "❌ FAILED"
+            lines.append(f"{status} {result.symbol}")
+            if result.success:
+                lines.append(f"  Candles: {result.candles_count:,}")
+                lines.append(f"  Duration: {result.load_duration:.2f}s")
+                if result.start_time and result.end_time:
+                    lines.append(f"  Data Range: {result.start_time} - {result.end_time}")
+
+                # Database save information (only shown when a DB save was actually attempted;
+                # a plain hasattr() check would always be true for a dataclass field)
+                if result.saved_to_db:
+                    lines.append(f"  Database: ✅ Saved ({result.db_save_duration:.2f}s)")
+                elif result.db_error_message:
+                    lines.append(f"  Database: ❌ Failed - {result.db_error_message}")
+            else:
+                lines.append(f"  Error: {result.error_message}")
+            lines.append("")
+
+        # Errors summary
+        if report.errors:
+            lines.append("ERRORS SUMMARY")
+            lines.append("-" * 40)
+            for error in report.errors:
+                lines.append(f"• {error}")
+            lines.append("")
+
+        # Performance metrics
+        lines.append("PERFORMANCE METRICS")
+        lines.append("-" * 40)
+        if report.total_candles > 0 and report.total_duration > 0:
+            candles_per_second = report.total_candles / report.total_duration
+            lines.append(f"Candles per Second: {candles_per_second:.1f}")
+
+        successful_results = [r for r in report.results if r.success]
+        if successful_results:
+            avg_load_time = sum(r.load_duration for r in successful_results) / len(successful_results)
+            lines.append(f"Average Load Time: {avg_load_time:.2f} seconds")
+
+        lines.append("=" * 80)
+
+        report_text = "\n".join(lines)
+
+        # Save to file if requested
+        if save_to_file:
+            try:
+                with open(save_to_file, 'w', encoding='utf-8') as f:
+                    f.write(report_text)
+                logger.info(f"Report saved to: {save_to_file}")
+            except Exception as e:
+                logger.error(f"Failed to save report to 
{save_to_file}: {e}")
+
+        return report_text
+
+    def get_directory_info(self) -> Dict:
+        """
+        Get information about the data directory.
+
+        Returns:
+            Dictionary with directory information
+        """
+        info = {
+            'directory': self.data_directory,
+            'exists': os.path.exists(self.data_directory),
+            'symbol_count': 0,
+            'parser_info': None,
+            'symbols': []
+        }
+
+        if info['exists']:
+            try:
+                info['symbol_count'] = len(self.get_available_symbols())
+                info['symbols'] = self.get_available_symbols()[:10]  # First 10 symbols
+                info['parser_info'] = self.csv_driver.get_parser_info()
+            except Exception as e:
+                info['error'] = str(e)
+
+        return info
+
+    def save_symbol_to_database(self, symbol: str, timeframe: str = "1m",
+                                exchange: str = "CustomCSV",
+                                max_candles: int = 0) -> SymbolLoadResult:
+        """
+        Load and save a single symbol to database.
+
+        Args:
+            symbol: Symbol to load
+            timeframe: Timeframe for candles
+            exchange: Exchange name for database
+            max_candles: Maximum candles to load (0 = unlimited)
+
+        Returns:
+            SymbolLoadResult with database save information
+        """
+        # Pre-assign a result so the except block below can always reference it
+        result = SymbolLoadResult(symbol=symbol, success=False, timeframe=timeframe)
+
+        try:
+            # Load data first
+            load_start = time.time()
+            result = self.load_single_symbol(symbol, timeframe, max_candles)
+            load_duration = time.time() - load_start
+            result.load_duration = load_duration
+
+            if not result.success:
+                return result
+
+            # Save to database
+            if not DATABASE_AVAILABLE:
+                result.db_error_message = "Database not available"
+                return result
+
+            db_start = time.time()
+
+            # Get candles data
+            symbol_info = self.csv_driver.get_exchange_info(symbol)
+            if not symbol_info:
+                result.db_error_message = "Could not get symbol info"
+                return result
+
+            start_timestamp = symbol_info['start_time']
+            end_timestamp = symbol_info['end_time']
+
+            # Load candles; use an explicit None/length check because `not candles`
+            # raises "truth value is ambiguous" for multi-element numpy arrays
+            candles = self.csv_driver.get_candles(symbol, start_timestamp, end_timestamp)
+            if candles is None or len(candles) == 0:
+                result.db_error_message = f"No candles to save (got {0 if candles is None else len(candles)} candles)"
+                return result
+
+            # Convert to numpy array for database storage
+            import numpy as np
+
+            # Convert list of dicts to numpy array
+            if isinstance(candles, list) and len(candles) > 0 and isinstance(candles[0], dict):
+                # Convert from Jesse format (list of dicts) to numpy array
+                candles_list = []
+                for candle in candles:
+                    candles_list.append([
+                        candle['timestamp'],
+                        candle['open'],
+                        candle['close'],
+                        candle['high'],
+                        candle['low'],
+                        candle['volume']
+                    ])
+                candles_array = np.array(candles_list)
+            else:
+                candles_array = np.array(candles)
+
+            # Ensure database connection
+            database.open_connection()
+
+            # Clear existing data for this exchange/symbol/timeframe
+            Candle.delete().where(
+                (Candle.exchange == exchange) &
+                (Candle.symbol == symbol) &
+                (Candle.timeframe == timeframe)
+            ).execute()
+
+            # Save to database using Jesse's function
+            store_candles_into_db(exchange, symbol, timeframe, candles_array, on_conflict='replace')
+
+            database.close_connection()
+
+            db_duration = time.time() - db_start
+            result.saved_to_db = True
+            result.db_save_duration = db_duration
+
+            logger.info(f"Successfully saved {len(candles)} candles for {symbol} to database in {db_duration:.2f}s")
+
+        except Exception as e:
+            result.db_error_message = str(e)
+            logger.error(f"Error saving {symbol} to database: {e}")
+            import traceback
+            logger.error(f"Traceback: {traceback.format_exc()}")
+
+        return result
+
+    def save_all_symbols_to_database(self, timeframe: str = "1m",
+                                     exchange: str = 
"CustomCSV", + max_candles: int = 0, + max_workers: int = 1, + batch_size: int = 20, + progress_callback: Optional[callable] = None) -> BatchLoadReport: + """ + Load and save all symbols to database. + + Args: + timeframe: Timeframe for candles + exchange: Exchange name for database + max_candles: Maximum candles per symbol (0 = unlimited) + max_workers: Number of parallel workers + batch_size: Number of symbols to process in each batch (default: 20) + progress_callback: Callback function for progress updates + + Returns: + BatchLoadReport with database save statistics + """ + if not DATABASE_AVAILABLE: + raise Exception("Database not available. Cannot save to database.") + + logger.info(f"Starting batch save to database for {self.data_directory}") + logger.info(f"Exchange: {exchange}, Timeframe: {timeframe}, Max candles: {max_candles}") + logger.info(f"Batch size: {batch_size}, Max workers: {max_workers}") + + # Get symbols to process + symbols = self.get_available_symbols() + total_symbols = len(symbols) + + if total_symbols == 0: + logger.warning("No symbols found to save") + return BatchLoadReport( + total_symbols=0, + successful_loads=0, + failed_loads=0, + total_candles=0, + total_duration=0.0, + start_time=datetime.now(), + end_time=datetime.now(), + results=[], + errors=["No symbols found"] + ) + + logger.info(f"Found {total_symbols} symbols to save to database") + + # Split symbols into batches + symbol_batches = [symbols[i:i + batch_size] for i in range(0, len(symbols), batch_size)] + total_batches = len(symbol_batches) + + logger.info(f"Split into {total_batches} batches of up to {batch_size} symbols each") + + # Initialize statistics + self.stats = { + 'total_symbols': total_symbols, + 'successful_loads': 0, + 'failed_loads': 0, + 'total_candles': 0, + 'saved_to_db': 0, + 'db_save_failures': 0, + 'start_time': datetime.now(), + 'end_time': None + } + + results = [] + errors = [] + + # Process batches + completed_symbols = 0 + + for batch_num, batch_symbols in enumerate(symbol_batches, 1): + logger.info(f"Processing batch {batch_num}/{total_batches} ({len(batch_symbols)} symbols)") + + if max_workers == 1: + # Sequential saving within batch + for symbol in batch_symbols: + logger.info(f"Saving {symbol} to database ({completed_symbols + 1}/{total_symbols})") + result = self.save_symbol_to_database(symbol, timeframe, exchange, max_candles) + results.append(result) + + # Update statistics + if result.success: + self.stats['successful_loads'] += 1 + self.stats['total_candles'] += result.candles_count + if result.saved_to_db: + self.stats['saved_to_db'] += 1 + else: + self.stats['db_save_failures'] += 1 + if result.db_error_message: + errors.append(f"{symbol} DB save failed: {result.db_error_message}") + else: + self.stats['failed_loads'] += 1 + if result.error_message: + errors.append(f"{symbol}: {result.error_message}") + + completed_symbols += 1 + + # Progress callback + if progress_callback: + progress_callback(completed_symbols, total_symbols, result) + else: + # Parallel saving within batch + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks for this batch + future_to_symbol = { + executor.submit(self.save_symbol_to_database, symbol, timeframe, exchange, max_candles): symbol + for symbol in batch_symbols + } + + # Process completed tasks + for future in as_completed(future_to_symbol): + symbol = future_to_symbol[future] + try: + result = future.result() + results.append(result) + + # Update statistics + if result.success: + 
self.stats['successful_loads'] += 1 + self.stats['total_candles'] += result.candles_count + if result.saved_to_db: + self.stats['saved_to_db'] += 1 + else: + self.stats['db_save_failures'] += 1 + if result.db_error_message: + errors.append(f"{symbol} DB save failed: {result.db_error_message}") + else: + self.stats['failed_loads'] += 1 + if result.error_message: + errors.append(f"{symbol}: {result.error_message}") + + completed_symbols += 1 + + # Progress callback + if progress_callback: + progress_callback(completed_symbols, total_symbols, result) + + except Exception as e: + error_msg = f"Unexpected error processing {symbol}: {e}" + errors.append(error_msg) + logger.error(error_msg) + completed_symbols += 1 + + # Log batch completion + batch_success = sum(1 for r in results[-len(batch_symbols):] if r.success) + batch_saved = sum(1 for r in results[-len(batch_symbols):] if r.saved_to_db) + logger.info(f"Batch {batch_num}/{total_batches} completed: {batch_success}/{len(batch_symbols)} loaded, {batch_saved}/{len(batch_symbols)} saved to DB") + + # Finalize statistics + self.stats['end_time'] = datetime.now() + self.stats['total_duration'] = (self.stats['end_time'] - self.stats['start_time']).total_seconds() + + # Calculate database save statistics + total_db_save_duration = sum(r.db_save_duration for r in results if r.saved_to_db) + + # Create report + report = BatchLoadReport( + total_symbols=total_symbols, + successful_loads=self.stats['successful_loads'], + failed_loads=self.stats['failed_loads'], + total_candles=self.stats['total_candles'], + total_duration=self.stats['total_duration'], + start_time=self.stats['start_time'], + end_time=self.stats['end_time'], + results=results, + errors=errors, + saved_to_db=self.stats['saved_to_db'], + db_save_failures=self.stats['db_save_failures'], + total_db_save_duration=total_db_save_duration + ) + + logger.info(f"Batch save completed: {report.successful_loads}/{total_symbols} symbols loaded, " + f"{report.saved_to_db}/{report.successful_loads} saved to database") + + return report + + +def create_batch_loader(data_directory: Optional[str] = None, parser_type: Optional[str] = None) -> BatchCSVLoader: + """ + Convenience function to create a BatchCSVLoader instance. + + Args: + data_directory: Path to directory containing CSV data files + parser_type: Specific CSV parser type to use + + Returns: + BatchCSVLoader instance + """ + return BatchCSVLoader(data_directory, parser_type) diff --git a/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/__init__.py b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/__init__.py new file mode 100644 index 000000000..dbfe54eca --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/__init__.py @@ -0,0 +1,16 @@ +""" +CSV Parsers for CustomCSV driver. + +This module provides interfaces and implementations for parsing different CSV formats. +""" + +from .base_csv_parser import BaseCSVParser +from .kucoin_csv_parser import KucoinCSVParser +from .csv_parser_factory import CSVParserFactory + +__all__ = [ + 'BaseCSVParser', + 'KucoinCSVParser', + 'CSVParserFactory' +] + diff --git a/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/base_csv_parser.py b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/base_csv_parser.py new file mode 100644 index 000000000..4ebe9f74e --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/base_csv_parser.py @@ -0,0 +1,216 @@ +""" +Base CSV Parser interface for CustomCSV driver. 
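+
+A new CSV layout is typically added by subclassing BaseCSVParser and
+registering the subclass with CSVParserFactory.register_parser() (see
+csv_parser_factory.py), roughly like this hypothetical sketch:
+
+    class MyExchangeCSVParser(BaseCSVParser):
+        ...  # implement the abstract methods defined below
+
+    CSVParserFactory.register_parser('myexchange', MyExchangeCSVParser)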
+ +This module defines the abstract interface that all CSV parsers must implement. +""" + +from abc import ABC, abstractmethod +from typing import List, Dict, Optional, Tuple +import pandas as pd +import numpy as np +import os + + +class BaseCSVParser(ABC): + """ + Abstract base class for CSV parsers. + + All CSV parsers must implement this interface to be compatible with CustomCSV driver. + """ + + def __init__(self, data_directory: str): + """ + Initialize CSV parser. + + Args: + data_directory: Base directory containing CSV data files + """ + self.data_directory = data_directory + self.cache = {} # Cache for loaded data + + @abstractmethod + def get_available_symbols(self) -> List[str]: + """ + Get list of available symbols. + + Returns: + List of symbol names in SYMBOL-USDT format + """ + pass + + @abstractmethod + def get_symbol_info(self, symbol: str) -> Optional[Dict]: + """ + Get information about a specific symbol. + + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + + Returns: + Dictionary with symbol information or None if not found + """ + pass + + @abstractmethod + def load_tick_data(self, symbol: str, start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[pd.DataFrame]: + """ + Load tick data for a symbol. + + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + DataFrame with tick data or None if failed + """ + pass + + @abstractmethod + def aggregate_to_candles(self, tick_data: pd.DataFrame, timeframe: str = "1m") -> np.ndarray: + """ + Aggregate tick data into OHLCV candles. + + Args: + tick_data: DataFrame with tick data + timeframe: Target timeframe (e.g., '1m', '5m', '1h') + + Returns: + Numpy array with candles in format [timestamp, open, high, low, close, volume] + """ + pass + + @abstractmethod + def get_candles(self, symbol: str, timeframe: str = "1m", + start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[np.ndarray]: + """ + Get candles for a symbol. + + Args: + symbol: Symbol name + timeframe: Timeframe + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + Numpy array of candles or None if failed + """ + pass + + @abstractmethod + def get_file_path(self, symbol: str) -> str: + """ + Get the file path for a symbol. + + Args: + symbol: Symbol name (without suffix) + + Returns: + Full path to the CSV file + """ + pass + + @abstractmethod + def validate_file_format(self, file_path: str) -> bool: + """ + Validate that the CSV file has the expected format. + + Args: + file_path: Path to the CSV file + + Returns: + True if format is valid, False otherwise + """ + pass + + def normalize_symbol(self, symbol: str) -> str: + """ + Normalize symbol by removing common suffixes. + + Args: + symbol: Trading symbol (e.g., 'ACH-USDT') + + Returns: + Normalized symbol for file lookup (e.g., 'ACH') + """ + if symbol.endswith('-USDT'): + return symbol.replace('-USDT', '') + elif symbol.endswith('-USDC'): + return symbol.replace('-USDC', '') + elif symbol.endswith('-BTC'): + return symbol.replace('-BTC', '') + elif symbol.endswith('-ETH'): + return symbol.replace('-ETH', '') + else: + return symbol + + def symbol_exists(self, symbol: str) -> bool: + """ + Check if symbol exists in data directory. 
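+
+        For the Kucoin layout this is true when
+        <data_directory>/<SYMBOL>/price.csv exists and passes
+        validate_file_format().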
+ + Args: + symbol: Symbol name (without suffix) + + Returns: + True if symbol exists, False otherwise + """ + file_path = self.get_file_path(symbol) + return os.path.exists(file_path) and self.validate_file_format(file_path) + + def clear_cache(self): + """ + Clear all caches. + """ + self.cache.clear() + + def get_data_directory(self) -> str: + """ + Get the current data directory path. + + Returns: + Path to data directory + """ + return self.data_directory + + def set_data_directory(self, data_directory: str): + """ + Set a new data directory. + + Args: + data_directory: New path to data directory + """ + if not os.path.exists(data_directory): + raise FileNotFoundError(f"Data directory not found: {data_directory}") + + self.data_directory = data_directory + self.clear_cache() + + def _timeframe_to_ms(self, timeframe: str) -> int: + """ + Convert timeframe string to milliseconds. + + Args: + timeframe: Timeframe string (e.g., '1m', '5m', '1h', '1d') + + Returns: + Timeframe in milliseconds + """ + timeframe_map = { + '1m': 60 * 1000, # 1 minute + '3m': 3 * 60 * 1000, # 3 minutes + '5m': 5 * 60 * 1000, # 5 minutes + '15m': 15 * 60 * 1000, # 15 minutes + '30m': 30 * 60 * 1000, # 30 minutes + '1h': 60 * 60 * 1000, # 1 hour + '2h': 2 * 60 * 60 * 1000, # 2 hours + '4h': 4 * 60 * 60 * 1000, # 4 hours + '6h': 6 * 60 * 60 * 1000, # 6 hours + '8h': 8 * 60 * 60 * 1000, # 8 hours + '12h': 12 * 60 * 60 * 1000, # 12 hours + '1d': 24 * 60 * 60 * 1000, # 1 day + } + + return timeframe_map.get(timeframe, 60 * 1000) # Default to 1 minute + diff --git a/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/csv_parser_factory.py b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/csv_parser_factory.py new file mode 100644 index 000000000..9a9dfe269 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/csv_parser_factory.py @@ -0,0 +1,201 @@ +""" +CSV Parser Factory for CustomCSV driver. + +This factory automatically detects CSV format and creates appropriate parser. +""" + +import os +from typing import Optional, Dict, Any +from .base_csv_parser import BaseCSVParser +from .kucoin_csv_parser import KucoinCSVParser +import jesse.helpers as jh +from jesse.services import logger + + +class CSVParserFactory: + """ + Factory class for creating CSV parsers based on detected format. + """ + + # Registry of available parsers + _parsers = { + 'kucoin': KucoinCSVParser, + # Add more parsers here as needed + # 'binance': BinanceCSVParser, + # 'coinbase': CoinbaseCSVParser, + } + + @classmethod + def create_parser(cls, data_directory: str, parser_type: Optional[str] = None) -> BaseCSVParser: + """ + Create CSV parser for the given data directory. + + Args: + data_directory: Path to data directory + parser_type: Specific parser type to use (optional) + + Returns: + Appropriate CSV parser instance + """ + if parser_type: + if parser_type not in cls._parsers: + raise ValueError(f"Unknown parser type: {parser_type}. 
Available: {list(cls._parsers.keys())}") + return cls._parsers[parser_type](data_directory) + + # Auto-detect format + detected_type = cls.detect_format(data_directory) + if detected_type: + logger.info(f"Auto-detected CSV format: {detected_type}") + return cls._parsers[detected_type](data_directory) + + # Default to KucoinCSVParser for backward compatibility + logger.info("Using default KucoinCSVParser") + return KucoinCSVParser(data_directory) + + @classmethod + def detect_format(cls, data_directory: str) -> Optional[str]: + """ + Detect CSV format by examining files in the directory. + + Args: + data_directory: Path to data directory + + Returns: + Detected format type or None if unknown + """ + if not os.path.exists(data_directory): + return None + + # Look for sample files to detect format + sample_files = [] + for item in os.listdir(data_directory): + item_path = os.path.join(data_directory, item) + if os.path.isdir(item_path): + # Check for common CSV file names + for csv_file in ['price.csv', 'data.csv', 'trades.csv', 'klines.csv']: + file_path = os.path.join(item_path, csv_file) + if os.path.exists(file_path): + sample_files.append(file_path) + break + + # Limit to first few files for performance + if len(sample_files) >= 3: + break + + if not sample_files: + return None + + # Analyze sample files to detect format + for file_path in sample_files: + format_type = cls._analyze_file_format(file_path) + if format_type: + return format_type + + return None + + @classmethod + def _analyze_file_format(cls, file_path: str) -> Optional[str]: + """ + Analyze a single file to determine its format. + + Args: + file_path: Path to CSV file + + Returns: + Detected format type or None + """ + try: + with open(file_path, 'r') as f: + # Read first few lines + lines = [] + for i, line in enumerate(f): + if i >= 5: # Read max 5 lines + break + lines.append(line.strip()) + + if not lines: + return None + + # Check for Kucoin format: t,p,v + if lines[0] == 't,p,v': + # Validate data format + for line in lines[1:]: + if not line: + continue + parts = line.split(',') + if len(parts) == 3: + try: + # Check if first part is timestamp, others are numeric + int(parts[0]) + float(parts[1]) + float(parts[2]) + return 'kucoin' + except ValueError: + break + else: + break + + # Add more format detection logic here + # elif lines[0] == 'timestamp,open,high,low,close,volume': + # return 'binance' + # elif lines[0] == 'time,price,size': + # return 'coinbase' + + except Exception as e: + logger.error(f"Error analyzing file format for {file_path}: {e}") + + return None + + @classmethod + def register_parser(cls, name: str, parser_class: type): + """ + Register a new parser type. + + Args: + name: Parser name + parser_class: Parser class that inherits from BaseCSVParser + """ + if not issubclass(parser_class, BaseCSVParser): + raise ValueError("Parser class must inherit from BaseCSVParser") + + cls._parsers[name] = parser_class + logger.info(f"Registered parser: {name}") + + @classmethod + def get_available_parsers(cls) -> Dict[str, str]: + """ + Get list of available parsers. + + Returns: + Dictionary mapping parser names to descriptions + """ + return { + name: parser_class.__doc__.split('\n')[0] if parser_class.__doc__ else "No description" + for name, parser_class in cls._parsers.items() + } + + @classmethod + def get_parser_info(cls, parser_type: str) -> Optional[Dict[str, Any]]: + """ + Get information about a specific parser. 
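+
+        For example, CSVParserFactory.get_parser_info('kucoin') describes the
+        bundled Kucoin parser.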
+
+        Args:
+            parser_type: Parser type name
+
+        Returns:
+            Parser information dictionary or None if not found
+        """
+        if parser_type not in cls._parsers:
+            return None
+
+        # Create temporary instance to get info
+        try:
+            temp_parser = cls._parsers[parser_type]("/tmp")
+            return temp_parser.get_parser_info()
+        except Exception:
+            return {
+                'name': parser_type,
+                'class': cls._parsers[parser_type].__name__,
+                'description': cls._parsers[parser_type].__doc__.split('\n')[0] if cls._parsers[parser_type].__doc__ else "No description"
+            }
+
diff --git a/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/kucoin_csv_parser.py b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/kucoin_csv_parser.py
new file mode 100644
index 000000000..f2a4d87dd
--- /dev/null
+++ b/jesse/modes/import_candles_mode/drivers/Custom/csv_parsers/kucoin_csv_parser.py
@@ -0,0 +1,359 @@
+"""
+Kucoin CSV Parser implementation for CustomCSV driver.
+
+This parser handles the specific CSV format used by KucoinData:
+- File structure: SYMBOL/price.csv
+- CSV format: t,p,v (timestamp, price, volume)
+- Headers: t,p,v
+"""
+
+import os
+import pandas as pd
+import numpy as np
+from typing import List, Dict, Optional
+from .base_csv_parser import BaseCSVParser
+from jesse.services import logger
+
+
+class KucoinCSVParser(BaseCSVParser):
+    """
+    CSV parser for KucoinData format.
+
+    Expected file structure:
+    /data_directory/
+    ├── ACH/
+    │   └── price.csv
+    ├── AEG/
+    │   └── price.csv
+    └── ...
+
+    CSV format:
+    t,p,v
+    1672444800000,0.00785,0.0
+    1672444800001,0.00785,0.0
+    """
+
+    def __init__(self, data_directory: str = "/Users/alxy/Downloads/Fond/KucoinData"):
+        """
+        Initialize Kucoin CSV parser.
+
+        Args:
+            data_directory: Base directory containing CSV data files
+        """
+        super().__init__(data_directory)
+        self.expected_columns = ['t', 'p', 'v']  # timestamp, price, volume
+        self.expected_headers = 't,p,v'
+
+    def get_available_symbols(self) -> List[str]:
+        """
+        Get list of available symbols in SYMBOL-USDT format.
+
+        Returns:
+            List of symbol names in SYMBOL-USDT format
+        """
+        if not os.path.exists(self.data_directory):
+            logger.error(f"Data directory not found: {self.data_directory}")
+            return []
+
+        symbols = []
+        for item in os.listdir(self.data_directory):
+            item_path = os.path.join(self.data_directory, item)
+            if os.path.isdir(item_path):
+                # Check if price.csv exists in the directory
+                price_file = os.path.join(item_path, "price.csv")
+                if os.path.exists(price_file) and self.validate_file_format(price_file):
+                    # Return symbols in SYMBOL-USDT format for Jesse compatibility
+                    symbols.append(f"{item}-USDT")
+
+        return sorted(symbols)
+
+    def get_symbol_info(self, symbol: str) -> Optional[Dict]:
+        """
+        Get information about a specific symbol.
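+
+        An illustrative return value (numbers are placeholders):
+            {'symbol': 'ACH', 'start_time': 1672444800000,
+             'end_time': 1758585540003,
+             'file_path': '<data_directory>/ACH/price.csv', 'format': 'kucoin'}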
+ + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + + Returns: + Dictionary with symbol information or None if not found + """ + # Normalize symbol + csv_symbol = self.normalize_symbol(symbol) + + # Check cache first + cache_key = f"symbol_info_{csv_symbol}" + if cache_key in self.cache: + return self.cache[cache_key] + + file_path = self.get_file_path(csv_symbol) + + if not os.path.exists(file_path): + logger.error(f"Price file not found for symbol {symbol}: {file_path}") + return None + + try: + # Read first and last lines to get time range + with open(file_path, 'r') as f: + # Skip header + f.readline() + + # Read first data line + first_line = f.readline().strip() + if not first_line: + logger.error(f"Empty file: {file_path}") + return None + + # Read last line + last_line = None + for line in f: + line = line.strip() + if line: + last_line = line + + if not last_line: + last_line = first_line + + # Parse timestamps + first_timestamp = int(first_line.split(',')[0]) + last_timestamp = int(last_line.split(',')[0]) + + symbol_info = { + 'symbol': csv_symbol, + 'start_time': first_timestamp, + 'end_time': last_timestamp, + 'file_path': file_path, + 'format': 'kucoin' + } + + # Cache the result + self.cache[cache_key] = symbol_info + + return symbol_info + + except Exception as e: + logger.error(f"Error getting symbol info for {symbol}: {e}") + return None + + def load_tick_data(self, symbol: str, start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[pd.DataFrame]: + """ + Load tick data for a symbol. + + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + DataFrame with tick data or None if failed + """ + # Normalize symbol + csv_symbol = self.normalize_symbol(symbol) + + # Check cache first + cache_key = f"tick_data_{csv_symbol}_{start_date}_{finish_date}" + if cache_key in self.cache: + return self.cache[cache_key] + + file_path = self.get_file_path(csv_symbol) + + if not os.path.exists(file_path): + logger.error(f"Price file not found for symbol {symbol}: {file_path}") + return None + + try: + # Read CSV file (skip header row) + df = pd.read_csv(file_path, names=self.expected_columns, skiprows=1) + + # Filter by date range if specified + if start_date is not None: + df = df[df['t'] >= start_date] + if finish_date is not None: + df = df[df['t'] <= finish_date] + + # Sort by timestamp + df = df.sort_values('t').reset_index(drop=True) + + logger.info(f"Loaded {len(df)} ticks for {symbol}") + + # Cache the result + self.cache[cache_key] = df + + return df + + except Exception as e: + logger.error(f"Error loading tick data for {symbol}: {e}") + return None + + def aggregate_to_candles(self, tick_data: pd.DataFrame, timeframe: str = "1m") -> np.ndarray: + """ + Aggregate tick data into OHLCV candles. 
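+
+        Ticks are bucketed by flooring their millisecond timestamps to the
+        timeframe, and each candle's timestamp is the bucket start.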
+
+        Args:
+            tick_data: DataFrame with tick data
+            timeframe: Target timeframe (e.g., '1m', '5m', '1h')
+
+        Returns:
+            Numpy array with candles in format [timestamp, open, high, low, close, volume]
+        """
+        if tick_data.empty:
+            return np.array([])
+
+        # Convert timeframe to milliseconds
+        timeframe_ms = self._timeframe_to_ms(timeframe)
+
+        # Bucket ticks by flooring their timestamps to the timeframe boundary
+        tick_data['group'] = (tick_data['t'] // timeframe_ms) * timeframe_ms
+
+        # Aggregate each bucket into OHLCV values
+        agg_dict = {
+            'p': ['first', 'max', 'min', 'last'],  # open, high, low, close
+            'v': 'sum'  # volume
+        }
+
+        candles = tick_data.groupby('group').agg(agg_dict)
+
+        # Flatten multi-level columns
+        candles.columns = ['open', 'high', 'low', 'close', 'volume']
+
+        # Use the bucket start (the group key) as the candle timestamp so that
+        # candles align to timeframe boundaries rather than to the first tick's
+        # arbitrary millisecond offset
+        candles = candles.reset_index().rename(columns={'group': 'timestamp'})
+
+        # Convert to numpy array
+        result = candles[['timestamp', 'open', 'high', 'low', 'close', 'volume']].values
+
+        return result.astype(np.float64)
+
+    def get_candles(self, symbol: str, timeframe: str = "1m",
+                    start_date: Optional[int] = None,
+                    finish_date: Optional[int] = None) -> Optional[np.ndarray]:
+        """
+        Get candles for a symbol.
+
+        Args:
+            symbol: Symbol name
+            timeframe: Timeframe
+            start_date: Start timestamp in milliseconds (optional)
+            finish_date: Finish timestamp in milliseconds (optional)
+
+        Returns:
+            Numpy array of candles or None if failed
+        """
+        # Check cache first
+        cache_key = f"candles_{symbol}_{timeframe}_{start_date}_{finish_date}"
+        if cache_key in self.cache:
+            return self.cache[cache_key]
+
+        # Load tick data
+        tick_data = self.load_tick_data(symbol, start_date, finish_date)
+        if tick_data is None or tick_data.empty:
+            # Provide more context about why no data was found
+            try:
+                symbol_info = self.get_symbol_info(symbol)
+                if symbol_info:
+                    data_start = symbol_info.get('start_time', 0)
+                    data_end = symbol_info.get('end_time', 0)
+                    logger.warning(
+                        f"No tick data found for {symbol} in timeframe {timeframe}. "
+                        f"Available data range: {data_start} - {data_end}, "
+                        f"Requested range: {start_date} - {finish_date}"
+                    )
+            except Exception:
+                pass
+            return None
+
+        # Aggregate to candles
+        candles = self.aggregate_to_candles(tick_data, timeframe)
+
+        # Cache the result
+        self.cache[cache_key] = candles
+
+        return candles
+
+    def get_file_path(self, symbol: str) -> str:
+        """
+        Get the file path for a symbol.
+
+        Args:
+            symbol: Symbol name (without suffix)
+
+        Returns:
+            Full path to the CSV file
+        """
+        return os.path.join(self.data_directory, symbol, "price.csv")
+
+    def validate_file_format(self, file_path: str) -> bool:
+        """
+        Validate that the CSV file has the expected format.
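+
+        The check covers the 't,p,v' header, the three-column layout,
+        millisecond timestamps, and numeric price/volume values on the first
+        few data lines.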
+ + Args: + file_path: Path to the CSV file + + Returns: + True if format is valid, False otherwise + """ + try: + # Check if file exists and is readable + if not os.path.exists(file_path) or not os.access(file_path, os.R_OK): + return False + + # Read first line to check headers + with open(file_path, 'r') as f: + first_line = f.readline().strip() + if first_line != self.expected_headers: + logger.warning(f"Unexpected header format in {file_path}: {first_line}") + return False + + # Try to read a few lines to validate format + with open(file_path, 'r') as f: + lines = [f.readline().strip() for _ in range(3)] # Read header + 2 data lines + + for i, line in enumerate(lines[1:], 1): # Skip header + if not line: + continue + + parts = line.split(',') + if len(parts) != 3: + logger.warning(f"Invalid line format in {file_path} line {i+1}: {line}") + return False + + # Check if first part is a valid timestamp + try: + timestamp = int(parts[0]) + if timestamp < 1000000000000: # Should be milliseconds + logger.warning(f"Invalid timestamp format in {file_path} line {i+1}: {timestamp}") + return False + except ValueError: + logger.warning(f"Invalid timestamp in {file_path} line {i+1}: {parts[0]}") + return False + + # Check if price and volume are numeric + try: + float(parts[1]) # price + float(parts[2]) # volume + except ValueError: + logger.warning(f"Invalid numeric values in {file_path} line {i+1}: {line}") + return False + + return True + + except Exception as e: + logger.error(f"Error validating file format for {file_path}: {e}") + return False + + def get_parser_info(self) -> Dict: + """ + Get information about this parser. + + Returns: + Dictionary with parser information + """ + return { + 'name': 'KucoinCSVParser', + 'version': '1.0.0', + 'description': 'Parser for KucoinData CSV format', + 'expected_format': 't,p,v (timestamp, price, volume)', + 'file_structure': 'SYMBOL/price.csv', + 'supported_timeframes': ['1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h', '6h', '8h', '12h', '1d'] + } diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFutures.py b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFutures.py new file mode 100644 index 000000000..f88c97e91 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFutures.py @@ -0,0 +1,16 @@ +from .KuCoinMain import KuCoinMain + + +class KuCoinFutures(KuCoinMain): + def __init__(self) -> None: + # KuCoin Futures is not supported + raise ValueError( + 'KuCoin Futures is not supported. Please use KuCoin Spot instead.' + ) + + def _convert_symbol(self, symbol: str) -> str: + """Convert Jesse symbol format to CCXT format for futures""" + raise ValueError('KuCoin Futures is not supported') + + def get_available_symbols(self) -> list: + raise ValueError('KuCoin Futures is not supported') \ No newline at end of file diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFuturesTestnet.py b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFuturesTestnet.py new file mode 100644 index 000000000..56a82037c --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinFuturesTestnet.py @@ -0,0 +1,16 @@ +from .KuCoinMain import KuCoinMain + + +class KuCoinFuturesTestnet(KuCoinMain): + def __init__(self) -> None: + # KuCoin Futures Testnet is not supported + raise ValueError( + 'KuCoin Futures Testnet is not supported. Please use KuCoin Spot instead.' 
+ ) + + def _convert_symbol(self, symbol: str) -> str: + """Convert Jesse symbol format to CCXT format for futures testnet""" + raise ValueError('KuCoin Futures Testnet is not supported') + + def get_available_symbols(self) -> list: + raise ValueError('KuCoin Futures Testnet is not supported') \ No newline at end of file diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinMain.py b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinMain.py new file mode 100644 index 000000000..3ef9251b5 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinMain.py @@ -0,0 +1,169 @@ +import ccxt +import jesse.helpers as jh +from jesse.modes.import_candles_mode.drivers.interface import CandleExchange +from typing import Union +from .kucoin_utils import timeframe_to_interval +import time + + +class KuCoinMain(CandleExchange): + def __init__( + self, + name: str, + rest_endpoint: str, + backup_exchange_class, + ) -> None: + super().__init__( + name=name, + count=1500, # KuCoin allows up to 1500 candles per request + rate_limit_per_second=10, # KuCoin rate limit + backup_exchange_class=backup_exchange_class + ) + + self.endpoint = rest_endpoint + # Initialize CCXT exchange + self.exchange = ccxt.kucoin({ + 'apiKey': '', # No API key needed for public data + 'secret': '', + 'password': '', + 'sandbox': 'testnet' in name.lower(), + 'enableRateLimit': True, + 'timeout': 30000, + }) + + def _convert_timeframe(self, timeframe: str) -> str: + """Convert Jesse timeframe to CCXT timeframe format""" + timeframe_map = { + '1m': '1m', + '3m': '3m', + '5m': '5m', + '15m': '15m', + '30m': '30m', + '1h': '1h', + '2h': '2h', + '4h': '4h', + '6h': '6h', + '8h': '8h', + '12h': '12h', + '1D': '1d', + '1W': '1w', + '1M': '1M' + } + + if timeframe not in timeframe_map: + raise ValueError(f"Unsupported timeframe: {timeframe}") + + return timeframe_map[timeframe] + + def _convert_symbol(self, symbol: str) -> str: + """Convert Jesse symbol format to CCXT format""" + # Jesse uses BTC-USDT, CCXT uses BTC/USDT + return symbol.replace('-', '/') + + def get_starting_time(self, symbol: str) -> int: + """ + Get the earliest available timestamp for a symbol + """ + try: + ccxt_symbol = self._convert_symbol(symbol) + + # Try to get data from a reasonable start date (2020-01-01) + start_date = 1577836800000 # 2020-01-01 00:00:00 UTC + + # Get the earliest available data + ohlcv = self.exchange.fetch_ohlcv( + ccxt_symbol, + '1d', + since=start_date, + limit=1 + ) + + if not ohlcv: + # If no data from 2020, try from 2017 + start_date = 1483228800000 # 2017-01-01 00:00:00 UTC + ohlcv = self.exchange.fetch_ohlcv( + ccxt_symbol, + '1d', + since=start_date, + limit=1 + ) + + if not ohlcv: + raise ValueError(f"No data available for symbol {symbol}") + + # Get the first available timestamp + first_timestamp = ohlcv[0][0] + # Add one day to ensure we have complete 1m data + return first_timestamp + 60_000 * 1440 + + except Exception as e: + # If all else fails, return a reasonable default + print(f"Warning: Could not get starting time for {symbol}: {str(e)}") + return 1577836800000 # 2020-01-01 00:00:00 UTC + + def fetch(self, symbol: str, start_timestamp: int, timeframe: str = '1m') -> Union[list, None]: + try: + ccxt_symbol = self._convert_symbol(symbol) + ccxt_timeframe = self._convert_timeframe(timeframe) + + # Calculate end timestamp + end_timestamp = start_timestamp + (self.count - 1) * 60000 * jh.timeframe_to_one_minutes(timeframe) + + # Fetch OHLCV data + ohlcv = self.exchange.fetch_ohlcv( + ccxt_symbol, + 
ccxt_timeframe, + since=start_timestamp, + limit=self.count + ) + + if not ohlcv: + return [] + + # Convert to Jesse format + candles = [] + for candle in ohlcv: + candles.append({ + 'id': jh.generate_unique_id(), + 'exchange': self.name, + 'symbol': symbol, + 'timeframe': timeframe, + 'timestamp': int(candle[0]), + 'open': float(candle[1]), + 'high': float(candle[2]), + 'low': float(candle[3]), + 'close': float(candle[4]), + 'volume': float(candle[5]) + }) + + return candles + + except Exception as e: + print(f"Error fetching candles for {symbol}: {str(e)}") + return [] + + def get_available_symbols(self) -> list: + try: + markets = self.exchange.load_markets() + + # Filter only trading symbols + trading_symbols = [] + for symbol, market in markets.items(): + if market.get('active', False) and market.get('type') == 'spot': + # Convert from CCXT format (BTC/USDT) to Jesse format (BTC-USDT) + jesse_symbol = symbol.replace('/', '-') + trading_symbols.append(jesse_symbol) + + return trading_symbols + + except Exception as e: + print(f"Error getting available symbols: {str(e)}") + return [] + + def __del__(self): + """Cleanup method""" + if hasattr(self, 'exchange'): + try: + self.exchange.close() + except: + pass \ No newline at end of file diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinSpot.py b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinSpot.py new file mode 100644 index 000000000..1a57c9179 --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/KuCoin/KuCoinSpot.py @@ -0,0 +1,11 @@ +from .KuCoinMain import KuCoinMain +from jesse.enums import exchanges + + +class KuCoinSpot(KuCoinMain): + def __init__(self) -> None: + super().__init__( + name=exchanges.KUCOIN_SPOT, + rest_endpoint='https://api.kucoin.com', + backup_exchange_class=None + ) diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/__init__.py b/jesse/modes/import_candles_mode/drivers/KuCoin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/jesse/modes/import_candles_mode/drivers/KuCoin/kucoin_utils.py b/jesse/modes/import_candles_mode/drivers/KuCoin/kucoin_utils.py new file mode 100644 index 000000000..55913c95c --- /dev/null +++ b/jesse/modes/import_candles_mode/drivers/KuCoin/kucoin_utils.py @@ -0,0 +1,25 @@ +def timeframe_to_interval(timeframe: str) -> str: + """ + Convert Jesse timeframe to KuCoin interval format + """ + timeframe_map = { + '1m': '1min', + '3m': '3min', + '5m': '5min', + '15m': '15min', + '30m': '30min', + '1h': '1hour', + '2h': '2hour', + '4h': '4hour', + '6h': '6hour', + '8h': '8hour', + '12h': '12hour', + '1D': '1day', + '1W': '1week', + '1M': '1month' + } + + if timeframe not in timeframe_map: + raise ValueError(f"Unsupported timeframe: {timeframe}") + + return timeframe_map[timeframe] \ No newline at end of file diff --git a/jesse/modes/import_candles_mode/drivers/__init__.py b/jesse/modes/import_candles_mode/drivers/__init__.py index c4a228ca1..6c4be718a 100644 --- a/jesse/modes/import_candles_mode/drivers/__init__.py +++ b/jesse/modes/import_candles_mode/drivers/__init__.py @@ -19,6 +19,12 @@ from jesse.modes.import_candles_mode.drivers.Gate.GateSpot import GateSpot from jesse.modes.import_candles_mode.drivers.Hyperliquid.HyperliquidPerpetual import HyperliquidPerpetual from jesse.modes.import_candles_mode.drivers.Hyperliquid.HyperliquidPerpetualTestnet import HyperliquidPerpetualTestnet +# KuCoin imports +from jesse.modes.import_candles_mode.drivers.KuCoin.KuCoinSpot import KuCoinSpot +from 
jesse.modes.import_candles_mode.drivers.KuCoin.KuCoinFutures import KuCoinFutures
+from jesse.modes.import_candles_mode.drivers.KuCoin.KuCoinFuturesTestnet import KuCoinFuturesTestnet
+# CustomCSV imports
+from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV
 
 
 drivers = {
@@ -39,11 +45,17 @@
     exchanges.GATE_SPOT: GateSpot,
     exchanges.HYPERLIQUID_PERPETUAL: HyperliquidPerpetual,
     exchanges.HYPERLIQUID_PERPETUAL_TESTNET: HyperliquidPerpetualTestnet,
+    # KuCoin
+    exchanges.KUCOIN_SPOT: KuCoinSpot,
+    exchanges.KUCOIN_FUTURES: KuCoinFutures,
+    exchanges.KUCOIN_FUTURES_TESTNET: KuCoinFuturesTestnet,
     # Spot
     exchanges.BINANCE_SPOT: BinanceSpot,
     exchanges.BINANCE_US_SPOT: BinanceUSSpot,
     exchanges.BYBIT_SPOT_TESTNET: BybitSpotTestnet,
     exchanges.BYBIT_SPOT: BybitSpot,
+    # CustomCSV
+    exchanges.CUSTOM_CSV: CustomCSV,
 }
diff --git a/jesse/modes/optimize_mode/Optimize.py b/jesse/modes/optimize_mode/Optimize.py
index 1349a4833..50ac25161 100644
--- a/jesse/modes/optimize_mode/Optimize.py
+++ b/jesse/modes/optimize_mode/Optimize.py
@@ -16,6 +16,8 @@
 from jesse.services.redis import is_process_active
 from jesse.models.OptimizationSession import update_optimization_session_status, update_optimization_session_trials, get_optimization_session, get_optimization_session_by_id
 import traceback
+from dotenv import load_dotenv
+
 
 # Define a Ray-compatible remote function
@@ -90,6 +92,8 @@
         optimal_total: int,
         cpu_cores: int,
     ) -> None:
+
+        load_dotenv()
         # Check for Python 3.13 first thing
         if jh.python_version() == (3, 13):
             raise ValueError(
@@ -156,7 +160,11 @@
         # Initialize Ray if not already
         if not ray.is_initialized():
             try:
-                ray.init(num_cpus=self.cpu_cores, ignore_reinit_error=True)
+                # Initialize Ray with the configured number of CPU cores
+                ray.init(
+                    num_cpus=self.cpu_cores,
+                    ignore_reinit_error=True,
+                )
                 logger.log_optimize_mode(f"Successfully started optimization session with {self.cpu_cores} CPU cores")
             except Exception as e:
                 logger.log_optimize_mode(f"Error initializing Ray: {e}. Falling back to 1 CPU.")
diff --git a/jesse/research/external_data/csv_ticks_to_db.py b/jesse/research/external_data/csv_ticks_to_db.py
new file mode 100644
index 000000000..b38d65432
--- /dev/null
+++ b/jesse/research/external_data/csv_ticks_to_db.py
@@ -0,0 +1,315 @@
+"""
+CSV Ticks to Database Loader
+Loads all available CSV data into the Jesse database for backtesting and optimization.
+"""
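+
+# NOTE: the token below is specific to a local Jesse instance. In practice you
+# would likely read it from the environment instead, e.g. (hypothetical
+# variable name):
+#   import os
+#   AUTHORIZATION = os.getenv("JESSE_API_TOKEN", "")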
+""" + +AUTHORIZATION = "ef260e9aa3c673af240d17a2660480361a8e081d1ffeca2a5ed0e3219fc18567" +BASE_URL = "http://localhost:9000" + +import requests +import time +import json +from datetime import datetime +from typing import List, Dict, Optional + +class CSVDataLoader: + """Класс для загрузки CSV данных в базу Jesse""" + + def __init__(self, base_url: str, authorization: str): + self.base_url = base_url + self.headers = {"Authorization": authorization} + self.session = requests.Session() + self.session.headers.update(self.headers) + + def get_available_symbols(self) -> List[str]: + """Получить список доступных символов""" + try: + response = self.session.get(f"{self.base_url}/csv/symbols") + response.raise_for_status() + data = response.json() + return data.get('symbols', []) + except Exception as e: + print(f"Ошибка получения символов: {e}") + return [] + + def get_symbol_info(self, symbol: str) -> Optional[Dict]: + """Получить информацию о символе""" + try: + response = self.session.get(f"{self.base_url}/csv/symbols/{symbol}/info") + response.raise_for_status() + data = response.json() + return data.get('info') + except Exception as e: + print(f"Ошибка получения информации о {symbol}: {e}") + return None + + def get_available_timeframes(self, symbol: str) -> List[str]: + """Получить доступные таймфреймы для символа""" + try: + response = self.session.get(f"{self.base_url}/csv/symbols/{symbol}/timeframes") + response.raise_for_status() + data = response.json() + return data.get('timeframes', []) + except Exception as e: + print(f"Ошибка получения таймфреймов для {symbol}: {e}") + return [] + + def preview_data(self, symbol: str, limit: int = 10) -> Optional[Dict]: + """Предварительный просмотр данных""" + try: + response = self.session.get(f"{self.base_url}/csv/preview/{symbol}?limit={limit}") + response.raise_for_status() + return response.json() + except Exception as e: + print(f"Ошибка предварительного просмотра {symbol}: {e}") + return None + + def import_symbol(self, symbol: str, timeframe: str = "1m", + exchange: str = "custom", + start_date: Optional[str] = None, + finish_date: Optional[str] = None) -> bool: + """Импортировать символ в базу данных""" + try: + payload = { + "symbol": symbol, + "timeframe": timeframe, + "exchange": exchange + } + + if start_date: + payload["start_date"] = start_date + if finish_date: + payload["finish_date"] = finish_date + + response = self.session.post( + f"{self.base_url}/csv/import", + json=payload + ) + response.raise_for_status() + + data = response.json() + print(f"✅ {symbol}: {data.get('message', 'Импортирован успешно')}") + return True + + except Exception as e: + print(f"❌ Ошибка импорта {symbol}: {e}") + return False + + def get_candles(self, symbol: str, timeframe: str = "1m", + start_date: Optional[str] = None, + finish_date: Optional[str] = None, + limit: int = 100) -> Optional[Dict]: + """Получить свечи для символа""" + try: + params = { + "symbol": symbol, + "timeframe": timeframe, + "limit": limit + } + + if start_date: + params["start_date"] = start_date + if finish_date: + params["finish_date"] = finish_date + + response = self.session.get(f"{self.base_url}/csv/candles", params=params) + response.raise_for_status() + return response.json() + + except Exception as e: + print(f"Ошибка получения свечей для {symbol}: {e}") + return None + + def clear_cache(self) -> bool: + """Очистить кэш""" + try: + response = self.session.post(f"{self.base_url}/csv/clear-cache") + response.raise_for_status() + print("✅ Кэш очищен") + return True + 
+
+
+def load_all_data(timeframe: str = "1m",
+                  max_symbols: Optional[int] = None,
+                  start_date: Optional[str] = None,
+                  finish_date: Optional[str] = None,
+                  preview_only: bool = False):
+    """
+    Load all available data
+
+    Args:
+        timeframe: Timeframe to load (default: "1m")
+        max_symbols: Maximum number of symbols to load
+        start_date: Start date (format: "2023-01-01")
+        finish_date: End date (format: "2023-12-31")
+        preview_only: Preview only, without importing
+    """
+
+    print("🚀 Starting CSV data load into Jesse...")
+    print(f"Timeframe: {timeframe}")
+    if start_date:
+        print(f"Start date: {start_date}")
+    if finish_date:
+        print(f"End date: {finish_date}")
+    print("-" * 50)
+
+    # Initialize the loader
+    loader = CSVDataLoader(BASE_URL, AUTHORIZATION)
+
+    # Fetch the list of symbols
+    print("📋 Fetching the list of available symbols...")
+    symbols = loader.get_available_symbols()
+
+    if not symbols:
+        print("❌ No symbols found!")
+        return
+
+    print(f"✅ Found {len(symbols)} symbols")
+
+    # Limit the number of symbols if requested
+    if max_symbols and max_symbols < len(symbols):
+        symbols = symbols[:max_symbols]
+        print(f"🔄 Limiting to {max_symbols} symbols")
+
+    # Statistics
+    successful_imports = 0
+    failed_imports = 0
+    total_candles = 0
+
+    start_time = time.time()
+
+    for i, symbol in enumerate(symbols, 1):
+        print(f"\n[{i}/{len(symbols)}] Processing {symbol}...")
+
+        # Fetch symbol info
+        info = loader.get_symbol_info(symbol)
+        if info:
+            print(f"  📊 Period: {info['start_date']} - {info['end_date']}")
+            print(f"  📁 File size: {info['file_size']:,} bytes")
+
+        # Preview
+        if preview_only:
+            preview = loader.preview_data(symbol, limit=5)
+            if preview:
+                print("  👀 Preview:")
+                for row in preview.get('preview', [])[:3]:
+                    print(f"    {row}")
+            continue
+
+        # Import the data
+        success = loader.import_symbol(
+            symbol=symbol,
+            timeframe=timeframe,
+            exchange="custom",
+            start_date=start_date,
+            finish_date=finish_date
+        )
+
+        if success:
+            successful_imports += 1
+
+            # Fetch info about the imported candles
+            candles_data = loader.get_candles(symbol, timeframe, limit=1)
+            if candles_data:
+                candle_count = candles_data.get('count', 0)
+                total_candles += candle_count
+                print(f"  📈 Loaded {candle_count:,} candles")
+        else:
+            failed_imports += 1
+
+        # Short pause between requests
+        time.sleep(0.1)
+
+    # Final statistics
+    end_time = time.time()
+    duration = end_time - start_time
+
+    print("\n" + "=" * 50)
+    print("📊 FINAL STATISTICS")
+    print("=" * 50)
+    print(f"✅ Successfully imported: {successful_imports}")
+    print(f"❌ Import errors: {failed_imports}")
+    print(f"📈 Total candles: {total_candles:,}")
+    print(f"⏱️ Elapsed time: {duration:.2f} seconds")
+    print(f"⚡ Throughput: {successful_imports/duration:.2f} symbols/sec")
+
+    if not preview_only:
+        print("\n🎉 Data is ready for backtesting!")
+        print("Use exchange: 'custom' in your backtest configuration")
+
+
+def load_specific_symbols(symbols: List[str], timeframe: str = "1m"):
+    """Load specific symbols"""
+    print(f"🎯 Loading specific symbols: {symbols}")
+
+    loader = CSVDataLoader(BASE_URL, AUTHORIZATION)
+
+    # Fetch the available symbols once, outside the loop
+    available_symbols = loader.get_available_symbols()
+
+    for symbol in symbols:
+        print(f"\n📊 Loading {symbol}...")
+
+        # Check that the symbol is available
+        if symbol not in available_symbols:
+            print(f"❌ Symbol {symbol} not found among available symbols")
{symbol} not found in the available data")
+            continue
+
+        # Import it
+        success = loader.import_symbol(symbol, timeframe, "custom")
+        if success:
+            print(f"✅ {symbol} imported successfully")
+        else:
+            print(f"❌ Failed to import {symbol}")
+
+
+#%%
+# Main entry-point helpers
+
+def quick_preview():
+    """Quick preview of the data"""
+    print("🔍 Quick preview...")
+    load_all_data(preview_only=True, max_symbols=5)
+
+def load_sample_data():
+    """Load a sample of the data (first 10 symbols)"""
+    print("📦 Loading sample data...")
+    load_all_data(max_symbols=10)
+
+def load_all_data_full():
+    """Load all available data"""
+    print("🌍 Loading all available data...")
+    load_all_data()
+
+def load_custom_date_range():
+    """Load data for a specific period"""
+    print("📅 Loading data for a specific period...")
+    load_all_data(
+        start_date="2023-01-01",
+        finish_date="2023-12-31"
+    )
+
+#%%
+# Usage examples:
+
+if __name__ == "__main__":
+    # Pick one of the options:
+
+    # 1. Quick preview
+    # quick_preview()
+
+    # 2. Load sample data
+    # load_sample_data()
+
+    # 3. Load all data
+    # load_all_data_full()
+
+    # 4. Load specific symbols
+    # load_specific_symbols(["ACH", "BTC", "ETH"])
+
+    # 5. Load data for a period
+    # load_custom_date_range()
+
+    # Default: quick preview
+    quick_preview()
diff --git a/jesse/research/import_candles.py b/jesse/research/import_candles.py
index ab33356ca..9d0704c48 100644
--- a/jesse/research/import_candles.py
+++ b/jesse/research/import_candles.py
@@ -2,6 +2,7 @@ def import_candles(
     exchange: str,
     symbol: str,
     start_date: str,
+    end_date: str = None,
     show_progressbar: bool = True,
 ) -> str:
     from jesse.modes.import_candles_mode import run
@@ -11,6 +12,7 @@ def import_candles(
         exchange=exchange,
         symbol=symbol,
         start_date_str=start_date,
+        end_date_str=end_date,
         running_via_dashboard=False,
         show_progressbar=show_progressbar
     )
diff --git a/jesse/research/optimization.py b/jesse/research/optimization.py
new file mode 100644
index 000000000..c6d7aa7aa
--- /dev/null
+++ b/jesse/research/optimization.py
@@ -0,0 +1,542 @@
+from typing import List, Dict, Tuple
+import copy
+import jesse.helpers as jh
+from jesse.research.backtest import _isolated_backtest
+
+
+def optimization(
+    config: dict,
+    routes: List[Dict[str, str]],
+    data_routes: List[Dict[str, str]],
+    training_candles: dict,
+    testing_candles: dict,
+    training_warmup_candles: dict = None,
+    testing_warmup_candles: dict = None,
+    strategy_hp: List[Dict] = None,
+    optimal_total: int = 100,
+    n_trials: int = 200,
+    fast_mode: bool = False,
+    cpu_cores: int = 1,
+    objective_function: str = 'sharpe',
+    generate_tradingview: bool = False,
+    generate_hyperparameters: bool = False,
+    generate_equity_curve: bool = False,
+    benchmark: bool = False,
+    generate_csv: bool = False,
+    generate_json: bool = False,
+    generate_logs: bool = False,
+) -> dict:
+    """
+    An isolated optimization() function, ideal for research and AI training,
+    similar to our own optimization mode. Because it is a pure function, it can be used
+    in Python's multiprocessing without worrying about pickling issues.
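+
+    Returns a dict with the keys 'best_trial', 'all_trials', 'total_trials',
+    'best_score' and 'best_params'.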
+
+    Example `config`:
+    {
+        'starting_balance': 5_000,
+        'fee': 0.005,
+        'type': 'futures',
+        'futures_leverage': 3,
+        'futures_leverage_mode': 'cross',
+        'exchange': 'Binance',
+        'warm_up_candles': 0
+    }
+
+    Example `route`:
+    [{'exchange': 'Bybit USDT Perpetual', 'strategy': 'A1', 'symbol': 'BTC-USDT', 'timeframe': '1m'}]
+
+    Example `data_route`:
+    [{'exchange': 'Bybit USDT Perpetual', 'symbol': 'BTC-USDT', 'timeframe': '3m'}]
+
+    Example `training_candles` and `testing_candles`:
+    {
+        'Binance-BTC-USDT': {
+            'exchange': 'Binance',
+            'symbol': 'BTC-USDT',
+            'candles': np.array([]),
+        },
+    }
+
+    Example `strategy_hp`:
+    [
+        {'name': 'rsi_period', 'type': 'int', 'min': 10, 'max': 30},
+        {'name': 'rsi_threshold', 'type': 'float', 'min': 20.0, 'max': 40.0},
+        {'name': 'ema_period', 'type': 'int', 'min': 20, 'max': 50}
+    ]
+    """
+    return _isolated_optimization(
+        config,
+        routes,
+        data_routes,
+        training_candles,
+        testing_candles,
+        training_warmup_candles,
+        testing_warmup_candles,
+        strategy_hp,
+        optimal_total,
+        n_trials,
+        fast_mode,
+        cpu_cores,
+        objective_function,
+        generate_tradingview=generate_tradingview,
+        generate_csv=generate_csv,
+        generate_json=generate_json,
+        generate_equity_curve=generate_equity_curve,
+        benchmark=benchmark,
+        generate_hyperparameters=generate_hyperparameters,
+        generate_logs=generate_logs,
+    )
+
+
+def _isolated_optimization(
+    config: dict,
+    routes: List[Dict[str, str]],
+    data_routes: List[Dict[str, str]],
+    training_candles: dict,
+    testing_candles: dict,
+    training_warmup_candles: dict = None,
+    testing_warmup_candles: dict = None,
+    strategy_hp: List[Dict] = None,
+    optimal_total: int = 100,
+    n_trials: int = 200,
+    fast_mode: bool = False,
+    cpu_cores: int = 1,
+    objective_function: str = 'sharpe',
+    generate_tradingview: bool = False,
+    generate_hyperparameters: bool = False,
+    generate_equity_curve: bool = False,
+    benchmark: bool = False,
+    generate_csv: bool = False,
+    generate_json: bool = False,
+    generate_logs: bool = False,
+) -> dict:
+    """
+    Internal isolated optimization function that can be used in multiprocessing.
+    """
+    from jesse.services.validators import validate_routes
+    from jesse.config import config as jesse_config, reset_config
+    from jesse.routes import router
+    from jesse.store import store
+    from jesse.config import set_config
+    from jesse.services.candle import inject_warmup_candles_to_store
+
+    # Check Python version for Ray compatibility
+    if jh.python_version() == (3, 13):
+        raise ValueError(
+            'Optimization is not supported on Python 3.13. The Ray library used for optimization does not support Python 3.13 yet. Please use Python 3.12 or lower.'
+        )
+
+    jesse_config['app']['trading_mode'] = 'optimize'
+
+    # inject (formatted) configuration values
+    set_config(_format_config(config))
+
+    # set routes
+    router.initiate(routes, data_routes)
+
+    validate_routes(router)
+
+    # initiate candle store
+    store.candles.init_storage(5000)
+
+    # assert that the passed candles are 1m candles
+    for key, value in training_candles.items():
+        candle_set = value['candles']
+        if len(candle_set) > 1 and candle_set[1][0] - candle_set[0][0] != 60_000:
+            raise ValueError(
+                f'Training candles passed to the research.optimization() must be 1m candles. '
+                f'\nIf you wish to trade other timeframes, note that you need to pass them through '
+                f'the timeframe option in your routes. '
+                f'\nThe difference between your candles is {candle_set[1][0] - candle_set[0][0]} milliseconds, which is more than '
+                f'the accepted 60000 milliseconds.'
+            )
+
+    for key, value in testing_candles.items():
+        candle_set = value['candles']
+        if len(candle_set) > 1 and candle_set[1][0] - candle_set[0][0] != 60_000:
+            raise ValueError(
+                f'Testing candles passed to the research.optimization() must be 1m candles. '
+                f'\nIf you wish to trade other timeframes, note that you need to pass them through '
+                f'the timeframe option in your routes. '
+                f'\nThe difference between your candles is {candle_set[1][0] - candle_set[0][0]} milliseconds, which is more than '
+                f'the accepted 60000 milliseconds.'
+            )
+
+    # make copies so we don't mutate the passed data, which could cause issues for multiprocessing tasks
+    training_candles_dict = copy.deepcopy(training_candles)
+    testing_candles_dict = copy.deepcopy(testing_candles)
+    training_warmup_candles_dict = copy.deepcopy(training_warmup_candles)
+    testing_warmup_candles_dict = copy.deepcopy(testing_warmup_candles)
+
+    # if warmup_candles is passed, use it
+    if training_warmup_candles:
+        for c in jesse_config['app']['considering_candles']:
+            key = jh.key(c[0], c[1])
+            # inject warm-up candles
+            inject_warmup_candles_to_store(
+                training_warmup_candles_dict[key]['candles'],
+                c[0],
+                c[1]
+            )
+
+    if testing_warmup_candles:
+        for c in jesse_config['app']['considering_candles']:
+            key = jh.key(c[0], c[1])
+            # inject warm-up candles
+            inject_warmup_candles_to_store(
+                testing_warmup_candles_dict[key]['candles'],
+                c[0],
+                c[1]
+            )
+
+    # Get strategy hyperparameters if not provided
+    if strategy_hp is None:
+        strategy_class = jh.get_strategy_class(router.routes[0].strategy_name)
+        strategy_hp = strategy_class.hyperparameters(None)
+
+    if not strategy_hp:
+        raise ValueError('Targeted strategy does not implement a valid hyperparameters() method.')
+
+    # Run optimization
+    best_trial, all_trials = _run_optimization(
+        config,
+        routes,
+        data_routes,
+        training_candles_dict,
+        testing_candles_dict,
+        training_warmup_candles_dict,
+        testing_warmup_candles_dict,
+        strategy_hp,
+        optimal_total,
+        n_trials,
+        fast_mode,
+        cpu_cores,
+        objective_function,
+        generate_tradingview=generate_tradingview,
+        generate_csv=generate_csv,
+        generate_json=generate_json,
+        generate_equity_curve=generate_equity_curve,
+        benchmark=benchmark,
+        generate_hyperparameters=generate_hyperparameters,
+        generate_logs=generate_logs,
+    )
+
+    result = {
+        'best_trial': best_trial,
+        'all_trials': all_trials,
+        'total_trials': len(all_trials),
+        'best_score': best_trial.get('score', 0) if best_trial else 0,
+        'best_params': best_trial.get('params', {}) if best_trial else {},
+    }
+
+    # reset store and config so rerunning would be flawlessly possible
+    reset_config()
+    store.reset()
+
+    return result
+
+
+def _run_optimization(
+    config: dict,
+    routes: List[Dict[str, str]],
+    data_routes: List[Dict[str, str]],
+    training_candles: dict,
+    testing_candles: dict,
+    training_warmup_candles: dict,
+    testing_warmup_candles: dict,
+    strategy_hp: List[Dict],
+    optimal_total: int,
+    n_trials: int,
+    fast_mode: bool,
+    cpu_cores: int,
+    objective_function: str,
+    generate_tradingview: bool = False,
+    generate_hyperparameters: bool = False,
+    generate_equity_curve: bool = False,
+    benchmark: bool = False,
+    generate_csv: bool = False,
+    generate_json: bool = False,
+    generate_logs: bool = False,
+) -> Tuple[dict, List[dict]]:
+    """
+    Run the actual optimization process using random search.
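+
+    Returns a (best_trial, all_trials) tuple; all_trials is sorted by score in
+    descending order, and each trial records its params, score, training and
+    testing metrics, and DNA string.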
+ """ + + all_trials = [] + best_trial = None + best_score = -float('inf') + + # Format config for backtest + backtest_config = _format_config_for_backtest(config, routes[0]['exchange']) + + for trial_num in range(n_trials): + # Generate random hyperparameters + hp = _generate_random_hyperparameters(strategy_hp) + + try: + # Evaluate fitness + score, training_metrics, testing_metrics = _evaluate_fitness( + backtest_config, + routes, + data_routes, + training_candles, + testing_candles, + training_warmup_candles, + testing_warmup_candles, + hp, + optimal_total, + objective_function, + fast_mode, + generate_tradingview=generate_tradingview, + generate_csv=generate_csv, + generate_json=generate_json, + generate_equity_curve=generate_equity_curve, + benchmark=benchmark, + generate_hyperparameters=generate_hyperparameters, + generate_logs=generate_logs, + ) + + # Create trial result + trial_result = { + 'trial_number': trial_num + 1, + 'params': hp, + 'score': score, + 'training_metrics': training_metrics, + 'testing_metrics': testing_metrics, + 'dna': _encode_params_to_dna(hp) + } + + all_trials.append(trial_result) + + # Update best trial if this is better + if score > best_score: + best_score = score + best_trial = trial_result + + except Exception as e: + # Log error and continue with next trial + print(f"Trial {trial_num + 1} failed: {str(e)}") + continue + + # Sort trials by score (descending) + all_trials.sort(key=lambda x: x['score'], reverse=True) + + return best_trial, all_trials + + +def _generate_random_hyperparameters(strategy_hp: List[Dict]) -> dict: + """ + Generate random hyperparameters based on strategy configuration. + """ + import numpy as np + + hp = {} + for param in strategy_hp: + param_name = str(param['name']) + param_type = param['type'] + + # Convert to string whether input is type class or string + if isinstance(param_type, type): + param_type = param_type.__name__ + else: + # Remove quotes if they exist + param_type = param_type.strip("'").strip('"') + + if param_type == 'int': + if 'step' in param and param['step'] is not None: + steps = (param['max'] - param['min']) // param['step'] + 1 + value = param['min'] + np.random.randint(0, steps) * param['step'] + else: + value = np.random.randint(param['min'], param['max'] + 1) + hp[param_name] = value + elif param_type == 'float': + if 'step' in param and param['step'] is not None: + steps = int((param['max'] - param['min']) / param['step']) + 1 + value = param['min'] + np.random.randint(0, steps) * param['step'] + else: + value = np.random.uniform(param['min'], param['max']) + hp[param_name] = value + elif param_type == 'categorical': + options = param['options'] + hp[param_name] = options[np.random.randint(0, len(options))] + else: + raise ValueError(f"Unsupported hyperparameter type: {param_type}") + + return hp + + +def _evaluate_fitness( + config: dict, + routes: List[Dict[str, str]], + data_routes: List[Dict[str, str]], + training_candles: dict, + testing_candles: dict, + training_warmup_candles: dict, + testing_warmup_candles: dict, + hp: dict, + optimal_total: int, + objective_function: str, + fast_mode: bool, + generate_tradingview: bool = False, + generate_hyperparameters: bool = False, + generate_equity_curve: bool = False, + benchmark: bool = False, + generate_csv: bool = False, + generate_json: bool = False, + generate_logs: bool = False, +) -> Tuple[float, dict, dict]: + """ + Evaluate fitness of hyperparameters by running backtests. 
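+
+    Scoring sketch (mirrors the body below):
+
+        total_effect_rate = min(log10(total_trades) / log10(optimal_total), 1)
+        score = total_effect_rate * ratio_normalized
+
+    Configurations with five or fewer trades, or a negative objective ratio,
+    receive a near-zero score of 0.0001 so they rank last.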
+ """ + from math import log10 + + # Run training backtest + training_result = _isolated_backtest( + config, + routes, + data_routes, + candles=training_candles, + warmup_candles=training_warmup_candles, + hyperparameters=hp, + fast_mode=fast_mode, + generate_tradingview=generate_tradingview, + generate_csv=generate_csv, + generate_json=generate_json, + generate_equity_curve=generate_equity_curve, + benchmark=benchmark, + generate_hyperparameters=generate_hyperparameters, + generate_logs=generate_logs, + ) + + training_metrics = training_result['metrics'] + + # Calculate fitness score + if training_metrics['total'] > 5: + total_effect_rate = log10(training_metrics['total']) / log10(optimal_total) + total_effect_rate = min(total_effect_rate, 1) + + # Get the ratio based on objective function + if objective_function == 'sharpe': + ratio = training_metrics['sharpe_ratio'] + ratio_normalized = jh.normalize(ratio, -.5, 5) + elif objective_function == 'calmar': + ratio = training_metrics['calmar_ratio'] + ratio_normalized = jh.normalize(ratio, -.5, 30) + elif objective_function == 'sortino': + ratio = training_metrics['sortino_ratio'] + ratio_normalized = jh.normalize(ratio, -.5, 15) + elif objective_function == 'omega': + ratio = training_metrics['omega_ratio'] + ratio_normalized = jh.normalize(ratio, -.5, 5) + elif objective_function == 'serenity': + ratio = training_metrics['serenity_index'] + ratio_normalized = jh.normalize(ratio, -.5, 15) + elif objective_function == 'smart sharpe': + ratio = training_metrics['smart_sharpe'] + ratio_normalized = jh.normalize(ratio, -.5, 5) + elif objective_function == 'smart sortino': + ratio = training_metrics['smart_sortino'] + ratio_normalized = jh.normalize(ratio, -.5, 15) + else: + raise ValueError( + f'The entered ratio configuration `{objective_function}` for the optimization is unknown. ' + f'Choose between sharpe, calmar, sortino, serenity, smart sharpe, smart sortino and omega.' + ) + + # If the ratio is negative then the configuration is not usable + if ratio < 0: + return 0.0001, training_metrics, {} + + # Run testing backtest + testing_result = _isolated_backtest( + config, + routes, + data_routes, + candles=testing_candles, + warmup_candles=testing_warmup_candles, + hyperparameters=hp, + fast_mode=fast_mode, + generate_tradingview=generate_tradingview, + generate_csv=generate_csv, + generate_json=generate_json, + generate_equity_curve=generate_equity_curve, + benchmark=benchmark, + generate_hyperparameters=generate_hyperparameters, + generate_logs=generate_logs, + ) + + testing_metrics = testing_result['metrics'] + + # Calculate fitness score + score = total_effect_rate * ratio_normalized + import numpy as np + if np.isnan(score): + score = 0.0001 + else: + score = 0.0001 + training_metrics = {} + testing_metrics = {} + + return score, training_metrics, testing_metrics + + +def _format_config_for_backtest(config: dict, exchange: str) -> dict: + """ + Format config for backtest function. + """ + return { + 'starting_balance': config['starting_balance'], + 'fee': config['fee'], + 'type': config['type'], + 'futures_leverage': config['futures_leverage'], + 'futures_leverage_mode': config['futures_leverage_mode'], + 'exchange': exchange, + 'warm_up_candles': config['warm_up_candles'] + } + + +def _encode_params_to_dna(params: dict) -> str: + """ + Encode parameters to DNA (base64) for identification. 
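+
+    Example (hypothetical params):
+
+        _encode_params_to_dna({'rsi_period': 14})
+        # -> base64 of '{"rsi_period": 14}'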
+ """ + import base64 + import json + + params_str = json.dumps(params, sort_keys=True) + return base64.b64encode(params_str.encode()).decode() + + +def _format_config(config): + """ + Jesse's required format for user_config is different from what this function accepts (so it + would be easier to write for the researcher). Hence, we need to reformat the config_dict: + """ + exchange_config = { + 'balance': config['starting_balance'], + 'fee': config['fee'], + 'type': config['type'], + 'name': config['exchange'], + } + # futures exchange has different config, so: + if exchange_config['type'] == 'futures': + exchange_config['futures_leverage'] = config['futures_leverage'] + exchange_config['futures_leverage_mode'] = config['futures_leverage_mode'] + + return { + 'exchanges': { + config['exchange']: exchange_config + }, + 'logging': { + 'balance_update': True, + 'order_cancellation': True, + 'order_execution': True, + 'order_submission': True, + 'position_closed': True, + 'position_increased': True, + 'position_opened': True, + 'position_reduced': True, + 'shorter_period_candles': False, + 'trading_candles': True + }, + 'warm_up_candles': config['warm_up_candles'] + } diff --git a/jesse/services/candle.py b/jesse/services/candle.py index 563c31fb2..db7a2ed24 100644 --- a/jesse/services/candle.py +++ b/jesse/services/candle.py @@ -16,10 +16,43 @@ def generate_candle_from_one_minutes( if len(candles) == 0: raise ValueError('No candles were passed') - if not accept_forming_candles and len(candles) != jh.timeframe_to_one_minutes(timeframe): - raise ValueError( - f'Sent only {len(candles)} candles but {jh.timeframe_to_one_minutes(timeframe)} is required to create a "{timeframe}" candle.' - ) + required_candles = jh.timeframe_to_one_minutes(timeframe) + + if not accept_forming_candles and len(candles) != required_candles: + # Check if we should fill missing candles + fill_missing = jh.get_config('env.data.fill_missing_candles', True) + + if fill_missing and len(candles) < required_candles: + # Log warning about missing data + from jesse.services.logger import info + info( + f'Insufficient data for {timeframe} candle: only {len(candles)} candles available, ' + f'but {required_candles} required. Filling with empty candles.' + ) + + # Create empty candles to fill the gap + empty_candles = [] + last_timestamp = candles[-1][0] if len(candles) > 0 else 0 + last_price = candles[-1][2] if len(candles) > 0 else 0 + + for i in range(required_candles - len(candles)): + # Create empty candle with open=close=last_price, volume=0 + empty_candle = np.array([ + last_timestamp + (i + 1) * 60_000, # timestamp + last_price, # open + last_price, # close + last_price, # high + last_price, # low + 0 # volume + ]) + empty_candles.append(empty_candle) + + # Combine original candles with empty ones + candles = np.concatenate([candles, np.array(empty_candles)]) + else: + raise ValueError( + f'Sent only {len(candles)} candles but {required_candles} is required to create a "{timeframe}" candle.' 
+ ) return np.array([ candles[0][0], @@ -86,14 +119,14 @@ def split_candle(candle: np.ndarray, price: float) -> tuple: o = candle[1] c = candle[2] h = candle[3] - l = candle[4] + low = candle[4] v = candle[5] - if is_bullish(candle) and l < price < o: + if is_bullish(candle) and low < price < o: return np.array([ timestamp, o, price, o, price, v ]), np.array([ - timestamp, price, c, h, l, v + timestamp, price, c, h, low, v ]) elif price == o: return candle, candle @@ -101,17 +134,17 @@ def split_candle(candle: np.ndarray, price: float) -> tuple: return np.array([ timestamp, o, price, price, o, v ]), np.array([ - timestamp, price, c, h, l, v + timestamp, price, c, h, low, v ]) - elif is_bearish(candle) and l < price < c: + elif is_bearish(candle) and low < price < c: return np.array([ timestamp, o, price, h, price, v ]), np.array([ - timestamp, price, c, c, l, v + timestamp, price, c, c, low, v ]) elif is_bullish(candle) and c < price < h: return np.array([ - timestamp, o, price, price, l, v + timestamp, o, price, price, low, v ]), np.array([ timestamp, price, c, h, c, v ]), @@ -119,11 +152,11 @@ def split_candle(candle: np.ndarray, price: float) -> tuple: return np.array([ timestamp, o, c, h, c, v ]), np.array([ - timestamp, price, price, price, l, v + timestamp, price, price, price, low, v ]) elif is_bullish(candle) and price == c: return np.array([ - timestamp, o, c, c, l, v + timestamp, o, c, c, low, v ]), np.array([ timestamp, price, price, h, price, v ]) @@ -131,23 +164,23 @@ def split_candle(candle: np.ndarray, price: float) -> tuple: return np.array([ timestamp, o, h, h, o, v ]), np.array([ - timestamp, h, c, h, l, v + timestamp, h, c, h, low, v ]) - elif is_bullish(candle) and price == l: + elif is_bullish(candle) and price == low: return np.array([ - timestamp, o, l, o, l, v + timestamp, o, low, o, low, v ]), np.array([ - timestamp, l, c, h, l, v + timestamp, low, c, h, low, v ]) - elif is_bearish(candle) and price == l: + elif is_bearish(candle) and price == low: return np.array([ - timestamp, o, l, h, l, v + timestamp, o, low, h, low, v ]), np.array([ - timestamp, l, c, c, l, v + timestamp, low, c, c, low, v ]) elif is_bullish(candle) and price == h: return np.array([ - timestamp, o, h, h, l, v + timestamp, o, h, h, low, v ]), np.array([ timestamp, h, c, h, c, v ]) @@ -155,11 +188,11 @@ def split_candle(candle: np.ndarray, price: float) -> tuple: return np.array([ timestamp, o, price, h, price, v ]), np.array([ - timestamp, price, c, price, l, v + timestamp, price, c, price, low, v ]) elif is_bullish(candle) and o < price < c: return np.array([ - timestamp, o, price, price, l, v + timestamp, o, price, price, low, v ]), np.array([ timestamp, price, c, h, price, v ]) @@ -213,6 +246,13 @@ def get_candles( ) -> Tuple[np.ndarray, np.ndarray]: symbol = symbol.upper() + # Check if this is a CSV data source + if exchange.lower() == 'custom': + return _get_csv_candles( + symbol, timeframe, start_date_timestamp, finish_date_timestamp, + warmup_candles_num, is_for_jesse + ) + # convert start_date and finish_date to timestamps trading_start_date_timestamp = jh.timestamp_to_arrow(start_date_timestamp).floor( 'day').int_timestamp * 1000 @@ -271,7 +311,6 @@ def _get_candles_from_db( # validate finish_date is not in the future current_timestamp = arrow.utcnow().int_timestamp * 1000 if finish_date_timestamp > current_timestamp: - today_str = jh.timestamp_to_date(current_timestamp) yesterday_date = jh.timestamp_to_date(current_timestamp - 86400000) raise InvalidDateRange(f'The finish date 
"{jh.timestamp_to_time(finish_date_timestamp)[:19]}" cannot be in the future. Please select a date up to "{yesterday_date}".') @@ -304,22 +343,92 @@ def _get_candles_from_db( # Check if earliest available timestamp is after the requested start date if earliest_available > start_date_timestamp + 60_000: # Allow 1 minute tolerance - raise CandleNotFoundInDatabase( - f"Missing candles for {symbol} on {exchange}. " - f"Requested data from {jh.timestamp_to_date(start_date_timestamp)}, " - f"but earliest available candle is from {jh.timestamp_to_date(earliest_available)}." - ) + # Check if we should fill missing candles + fill_missing = jh.get_config('env.data.fill_missing_candles', True) + + if fill_missing: + # Log warning about missing data + from jesse.services.logger import info + info( + f'Missing candles for {symbol} on {exchange}. ' + f'Requested data from {jh.timestamp_to_date(start_date_timestamp)}, ' + f'but earliest available candle is from {jh.timestamp_to_date(earliest_available)}. ' + f'Filling with empty candles.' + ) + + # Calculate how many minutes we need to fill at the beginning + missing_minutes = int((earliest_available - start_date_timestamp) // 60_000) + + # Create empty candles to fill the gap at the beginning + empty_candles = [] + first_price = candles_array[0][1] if len(candles_array) > 0 else 0 # Use first open price + + for i in range(missing_minutes): + empty_candle = np.array([ + start_date_timestamp + i * 60_000, # timestamp + first_price, # open + first_price, # close + first_price, # high + first_price, # low + 0 # volume + ]) + empty_candles.append(empty_candle) + + # Combine empty candles at the beginning with original candles + if empty_candles: + candles_array = np.concatenate([np.array(empty_candles), candles_array]) + else: + raise CandleNotFoundInDatabase( + f"Missing candles for {symbol} on {exchange}. " + f"Requested data from {jh.timestamp_to_date(start_date_timestamp)}, " + f"but earliest available candle is from {jh.timestamp_to_date(earliest_available)}." + ) # For finish date validation, we need to check if we have candles up to exactly one minute # before the start of the requested finish date # Check if the latest available candle timestamp is before the required last candle if latest_available < finish_date_timestamp: - # Missing candles at the end of the requested range - raise CandleNotFoundInDatabase( - f"Missing recent candles for \"{symbol}\" on \"{exchange}\". " - f"Requested data until \"{jh.timestamp_to_time(finish_date_timestamp)[:19]}\", " - f"but latest available candle is up to \"{jh.timestamp_to_time(latest_available)[:19]}\"." - ) + # Check if we should fill missing candles + fill_missing = jh.get_config('env.data.fill_missing_candles', True) + + if fill_missing: + # Log warning about missing data + from jesse.services.logger import info + info( + f'Missing recent candles for "{symbol}" on "{exchange}". ' + f'Requested data until "{jh.timestamp_to_time(finish_date_timestamp)[:19]}", ' + f'but latest available candle is up to "{jh.timestamp_to_time(latest_available)[:19]}". ' + f'Filling with empty candles.' 
+ ) + + # Calculate how many minutes we need to fill + missing_minutes = int((finish_date_timestamp - latest_available) // 60_000) + + # Create empty candles to fill the gap + empty_candles = [] + last_price = candles_array[-1][2] if len(candles_array) > 0 else 0 # Use last close price + + for i in range(missing_minutes): + empty_candle = np.array([ + latest_available + (i + 1) * 60_000, # timestamp + last_price, # open + last_price, # close + last_price, # high + last_price, # low + 0 # volume + ]) + empty_candles.append(empty_candle) + + # Combine original candles with empty ones + if empty_candles: + candles_array = np.concatenate([candles_array, np.array(empty_candles)]) + else: + # Missing candles at the end of the requested range + raise CandleNotFoundInDatabase( + f"Missing recent candles for \"{symbol}\" on \"{exchange}\". " + f"Requested data until \"{jh.timestamp_to_time(finish_date_timestamp)[:19]}\", " + f"but latest available candle is up to \"{jh.timestamp_to_time(latest_available)[:19]}\"." + ) if caching: # cache for 1 week it for near future calls @@ -331,17 +440,99 @@ def _get_candles_from_db( def _get_generated_candles(timeframe, trading_candles) -> np.ndarray: # generate candles for the requested timeframe generated_candles = [] + required_candles = jh.timeframe_to_one_minutes(timeframe) + for i in range(len(trading_candles)): - num = jh.timeframe_to_one_minutes(timeframe) - - if (i + 1) % num == 0: + if (i + 1) % required_candles == 0: + # Get the slice of candles for this timeframe + start_idx = max(0, i - (required_candles - 1)) + end_idx = min(i + 1, len(trading_candles)) + candle_slice = trading_candles[start_idx:end_idx] + + # If we don't have enough candles, fill with empty ones + if len(candle_slice) < required_candles: + fill_missing = jh.get_config('env.data.fill_missing_candles', True) + + if fill_missing: + from jesse.services.logger import info + info( + f'Insufficient data for {timeframe} candle generation: only {len(candle_slice)} candles available, ' + f'but {required_candles} required. Filling with empty candles.' + ) + + empty_candles = [] + last_timestamp = candle_slice[-1][0] if len(candle_slice) > 0 else 0 + last_price = candle_slice[-1][2] if len(candle_slice) > 0 else 0 + + for j in range(required_candles - len(candle_slice)): + empty_candle = np.array([ + last_timestamp + (j + 1) * 60_000, # timestamp + last_price, # open + last_price, # close + last_price, # high + last_price, # low + 0 # volume + ]) + empty_candles.append(empty_candle) + + # Combine original candles with empty ones + candle_slice = np.concatenate([candle_slice, np.array(empty_candles)]) + else: + raise ValueError( + f'Insufficient data for {timeframe} candle: only {len(candle_slice)} candles available, ' + f'but {required_candles} required.' + ) + generated_candles.append( generate_candle_from_one_minutes( timeframe, - trading_candles[(i - (num - 1)):(i + 1)], + candle_slice, True ) ) + # Handle the case where we don't have enough data for a complete candle + # but we're at the end of the data + elif i == len(trading_candles) - 1 and len(trading_candles) < required_candles and (i + 1) % required_candles != 0: + fill_missing = jh.get_config('env.data.fill_missing_candles', True) + + if fill_missing: + from jesse.services.logger import info + info( + f'Insufficient data for {timeframe} candle generation: only {len(trading_candles)} candles available, ' + f'but {required_candles} required. Filling with empty candles.' 
+ ) + + # Fill with empty candles to complete the timeframe + empty_candles = [] + last_timestamp = trading_candles[-1][0] if len(trading_candles) > 0 else 0 + last_price = trading_candles[-1][2] if len(trading_candles) > 0 else 0 + + for j in range(required_candles - len(trading_candles)): + empty_candle = np.array([ + last_timestamp + (j + 1) * 60_000, # timestamp + last_price, # open + last_price, # close + last_price, # high + last_price, # low + 0 # volume + ]) + empty_candles.append(empty_candle) + + # Combine original candles with empty ones + complete_candle_slice = np.concatenate([trading_candles, np.array(empty_candles)]) + + generated_candles.append( + generate_candle_from_one_minutes( + timeframe, + complete_candle_slice, + True + ) + ) + else: + raise ValueError( + f'Insufficient data for {timeframe} candle: only {len(trading_candles)} candles available, ' + f'but {required_candles} required.' + ) return np.array(generated_candles) @@ -396,3 +587,72 @@ def delete_candles(exchange: str, symbol: str) -> None: Candle.exchange == exchange, Candle.symbol == symbol ).execute() + + +def _get_csv_candles( + symbol: str, + timeframe: str, + start_date_timestamp: int, + finish_date_timestamp: int, + warmup_candles_num: int = 0, + is_for_jesse: bool = False +) -> Tuple[np.ndarray, np.ndarray]: + """ + Get candles from CSV data source. + + Args: + symbol: Symbol name + timeframe: Timeframe + start_date_timestamp: Start timestamp in milliseconds + finish_date_timestamp: Finish timestamp in milliseconds + warmup_candles_num: Number of warmup candles + is_for_jesse: Whether this is for Jesse framework + + Returns: + Tuple of (warmup_candles, trading_candles) + """ + from jesse.services.csv_data_provider import csv_data_provider + + try: + # Get candles from CSV data provider + candles = csv_data_provider.get_candles( + symbol=symbol, + timeframe=timeframe, + start_date=start_date_timestamp, + finish_date=finish_date_timestamp + ) + + if candles is None or len(candles) == 0: + return None, None + + # Convert to numpy array if needed + if not isinstance(candles, np.ndarray): + candles = np.array(candles) + + # Calculate warmup candles if needed + warmup_candles = None + if warmup_candles_num > 0: + # Calculate warmup period + warmup_period_ms = warmup_candles_num * jh.timeframe_to_one_minutes(timeframe) * 60_000 + warmup_start = start_date_timestamp - warmup_period_ms + + # Get warmup candles + warmup_candles = csv_data_provider.get_candles( + symbol=symbol, + timeframe=timeframe, + start_date=warmup_start, + finish_date=start_date_timestamp - 1 + ) + + if warmup_candles is not None and len(warmup_candles) > 0: + if not isinstance(warmup_candles, np.ndarray): + warmup_candles = np.array(warmup_candles) + else: + warmup_candles = None + + return warmup_candles, candles + + except Exception as e: + from jesse.services import logger + logger.error(f"Error getting CSV candles for {symbol}: {e}") + return None, None diff --git a/jesse/services/csv_data_provider.py b/jesse/services/csv_data_provider.py new file mode 100644 index 000000000..6475a6aca --- /dev/null +++ b/jesse/services/csv_data_provider.py @@ -0,0 +1,399 @@ +""" +CSV Data Provider for Jesse trading framework. +Handles loading and aggregating tick data from CSV files into OHLCV candles. 
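+
+Candles are produced in Jesse's array order:
+[timestamp, open, close, high, low, volume].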
+""" + +import os +import pandas as pd +import numpy as np +from typing import Dict, List, Optional, Tuple +import jesse.helpers as jh +from jesse.services import logger +from jesse.services.csv_parser import CSVParser + + +class CSVDataProvider: + """ + Data provider for CSV files containing tick data. + Aggregates tick data into OHLCV candles for backtesting. + """ + + def __init__(self, data_directory: str = "/home/jesse/KucoinData"): + """ + Initialize CSV data provider. + + Args: + data_directory: Base directory containing CSV data files + """ + self.data_directory = data_directory + self.cache = {} # Cache for loaded data + + def get_available_symbols(self) -> List[str]: + """ + Get list of available symbols in SYMBOL-USDT format. + + Returns: + List of symbol names in SYMBOL-USDT format + """ + if not os.path.exists(self.data_directory): + return [] + + symbols = [] + for item in os.listdir(self.data_directory): + item_path = os.path.join(self.data_directory, item) + if os.path.isdir(item_path): + # Check if price.csv exists in the directory + price_file = os.path.join(item_path, "price.csv") + if os.path.exists(price_file): + # Return symbols in SYMBOL-USDT format for Jesse compatibility + symbols.append(f"{item}-USDT") + + return sorted(symbols) + + def get_symbol_info(self, symbol: str) -> Optional[Dict]: + """ + Get information about a symbol's data. + + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + + Returns: + Dictionary with symbol information or None if not found + """ + # Remove common suffixes from symbol for file lookup + csv_symbol = symbol + if symbol.endswith('-USDT'): + csv_symbol = symbol.replace('-USDT', '') + elif symbol.endswith('-USDC'): + csv_symbol = symbol.replace('-USDC', '') + elif symbol.endswith('-BTC'): + csv_symbol = symbol.replace('-BTC', '') + elif symbol.endswith('-ETH'): + csv_symbol = symbol.replace('-ETH', '') + + price_file = os.path.join(self.data_directory, csv_symbol, "price.csv") + + if not os.path.exists(price_file): + return None + + try: + # Read first and last lines to get time range + with open(price_file, 'r') as f: + first_line = f.readline().strip() # Skip header + first_line = f.readline().strip() # Get first data line + f.seek(0, 2) # Go to end of file + file_size = f.tell() + + # Read last line + f.seek(max(0, file_size - 1000)) # Read last 1000 bytes + last_chunk = f.read() + last_line = last_chunk.split('\n')[-2] if '\n' in last_chunk else last_chunk + + # Parse first and last timestamps + first_parts = first_line.split(',') + last_parts = last_line.split(',') + + if len(first_parts) >= 1 and len(last_parts) >= 1: + start_time = int(first_parts[0]) # timestamp is in first column + end_time = int(last_parts[0]) + + return { + 'symbol': symbol, + 'start_time': start_time, + 'end_time': end_time, + 'start_date': jh.timestamp_to_date(start_time), + 'end_date': jh.timestamp_to_date(end_time), + 'file_path': price_file, + 'file_size': file_size + } + + except Exception as e: + logger.error(f"Error getting symbol info for {symbol}: {e}") + + return None + + def load_tick_data(self, symbol: str, start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[pd.DataFrame]: + """ + Load tick data for a symbol. 
+ + Args: + symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT') + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + DataFrame with tick data or None if failed + """ + # Remove common suffixes from symbol for file lookup + csv_symbol = symbol + if symbol.endswith('-USDT'): + csv_symbol = symbol.replace('-USDT', '') + elif symbol.endswith('-USDC'): + csv_symbol = symbol.replace('-USDC', '') + elif symbol.endswith('-BTC'): + csv_symbol = symbol.replace('-BTC', '') + elif symbol.endswith('-ETH'): + csv_symbol = symbol.replace('-ETH', '') + + price_file = os.path.join(self.data_directory, csv_symbol, "price.csv") + + if not os.path.exists(price_file): + logger.error(f"Price file not found for symbol {symbol}: {price_file}") + return None + + try: + # Read CSV file (skip header row) + df = pd.read_csv(price_file, names=['timestamp', 'price', 'volume'], skiprows=1) + + # Filter by date range if specified + if start_date is not None: + df = df[df['timestamp'] >= start_date] + if finish_date is not None: + df = df[df['timestamp'] <= finish_date] + + # Sort by timestamp + df = df.sort_values('timestamp').reset_index(drop=True) + + logger.info(f"Loaded {len(df)} ticks for {symbol}") + return df + + except Exception as e: + logger.error(f"Error loading tick data for {symbol}: {e}") + return None + + def aggregate_to_candles(self, tick_data: pd.DataFrame, timeframe: str = "1m") -> np.ndarray: + """ + Aggregate tick data into OHLCV candles. + + Args: + tick_data: DataFrame with tick data + timeframe: Target timeframe (e.g., "1m", "5m", "1h") + + Returns: + numpy array of candles in Jesse format + """ + if tick_data is None or len(tick_data) == 0: + return np.array([]) + + try: + # Convert timeframe to minutes + timeframe_minutes = jh.timeframe_to_one_minutes(timeframe) + timeframe_ms = timeframe_minutes * 60 * 1000 # Convert to milliseconds + + # Group ticks by timeframe + tick_data['candle_timestamp'] = (tick_data['timestamp'] // timeframe_ms) * timeframe_ms + + # Aggregate to OHLCV + candles = tick_data.groupby('candle_timestamp').agg({ + 'price': ['first', 'last', 'max', 'min'], # OHLC + 'volume': 'sum' # Volume + }).reset_index() + + # Flatten column names + candles.columns = ['timestamp', 'open', 'close', 'high', 'low', 'volume'] + + # Convert to numpy array in Jesse format: [timestamp, open, close, high, low, volume] + result = candles[['timestamp', 'open', 'close', 'high', 'low', 'volume']].values + + logger.info(f"Aggregated {len(tick_data)} ticks into {len(result)} {timeframe} candles") + return result + + except Exception as e: + logger.error(f"Error aggregating tick data to candles: {e}") + return np.array([]) + + def get_candles(self, symbol: str, timeframe: str = "1m", + start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[np.ndarray]: + """ + Get candles for a symbol and timeframe. 
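+
+        Results are cached in memory; call clear_cache() to free them.
+
+        Example (assuming CSV data exists for the symbol):
+
+            candles = csv_data_provider.get_candles('ACH-USDT', '5m')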
+
+        Args:
+            symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT')
+            timeframe: Timeframe
+            start_date: Start timestamp in milliseconds (optional)
+            finish_date: Finish timestamp in milliseconds (optional)
+
+        Returns:
+            numpy array of candles or None if failed
+        """
+        # Create cache key
+        cache_key = f"{symbol}_{timeframe}_{start_date}_{finish_date}"
+
+        if cache_key in self.cache:
+            return self.cache[cache_key]
+
+        # Load tick data
+        tick_data = self.load_tick_data(symbol, start_date, finish_date)
+
+        if tick_data is None:
+            return None
+
+        # Aggregate to candles
+        candles = self.aggregate_to_candles(tick_data, timeframe)
+
+        # Cache result
+        self.cache[cache_key] = candles
+
+        return candles
+
+    def save_candles_to_database(self, symbol: str, timeframe: str = "1m",
+                                exchange: str = "custom",
+                                start_date: Optional[int] = None,
+                                finish_date: Optional[int] = None) -> bool:
+        """
+        Save candles to Jesse database.
+
+        Args:
+            symbol: Symbol name (e.g., 'ACH' or 'ACH-USDT')
+            timeframe: Timeframe
+            exchange: Exchange name
+            start_date: Start timestamp in milliseconds (optional)
+            finish_date: Finish timestamp in milliseconds (optional)
+
+        Returns:
+            bool: True if saved successfully, False otherwise
+        """
+        candles = self.get_candles(symbol, timeframe, start_date, finish_date)
+
+        if candles is None or len(candles) == 0:
+            logger.error(f"No candles to save for {symbol}")
+            return False
+
+        try:
+            from jesse.services.db import database
+            from jesse.models.Candle import Candle
+
+            # Ensure we're in a Jesse project directory
+            if not jh.is_jesse_project():
+                # Try to find Jesse project directory
+                current_dir = os.getcwd()
+                if 'project-template' in current_dir:
+                    # We're already in the right place
+                    pass
+                else:
+                    # Try to change to project-template directory
+                    project_template_dir = '/Users/alxy/Desktop/1PROJ/JesseLocal/project-template'
+                    if os.path.exists(project_template_dir):
+                        os.chdir(project_template_dir)
+
+            database.open_connection()
+
+            # Clear existing data for this exchange/symbol/timeframe
+            # (use the passed-in exchange name rather than a hardcoded 'custom')
+            Candle.delete().where(
+                (Candle.exchange == exchange) &
+                (Candle.symbol == symbol) &
+                (Candle.timeframe == timeframe)
+            ).execute()
+
+            # Insert new data in batches to avoid connection timeout
+            batch_size = 1000  # Insert 1000 candles at a time
+            total_candles = len(candles)
+
+            for i in range(0, total_candles, batch_size):
+                batch_candles = candles[i:i + batch_size]
+                candles_to_insert = []
+
+                for candle in batch_candles:
+                    candles_to_insert.append({
+                        'id': jh.generate_unique_id(),
+                        'timestamp': int(candle[0]),
+                        'open': float(candle[1]),
+                        'close': float(candle[2]),
+                        'high': float(candle[3]),
+                        'low': float(candle[4]),
+                        'volume': float(candle[5]),
+                        'exchange': exchange,
+                        'symbol': symbol,
+                        'timeframe': timeframe
+                    })
+
+                # Insert batch
+                Candle.insert_many(candles_to_insert).execute()
+                print(f"   📊 Inserted {min(i + batch_size, total_candles)} of {total_candles} candles")
+
+            database.close_connection()
+            logger.info(f"Successfully saved {total_candles} candles to database")
+            return True
+
+        except Exception as e:
+            print(f"❌ Error saving candles to database: {e}")
+            import traceback
+            print(f"❌ Traceback: {traceback.format_exc()}")
+            logger.error(f"Error saving candles to database: {e}")
+            logger.error(f"Traceback: {traceback.format_exc()}")
+            return False
+
+    def get_available_timeframes(self, symbol: str) -> List[str]:
+        """
+        Get available timeframes for a symbol based on data frequency.
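+
+        Since the source is raw tick data, any of the listed timeframes can be
+        aggregated on demand.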
+ + Args: + symbol: Symbol name + + Returns: + List of available timeframes + """ + # For tick data, we can generate any timeframe + return ["1m", "3m", "5m", "15m", "30m", "1h", "2h", "4h", "6h", "8h", "12h", "1d"] + + def clear_cache(self): + """Clear the data cache.""" + self.cache.clear() + logger.info("CSV data cache cleared") + + +# Global instance +csv_data_provider = CSVDataProvider(data_directory="/Users/alxy/Downloads/Fond/KucoinData") + + +def get_csv_candles(symbol: str, timeframe: str = "1m", + start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> Optional[np.ndarray]: + """ + Convenience function to get candles from CSV data. + + Args: + symbol: Symbol name + timeframe: Timeframe + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + numpy array of candles or None if failed + """ + return csv_data_provider.get_candles(symbol, timeframe, start_date, finish_date) + + +def get_available_csv_symbols() -> List[str]: + """ + Get list of available symbols from CSV data. + + Returns: + List of symbol names + """ + return csv_data_provider.get_available_symbols() + + +def import_csv_symbol_to_database(symbol: str, timeframe: str = "1m", + exchange: str = "custom", + start_date: Optional[int] = None, + finish_date: Optional[int] = None) -> bool: + """ + Import a CSV symbol to Jesse database. + + Args: + symbol: Symbol name + timeframe: Timeframe + exchange: Exchange name + start_date: Start timestamp in milliseconds (optional) + finish_date: Finish timestamp in milliseconds (optional) + + Returns: + bool: True if imported successfully, False otherwise + """ + return csv_data_provider.save_candles_to_database( + symbol, timeframe, exchange, start_date, finish_date + ) diff --git a/jesse/services/csv_parser.py b/jesse/services/csv_parser.py new file mode 100644 index 000000000..46a21cc89 --- /dev/null +++ b/jesse/services/csv_parser.py @@ -0,0 +1,385 @@ +""" +CSV Parser service for Jesse trading framework. +Handles parsing of CSV files containing OHLCV data for backtesting and optimization. +""" + +import csv +import os +import pandas as pd +import numpy as np +from typing import List, Dict, Optional, Tuple +from datetime import datetime +import jesse.helpers as jh +from jesse.services import logger + + +class CSVParser: + """ + Parser for CSV files containing OHLCV data. + Supports various CSV formats commonly used in trading data. + """ + + # Supported column name variations + TIMESTAMP_COLUMNS = ['timestamp', 'time', 'date', 'datetime', 'ts'] + OPEN_COLUMNS = ['open', 'o', 'Open', 'OPEN'] + HIGH_COLUMNS = ['high', 'h', 'High', 'HIGH'] + LOW_COLUMNS = ['low', 'l', 'Low', 'LOW'] + CLOSE_COLUMNS = ['close', 'c', 'Close', 'CLOSE'] + VOLUME_COLUMNS = ['volume', 'vol', 'v', 'Volume', 'VOLUME'] + + def __init__(self, file_path: str, exchange: str = "custom", symbol: str = "BTC-USDT", timeframe: str = "1m"): + """ + Initialize CSV parser. + + Args: + file_path: Path to CSV file + exchange: Exchange name (default: "custom") + symbol: Symbol name (default: "BTC-USDT") + timeframe: Timeframe (default: "1m") + """ + self.file_path = file_path + self.exchange = exchange + self.symbol = symbol + self.timeframe = timeframe + self.data = None + self.column_mapping = {} + + def validate_file(self) -> bool: + """ + Validate that the CSV file exists and is readable. 
+
+        Returns:
+            bool: True if file is valid, False otherwise
+        """
+        if not os.path.exists(self.file_path):
+            logger.error(f"CSV file not found: {self.file_path}")
+            return False
+
+        if not os.path.isfile(self.file_path):
+            logger.error(f"Path is not a file: {self.file_path}")
+            return False
+
+        return True
+
+    def detect_columns(self, sample_rows: int = 5) -> Dict[str, str]:
+        """
+        Automatically detect column names in CSV file.
+
+        Args:
+            sample_rows: Number of rows to sample for detection
+
+        Returns:
+            Dict mapping standard names to actual column names
+        """
+        if not self.validate_file():
+            return {}
+
+        try:
+            # Read first few rows to detect columns
+            df_sample = pd.read_csv(self.file_path, nrows=sample_rows)
+            # Map lower-cased header names back to the actual column names so
+            # the rename in parse_csv() also works for capitalized headers
+            lower_to_actual = {str(c).lower(): c for c in df_sample.columns}
+
+            mapping = {}
+
+            # Find timestamp column
+            for col in self.TIMESTAMP_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['timestamp'] = lower_to_actual[col.lower()]
+                    break
+
+            # Find OHLCV columns
+            for col in self.OPEN_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['open'] = lower_to_actual[col.lower()]
+                    break
+
+            for col in self.HIGH_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['high'] = lower_to_actual[col.lower()]
+                    break
+
+            for col in self.LOW_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['low'] = lower_to_actual[col.lower()]
+                    break
+
+            for col in self.CLOSE_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['close'] = lower_to_actual[col.lower()]
+                    break
+
+            for col in self.VOLUME_COLUMNS:
+                if col.lower() in lower_to_actual:
+                    mapping['volume'] = lower_to_actual[col.lower()]
+                    break
+
+            self.column_mapping = mapping
+            return mapping
+
+        except Exception as e:
+            logger.error(f"Error detecting columns: {e}")
+            return {}
+
+    def parse_csv(self,
+                  timestamp_format: str = "auto",
+                  custom_columns: Optional[Dict[str, str]] = None) -> bool:
+        """
+        Parse CSV file and convert to Jesse format.
+
+        Args:
+            timestamp_format: Format of timestamp column ("auto", "unix", "iso", "custom")
+            custom_columns: Custom column mapping if auto-detection fails
+
+        Returns:
+            bool: True if parsing successful, False otherwise
+        """
+        if not self.validate_file():
+            return False
+
+        try:
+            # Use custom columns if provided, otherwise auto-detect
+            if custom_columns:
+                self.column_mapping = custom_columns
+            else:
+                self.detect_columns()
+
+            # Validate required columns
+            required_columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
+            missing_columns = [col for col in required_columns if col not in self.column_mapping]
+
+            if missing_columns:
+                logger.error(f"Missing required columns: {missing_columns}")
+                return False
+
+            # Read CSV file
+            df = pd.read_csv(self.file_path)
+
+            # Rename columns to standard names
+            df_renamed = df.rename(columns={
+                self.column_mapping['timestamp']: 'timestamp',
+                self.column_mapping['open']: 'open',
+                self.column_mapping['high']: 'high',
+                self.column_mapping['low']: 'low',
+                self.column_mapping['close']: 'close',
+                self.column_mapping['volume']: 'volume'
+            })
+
+            # Convert timestamp to milliseconds
+            df_renamed['timestamp'] = self._convert_timestamp(df_renamed['timestamp'], timestamp_format)
+
+            # Sort by timestamp
+            df_renamed = df_renamed.sort_values('timestamp').reset_index(drop=True)
+
+            # Convert to numpy array in Jesse format: [timestamp, open, close, high, low, volume]
+            self.data = df_renamed[['timestamp', 'open', 'close', 'high', 'low', 'volume']].values
+
+            logger.info(f"Successfully parsed {len(self.data)} candles from {self.file_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error parsing CSV file: {e}")
+            return False
+
+    def _convert_timestamp(self, timestamps: pd.Series, format_type: str) -> pd.Series:
+        """
+        Convert timestamp column to milliseconds since epoch.
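+
+        Example (unix-seconds input, hypothetical values):
+
+            _convert_timestamp(pd.Series([1672444800]), 'unix')  # -> 1672444800000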
+ + Args: + timestamps: Series of timestamp values + format_type: Format type ("auto", "unix", "iso", "custom") + + Returns: + Series of timestamps in milliseconds + """ + try: + if format_type == "auto": + # Try to auto-detect format + sample = timestamps.iloc[0] + + # Check if it's already a Unix timestamp + if isinstance(sample, (int, float)) and len(str(int(sample))) >= 10: + # Convert to milliseconds if needed + if sample < 1e12: # Unix timestamp in seconds + return timestamps * 1000 + else: # Already in milliseconds + return timestamps + + # Try parsing as ISO format + try: + pd.to_datetime(timestamps) + return pd.to_datetime(timestamps).astype(np.int64) // 10**6 + except: + pass + + # Try parsing as common date formats + for fmt in ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d', '%d/%m/%Y %H:%M:%S', '%d/%m/%Y']: + try: + return pd.to_datetime(timestamps, format=fmt).astype(np.int64) // 10**6 + except: + continue + + raise ValueError("Could not auto-detect timestamp format") + + elif format_type == "unix": + # Unix timestamp in seconds + return timestamps * 1000 + + elif format_type == "iso": + # ISO format + return pd.to_datetime(timestamps).astype(np.int64) // 10**6 + + else: + # Custom format + return pd.to_datetime(timestamps, format=format_type).astype(np.int64) // 10**6 + + except Exception as e: + logger.error(f"Error converting timestamps: {e}") + raise + + def get_candles(self) -> Optional[np.ndarray]: + """ + Get parsed candles data. + + Returns: + numpy array of candles in Jesse format or None if not parsed + """ + return self.data + + def get_candles_info(self) -> Dict: + """ + Get information about parsed candles. + + Returns: + Dictionary with candles information + """ + if self.data is None: + return {} + + return { + 'count': len(self.data), + 'start_time': self.data[0][0] if len(self.data) > 0 else None, + 'end_time': self.data[-1][0] if len(self.data) > 0 else None, + 'exchange': self.exchange, + 'symbol': self.symbol, + 'timeframe': self.timeframe, + 'file_path': self.file_path + } + + def save_to_database(self) -> bool: + """ + Save parsed candles to Jesse database. + + Returns: + bool: True if saved successfully, False otherwise + """ + if self.data is None: + logger.error("No data to save. 
Parse CSV first.")
+            return False
+
+        try:
+            from jesse.services.db import database
+            from jesse.models.Candle import Candle
+
+            database.open_connection()
+
+            # Clear existing data for this exchange/symbol/timeframe
+            Candle.delete().where(
+                (Candle.exchange == self.exchange) &
+                (Candle.symbol == self.symbol) &
+                (Candle.timeframe == self.timeframe)
+            ).execute()
+
+            # Insert new data
+            candles_to_insert = []
+            for candle in self.data:
+                candles_to_insert.append({
+                    'id': jh.generate_unique_id(),
+                    'timestamp': int(candle[0]),
+                    'open': float(candle[1]),
+                    'close': float(candle[2]),
+                    'high': float(candle[3]),
+                    'low': float(candle[4]),
+                    'volume': float(candle[5]),
+                    'exchange': self.exchange,
+                    'symbol': self.symbol,
+                    'timeframe': self.timeframe
+                })
+
+            # Batch insert
+            Candle.insert_many(candles_to_insert).execute()
+
+            database.close_connection()
+            logger.info(f"Successfully saved {len(candles_to_insert)} candles to database")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error saving to database: {e}")
+            return False
+
+
+def parse_csv_file(file_path: str,
+                   exchange: str = "custom",
+                   symbol: str = "BTC-USDT",
+                   timeframe: str = "1m",
+                   timestamp_format: str = "auto",
+                   custom_columns: Optional[Dict[str, str]] = None) -> Optional[CSVParser]:
+    """
+    Convenience function to parse a CSV file.
+
+    Args:
+        file_path: Path to CSV file
+        exchange: Exchange name
+        symbol: Symbol name
+        timeframe: Timeframe
+        timestamp_format: Timestamp format
+        custom_columns: Custom column mapping
+
+    Returns:
+        CSVParser instance if successful, None otherwise
+    """
+    parser = CSVParser(file_path, exchange, symbol, timeframe)
+
+    if parser.parse_csv(timestamp_format, custom_columns):
+        return parser
+    else:
+        return None
+
+
+def get_csv_candles(file_path: str,
+                    exchange: str = "custom",
+                    symbol: str = "BTC-USDT",
+                    timeframe: str = "1m",
+                    start_date: Optional[int] = None,
+                    finish_date: Optional[int] = None) -> Optional[np.ndarray]:
+    """
+    Get candles from CSV file with optional date filtering.
+
+    Args:
+        file_path: Path to CSV file
+        exchange: Exchange name
+        symbol: Symbol name
+        timeframe: Timeframe
+        start_date: Start timestamp in milliseconds (optional)
+        finish_date: Finish timestamp in milliseconds (optional)
+
+    Returns:
+        numpy array of candles or None if failed
+    """
+    parser = CSVParser(file_path, exchange, symbol, timeframe)
+
+    if not parser.parse_csv():
+        return None
+
+    candles = parser.get_candles()
+
+    if candles is None:
+        return None
+
+    # Apply date filtering if specified
+    if start_date is not None:
+        candles = candles[candles[:, 0] >= start_date]
+
+    if finish_date is not None:
+        candles = candles[candles[:, 0] <= finish_date]
+
+    return candles
diff --git a/quick_test.py b/quick_test.py
new file mode 100644
index 000000000..e0ca352e3
--- /dev/null
+++ b/quick_test.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+Quick Test Script for CSV Data Loading
+Quick smoke test for loading CSV data into Jesse
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse'))
+
+from jesse.research.external_data.csv_ticks_to_db import CSVDataLoader, BASE_URL, AUTHORIZATION
+
+def quick_test():
+    """Quick functionality test"""
+    print("🧪 Quick CSV functionality test")
+    print("=" * 40)
+
+    # Create the loader
+    loader = CSVDataLoader(BASE_URL, AUTHORIZATION)
+
+    # 1. 
Fetch the list of symbols
+    print("1️⃣ Fetching the list of symbols...")
+    symbols = loader.get_available_symbols()
+    print(f"   ✅ Found {len(symbols)} symbols")
+    if symbols:
+        print(f"   📋 First 5: {symbols[:5]}")
+
+    # 2. Symbol info
+    if symbols:
+        test_symbol = symbols[0]
+        print(f"\n2️⃣ Fetching info for {test_symbol}...")
+        try:
+            info = loader.get_symbol_info(test_symbol)
+            if info:
+                print(f"   ✅ Period: {info['start_date']} - {info['end_date']}")
+                print(f"   ✅ File size: {info['file_size']:,} bytes")
+            else:
+                print("   ❌ Could not fetch info")
+        except Exception as e:
+            print(f"   ❌ Error fetching info: {e}")
+
+    # 3. Data preview
+    if symbols:
+        print(f"\n3️⃣ Previewing {test_symbol}...")
+        try:
+            preview = loader.preview_data(test_symbol, limit=3)
+            if preview:
+                print("   ✅ Data:")
+                for i, row in enumerate(preview.get('preview', [])[:3]):
+                    print(f"      {i+1}. {row}")
+            else:
+                print("   ❌ Could not fetch preview")
+        except Exception as e:
+            print(f"   ❌ Preview error: {e}")
+
+    # 4. Import test (single symbol only)
+    if symbols:
+        print(f"\n4️⃣ Testing import of {test_symbol}...")
+        try:
+            success = loader.import_symbol(test_symbol, "1m", "custom")
+            if success:
+                print("   ✅ Import succeeded")
+
+                # Check the imported candles
+                candles_data = loader.get_candles(test_symbol, "1m")
+                if candles_data:
+                    count = candles_data.get('count', 0)
+                    print(f"   📊 Imported {count:,} candles")
+            else:
+                print("   ❌ Import failed")
+        except Exception as e:
+            print(f"   ❌ Import error: {e}")
+
+    # 5. Clear the cache
+    print("\n5️⃣ Clearing the cache...")
+    loader.clear_cache()
+
+    print("\n🎉 Test finished!")
+
+def test_specific_symbols():
+    """Test specific symbols"""
+    print("\n🎯 Testing specific symbols")
+    print("=" * 30)
+
+    loader = CSVDataLoader(BASE_URL, AUTHORIZATION)
+
+    # Symbols to test
+    test_symbols = ["ACH", "CAS", "DOGS"]
+
+    # Fetch the available symbols once, up front
+    available_symbols = loader.get_available_symbols()
+
+    for symbol in test_symbols:
+        print(f"\n📊 Testing {symbol}...")
+
+        # Check availability
+        if symbol not in available_symbols:
+            print(f"   ❌ Symbol {symbol} not found")
+            continue
+
+        # Fetch info
+        try:
+            info = loader.get_symbol_info(symbol)
+            if info:
+                print(f"   ✅ Period: {info['start_date']} - {info['end_date']}")
+        except Exception as e:
+            print(f"   ❌ Error fetching info: {e}")
+
+        # Import
+        try:
+            success = loader.import_symbol(symbol, "1m", "custom")
+            if success:
+                print(f"   ✅ {symbol} imported successfully")
+            else:
+                print(f"   ❌ Failed to import {symbol}")
+        except Exception as e:
+            print(f"   ❌ Import error for {symbol}: {e}")
+
+if __name__ == "__main__":
+    try:
+        # Main test
+        quick_test()
+
+        # Specific-symbols test
+        test_specific_symbols()
+
+    except KeyboardInterrupt:
+        print("\n⏹️ Test interrupted by user")
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
diff --git a/requirements.txt b/requirements.txt
index caf3d2bac..2325ab976 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,3 +33,4 @@ eth-account~=0.13.5
 msgpack~=1.1.0
 starkbank-ecdsa~=1.1.0
 jesse-rust==1.0.1
+ccxt~=4.4.82
diff --git a/test_api_symbols.py b/test_api_symbols.py
new file mode 100644
index 000000000..ae45809f3
--- /dev/null
+++ b/test_api_symbols.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Test symbols through Jesse API
+"""
+
+import requests
+import json
+
+def test_api_symbols():
+    """Test symbols through Jesse API"""
+    
print("🧪 Тест символов через Jesse API") + print("=" * 40) + + base_url = "http://localhost:9000" + token = "ef260e9aa3c673af240d17a2660480361a8e081d1ffeca2a5ed0e3219fc18567" + headers = {"Authorization": token} + + try: + # Test 1: Check if CustomCSV is available + print("1️⃣ Проверяем доступные exchanges...") + response = requests.get(f"{base_url}/exchange/supported-symbols", + headers=headers, + params={"exchange": "CustomCSV"}) + + if response.status_code == 200: + data = response.json() + symbols = data.get('data', []) + print(f" ✅ CustomCSV доступен") + print(f" 📊 Символов: {len(symbols)}") + if symbols: + print(f" 📋 Первые 10: {symbols[:10]}") + + # Check format + usdt_symbols = [s for s in symbols if s.endswith('-USDT')] + print(f" 📊 Символов с суффиксом -USDT: {len(usdt_symbols)}") + + if len(usdt_symbols) == len(symbols): + print(" ✅ Все символы в формате SYMBOL-USDT") + else: + print(" ❌ Не все символы в формате SYMBOL-USDT") + else: + print(f" ❌ Ошибка: {response.status_code} - {response.text}") + return + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_api_symbols() diff --git a/test_backtesting_exchanges.py b/test_backtesting_exchanges.py new file mode 100644 index 000000000..39aac8ca0 --- /dev/null +++ b/test_backtesting_exchanges.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +Test backtesting exchanges including CustomCSV +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_backtesting_exchanges(): + """Test backtesting exchanges""" + print("🧪 Тест backtesting exchanges") + print("=" * 40) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.info import backtesting_exchanges, live_trading_exchanges + from jesse.enums import exchanges + print("1️⃣ Импорт backtesting_exchanges и live_trading_exchanges... ✅") + + print(f"\n2️⃣ Backtesting exchanges ({len(backtesting_exchanges)}):") + for i, exchange in enumerate(backtesting_exchanges, 1): + print(f" {i:2d}. {exchange}") + + print(f"\n3️⃣ Live trading exchanges ({len(live_trading_exchanges)}):") + for i, exchange in enumerate(live_trading_exchanges, 1): + print(f" {i:2d}. {exchange}") + + # Check if CustomCSV is in backtesting exchanges + if exchanges.CUSTOM_CSV in backtesting_exchanges: + print(f"\n✅ CustomCSV найден в backtesting exchanges: {exchanges.CUSTOM_CSV}") + else: + print(f"\n❌ CustomCSV НЕ найден в backtesting exchanges") + print(f" Ищем: {exchanges.CUSTOM_CSV}") + print(f" В списке: {backtesting_exchanges}") + + # Check if CustomCSV is in live trading exchanges + if exchanges.CUSTOM_CSV in live_trading_exchanges: + print(f"\n✅ CustomCSV найден в live trading exchanges: {exchanges.CUSTOM_CSV}") + else: + print(f"\n❌ CustomCSV НЕ найден в live trading exchanges (это нормально)") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_backtesting_exchanges() diff --git a/test_csv_functionality.py b/test_csv_functionality.py new file mode 100644 index 000000000..5dea82a8f --- /dev/null +++ b/test_csv_functionality.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Simple test script for CSV functionality in Jesse. +This script tests the CSV data provider and parser functionality. 
+""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +from jesse.services.csv_data_provider import csv_data_provider +from jesse.services.csv_parser import CSVParser +import jesse.helpers as jh + + +def test_csv_data_provider(): + """Test CSV data provider functionality.""" + print("Testing CSV Data Provider...") + + # Test getting available symbols + symbols = csv_data_provider.get_available_symbols() + print(f"Available symbols: {symbols[:10]}...") # Show first 10 + + if not symbols: + print("No symbols found. Make sure CSV data directory is correct.") + return False + + # Test getting symbol info + test_symbol = symbols[0] + info = csv_data_provider.get_symbol_info(test_symbol) + if info: + print(f"Symbol info for {test_symbol}:") + print(f" Start time: {info['start_time']} ({info['start_date']})") + print(f" End time: {info['end_time']} ({info['end_date']})") + print(f" File size: {info['file_size']} bytes") + else: + print(f"Could not get info for {test_symbol}") + return False + + # Test loading tick data + print(f"\nLoading tick data for {test_symbol}...") + tick_data = csv_data_provider.load_tick_data(test_symbol, limit=1000) + if tick_data is not None: + print(f"Loaded {len(tick_data)} ticks") + print(f"First few ticks:") + print(tick_data.head()) + else: + print("Failed to load tick data") + return False + + # Test aggregating to candles + print(f"\nAggregating to 1m candles...") + candles = csv_data_provider.aggregate_to_candles(tick_data, "1m") + if len(candles) > 0: + print(f"Generated {len(candles)} 1m candles") + print(f"First candle: {candles[0]}") + else: + print("Failed to generate candles") + return False + + return True + + +def test_csv_parser(): + """Test CSV parser functionality.""" + print("\nTesting CSV Parser...") + + # Find a CSV file to test with + data_dir = "/Users/alxy/Downloads/Fond/KucoinData" + test_file = None + + for symbol in os.listdir(data_dir): + symbol_path = os.path.join(data_dir, symbol) + if os.path.isdir(symbol_path): + price_file = os.path.join(symbol_path, "price.csv") + if os.path.exists(price_file): + test_file = price_file + break + + if not test_file: + print("No CSV file found for testing") + return False + + print(f"Testing with file: {test_file}") + + # Test CSV parser + parser = CSVParser(test_file, "custom", "TEST", "1m") + + # Test validation + if not parser.validate_file(): + print("File validation failed") + return False + + # Test column detection + columns = parser.detect_columns() + print(f"Detected columns: {columns}") + + # Test parsing + if not parser.parse_csv(): + print("CSV parsing failed") + return False + + # Get candles + candles = parser.get_candles() + if candles is not None and len(candles) > 0: + print(f"Parsed {len(candles)} candles") + print(f"First candle: {candles[0]}") + else: + print("No candles parsed") + return False + + # Get candles info + info = parser.get_candles_info() + print(f"Candles info: {info}") + + return True + + +def main(): + """Main test function.""" + print("=== Jesse CSV Functionality Test ===\n") + + success = True + + # Test CSV data provider + if not test_csv_data_provider(): + success = False + + # Test CSV parser + if not test_csv_parser(): + success = False + + if success: + print("\n✅ All tests passed!") + else: + print("\n❌ Some tests failed!") + + return success + + +if __name__ == "__main__": + main() diff --git a/test_csv_provider.py b/test_csv_provider.py new file mode 100644 index 000000000..30294cf8e --- /dev/null +++ 
+def test_csv_provider():
+    """Test CSV data provider functionality"""
+    print("🧪 Testing the CSV Data Provider")
+    print("=" * 40)
+
+    # Test 1: Get available symbols
+    print("1️⃣ Fetching the symbol list...")
+    symbols = csv_data_provider.get_available_symbols()
+    print(f"   ✅ Found {len(symbols)} symbols")
+    if symbols:
+        print(f"   📋 First 5: {symbols[:5]}")
+
+    # Test 2: Get symbol info for ACH
+    if symbols and 'ACH' in symbols:
+        print("\n2️⃣ Fetching info for ACH...")
+        info = csv_data_provider.get_symbol_info('ACH')
+        if info:
+            print(f"   ✅ Range: {info['start_date']} - {info['end_date']}")
+            print(f"   ✅ File size: {info['file_size']:,} bytes")
+        else:
+            print("   ❌ Could not fetch symbol info")
+
+    # Test 3: Load tick data for ACH
+    if symbols and 'ACH' in symbols:
+        print("\n3️⃣ Loading tick data for ACH...")
+        tick_data = csv_data_provider.load_tick_data('ACH')
+        if tick_data is not None:
+            print(f"   ✅ Loaded {len(tick_data)} rows")
+            print("   📊 First 3 rows:")
+            print(tick_data.head(3))
+        else:
+            print("   ❌ Could not load tick data")
+
+    # Test 4: Get candles for ACH
+    if symbols and 'ACH' in symbols:
+        print("\n4️⃣ Fetching candles for ACH...")
+        candles = csv_data_provider.get_candles('ACH', '1m')
+        if candles is not None and len(candles) > 0:
+            print(f"   ✅ Got {len(candles)} candles")
+            print(f"   📊 First candle: {candles[0]}")
+        else:
+            print("   ❌ Could not fetch candles")
+
+if __name__ == "__main__":
+    try:
+        test_csv_provider()
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
diff --git a/test_csv_provider_updated.py b/test_csv_provider_updated.py
new file mode 100644
index 000000000..cdf371999
--- /dev/null
+++ b/test_csv_provider_updated.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+"""
+Test updated CSV data provider with symbol mapping
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse'))
+
+def test_csv_provider_updated():
+    """Test updated CSV data provider"""
+    print("🧪 Testing the updated CSV data provider")
+    print("=" * 50)
+
+    try:
+        # Set Jesse project directory
+        os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template')
+        print(f"   📊 Working directory: {os.getcwd()}")
+
+        from jesse.services.csv_data_provider import CSVDataProvider
+        print("1️⃣ Imported CSVDataProvider... ✅")
+
+        # Create provider instance
+        provider = CSVDataProvider()
+        print("2️⃣ Created provider instance... ✅")
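+
+        # Assumption: the mapping under test strips the quote-currency
+        # suffix, so "ACH-USDT" resolves to the same KucoinData/ACH/price.csv
+        # as the bare "ACH" symbol, and unknown symbols fail gracefully.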
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT', 'BTC-USDT', 'ETH-USDC'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_symbol_info + symbol_info = provider.get_symbol_info(symbol) + if symbol_info: + print(f" ✅ Symbol info: {symbol_info['symbol']} ({symbol_info['start_date']} - {symbol_info['end_date']})") + else: + print(f" ❌ Symbol info not found") + + # Test get_candles + candles = provider.get_candles(symbol, '1m') + if candles is not None and len(candles) > 0: + print(f" ✅ Получено {len(candles)} свечей") + print(f" 📊 Первая свеча: {candles[0]}") + else: + print(f" ❌ Свечи не найдены") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_csv_provider_updated() diff --git a/test_csv_simple.py b/test_csv_simple.py new file mode 100644 index 000000000..abfb46ef6 --- /dev/null +++ b/test_csv_simple.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +""" +Simple test script for CSV functionality in Jesse. +This script tests the CSV data provider and parser functionality without full Jesse dependencies. +""" + +import os +import sys +import pandas as pd +import numpy as np + +# Add jesse to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_csv_parser_basic(): + """Test basic CSV parser functionality.""" + print("Testing CSV Parser (basic functionality)...") + + # Find a CSV file to test with + data_dir = "/Users/alxy/Downloads/Fond/KucoinData" + test_file = None + + for symbol in os.listdir(data_dir): + symbol_path = os.path.join(data_dir, symbol) + if os.path.isdir(symbol_path): + price_file = os.path.join(symbol_path, "price.csv") + if os.path.exists(price_file): + test_file = price_file + break + + if not test_file: + print("No CSV file found for testing") + return False + + print(f"Testing with file: {test_file}") + + try: + # Test basic CSV reading + df = pd.read_csv(test_file, names=['timestamp', 'price', 'volume'], skiprows=1) # Skip header + print(f"Loaded {len(df)} rows from CSV") + print(f"First 5 rows:") + print(df.head()) + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + df = df.dropna() # Remove any rows with invalid timestamps + + # Test aggregation to 1m candles + df['candle_timestamp'] = (df['timestamp'] // 60000) * 60000 # 1 minute buckets + + candles = df.groupby('candle_timestamp').agg({ + 'price': ['first', 'last', 'max', 'min'], + 'volume': 'sum' + }).reset_index() + + candles.columns = ['timestamp', 'open', 'close', 'high', 'low', 'volume'] + + print(f"\nGenerated {len(candles)} 1m candles") + print(f"First 3 candles:") + print(candles.head(3)) + + return True + + except Exception as e: + print(f"Error testing CSV parser: {e}") + return False + + +def test_data_directory(): + """Test data directory structure.""" + print("\nTesting data directory structure...") + + data_dir = "/Users/alxy/Downloads/Fond/KucoinData" + + if not os.path.exists(data_dir): + print(f"Data directory not found: {data_dir}") + return False + + symbols = [] + for item in os.listdir(data_dir): + item_path = os.path.join(data_dir, item) + if os.path.isdir(item_path): + price_file = os.path.join(item_path, "price.csv") + if os.path.exists(price_file): + symbols.append(item) + + print(f"Found {len(symbols)} symbols with CSV data") + print(f"First 10 symbols: 
+
+        print(f"\nGenerated {len(candles)} 1m candles")
+        print("First 3 candles:")
+        print(candles.head(3))
+
+        return True
+
+    except Exception as e:
+        print(f"Error testing CSV parser: {e}")
+        return False
+
+
+def test_data_directory():
+    """Test data directory structure."""
+    print("\nTesting data directory structure...")
+
+    data_dir = "/Users/alxy/Downloads/Fond/KucoinData"
+
+    if not os.path.exists(data_dir):
+        print(f"Data directory not found: {data_dir}")
+        return False
+
+    symbols = []
+    for item in os.listdir(data_dir):
+        item_path = os.path.join(data_dir, item)
+        if os.path.isdir(item_path):
+            price_file = os.path.join(item_path, "price.csv")
+            if os.path.exists(price_file):
+                symbols.append(item)
+
+    print(f"Found {len(symbols)} symbols with CSV data")
+    print(f"First 10 symbols: {symbols[:10]}")
+
+    if symbols:
+        # Test one symbol
+        test_symbol = symbols[0]
+        price_file = os.path.join(data_dir, test_symbol, "price.csv")
+
+        # Get file info
+        file_size = os.path.getsize(price_file)
+        print(f"\nTesting symbol: {test_symbol}")
+        print(f"File size: {file_size} bytes")
+
+        # Read first and last lines to get time range
+        with open(price_file, 'r') as f:
+            first_line = f.readline().strip()  # Skip header
+            first_line = f.readline().strip()  # First data line
+            f.seek(0, 2)  # Go to end
+            file_size = f.tell()
+            f.seek(max(0, file_size - 1000))  # Read last 1000 bytes
+            last_chunk = f.read()
+            last_line = last_chunk.split('\n')[-2] if '\n' in last_chunk else last_chunk
+
+        first_parts = first_line.split(',')
+        last_parts = last_line.split(',')
+
+        if len(first_parts) >= 2 and len(last_parts) >= 2:
+            start_time = int(first_parts[0])  # First column is timestamp
+            end_time = int(last_parts[0])  # First column is timestamp
+            print(f"Time range: {start_time} - {end_time}")
+            print(f"Duration: {(end_time - start_time) / 1000 / 60 / 60:.2f} hours")
+
+        return True
+
+    return False
+
+
+def main():
+    """Main test function."""
+    print("=== Jesse CSV Functionality Test (Simple) ===\n")
+
+    success = True
+
+    # Test data directory
+    if not test_data_directory():
+        success = False
+
+    # Test CSV parser
+    if not test_csv_parser_basic():
+        success = False
+
+    if success:
+        print("\n✅ All tests passed!")
+        print("\nCSV functionality is working correctly!")
+        print("\nNext steps:")
+        print("1. Start Jesse server: jesse run")
+        print("2. Access CSV endpoints at: http://localhost:9000/csv/")
+        print("3. Use the API to import CSV data for backtesting")
+    else:
+        print("\n❌ Some tests failed!")
+
+    return success
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_csv_simple_provider.py b/test_csv_simple_provider.py
new file mode 100644
index 000000000..b3a847a3a
--- /dev/null
+++ b/test_csv_simple_provider.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+Simple test for CSV data provider without full Jesse import
+"""
+
+import os
+import pandas as pd
+import numpy as np
+from typing import Dict, List, Optional, Tuple
+
+class SimpleCSVDataProvider:
+    """
+    Simple CSV data provider for testing
+    """
+
+    def __init__(self, data_directory: str = "/Users/alxy/Downloads/Fond/KucoinData"):
+        self.data_directory = data_directory
+        self.cache = {}
+
+    def get_available_symbols(self) -> List[str]:
+        """Get list of available symbols from data directory."""
+        if not os.path.exists(self.data_directory):
+            print(f"❌ Directory {self.data_directory} does not exist")
+            return []
+
+        symbols = []
+        for item in os.listdir(self.data_directory):
+            item_path = os.path.join(self.data_directory, item)
+            if os.path.isdir(item_path):
+                # Check if price.csv exists in the directory
+                price_file = os.path.join(item_path, "price.csv")
+                if os.path.exists(price_file):
+                    symbols.append(item)
+
+        return sorted(symbols)
+
+    def get_symbol_info(self, symbol: str) -> Optional[Dict]:
+        """Get information about a specific symbol."""
+        symbol_dir = os.path.join(self.data_directory, symbol)
+        price_file = os.path.join(symbol_dir, "price.csv")
+
+        if not os.path.exists(price_file):
+            return None
+
+        try:
+            # Get file size
+            file_size = os.path.getsize(price_file)
+
+            # Read first and last lines to get time range
+            with open(price_file, 'r') as f:
+                first_line = f.readline().strip()  # Skip header
+                first_line = f.readline().strip()  # Get first data line
+                f.seek(0, 2)  # Go to end of file
+                f.seek(max(0, f.tell() - 1000))  # Go back up to 1000 bytes
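+                # Reading only the last ~1 KB avoids scanning a very large
+                # tick file just to find its final timestamp; the chunk read
+                # below is split on newlines and the last complete row is
+                # used.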
+                lines = f.readlines()
+                last_line = lines[-1].strip() if lines else first_line
+
+            # Parse timestamps
+            first_parts = first_line.split(',')
+            last_parts = last_line.split(',')
+
+            if len(first_parts) >= 1 and len(last_parts) >= 1:
+                start_timestamp = int(first_parts[0])
+                end_timestamp = int(last_parts[0])
+
+                # Convert to readable dates
+                start_date = pd.to_datetime(start_timestamp, unit='ms').strftime('%Y-%m-%d %H:%M:%S')
+                end_date = pd.to_datetime(end_timestamp, unit='ms').strftime('%Y-%m-%d %H:%M:%S')
+
+                return {
+                    'symbol': symbol,
+                    'start_time': start_timestamp,
+                    'end_time': end_timestamp,
+                    'start_date': start_date,
+                    'end_date': end_date,
+                    'file_path': price_file,
+                    'file_size': file_size
+                }
+        except Exception as e:
+            print(f"❌ Error reading file {price_file}: {e}")
+            return None
+
+        return None
+
+def test_csv_provider():
+    """Test CSV data provider functionality"""
+    print("🧪 Testing the Simple CSV Data Provider")
+    print("=" * 40)
+
+    # Create provider
+    provider = SimpleCSVDataProvider()
+
+    # Test 1: Get available symbols
+    print("1️⃣ Fetching the symbol list...")
+    symbols = provider.get_available_symbols()
+    print(f"   ✅ Found {len(symbols)} symbols")
+    if symbols:
+        print(f"   📋 First 5: {symbols[:5]}")
+
+    # Test 2: Get symbol info for ACH
+    if symbols and 'ACH' in symbols:
+        print("\n2️⃣ Fetching info for ACH...")
+        info = provider.get_symbol_info('ACH')
+        if info:
+            print(f"   ✅ Range: {info['start_date']} - {info['end_date']}")
+            print(f"   ✅ File size: {info['file_size']:,} bytes")
+        else:
+            print("   ❌ Could not fetch symbol info")
+    else:
+        print("   ❌ Symbol ACH not found in the list")
+
+if __name__ == "__main__":
+    try:
+        test_csv_provider()
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
diff --git a/test_custom_driver.py b/test_custom_driver.py
new file mode 100644
index 000000000..e81f04620
--- /dev/null
+++ b/test_custom_driver.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""
+Test CustomCSV driver
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse'))
+
+def test_custom_driver():
+    """Test CustomCSV driver"""
+    print("🧪 CustomCSV driver test")
+    print("=" * 40)
+
+    try:
+        # Set Jesse project directory
+        os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template')
+        print(f"   📊 Working directory: {os.getcwd()}")
+
+        from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV
+        print("1️⃣ Imported CustomCSV driver... ✅")
+
+        # Create driver instance
+        driver = CustomCSV()
+        print("2️⃣ Created driver instance... ✅")
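+
+        # Assumption: CustomCSV implements Jesse's import-driver interface,
+        # where get_starting_time(symbol) returns the first available
+        # timestamp in milliseconds and fetch(symbol, start_timestamp,
+        # timeframe) returns a batch of candles from that timestamp onward.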
✅") + + # Test get_available_symbols + print("\n3️⃣ Тестируем get_available_symbols...") + symbols = driver.get_available_symbols() + print(f" ✅ Найдено {len(symbols)} символов") + print(f" 📋 Первые 5: {symbols[:5]}") + + # Test get_starting_time + if symbols: + symbol = symbols[0] + print(f"\n4️⃣ Тестируем get_starting_time для {symbol}...") + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + if symbols: + symbol = symbols[0] + print(f"\n5️⃣ Тестируем fetch для {symbol}...") + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + + print("\n🎉 Все тесты прошли успешно!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_custom_driver() diff --git a/test_custom_driver_complete.py b/test_custom_driver_complete.py new file mode 100644 index 000000000..066e5c651 --- /dev/null +++ b/test_custom_driver_complete.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Test complete CustomCSV driver with all required fields +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_custom_driver_complete(): + """Test complete CustomCSV driver""" + print("🧪 Тест полного CustomCSV driver") + print("=" * 50) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV + print("1️⃣ Импорт CustomCSV driver... ✅") + + # Create driver instance + driver = CustomCSV() + print("2️⃣ Создание driver instance... 
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + print(f" 📊 Тип первой свечи: {type(candles[0])}") + if isinstance(candles[0], dict): + print(f" 📊 Ключи: {list(candles[0].keys())}") + + # Check if all required keys are present + required_keys = ['timestamp', 'open', 'close', 'high', 'low', 'volume', 'symbol', 'exchange', 'timeframe'] + missing_keys = [key for key in required_keys if key not in candles[0]] + if missing_keys: + print(f" ❌ Отсутствующие ключи: {missing_keys}") + else: + print(f" ✅ Все необходимые ключи присутствуют") + + # Check values + print(f" 📊 timestamp: {candles[0]['timestamp']}") + print(f" 📊 symbol: {candles[0]['symbol']}") + print(f" 📊 exchange: {candles[0]['exchange']}") + print(f" 📊 timeframe: {candles[0]['timeframe']}") + else: + print(f" ❌ Ошибка: свеча не является словарем") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_custom_driver_complete() diff --git a/test_custom_driver_fixed.py b/test_custom_driver_fixed.py new file mode 100644 index 000000000..05423035c --- /dev/null +++ b/test_custom_driver_fixed.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +Test fixed CustomCSV driver with dictionary format +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_custom_driver_fixed(): + """Test fixed CustomCSV driver""" + print("🧪 Тест исправленного CustomCSV driver") + print("=" * 50) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV + print("1️⃣ Импорт CustomCSV driver... ✅") + + # Create driver instance + driver = CustomCSV() + print("2️⃣ Создание driver instance... 
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + print(f" 📊 Тип первой свечи: {type(candles[0])}") + if isinstance(candles[0], dict): + print(f" 📊 Ключи: {list(candles[0].keys())}") + print(f" 📊 timestamp: {candles[0]['timestamp']}") + else: + print(f" ❌ Ошибка: свеча не является словарем") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_custom_driver_fixed() diff --git a/test_custom_driver_id.py b/test_custom_driver_id.py new file mode 100644 index 000000000..32ad0e024 --- /dev/null +++ b/test_custom_driver_id.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" +Test CustomCSV driver with id field +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_custom_driver_id(): + """Test CustomCSV driver with id field""" + print("🧪 Тест CustomCSV driver с полем id") + print("=" * 50) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV + print("1️⃣ Импорт CustomCSV driver... ✅") + + # Create driver instance + driver = CustomCSV() + print("2️⃣ Создание driver instance... 
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + print(f" 📊 Тип первой свечи: {type(candles[0])}") + if isinstance(candles[0], dict): + print(f" 📊 Ключи: {list(candles[0].keys())}") + + # Check if all required keys are present + required_keys = ['id', 'timestamp', 'open', 'close', 'high', 'low', 'volume', 'symbol', 'exchange', 'timeframe'] + missing_keys = [key for key in required_keys if key not in candles[0]] + if missing_keys: + print(f" ❌ Отсутствующие ключи: {missing_keys}") + else: + print(f" ✅ Все необходимые ключи присутствуют") + + # Check values + print(f" 📊 id: {candles[0]['id']}") + print(f" 📊 timestamp: {candles[0]['timestamp']}") + print(f" 📊 symbol: {candles[0]['symbol']}") + print(f" 📊 exchange: {candles[0]['exchange']}") + print(f" 📊 timeframe: {candles[0]['timeframe']}") + + # Check if id is not None + if candles[0]['id'] is not None: + print(f" ✅ ID не пустой") + else: + print(f" ❌ ID пустой") + else: + print(f" ❌ Ошибка: свеча не является словарем") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_custom_driver_id() diff --git a/test_custom_driver_symbol.py b/test_custom_driver_symbol.py new file mode 100644 index 000000000..457d68c35 --- /dev/null +++ b/test_custom_driver_symbol.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +""" +Test CustomCSV driver with symbol field +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_custom_driver_symbol(): + """Test CustomCSV driver with symbol field""" + print("🧪 Тест CustomCSV driver с полем symbol") + print("=" * 50) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV + print("1️⃣ Импорт CustomCSV driver... ✅") + + # Create driver instance + driver = CustomCSV() + print("2️⃣ Создание driver instance... 
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + print(f" 📊 Тип первой свечи: {type(candles[0])}") + if isinstance(candles[0], dict): + print(f" 📊 Ключи: {list(candles[0].keys())}") + print(f" 📊 timestamp: {candles[0]['timestamp']}") + print(f" 📊 symbol: {candles[0]['symbol']}") + + # Check if all required keys are present + required_keys = ['timestamp', 'open', 'close', 'high', 'low', 'volume', 'symbol'] + missing_keys = [key for key in required_keys if key not in candles[0]] + if missing_keys: + print(f" ❌ Отсутствующие ключи: {missing_keys}") + else: + print(f" ✅ Все необходимые ключи присутствуют") + else: + print(f" ❌ Ошибка: свеча не является словарем") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_custom_driver_symbol() diff --git a/test_db_connection.py b/test_db_connection.py new file mode 100644 index 000000000..2e05cac9b --- /dev/null +++ b/test_db_connection.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +""" +Test database connection +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_db_connection(): + """Test database connection""" + print("🧪 Тестируем подключение к базе данных") + print("=" * 40) + + try: + from jesse.services.db import database + print("1️⃣ Импорт database модуля... ✅") + + # Try to open connection + database.open_connection() + print("2️⃣ Открытие подключения... ✅") + + # Check if we can query + from jesse.models.Candle import Candle + print("3️⃣ Импорт Candle модели... ✅") + + # Try to count candles + count = Candle.select().count() + print(f"4️⃣ Количество свечей в базе: {count}") + + # Close connection + database.close_connection() + print("5️⃣ Закрытие подключения... ✅") + + print("\n✅ База данных работает правильно!") + + except Exception as e: + print(f"\n❌ Ошибка с базой данных: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_db_connection() diff --git a/test_exchanges.py b/test_exchanges.py new file mode 100644 index 000000000..1a46bf542 --- /dev/null +++ b/test_exchanges.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +""" +Test available exchanges including CustomCSV +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_exchanges(): + """Test available exchanges""" + print("🧪 Тест доступных exchanges") + print("=" * 40) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers import driver_names + from jesse.enums import exchanges + print("1️⃣ Импорт driver_names и exchanges... ✅") + + print(f"\n2️⃣ Доступные exchanges ({len(driver_names)}):") + for i, exchange in enumerate(driver_names, 1): + print(f" {i:2d}. 
+
+        # Check if CustomCSV is in the list
+        if exchanges.CUSTOM_CSV in driver_names:
+            print(f"\n✅ CustomCSV found in the list: {exchanges.CUSTOM_CSV}")
+        else:
+            print("\n❌ CustomCSV NOT found in the list")
+            print(f"   Looking for: {exchanges.CUSTOM_CSV}")
+            print(f"   In list: {driver_names}")
+
+        print("\n🎉 Test finished!")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    test_exchanges()
diff --git a/test_import_api.py b/test_import_api.py
new file mode 100644
index 000000000..e8bfdb367
--- /dev/null
+++ b/test_import_api.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Test import through Jesse API
+"""
+
+import requests
+import json
+import time
+
+def test_import_api():
+    """Test import through Jesse API"""
+    print("🧪 Import test via the Jesse API")
+    print("=" * 40)
+
+    base_url = "http://localhost:9000"
+    token = "ef260e9aa3c673af240d17a2660480361a8e081d1ffeca2a5ed0e3219fc18567"
+    headers = {"Authorization": token}
+
+    try:
+        # Test 1: Check if CustomCSV is available
+        print("1️⃣ Checking available exchanges...")
+        response = requests.get(f"{base_url}/exchange/supported-symbols",
+                                headers=headers,
+                                params={"exchange": "CustomCSV"})
+
+        if response.status_code == 200:
+            data = response.json()
+            print("   ✅ CustomCSV is available")
+            print(f"   📊 Symbols: {len(data.get('data', []))}")
+            if data.get('data'):
+                print(f"   📋 First 5: {data['data'][:5]}")
+        else:
+            print(f"   ❌ Error: {response.status_code} - {response.text}")
+            return
+
+        # Test 2: Try to import ACH-USDT
+        print("\n2️⃣ Trying to import ACH-USDT...")
+
+        # First, let's check what symbols are available
+        symbols_response = requests.get(f"{base_url}/exchange/supported-symbols",
+                                        headers=headers,
+                                        params={"exchange": "CustomCSV"})
+
+        if symbols_response.status_code == 200:
+            symbols_data = symbols_response.json()
+            available_symbols = symbols_data.get('data', [])
+            print(f"   📊 Available symbols: {len(available_symbols)}")
+
+            if 'ACH' in available_symbols:
+                print("   ✅ ACH found in the symbol list")
+
+                # Try to import
+                import_data = {
+                    "exchange": "CustomCSV",
+                    "symbol": "ACH-USDT",  # Use USDT suffix as Jesse expects
+                    "start_date": "2023-01-01",
+                    "finish_date": "2023-01-02"
+                }
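+
+                # A one-day window keeps this test import small; widen the
+                # range only after the endpoint below is confirmed to work.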
print("=" * 40) + + try: + # Set Jesse project directory + import os + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.services.csv_data_provider import csv_data_provider + print("1️⃣ Импорт CSV data provider... ✅") + + # Test 1: Load tick data + print("\n2️⃣ Загружаем tick данные для IMT...") + tick_data = csv_data_provider.load_tick_data('IMT') + if tick_data is not None: + print(f" ✅ Загружено {len(tick_data)} записей") + else: + print(" ❌ Не удалось загрузить tick данные") + return + + # Test 2: Aggregate to candles + print("\n3️⃣ Агрегируем в свечи...") + candles = csv_data_provider.aggregate_to_candles(tick_data, '1m') + if candles is not None and len(candles) > 0: + print(f" ✅ Получено {len(candles)} свечей") + else: + print(" ❌ Не удалось агрегировать в свечи") + return + + # Test 3: Try to save to database with detailed error reporting + print("\n4️⃣ Пытаемся сохранить в базу данных...") + try: + from jesse.services.db import database + import jesse.helpers as jh + + print(" 📊 Проверяем условия подключения...") + print(f" 📊 is_jesse_project(): {jh.is_jesse_project()}") + print(f" 📊 is_unit_testing(): {jh.is_unit_testing()}") + + print(" 📊 Открываем подключение к базе данных...") + database.open_connection() + print(f" 📊 database.db: {database.db}") + print(" ✅ Подключение открыто") + + print(" 📊 Запускаем миграции базы данных...") + from jesse.services.migrator import run as run_migrations + run_migrations() + print(" ✅ Миграции выполнены") + + # Use the Jesse approach for database operations + print(" 📊 Используем Jesse подход для работы с базой данных...") + from jesse.models.Candle import fetch_candles_from_db, store_candles_into_db + + print(" 📊 Подготавливаем данные для вставки...") + # Convert candles to Jesse format + jesse_candles = [] + for i, candle in enumerate(candles[:100]): # Только первые 100 свечей для теста + jesse_candles.append([ + int(candle[0]), # timestamp + float(candle[1]), # open + float(candle[2]), # close + float(candle[3]), # high + float(candle[4]), # low + float(candle[5]) # volume + ]) + + print(f" 📊 Подготовлено {len(jesse_candles)} свечей для вставки") + + print(" 📊 Вставляем данные в базу используя Jesse store_candles_into_db...") + import numpy as np + store_candles_into_db('custom', 'IMT', '1m', np.array(jesse_candles)) + print(" ✅ Данные успешно вставлены!") + + # Verify insertion + print(" 📊 Проверяем вставленные данные...") + stored_candles = fetch_candles_from_db('custom', 'IMT', '1m', 0, 9999999999999) + print(f" 📊 Проверка: в базе {len(stored_candles)} записей для IMT") + + database.close_connection() + print(" ✅ Подключение закрыто") + + except Exception as e: + print(f" ❌ Ошибка при работе с базой данных: {e}") + import traceback + traceback.print_exc() + + except Exception as e: + print(f"\n❌ Общая ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_import_detailed() diff --git a/test_import_simple.py b/test_import_simple.py new file mode 100644 index 000000000..1748b0277 --- /dev/null +++ b/test_import_simple.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Simple test for CSV import functionality +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +# Test CSV data provider directly +from jesse.services.csv_data_provider import csv_data_provider + +def test_import(): + """Test CSV import functionality""" + print("🧪 Тестируем CSV импорт") + print("=" * 30) + + # 
+    # Test 1: Load tick data
+    print("1️⃣ Loading tick data for IMT...")
+    tick_data = csv_data_provider.load_tick_data('IMT')
+    if tick_data is not None:
+        print(f"   ✅ Loaded {len(tick_data)} rows")
+        print("   📊 First 3 rows:")
+        print(tick_data.head(3))
+    else:
+        print("   ❌ Could not load tick data")
+        return
+
+    # Test 2: Aggregate to candles
+    print("\n2️⃣ Aggregating into candles...")
+    candles = csv_data_provider.aggregate_to_candles(tick_data, '1m')
+    if candles is not None and len(candles) > 0:
+        print(f"   ✅ Got {len(candles)} candles")
+        print(f"   📊 First candle: {candles[0]}")
+        print(f"   📊 Last candle: {candles[-1]}")
+    else:
+        print("   ❌ Could not aggregate into candles")
+        return
+
+    # Test 3: Try to save to database (this might fail without proper DB setup)
+    print("\n3️⃣ Trying to save to the database...")
+    try:
+        success = csv_data_provider.save_candles_to_database('IMT', '1m', 'custom')
+        if success:
+            print("   ✅ Saved to the database successfully")
+        else:
+            print("   ❌ Could not save to the database")
+    except Exception as e:
+        print(f"   ❌ Error while saving: {e}")
+
+if __name__ == "__main__":
+    try:
+        test_import()
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
diff --git a/test_save_direct.py b/test_save_direct.py
new file mode 100644
index 000000000..b8bc76335
--- /dev/null
+++ b/test_save_direct.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+"""
+Direct test for save_candles_to_database function
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse'))
+
+def test_save_direct():
+    """Test save_candles_to_database function directly"""
+    print("🧪 Direct test of save_candles_to_database")
+    print("=" * 50)
+
+    try:
+        # Set Jesse project directory
+        os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template')
+        print(f"   📊 Working directory: {os.getcwd()}")
+
+        from jesse.services.csv_data_provider import csv_data_provider
+        print("1️⃣ Imported CSV data provider... ✅")
+
+        # Test save_candles_to_database directly
+        print("\n2️⃣ Testing save_candles_to_database for ACH...")
+
+        # First check if we have candles
+        candles = csv_data_provider.get_candles('ACH', '1m')
+        if candles is not None:
+            print(f"   📊 Found {len(candles)} candles for ACH")
+        else:
+            print("   ❌ No candles for ACH")
+            return
+
+        result = csv_data_provider.save_candles_to_database('ACH', '1m')
+
+        if result:
+            print("   ✅ Data saved successfully!")
+        else:
+            print("   ❌ Error while saving data")
+
+    except Exception as e:
+        print(f"\n❌ General error: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    test_save_direct()
diff --git a/test_symbol_mapping.py b/test_symbol_mapping.py
new file mode 100644
index 000000000..8420bb9f5
--- /dev/null
+++ b/test_symbol_mapping.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""
+Test symbol mapping in CustomCSV driver
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse'))
+
+def test_symbol_mapping():
+    """Test symbol mapping"""
+    print("🧪 Symbol-mapping test for the CustomCSV driver")
+    print("=" * 50)
+
+    try:
+        # Set Jesse project directory
+        os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template')
+        print(f"   📊 Working directory: {os.getcwd()}")
+
+        from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV
+        print("1️⃣ Imported CustomCSV driver... ✅")
+
+        # Create driver instance
+        driver = CustomCSV()
+        print("2️⃣ Created driver instance... ✅")
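+
+        # The list below deliberately mixes a bare base symbol (ACH) with
+        # quoted pairs in two quote currencies (-USDT and -USDC) to exercise
+        # the driver's symbol-to-directory mapping in both directions.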
✅") + + # Test different symbol formats + test_symbols = ['ACH', 'ACH-USDT', 'BTC-USDT', 'ETH-USDC'] + + for symbol in test_symbols: + print(f"\n3️⃣ Тестируем символ: {symbol}") + + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ Получено {len(candles)} свечей") + if candles: + print(f" 📊 Первая свеча: {candles[0]}") + + except Exception as e: + print(f" ❌ Ошибка для {symbol}: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_symbol_mapping() diff --git a/test_symbols_format.py b/test_symbols_format.py new file mode 100644 index 000000000..9772535cc --- /dev/null +++ b/test_symbols_format.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +""" +Test symbols format in CustomCSV driver +""" + +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'jesse')) + +def test_symbols_format(): + """Test symbols format""" + print("🧪 Тест формата символов в CustomCSV driver") + print("=" * 50) + + try: + # Set Jesse project directory + os.chdir('/Users/alxy/Desktop/1PROJ/JesseLocal/project-template') + print(f" 📊 Рабочая директория: {os.getcwd()}") + + from jesse.modes.import_candles_mode.drivers.Custom.CustomCSV import CustomCSV + print("1️⃣ Импорт CustomCSV driver... ✅") + + # Create driver instance + driver = CustomCSV() + print("2️⃣ Создание driver instance... ✅") + + # Test get_available_symbols + print("\n3️⃣ Тестируем get_available_symbols...") + symbols = driver.get_available_symbols() + print(f" ✅ Получено {len(symbols)} символов") + print(f" 📋 Первые 10: {symbols[:10]}") + + # Check format + print("\n4️⃣ Проверяем формат символов...") + usdt_symbols = [s for s in symbols if s.endswith('-USDT')] + print(f" 📊 Символов с суффиксом -USDT: {len(usdt_symbols)}") + + if len(usdt_symbols) == len(symbols): + print(" ✅ Все символы в формате SYMBOL-USDT") + else: + print(" ❌ Не все символы в формате SYMBOL-USDT") + + # Test a few symbols + print("\n5️⃣ Тестируем несколько символов...") + test_symbols = symbols[:3] # Test first 3 symbols + + for symbol in test_symbols: + try: + # Test get_starting_time + start_time = driver.get_starting_time(symbol) + print(f" ✅ {symbol}: Начальное время: {start_time}") + + # Test fetch + candles = driver.fetch(symbol, start_time, '1m') + print(f" ✅ {symbol}: Получено {len(candles)} свечей") + + except Exception as e: + print(f" ❌ {symbol}: Ошибка: {e}") + + print("\n🎉 Тест завершен!") + + except Exception as e: + print(f"\n❌ Ошибка: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_symbols_format()