forked from fiit-ba/ML-for-arbitrage-in-cryptoexchanges
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_dataset.py
More file actions
70 lines (55 loc) · 2.51 KB
/
load_dataset.py
File metadata and controls
70 lines (55 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from os import listdir
import pandas as pd
class Load_dataset:
    """
    Load raw and preprocessed datasets from CSV files into nested dictionaries.

    Every loader resets ``self.datasets`` before filling it, stores the result there and also
    returns it, so the attribute always reflects the most recent load call.
    """

    # Interval and exchange keys that every pair is pre-populated with (value None until loaded).
    _INTERVALS = ("1m", "5m", "15m")
    _EXCHANGES = ("Binance", "Bybit")

    def __init__(self, pairs):
        """
        :param pairs: pair names, spelled as in the dataset file names, used as top-level keys
        """
        self.pairs = pairs
        self.datasets = {}

    def _reset_datasets(self, empty_slot):
        """
        Give every pair (already stored or listed in ``self.pairs``) a fresh, empty slot.

        :param empty_slot: zero-argument callable building the empty per-pair dictionary
        """
        for pair in [*self.datasets, *self.pairs]:
            self.datasets[pair] = empty_slot()

    def load_datasets(self):
        """
        Load gathered datasets for further preprocessing, divided according to pair, time interval,
        and exchange.

        Files in ``./dataset`` are expected to be named ``<exchange>_<any>_<pair>_<interval>...``.
        Files that do not follow this pattern, that belong to a pair which was not requested, or
        whose interval is not one of 1m/5m/15m are skipped.

        :return: loaded datasets as ``{pair: {interval: {exchange: DataFrame or None}}}``
        """
        self._reset_datasets(
            lambda: {interval: dict.fromkeys(self._EXCHANGES) for interval in self._INTERVALS}
        )
        for file in listdir("./dataset"):
            parts = file.split('_', 3)
            if len(parts) < 4:
                # Not a dataset file (e.g. ".DS_Store"): fewer than three underscores.
                continue
            exchange, _, pair, rest = parts
            # The pair is the whole token between the 2nd and 3rd underscore, whatever its length.
            if pair not in self.datasets:
                continue
            # "15m" never starts with "1m" or "5m", so at most one interval can match.
            interval = next((known for known in self._INTERVALS if rest.startswith(known)), None)
            if interval is None:
                continue
            self.datasets[pair][interval][exchange] = pd.read_csv("./dataset/" + file,
                                                                  index_col=False)
        return self.datasets

    def load_preprocessed_datasets(self):
        """
        Load preprocessed datasets divided according to pair and time interval.

        Files in ``./dataset_preprocessed`` are expected to be named ``<pair>_<interval>.<ext>``.
        Files without an underscore or belonging to a pair which was not requested are skipped.

        :return: loaded datasets as ``{pair: {interval: DataFrame or None}}``
        """
        self._reset_datasets(lambda: dict.fromkeys(self._INTERVALS))
        for file in listdir("./dataset_preprocessed"):
            pair, separator, rest = file.partition('_')
            if not separator or pair not in self.datasets:
                continue
            # The interval is everything between the underscore and the first dot.
            interval = rest.partition('.')[0]
            self.datasets[pair][interval] = pd.read_csv("./dataset_preprocessed/" + file,
                                                        index_col=False)
        return self.datasets

    def load_preprocessed_datasets_for_training(self):
        """
        Load preprocessed datasets divided according to pair and time interval without index and
        date columns (the first two columns of every file are dropped).

        Intervals for which no file was found stay ``None`` instead of raising.

        :return: loaded datasets as ``{pair: {interval: DataFrame or None}}``
        """
        self.load_preprocessed_datasets()
        for by_interval in self.datasets.values():
            for interval, dataset in by_interval.items():
                if dataset is not None:
                    by_interval[interval] = dataset.iloc[:, 2:]
        return self.datasets