forked from svishnu88/CatsandDogs
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
119 lines (99 loc) · 3.3 KB
/
data.py
File metadata and controls
119 lines (99 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from typing import Tuple
import PIL
from sklearn.utils import shuffle
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
from PIL.Image import Image as PILImage
from torch.utils.data.dataloader import DataLoader
from augmentations import get_augmentations
import numpy as np
import pandas as pd
from pytorch_lightning import LightningDataModule
from sklearn.model_selection import KFold
import os
import numpy as np
import torch
# Relative location of the data directory.
# NOTE(review): no definition visible in this file reads this global (the
# functions/classes use their own `path` argument and the __main__ block
# rebinds it) -- confirm whether external importers rely on it.
path = Path("../data/")
def list_files(path: Path):
    """Return the entries directly inside ``path`` in sorted order.

    ``Path.iterdir`` yields children in arbitrary, filesystem-dependent
    order. Sorting makes the listing deterministic, so index-based consumers
    (for example a seeded K-fold split over the returned list) select the
    same files on every run and on every machine.

    Args:
        path: Directory to list. The listing is not recursive.

    Returns:
        A list of ``Path`` objects for every child of ``path`` (files and
        sub-directories alike), sorted ascending. Empty if the directory
        has no children.

    Raises:
        OSError: Propagated from ``Path.iterdir`` (e.g. the directory does
            not exist or is not a directory).
    """
    return sorted(path.iterdir())
def get_label(file_path: Path):
    """Extract the class name encoded at the start of a file name.

    The label is whatever precedes the first ``.`` in the file's stem, e.g.
    ``cat.0.jpg`` -> ``"cat"``. A stem without any dot is returned whole.

    Args:
        file_path: Path whose final component carries the label prefix.

    Returns:
        The text before the first dot of ``file_path.stem``.
    """
    prefix, _, _ = file_path.stem.partition('.')
    return prefix
class DogsandCatsDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for image files.

    The class name is taken from each file name via ``get_label`` and mapped
    to a number through ``self.labels`` (``'cat'`` -> 0, ``'dog'`` -> 1).
    """

    def __init__(self, files, transform=None) -> None:
        """Store the file collection and the optional transform.

        Args:
            files: Indexable, sized collection of image paths.
            transform: Optional callable invoked as
                ``transform(image=array)`` that returns a mapping with an
                ``"image"`` entry. ``None`` leaves the decoded array as is.
        """
        super().__init__()
        self.files = files
        self.transform = transform
        self.labels = {'cat': 0, 'dog': 1}

    def __getitem__(self, index) -> Tuple[object, torch.Tensor]:
        """Load, decode and (optionally) transform the sample at ``index``.

        Returns:
            ``(image, label)`` where ``image`` is an RGB ``numpy`` array of
            the decoded file, or whatever ``transform`` returns under its
            ``"image"`` key, and ``label`` is a scalar ``float32`` tensor.

        Raises:
            KeyError: If the file-name prefix is not a key of ``self.labels``.
        """
        file_path = self.files[index]
        label = self.labels[get_label(file_path)]
        # The context manager releases the file handle deterministically.
        # Converting to RGB gives every sample three channels, so grayscale,
        # palette, RGBA or CMYK files cannot break batching downstream.
        with Image.open(file_path) as img:
            image = np.array(img.convert("RGB"))
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.files)
class DogsandCatsDataModule(LightningDataModule):
    """Lightning data module serving one K-fold split of ``<path>/train``.

    The files under ``<path>/train`` are partitioned with a shuffled
    ``KFold`` (fixed ``random_state=2020``); fold ``fold_id`` becomes the
    validation set and the remaining folds the training set.
    """

    def __init__(
        self,
        path: str = None,
        aug_p: float = 0.5,
        val_pct: float = 0.2,
        img_sz: int = 224,
        batch_size: int = 64,
        num_workers: int = 4,
        fold_id: int = 0,
        splits: int = 5
    ):
        """Store the configuration; no files are touched here.

        Args:
            path: Data root containing a ``train`` sub-directory. ``None``
                falls back to ``../data/``.
            aug_p: Forwarded to ``get_augmentations`` as ``p``.
            val_pct: Stored on the instance; not read by any method of this
                class (the validation share is determined by ``splits``).
            img_sz: Forwarded to ``get_augmentations`` as ``image_size``.
            batch_size: Batch size for both data loaders.
            num_workers: Worker process count for both data loaders.
            fold_id: Index of the fold used for validation.
            splits: Number of K-fold partitions.
        """
        super().__init__()
        # Path(None) raises TypeError, so the declared default used to crash;
        # fall back to the relative data directory this module uses elsewhere.
        self.path = Path("../data/") if path is None else Path(path)
        self.aug_p = aug_p
        self.val_pct = val_pct
        self.img_sz = img_sz
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.fold_id = fold_id
        self.splits = splits

    def _split_files(self):
        """Compute the train/validation file lists and the two transforms.

        Raises:
            IndexError: If ``fold_id`` does not index one of the folds.
        """
        files = np.array(list_files(self.path / 'train'))
        kf = KFold(n_splits=self.splits, random_state=2020, shuffle=True)
        train_idxs, validation_idxs = list(kf.split(files))[self.fold_id]
        self.train_files = files[train_idxs]
        self.valid_files = files[validation_idxs]
        self.train_transform, self.test_transform = get_augmentations(
            p=self.aug_p, image_size=self.img_sz
        )

    def prepare_data(self):
        """Build the split (kept so direct ``prepare_data()`` callers work).

        Lightning calls this hook on a single process in distributed runs,
        so the same state is also built in ``setup``.
        """
        self._split_files()

    def setup(self, stage=None):
        """Build the split on every process before data loaders are requested.

        Args:
            stage: Stage name passed by the trainer; unused.
        """
        self._split_files()

    def _make_loader(self, files, transform, shuffle):
        """Wrap ``files`` in a dataset and return a configured ``DataLoader``."""
        dataset = DogsandCatsDataset(files=files, transform=transform)
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=shuffle,
            pin_memory=True,
        )

    def train_dataloader(self):
        """Return the shuffled loader over the training files."""
        return self._make_loader(
            self.train_files, self.train_transform, shuffle=True
        )

    def val_dataloader(self):
        """Return the unshuffled loader over the validation files."""
        return self._make_loader(
            self.valid_files, self.test_transform, shuffle=False
        )
if __name__ == "__main__":
    # Smoke check for the Dogs-and-Cats data module: build the split, pull
    # one training batch and print the shapes of its two members.
    dm = DogsandCatsDataModule(Path("../data"))
    dm.prepare_data()
    first_batch = next(iter(dm.train_dataloader()))
    xb, yb = first_batch
    print(xb.shape, yb.shape)