Skip to content

Commit 9f2c9e4

Browse files
author
Beat Buesser
committed
Remove unrelated changes
Signed-off-by: Beat Buesser <[email protected]>
1 parent 486cd97 commit 9f2c9e4

File tree

1 file changed

+3
-147
lines changed

1 file changed

+3
-147
lines changed

art/estimators/classification/pytorch.py

Lines changed: 3 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,6 @@ def fit( # pylint: disable=W0221
378378
"""
379379
import torch # lgtm [py/repeated-import]
380380

381-
use_ffcv = kwargs.get("ffcv")
382-
383381
# Set model mode
384382
self._model.train(mode=training_mode)
385383

@@ -397,157 +395,15 @@ def fit( # pylint: disable=W0221
397395
num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
398396
ind = np.arange(len(x_preprocessed))
399397

400-
if use_ffcv:
401-
self._fit_ffcv(
402-
x=x_preprocessed,
403-
y=y_preprocessed,
404-
batch_size=batch_size,
405-
nb_epochs=nb_epochs,
406-
training_mode=training_mode,
407-
**kwargs,
408-
)
409-
else:
410-
# Start training
411-
for _ in range(nb_epochs):
412-
# Shuffle the examples
413-
random.shuffle(ind)
414-
415-
# Train for one epoch
416-
for m in range(num_batch):
417-
i_batch = torch.from_numpy(x_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(
418-
self._device
419-
)
420-
o_batch = torch.from_numpy(y_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(
421-
self._device
422-
)
423-
424-
# Zero the parameter gradients
425-
self._optimizer.zero_grad()
426-
427-
# Perform prediction
428-
model_outputs = self._model(i_batch)
429-
430-
# Form the loss function
431-
loss = self._loss(model_outputs[-1], o_batch) # lgtm [py/call-to-non-callable]
432-
433-
# Do training
434-
if self._use_amp: # pragma: no cover
435-
from apex import amp # pylint: disable=E0611
436-
437-
with amp.scale_loss(loss, self._optimizer) as scaled_loss:
438-
scaled_loss.backward()
439-
440-
else:
441-
loss.backward()
442-
443-
self._optimizer.step()
444-
445-
def _fit_ffcv(
446-
self,
447-
x: np.ndarray,
448-
y: np.ndarray,
449-
batch_size: int = 128,
450-
nb_epochs: int = 10,
451-
training_mode: bool = True,
452-
**kwargs,
453-
) -> None:
454-
"""
455-
Fit the classifier on the training set `(x, y)`.
456-
457-
:param x: Training data.
458-
:param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
459-
shape (nb_samples,).
460-
:param batch_size: Size of batches.
461-
:param nb_epochs: Number of epochs to use for training.
462-
:param training_mode: `True` for model set to training mode and `'False` for model set to evaluation mode.
463-
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
464-
and providing it takes no effect.
465-
"""
466-
ind = np.arange(len(x))
467-
468-
# FFCV - prepare
469-
from ffcv.writer import DatasetWriter
470-
from ffcv.fields import NDArrayField
471-
472-
# Your dataset (`torch.utils.data.Dataset`) of (image, label) pairs
473-
# my_dataset = make_my_dataset()
474-
475-
class NumpyDataset:
476-
def __init__(self, x, y):
477-
self.X = x
478-
self.Y = y
479-
480-
def __getitem__(self, idx):
481-
return (self.X[idx], self.Y[idx])
482-
483-
def __len__(self):
484-
return len(self.X)
485-
486-
my_dataset = NumpyDataset(x, y)
487-
488-
write_path = "/home/bbuesser/tmp/ffcv/ds.beton"
489-
490-
# Pass a type for each data field
491-
jpeg_quality = 50
492-
493-
writer = DatasetWriter(
494-
write_path,
495-
{
496-
# Tune options to optimize dataset size, throughput at train-time
497-
# 'image': RGBImageField(max_resolution=256, jpeg_quality=jpeg_quality),
498-
"image": NDArrayField(dtype=x.dtype, shape=(1, 28, 28)),
499-
"label": NDArrayField(dtype=y.dtype, shape=(10,)),
500-
},
501-
)
502-
503-
# Write dataset
504-
writer.from_indexed_dataset(my_dataset)
505-
506-
# FFCV
507-
from ffcv.loader import Loader, OrderOption
508-
from ffcv.transforms import ToTensor, ToDevice, ToTorchImage, Cutout
509-
from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder, NDArrayDecoder
510-
511-
# Random resized crop
512-
# decoder = RandomResizedCropRGBImageDecoder((224, 224))
513-
514-
# Data decoding and augmentation
515-
# image_pipeline = [decoder, Cutout(), ToTensor(), ToTorchImage(), ToDevice(0)]
516-
image_pipeline = [NDArrayDecoder(), ToTensor()]
517-
label_pipeline = [NDArrayDecoder(), ToTensor()]
518-
519-
# Pipeline for each data field
520-
pipelines = {"image": image_pipeline, "label": label_pipeline}
521-
522-
# Replaces PyTorch data loader (`torch.utils.data.Dataloader`)
523-
# write_path = "/home/bbuesser/tmp/ffcv/"
524-
bs = batch_size
525-
num_workers = 1
526-
# loader = Loader(
527-
# write_path, batch_size=bs, num_workers=num_workers, order=OrderOption.RANDOM, pipelines=pipelines
528-
# )
529-
loader = Loader(
530-
write_path,
531-
batch_size=bs,
532-
num_workers=num_workers,
533-
order=OrderOption.RANDOM,
534-
pipelines=pipelines,
535-
os_cache=True,
536-
)
537-
538398
# Start training
539399
for _ in range(nb_epochs):
540-
print(_)
541400
# Shuffle the examples
542401
random.shuffle(ind)
543402

544403
# Train for one epoch
545-
# for m in range(num_batch):
546-
# i_batch = torch.from_numpy(x_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
547-
# o_batch = torch.from_numpy(y_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
548-
from tqdm import tqdm
549-
550-
for i, (i_batch, o_batch) in enumerate(tqdm(loader)):
404+
for m in range(num_batch):
405+
i_batch = torch.from_numpy(x_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
406+
o_batch = torch.from_numpy(y_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
551407

552408
# Zero the parameter gradients
553409
self._optimizer.zero_grad()

0 commit comments

Comments
 (0)