@@ -52,75 +52,36 @@ a more extensive list on API and usage.
52
52
.multi_sample
53
53
.sample_proportion
54
54
55
- Construct informative batches
56
- -----------------------------
55
+ Merge / stratify / oversample datastreams
56
+ -----------------------------------------
57
+ The fruit datastreams given below repeatedly yield the string of their fruit
58
+ type.
57
59
58
- >>> datastream = Datastream.merge([
59
- ... (apple_datastream, 2 ),
60
- ... (pear_datastream, 1 ),
61
- ... (banana_datastream, 1 ),
62
- ... ])
63
- ... next (iter (datastream.data_loader(batch_size = 6 )))
64
- ['apple', 'apple', 'pear', 'banana', 'apple', 'apple']
60
+ .. code-block:: python
65
61
62
+ >>> datastream = Datastream.merge([
63
+ ... (apple_datastream, 2),
64
+ ... (pear_datastream, 1),
65
+ ... (banana_datastream, 1),
66
+ ... ])
67
+ >>> next(iter(datastream.data_loader(batch_size=8)))
68
+ ['apple', 'apple', 'pear', 'banana', 'apple', 'apple', 'pear', 'banana']
66
69
67
70
Zip independently sampled datastreams
68
71
-------------------------------------
72
+ The fruit datastreams given below repeatedly yield the string of their fruit
73
+ type.
69
74
70
75
.. code-block:: python
71
76
72
- datastream = Datastream.zip([
73
- apple_datastream,
74
- Datastream.merge([pear_datastream, banana_datastream])
75
- ])
76
-
77
- Pipeline functions
78
- ------------------
79
-
80
- .. code-block :: python
81
-
82
- from PIL import Image
83
- from imgaug import augmenters as iaa
84
- from datastream import Dataset
85
-
86
- augmenter = iaa.Sequential([... ])
87
-
88
- def preprocess (image , class_names ):
89
- ...
90
-
91
- dataset = (
92
- Dataset.from_dataframe(df)
93
- .map(lambda row : (
94
- row[' image_path' ],
95
- row[' class_names' ],
96
- ))
97
- .map(lambda image_path , class_names : (
98
- Image.open(image_path),
99
- class_names,
100
- ))
101
- .map(lambda image , class_names : (
102
- augmenter.augment(image = image),
103
- class_names,
104
- ))
105
- .map(preprocess)
106
- )
107
-
108
- Datastream to pytorch data loader
109
- ---------------------------------
110
-
111
- .. code-block :: python
112
-
113
- data_loader = (
114
- Datastream(dataset)
115
- .data_loader(
116
- batch_size = 32 ,
117
- num_workers = 8 ,
118
- n_batches_per_epoch = 100 ,
119
- )
120
- )
77
+ >>> datastream = Datastream.zip([
78
+ ... apple_datastream,
79
+ ... Datastream.merge([pear_datastream, banana_datastream]),
80
+ ... ])
81
+ >>> next(iter(datastream.data_loader(batch_size=4)))
82
+ [('apple', 'pear'), ('apple', 'banana'), ('apple', 'pear'), ('apple', 'banana')]
121
83
122
84
More usage examples
123
85
-------------------
124
-
125
86
See the `documentation <https://pytorch-datastream.readthedocs.io/en/latest/ >`_
126
- for examples with oversampling / stratification and weighted sampling .
87
+ for more usage examples.
0 commit comments