Skip to content

Commit 2597582

Browse files
authored
Merge pull request #725 from dfalbel/feature/flow_images_from_dataframe
Feature/flow images from dataframe
2 parents fcaa323 + 643bca3 commit 2597582

File tree

10 files changed

+259
-1
lines changed

10 files changed

+259
-1
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ export(flag_numeric)
9292
export(flag_string)
9393
export(flags)
9494
export(flow_images_from_data)
95+
export(flow_images_from_dataframe)
9596
export(flow_images_from_directory)
9697
export(freeze_weights)
9798
export(from_config)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
- Added AppVeyor to test on Windows.
77

8+
- Added `flow_images_from_dataframe` function (#658).
9+
810

911
## Keras 2.2.4 (CRAN)
1012

R/preprocessing.R

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,104 @@ flow_images_from_directory <- function(
843843
do.call(generator$flow_from_directory, args)
844844
}
845845

846+
#' Takes the dataframe and the path to a directory and generates batches of
#' augmented/normalized data.
#'
#' @details Yields batches indefinitely, in an infinite loop.
#'
#' @inheritParams image_load
#' @inheritParams flow_images_from_data
#'
#' @param dataframe `data.frame` containing the filepaths relative to
#'   `directory` (or absolute paths if `directory` is `NULL`) of the images in
#'   a character column. It should include other column/s depending on the
#'   `class_mode`:
#'   - if `class_mode` is "categorical" (default value) it must
#'     include the `y_col` column with the class/es of each image. Values in
#'     column can be character/list if a single class or list if multiple
#'     classes.
#'   - if `class_mode` is "binary" or "sparse" it must include the given
#'     `y_col` column with class values as strings.
#'   - if `class_mode` is "other" it
#'     should contain the columns specified in `y_col`.
#'   - if `class_mode` is "input" or `NULL` no extra column is needed.
#' @param directory character, path to the directory to read images from.
#'   If `NULL`, data in `x_col` column should be absolute paths.
#' @param x_col character, column in dataframe that contains the filenames
#'   (or absolute paths if directory is `NULL`).
#' @param y_col string or list, column/s in dataframe that has the target data.
#' @param color_mode one of "grayscale", "rgb". Default: "rgb". Whether the
#'   images will be converted to have 1 or 3 color channels.
#' @param drop_duplicates Boolean, whether to drop duplicate rows based on
#'   filename.
#' @param classes optional list of classes (e.g. `c('dogs', 'cats')`). Default:
#'   `NULL`. If not provided, the list of classes will be automatically inferred
#'   from the `y_col` (and the order of the classes, which will map to the
#'   label indices, will be alphanumeric).
#'   The dictionary containing the mapping from class names to class indices
#'   can be obtained via the attribute `class_indices`.
#' @param class_mode one of "categorical", "binary", "sparse", "input",
#'   "other" or `NULL`. Default: "categorical". Mode for yielding the targets:
#'   * "binary": 1D array of binary labels,
#'   * "categorical": 2D array of one-hot encoded labels. Supports multi-label
#'     output.
#'   * "sparse": 1D array of integer labels,
#'   * "input": images identical to input images (mainly used to work with
#'     autoencoders),
#'   * "other": array of y_col data,
#'   * `NULL`: no targets are returned (the generator will only yield batches
#'     of image data, which is useful to use in `predict_generator()`).
#'
#' @note
#' This function requires that `pandas` (python module) is installed in the
#' same environment as `tensorflow` and `keras`.
#'
#' If you are using `r-tensorflow` (the default environment) you can install
#' `pandas` by running
#' `reticulate::virtualenv_install("pandas", envname = "r-tensorflow")`
#' or `reticulate::conda_install("pandas", envname = "r-tensorflow")` depending
#' on the kind of environment you are using.
#'
#' @section Yields: `(x, y)` where `x` is an array of image data and `y` is an
#'   array of corresponding labels. The generator loops indefinitely.
#'
#' @family image preprocessing
#' @export
flow_images_from_dataframe <- function(
  dataframe, directory = NULL, x_col = "filename", y_col = "class",
  generator = image_data_generator(), target_size = c(256, 256),
  color_mode = "rgb", classes = NULL, class_mode = "categorical",
  batch_size = 32, shuffle = TRUE, seed = NULL, save_to_dir = NULL,
  save_prefix = "", save_format = "png", subset = NULL,
  interpolation = "nearest", drop_duplicates = TRUE) {

  # Fail early with installation instructions when pandas cannot be imported.
  # stop() pastes its arguments together without a separator, so every
  # fragment has to carry its own trailing space.
  if (!reticulate::py_module_available("pandas")) {
    stop("Pandas (python module) must be installed in the same environment as Keras. ",
         'Install it using reticulate::virtualenv_install("pandas", envname = "r-tensorflow") ',
         'or reticulate::conda_install("pandas", envname = "r-tensorflow") depending on ',
         'the kind of environment you are using.')
  }

  # Built with list() (not `$<-`) so that NULL-valued entries such as
  # `classes = NULL` are kept in the argument list and forwarded explicitly.
  args <- list(
    dataframe = as.data.frame(dataframe),
    directory = normalize_path(directory),
    x_col = x_col, y_col = y_col,
    target_size = as.integer(target_size),
    color_mode = color_mode,
    classes = classes,
    class_mode = class_mode,
    batch_size = as.integer(batch_size),
    shuffle = shuffle,
    seed = as_nullable_integer(seed),
    save_to_dir = normalize_path(save_to_dir),
    save_prefix = save_prefix,
    save_format = save_format,
    drop_duplicates = drop_duplicates
  )

  # `interpolation` is only forwarded when the installed Keras is >= 2.1.2.
  if (keras_version() >= "2.1.2") {
    args$interpolation <- interpolation
  }

  # `subset` is only forwarded when the installed Keras is >= 2.1.5. Assigning
  # NULL here adds no entry, so a NULL `subset` is simply not passed on.
  if (keras_version() >= "2.1.5") {
    args$subset <- subset
  }

  do.call(generator$flow_from_dataframe, args)
}
943+
846944

847945

848946

man/fit_image_data_generator.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/flow_images_from_data.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/flow_images_from_dataframe.Rd

Lines changed: 123 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/flow_images_from_directory.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/image_load.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/image_to_array.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-preprocessing.R

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,5 +75,34 @@ test_succeeds("images arrays can be resized", {
7575
}
7676
})
7777

78+
test_succeeds("flow images from dataframe works", {

  # The generator needs the pandas python module; skip when it is missing.
  pandas_available <- reticulate::py_module_available("pandas")
  if (!pandas_available) {
    skip("Needs pandas")
  }

  # The expectations only run when the pillow check passes.
  if (have_pillow()) {

    # Ten rows pointing at the same image file, each with a distinct label.
    image_files <- rep("digit.jpeg", 10)
    image_labels <- letters[1:10]
    image_table <- data.frame(
      fname = image_files,
      class = image_labels,
      stringsAsFactors = FALSE
    )

    # Every row uses the same filename, so duplicate dropping is turned off.
    image_iterator <- flow_images_from_dataframe(
      image_table,
      directory = ".",
      x_col = "fname",
      y_col = "class",
      drop_duplicates = FALSE
    )

    first_batch <- reticulate::iter_next(image_iterator)
    image_dims <- dim(first_batch[[1]])
    label_dims <- dim(first_batch[[2]])

    expect_equal(image_dims, c(10, 256, 256, 3))
    expect_equal(label_dims, c(10, 10))
  }
})
105+
106+
78107

79108

0 commit comments

Comments
 (0)