Skip to content

Commit 2c6b06f

Browse files
authored
Merge pull request #1355 from rstudio/optimizers-tf-v2.11-update
Updates for TF v2.11
2 parents 20fee76 + b5f3936 commit 2c6b06f

37 files changed

+2198
-698
lines changed

.github/workflows/R-CMD-check.yaml

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
- main
55
pull_request:
66
schedule:
7-
- cron: '51 3 * * MON'
7+
- cron: '51 3 * * Fri'
88

99
name: R-CMD-check
1010

@@ -20,28 +20,27 @@ jobs:
2020
fail-fast: false
2121
matrix:
2222
include:
23-
- {os: 'ubuntu-20.04' , tf: 'default', r: 'release'}
23+
- {os: 'ubuntu-latest' , tf: 'default', r: 'release'}
2424
- {os: 'windows-latest', tf: 'default', r: 'release'}
2525
- {os: 'macOS-latest' , tf: 'default', r: 'release'}
2626

27-
- {os: 'ubuntu-20.04' , tf: 'default', r: 'oldrel'}
27+
- {os: 'ubuntu-latest' , tf: 'default', r: 'oldrel'}
2828
- {os: 'windows-latest', tf: 'default', r: 'oldrel'}
2929
- {os: 'macOS-latest' , tf: 'default', r: 'oldrel'}
3030

31-
- {os: 'ubuntu-20.04' , tf: 'default', r: 'oldrel-1'}
32-
- {os: 'ubuntu-20.04', tf: 'default', r: '3.6'} # default R in ubuntu-20.04
33-
- {os: 'ubuntu-20.04', tf: 'default', r: '3.5'} #
31+
- {os: 'ubuntu-latest', tf: 'default', r: 'oldrel-1'}
32+
- {os: 'ubuntu-latest', tf: 'default', r: '3.6'} # default R in ubuntu-20.04
33+
- {os: 'ubuntu-latest', tf: 'default', r: '3.5'}
3434

35-
# - {os: 'ubuntu-20.04' , tf: 'release', r: 'release'}
36-
# - {os: 'windows-latest', tf: 'release', r: 'release'}
37-
# - {os: 'macOS-latest' , tf: 'release', r: 'release'}
35+
- {os: 'ubuntu-20.04' , tf: 'release', r: 'release'}
36+
- {os: 'windows-latest', tf: 'release', r: 'release'}
37+
- {os: 'macOS-latest' , tf: 'release', r: 'release'}
3838

39-
- {os: 'ubuntu-20.04', tf: '2.9', r: 'release'}
40-
- {os: 'ubuntu-20.04', tf: '2.8', r: 'release'}
41-
- {os: 'ubuntu-20.04', tf: '2.7', r: 'release'}
42-
- {os: 'ubuntu-20.04', tf: '2.6', r: 'release'}
43-
- {os: 'ubuntu-20.04', tf: '2.5', r: 'release'}
44-
- {os: 'ubuntu-20.04', tf: '2.4', r: 'release'}
39+
- {os: 'ubuntu-latest', tf: '2.10', r: 'release'}
40+
- {os: 'ubuntu-latest', tf: '2.9', r: 'release'}
41+
- {os: 'ubuntu-latest', tf: '2.8', r: 'release'}
42+
- {os: 'ubuntu-latest', tf: '2.7', r: 'release'}
43+
- {os: 'ubuntu-latest', tf: '2.6', r: 'release'}
4544

4645
# these are allowed to fail
4746
# - {os: 'ubuntu-20.04', tf: 'default', r: 'devel'}

DESCRIPTION

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ Imports:
3737
glue,
3838
methods,
3939
R6,
40-
ellipsis,
4140
rlang
4241
Suggests:
4342
ggplot2,
@@ -50,5 +49,5 @@ Suggests:
5049
png,
5150
jpeg
5251
Roxygen: list(markdown = TRUE, r6 = FALSE)
53-
RoxygenNote: 7.2.1
52+
RoxygenNote: 7.2.3
5453
VignetteBuilder: knitr

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ export(optimizer_adadelta)
526526
export(optimizer_adagrad)
527527
export(optimizer_adam)
528528
export(optimizer_adamax)
529+
export(optimizer_ftrl)
529530
export(optimizer_nadam)
530531
export(optimizer_rmsprop)
531532
export(optimizer_sgd)
@@ -618,6 +619,7 @@ importFrom(reticulate,use_condaenv)
618619
importFrom(reticulate,use_python)
619620
importFrom(reticulate,use_virtualenv)
620621
importFrom(rlang,"%||%")
622+
importFrom(rlang,.data)
621623
importFrom(rlang,names2)
622624
importFrom(stats,predict)
623625
importFrom(tensorflow,as_tensor)

NEWS.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
11
# keras (development version)
22

3+
- Default TensorFlow version installed by `install_keras()` is now 2.11.
4+
5+
- All optimizers have been updated for keras/tensorflow version 2.11.
6+
Arguments to all the optimizers have changed. To access the previous
7+
optimizer implementations, use the constructors available at
8+
`keras$optimizers$legacy`. For example, use `keras$optimizers$legacy$Adam()`
9+
for the previous implementation of `optimizer_adam()`.
10+
11+
- New optimizer `optimizer_ftrl()`.
12+
13+
- Updates to layers:
14+
- `layer_attention()` gains `score_mode` and `dropout` arguments.
15+
- `layer_discretization()` gains `output_mode` and `sparse` arguments.
16+
- `layer_gaussian_dropout()` and `layer_gaussian_noise()` gain a `seed` argument.
17+
- `layer_hashing()` gains `output_mode` and `sparse` arguments.
18+
- `layer_integer_lookup()` gains `vocabulary_dtype` and `idf_weights` arguments.
19+
- `layer_normalization()` gains an `invert` argument.
20+
- `layer_string_lookup()` gains an `idf_weights` argument.
21+
322
- Fixed issue where `input_shape` supplied to custom layers defined with `new_layer_class()`
423
would result in an error (#1338)
524

R/history.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ print.keras_training_history <- function(x, ...) {
5959
#' black and white.
6060
#' @param ... Additional parameters to pass to the [plot()] method.
6161
#'
62+
#' @importFrom rlang .data
63+
#'
6264
#' @export
6365
plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", "ggplot2", "base"),
6466
smooth = getOption("keras.plot.history.smooth", TRUE),
@@ -95,11 +97,11 @@ plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto",
9597

9698
if (do_validation) {
9799
if (theme_bw)
98-
p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value, color = ~data, fill = ~data, linetype = ~data, shape = ~data))
100+
p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value, color = .data$data, fill = .data$data, linetype = .data$data, shape = .data$data))
99101
else
100-
p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value, color = ~data, fill = ~data))
102+
p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value, color = .data$data, fill = .data$data))
101103
} else {
102-
p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value))
104+
p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value))
103105
}
104106

105107
smooth_args <- list(se = FALSE, method = 'loess', na.rm = TRUE,

R/install.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ install_keras <- function(method = c("auto", "virtualenv", "conda"),
5858
)
5959
}
6060

61-
default_version <- numeric_version("2.10")
61+
default_version <- numeric_version("2.11")
6262

6363
default_extra_packages <- function(tensorflow_version = "default") {
6464
pkgs <- c(

R/layer-attention.R

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,58 @@
11

2-
#' Creates attention layer
2+
3+
#' Dot-product attention layer, a.k.a. Luong-style attention
34
#'
4-
#' Dot-product attention layer, a.k.a. Luong-style attention.
5+
#' @details
6+
#' inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor
7+
#' of shape `[batch_size, Tv, dim]` and `key` tensor of shape
8+
#' `[batch_size, Tv, dim]`. The calculation follows the steps:
59
#'
6-
#' @inheritParams layer_dense
10+
#' 1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot
11+
#' product: `scores = tf$matmul(query, key, transpose_b=TRUE)`.
12+
#' 2. Use scores to calculate a distribution with shape
13+
#' `[batch_size, Tq, Tv]`: `distribution = tf$nn$softmax(scores)`.
14+
#' 3. Use `distribution` to create a linear combination of `value` with
15+
#' shape `[batch_size, Tq, dim]`:
16+
#' return `tf$matmul(distribution, value)`.
17+
#'
18+
#'
19+
#' @param inputs List of the following tensors:
20+
#'
21+
#' - query: Query Tensor of shape `[batch_size, Tq, dim]`.
22+
#'
23+
#' - value: Value Tensor of shape `[batch_size, Tv, dim]`.
724
#'
8-
#' @param inputs a list of inputs first should be the query tensor, the second the value tensor
9-
#' @param use_scale If True, will create a scalar variable to scale the attention scores.
10-
#' @param causal Boolean. Set to True for decoder self-attention. Adds a mask such that position i cannot attend to positions j > i.
11-
#' This prevents the flow of information from the future towards the past.
25+
#' - key: Optional key Tensor of shape `[batch_size, Tv, dim]`. If not
26+
#' given, will use value for both key and value, which is the most common
27+
#' case.
28+
#'
29+
#' @param use_scale If `TRUE`, will create a scalar variable to scale the attention
30+
#' scores.
31+
#'
32+
#' @param dropout Float between 0 and 1. Fraction of the units to drop for the
33+
#' attention scores. Defaults to 0.0.
34+
#'
35+
#' @param score_mode Function to use to compute attention scores, one of
36+
#' `{"dot", "concat"}`. `"dot"` refers to the dot product between the query
37+
#' and key vectors. `"concat"` refers to the hyperbolic tangent of the
38+
#' concatenation of the query and key vectors.
39+
#'
40+
#' @param ... standard layer arguments (e.g., batch_size, dtype, name, trainable, weights)
1241
#'
1342
#' @family core layers
1443
#' @family attention layers
1544
#'
45+
#'
46+
#' @seealso
47+
#' + <https://www.tensorflow.org/api_docs/python/tf/keras/layers/Attention>
1648
#' @export
17-
layer_attention <- function(inputs,use_scale=FALSE, causal = FALSE, batch_size = NULL, dtype = NULL,
18-
name = NULL, trainable = NULL, weights = NULL) {
19-
if (!is_tensorflow_implementation() || !tensorflow::tf_version() >= "1.14")
20-
stop("layer_dense_features requires TensorFlow implementation and version >= 1.14")
21-
create_layer(keras$layers$Attention, inputs, list(
22-
use_scale = use_scale,
23-
causal = causal,
24-
batch_size = batch_size,
25-
dtype = dtype,
26-
name = name,
27-
trainable = trainable,
28-
weights = weights)
29-
)
30-
31-
32-
}
49+
layer_attention <-
50+
function(inputs, use_scale = FALSE, score_mode = "dot", ..., dropout = NULL)
51+
{
52+
args <- capture_args(match.call(), ignore = "inputs")
53+
args$dropout <- dropout
54+
create_layer(keras$layers$Attention, inputs, args)
55+
}
3356

3457
#' MultiHeadAttention layer
3558
#'

R/layers-noise.R

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
#'
1212
#' @param stddev float, standard deviation of the noise distribution.
1313
#'
14+
#' @param seed Integer, optional random seed to enable deterministic behavior.
15+
#'
16+
#' @param ... standard layer arguments.
17+
#'
1418
#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
1519
#' of integers, does not include the samples axis) when using this layer as
1620
#' the first layer in a model.
@@ -20,19 +24,14 @@
2024
#' @family noise layers
2125
#'
2226
#' @export
23-
layer_gaussian_noise <- function(object, stddev, input_shape = NULL,
24-
batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
25-
name = NULL, trainable = NULL, weights = NULL) {
26-
create_layer(keras$layers$GaussianNoise, object, list(
27-
stddev = stddev,
28-
input_shape = normalize_shape(input_shape),
29-
batch_input_shape = normalize_shape(batch_input_shape),
30-
batch_size = as_nullable_integer(batch_size),
31-
dtype = dtype,
32-
name = name,
33-
trainable = trainable,
34-
weights = weights
35-
))
27+
layer_gaussian_noise <-
28+
function(object, stddev, seed = NULL, ...)
29+
{
30+
args <- capture_args(match.call(),
31+
modifiers = c(standard_layer_arg_modifiers,
32+
seed = as_nullable_integer),
33+
ignore = "object")
34+
create_layer(keras$layers$GaussianNoise, object, args)
3635
}
3736

3837
#' Apply multiplicative 1-centered Gaussian noise.
@@ -44,6 +43,10 @@ layer_gaussian_noise <- function(object, stddev, input_shape = NULL,
4443
#' @param rate float, drop probability (as with `Dropout`). The multiplicative
4544
#' noise will have standard deviation `sqrt(rate / (1 - rate))`.
4645
#'
46+
#' @param seed Integer, optional random seed to enable deterministic behavior.
47+
#'
48+
#' @param ... standard layer arguments.
49+
#'
4750
#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
4851
#' of integers, does not include the samples axis) when using this layer as
4952
#' the first layer in a model.
@@ -53,25 +56,24 @@ layer_gaussian_noise <- function(object, stddev, input_shape = NULL,
5356
#' @section References:
5457
#' - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
5558
#'
59+
#' @seealso
60+
#' + <https://www.tensorflow.org/api_docs/python/tf/keras/layers/GaussianDropout>
61+
#'
5662
#' @family noise layers
5763
#'
5864
#' @export
59-
layer_gaussian_dropout <- function(object, rate, input_shape = NULL,
60-
batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
61-
name = NULL, trainable = NULL, weights = NULL) {
62-
create_layer(keras$layers$GaussianDropout, object, list(
63-
rate = rate,
64-
input_shape = normalize_shape(input_shape),
65-
batch_input_shape = normalize_shape(batch_input_shape),
66-
batch_size = as_nullable_integer(batch_size),
67-
dtype = dtype,
68-
name = name,
69-
trainable = trainable,
70-
weights = weights
71-
))
65+
layer_gaussian_dropout <-
66+
function(object, rate, seed = NULL, ...)
67+
{
68+
args <- capture_args(match.call(),
69+
modifiers = c(standard_layer_arg_modifiers,
70+
seed = as_nullable_integer),
71+
ignore = "object")
72+
create_layer(keras$layers$GaussianDropout, object, args)
7273
}
7374

7475

76+
7577
#' Applies Alpha Dropout to the input.
7678
#'
7779
#' Alpha Dropout is a dropout that keeps mean and variance of inputs to their

0 commit comments

Comments
 (0)