|
1 | 1 | import math |
2 | 2 |
|
| 3 | +import numpy as np |
| 4 | + |
| 5 | +from keras_core import backend |
| 6 | +from keras_core import operations as ops |
3 | 7 | from keras_core.api_export import keras_core_export |
4 | 8 | from keras_core.backend import random |
5 | 9 | from keras_core.initializers.initializer import Initializer |
@@ -238,15 +242,6 @@ def __init__( |
238 | 242 | self.seed = seed or random.make_default_seed() |
239 | 243 |
|
240 | 244 | def __call__(self, shape, dtype=None): |
241 | | - """Returns a tensor object initialized as specified by the initializer. |
242 | | -
|
243 | | - Args: |
244 | | - shape: Shape of the tensor. |
245 | | - dtype: Optional dtype of the tensor. Only floating point types are |
246 | | - supported. If not specified, `tf.keras.backend.floatx()` is |
247 | | - used, which default to `float32` unless you configured it |
248 | | - otherwise (via `tf.keras.backend.set_floatx(float_dtype)`) |
249 | | - """ |
250 | 245 | scale = self.scale |
251 | 246 | fan_in, fan_out = compute_fans(shape) |
252 | 247 | if self.mode == "fan_in": |
@@ -566,3 +561,79 @@ def compute_fans(shape): |
566 | 561 | fan_in = shape[-2] * receptive_field_size |
567 | 562 | fan_out = shape[-1] * receptive_field_size |
568 | 563 | return int(fan_in), int(fan_out) |
| 564 | + |
| 565 | + |
@keras_core_export(
    [
        "keras_core.initializers.OrthogonalInitializer",
        "keras_core.initializers.Orthogonal",
    ]
)
class OrthogonalInitializer(Initializer):
    """Initializer that generates an orthogonal matrix.

    If the shape of the tensor to initialize is two-dimensional, it is
    initialized with an orthogonal matrix obtained from the QR decomposition of
    a matrix of random numbers drawn from a normal distribution. If the matrix
    has fewer rows than columns then the output will have orthogonal rows.
    Otherwise, the output will have orthogonal columns.

    If the shape of the tensor to initialize is more than two-dimensional,
    a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])`
    is initialized, where `n` is the length of the shape vector.
    The matrix is subsequently reshaped to give a tensor of the desired shape.

    Examples:

    >>> # Standalone usage:
    >>> initializer = keras_core.initializers.Orthogonal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = keras_core.initializers.Orthogonal()
    >>> layer = keras_core.layers.Dense(3, kernel_initializer=initializer)

    Args:
        gain: Multiplicative factor to apply to the orthogonal matrix.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. If `None`, a default seed is generated once at
            construction time and stored on the instance.

    Reference:

    - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C)
    """

    def __init__(self, gain=1.0, seed=None):
        self.gain = gain
        # Compare against `None` explicitly: a truthiness test
        # (`seed or ...`) would silently discard an explicit `seed=0`
        # and replace it with a freshly generated default seed.
        self.seed = seed if seed is not None else random.make_default_seed()

    def __call__(self, shape, dtype=None):
        """Returns a tensor of the requested shape built from a QR factor.

        Args:
            shape: Shape of the tensor to initialize. Must have at least
                two entries.
            dtype: Optional dtype forwarded to the random normal sampler.

        Returns:
            The `Q` factor, reshaped to `shape` and multiplied by `gain`.

        Raises:
            ValueError: If `shape` has fewer than two dimensions.
        """
        if len(shape) < 2:
            raise ValueError(
                "The tensor to initialize must be "
                "at least two-dimensional. Received: "
                f"shape={shape} of rank {len(shape)}."
            )

        # Flatten the input shape with the last dimension remaining
        # its original shape so it works for conv2d
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        # Always factorize a "tall" matrix (rows >= columns); the result is
        # transposed afterwards when the target has fewer rows than columns.
        flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

        # Generate a random matrix
        a = random.normal(flat_shape, seed=self.seed, dtype=dtype)
        # Compute the qr factorization
        q, r = np.linalg.qr(a)
        # Make Q uniform: scale each column of Q by the sign of the
        # matching diagonal entry of R.
        d = np.diag(r)
        q *= np.sign(d)
        if num_rows < num_cols:
            q = np.transpose(q)
        q = backend.convert_to_tensor(q)
        return self.gain * ops.reshape(q, shape)

    def get_config(self):
        """Returns the constructor arguments as a dict (`gain`, `seed`)."""
        return {"gain": self.gain, "seed": self.seed}
0 commit comments