@@ -7735,19 +7735,59 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
7735
7735
7736
7736
def hash (input , hash_size , num_hash = 1 , name = None ):
7737
7737
"""
7738
- hash the input
7739
- Args:
7740
- input (Variable): The input variable which is a one-hot word.
7741
- hash_size (int): The space size for hash algorithm.
7738
+ Hash the input to an integer whose value is less than the given hash size.
7739
+
7740
+ The hash algorithm used is xxHash, an extremely fast hash algorithm
7741
+ (https://github.com/Cyan4973/xxHash/tree/v0.6.5)
7742
+
7743
+ A simple example as below:
7744
+
7745
+ .. code-block:: text
7746
+
7747
+ Given:
7748
+
7749
+ # shape [2, 2]
7750
+ input.data = [
7751
+ [[1], [2]],
7752
+ [[3], [4]],
7753
+ ]
7754
+
7755
+ input.lod = [[0, 2]]
7756
+
7757
+ hash_size = 10000
7758
+
7759
+ num_hash = 4
7760
+
7761
+ Then:
7762
+
7763
+ The hash op takes all the numbers in the input's 2nd dimension as the hash
7764
+ algorithm's input each time. Each input is hashed 4 times, producing an
7765
+ array whose length is 4. Each value in the array ranges from 0 to 9999.
7766
+
7767
+ # shape [2, 4]
7768
+ output.data = [
7769
+ [[9662], [9217], [1129], [8487]],
7770
+ [[8310], [1327], [1654], [4567]],
7771
+ ]
7772
+
7773
+ output.lod = [[0, 2]]
7774
+
7775
+ Args:
7776
+ input (Variable): The input variable which is a one-hot word. The
7777
+ dimensions of the input variable must be 2.
7778
+ hash_size (int): The space size for hash algorithm. The output value
7779
+ will be in the range :math:`[0, hash_size - 1]`.
7742
7780
num_hash (int): The number of times to hash each input, default 1.
7743
7781
name (str, default None): The name of this layer.
7744
- Returns:
7745
- Variable: The hash result variable which is a LoDTensor.
7746
- Examples:
7747
- .. code-block:: python
7748
- word_dict = paddle.dataset.imdb.word_dict()
7749
- x = fluid.layers.data(shape[1], dtype='int32', lod_level=1)
7750
- out = fluid.layers.hash(input=x, len(word_dict))
7782
+
7783
+ Returns:
7784
+ Variable: The hash result variable which is a LoDTensor.
7785
+
7786
+ Examples:
7787
+ .. code-block:: python
7788
+ word_dict = paddle.dataset.imdb.word_dict()
7789
+ x = fluid.layers.data(name="x", shape=[1], dtype='int32', lod_level=1)
7790
+ out = fluid.layers.hash(input=x, num_hash=4, hash_size=1000)
7751
7791
"""
7752
7792
helper = LayerHelper ('hash' , ** locals ())
7753
7793
out = helper .create_variable_for_type_inference (
0 commit comments