|
92 | 92 | },
|
93 | 93 | "outputs": [],
|
94 | 94 | "source": [
|
| 95 | + "import numpy as np\n", |
95 | 96 | "import pandas as pd\n",
|
96 | 97 | "import tensorflow as tf\n",
|
97 | 98 | "\n",
|
|
292 | 293 | "outputs": [],
|
293 | 294 | "source": [
|
294 | 295 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
295 |
| - "normalizer.adapt(numeric_features)" |
| 296 | + "normalizer.adapt(np.array(numeric_features))" |
296 | 297 | ]
|
297 | 298 | },
|
298 | 299 | {
|
|
446 | 447 | "cell_type": "code",
|
447 | 448 | "execution_count": null,
|
448 | 449 | "metadata": {
|
449 |
| - "id": "U3QDo-jwHYXc" |
| 450 | + "id": "voDoA447GBC3" |
450 | 451 | },
|
451 | 452 | "outputs": [],
|
452 | 453 | "source": [
|
453 |
| - "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))" |
| 454 | + "numeric_features_dict = {key: value.to_numpy()[:, tf.newaxis] for key, value in dict(numeric_features).items()}\n", |
| 455 | + "target_array = target.to_numpy()[:, tf.newaxis]" |
454 | 456 | ]
|
455 | 457 | },
|
456 | 458 | {
|
457 |
| - "cell_type": "markdown", |
| 459 | + "cell_type": "code", |
| 460 | + "execution_count": null, |
458 | 461 | "metadata": {
|
459 |
| - "id": "yyEERK9ldIi_" |
| 462 | + "id": "U3QDo-jwHYXc" |
460 | 463 | },
|
| 464 | + "outputs": [], |
461 | 465 | "source": [
|
462 |
| - "Here are the first three examples from that dataset:" |
| 466 | + "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((numeric_features_dict, target_array))" |
463 | 467 | ]
|
464 | 468 | },
|
465 | 469 | {
|
466 | 470 | "cell_type": "code",
|
467 | 471 | "execution_count": null,
|
468 | 472 | "metadata": {
|
469 |
| - "id": "q0tDwk0VdH6D" |
| 473 | + "id": "HL4Bf1b7M7DT" |
470 | 474 | },
|
471 | 475 | "outputs": [],
|
472 | 476 | "source": [
|
473 |
| - "for row in numeric_dict_ds.take(3):\n", |
474 |
| - " print(row)" |
| 477 | + "len(numeric_features_dict)" |
475 | 478 | ]
|
476 | 479 | },
|
477 | 480 | {
|
478 | 481 | "cell_type": "markdown",
|
479 | 482 | "metadata": {
|
480 |
| - "id": "DEAM6HAFxlMy" |
| 483 | + "id": "yyEERK9ldIi_" |
481 | 484 | },
|
482 | 485 | "source": [
|
483 |
| - "### Dictionaries with Keras" |
| 486 | + "Here are the first three examples from that dataset:" |
484 | 487 | ]
|
485 | 488 | },
|
486 | 489 | {
|
487 |
| - "cell_type": "markdown", |
| 490 | + "cell_type": "code", |
| 491 | + "execution_count": null, |
488 | 492 | "metadata": {
|
489 |
| - "id": "dnoyoWLWx07i" |
| 493 | + "id": "q0tDwk0VdH6D" |
490 | 494 | },
|
| 495 | + "outputs": [], |
491 | 496 | "source": [
|
492 |
| - "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
493 |
| - "\n", |
494 |
| - "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
| 497 | + "for row in numeric_dict_ds.take(3):\n", |
| 498 | + " print(row)" |
495 | 499 | ]
|
496 | 500 | },
|
497 | 501 | {
|
498 | 502 | "cell_type": "markdown",
|
499 | 503 | "metadata": {
|
500 |
| - "id": "5xUTrm0apDTr" |
| 504 | + "id": "dnoyoWLWx07i" |
501 | 505 | },
|
502 | 506 | "source": [
|
503 |
| - "#### 1. The Model-subclass style\n", |
| 507 | + "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
504 | 508 | "\n",
|
505 |
| - "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
| 509 | + "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
506 | 510 | ]
|
507 | 511 | },
|
508 | 512 | {
|
509 |
| - "cell_type": "code", |
510 |
| - "execution_count": null, |
| 513 | + "cell_type": "markdown", |
511 | 514 | "metadata": {
|
512 |
| - "id": "Zc3HV99CFRWL" |
| 515 | + "id": "5xUTrm0apDTr" |
513 | 516 | },
|
514 |
| - "outputs": [], |
515 | 517 | "source": [
|
516 |
| - " def stack_dict(inputs, fun=tf.stack):\n", |
517 |
| - " values = []\n", |
518 |
| - " for key in sorted(inputs.keys()):\n", |
519 |
| - " values.append(tf.cast(inputs[key], tf.float32))\n", |
| 518 | + "### 1. The Model-subclass style\n", |
520 | 519 | "\n",
|
521 |
| - " return fun(values, axis=-1)" |
| 520 | + "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
522 | 521 | ]
|
523 | 522 | },
|
524 | 523 | {
|
|
544 | 543 | " tf.keras.layers.Dense(1)\n",
|
545 | 544 | " ])\n",
|
546 | 545 | "\n",
|
| 546 | + " self.concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 547 | + "\n", |
| 548 | + " def _stack(self, input_dict):\n", |
| 549 | + " values = []\n", |
| 550 | + " for key, value in sorted(input_dict.items()):\n", |
| 551 | + " values.append(value)\n", |
| 552 | + "\n", |
| 553 | + " return self.concat(values)\n", |
| 554 | + "\n", |
547 | 555 | " def adapt(self, inputs):\n",
|
548 | 556 | " # Stack the inputs and `adapt` the normalization layer.\n",
|
549 |
| - " inputs = stack_dict(inputs)\n", |
| 557 | + " inputs = self._stack(inputs)\n", |
550 | 558 | " self.normalizer.adapt(inputs)\n",
|
551 | 559 | "\n",
|
552 | 560 | " def call(self, inputs):\n",
|
553 | 561 | " # Stack the inputs\n",
|
554 |
| - " inputs = stack_dict(inputs)\n", |
| 562 | + " inputs = self._stack(inputs)\n", |
555 | 563 | " # Run them through all the layers.\n",
|
556 | 564 | " result = self.seq(inputs)\n",
|
557 | 565 | "\n",
|
558 | 566 | " return result\n",
|
559 | 567 | "\n",
|
560 | 568 | "model = MyModel()\n",
|
561 | 569 | "\n",
|
562 |
| - "model.adapt(dict(numeric_features))\n", |
| 570 | + "model.adapt(numeric_features_dict)\n", |
563 | 571 | "\n",
|
564 | 572 | "model.compile(optimizer='adam',\n",
|
565 | 573 | " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
|
|
584 | 592 | },
|
585 | 593 | "outputs": [],
|
586 | 594 | "source": [
|
587 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 595 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
588 | 596 | ]
|
589 | 597 | },
|
590 | 598 | {
|
|
625 | 633 | "id": "QIIdxIYm13Ik"
|
626 | 634 | },
|
627 | 635 | "source": [
|
628 |
| - "#### 2. The Keras functional style" |
| 636 | + "### 2. The Keras functional style" |
629 | 637 | ]
|
630 | 638 | },
|
631 | 639 | {
|
|
652 | 660 | },
|
653 | 661 | "outputs": [],
|
654 | 662 | "source": [
|
655 |
| - "x = stack_dict(inputs, fun=tf.concat)\n", |
| 663 | + "xs = [value for key, value in sorted(inputs.items())]\n", |
| 664 | + "\n", |
| 665 | + "concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 666 | + "x = concat(xs)\n", |
656 | 667 | "\n",
|
657 | 668 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
658 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))\n", |
| 669 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))\n", |
659 | 670 | "\n",
|
660 | 671 | "x = normalizer(x)\n",
|
661 | 672 | "x = tf.keras.layers.Dense(10, activation='relu')(x)\n",
|
|
678 | 689 | },
|
679 | 690 | "outputs": [],
|
680 | 691 | "source": [
|
681 |
| - "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True)" |
| 692 | + "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
682 | 693 | ]
|
683 | 694 | },
|
684 | 695 | {
|
|
698 | 709 | },
|
699 | 710 | "outputs": [],
|
700 | 711 | "source": [
|
701 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 712 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
702 | 713 | ]
|
703 | 714 | },
|
704 | 715 | {
|
|
806 | 817 | " else:\n",
|
807 | 818 | " dtype = tf.float32\n",
|
808 | 819 | "\n",
|
809 |
| - " inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)" |
| 820 | + " inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)" |
810 | 821 | ]
|
811 | 822 | },
|
812 | 823 | {
|
|
852 | 863 | "\n",
|
853 | 864 | "for name in binary_feature_names:\n",
|
854 | 865 | " inp = inputs[name]\n",
|
855 |
| - " inp = inp[:, tf.newaxis]\n", |
856 |
| - " float_value = tf.cast(inp, tf.float32)\n", |
857 |
| - " preprocessed.append(float_value)\n", |
| 866 | + " preprocessed.append(inp)\n", |
858 | 867 | "\n",
|
859 | 868 | "preprocessed"
|
860 | 869 | ]
|
|
879 | 888 | "outputs": [],
|
880 | 889 | "source": [
|
881 | 890 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
882 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))" |
| 891 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))" |
883 | 892 | ]
|
884 | 893 | },
|
885 | 894 | {
|
|
899 | 908 | },
|
900 | 909 | "outputs": [],
|
901 | 910 | "source": [
|
902 |
| - "numeric_inputs = {}\n", |
| 911 | + "numeric_inputs = []\n", |
903 | 912 | "for name in numeric_feature_names:\n",
|
904 |
| - " numeric_inputs[name]=inputs[name]\n", |
| 913 | + " numeric_inputs.append(inputs[name])\n", |
905 | 914 | "\n",
|
906 |
| - "numeric_inputs = stack_dict(numeric_inputs)\n", |
| 915 | + "numeric_inputs = tf.keras.layers.Concatenate(axis=-1)(numeric_inputs)\n", |
907 | 916 | "numeric_normalized = normalizer(numeric_inputs)\n",
|
908 | 917 | "\n",
|
909 | 918 | "preprocessed.append(numeric_normalized)\n",
|
|
985 | 994 | " else:\n",
|
986 | 995 | " lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')\n",
|
987 | 996 | "\n",
|
988 |
| - " x = inputs[name][:, tf.newaxis]\n", |
| 997 | + " x = inputs[name]\n", |
989 | 998 | " x = lookup(x)\n",
|
990 | 999 | " preprocessed.append(x)"
|
991 | 1000 | ]
|
|
1036 | 1045 | },
|
1037 | 1046 | "outputs": [],
|
1038 | 1047 | "source": [
|
1039 |
| - "preprocessed_result = tf.concat(preprocessed, axis=-1)\n", |
| 1048 | + "preprocessed_result = tf.keras.layers.Concatenate(axis=1)(preprocessed)\n", |
1040 | 1049 | "preprocessed_result"
|
1041 | 1050 | ]
|
1042 | 1051 | },
|
|
1068 | 1077 | },
|
1069 | 1078 | "outputs": [],
|
1070 | 1079 | "source": [
|
1071 |
| - "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True)" |
| 1080 | + "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
1072 | 1081 | ]
|
1073 | 1082 | },
|
1074 | 1083 | {
|
|
1183 | 1192 | " metrics=['accuracy'])"
|
1184 | 1193 | ]
|
1185 | 1194 | },
|
| 1195 | + { |
| 1196 | + "cell_type": "code", |
| 1197 | + "execution_count": null, |
| 1198 | + "metadata": { |
| 1199 | + "id": "i_Z2C2ZcZ3oC" |
| 1200 | + }, |
| 1201 | + "outputs": [], |
| 1202 | + "source": [ |
| 1203 | + "tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)" |
| 1204 | + ] |
| 1205 | + }, |
1186 | 1206 | {
|
1187 | 1207 | "cell_type": "markdown",
|
1188 | 1208 | "metadata": {
|
|
1258 | 1278 | ],
|
1259 | 1279 | "metadata": {
|
1260 | 1280 | "colab": {
|
1261 |
| - "collapsed_sections": [], |
1262 | 1281 | "name": "pandas_dataframe.ipynb",
|
1263 | 1282 | "toc_visible": true
|
1264 | 1283 | },
|
|
0 commit comments