Skip to content

Commit 5b47d57

Browse files
Merge pull request #2318 from tensorflow:MarkDaoust-patch-1
PiperOrigin-RevId: 662885206
2 parents 460419a + 117c1d3 commit 5b47d57

File tree

1 file changed

+68
-49
lines changed

1 file changed

+68
-49
lines changed

site/en/tutorials/load_data/pandas_dataframe.ipynb

Lines changed: 68 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@
9292
},
9393
"outputs": [],
9494
"source": [
95+
"import numpy as np\n",
9596
"import pandas as pd\n",
9697
"import tensorflow as tf\n",
9798
"\n",
@@ -292,7 +293,7 @@
292293
"outputs": [],
293294
"source": [
294295
"normalizer = tf.keras.layers.Normalization(axis=-1)\n",
295-
"normalizer.adapt(numeric_features)"
296+
"normalizer.adapt(np.array(numeric_features))"
296297
]
297298
},
298299
{
@@ -446,79 +447,77 @@
446447
"cell_type": "code",
447448
"execution_count": null,
448449
"metadata": {
449-
"id": "U3QDo-jwHYXc"
450+
"id": "voDoA447GBC3"
450451
},
451452
"outputs": [],
452453
"source": [
453-
"numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))"
454+
"numeric_features_dict = {key: value.to_numpy()[:, tf.newaxis] for key, value in dict(numeric_features).items()}\n",
455+
"target_array = target.to_numpy()[:, tf.newaxis]"
454456
]
455457
},
456458
{
457-
"cell_type": "markdown",
459+
"cell_type": "code",
460+
"execution_count": null,
458461
"metadata": {
459-
"id": "yyEERK9ldIi_"
462+
"id": "U3QDo-jwHYXc"
460463
},
464+
"outputs": [],
461465
"source": [
462-
"Here are the first three examples from that dataset:"
466+
"numeric_dict_ds = tf.data.Dataset.from_tensor_slices((numeric_features_dict , target_array))"
463467
]
464468
},
465469
{
466470
"cell_type": "code",
467471
"execution_count": null,
468472
"metadata": {
469-
"id": "q0tDwk0VdH6D"
473+
"id": "HL4Bf1b7M7DT"
470474
},
471475
"outputs": [],
472476
"source": [
473-
"for row in numeric_dict_ds.take(3):\n",
474-
" print(row)"
477+
"len(numeric_features_dict)"
475478
]
476479
},
477480
{
478481
"cell_type": "markdown",
479482
"metadata": {
480-
"id": "DEAM6HAFxlMy"
483+
"id": "yyEERK9ldIi_"
481484
},
482485
"source": [
483-
"### Dictionaries with Keras"
486+
"Here are the first three examples from that dataset:"
484487
]
485488
},
486489
{
487-
"cell_type": "markdown",
490+
"cell_type": "code",
491+
"execution_count": null,
488492
"metadata": {
489-
"id": "dnoyoWLWx07i"
493+
"id": "q0tDwk0VdH6D"
490494
},
495+
"outputs": [],
491496
"source": [
492-
"Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n",
493-
"\n",
494-
"There are two equivalent ways you can write a Keras model that accepts a dictionary as input."
497+
"for row in numeric_dict_ds.take(3):\n",
498+
" print(row)"
495499
]
496500
},
497501
{
498502
"cell_type": "markdown",
499503
"metadata": {
500-
"id": "5xUTrm0apDTr"
504+
"id": "dnoyoWLWx07i"
501505
},
502506
"source": [
503-
"#### 1. The Model-subclass style\n",
507+
"Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n",
504508
"\n",
505-
"You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:"
509+
"There are two equivalent ways you can write a Keras model that accepts a dictionary as input."
506510
]
507511
},
508512
{
509-
"cell_type": "code",
510-
"execution_count": null,
513+
"cell_type": "markdown",
511514
"metadata": {
512-
"id": "Zc3HV99CFRWL"
515+
"id": "5xUTrm0apDTr"
513516
},
514-
"outputs": [],
515517
"source": [
516-
" def stack_dict(inputs, fun=tf.stack):\n",
517-
" values = []\n",
518-
" for key in sorted(inputs.keys()):\n",
519-
" values.append(tf.cast(inputs[key], tf.float32))\n",
518+
"### 1. The Model-subclass style\n",
520519
"\n",
521-
" return fun(values, axis=-1)"
520+
"You write a subclass of `tf.keras.Model` (or `tf.keras.layers.Layer`). You directly handle the inputs and create the outputs:"
522521
]
523522
},
524523
{
@@ -544,22 +543,31 @@
544543
" tf.keras.layers.Dense(1)\n",
545544
" ])\n",
546545
"\n",
546+
" self.concat = tf.keras.layers.Concatenate(axis=1)\n",
547+
"\n",
548+
" def _stack(self, input_dict):\n",
549+
" values = []\n",
550+
" for key, value in sorted(input_dict.items()):\n",
551+
" values.append(value)\n",
552+
"\n",
553+
" return self.concat(values)\n",
554+
"\n",
547555
" def adapt(self, inputs):\n",
548556
" # Stack the inputs and `adapt` the normalization layer.\n",
549-
" inputs = stack_dict(inputs)\n",
557+
" inputs = self._stack(inputs)\n",
550558
" self.normalizer.adapt(inputs)\n",
551559
"\n",
552560
" def call(self, inputs):\n",
553561
" # Stack the inputs\n",
554-
" inputs = stack_dict(inputs)\n",
562+
" inputs = self._stack(inputs)\n",
555563
" # Run them through all the layers.\n",
556564
" result = self.seq(inputs)\n",
557565
"\n",
558566
" return result\n",
559567
"\n",
560568
"model = MyModel()\n",
561569
"\n",
562-
"model.adapt(dict(numeric_features))\n",
570+
"model.adapt(numeric_features_dict)\n",
563571
"\n",
564572
"model.compile(optimizer='adam',\n",
565573
" loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
@@ -584,7 +592,7 @@
584592
},
585593
"outputs": [],
586594
"source": [
587-
"model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)"
595+
"model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)"
588596
]
589597
},
590598
{
@@ -625,7 +633,7 @@
625633
"id": "QIIdxIYm13Ik"
626634
},
627635
"source": [
628-
"#### 2. The Keras functional style"
636+
"### 2. The Keras functional style"
629637
]
630638
},
631639
{
@@ -652,10 +660,13 @@
652660
},
653661
"outputs": [],
654662
"source": [
655-
"x = stack_dict(inputs, fun=tf.concat)\n",
663+
"xs = [value for key, value in sorted(inputs.items())]\n",
664+
"\n",
665+
"concat = tf.keras.layers.Concatenate(axis=1)\n",
666+
"x = concat(xs)\n",
656667
"\n",
657668
"normalizer = tf.keras.layers.Normalization(axis=-1)\n",
658-
"normalizer.adapt(stack_dict(dict(numeric_features)))\n",
669+
"normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))\n",
659670
"\n",
660671
"x = normalizer(x)\n",
661672
"x = tf.keras.layers.Dense(10, activation='relu')(x)\n",
@@ -678,7 +689,7 @@
678689
},
679690
"outputs": [],
680691
"source": [
681-
"tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True)"
692+
"tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True, show_layer_names=True)"
682693
]
683694
},
684695
{
@@ -698,7 +709,7 @@
698709
},
699710
"outputs": [],
700711
"source": [
701-
"model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)"
712+
"model.fit(numeric_features_dict, target, epochs=5, batch_size=BATCH_SIZE)"
702713
]
703714
},
704715
{
@@ -806,7 +817,7 @@
806817
" else:\n",
807818
" dtype = tf.float32\n",
808819
"\n",
809-
" inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)"
820+
" inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)"
810821
]
811822
},
812823
{
@@ -852,9 +863,7 @@
852863
"\n",
853864
"for name in binary_feature_names:\n",
854865
" inp = inputs[name]\n",
855-
" inp = inp[:, tf.newaxis]\n",
856-
" float_value = tf.cast(inp, tf.float32)\n",
857-
" preprocessed.append(float_value)\n",
866+
" preprocessed.append(inp)\n",
858867
"\n",
859868
"preprocessed"
860869
]
@@ -879,7 +888,7 @@
879888
"outputs": [],
880889
"source": [
881890
"normalizer = tf.keras.layers.Normalization(axis=-1)\n",
882-
"normalizer.adapt(stack_dict(dict(numeric_features)))"
891+
"normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))"
883892
]
884893
},
885894
{
@@ -899,11 +908,11 @@
899908
},
900909
"outputs": [],
901910
"source": [
902-
"numeric_inputs = {}\n",
911+
"numeric_inputs = []\n",
903912
"for name in numeric_feature_names:\n",
904-
" numeric_inputs[name]=inputs[name]\n",
913+
" numeric_inputs.append(inputs[name])\n",
905914
"\n",
906-
"numeric_inputs = stack_dict(numeric_inputs)\n",
915+
"numeric_inputs = tf.keras.layers.Concatenate(axis=-1)(numeric_inputs)\n",
907916
"numeric_normalized = normalizer(numeric_inputs)\n",
908917
"\n",
909918
"preprocessed.append(numeric_normalized)\n",
@@ -985,7 +994,7 @@
985994
" else:\n",
986995
" lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')\n",
987996
"\n",
988-
" x = inputs[name][:, tf.newaxis]\n",
997+
" x = inputs[name]\n",
989998
" x = lookup(x)\n",
990999
" preprocessed.append(x)"
9911000
]
@@ -1036,7 +1045,7 @@
10361045
},
10371046
"outputs": [],
10381047
"source": [
1039-
"preprocessed_result = tf.concat(preprocessed, axis=-1)\n",
1048+
"preprocessed_result = tf.keras.layers.Concatenate(axis=1)(preprocessed)\n",
10401049
"preprocessed_result"
10411050
]
10421051
},
@@ -1068,7 +1077,7 @@
10681077
},
10691078
"outputs": [],
10701079
"source": [
1071-
"tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True)"
1080+
"tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True, show_layer_names=True)"
10721081
]
10731082
},
10741083
{
@@ -1183,6 +1192,17 @@
11831192
" metrics=['accuracy'])"
11841193
]
11851194
},
1195+
{
1196+
"cell_type": "code",
1197+
"execution_count": null,
1198+
"metadata": {
1199+
"id": "i_Z2C2ZcZ3oC"
1200+
},
1201+
"outputs": [],
1202+
"source": [
1203+
"tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)"
1204+
]
1205+
},
11861206
{
11871207
"cell_type": "markdown",
11881208
"metadata": {
@@ -1258,7 +1278,6 @@
12581278
],
12591279
"metadata": {
12601280
"colab": {
1261-
"collapsed_sections": [],
12621281
"name": "pandas_dataframe.ipynb",
12631282
"toc_visible": true
12641283
},

0 commit comments

Comments
 (0)