|
92 | 92 | },
|
93 | 93 | "outputs": [],
|
94 | 94 | "source": [
|
| 95 | + "import numpy as np\n", |
95 | 96 | "import pandas as pd\n",
|
96 | 97 | "import tensorflow as tf\n",
|
97 | 98 | "\n",
|
|
292 | 293 | "outputs": [],
|
293 | 294 | "source": [
|
294 | 295 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
295 |
| - "normalizer.adapt(numeric_features)" |
| 296 | + "normalizer.adapt(np.array(numeric_features))" |
296 | 297 | ]
|
297 | 298 | },
|
298 | 299 | {
|
|
446 | 447 | "cell_type": "code",
|
447 | 448 | "execution_count": null,
|
448 | 449 | "metadata": {
|
449 |
| - "id": "U3QDo-jwHYXc" |
| 450 | + "id": "voDoA447GBC3" |
450 | 451 | },
|
451 | 452 | "outputs": [],
|
452 | 453 | "source": [
|
453 |
| - "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))" |
| 454 | + "numeric_features_dict = {key: value.to_numpy()[:, tf.newaxis] for key, value in dict(numeric_features).items()}\n", |
| 455 | + "target_array = target.to_numpy()[:, tf.newaxis]" |
454 | 456 | ]
|
455 | 457 | },
|
456 | 458 | {
|
457 |
| - "cell_type": "markdown", |
| 459 | + "cell_type": "code", |
| 460 | + "execution_count": null, |
458 | 461 | "metadata": {
|
459 |
| - "id": "yyEERK9ldIi_" |
| 462 | + "id": "U3QDo-jwHYXc" |
460 | 463 | },
|
| 464 | + "outputs": [], |
461 | 465 | "source": [
|
462 |
| - "Here are the first three examples from that dataset:" |
| 466 | + "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((numeric_features_dict, target_array))" |
463 | 467 | ]
|
464 | 468 | },
|
465 | 469 | {
|
466 | 470 | "cell_type": "code",
|
467 | 471 | "execution_count": null,
|
468 | 472 | "metadata": {
|
469 |
| - "id": "q0tDwk0VdH6D" |
| 473 | + "id": "HL4Bf1b7M7DT" |
470 | 474 | },
|
471 | 475 | "outputs": [],
|
472 | 476 | "source": [
|
473 |
| - "for row in numeric_dict_ds.take(3):\n", |
474 |
| - " print(row)" |
| 477 | + "len(numeric_features_dict)" |
475 | 478 | ]
|
476 | 479 | },
|
477 | 480 | {
|
478 | 481 | "cell_type": "markdown",
|
479 | 482 | "metadata": {
|
480 |
| - "id": "DEAM6HAFxlMy" |
| 483 | + "id": "yyEERK9ldIi_" |
481 | 484 | },
|
482 | 485 | "source": [
|
483 |
| - "### Dictionaries with Keras" |
| 486 | + "Here are the first three examples from that dataset:" |
484 | 487 | ]
|
485 | 488 | },
|
486 | 489 | {
|
487 |
| - "cell_type": "markdown", |
| 490 | + "cell_type": "code", |
| 491 | + "execution_count": null, |
488 | 492 | "metadata": {
|
489 |
| - "id": "dnoyoWLWx07i" |
| 493 | + "id": "q0tDwk0VdH6D" |
490 | 494 | },
|
| 495 | + "outputs": [], |
491 | 496 | "source": [
|
492 |
| - "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
493 |
| - "\n", |
494 |
| - "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
| 497 | + "for row in numeric_dict_ds.take(3):\n", |
| 498 | + " print(row)" |
495 | 499 | ]
|
496 | 500 | },
|
497 | 501 | {
|
498 | 502 | "cell_type": "markdown",
|
499 | 503 | "metadata": {
|
500 |
| - "id": "5xUTrm0apDTr" |
| 504 | + "id": "dnoyoWLWx07i" |
501 | 505 | },
|
502 | 506 | "source": [
|
503 |
| - "#### 1. The Model-subclass style\n", |
| 507 | + "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
504 | 508 | "\n",
|
505 |
| - "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
| 509 | + "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
506 | 510 | ]
|
507 | 511 | },
|
508 | 512 | {
|
509 |
| - "cell_type": "code", |
510 |
| - "execution_count": null, |
| 513 | + "cell_type": "markdown", |
511 | 514 | "metadata": {
|
512 |
| - "id": "Zc3HV99CFRWL" |
| 515 | + "id": "5xUTrm0apDTr" |
513 | 516 | },
|
514 |
| - "outputs": [], |
515 | 517 | "source": [
|
516 |
| - " def stack_dict(inputs, fun=tf.stack):\n", |
517 |
| - " values = []\n", |
518 |
| - " for key in sorted(inputs.keys()):\n", |
519 |
| - " values.append(tf.cast(inputs[key], tf.float32))\n", |
| 518 | + "### 1. The Model-subclass style\n", |
520 | 519 | "\n",
|
521 |
| - " return fun(values, axis=-1)" |
| 520 | + "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
522 | 521 | ]
|
523 | 522 | },
|
524 | 523 | {
|
|
544 | 543 | " tf.keras.layers.Dense(1)\n",
|
545 | 544 | " ])\n",
|
546 | 545 | "\n",
|
| 546 | + " self.concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 547 | + "\n", |
| 548 | + " def _stack(self, input_dict):\n", |
| 549 | + " values = []\n", |
| 550 | + " for key, value in sorted(input_dict.items()):\n", |
| 551 | + " values.append(value)\n", |
| 552 | + "\n", |
| 553 | + " return self.concat(values)\n", |
| 554 | + "\n", |
547 | 555 | " def adapt(self, inputs):\n",
|
548 | 556 | " # Stack the inputs and `adapt` the normalization layer.\n",
|
549 |
| - " inputs = stack_dict(inputs)\n", |
| 557 | + " inputs = self._stack(inputs)\n", |
550 | 558 | " self.normalizer.adapt(inputs)\n",
|
551 | 559 | "\n",
|
552 | 560 | " def call(self, inputs):\n",
|
553 | 561 | " # Stack the inputs\n",
|
554 |
| - " inputs = stack_dict(inputs)\n", |
| 562 | + " inputs = self._stack(inputs)\n", |
555 | 563 | " # Run them through all the layers.\n",
|
556 | 564 | " result = self.seq(inputs)\n",
|
557 | 565 | "\n",
|
558 | 566 | " return result\n",
|
559 | 567 | "\n",
|
560 | 568 | "model = MyModel()\n",
|
561 | 569 | "\n",
|
562 |
| - "model.adapt(dict(numeric_features))\n", |
| 570 | + "model.adapt(numeric_features_dict)\n", |
563 | 571 | "\n",
|
564 | 572 | "model.compile(optimizer='adam',\n",
|
565 | 573 | " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
|
|
584 | 592 | },
|
585 | 593 | "outputs": [],
|
586 | 594 | "source": [
|
587 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 595 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
588 | 596 | ]
|
589 | 597 | },
|
590 | 598 | {
|
|
625 | 633 | "id": "QIIdxIYm13Ik"
|
626 | 634 | },
|
627 | 635 | "source": [
|
628 |
| - "#### 2. The Keras functional style" |
| 636 | + "### 2. The Keras functional style" |
629 | 637 | ]
|
630 | 638 | },
|
631 | 639 | {
|
|
652 | 660 | },
|
653 | 661 | "outputs": [],
|
654 | 662 | "source": [
|
655 |
| - "x = stack_dict(inputs, fun=tf.concat)\n", |
| 663 | + "xs = [value for key, value in sorted(inputs.items())]\n", |
| 664 | + "\n", |
| 665 | + "concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 666 | + "x = concat(xs)\n", |
656 | 667 | "\n",
|
657 | 668 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
658 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))\n", |
| 669 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))\n", |
659 | 670 | "\n",
|
660 | 671 | "x = normalizer(x)\n",
|
661 | 672 | "x = tf.keras.layers.Dense(10, activation='relu')(x)\n",
|
|
678 | 689 | },
|
679 | 690 | "outputs": [],
|
680 | 691 | "source": [
|
681 |
| - "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True)" |
| 692 | + "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
682 | 693 | ]
|
683 | 694 | },
|
684 | 695 | {
|
|
698 | 709 | },
|
699 | 710 | "outputs": [],
|
700 | 711 | "source": [
|
701 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 712 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
702 | 713 | ]
|
703 | 714 | },
|
704 | 715 | {
|
|
806 | 817 | " else:\n",
|
807 | 818 | " dtype = tf.float32\n",
|
808 | 819 | "\n",
|
809 |
| - " inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)" |
| 820 | + " inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)" |
810 | 821 | ]
|
811 | 822 | },
|
812 | 823 | {
|
|
852 | 863 | "\n",
|
853 | 864 | "for name in binary_feature_names:\n",
|
854 | 865 | " inp = inputs[name]\n",
|
855 |
| - " inp = inp[:, tf.newaxis]\n", |
856 |
| - " float_value = tf.cast(inp, tf.float32)\n", |
857 |
| - " preprocessed.append(float_value)\n", |
| 866 | + " preprocessed.append(inp)\n", |
858 | 867 | "\n",
|
859 | 868 | "preprocessed"
|
860 | 869 | ]
|
|
879 | 888 | "outputs": [],
|
880 | 889 | "source": [
|
881 | 890 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
882 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))" |
| 891 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))" |
883 | 892 | ]
|
884 | 893 | },
|
885 | 894 | {
|
|
899 | 908 | },
|
900 | 909 | "outputs": [],
|
901 | 910 | "source": [
|
902 |
| - "numeric_inputs = {}\n", |
| 911 | + "numeric_inputs = []\n", |
903 | 912 | "for name in numeric_feature_names:\n",
|
904 |
| - " numeric_inputs[name]=inputs[name]\n", |
| 913 | + " numeric_inputs.append(inputs[name])\n", |
905 | 914 | "\n",
|
906 |
| - "numeric_inputs = stack_dict(numeric_inputs)\n", |
| 915 | + "numeric_inputs = tf.keras.layers.Concatenate(axis=-1)(numeric_inputs)\n", |
907 | 916 | "numeric_normalized = normalizer(numeric_inputs)\n",
|
908 | 917 | "\n",
|
909 | 918 | "preprocessed.append(numeric_normalized)\n",
|
|
985 | 994 | " else:\n",
|
986 | 995 | " lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')\n",
|
987 | 996 | "\n",
|
988 |
| - " x = inputs[name][:, tf.newaxis]\n", |
| 997 | + " x = inputs[name]\n", |
989 | 998 | " x = lookup(x)\n",
|
990 | 999 | " preprocessed.append(x)"
|
991 | 1000 | ]
|
|
1036 | 1045 | },
|
1037 | 1046 | "outputs": [],
|
1038 | 1047 | "source": [
|
1039 |
| - "preprocessed_result = tf.concat(preprocessed, axis=-1)\n", |
| 1048 | + "preprocessed_result = tf.keras.layers.Concatenate(axis=1)(preprocessed)\n", |
1040 | 1049 | "preprocessed_result"
|
1041 | 1050 | ]
|
1042 | 1051 | },
|
|
1068 | 1077 | },
|
1069 | 1078 | "outputs": [],
|
1070 | 1079 | "source": [
|
1071 |
| - "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True)" |
| 1080 | + "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
1072 | 1081 | ]
|
1073 | 1082 | },
|
1074 | 1083 | {
|
|
1183 | 1192 | " metrics=['accuracy'])"
|
1184 | 1193 | ]
|
1185 | 1194 | },
|
| 1195 | + { |
| 1196 | + "cell_type": "code", |
| 1197 | + "execution_count": null, |
| 1198 | + "metadata": { |
| 1199 | + "id": "i_Z2C2ZcZ3oC" |
| 1200 | + }, |
| 1201 | + "outputs": [], |
| 1202 | + "source": [ |
| 1203 | + "tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)" |
| 1204 | + ] |
| 1205 | + }, |
1186 | 1206 | {
|
1187 | 1207 | "cell_type": "markdown",
|
1188 | 1208 | "metadata": {
|
|
1258 | 1278 | ],
|
1259 | 1279 | "metadata": {
|
1260 | 1280 | "colab": {
|
1261 |
| - "collapsed_sections": [], |
1262 | 1281 | "name": "pandas_dataframe.ipynb",
|
1263 | 1282 | "toc_visible": true
|
1264 | 1283 | },
|
|
0 commit comments