|
447 | 447 | "cell_type": "code",
|
448 | 448 | "execution_count": null,
|
449 | 449 | "metadata": {
|
450 |
| - "id": "U3QDo-jwHYXc" |
| 450 | + "id": "voDoA447GBC3" |
451 | 451 | },
|
452 | 452 | "outputs": [],
|
453 | 453 | "source": [
|
454 |
| - "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))" |
| 454 | + "numeric_features_dict = {key: value.to_numpy()[:, tf.newaxis] for key, value in dict(numeric_features).items()}\n", |
| 455 | + "target_array = target.to_numpy()[:, tf.newaxis]" |
455 | 456 | ]
|
456 | 457 | },
|
457 | 458 | {
|
458 |
| - "cell_type": "markdown", |
| 459 | + "cell_type": "code", |
| 460 | + "execution_count": null, |
459 | 461 | "metadata": {
|
460 |
| - "id": "yyEERK9ldIi_" |
| 462 | + "id": "U3QDo-jwHYXc" |
461 | 463 | },
|
| 464 | + "outputs": [], |
462 | 465 | "source": [
|
463 |
| - "Here are the first three examples from that dataset:" |
| 466 | + "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((numeric_features_dict, target_array))" |
464 | 467 | ]
|
465 | 468 | },
|
466 | 469 | {
|
467 | 470 | "cell_type": "code",
|
468 | 471 | "execution_count": null,
|
469 | 472 | "metadata": {
|
470 |
| - "id": "q0tDwk0VdH6D" |
| 473 | + "id": "HL4Bf1b7M7DT" |
471 | 474 | },
|
472 | 475 | "outputs": [],
|
473 | 476 | "source": [
|
474 |
| - "for row in numeric_dict_ds.take(3):\n", |
475 |
| - " print(row)" |
| 477 | + "len(numeric_features_dict)" |
476 | 478 | ]
|
477 | 479 | },
|
478 | 480 | {
|
479 | 481 | "cell_type": "markdown",
|
480 | 482 | "metadata": {
|
481 |
| - "id": "DEAM6HAFxlMy" |
| 483 | + "id": "yyEERK9ldIi_" |
482 | 484 | },
|
483 | 485 | "source": [
|
484 |
| - "### Dictionaries with Keras" |
| 486 | + "Here are the first three examples from that dataset:" |
485 | 487 | ]
|
486 | 488 | },
|
487 | 489 | {
|
488 |
| - "cell_type": "markdown", |
| 490 | + "cell_type": "code", |
| 491 | + "execution_count": null, |
489 | 492 | "metadata": {
|
490 |
| - "id": "dnoyoWLWx07i" |
| 493 | + "id": "q0tDwk0VdH6D" |
491 | 494 | },
|
| 495 | + "outputs": [], |
492 | 496 | "source": [
|
493 |
| - "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
494 |
| - "\n", |
495 |
| - "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
| 497 | + "for row in numeric_dict_ds.take(3):\n", |
| 498 | + " print(row)" |
496 | 499 | ]
|
497 | 500 | },
|
498 | 501 | {
|
499 | 502 | "cell_type": "markdown",
|
500 | 503 | "metadata": {
|
501 |
| - "id": "5xUTrm0apDTr" |
| 504 | + "id": "dnoyoWLWx07i" |
502 | 505 | },
|
503 | 506 | "source": [
|
504 |
| - "#### 1. The Model-subclass style\n", |
| 507 | + "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", |
505 | 508 | "\n",
|
506 |
| - "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
| 509 | + "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." |
507 | 510 | ]
|
508 | 511 | },
|
509 | 512 | {
|
510 |
| - "cell_type": "code", |
511 |
| - "execution_count": null, |
| 513 | + "cell_type": "markdown", |
512 | 514 | "metadata": {
|
513 |
| - "id": "Zc3HV99CFRWL" |
| 515 | + "id": "5xUTrm0apDTr" |
514 | 516 | },
|
515 |
| - "outputs": [], |
516 | 517 | "source": [
|
517 |
| - " def stack_dict(inputs, fun=tf.stack):\n", |
518 |
| - " values = []\n", |
519 |
| - " for key in sorted(inputs.keys()):\n", |
520 |
| - " values.append(tf.cast(inputs[key], tf.float32))\n", |
| 518 | + "### 1. The Model-subclass style\n", |
521 | 519 | "\n",
|
522 |
| - " return fun(values, axis=-1)" |
| 520 | + "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" |
523 | 521 | ]
|
524 | 522 | },
|
525 | 523 | {
|
|
545 | 543 | " tf.keras.layers.Dense(1)\n",
|
546 | 544 | " ])\n",
|
547 | 545 | "\n",
|
| 546 | + " self.concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 547 | + "\n", |
| 548 | + " def _stack(self, input_dict):\n", |
| 549 | + " values = []\n", |
| 550 | + " for key, value in sorted(input_dict.items()):\n", |
| 551 | + " values.append(value)\n", |
| 552 | + "\n", |
| 553 | + " return self.concat(values)\n", |
| 554 | + "\n", |
548 | 555 | " def adapt(self, inputs):\n",
|
549 | 556 | " # Stack the inputs and `adapt` the normalization layer.\n",
|
550 |
| - " inputs = stack_dict(inputs)\n", |
| 557 | + " inputs = self._stack(inputs)\n", |
551 | 558 | " self.normalizer.adapt(inputs)\n",
|
552 | 559 | "\n",
|
553 | 560 | " def call(self, inputs):\n",
|
554 | 561 | " # Stack the inputs\n",
|
555 |
| - " inputs = stack_dict(inputs)\n", |
| 562 | + " inputs = self._stack(inputs)\n", |
556 | 563 | " # Run them through all the layers.\n",
|
557 | 564 | " result = self.seq(inputs)\n",
|
558 | 565 | "\n",
|
559 | 566 | " return result\n",
|
560 | 567 | "\n",
|
561 | 568 | "model = MyModel()\n",
|
562 | 569 | "\n",
|
563 |
| - "model.adapt(dict(numeric_features))\n", |
| 570 | + "model.adapt(numeric_features_dict)\n", |
564 | 571 | "\n",
|
565 | 572 | "model.compile(optimizer='adam',\n",
|
566 | 573 | " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
|
|
585 | 592 | },
|
586 | 593 | "outputs": [],
|
587 | 594 | "source": [
|
588 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 595 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
589 | 596 | ]
|
590 | 597 | },
|
591 | 598 | {
|
|
626 | 633 | "id": "QIIdxIYm13Ik"
|
627 | 634 | },
|
628 | 635 | "source": [
|
629 |
| - "#### 2. The Keras functional style" |
| 636 | + "### 2. The Keras functional style" |
630 | 637 | ]
|
631 | 638 | },
|
632 | 639 | {
|
|
653 | 660 | },
|
654 | 661 | "outputs": [],
|
655 | 662 | "source": [
|
656 |
| - "x = stack_dict(inputs, fun=tf.concat)\n", |
| 663 | + "xs = [value for key, value in sorted(inputs.items())]\n", |
| 664 | + "\n", |
| 665 | + "concat = tf.keras.layers.Concatenate(axis=1)\n", |
| 666 | + "x = concat(xs)\n", |
657 | 667 | "\n",
|
658 | 668 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
659 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))\n", |
| 669 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))\n", |
660 | 670 | "\n",
|
661 | 671 | "x = normalizer(x)\n",
|
662 | 672 | "x = tf.keras.layers.Dense(10, activation='relu')(x)\n",
|
|
679 | 689 | },
|
680 | 690 | "outputs": [],
|
681 | 691 | "source": [
|
682 |
| - "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True)" |
| 692 | + "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
683 | 693 | ]
|
684 | 694 | },
|
685 | 695 | {
|
|
699 | 709 | },
|
700 | 710 | "outputs": [],
|
701 | 711 | "source": [
|
702 |
| - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" |
| 712 | + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" |
703 | 713 | ]
|
704 | 714 | },
|
705 | 715 | {
|
|
807 | 817 | " else:\n",
|
808 | 818 | " dtype = tf.float32\n",
|
809 | 819 | "\n",
|
810 |
| - " inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)" |
| 820 | + " inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)" |
811 | 821 | ]
|
812 | 822 | },
|
813 | 823 | {
|
|
853 | 863 | "\n",
|
854 | 864 | "for name in binary_feature_names:\n",
|
855 | 865 | " inp = inputs[name]\n",
|
856 |
| - " inp = inp[:, tf.newaxis]\n", |
857 |
| - " float_value = tf.cast(inp, tf.float32)\n", |
858 |
| - " preprocessed.append(float_value)\n", |
| 866 | + " preprocessed.append(inp)\n", |
859 | 867 | "\n",
|
860 | 868 | "preprocessed"
|
861 | 869 | ]
|
|
880 | 888 | "outputs": [],
|
881 | 889 | "source": [
|
882 | 890 | "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
|
883 |
| - "normalizer.adapt(stack_dict(dict(numeric_features)))" |
| 891 | + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))" |
884 | 892 | ]
|
885 | 893 | },
|
886 | 894 | {
|
|
900 | 908 | },
|
901 | 909 | "outputs": [],
|
902 | 910 | "source": [
|
903 |
| - "numeric_inputs = {}\n", |
| 911 | + "numeric_inputs = []\n", |
904 | 912 | "for name in numeric_feature_names:\n",
|
905 |
| - " numeric_inputs[name]=inputs[name]\n", |
| 913 | + " numeric_inputs.append(inputs[name])\n", |
906 | 914 | "\n",
|
907 |
| - "numeric_inputs = stack_dict(numeric_inputs)\n", |
| 915 | + "numeric_inputs = tf.keras.layers.Concatenate(axis=-1)(numeric_inputs)\n", |
908 | 916 | "numeric_normalized = normalizer(numeric_inputs)\n",
|
909 | 917 | "\n",
|
910 | 918 | "preprocessed.append(numeric_normalized)\n",
|
|
986 | 994 | " else:\n",
|
987 | 995 | " lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')\n",
|
988 | 996 | "\n",
|
989 |
| - " x = inputs[name][:, tf.newaxis]\n", |
| 997 | + " x = inputs[name]\n", |
990 | 998 | " x = lookup(x)\n",
|
991 | 999 | " preprocessed.append(x)"
|
992 | 1000 | ]
|
|
1037 | 1045 | },
|
1038 | 1046 | "outputs": [],
|
1039 | 1047 | "source": [
|
1040 |
| - "preprocessed_result = tf.concat(preprocessed, axis=-1)\n", |
| 1048 | + "preprocessed_result = tf.keras.layers.Concatenate(axis=1)(preprocessed)\n", |
1041 | 1049 | "preprocessed_result"
|
1042 | 1050 | ]
|
1043 | 1051 | },
|
|
1069 | 1077 | },
|
1070 | 1078 | "outputs": [],
|
1071 | 1079 | "source": [
|
1072 |
| - "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True)" |
| 1080 | + "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" |
1073 | 1081 | ]
|
1074 | 1082 | },
|
1075 | 1083 | {
|
|
1184 | 1192 | " metrics=['accuracy'])"
|
1185 | 1193 | ]
|
1186 | 1194 | },
|
| 1195 | + { |
| 1196 | + "cell_type": "code", |
| 1197 | + "execution_count": null, |
| 1198 | + "metadata": { |
| 1199 | + "id": "i_Z2C2ZcZ3oC" |
| 1200 | + }, |
| 1201 | + "outputs": [], |
| 1202 | + "source": [ |
| 1203 | + "tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)" |
| 1204 | + ] |
| 1205 | + }, |
1187 | 1206 | {
|
1188 | 1207 | "cell_type": "markdown",
|
1189 | 1208 | "metadata": {
|
|
1259 | 1278 | ],
|
1260 | 1279 | "metadata": {
|
1261 | 1280 | "colab": {
|
1262 |
| - "collapsed_sections": [], |
1263 | 1281 | "name": "pandas_dataframe.ipynb",
|
1264 | 1282 | "toc_visible": true
|
1265 | 1283 | },
|
|
0 commit comments