|
157 | 157 | " \"user_occupation_text\",\n",
|
158 | 158 | " ],\n",
|
159 | 159 | " # model\n",
|
160 |
| - " \"embedding_dim\": 32,\n", |
| 160 | + " \"embedding_dim\": 8,\n", |
161 | 161 | " \"deep_net_num_units\": [192, 192, 192],\n",
|
162 |
| - " \"projection_dim\": 20,\n", |
| 162 | + " \"projection_dim\": 8,\n", |
163 | 163 | " \"dcn_num_units\": [192, 192],\n",
|
164 | 164 | " # training\n",
|
165 |
| - " \"learning_rate\": 0.01,\n", |
166 |
| - " \"num_epochs\": 10,\n", |
167 |
| - " \"batch_size\": 1024,\n", |
| 165 | + " \"learning_rate\": 1e-2,\n", |
| 166 | + " \"num_epochs\": 8,\n", |
| 167 | + " \"batch_size\": 8192,\n", |
168 | 168 | "}\n",
|
169 | 169 | ""
|
170 | 170 | ]
|
|
199 | 199 | " cax = divider.append_axes(\"right\", size=\"5%\", pad=0.05)\n",
|
200 | 200 | " plt.colorbar(im, cax=cax)\n",
|
201 | 201 | " cax.tick_params(labelsize=10)\n",
|
202 |
| - " ax.set_xticklabels([\"\"] + features, rotation=45, fontsize=10)\n", |
203 |
| - " ax.set_yticklabels([\"\"] + features, fontsize=10)\n", |
| 202 | + " ax.set_xticklabels([\"\"] + features, rotation=45, fontsize=5)\n", |
| 203 | + " ax.set_yticklabels([\"\"] + features, fontsize=5)\n", |
204 | 204 | "\n",
|
205 | 205 | "\n",
|
206 | 206 | "def train_and_evaluate(\n",
|
|
636 | 636 | "outputs": [],
|
637 | 637 | "source": [
|
638 | 638 | "\n",
|
639 |
| - "def get_model(\n", |
640 |
| - " dense_num_units_lst,\n", |
641 |
| - " embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n", |
642 |
| - " use_cross_layer=False,\n", |
643 |
| - " projection_dim=None,\n", |
644 |
| - "):\n", |
645 |
| - " inputs = {}\n", |
646 |
| - " embeddings = []\n", |
647 |
| - " for feature_name, vocabulary in vocabularies.items():\n", |
648 |
| - " inputs[feature_name] = keras.Input(shape=(), dtype=\"int32\", name=feature_name)\n", |
649 |
| - " embedding_layer = keras.layers.Embedding(\n", |
650 |
| - " input_dim=len(vocabulary) + 1,\n", |
651 |
| - " output_dim=embedding_dim,\n", |
652 |
| - " )\n", |
653 |
| - " embedding = embedding_layer(inputs[feature_name])\n", |
654 |
| - " embeddings.append(embedding)\n", |
655 |
| - "\n", |
656 |
| - " x = keras.ops.concatenate(embeddings, axis=1)\n", |
657 |
| - "\n", |
658 |
| - " # Cross layer.\n", |
659 |
| - " if use_cross_layer:\n", |
660 |
| - " x = keras_rs.layers.FeatureCross(projection_dim=projection_dim)(x)\n", |
661 |
| - "\n", |
662 |
| - " # Dense layer.\n", |
663 |
| - " for num_units in dense_num_units_lst:\n", |
664 |
| - " x = keras.layers.Dense(num_units, activation=\"relu\")(x)\n", |
665 |
| - "\n", |
666 |
| - " x = keras.layers.Dense(1)(x)\n", |
667 |
| - "\n", |
668 |
| - " return keras.Model(inputs=inputs, outputs=x)\n", |
| 639 | + "class DCN(keras.Model):\n", |
| 640 | + " def __init__(\n", |
| 641 | + " self,\n", |
| 642 | + " dense_num_units_lst,\n", |
| 643 | + " embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n", |
| 644 | + " use_cross_layer=False,\n", |
| 645 | + " projection_dim=None,\n", |
| 646 | + " **kwargs,\n", |
| 647 | + " ):\n", |
| 648 | + " super().__init__(**kwargs)\n", |
| 649 | + "\n", |
| 650 | + " # Layers.\n", |
| 651 | + "\n", |
| 652 | + " self.embedding_layers = []\n", |
| 653 | + " for feature_name, vocabulary in vocabularies.items():\n", |
| 654 | + " self.embedding_layers.append(\n", |
| 655 | + " keras.layers.Embedding(\n", |
| 656 | + " input_dim=len(vocabulary) + 1,\n", |
| 657 | + " output_dim=embedding_dim,\n", |
| 658 | + " )\n", |
| 659 | + " )\n", |
| 660 | + "\n", |
| 661 | + " if use_cross_layer:\n", |
| 662 | + " self.cross_layer = keras_rs.layers.FeatureCross(\n", |
| 663 | + " projection_dim=projection_dim\n", |
| 664 | + " )\n", |
| 665 | + "\n", |
| 666 | + " self.dense_layers = []\n", |
| 667 | + " for num_units in dense_num_units_lst:\n", |
| 668 | + " self.dense_layers.append(keras.layers.Dense(num_units, activation=\"relu\"))\n", |
| 669 | + "\n", |
| 670 | + " self.output_layer = keras.layers.Dense(1)\n", |
| 671 | + "\n", |
| 672 | + " # Attributes.\n", |
| 673 | + " self.dense_num_units_lst = dense_num_units_lst\n", |
| 674 | + " self.embedding_dim = embedding_dim\n", |
| 675 | + " self.use_cross_layer = use_cross_layer\n", |
| 676 | + " self.projection_dim = projection_dim\n", |
| 677 | + "\n", |
| 678 | + " def call(self, inputs):\n", |
| 679 | + " embeddings = []\n", |
| 680 | + " for feature_name, embedding_layer in zip(vocabularies, self.embedding_layers):\n", |
| 681 | + " embeddings.append(embedding_layer(inputs[feature_name]))\n", |
| 682 | + "\n", |
| 683 | + " x = keras.ops.concatenate(embeddings, axis=1)\n", |
| 684 | + "\n", |
| 685 | + " if self.use_cross_layer:\n", |
| 686 | + " x = self.cross_layer(x)\n", |
| 687 | + "\n", |
| 688 | + " for dense_layer in self.dense_layers:\n", |
| 689 | + " x = dense_layer(x)\n", |
| 690 | + "\n", |
| 691 | + " x = self.output_layer(x)\n", |
| 692 | + "\n", |
| 693 | + " return x\n", |
669 | 694 | ""
|
670 | 695 | ]
|
671 | 696 | },
|
|
695 | 720 | "opt_cross_network_rmse_list = []\n",
|
696 | 721 | "deep_network_rmse_list = []\n",
|
697 | 722 | "\n",
|
698 |
| - "for _ in range(10):\n", |
699 |
| - " cross_network = get_model(\n", |
| 723 | + "for _ in range(20):\n", |
| 724 | + " cross_network = DCN(\n", |
700 | 725 | " dense_num_units_lst=MOVIELENS_CONFIG[\"dcn_num_units\"],\n",
|
701 | 726 | " embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
|
702 | 727 | " use_cross_layer=True,\n",
|
|
710 | 735 | " )\n",
|
711 | 736 | " cross_network_rmse_list.append(rmse)\n",
|
712 | 737 | "\n",
|
713 |
| - " opt_cross_network = get_model(\n", |
| 738 | + " opt_cross_network = DCN(\n", |
714 | 739 | " dense_num_units_lst=MOVIELENS_CONFIG[\"dcn_num_units\"],\n",
|
715 | 740 | " embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
|
716 | 741 | " use_cross_layer=True,\n",
|
|
725 | 750 | " )\n",
|
726 | 751 | " opt_cross_network_rmse_list.append(rmse)\n",
|
727 | 752 | "\n",
|
728 |
| - " deep_network = get_model(dense_num_units_lst=MOVIELENS_CONFIG[\"deep_net_num_units\"])\n", |
| 753 | + " deep_network = DCN(dense_num_units_lst=MOVIELENS_CONFIG[\"deep_net_num_units\"])\n", |
729 | 754 | " rmse, deep_network_num_params = train_and_evaluate(\n",
|
730 | 755 | " learning_rate=MOVIELENS_CONFIG[\"learning_rate\"],\n",
|
731 | 756 | " epochs=MOVIELENS_CONFIG[\"num_epochs\"],\n",
|
|
758 | 783 | "colab_type": "text"
|
759 | 784 | },
|
760 | 785 | "source": [
|
761 |
| - "DCN outperforms a similarly sized DNN with ReLU layers, demonstrating\n", |
| 786 | + "DCN slightly outperforms a larger DNN with ReLU layers.\n", |
762 | 787 | "Furthermore, the low-rank DCN effectively reduces the\n",
|
763 | 788 | "number of parameters without compromising accuracy."
|
764 | 789 | ]
|
|
0 commit comments