
Commit 4549b70

Fix DCN (#2104)
1 parent 63c4fd8 · commit 4549b70


7 files changed (+229 / -131 lines)

examples/keras_rs/dcn.py

Lines changed: 62 additions & 37 deletions

@@ -116,14 +116,14 @@
         "user_occupation_text",
     ],
     # model
-    "embedding_dim": 32,
+    "embedding_dim": 8,
     "deep_net_num_units": [192, 192, 192],
-    "projection_dim": 20,
+    "projection_dim": 8,
     "dcn_num_units": [192, 192],
     # training
-    "learning_rate": 0.01,
-    "num_epochs": 10,
-    "batch_size": 1024,
+    "learning_rate": 1e-2,
+    "num_epochs": 8,
+    "batch_size": 8192,
 }


@@ -144,8 +144,8 @@ def visualize_layer(matrix, features):
     cax = divider.append_axes("right", size="5%", pad=0.05)
     plt.colorbar(im, cax=cax)
     cax.tick_params(labelsize=10)
-    ax.set_xticklabels([""] + features, rotation=45, fontsize=10)
-    ax.set_yticklabels([""] + features, fontsize=10)
+    ax.set_xticklabels([""] + features, rotation=45, fontsize=5)
+    ax.set_yticklabels([""] + features, fontsize=5)


 def train_and_evaluate(
@@ -441,36 +441,61 @@ def get_mixer_data(data_size=100_000):
 """


-def get_model(
-    dense_num_units_lst,
-    embedding_dim=MOVIELENS_CONFIG["embedding_dim"],
-    use_cross_layer=False,
-    projection_dim=None,
-):
-    inputs = {}
-    embeddings = []
-    for feature_name, vocabulary in vocabularies.items():
-        inputs[feature_name] = keras.Input(shape=(), dtype="int32", name=feature_name)
-        embedding_layer = keras.layers.Embedding(
-            input_dim=len(vocabulary) + 1,
-            output_dim=embedding_dim,
-        )
-        embedding = embedding_layer(inputs[feature_name])
-        embeddings.append(embedding)
+class DCN(keras.Model):
+    def __init__(
+        self,
+        dense_num_units_lst,
+        embedding_dim=MOVIELENS_CONFIG["embedding_dim"],
+        use_cross_layer=False,
+        projection_dim=None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+
+        # Layers.
+
+        self.embedding_layers = []
+        for feature_name, vocabulary in vocabularies.items():
+            self.embedding_layers.append(
+                keras.layers.Embedding(
+                    input_dim=len(vocabulary) + 1,
+                    output_dim=embedding_dim,
+                )
+            )
+
+        if use_cross_layer:
+            self.cross_layer = keras_rs.layers.FeatureCross(
+                projection_dim=projection_dim
+            )
+
+        self.dense_layers = []
+        for num_units in dense_num_units_lst:
+            self.dense_layers.append(keras.layers.Dense(num_units, activation="relu"))
+
+        self.output_layer = keras.layers.Dense(1)
+
+        # Attributes.
+        self.dense_num_units_lst = dense_num_units_lst
+        self.embedding_dim = embedding_dim
+        self.use_cross_layer = use_cross_layer
+        self.projection_dim = projection_dim
+
+    def call(self, inputs):
+        embeddings = []
+        for feature_name, embedding_layer in zip(vocabularies, self.embedding_layers):
+            embeddings.append(embedding_layer(inputs[feature_name]))

-    x = keras.ops.concatenate(embeddings, axis=1)
+        x = keras.ops.concatenate(embeddings, axis=1)

-    # Cross layer.
-    if use_cross_layer:
-        x = keras_rs.layers.FeatureCross(projection_dim=projection_dim)(x)
+        if self.use_cross_layer:
+            x = self.cross_layer(x)

-    # Dense layer.
-    for num_units in dense_num_units_lst:
-        x = keras.layers.Dense(num_units, activation="relu")(x)
+        for dense_layer in self.dense_layers:
+            x = dense_layer(x)

-    x = keras.layers.Dense(1)(x)
+        x = self.output_layer(x)

-    return keras.Model(inputs=inputs, outputs=x)
+        return x


 """
@@ -486,8 +511,8 @@ def get_model(
 opt_cross_network_rmse_list = []
 deep_network_rmse_list = []

-for _ in range(10):
-    cross_network = get_model(
+for _ in range(20):
+    cross_network = DCN(
         dense_num_units_lst=MOVIELENS_CONFIG["dcn_num_units"],
         embedding_dim=MOVIELENS_CONFIG["embedding_dim"],
         use_cross_layer=True,
@@ -501,7 +526,7 @@ def get_model(
     )
     cross_network_rmse_list.append(rmse)

-    opt_cross_network = get_model(
+    opt_cross_network = DCN(
         dense_num_units_lst=MOVIELENS_CONFIG["dcn_num_units"],
         embedding_dim=MOVIELENS_CONFIG["embedding_dim"],
         use_cross_layer=True,
@@ -516,7 +541,7 @@ def get_model(
     )
     opt_cross_network_rmse_list.append(rmse)

-    deep_network = get_model(dense_num_units_lst=MOVIELENS_CONFIG["deep_net_num_units"])
+    deep_network = DCN(dense_num_units_lst=MOVIELENS_CONFIG["deep_net_num_units"])
     rmse, deep_network_num_params = train_and_evaluate(
         learning_rate=MOVIELENS_CONFIG["learning_rate"],
         epochs=MOVIELENS_CONFIG["num_epochs"],
@@ -543,7 +568,7 @@ def get_model(
 )

 """
-DCN outperforms a similarly sized DNN with ReLU layers, demonstrating
+DCN slightly outperforms a larger DNN with ReLU layers, demonstrating
 superior performance. Furthermore, the low-rank DCN effectively reduces the
 number of parameters without compromising accuracy.
 """
[Two binary image files changed (contents not shown): 25.1 KB → 12 KB and 32.5 KB → 16.1 KB.]

examples/keras_rs/ipynb/dcn.ipynb

Lines changed: 67 additions & 42 deletions

@@ -157,14 +157,14 @@
 "        \"user_occupation_text\",\n",
 "    ],\n",
 "    # model\n",
-"    \"embedding_dim\": 32,\n",
+"    \"embedding_dim\": 8,\n",
 "    \"deep_net_num_units\": [192, 192, 192],\n",
-"    \"projection_dim\": 20,\n",
+"    \"projection_dim\": 8,\n",
 "    \"dcn_num_units\": [192, 192],\n",
 "    # training\n",
-"    \"learning_rate\": 0.01,\n",
-"    \"num_epochs\": 10,\n",
-"    \"batch_size\": 1024,\n",
+"    \"learning_rate\": 1e-2,\n",
+"    \"num_epochs\": 8,\n",
+"    \"batch_size\": 8192,\n",
 "}\n",
 ""
 ]
@@ -199,8 +199,8 @@
 "    cax = divider.append_axes(\"right\", size=\"5%\", pad=0.05)\n",
 "    plt.colorbar(im, cax=cax)\n",
 "    cax.tick_params(labelsize=10)\n",
-"    ax.set_xticklabels([\"\"] + features, rotation=45, fontsize=10)\n",
-"    ax.set_yticklabels([\"\"] + features, fontsize=10)\n",
+"    ax.set_xticklabels([\"\"] + features, rotation=45, fontsize=5)\n",
+"    ax.set_yticklabels([\"\"] + features, fontsize=5)\n",
 "\n",
 "\n",
 "def train_and_evaluate(\n",
@@ -636,36 +636,61 @@
 "outputs": [],
 "source": [
 "\n",
-"def get_model(\n",
-"    dense_num_units_lst,\n",
-"    embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
-"    use_cross_layer=False,\n",
-"    projection_dim=None,\n",
-"):\n",
-"    inputs = {}\n",
-"    embeddings = []\n",
-"    for feature_name, vocabulary in vocabularies.items():\n",
-"        inputs[feature_name] = keras.Input(shape=(), dtype=\"int32\", name=feature_name)\n",
-"        embedding_layer = keras.layers.Embedding(\n",
-"            input_dim=len(vocabulary) + 1,\n",
-"            output_dim=embedding_dim,\n",
-"        )\n",
-"        embedding = embedding_layer(inputs[feature_name])\n",
-"        embeddings.append(embedding)\n",
-"\n",
-"    x = keras.ops.concatenate(embeddings, axis=1)\n",
-"\n",
-"    # Cross layer.\n",
-"    if use_cross_layer:\n",
-"        x = keras_rs.layers.FeatureCross(projection_dim=projection_dim)(x)\n",
-"\n",
-"    # Dense layer.\n",
-"    for num_units in dense_num_units_lst:\n",
-"        x = keras.layers.Dense(num_units, activation=\"relu\")(x)\n",
-"\n",
-"    x = keras.layers.Dense(1)(x)\n",
-"\n",
-"    return keras.Model(inputs=inputs, outputs=x)\n",
+"class DCN(keras.Model):\n",
+"    def __init__(\n",
+"        self,\n",
+"        dense_num_units_lst,\n",
+"        embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
+"        use_cross_layer=False,\n",
+"        projection_dim=None,\n",
+"        **kwargs,\n",
+"    ):\n",
+"        super().__init__(**kwargs)\n",
+"\n",
+"        # Layers.\n",
+"\n",
+"        self.embedding_layers = []\n",
+"        for feature_name, vocabulary in vocabularies.items():\n",
+"            self.embedding_layers.append(\n",
+"                keras.layers.Embedding(\n",
+"                    input_dim=len(vocabulary) + 1,\n",
+"                    output_dim=embedding_dim,\n",
+"                )\n",
+"            )\n",
+"\n",
+"        if use_cross_layer:\n",
+"            self.cross_layer = keras_rs.layers.FeatureCross(\n",
+"                projection_dim=projection_dim\n",
+"            )\n",
+"\n",
+"        self.dense_layers = []\n",
+"        for num_units in dense_num_units_lst:\n",
+"            self.dense_layers.append(keras.layers.Dense(num_units, activation=\"relu\"))\n",
+"\n",
+"        self.output_layer = keras.layers.Dense(1)\n",
+"\n",
+"        # Attributes.\n",
+"        self.dense_num_units_lst = dense_num_units_lst\n",
+"        self.embedding_dim = embedding_dim\n",
+"        self.use_cross_layer = use_cross_layer\n",
+"        self.projection_dim = projection_dim\n",
+"\n",
+"    def call(self, inputs):\n",
+"        embeddings = []\n",
+"        for feature_name, embedding_layer in zip(vocabularies, self.embedding_layers):\n",
+"            embeddings.append(embedding_layer(inputs[feature_name]))\n",
+"\n",
+"        x = keras.ops.concatenate(embeddings, axis=1)\n",
+"\n",
+"        if self.use_cross_layer:\n",
+"            x = self.cross_layer(x)\n",
+"\n",
+"        for dense_layer in self.dense_layers:\n",
+"            x = dense_layer(x)\n",
+"\n",
+"        x = self.output_layer(x)\n",
+"\n",
+"        return x\n",
 ""
 ]
 },
@@ -695,8 +720,8 @@
 "opt_cross_network_rmse_list = []\n",
 "deep_network_rmse_list = []\n",
 "\n",
-"for _ in range(10):\n",
-"    cross_network = get_model(\n",
+"for _ in range(20):\n",
+"    cross_network = DCN(\n",
 "        dense_num_units_lst=MOVIELENS_CONFIG[\"dcn_num_units\"],\n",
 "        embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
 "        use_cross_layer=True,\n",
@@ -710,7 +735,7 @@
 "    )\n",
 "    cross_network_rmse_list.append(rmse)\n",
 "\n",
-"    opt_cross_network = get_model(\n",
+"    opt_cross_network = DCN(\n",
 "        dense_num_units_lst=MOVIELENS_CONFIG[\"dcn_num_units\"],\n",
 "        embedding_dim=MOVIELENS_CONFIG[\"embedding_dim\"],\n",
 "        use_cross_layer=True,\n",
@@ -725,7 +750,7 @@
 "    )\n",
 "    opt_cross_network_rmse_list.append(rmse)\n",
 "\n",
-"    deep_network = get_model(dense_num_units_lst=MOVIELENS_CONFIG[\"deep_net_num_units\"])\n",
+"    deep_network = DCN(dense_num_units_lst=MOVIELENS_CONFIG[\"deep_net_num_units\"])\n",
 "    rmse, deep_network_num_params = train_and_evaluate(\n",
 "        learning_rate=MOVIELENS_CONFIG[\"learning_rate\"],\n",
 "        epochs=MOVIELENS_CONFIG[\"num_epochs\"],\n",
@@ -758,7 +783,7 @@
 "colab_type": "text"
 },
 "source": [
-"DCN outperforms a similarly sized DNN with ReLU layers, demonstrating\n",
+"DCN slightly outperforms a larger DNN with ReLU layers, demonstrating\n",
 "superior performance. Furthermore, the low-rank DCN effectively reduces the\n",
 "number of parameters without compromising accuracy."
 ]
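
As a back-of-the-envelope check on the closing claim that the low-rank DCN reduces the parameter count: assuming keras_rs.layers.FeatureCross with a projection_dim follows the DCN-V2 low-rank factorization, where the dense d x d cross weight is replaced by two matrices of shape (d, projection_dim), the cross layer shrinks roughly as sketched below. The feature count (8 features at embedding_dim 8, giving d = 64) is an illustrative assumption, not a value read from this diff.

# Illustrative parameter count for the cross layer (assumed low-rank factorization).
embedding_dim = 8      # from MOVIELENS_CONFIG in this commit
projection_dim = 8     # from MOVIELENS_CONFIG in this commit
num_features = 8       # assumption for illustration only

d = num_features * embedding_dim          # width of the concatenated embeddings
full_rank_params = d * d                  # dense d x d cross weight: 4096
low_rank_params = 2 * d * projection_dim  # U and V of shape (d, projection_dim): 1024
print(full_rank_params, low_rank_params)  # 4096 1024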
