Skip to content

Commit 78a76eb

Browse files
committed
chore: update literals example
1 parent ed283a9 commit 78a76eb

File tree

1 file changed

+91
-48
lines changed

1 file changed

+91
-48
lines changed

examples/literals.py

Lines changed: 91 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444
"http://dl-learner.org/carcinogenesis#hasAtom",
4545
"http://dl-learner.org/carcinogenesis#charge",
4646
],
47+
["http://dl-learner.org/carcinogenesis#salmonella"],
48+
["http://dl-learner.org/carcinogenesis#cytogen_sce"],
49+
["http://dl-learner.org/carcinogenesis#cytogen_ca"],
50+
["http://dl-learner.org/carcinogenesis#mouse_lymph"],
51+
["http://dl-learner.org/carcinogenesis#amesTestPositive"],
4752
],
4853
),
4954
entities,
@@ -71,8 +76,27 @@
7176

7277
print("\nUsing literals:")
7378
features = []
74-
for charges in literals:
75-
charges = list(map(float, *charges)) # type: ignore
79+
80+
for literal in literals:
81+
charges, salmonella, sce, ca, lymph, pos_test = literal
82+
83+
charges = list(charges) # type: ignore
84+
85+
salmonella_feat = int(salmonella == "true")
86+
salmonella_missing = int(salmonella == np.NaN)
87+
88+
sce_feat = int(sce == "true")
89+
sce_missing = int(sce == np.NaN)
90+
91+
ca_feat = int(ca == "true")
92+
ca_missing = int(ca == np.NaN)
93+
94+
lymph_feat = int(lymph == "true")
95+
lymph_missing = int(lymph == np.NaN)
96+
97+
pos_test_feat = int(pos_test == "true")
98+
pos_test_missing = int(pos_test == np.NaN)
99+
76100
features.append(
77101
[
78102
np.max(charges),
@@ -81,96 +105,115 @@
81105
np.std(charges), # type: ignore
82106
len(charges), # type: ignore
83107
np.sum(charges), # type: ignore
108+
salmonella_feat,
109+
salmonella_missing,
110+
sce_feat,
111+
sce_missing,
112+
ca_feat,
113+
ca_missing,
114+
lymph_feat,
115+
lymph_missing,
116+
pos_test_feat,
117+
pos_test_missing,
84118
]
85119
)
86120
features = np.array(features) # type: ignore
87-
train_embeddings = np.hstack(
121+
122+
train_embeddings2 = np.hstack(
88123
(train_embeddings, features[: len(train_entities)]) # type: ignore
89124
)
90-
test_embeddings = np.hstack(
125+
test_embeddings2 = np.hstack(
91126
(test_embeddings, features[len(train_entities) :]) # type: ignore
92127
)
93128

129+
train_features = features[: len(train_entities)]
130+
test_features = features[len(train_entities) :]
131+
132+
# Fit a Support Vector Machine on train embeddings.
94133
clf = GridSearchCV(
95134
SVC(random_state=RANDOM_STATE), {"C": [10 ** i for i in range(-3, 4)]}
96135
)
97-
clf.fit(train_embeddings, train_labels)
136+
clf.fit(train_embeddings2, train_labels)
98137

99-
predictions = clf.predict(test_embeddings)
138+
# Evaluate the Support Vector Machine on test embeddings.
139+
predictions2 = clf.predict(test_embeddings2)
100140
print(
101141
f"Predicted {len(test_entities)} entities with an accuracy of "
102-
+ f"{accuracy_score(test_labels, predictions) * 100 :.4f}%"
142+
+ f"{accuracy_score(test_labels, predictions2) * 100 :.4f}%"
103143
)
104144
print(f"Confusion Matrix ([[TN, FP], [FN, TP]]):")
105-
print(confusion_matrix(test_labels, predictions))
145+
print(confusion_matrix(test_labels, predictions2))
106146

107-
# Reduce the dimensions of entity embeddings to represent them in a 2D plane.
108-
X_tsne = TSNE(random_state=RANDOM_STATE).fit_transform(
109-
np.vstack((train_embeddings, test_embeddings))
110-
)
147+
f, ax = plt.subplots(1, 2, figsize=(15, 6))
111148

112149
# Define the color map.
113150
colors = ["r", "g"]
114151
color_map = {}
115152
for i, label in enumerate(set(labels)):
116153
color_map[label] = colors[i]
117154

118-
# Set the graph with a certain size.
119-
plt.figure(figsize=(10, 4))
155+
ax[0].set_title(
156+
f"Without Literals ({accuracy_score(test_labels, predictions) * 100:.2f}%)"
157+
)
158+
159+
# Reduce the dimensions of entity embeddings without literals to represent them in a 2D plane.
160+
X_tsne = TSNE(random_state=RANDOM_STATE).fit_transform(
161+
np.vstack((train_embeddings, test_embeddings))
162+
)
120163

121-
# Plot the train embeddings.
122-
plt.scatter(
164+
# Plot the train embeddings without literals.
165+
ax[0].scatter(
123166
X_tsne[: len(train_entities), 0],
124167
X_tsne[: len(train_entities), 1],
125168
edgecolors=[color_map[i] for i in labels[: len(train_entities)]],
126169
facecolors=[color_map[i] for i in labels[: len(train_entities)]],
127170
)
128171

129-
# Plot the test embeddings.
130-
plt.scatter(
172+
# Plot the test embeddings without literals.
173+
ax[0].scatter(
131174
X_tsne[len(train_entities) :, 0],
132175
X_tsne[len(train_entities) :, 1],
133176
edgecolors=[color_map[i] for i in labels[len(train_entities) :]],
134177
facecolors="none",
135178
)
136179

137-
# Annotate a few points.
138-
plt.annotate(
139-
entities[25].split("/")[-1],
140-
xy=(X_tsne[25, 0], X_tsne[25, 1]),
141-
xycoords="data",
142-
xytext=(0.01, 0.0),
143-
fontsize=8,
144-
textcoords="axes fraction",
145-
arrowprops=dict(arrowstyle="->", facecolor="black"),
180+
# Create a legend.
181+
ax[0].scatter([], [], edgecolors="r", facecolors="r", label="train -")
182+
ax[0].scatter([], [], edgecolors="g", facecolors="g", label="train +")
183+
ax[0].scatter([], [], edgecolors="r", facecolors="none", label="test -")
184+
ax[0].scatter([], [], edgecolors="g", facecolors="none", label="test +")
185+
ax[0].legend(loc="upper right", ncol=2)
186+
187+
ax[1].set_title(
188+
f"With Literals ({accuracy_score(test_labels, predictions2) * 100 :.2f}%)"
146189
)
147-
plt.annotate(
148-
entities[35].split("/")[-1],
149-
xy=(X_tsne[35, 0], X_tsne[35, 1]),
150-
xycoords="data",
151-
xytext=(0.4, 0.0),
152-
fontsize=8,
153-
textcoords="axes fraction",
154-
arrowprops=dict(arrowstyle="->", facecolor="black"),
190+
191+
# Reduce the dimensions of entity embeddings with literals to represent them in a 2D plane.
192+
X_tsne = TSNE(random_state=RANDOM_STATE).fit_transform(
193+
np.vstack((train_embeddings2, test_embeddings2))
155194
)
156195

157-
# Create a legend.
158-
plt.scatter([], [], edgecolors="r", facecolors="r", label="train -")
159-
plt.scatter([], [], edgecolors="g", facecolors="g", label="train +")
160-
plt.scatter([], [], edgecolors="r", facecolors="none", label="test -")
161-
plt.scatter([], [], edgecolors="g", facecolors="none", label="test +")
162-
plt.legend(loc="upper right", ncol=2)
163-
164-
# Plot the test embeddings.
165-
plt.scatter(
196+
# Plot the train embeddings with literals.
197+
ax[1].scatter(
198+
X_tsne[: len(train_entities), 0],
199+
X_tsne[: len(train_entities), 1],
200+
edgecolors=[color_map[i] for i in labels[: len(train_entities)]],
201+
facecolors=[color_map[i] for i in labels[: len(train_entities)]],
202+
)
203+
204+
# Plot the test embeddings with literals.
205+
ax[1].scatter(
166206
X_tsne[len(train_entities) :, 0],
167207
X_tsne[len(train_entities) :, 1],
168208
edgecolors=[color_map[i] for i in labels[len(train_entities) :]],
169209
facecolors="none",
170210
)
171211

172-
# Display the graph with a title, removing the axes for
173-
# better readability.
174-
plt.title("pyRDF2Vec", fontsize=32)
175-
plt.axis("off")
212+
# Create a legend.
213+
ax[1].scatter([], [], edgecolors="r", facecolors="r", label="train -")
214+
ax[1].scatter([], [], edgecolors="g", facecolors="g", label="train +")
215+
ax[1].scatter([], [], edgecolors="r", facecolors="none", label="test -")
216+
ax[1].scatter([], [], edgecolors="g", facecolors="none", label="test +")
217+
ax[1].legend(loc="upper right", ncol=2)
218+
176219
plt.show()

0 commit comments

Comments
 (0)