|
10 | 10 | int main() { |
11 | 11 | Config conf; |
12 | 12 | conf.setHost("localhost").setPort(15002); |
| 13 | + // ----------------------------------------------------------- |
| 14 | + // Alternatively... |
| 15 | + // conf.setHost("sc://localhost").setPort(15002); |
13 | 16 | // conf.setHost("123.45.67.8").setPort(15002); |
| 17 | + // ... |
| 18 | + // ----------------------------------------------------------- |
14 | 19 | SparkSession spark(conf); |
15 | 20 |
|
16 | 21 | auto df = spark->sql("SELECT * FROM range(100)"); |
@@ -1194,4 +1199,165 @@ df.show(); |
1194 | 1199 | | Grant | Martin | 72 | grrm@cmpny.com | Grant Martin | adult | |
1195 | 1200 | | Hannah | Abbott | 18 | h.abbott@hogwarts... | Hannah Abbott | minor | |
1196 | 1201 | +----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+ |
| 1202 | +``` |
| 1203 | +
|
| 1204 | +## GraphFrames |
| 1205 | +
|
| 1206 | +### Page Rank |
| 1207 | +
|
| 1208 | +```cpp |
| 1209 | +DataFrame *vertices = nullptr; |
| 1210 | +DataFrame *edges = nullptr; |
| 1211 | +
|
| 1212 | +vertices = spark->sql(R"( |
| 1213 | + SELECT CAST(id AS INT) AS id, name, age FROM VALUES |
| 1214 | + (1, 'Alice', 34), |
| 1215 | + (2, 'Bob', 36), |
| 1216 | + (3, 'Charlie', 30), |
| 1217 | + (4, 'Anne', 29) |
| 1218 | + AS people(id, name, age) |
| 1219 | +)"); |
| 1220 | +
|
| 1221 | +edges = spark->sql(R"( |
| 1222 | + SELECT CAST(src AS INT) AS src, CAST(dst AS INT) AS dst, relationship FROM VALUES |
| 1223 | + (1, 2, 'friend'), |
| 1224 | + (2, 3, 'follow'), |
| 1225 | + (3, 1, 'friend'), |
| 1226 | + (1, 4, 'colleague') |
| 1227 | + AS connections(src, dst, relationship) |
| 1228 | +)"); |
| 1229 | +
|
| 1230 | +auto gf = GraphFrame(*vertices, *edges); |
| 1231 | +
|
| 1232 | +auto rows = gf().pageRank(0.15, 5).collect(); |
| 1233 | +gf().pageRank(0.15, 5).show(); |
| 1234 | +``` |
| 1235 | + |
| 1236 | +### Motif Matching |
| 1237 | + |
| 1238 | +```cpp |
| 1239 | +auto gf = GraphFrame(*vertices, *edges); |
| 1240 | + |
| 1241 | +gf().find("(a)-[e]->(b)"); |
| 1242 | +gf().find("(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)"); |
| 1243 | +gf().find("(a)-[e1]->(b); (b)-[e2]->(c)"); |
| 1244 | +gf().find("(a)-[e]->(b)"); |
| 1245 | +gf().find("(a)-[e]->(b)").show(); |
| 1246 | +``` |
| 1247 | +
|
| 1248 | +### Triplets |
| 1249 | +
|
| 1250 | +```cpp |
| 1251 | +auto gf = GraphFrame(*vertices, *edges); |
| 1252 | +gf().triplets(); |
| 1253 | +gf().triplets().show(); |
| 1254 | +``` |
| 1255 | + |
| 1256 | +### Filter Edges |
| 1257 | + |
| 1258 | +```cpp |
| 1259 | +auto gf = GraphFrame(*vertices, *edges); |
| 1260 | +gf().filterEdges("relationship = 'friend'"); |
| 1261 | +gf().filterEdges(col("relationship") == lit("friend")); |
| 1262 | +gf().filterEdges("relationship = 'enemy'"); |
| 1263 | +gf().filterEdges("relationship = 'friend'").show(); |
| 1264 | +``` |
| 1265 | +
|
| 1266 | +### Filter Vertices |
| 1267 | +
|
| 1268 | +```cpp |
| 1269 | +auto gf = GraphFrame(*vertices, *edges); |
| 1270 | +gf().filterVertices("age < 34"); |
| 1271 | +gf().filterVertices(col("age") < lit(34)); |
| 1272 | +gf().filterVertices("age > 100"); |
| 1273 | +gf().filterVertices("age < 34").show(); |
| 1274 | +``` |
| 1275 | + |
| 1276 | +### Drop Isolated Vertices |
| 1277 | + |
| 1278 | +```cpp |
| 1279 | +auto gf = GraphFrame(*vertices, *edges); |
| 1280 | +gf().dropIsolatedVertices(); |
| 1281 | + |
| 1282 | +auto v_with_isolated = spark->sql(R"( |
| 1283 | + SELECT * FROM VALUES |
| 1284 | + (1, 'Alice', 34), |
| 1285 | + (2, 'Bob', 36), |
| 1286 | + (3, 'Charlie', 30), |
| 1287 | + (4, 'Anne', 29), |
| 1288 | + (99, 'Ghost', 99) |
| 1289 | + AS people(id, name, age) |
| 1290 | +)"); |
| 1291 | + |
| 1292 | +GraphFrame(v_with_isolated, *edges).dropIsolatedVertices().show(); |
| 1293 | +``` |
| 1294 | +
|
| 1295 | +### Breadth First Search |
| 1296 | +
|
| 1297 | +```cpp |
| 1298 | +auto gf = GraphFrame(*vertices, *edges); |
| 1299 | +gf().bfs("id = 1", "id = 3"); |
| 1300 | +gf().bfs("id = 4", "id = 1"); |
| 1301 | +gf().bfs("id = 1", "id = 2", "relationship = 'friend'"); |
| 1302 | +gf().bfs(col("id") == lit(1), col("id") == lit(3)); |
| 1303 | +gf().bfs("id = 1", "id = 3").show(); |
| 1304 | +``` |
| 1305 | + |
| 1306 | +### Connected Components |
| 1307 | + |
| 1308 | +```cpp |
| 1309 | +auto gf = GraphFrame(*vertices, *edges); |
| 1310 | +gf().connectedComponents(); |
| 1311 | +gf().connectedComponents().show(); |
| 1312 | +``` |
| 1313 | + |
| 1314 | +### Strongly Connected Components |
| 1315 | + |
| 1316 | +```cpp |
| 1317 | +auto gf = GraphFrame(*vertices, *edges); |
| 1318 | +gf().stronglyConnectedComponents(10); |
| 1319 | +gf().stronglyConnectedComponents(); |
| 1320 | +gf().stronglyConnectedComponents().show(); |
| 1321 | +``` |
| 1322 | +
|
| 1323 | +### Shortest Paths |
| 1324 | +
|
| 1325 | +```cpp |
| 1326 | +auto gf = GraphFrame(*vertices, *edges); |
| 1327 | +gf().shortestPaths(std::vector<int32_t>{1, 3}); |
| 1328 | +gf().shortestPaths(std::vector<int32_t>{1}); |
| 1329 | +gf().shortestPaths(std::vector<int32_t>{1}).show(); |
| 1330 | +``` |
| 1331 | + |
| 1332 | +### Triangle Count |
| 1333 | + |
| 1334 | +```cpp |
| 1335 | +auto gf = GraphFrame(*vertices, *edges); |
| 1336 | + |
| 1337 | +gf().triangleCount(); |
| 1338 | +gf().triangleCount().show(); |
| 1339 | + |
| 1340 | +auto rows = gf().triangleCount().collect(); |
| 1341 | + |
| 1342 | +std::map<int32_t, int64_t> counts; |
| 1343 | +for (auto &row : rows) |
| 1344 | + counts[row.get<int32_t>("id")] = row.get<int64_t>("count"); |
| 1345 | +``` |
| 1346 | + |
| 1347 | +### Label Propagation |
| 1348 | + |
| 1349 | +```cpp |
| 1350 | +auto gf = GraphFrame(*vertices, *edges); |
| 1351 | +gf().labelPropagation(5); |
| 1352 | +``` |
| 1353 | +
|
| 1354 | +### Method Chaining (GraphFrames) |
| 1355 | +
|
| 1356 | +```cpp |
| 1357 | +// Feed GraphFrames results into plain DataFrame operations |
| 1358 | +auto result = gf() |
| 1359 | + .find("(a)-[e]->(b)") |
| 1360 | + .filter("e.relationship = 'friend'"); |
| 1361 | +
|
| 1362 | +auto ranked = gf().pageRank(0.15, 5).filter("pagerank > 0.0"); |
1197 | 1363 | ``` |
0 commit comments