@@ -1199,4 +1199,165 @@ df.show();
11991199| Grant | Martin | 72 | grrm@cmpny.com | Grant Martin | adult |
12001200| Hannah | Abbott | 18 | h.abbott@hogwarts... | Hannah Abbott | minor |
12011201+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+
1202+ ```
1203+
1204+ ## GraphFrames
1205+
1206+ ### Page Rank
1207+
1208+ ```cpp
1209+ DataFrame *vertices = nullptr;
1210+ DataFrame *edges = nullptr;
1211+
1212+ vertices = spark->sql(R"(
1213+ SELECT CAST(id AS INT) AS id, name, age FROM VALUES
1214+ (1, 'Alice', 34),
1215+ (2, 'Bob', 36),
1216+ (3, 'Charlie', 30),
1217+ (4, 'Anne', 29)
1218+ AS people(id, name, age)
1219+ )");
1220+
1221+ edges = spark->sql(R"(
1222+ SELECT CAST(src AS INT) AS src, CAST(dst AS INT) AS dst, relationship FROM VALUES
1223+ (1, 2, 'friend'),
1224+ (2, 3, 'follow'),
1225+ (3, 1, 'friend'),
1226+ (1, 4, 'colleague')
1227+ AS connections(src, dst, relationship)
1228+ )");
1229+
1230+ auto gf = GraphFrame(*vertices, *edges);
1231+
1232+ auto rows = gf().pageRank(0.15, 5).collect();
1233+ gf().pageRank(0.15, 5).show();
1234+ ```
1235+
1236+ ### Motif Matching
1237+
1238+ ```cpp
1239+ auto gf = GraphFrame(*vertices, *edges);
1240+
1241+ gf().find("(a)-[e]->(b)");
1242+ gf().find("(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)");
1243+ gf().find("(a)-[e1]->(b); (b)-[e2]->(c)");
1244+ gf().find("(a)-[e]->(b)");
1245+ gf().find("(a)-[e]->(b)").show();
1246+ ```
1247+
1248+ ### Triplets
1249+
1250+ ```cpp
1251+ auto gf = GraphFrame(*vertices, *edges);
1252+ gf().triplets();
1253+ gf().triplets().show();
1254+ ```
1255+
1256+ ### Filter Edges
1257+
1258+ ```cpp
1259+ auto gf = GraphFrame(*vertices, *edges);
1260+ gf().filterEdges("relationship = 'friend'");
1261+ gf().filterEdges(col("relationship") == lit("friend"));
1262+ gf().filterEdges("relationship = 'enemy'");
1263+ gf().filterEdges("relationship = 'friend'").show();
1264+ ```
1265+
1266+ ### Filter Vertices
1267+
1268+ ```cpp
1269+ auto gf = GraphFrame(*vertices, *edges);
1270+ gf().filterVertices("age < 34");
1271+ gf().filterVertices(col("age") < lit(34));
1272+ gf().filterVertices("age > 100");
1273+ gf().filterVertices("age < 34").show();
1274+ ```
1275+
1276+ ### Drop Isolated Vertices
1277+
1278+ ```cpp
1279+ auto gf = GraphFrame(*vertices, *edges);
1280+ gf().dropIsolatedVertices();
1281+
1282+ auto v_with_isolated = spark->sql(R"(
1283+ SELECT * FROM VALUES
1284+ (1, 'Alice', 34),
1285+ (2, 'Bob', 36),
1286+ (3, 'Charlie', 30),
1287+ (4, 'Anne', 29),
1288+ (99, 'Ghost', 99)
1289+ AS people(id, name, age)
1290+ )");
1291+
1292+ GraphFrame(v_with_isolated, *edges).dropIsolatedVertices().show();
1293+ ```
1294+
1295+ ### Breadth First Search
1296+
1297+ ```cpp
1298+ auto gf = GraphFrame(*vertices, *edges);
1299+ gf().bfs("id = 1", "id = 3");
1300+ gf().bfs("id = 4", "id = 1");
1301+ gf().bfs("id = 1", "id = 2", "relationship = 'friend'");
1302+ gf().bfs(col("id") == lit(1), col("id") == lit(3));
1303+ gf().bfs("id = 1", "id = 3").show();
1304+ ```
1305+
1306+ ### Connected Components
1307+
1308+ ```cpp
1309+ auto gf = GraphFrame(*vertices, *edges);
1310+ gf().connectedComponents();
1311+ gf().connectedComponents().show();
1312+ ```
1313+
1314+ ### Strongly Connected Components
1315+
1316+ ```cpp
1317+ auto gf = GraphFrame(*vertices, *edges);
1318+ gf().stronglyConnectedComponents(10);
1319+ gf().stronglyConnectedComponents();
1320+ gf().stronglyConnectedComponents().show();
1321+ ```
1322+
1323+ ### Shortest Paths
1324+
1325+ ```cpp
1326+ auto gf = GraphFrame(*vertices, *edges);
1327+ gf().shortestPaths(std::vector<int32_t>{1, 3});
1328+ gf().shortestPaths(std::vector<int32_t>{1});
1329+ gf().shortestPaths(std::vector<int32_t>{1}).show();
1330+ ```
1331+
1332+ ### Triangle Count
1333+
1334+ ```cpp
1335+ auto gf = GraphFrame(*vertices, *edges);
1336+
1337+ gf().triangleCount();
1338+ gf().triangleCount().show();
1339+
1340+ auto rows = gf().triangleCount().collect();
1341+
1342+ std::map<int32_t, int64_t> counts;
1343+ for (auto &row : rows)
1344+ counts[row.get<int32_t>("id")] = row.get<int64_t>("count");
1345+ ```
1346+
1347+ ### Label Propagation
1348+
1349+ ```cpp
1350+ auto gf = GraphFrame(*vertices, *edges);
1351+ gf().labelPropagation(5);
1352+ ```
1353+
1354+ ### Method Chaining (GraphFrames)
1355+
1356+ ```cpp
1357+ // GraphFrames result into plain DataFrame ops
1358+ auto result = gf()
1359+ .find("(a)-[e]->(b)")
1360+ .filter("e.relationship = 'friend'");
1361+
1362+ auto ranked = gf().pageRank(0.15, 5).filter("pagerank > 0.0");
12021363```
0 commit comments