diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/java/org/apache/geaflow/dsl/runtime/query/GQLAlgorithmTest.java b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/java/org/apache/geaflow/dsl/runtime/query/GQLAlgorithmTest.java index 04c62509f..07bd11662 100644 --- a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/java/org/apache/geaflow/dsl/runtime/query/GQLAlgorithmTest.java +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/java/org/apache/geaflow/dsl/runtime/query/GQLAlgorithmTest.java @@ -324,6 +324,76 @@ public void testAlgorithmJaccardSimilarity() throws Exception { .checkSinkResult(); } + @Test + public void testAlgorithmJaccardSimilarityNoCommonNeighbors() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/modern_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_no_common.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilarityIdenticalVertices() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/modern_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_identical.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilarityHighSimilarity() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/modern_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_high.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilarityCompleteOverlap() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/jaccard_similarity_test_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_complete_overlap.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilarityDisjointSets() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/jaccard_similarity_test_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_disjoint.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilaritySelfLoop() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/jaccard_similarity_test_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_selfloop.sql") + .execute() + .checkSinkResult(); + } + + @Test + public void testAlgorithmJaccardSimilarityIsolatedVertex() throws Exception { + QueryTester + .build() + .withGraphDefine("/query/jaccard_similarity_test_graph.sql") + .withQueryPath("/query/gql_algorithm_jaccard_similarity_isolated.sql") + .execute() + .checkSinkResult(); + } + @Test public void testEdgeIterator() throws Exception { QueryTester diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_edge.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_edge.txt new file mode 100644 index 000000000..b28a44def --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_edge.txt @@ -0,0 +1,10 @@ +1,2,1.0 +1,3,1.0 +1,4,1.0 +2,3,1.0 +2,4,1.0 +3,4,1.0 +5,6,1.0 +5,7,1.0 +6,7,1.0 +9,9,1.0 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_vertex.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_vertex.txt new file mode 100644 index 000000000..f821fb1c2 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/data/jaccard_test_vertex.txt @@ -0,0 +1,9 @@ +1,node1 +2,node2 +3,node3 +4,node4 +5,node5 +6,node6 +7,node7 +8,isolated +9,selfloop diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_complete_overlap.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_complete_overlap.txt new file mode 100644 index 000000000..a8815fa11 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_complete_overlap.txt @@ -0,0 +1 @@ +3,4,0.2 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_disjoint.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_disjoint.txt new file mode 100644 index 000000000..a8005ab77 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_disjoint.txt @@ -0,0 +1 @@ +1,5,0.0 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_high.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_high.txt new file mode 100644 index 000000000..501633347 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_high.txt @@ -0,0 +1 @@ +4,1,0.2 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_identical.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_identical.txt new file mode 100644 index 000000000..e69de29bb diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_isolated.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_isolated.txt new file mode 100644 index 000000000..8b906b083 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_isolated.txt @@ -0,0 +1 @@ +8,1,0.0 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_no_common.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_no_common.txt new file mode 100644 index 000000000..da79576f4 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_no_common.txt @@ -0,0 +1 @@ +2,6,0.0 diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_selfloop.txt b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/expect/gql_algorithm_jaccard_similarity_selfloop.txt new file mode 100644 index 000000000..e69de29bb diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_complete_overlap.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_complete_overlap.sql new file mode 100644 index 000000000..4d01cf988 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_complete_overlap.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH jaccard_test; + +INSERT INTO result_tb +CALL jaccard_similarity(3, 4) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_disjoint.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_disjoint.sql new file mode 100644 index 000000000..fb9a7aee4 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_disjoint.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH jaccard_test; + +INSERT INTO result_tb +CALL jaccard_similarity(1, 5) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_high.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_high.sql new file mode 100644 index 000000000..4a4ff54a3 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_high.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH modern; + +INSERT INTO result_tb +CALL jaccard_similarity(4, 1) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_identical.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_identical.sql new file mode 100644 index 000000000..cf10b430a --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_identical.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH modern; + +INSERT INTO result_tb +CALL jaccard_similarity(1, 1) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_isolated.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_isolated.sql new file mode 100644 index 000000000..066a90299 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_isolated.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH jaccard_test; + +INSERT INTO result_tb +CALL jaccard_similarity(8, 1) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_no_common.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_no_common.sql new file mode 100644 index 000000000..f5da2f27d --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_no_common.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH modern; + +INSERT INTO result_tb +CALL jaccard_similarity(2, 6) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_selfloop.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_selfloop.sql new file mode 100644 index 000000000..46a3d7805 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/gql_algorithm_jaccard_similarity_selfloop.sql @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE result_tb ( + vertex_a int, + vertex_b int, + jaccard_coefficient double +) WITH ( + type='file', + geaflow.dsl.file.path='${target}' +); + +USE GRAPH jaccard_test; + +INSERT INTO result_tb +CALL jaccard_similarity(9, 9) YIELD (vertex_a, vertex_b, jaccard_coefficient) +RETURN cast(vertex_a as int), cast(vertex_b as int), jaccard_coefficient +; diff --git a/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/jaccard_similarity_test_graph.sql b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/jaccard_similarity_test_graph.sql new file mode 100644 index 000000000..155dffec5 --- /dev/null +++ b/geaflow/geaflow-dsl/geaflow-dsl-runtime/src/test/resources/query/jaccard_similarity_test_graph.sql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE v_node ( + id bigint, + name varchar +) WITH ( + type='file', + geaflow.dsl.window.size = -1, + geaflow.dsl.file.path = 'resource:///data/jaccard_test_vertex.txt' +); + +CREATE TABLE e_link ( + srcId bigint, + targetId bigint, + weight double +) WITH ( + type='file', + geaflow.dsl.window.size = -1, + geaflow.dsl.file.path = 'resource:///data/jaccard_test_edge.txt' +); + +CREATE GRAPH jaccard_test ( + Vertex node using v_node WITH ID(id), + Edge link using e_link WITH ID(srcId, targetId) +) WITH ( + storeType='memory', + shardCount = 2 +);