Skip to content

Commit a60e912

Browse files
authored
fix(interactive): Cut off the string if it exceeds max_length. (#4359)
Fix two bugs:
- The vertex table is not resized to the actual size of the vertex record batch. #4383
- A string is not cut off when its length exceeds the `max_length` specified for `var_char`. #4365
1 parent 0e2db7c commit a60e912

File tree

9 files changed

+478
-45
lines changed

9 files changed

+478
-45
lines changed

.github/workflows/interactive.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ jobs:
219219
- name: Robustness test
220220
env:
221221
INTERACTIVE_WORKSPACE: /tmp/interactive_workspace
222+
GS_TEST_DIR: ${{ github.workspace }}/gstest
222223
run: |
223224
cd ${GITHUB_WORKSPACE}/flex/tests/hqps
224225
bash hqps_robust_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml ./interactive_config_test_cbo.yaml

flex/interactive/sdk/python/gs_interactive/tests/conftest.py

Lines changed: 342 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
)
4040
print("MODERN_GRAPH_DATA_DIR: ", MODERN_GRAPH_DATA_DIR)
4141

42-
4342
modern_graph_full = {
4443
"name": "full_graph",
4544
"description": "This is a test graph",
@@ -287,6 +286,295 @@
287286
}
288287

289288

289+
def _algo_long_text_property(name):
    """Return a property entry for ``name`` typed as a ``long_text`` string."""
    return {
        "property_name": name,
        "property_type": {"string": {"long_text": None}},
    }


def _algo_int32_property(name):
    """Return a property entry for ``name`` typed as ``DT_SIGNED_INT32``."""
    return {
        "property_name": name,
        "property_type": {"primitive_type": "DT_SIGNED_INT32"},
    }


def _algo_vertex_type(type_name, properties):
    """Return a vertex type entry whose primary key is the ``id`` property."""
    return {
        "type_name": type_name,
        "properties": properties,
        "primary_keys": ["id"],
    }


def _algo_edge_type(type_name, source_vertex, destination_vertex):
    """Return a property-less MANY_TO_MANY edge type between two vertex types."""
    return {
        "type_name": type_name,
        "properties": [],
        "vertex_type_pair_relations": [
            {
                "destination_vertex": destination_vertex,
                "relation": "MANY_TO_MANY",
                "source_vertex": source_vertex,
            }
        ],
    }


# Schema of the "graph_algo" test graph: four vertex types (Challenge, Task,
# Solution, Paper) keyed by "id", and five property-less edge types.
new_graph_algo = {
    "name": "graph_algo",
    "schema": {
        "vertex_types": [
            # Challenge / Task / Solution share the same three string properties.
            _algo_vertex_type(
                simple_type,
                [
                    _algo_long_text_property(prop_name)
                    for prop_name in ("id", "name", "description")
                ],
            )
            for simple_type in ("Challenge", "Task", "Solution")
        ]
        + [
            _algo_vertex_type(
                "Paper",
                [
                    _algo_long_text_property("id"),
                    _algo_long_text_property("published"),
                    _algo_int32_property("year"),
                    _algo_int32_property("month"),
                    _algo_long_text_property("title"),
                    _algo_long_text_property("authors"),
                    _algo_long_text_property("summary"),
                    _algo_long_text_property("journal_ref"),
                    _algo_long_text_property("doi"),
                    _algo_long_text_property("primary_category"),
                    _algo_long_text_property("categories"),
                    _algo_long_text_property("problem_def"),
                    _algo_long_text_property("keywords"),
                ],
            )
        ],
        "edge_types": [
            _algo_edge_type("SolvedBy", "Challenge", "Solution"),
            _algo_edge_type("Cite", "Paper", "Paper"),
            _algo_edge_type("Has", "Paper", "Challenge"),
            _algo_edge_type("WorkOn", "Paper", "Task"),
            _algo_edge_type("Use", "Paper", "Solution"),
        ],
    },
}
465+
466+
"""
467+
Bulk-loading config template for the graph_algo graph. The data source
location is not set here; fill it in with the real data directory before use.
468+
"""
469+
def _algo_edge_mapping(edge, source_vertex, destination_vertex, csv_file):
    """Return an edge mapping that reads endpoints from columns 0 and 1.

    Column 0 ("source") and column 1 ("target") of ``csv_file`` are mapped to
    the ``id`` property of the source and destination vertex respectively.
    """
    return {
        "type_triplet": {
            "edge": edge,
            "source_vertex": source_vertex,
            "destination_vertex": destination_vertex,
        },
        "inputs": [csv_file],
        "column_mappings": [],
        "source_vertex_mappings": [
            {"column": {"index": 0, "name": "source"}, "property": "id"}
        ],
        "destination_vertex_mappings": [
            {"column": {"index": 1, "name": "target"}, "property": "id"}
        ],
    }


# Import configuration for the "graph_algo" graph. No "location" is given for
# the data source in this template.
new_graph_algo_import_config = {
    "graph": "graph_algo",
    "loading_config": {
        "data_source": {"scheme": "file"},
        "import_option": "init",
        "format": {
            "type": "csv",
            "metadata": {
                "delimiter": "|",
                "header_row": True,
                "quoting": True,
                "quote_char": '"',
                "double_quote": True,
                "batch_reader": True,
            },
        },
    },
    "vertex_mappings": [
        {"type_name": "Challenge", "inputs": ["Challenge.csv"]},
        {"type_name": "Task", "inputs": ["Task.csv"]},
        {"type_name": "Solution", "inputs": ["Solution.csv"]},
        {"type_name": "Paper", "inputs": ["Paper.csv"]},
    ],
    "edge_mappings": [
        _algo_edge_mapping(
            "SolvedBy", "Challenge", "Solution", "Challenge_Solvedby_Solution.csv"
        ),
        _algo_edge_mapping("Cite", "Paper", "Paper", "Paper_Cite_Paper.csv"),
        _algo_edge_mapping("Has", "Paper", "Challenge", "Paper_Has_Challenge.csv"),
        _algo_edge_mapping("WorkOn", "Paper", "Task", "Paper_WorkOn_Task.csv"),
        _algo_edge_mapping("Use", "Paper", "Solution", "Paper_Use_Solution.csv"),
    ],
}
576+
577+
290578
@pytest.fixture(scope="module")
291579
def interactive_driver():
292580
driver = Driver()
@@ -350,6 +638,41 @@ def create_graph_with_custom_pk_name(interactive_session):
350638
delete_running_graph(interactive_session, graph_id)
351639

352640

641+
@pytest.fixture(scope="function")
def create_graph_with_var_char_property(interactive_session):
    """
    Create a graph from a copy of modern_graph_full in which every string
    vertex property is declared as var_char with max_length 2.

    Yields the id of the created graph and deletes the graph on teardown.
    """
    var_char_graph = copy.deepcopy(modern_graph_full)
    for vertex_def in var_char_graph["schema"]["vertex_types"]:
        for prop_def in vertex_def["properties"]:
            prop_type = prop_def["property_type"]
            # Only string-typed properties are rewritten; others stay as-is.
            if prop_type and "string" in prop_type:
                prop_type["string"] = {"var_char": {"max_length": 2}}
    request = CreateGraphRequest.from_dict(var_char_graph)
    create_resp = interactive_session.create_graph(request)
    assert create_resp.is_ok()
    graph_id = create_resp.get_value().graph_id
    yield graph_id
    delete_running_graph(interactive_session, graph_id)
656+
657+
658+
@pytest.fixture(scope="function")
def create_graph_algo_graph_with_x_csr_params(interactive_session):
    """
    Create the graph_algo graph with a deliberately small max_vertex_num.

    Every vertex type gets ``x_csr_params = {"max_vertex_num": 1}``. The
    fixture is meant for tests that then import relatively large data, to
    check that the bulk loading procedure can handle that case.

    Yields the id of the created graph and deletes the graph on teardown.
    """
    # Deep copy: a shallow dict.copy() shares the nested "schema" /
    # "vertex_types" objects, so adding x_csr_params would modify the
    # module-level new_graph_algo and leak into every other user of it.
    graph_algo_schema = copy.deepcopy(new_graph_algo)
    for vertex_type in graph_algo_schema["schema"]["vertex_types"]:
        vertex_type["x_csr_params"] = {"max_vertex_num": 1}
    create_graph_request = CreateGraphRequest.from_dict(graph_algo_schema)
    resp = interactive_session.create_graph(create_graph_request)
    assert resp.is_ok()
    graph_id = resp.get_value().graph_id
    yield graph_id
    delete_running_graph(interactive_session, graph_id)
674+
675+
353676
def wait_job_finish(sess: Session, job_id: str):
354677
assert job_id is not None
355678
while True:
@@ -397,6 +720,24 @@ def import_data_to_full_modern_graph(sess: Session, graph_id: str):
397720
assert wait_job_finish(sess, job_id)
398721

399722

723+
def import_data_to_new_graph_algo_graph(sess: Session, graph_id: str):
    """
    Bulk load the graph_algo dataset into the graph ``graph_id``.

    The data is read from ``$GS_TEST_DIR/flex/new_graph_algo``. An Exception
    is raised when the GS_TEST_DIR environment variable is not set. The
    function asserts that the loading request is accepted and that
    wait_job_finish reports success for the resulting job.
    """
    test_root = os.environ.get("GS_TEST_DIR")
    if test_root is None:
        raise Exception("GS_TEST_DIR is not set")

    data_dir = os.path.join(test_root, "flex/new_graph_algo")
    # Work on a copy so the shared config template keeps no location.
    loading_spec = copy.deepcopy(new_graph_algo_import_config)
    loading_spec["loading_config"]["data_source"]["location"] = data_dir
    load_resp = sess.bulk_loading(graph_id, SchemaMapping.from_dict(loading_spec))
    assert load_resp.is_ok()
    assert wait_job_finish(sess, load_resp.get_value().job_id)
739+
740+
400741
def submit_query_via_neo4j_endpoint(
401742
neo4j_sess: Neo4jSession, graph_id: str, query: str
402743
):

0 commit comments

Comments
 (0)