InternScience
diff --git a/‎examples/generate/generate_aggregated_qa/aggregated_config.yaml‎
Lines changed: 119 additions & 0 deletions b/‎examples/generate/generate_aggregated_qa/aggregated_config.yaml‎
Lines changed: 119 additions & 0 deletions
diff --git a/‎scripts/generate/generate_aggregated.sh‎ ‎…ate_aggregated_qa/generate_aggregated.sh‎scripts/generate/generate_aggregated.sh renamed to examples/generate/generate_aggregated_qa/generate_aggregated.sh b/‎scripts/generate/generate_aggregated.sh‎ ‎…ate_aggregated_qa/generate_aggregated.sh‎scripts/generate/generate_aggregated.sh renamed to examples/generate/generate_aggregated_qa/generate_aggregated.sh
diff --git a/‎resources/input_examples/csv_demo.csv‎ ‎examples/input_examples/csv_demo.csv‎resources/input_examples/csv_demo.csv renamed to examples/input_examples/csv_demo.csv b/‎resources/input_examples/csv_demo.csv‎ ‎examples/input_examples/csv_demo.csv‎resources/input_examples/csv_demo.csv renamed to examples/input_examples/csv_demo.csv
diff --git a/‎resources/input_examples/extract_demo.txt‎ ‎examples/input_examples/extract_demo.txt‎resources/input_examples/extract_demo.txt renamed to examples/input_examples/extract_demo.txt b/‎resources/input_examples/extract_demo.txt‎ ‎examples/input_examples/extract_demo.txt‎resources/input_examples/extract_demo.txt renamed to examples/input_examples/extract_demo.txt
diff --git a/‎…rces/input_examples/graphml_demo.graphml‎ ‎…ples/input_examples/graphml_demo.graphml‎resources/input_examples/graphml_demo.graphml renamed to examples/input_examples/graphml_demo.graphml b/‎…rces/input_examples/graphml_demo.graphml‎ ‎…ples/input_examples/graphml_demo.graphml‎resources/input_examples/graphml_demo.graphml renamed to examples/input_examples/graphml_demo.graphml
diff --git a/‎…064cf17c5435814edfbee42ae6b19aac37d2.jpg‎ ‎…064cf17c5435814edfbee42ae6b19aac37d2.jpg‎resources/input_examples/images/0f25783fdfa99042db274ba9f6b3064cf17c5435814edfbee42ae6b19aac37d2.jpg renamed to examples/input_examples/images/0f25783fdfa99042db274ba9f6b3064cf17c5435814edfbee42ae6b19aac37d2.jpg b/‎…064cf17c5435814edfbee42ae6b19aac37d2.jpg‎ ‎…064cf17c5435814edfbee42ae6b19aac37d2.jpg‎resources/input_examples/images/0f25783fdfa99042db274ba9f6b3064cf17c5435814edfbee42ae6b19aac37d2.jpg renamed to examples/input_examples/images/0f25783fdfa99042db274ba9f6b3064cf17c5435814edfbee42ae6b19aac37d2.jpg
diff --git a/‎…e99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg‎ ‎…e99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg‎resources/input_examples/images/390516e39e77030092027ded523ee99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg renamed to examples/input_examples/images/390516e39e77030092027ded523ee99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg b/‎…e99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg‎ ‎…e99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg‎resources/input_examples/images/390516e39e77030092027ded523ee99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg renamed to examples/input_examples/images/390516e39e77030092027ded523ee99e96ffa8b6df4476c9b12d7bb1dd20d635.jpg
diff --git a/‎…2ae309fee014082db00bc2d87187a6bb5dca.jpg‎ ‎…2ae309fee014082db00bc2d87187a6bb5dca.jpg‎resources/input_examples/images/4abc534d1dea2b706e44aaac26fe2ae309fee014082db00bc2d87187a6bb5dca.jpg renamed to examples/input_examples/images/4abc534d1dea2b706e44aaac26fe2ae309fee014082db00bc2d87187a6bb5dca.jpg b/‎…2ae309fee014082db00bc2d87187a6bb5dca.jpg‎ ‎…2ae309fee014082db00bc2d87187a6bb5dca.jpg‎resources/input_examples/images/4abc534d1dea2b706e44aaac26fe2ae309fee014082db00bc2d87187a6bb5dca.jpg renamed to examples/input_examples/images/4abc534d1dea2b706e44aaac26fe2ae309fee014082db00bc2d87187a6bb5dca.jpg
diff --git a/‎…df02964c9c3da8d8e9567ea19240b14cc742.jpg‎ ‎…df02964c9c3da8d8e9567ea19240b14cc742.jpg‎resources/input_examples/images/8fb93cfc0d6b0ebb3e5d5aaae237df02964c9c3da8d8e9567ea19240b14cc742.jpg renamed to examples/input_examples/images/8fb93cfc0d6b0ebb3e5d5aaae237df02964c9c3da8d8e9567ea19240b14cc742.jpg b/‎…df02964c9c3da8d8e9567ea19240b14cc742.jpg‎ ‎…df02964c9c3da8d8e9567ea19240b14cc742.jpg‎resources/input_examples/images/8fb93cfc0d6b0ebb3e5d5aaae237df02964c9c3da8d8e9567ea19240b14cc742.jpg renamed to examples/input_examples/images/8fb93cfc0d6b0ebb3e5d5aaae237df02964c9c3da8d8e9567ea19240b14cc742.jpg
diff --git a/‎…fe56f793f287b3399345aea31cd20eed2824.jpg‎ ‎…fe56f793f287b3399345aea31cd20eed2824.jpg‎resources/input_examples/images/cc5b36e3c972b210d8b56d34fc7ffe56f793f287b3399345aea31cd20eed2824.jpg renamed to examples/input_examples/images/cc5b36e3c972b210d8b56d34fc7ffe56f793f287b3399345aea31cd20eed2824.jpg b/‎…fe56f793f287b3399345aea31cd20eed2824.jpg‎ ‎…fe56f793f287b3399345aea31cd20eed2824.jpg‎resources/input_examples/images/cc5b36e3c972b210d8b56d34fc7ffe56f793f287b3399345aea31cd20eed2824.jpg renamed to examples/input_examples/images/cc5b36e3c972b210d8b56d34fc7ffe56f793f287b3399345aea31cd20eed2824.jpg
@@ -0,0 +1,119 @@
+global_params:
+  working_dir: cache  # NOTE(review): presumably the directory for intermediate artifacts — confirm
+
+nodes:  # pipeline nodes; each node lists the ids of the nodes it depends on under `dependencies`
+  - id: read_files  # entry node of the pipeline: it has no upstream dependencies
+    op_name: read
+    type: source
+    dependencies: []
+    params:
+      input_path:  # input file paths; sample inputs live under examples/input_examples
+        - examples/input_examples/jsonl_demo.jsonl
+
+  - id: chunk_documents  # splits the text coming from read_files into chunks
+    op_name: chunk
+    type: map_batch
+    dependencies:
+      - read_files
+    execution_params:
+      replicas: 4
+    params:
+      chunk_size: 1024  # chunk size for text splitting
+      chunk_overlap: 100  # chunk overlap for text splitting
+
+  - id: build_kg  # runs on the output of chunk_documents (see dependencies)
+    op_name: build_kg
+    type: map_batch
+    dependencies:
+      - chunk_documents
+    execution_params:
+      replicas: 1
+      batch_size: 128  # NOTE(review): presumably the number of items per batch — confirm
+
+  - id: quiz  # quiz stage; runs on the output of build_kg
+    op_name: quiz
+    type: aggregate
+    dependencies:
+      - build_kg
+    execution_params:
+      replicas: 1
+      batch_size: 128
+    params:
+      quiz_samples: 2  # number of quiz samples to generate
+      concurrency_limit: 200  # NOTE(review): presumably caps concurrent requests — confirm
+
+  - id: judge  # judge stage; runs on the output of quiz
+    op_name: judge
+    type: map_batch
+    dependencies:
+      - quiz
+    execution_params:
+      replicas: 1
+      batch_size: 16
+
+  - id: partition  # partition stage; runs on the output of judge
+    op_name: partition
+    type: aggregate
+    dependencies:
+      - judge
+    params:
+      method: ece  # ece is a custom partition method based on comprehension loss
+      method_params:
+        max_units_per_community: 20  # max nodes and edges per community
+        min_units_per_community: 5  # min nodes and edges per community
+        max_tokens_per_community: 10240  # max tokens per community
+        unit_sampling: max_loss  # unit sampling strategy: random, max_loss, or min_loss
+
+  - id: generate  # final stage; runs on the output of partition
+    op_name: generate
+    type: map_batch
+    dependencies:
+      - partition
+    execution_params:
+      replicas: 1
+      batch_size: 16
+    params:
+      method: aggregated  # one of: atomic, aggregated, multi_hop, cot, vqa
+      data_format: ChatML  # one of: Alpaca, Sharegpt, ChatML
+
+#pipeline:
+#  - name: read_step # step name is unique in the pipeline, and can be referenced by other steps
+#    op_key: read
+#    params:
+#      input_file: examples/input_examples/jsonl_demo.jsonl # input file path; supports json, jsonl, txt, pdf. See examples/input_examples for examples
+#
+#  - name: chunk_step
+#    op_key: chunk
+#    deps: [read_step] # chunk_step depends on read_step
+#    params:
+#        chunk_size: 1024 # chunk size for text splitting
+#        chunk_overlap: 100 # chunk overlap for text splitting
+#
+#  - name: build_kg_step
+#    op_key: build_kg
+#    deps: [chunk_step] # build_kg_step depends on chunk_step
+#
+#  - name: quiz_and_judge_step
+#    op_key: quiz_and_judge
+#    deps: [build_kg_step] # quiz_and_judge depends on build_kg_step
+#    params:
+#      quiz_samples: 2 # number of quiz samples to generate
+#      re_judge: false # whether to re-judge the existing quiz samples
+#
+#  - name: partition_step
+#    op_key: partition
+#    deps: [quiz_and_judge_step] # partition_step depends on quiz_and_judge_step
+#    params:
+#      method: ece # ece is a custom partition method based on comprehension loss
+#      method_params:
+#        max_units_per_community: 20 # max nodes and edges per community
+#        min_units_per_community: 5 # min nodes and edges per community
+#        max_tokens_per_community: 10240 # max tokens per community
+#        unit_sampling: max_loss # unit sampling strategy; supports: random, max_loss, min_loss
+#
+#  - name: generate_step
+#    op_key: generate
+#    deps: [partition_step] # generate_step depends on partition_step
+#    params:
+#      method: aggregated # atomic, aggregated, multi_hop, cot, vqa
+#      data_format: ChatML # Alpaca, Sharegpt, ChatML