Skip to content

Commit ecc129e

Browse files
authored
Merge branch 'main' into docs/reasoning-models
2 parents 5ab7755 + c862147 commit ecc129e

27 files changed

+248
-116
lines changed

.semversioner/2.2.0.json

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"changes": [
3+
{
4+
"description": "Support OpenAI reasoning models.",
5+
"type": "minor"
6+
},
7+
{
8+
"description": "Add option to snapshot raw extracted graph tables.",
9+
"type": "patch"
10+
},
11+
{
12+
"description": "Added batching logic to the prompt tuning autoselection embeddings workflow.",
13+
"type": "patch"
14+
},
15+
{
16+
"description": "Align config classes and docs better.",
17+
"type": "patch"
18+
},
19+
{
20+
"description": "Align embeddings table loading with configured fields.",
21+
"type": "patch"
22+
},
23+
{
24+
"description": "Brings parity with our latest NLP extraction approaches.",
25+
"type": "patch"
26+
},
27+
{
28+
"description": "Fix fnllm to 0.2.3",
29+
"type": "patch"
30+
},
31+
{
32+
"description": "Fixes to basic search.",
33+
"type": "patch"
34+
},
35+
{
36+
"description": "Update llm args for consistency.",
37+
"type": "patch"
38+
},
39+
{
40+
"description": "Add vector store integration tests.",
41+
"type": "patch"
42+
}
43+
],
44+
"created_at": "2025-04-25T23:30:57+00:00",
45+
"version": "2.2.0"
46+
}

.semversioner/next-release/minor-20250325000101658359.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

.semversioner/next-release/patch-20250304195925818723.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

.semversioner/next-release/patch-20250318015236952876.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

.semversioner/next-release/patch-20250319182609055856.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

.semversioner/next-release/patch-20250322015033893577.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

.semversioner/next-release/patch-20250331184323312702.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
# Changelog
22
Note: version releases in the 0.x.y range may introduce breaking changes.
33

4+
## 2.2.0
5+
6+
- minor: Support OpenAI reasoning models.
7+
- patch: Add option to snapshot raw extracted graph tables.
8+
- patch: Added batching logic to the prompt tuning autoselection embeddings workflow.
9+
- patch: Align config classes and docs better.
10+
- patch: Align embeddings table loading with configured fields.
11+
- patch: Brings parity with our latest NLP extraction approaches.
12+
- patch: Fix fnllm to 0.2.3
13+
- patch: Fixes to basic search.
14+
- patch: Update llm args for consistency.
15+
- patch: Add vector store integration tests.
16+
417
## 2.1.0
518

619
- minor: Add support for JSON input files.

docs/config/yaml.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ Parameters for manual graph pruning. This can be used to optimize the modularity
253253
- max_node_freq_std **float | None** - The maximum standard deviation of node frequency to allow.
254254
- min_node_degree **int** - The minimum node degree to allow.
255255
- max_node_degree_std **float | None** - The maximum standard deviation of node degree to allow.
256-
- min_edge_weight_pct **int** - The minimum edge weight percentile to allow.
256+
- min_edge_weight_pct **float** - The minimum edge weight percentile to allow.
257257
- remove_ego_nodes **bool** - Remove ego nodes.
258258
- lcc_only **bool** - Only use largest connected component.
259259

graphrag/config/defaults.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
ReportingType,
2020
TextEmbeddingTarget,
2121
)
22+
from graphrag.index.operations.build_noun_graph.np_extractors.stop_words import (
23+
EN_STOP_WORDS,
24+
)
2225
from graphrag.vector_stores.factory import VectorStoreType
2326

2427
DEFAULT_OUTPUT_BASE_DIR = "output"
@@ -42,6 +45,7 @@ class BasicSearchDefaults:
4245

4346
prompt: None = None
4447
k: int = 10
48+
max_context_tokens: int = 12_000
4549
chat_model_id: str = DEFAULT_CHAT_MODEL_ID
4650
embedding_model_id: str = DEFAULT_EMBEDDING_MODEL_ID
4751

@@ -185,7 +189,7 @@ class TextAnalyzerDefaults:
185189
max_word_length: int = 15
186190
word_delimiter: str = " "
187191
include_named_entities: bool = True
188-
exclude_nouns: None = None
192+
exclude_nouns: list[str] = field(default_factory=lambda: EN_STOP_WORDS)
189193
exclude_entity_tags: list[str] = field(default_factory=lambda: ["DATE"])
190194
exclude_pos_tags: list[str] = field(
191195
default_factory=lambda: ["DET", "PRON", "INTJ", "X"]
@@ -316,8 +320,8 @@ class PruneGraphDefaults:
316320
max_node_freq_std: None = None
317321
min_node_degree: int = 1
318322
max_node_degree_std: None = None
319-
min_edge_weight_pct: int = 40
320-
remove_ego_nodes: bool = False
323+
min_edge_weight_pct: float = 40.0
324+
remove_ego_nodes: bool = True
321325
lcc_only: bool = False
322326

323327

@@ -338,6 +342,7 @@ class SnapshotsDefaults:
338342

339343
embeddings: bool = False
340344
graphml: bool = False
345+
raw_graph: bool = False
341346

342347

343348
@dataclass

0 commit comments

Comments
 (0)