Skip to content

Commit c3f774b

Browse files
committed
v0.16.0 performance optimisation and query enhancements
1 parent e1d1de7 commit c3f774b

File tree

11 files changed

+122
-91
lines changed

11 files changed

+122
-91
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ llm-generated*
9898
concatenated_fsharp_files.txt
9999
concatenate_fsharp.sh
100100

101+
# Debug Scripts
102+
debug-scripts/*
103+
101104

102105

103106
# Distribution directory - contains built binaries

.version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.15.0
1+
0.16.0

Directory.Build.props

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22
<PropertyGroup>
3-
<Version>0.15.0</Version>
4-
<AssemblyVersion>0.15.0.0</AssemblyVersion>
5-
<FileVersion>0.15.0.0</FileVersion>
3+
<Version>0.16.0</Version>
4+
<AssemblyVersion>0.16.0.0</AssemblyVersion>
5+
<FileVersion>0.16.0.0</FileVersion>
66
</PropertyGroup>
77
</Project>

Neo4jExport/src/Core/Types.fs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ type AppError =
167167
| FileSystemError of path: string * message: string * exn: exn option
168168
| SecurityError of message: string
169169
| TimeoutError of operation: string * duration: TimeSpan
170+
| PaginationError of entityType: string * message: string
170171
| AggregateError of NonEmptyList<AppError>
171172

172173
/// Mutable context for managing application lifecycle and cleanup

Neo4jExport/src/ErrorManagement/ErrorAccumulation.fs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ module ErrorAccumulation =
6767
| MemoryError msg -> msg
6868
| ExportError(msg, _) -> msg
6969
| TimeoutError(operation, duration) -> sprintf "Operation '%s' timed out after %A" operation duration
70+
| PaginationError(entityType, msg) -> sprintf "Pagination error for %s: %s" entityType msg
7071
| AggregateError nel ->
7172
let errors = NonEmptyList.toList nel
7273

Neo4jExport/src/Export/BatchProcessing.fs

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -185,21 +185,8 @@ let processNodeRecord (exportState: ExportState) (recordCtx: RecordContext<Batch
185185

186186
exportState.NodeIdMapping.TryAdd(elementId, stableId) |> ignore
187187

188-
writeNode writer { new INode with
189-
member _.ElementId = elementId
190-
member _.Labels = labels :> IReadOnlyList<string>
191-
member _.Properties = properties
192-
member _.Item with get(key: string) = properties.[key]
193-
member _.Get<'T>(key: string) : 'T = properties.[key] :?> 'T
194-
member _.TryGet<'T>(key: string, [<System.Runtime.InteropServices.Out>] value: byref<'T>) =
195-
let success, v = properties.TryGetValue(key)
196-
if success && v :? 'T then
197-
value <- unbox<'T> v
198-
true
199-
else
200-
false
201-
member _.Id = 0L // Not used, but required by interface
202-
member _.Equals(other: INode) = false } elementId stableId ctx
188+
// Use direct serialization to avoid object allocation in hot path
189+
writeNodeDirect writer elementId stableId (labels :> IReadOnlyList<string>) properties ctx
203190
with ex ->
204191
let elementId = ""
205192

@@ -310,25 +297,8 @@ let processRelationshipRecord
310297
EndElementId = endNodeId
311298
EndStableId = endStableId } // End node content hash
312299

313-
writeRelationship writer { new IRelationship with
314-
member _.ElementId = elementId
315-
member _.Type = relType
316-
member _.StartNodeElementId = startNodeId
317-
member _.EndNodeElementId = endNodeId
318-
member _.Properties = properties
319-
member _.Item with get(key: string) = properties.[key]
320-
member _.Get<'T>(key: string) : 'T = properties.[key] :?> 'T
321-
member _.TryGet<'T>(key: string, [<System.Runtime.InteropServices.Out>] value: byref<'T>) =
322-
let success, v = properties.TryGetValue(key)
323-
if success && v :? 'T then
324-
value <- unbox<'T> v
325-
true
326-
else
327-
false
328-
member _.Id = 0L // Not used, but required by interface
329-
member _.StartNodeId = 0L // Not used, but required by interface
330-
member _.EndNodeId = 0L // Not used, but required by interface
331-
member _.Equals(other: IRelationship) = false } ids ctx
300+
// Use direct serialization to avoid object allocation in hot path
301+
writeRelationshipDirect writer relType properties ids ctx
332302
with ex ->
333303
// Failed to parse relationship from record
334304
trackSerializationErrorDedup recordCtx.ErrorAccumulator ex "" "relationship" "RecordAccessError"
@@ -383,19 +353,19 @@ module private KeysetPagination =
383353
/// Extract ID from record for pagination
384354
let extractId (version: Neo4jVersion) (entityType: string) (record: IRecord) : KeysetId option =
385355
try
386-
let fieldName =
387-
match entityType with
388-
| "Nodes" -> "nodeId"
389-
| "Relationships" -> "relId"
390-
| _ -> failwithf "Unknown entity type: %s" entityType
391-
392356
match version with
393357
| V4x ->
358+
// Neo4j 4.x uses different field names for nodes and relationships
359+
let fieldName =
360+
match entityType with
361+
| "Nodes" -> "nodeId"
362+
| "Relationships" -> "relId"
363+
| _ -> failwithf "Unknown entity type: %s" entityType
394364
let id = record.[fieldName].As<int64>()
395365
Some(NumericId id)
396-
| V5x
397-
| V6x ->
398-
let id = record.[fieldName].As<string>()
366+
| V5x | V6x ->
367+
// Neo4j 5.x+ uses elementId for both nodes and relationships
368+
let id = record.["elementId"].As<string>()
399369
Some(ElementId id)
400370
| Unknown -> None
401371
with ex ->
@@ -629,7 +599,16 @@ let processBatchedQuery<'state>
629599

630600
// Continue if we got a full batch
631601
if recordCount = batchSize then
632-
return! processBatch batchStats newLastProgress nextPaginationState currentHandlerState
602+
// Critical check: Prevent infinite loop when using keyset pagination
603+
match paginationState, nextPaginationState with
604+
| Keyset(prevId, _), Keyset(nextId, _) when prevId = nextId ->
605+
// We processed a full batch but couldn't advance the pagination cursor
606+
// This means ID extraction failed for all records in the batch
607+
return Error(PaginationError(
608+
batchCtx.Processor.EntityName,
609+
sprintf "Unable to advance pagination after processing %d records. This typically occurs when the ID field (nodeId/relId) cannot be extracted from query results. Check that your Neo4j query returns the expected fields." recordCount))
610+
| _ ->
611+
return! processBatch batchStats newLastProgress nextPaginationState currentHandlerState
633612
else
634613
return Ok(batchStats, currentHandlerState)
635614
}

Neo4jExport/src/Export/Core.fs

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ module QueryBuilders =
4343
| V5x
4444
| V6x
4545
| Unknown ->
46-
"MATCH (n) RETURN n, labels(n) as labels, elementId(n) as nodeId ORDER BY elementId(n) SKIP $skip LIMIT $limit"
46+
"MATCH (n) RETURN n, labels(n) as labels, elementId(n) as elementId ORDER BY elementId(n) SKIP $skip LIMIT $limit"
4747

4848
/// Build node query for future keyset pagination
4949
let buildNodeQueryKeyset (version: Neo4jVersion) (lastId: KeysetId option) =
@@ -80,8 +80,7 @@ module QueryBuilders =
8080
RETURN
8181
elementId(n) AS elementId,
8282
labels(n) AS labels,
83-
n AS node,
84-
elementId(n) AS nodeId
83+
n AS node
8584
ORDER BY elementId(n)
8685
LIMIT $limit
8786
""" whereClause
@@ -93,7 +92,7 @@ module QueryBuilders =
9392
| V5x
9493
| V6x
9594
| Unknown ->
96-
"MATCH ()-[r]->() RETURN r, type(r) as type, elementId(r) as relId ORDER BY elementId(r) SKIP $skip LIMIT $limit"
95+
"MATCH ()-[r]->() RETURN r, type(r) as type, elementId(r) as elementId ORDER BY elementId(r) SKIP $skip LIMIT $limit"
9796

9897
/// Build relationship query for future keyset pagination
9998
let buildRelationshipQueryKeyset (version: Neo4jVersion) (lastId: KeysetId option) =
@@ -134,8 +133,7 @@ module QueryBuilders =
134133
type(r) AS type,
135134
elementId(startNode) AS startNodeElementId,
136135
elementId(endNode) AS endNodeElementId,
137-
r AS relationship,
138-
elementId(r) AS relId
136+
r AS relationship
139137
ORDER BY elementId(r)
140138
LIMIT $limit
141139
""" whereClause
@@ -292,20 +290,23 @@ let exportNodesUnified
292290
with _ ->
293291
[]
294292

295-
let bytesPerLabel =
296-
if List.isEmpty labels then
297-
bytesWritten
298-
else
299-
bytesWritten / int64 labels.Length
300-
293+
// Handle both labeled and unlabeled nodes
301294
let newLabelTracker =
302-
labels
303-
|> List.fold
304-
(fun tracker label ->
305-
tracker
306-
|> LabelStatsTracker.startLabel label
307-
|> LabelStatsTracker.updateLabel label 1L bytesPerLabel)
295+
if List.isEmpty labels then
296+
// Track unlabeled nodes under a special category
308297
state.LabelTracker
298+
|> LabelStatsTracker.startLabel "_unlabeled"
299+
|> LabelStatsTracker.updateLabel "_unlabeled" 1L bytesWritten
300+
else
301+
// Distribute bytes evenly among labels
302+
let bytesPerLabel = bytesWritten / int64 labels.Length
303+
labels
304+
|> List.fold
305+
(fun tracker label ->
306+
tracker
307+
|> LabelStatsTracker.startLabel label
308+
|> LabelStatsTracker.updateLabel label 1L bytesPerLabel)
309+
state.LabelTracker
309310

310311
{ LineState = newLineState
311312
LabelTracker = newLabelTracker }

Neo4jExport/src/Export/Serialization/Engine.fs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ let writeNode =
7575
let writeRelationship =
7676
SerializationGraphElements.writeRelationship
7777

78+
/// Re-export high-performance direct functions for hot path
79+
let internal writeNodeDirect =
80+
SerializationGraphElements.writeNodeDirect
81+
82+
let internal writeRelationshipDirect =
83+
SerializationGraphElements.writeRelationshipDirect
84+
7885
let serializeProperties =
7986
SerializationCollections.serializeProperties
8087

Neo4jExport/src/Export/Serialization/GraphElements.fs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
module Neo4jExport.SerializationGraphElements
2424

2525
open System
26+
open System.Collections.Generic
2627
open System.Text.Json
2728
open Neo4j.Driver
2829
open Neo4jExport
@@ -122,15 +123,23 @@ let serializeRelationship
122123
writer.WriteEndObject()
123124
writer.WriteEndObject()
124125

125-
let writeNode (writer: Utf8JsonWriter) (node: INode) (elementId: string) (stableId: string) (ctx: WriterContext) =
126+
/// Internal high-performance node serialization that works directly with primitive data
127+
/// Used in the hot path to avoid object allocations
128+
let internal writeNodeDirect
129+
(writer: Utf8JsonWriter)
130+
(elementId: string)
131+
(stableId: string)
132+
(labels: IReadOnlyList<string>)
133+
(properties: IReadOnlyDictionary<string, obj>)
134+
(ctx: WriterContext) =
126135
writer.WriteStartObject()
127136
writer.WriteString("type", "node")
128137
writer.WriteString("element_id", elementId)
129138
writer.WriteString("NET_node_content_hash", stableId)
130139
writer.WriteString("export_id", ctx.ExportId.ToString())
131140
writer.WriteStartArray("labels")
132141

133-
node.Labels
142+
labels
134143
|> Seq.truncate ctx.Config.MaxLabelsPerNode
135144
|> Seq.iter (fun label ->
136145
match validateLabel label elementId with
@@ -142,20 +151,31 @@ let writeNode (writer: Utf8JsonWriter) (node: INode) (elementId: string) (stable
142151
writer.WriteEndArray()
143152
writer.WriteStartObject "properties"
144153

145-
serializeProperties writer ctx SerializationDepth.zero node.Properties
154+
serializeProperties writer ctx SerializationDepth.zero properties
146155

147156
writer.WriteEndObject()
148157
writer.WriteEndObject()
149158

150-
let writeRelationship (writer: Utf8JsonWriter) (rel: IRelationship) (ids: EntityIdsWithStable) (ctx: WriterContext) =
159+
let writeNode (writer: Utf8JsonWriter) (node: INode) (elementId: string) (stableId: string) (ctx: WriterContext) =
160+
// Delegate to the direct implementation for code reuse
161+
writeNodeDirect writer elementId stableId node.Labels node.Properties ctx
162+
163+
/// Internal high-performance relationship serialization that works directly with primitive data
164+
/// Used in the hot path to avoid object allocations
165+
let internal writeRelationshipDirect
166+
(writer: Utf8JsonWriter)
167+
(relType: string)
168+
(properties: IReadOnlyDictionary<string, obj>)
169+
(ids: EntityIdsWithStable)
170+
(ctx: WriterContext) =
151171
writer.WriteStartObject()
152172
writer.WriteString("type", "relationship")
153173
writer.WriteString("element_id", ids.ElementId)
154174
writer.WriteString("NET_rel_identity_hash", ids.StableId)
155175
writer.WriteString("export_id", ctx.ExportId.ToString())
156176

157177
let safeType =
158-
match validateRelType rel.Type ids.ElementId with
178+
match validateRelType relType ids.ElementId with
159179
| Ok t -> t
160180
| Error msg ->
161181
ctx.ErrorFuncs.TrackWarning msg (Some ids.ElementId) None
@@ -168,11 +188,15 @@ let writeRelationship (writer: Utf8JsonWriter) (rel: IRelationship) (ids: Entity
168188
writer.WriteString("end_node_content_hash", ids.EndStableId)
169189
writer.WriteStartObject("properties")
170190

171-
serializeProperties writer ctx SerializationDepth.zero rel.Properties
191+
serializeProperties writer ctx SerializationDepth.zero properties
172192

173193
writer.WriteEndObject()
174194
writer.WriteEndObject()
175195

196+
let writeRelationship (writer: Utf8JsonWriter) (rel: IRelationship) (ids: EntityIdsWithStable) (ctx: WriterContext) =
197+
// Delegate to the direct implementation for code reuse
198+
writeRelationshipDirect writer rel.Type rel.Properties ids ctx
199+
176200
let serializeGraphElement
177201
(writer: Utf8JsonWriter)
178202
(ctx: WriterContext)

Neo4jExport/src/Export/Serialization/Path.fs

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,38 @@ open Neo4jExport.SerializationCollections
3232
open Neo4jExport.JsonHelpers
3333
open ErrorTracking
3434

35-
let private generatePathSequenceTailRec nodeCount relCount =
36-
let rec loop acc nodeIdx relIdx isNode =
37-
match nodeIdx < nodeCount, relIdx < relCount, isNode with
38-
| true, _, true ->
39-
let item =
40-
{| Type = "node"; Index = nodeIdx |}
41-
42-
loop (item :: acc) (nodeIdx + 1) relIdx false
43-
| _, true, false ->
44-
let item =
45-
{| Type = "relationship"
46-
Index = relIdx |}
47-
48-
loop (item :: acc) nodeIdx (relIdx + 1) true
49-
| false, false, _ -> List.rev acc
50-
| _ -> loop acc nodeIdx relIdx (not isNode)
51-
52-
loop [] 0 0 true
35+
/// Generate the alternating sequence of nodes and relationships for a path
36+
/// Neo4j paths always follow the pattern: Node -> Relationship -> Node -> ... -> Node
37+
/// Therefore: relationships = nodes - 1 for valid paths
38+
let private generatePathSequence nodeCount relCount =
39+
// Validate Neo4j path invariant
40+
if relCount <> nodeCount - 1 && nodeCount > 0 then
41+
// Log warning but continue - defensive against malformed data
42+
Log.warn (sprintf "Invalid path structure: %d nodes and %d relationships (expected %d relationships)"
43+
nodeCount relCount (nodeCount - 1))
44+
45+
// Simple approach: generate indices for the alternating pattern
46+
// Total elements = nodes + relationships
47+
let totalElements = nodeCount + relCount
48+
49+
// Handle edge cases
50+
if totalElements = 0 then
51+
[]
52+
else
53+
[0 .. totalElements - 1]
54+
|> List.map (fun i ->
55+
if i % 2 = 0 then
56+
// Even positions are nodes: 0, 2, 4, ...
57+
{| Type = "node"; Index = i / 2 |}
58+
else
59+
// Odd positions are relationships: 1, 3, 5, ...
60+
{| Type = "relationship"; Index = i / 2 |})
61+
|> List.filter (fun item ->
62+
// Defensive: ensure we don't exceed actual counts
63+
match item.Type with
64+
| "node" -> item.Index < nodeCount
65+
| "relationship" -> item.Index < relCount
66+
| _ -> false)
5367

5468
let private serializePathFull (writer: Utf8JsonWriter) (ctx: WriterContext) (path: IPath) =
5569
writer.WritePropertyName("nodes")
@@ -179,7 +193,7 @@ let serializePath (writer: Utf8JsonWriter) (ctx: WriterContext) (path: IPath) =
179193
writer.WritePropertyName("sequence")
180194
writer.WriteStartArray()
181195

182-
generatePathSequenceTailRec path.Nodes.Count path.Relationships.Count
196+
generatePathSequence path.Nodes.Count path.Relationships.Count
183197
|> List.iter (fun item ->
184198
writer.WriteStartObject()
185199
writer.WriteString("type", item.Type)

0 commit comments

Comments
 (0)