Skip to content

Commit e91f7dc

Browse files
committed
fix: add Julia (.jl) support for code indexing with fallback chunking
- Add .jl extension to supported extensions list in tree-sitter/index.ts
- Add .jl to fallbackExtensions array in shared/supported-extensions.ts
- Add comprehensive tests for Julia file parsing and chunking
- Fixes issue where Julia files were not being indexed at all

Resolves #8966
1 parent 89d67ef commit e91f7dc

File tree

3 files changed

+230
-0
lines changed

3 files changed

+230
-0
lines changed
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
import * as os from "os"
import * as path from "path"

import { describe, it, expect, beforeAll } from "vitest"

import { CodeParser } from "../parser"
import { shouldUseFallbackChunking } from "../../shared/supported-extensions"
import { initializeTreeSitter } from "../../../tree-sitter/__tests__/helpers"
7+
describe("Julia file parsing", () => {
8+
let parser: CodeParser
9+
10+
beforeAll(async () => {
11+
await initializeTreeSitter()
12+
parser = new CodeParser()
13+
})
14+
15+
it("should use fallback chunking for Julia files", () => {
16+
// Verify that Julia extension is marked for fallback chunking
17+
expect(shouldUseFallbackChunking(".jl")).toBe(true)
18+
})
19+
20+
it("should parse Julia files using fallback chunking", async () => {
21+
const juliaContent = `# Julia module for data analysis
22+
module DataAnalytics
23+
24+
using Statistics
25+
using DataFrames
26+
27+
# Type definition for data points
28+
struct DataPoint
29+
x::Float64
30+
y::Float64
31+
label::String
32+
end
33+
34+
# Function to calculate basic statistics
35+
function calculate_statistics(data::Vector{Float64})
36+
return (
37+
mean = mean(data),
38+
median = median(data),
39+
std = std(data),
40+
min = minimum(data),
41+
max = maximum(data)
42+
)
43+
end
44+
45+
# Filter data by value range
46+
function filter_data(data::Vector{DataPoint}, min_val::Float64, max_val::Float64)
47+
return filter(p -> min_val <= p.x <= max_val, data)
48+
end
49+
50+
# Process dataset with custom transformation
51+
function process_dataset(df::DataFrame, transform_func::Function)
52+
processed = DataFrame()
53+
for col in names(df)
54+
if eltype(df[!, col]) <: Number
55+
processed[!, col] = transform_func.(df[!, col])
56+
else
57+
processed[!, col] = df[!, col]
58+
end
59+
end
60+
return processed
61+
end
62+
63+
# Main analysis pipeline
64+
function run_analysis(input_file::String, output_file::String)
65+
# Load data
66+
df = DataFrame(CSV.File(input_file))
67+
68+
# Process data
69+
processed = process_dataset(df, x -> log(1 + abs(x)))
70+
71+
# Calculate statistics for numeric columns
72+
stats_dict = Dict{String, Any}()
73+
for col in names(processed)
74+
if eltype(processed[!, col]) <: Number
75+
stats_dict[col] = calculate_statistics(processed[!, col])
76+
end
77+
end
78+
79+
# Save results
80+
CSV.write(output_file, processed)
81+
82+
return stats_dict
83+
end
84+
85+
# Export public interface
86+
export DataPoint, calculate_statistics, filter_data, process_dataset, run_analysis
87+
88+
end # module DataAnalytics
89+
90+
# Usage example
91+
using .DataAnalytics
92+
93+
# Create sample data
94+
sample_points = [
95+
DataPoint(1.0, 2.0, "A"),
96+
DataPoint(3.0, 4.0, "B"),
97+
DataPoint(5.0, 6.0, "C")
98+
]
99+
100+
# Filter data
101+
filtered = filter_data(sample_points, 2.0, 4.0)
102+
println("Filtered data: ", filtered)
103+
104+
# Calculate statistics
105+
values = [p.x for p in sample_points]
106+
stats = calculate_statistics(values)
107+
println("Statistics: ", stats)`
108+
109+
const testFilePath = path.join("/tmp", "test.jl")
110+
const result = await parser.parseFile(testFilePath, {
111+
content: juliaContent,
112+
fileHash: "test-hash",
113+
})
114+
115+
// Should have results from fallback chunking
116+
expect(result.length).toBeGreaterThan(0)
117+
118+
// Check that all blocks are of type 'fallback_chunk'
119+
result.forEach((block) => {
120+
expect(block.type).toBe("fallback_chunk")
121+
})
122+
123+
// Verify that the content is properly chunked
124+
const firstBlock = result[0]
125+
expect(firstBlock.file_path).toBe(testFilePath)
126+
expect(firstBlock.content).toContain("Julia module")
127+
expect(firstBlock.identifier).toBeNull()
128+
expect(firstBlock.segmentHash).toMatch(/^[a-f0-9]{64}$/)
129+
expect(firstBlock.fileHash).toBe("test-hash")
130+
})
131+
132+
it("should handle small Julia files that don't meet minimum character requirements", async () => {
133+
const smallJuliaContent = `# Small Julia file
134+
x = 1
135+
y = 2`
136+
137+
const testFilePath = path.join("/tmp", "small.jl")
138+
const result = await parser.parseFile(testFilePath, {
139+
content: smallJuliaContent,
140+
fileHash: "small-hash",
141+
})
142+
143+
// Should return empty array for files too small to index
144+
expect(result.length).toBe(0)
145+
})
146+
147+
it("should chunk large Julia files appropriately", async () => {
148+
// Create a large Julia file content with multiple sections
149+
const sections: string[] = []
150+
151+
// Add multiple function definitions to create chunks
152+
for (let i = 0; i < 20; i++) {
153+
sections.push(`
154+
# Function ${i} for processing data
155+
function process_data_${i}(data::Vector{Float64})
156+
# This is a longer function with detailed implementation
157+
# to ensure we have enough content for chunking
158+
159+
# Step 1: Validate input data
160+
if isempty(data)
161+
throw(ArgumentError("Data cannot be empty"))
162+
end
163+
164+
# Step 2: Calculate intermediate results
165+
intermediate = map(x -> x * 2.0, data)
166+
167+
# Step 3: Apply transformation
168+
transformed = map(x -> log(1 + abs(x)), intermediate)
169+
170+
# Step 4: Compute final result
171+
result = sum(transformed) / length(transformed)
172+
173+
# Step 5: Return processed value
174+
return result
175+
end
176+
`)
177+
}
178+
179+
const largeJuliaContent = `# Large Julia module with many functions
180+
module LargeModule
181+
182+
using Statistics
183+
using LinearAlgebra
184+
185+
${sections.join("\n")}
186+
187+
# Export all functions
188+
export ${Array.from({ length: 20 }, (_, i) => `process_data_${i}`).join(", ")}
189+
190+
end # module LargeModule`
191+
192+
const testFilePath = path.join("/tmp", "large.jl")
193+
const result = await parser.parseFile(testFilePath, {
194+
content: largeJuliaContent,
195+
fileHash: "large-hash",
196+
})
197+
198+
// Should have multiple chunks
199+
expect(result.length).toBeGreaterThan(1)
200+
201+
// All chunks should be fallback chunks
202+
result.forEach((block) => {
203+
expect(block.type).toBe("fallback_chunk")
204+
})
205+
206+
// Check that chunks have reasonable sizes
207+
result.forEach((block) => {
208+
// Each chunk should have content
209+
expect(block.content.length).toBeGreaterThan(0)
210+
// Chunks should not exceed maximum size (with tolerance)
211+
expect(block.content.length).toBeLessThanOrEqual(150000) // MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR
212+
})
213+
})
214+
})
215+
216+
describe("Fallback Extensions Configuration for Julia", () => {
217+
it("should correctly identify Julia extension for fallback chunking", () => {
218+
// Julia should use fallback
219+
expect(shouldUseFallbackChunking(".jl")).toBe(true)
220+
expect(shouldUseFallbackChunking(".JL")).toBe(true) // Case insensitive
221+
222+
// Non-Julia extensions should not use fallback (unless they're in the list)
223+
expect(shouldUseFallbackChunking(".py")).toBe(false)
224+
expect(shouldUseFallbackChunking(".js")).toBe(false)
225+
expect(shouldUseFallbackChunking(".ts")).toBe(false)
226+
})
227+
})

src/services/code-index/shared/supported-extensions.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ export const fallbackExtensions = [
2222
".vb", // Visual Basic .NET - no dedicated WASM parser
2323
".scala", // Scala - uses fallback chunking instead of Lua query workaround
2424
".swift", // Swift - uses fallback chunking due to parser instability
25+
".jl", // Julia - no dedicated WASM parser available
2526
]
2627

2728
/**

src/services/tree-sitter/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ const extensions = [
9191
"erb",
9292
// Visual Basic .NET
9393
"vb",
94+
// Julia
95+
"jl",
9496
].map((e) => `.${e}`)
9597

9698
export { extensions }

0 commit comments

Comments
 (0)