import { describe, it, expect, beforeAll } from "vitest"
import { CodeParser } from "../parser"
import { shouldUseFallbackChunking } from "../../shared/supported-extensions"
import * as path from "path"
import { initializeTreeSitter } from "../../../tree-sitter/__tests__/helpers"

describe("Julia file parsing", () => {
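	// Note: Julia (".jl") is expected to go through the fallback chunking path rather than
	// Tree-sitter query-based parsing; the assertions below are written around that assumption.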
	let parser: CodeParser

	beforeAll(async () => {
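		// Assumption: the shared Tree-sitter runtime should be ready before CodeParser is
		// constructed, even though Julia files themselves are handled by fallback chunking.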
		await initializeTreeSitter()
		parser = new CodeParser()
	})

	it("should use fallback chunking for Julia files", () => {
		// Verify that the Julia extension is marked for fallback chunking
		expect(shouldUseFallbackChunking(".jl")).toBe(true)
	})

	it("should parse Julia files using fallback chunking", async () => {
		const juliaContent = `# Julia module for data analysis
module DataAnalytics

using Statistics
using DataFrames
using CSV  # needed by run_analysis below for CSV.File / CSV.write

# Type definition for data points
struct DataPoint
    x::Float64
    y::Float64
    label::String
end

# Function to calculate basic statistics
function calculate_statistics(data::Vector{Float64})
    return (
        mean = mean(data),
        median = median(data),
        std = std(data),
        min = minimum(data),
        max = maximum(data)
    )
end

# Filter data by value range
function filter_data(data::Vector{DataPoint}, min_val::Float64, max_val::Float64)
    return filter(p -> min_val <= p.x <= max_val, data)
end

# Process dataset with custom transformation
function process_dataset(df::DataFrame, transform_func::Function)
    processed = DataFrame()
    for col in names(df)
        if eltype(df[!, col]) <: Number
            processed[!, col] = transform_func.(df[!, col])
        else
            processed[!, col] = df[!, col]
        end
    end
    return processed
end

# Main analysis pipeline
function run_analysis(input_file::String, output_file::String)
    # Load data
    df = DataFrame(CSV.File(input_file))

    # Process data
    processed = process_dataset(df, x -> log(1 + abs(x)))

    # Calculate statistics for numeric columns
    stats_dict = Dict{String, Any}()
    for col in names(processed)
        if eltype(processed[!, col]) <: Number
            stats_dict[col] = calculate_statistics(processed[!, col])
        end
    end

    # Save results
    CSV.write(output_file, processed)

    return stats_dict
end

# Export public interface
export DataPoint, calculate_statistics, filter_data, process_dataset, run_analysis

end # module DataAnalytics

# Usage example
using .DataAnalytics

# Create sample data
sample_points = [
    DataPoint(1.0, 2.0, "A"),
    DataPoint(3.0, 4.0, "B"),
    DataPoint(5.0, 6.0, "C")
]

# Filter data
filtered = filter_data(sample_points, 2.0, 4.0)
println("Filtered data: ", filtered)

# Calculate statistics
values = [p.x for p in sample_points]
stats = calculate_statistics(values)
println("Statistics: ", stats)`

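		// The fixture content is supplied via options; the /tmp path is assumed to serve only
		// as an identifier here, so parseFile should not need to read anything from disk.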
		const testFilePath = path.join("/tmp", "test.jl")
		const result = await parser.parseFile(testFilePath, {
			content: juliaContent,
			fileHash: "test-hash",
		})

		// Should have results from fallback chunking
		expect(result.length).toBeGreaterThan(0)

		// Check that all blocks are of type 'fallback_chunk'
		result.forEach((block) => {
			expect(block.type).toBe("fallback_chunk")
		})

		// Verify that the content is properly chunked
		const firstBlock = result[0]
		expect(firstBlock.file_path).toBe(testFilePath)
		expect(firstBlock.content).toContain("Julia module")
		expect(firstBlock.identifier).toBeNull()
		expect(firstBlock.segmentHash).toMatch(/^[a-f0-9]{64}$/)
		expect(firstBlock.fileHash).toBe("test-hash")
	})

	it("should handle small Julia files that don't meet minimum character requirements", async () => {
		const smallJuliaContent = `# Small Julia file
x = 1
y = 2`

		const testFilePath = path.join("/tmp", "small.jl")
		const result = await parser.parseFile(testFilePath, {
			content: smallJuliaContent,
			fileHash: "small-hash",
		})

		// Should return empty array for files too small to index
		expect(result.length).toBe(0)
	})

	it("should chunk large Julia files appropriately", async () => {
		// Create large Julia file content with multiple sections
		const sections: string[] = []

		// Add multiple function definitions to create chunks
		for (let i = 0; i < 20; i++) {
			sections.push(`
# Function ${i} for processing data
function process_data_${i}(data::Vector{Float64})
    # This is a longer function with detailed implementation
    # to ensure we have enough content for chunking

    # Step 1: Validate input data
    if isempty(data)
        throw(ArgumentError("Data cannot be empty"))
    end

    # Step 2: Calculate intermediate results
    intermediate = map(x -> x * 2.0, data)

    # Step 3: Apply transformation
    transformed = map(x -> log(1 + abs(x)), intermediate)

    # Step 4: Compute final result
    result = sum(transformed) / length(transformed)

    # Step 5: Return processed value
    return result
end
`)
		}

		const largeJuliaContent = `# Large Julia module with many functions
module LargeModule

using Statistics
using LinearAlgebra

${sections.join("\n")}

# Export all functions
export ${Array.from({ length: 20 }, (_, i) => `process_data_${i}`).join(", ")}

end # module LargeModule`

		const testFilePath = path.join("/tmp", "large.jl")
		const result = await parser.parseFile(testFilePath, {
			content: largeJuliaContent,
			fileHash: "large-hash",
		})

		// Should have multiple chunks
		expect(result.length).toBeGreaterThan(1)

		// All chunks should be fallback chunks
		result.forEach((block) => {
			expect(block.type).toBe("fallback_chunk")
		})

		// Check that chunks have reasonable sizes
		result.forEach((block) => {
			// Each chunk should have content
			expect(block.content.length).toBeGreaterThan(0)
			// Chunks should not exceed maximum size (with tolerance)
			expect(block.content.length).toBeLessThanOrEqual(150000) // MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR
		})
	})
})

describe("Fallback Extensions Configuration for Julia", () => {
	it("should correctly identify Julia extension for fallback chunking", () => {
		// Julia should use fallback
		expect(shouldUseFallbackChunking(".jl")).toBe(true)
		expect(shouldUseFallbackChunking(".JL")).toBe(true) // Case insensitive

		// Non-Julia extensions should not use fallback (unless they're in the list)
		expect(shouldUseFallbackChunking(".py")).toBe(false)
		expect(shouldUseFallbackChunking(".js")).toBe(false)
		expect(shouldUseFallbackChunking(".ts")).toBe(false)
	})
})