Skip to content

Commit 857244e

Browse files
committed
Refactor binary input handling in parser and test files for improved efficiency
- Replaced the custom byte-reading logic with the built-in `input_binary_int` function for reading module counts in both `parser.ml` and `test_ast.ml`.
- Streamlined the reading of module references by enhancing the `read_modules` function to handle line reading and filtering efficiently.
- Improved error handling for read operations to ensure robustness against unexpected input formats.
1 parent 5c6fbf6 commit 857244e

File tree

2 files changed

+60
-58
lines changed

2 files changed

+60
-58
lines changed

lib/parser.ml

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -266,15 +266,7 @@ module DependencyExtractor = struct
266266
let ic = open_in_bin ast_path in
267267

268268
(* Efficiently read the entire header section of the AST file *)
269-
let module_count =
270-
try
271-
let b1 = input_byte ic in
272-
let b2 = input_byte ic in
273-
let b3 = input_byte ic in
274-
let b4 = input_byte ic in
275-
(b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4
276-
with _ -> 0
277-
in
269+
let module_count = input_binary_int ic in
278270

279271
(* Skip the newline after the count *)
280272
let _ = try input_char ic with _ -> '\000' in

test/test_ast.ml

Lines changed: 59 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
open Stdlib
22
open Printf
33

4-
(* Function to read a 4-byte integer in big-endian format *)
5-
let read_int32_be ic =
6-
let b1 = input_byte ic in
7-
let b2 = input_byte ic in
8-
let b3 = input_byte ic in
9-
let b4 = input_byte ic in
10-
(b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4
11-
124
(* Function to read a line from binary file *)
135
let read_line_bin ic =
146
let buf = Buffer.create 128 in
@@ -31,56 +23,74 @@ let parse_module_refs filename =
3123

3224
try
3325
(* Read the number of modules (first 4 bytes) *)
34-
let modules_count = read_int32_be ic in
26+
let modules_count = try input_binary_int ic with _ -> 0 in
3527
printf "Found %d module references in AST file\n" modules_count;
3628

3729
(* Skip the newline after the count *)
38-
let _ = input_char ic in
30+
let _ = try input_char ic with _ -> '\000' in
3931

40-
(* Read module references line by line until we hit something that looks like a file path *)
32+
(* Read all module references efficiently *)
4133
let module_names = ref [] in
42-
let found_source_path = ref false in
34+
35+
let rec read_modules remaining max_iterations =
36+
if remaining <= 0 || max_iterations <= 0 then !module_names
37+
else
38+
try
39+
let buf = Buffer.create 128 in
40+
let rec read_line max_chars =
41+
if max_chars <= 0 then
42+
(* Line too long, skip the rest of this line *)
43+
try
44+
while input_char ic <> '\n' do
45+
()
46+
done;
47+
read_modules (remaining - 1) (max_iterations - 1)
48+
with End_of_file -> !module_names
49+
else
50+
let c = input_char ic in
51+
if c = '\n' then (
52+
let line = Buffer.contents buf in
53+
(* Filter out file paths - only add module names *)
54+
if
55+
String.length line > 0
56+
&& line.[0] <> '/'
57+
&& (String.length line <= 1
58+
|| not (line.[0] = 'C' && line.[1] = ':'))
59+
&& line.[0] >= 'A'
60+
&& line.[0] <= 'Z'
61+
then module_names := line :: !module_names;
62+
read_modules (remaining - 1) (max_iterations - 1))
63+
else (
64+
Buffer.add_char buf c;
65+
read_line (max_chars - 1))
66+
in
67+
read_line 10000 (* Limit line length to 10,000 chars *)
68+
with End_of_file -> !module_names
69+
in
70+
71+
let result = read_modules modules_count 200000 in
72+
(* Limit iterations to prevent infinite loops *)
73+
74+
(* Try to find source file path after reading modules *)
4375
let source_path = ref "" in
76+
let found_source_path = ref false in
4477

45-
(* Read specified number of lines *)
46-
for _ = 1 to modules_count do
47-
let line = read_line_bin ic in
48-
49-
(* Check if this looks like a file path *)
50-
if
51-
(not !found_source_path)
52-
&& String.length line > 0
53-
&& (line.[0] = '/'
54-
|| (String.length line > 1 && line.[0] = 'C' && line.[1] = ':'))
55-
then (
56-
found_source_path := true;
57-
source_path := line
58-
(* If it's not a file path and is a valid module name (starts with uppercase) *))
59-
else if
60-
(not !found_source_path)
61-
&& String.length line > 0
62-
&& line.[0] >= 'A'
63-
&& line.[0] <= 'Z'
64-
then module_names := line :: !module_names
65-
done;
66-
67-
(if !found_source_path then printf "Source file: %s\n" !source_path
68-
else
69-
(* If we didn't find the source path in the expected module count range,
70-
try to read one more line in case it's the source path *)
71-
try
72-
let line = read_line_bin ic in
73-
if
74-
String.length line > 0
75-
&& (line.[0] = '/'
76-
|| (String.length line > 1 && line.[0] = 'C' && line.[1] = ':'))
77-
then printf "Source file: %s\n" line
78-
with End_of_file -> printf "Note: Could not find source file path\n");
78+
(try
79+
let line = read_line_bin ic in
80+
if
81+
String.length line > 0
82+
&& (line.[0] = '/'
83+
|| (String.length line > 1 && line.[0] = 'C' && line.[1] = ':'))
84+
then (
85+
found_source_path := true;
86+
source_path := line;
87+
printf "Source file: %s\n" !source_path)
88+
with End_of_file ->
89+
if not !found_source_path then
90+
printf "Note: Could not find source file path\n");
7991

8092
(* Add individual module names to the set *)
81-
List.iter
82-
(fun name -> modules := StringSet.add name !modules)
83-
(List.rev !module_names);
93+
List.iter (fun name -> modules := StringSet.add name !modules) result;
8494

8595
close_in ic;
8696
!modules

0 commit comments

Comments
 (0)