Skip to content

Commit 4b363b9

Browse files
committed
Use compact strings for operand hash sets
1 parent ed3fa59 commit 4b363b9

File tree

4 files changed

+61
-46
lines changed

4 files changed

+61
-46
lines changed

backend/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ walkdir = "2"
1818
glob = "0.3"
1919
regex = "1"
2020
rayon = "1.10"
21+
compact_str = "0.9"
2122
git2 = "0.20.2"
2223
arrow = { version = "54", default-features = false }
2324
parquet = { version = "54", default-features = false, features = ["arrow", "lz4", "zstd"] }

backend/src/halstead.rs

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,21 @@
1515
//! - For BoolOp, operands are the entire sub-expressions (not leaf values)
1616
//! - AugAssign counts as an operator with target and value as operands
1717
18+
use compact_str::CompactString;
1819
use ruff_python_ast::{
1920
self as ast, Expr, ModModule, Stmt, visitor::{self, Visitor}
2021
};
21-
use ruff_text_size::{Ranged};
22+
use ruff_text_size::Ranged;
2223
use std::collections::HashSet;
2324

2425
/// Halstead metrics for a code block
2526
#[derive(Debug, Clone, Default)]
2627
pub struct HalsteadMetrics {
27-
/// Set of unique operators seen
28-
operators_seen: HashSet<String>,
29-
/// Set of unique operands seen (context, operand_repr)
30-
operands_seen: HashSet<(Option<String>, String)>,
28+
/// Set of unique operators seen (stored as &'static str for efficiency)
29+
operators_seen: HashSet<&'static str>,
30+
/// Set of unique operands seen (context index, operand repr)
31+
/// Using CompactString to inline short strings (up to 24 bytes on 64-bit)
32+
operands_seen: HashSet<(u32, CompactString)>,
3133
/// Total operator count
3234
operators: u32,
3335
/// Total operand count
@@ -90,8 +92,7 @@ impl HalsteadMetrics {
9092
}
9193

9294
fn merge(&mut self, other: &HalsteadMetrics) {
93-
self.operators_seen
94-
.extend(other.operators_seen.iter().cloned());
95+
self.operators_seen.extend(other.operators_seen.iter());
9596
self.operands_seen
9697
.extend(other.operands_seen.iter().cloned());
9798
self.operators += other.operators;
@@ -112,34 +113,37 @@ pub struct FunctionHalstead {
112113
struct HalsteadVisitor<'src> {
113114
/// Source code (for generating operand repr strings)
114115
source: &'src str,
115-
/// Current function context (for tracking unique operands per context)
116-
context: Option<String>,
116+
/// Current function context index (0 = module level, 1+ = function index)
117+
context_idx: u32,
117118
/// Metrics for current scope
118119
metrics: HalsteadMetrics,
119120
/// Collected function metrics
120121
functions: Vec<FunctionHalstead>,
122+
/// Next context index to assign
123+
next_context_idx: u32,
121124
}
122125

123126
impl<'src> HalsteadVisitor<'src> {
124-
fn new(source: &'src str, context: Option<String>) -> Self {
127+
fn new(source: &'src str, context_idx: u32) -> Self {
125128
Self {
126129
source,
127-
context,
130+
context_idx,
128131
metrics: HalsteadMetrics::default(),
129132
functions: Vec::new(),
133+
next_context_idx: context_idx + 1,
130134
}
131135
}
132136

133-
fn add_operator(&mut self, op_name: &str) {
137+
#[inline]
138+
fn add_operator(&mut self, op_name: &'static str) {
134139
self.metrics.operators += 1;
135-
self.metrics.operators_seen.insert(op_name.to_string());
140+
self.metrics.operators_seen.insert(op_name);
136141
}
137142

138-
fn add_operand(&mut self, operand: &str) {
143+
#[inline]
144+
fn add_operand(&mut self, operand: CompactString) {
139145
self.metrics.operands += 1;
140-
self.metrics
141-
.operands_seen
142-
.insert((self.context.clone(), operand.to_string()));
146+
self.metrics.operands_seen.insert((self.context_idx, operand));
143147
}
144148

145149
/// Get the operator name from a binary operator
@@ -163,37 +167,39 @@ impl<'src> HalsteadVisitor<'src> {
163167
}
164168

165169
/// Extract operand string from an expression - radon uses simple values
166-
fn expr_to_operand(expr: &Expr) -> String {
170+
#[inline]
171+
fn expr_to_operand(expr: &Expr) -> CompactString {
167172
match expr {
168-
Expr::Name(n) => n.id.to_string(),
173+
Expr::Name(n) => CompactString::new(&n.id),
169174
Expr::NumberLiteral(n) => {
170175
// Return the numeric value as a string
171176
match &n.value {
172-
ast::Number::Int(i) => i.to_string(),
173-
ast::Number::Float(f) => f.to_string(),
174-
ast::Number::Complex { real, imag } => format!("{}+{}j", real, imag),
177+
ast::Number::Int(i) => CompactString::new(i.to_string()),
178+
ast::Number::Float(f) => CompactString::new(f.to_string()),
179+
ast::Number::Complex { real, imag } => CompactString::new(format!("{}+{}j", real, imag)),
175180
}
176181
}
177-
Expr::StringLiteral(s) => format!("{:?}", s.value.to_str()),
178-
Expr::BytesLiteral(b) => format!("{:?}", b.value),
179-
Expr::BooleanLiteral(b) => b.value.to_string(),
180-
Expr::NoneLiteral(_) => "None".to_string(),
181-
Expr::EllipsisLiteral(_) => "...".to_string(),
182-
Expr::Attribute(a) => a.attr.to_string(),
183-
_ => format!("{:?}", expr),
182+
Expr::StringLiteral(s) => CompactString::new(format!("{:?}", s.value.to_str())),
183+
Expr::BytesLiteral(b) => CompactString::new(format!("{:?}", b.value)),
184+
Expr::BooleanLiteral(b) => CompactString::const_new(if b.value { "True" } else { "False" }),
185+
Expr::NoneLiteral(_) => CompactString::const_new("None"),
186+
Expr::EllipsisLiteral(_) => CompactString::const_new("..."),
187+
Expr::Attribute(a) => CompactString::new(&a.attr),
188+
_ => CompactString::new(format!("{:?}", expr)),
184189
}
185190
}
186191

187192
/// Get a string representation of an expression (for BoolOp operands)
188193
/// Radon stores the entire AST node as the operand
189-
fn expr_repr(&self, expr: &Expr) -> String {
194+
#[inline]
195+
fn expr_repr(&self, expr: &Expr) -> CompactString {
190196
// Get the source text for this expression
191197
let start = expr.range().start().to_usize();
192198
let end = expr.range().end().to_usize();
193199
if start < self.source.len() && end <= self.source.len() {
194-
self.source[start..end].to_string()
200+
CompactString::new(&self.source[start..end])
195201
} else {
196-
format!("{:?}", expr)
202+
CompactString::new(format!("{:?}", expr))
197203
}
198204
}
199205

@@ -202,13 +208,21 @@ impl<'src> HalsteadVisitor<'src> {
202208
// Radon does NOT prefix method names with class name - just use the function name
203209
let func_name = node.name.to_string();
204210

205-
let mut func_visitor = HalsteadVisitor::new(self.source, Some(func_name.clone()));
211+
// Assign a unique context index for this function
212+
let func_context_idx = self.next_context_idx;
213+
self.next_context_idx += 1;
214+
215+
let mut func_visitor = HalsteadVisitor::new(self.source, func_context_idx);
216+
func_visitor.next_context_idx = self.next_context_idx;
206217

207218
// Visit the function body
208219
for stmt in &node.body {
209220
func_visitor.visit_stmt(stmt);
210221
}
211222

223+
// Update our next_context_idx from nested functions
224+
self.next_context_idx = func_visitor.next_context_idx;
225+
212226
// Store function metrics (before merging so we keep per-function metrics separate)
213227
let func_metrics = func_visitor.metrics.clone();
214228
self.functions.push(FunctionHalstead {
@@ -246,8 +260,8 @@ impl<'a, 'src> Visitor<'a> for HalsteadVisitor<'src> {
246260
Stmt::AugAssign(node) => {
247261
// Augmented assignment: 1 operator, 2 operands (target, value)
248262
self.add_operator(Self::binop_name(&node.op));
249-
self.add_operand(&Self::expr_to_operand(&node.target));
250-
self.add_operand(&Self::expr_to_operand(&node.value));
263+
self.add_operand(Self::expr_to_operand(&node.target));
264+
self.add_operand(Self::expr_to_operand(&node.value));
251265
visitor::walk_stmt(self, stmt);
252266
}
253267
_ => {
@@ -261,22 +275,22 @@ impl<'a, 'src> Visitor<'a> for HalsteadVisitor<'src> {
261275
Expr::BinOp(node) => {
262276
// Binary operator: 1 operator, 2 operands
263277
self.add_operator(Self::binop_name(&node.op));
264-
self.add_operand(&Self::expr_to_operand(&node.left));
265-
self.add_operand(&Self::expr_to_operand(&node.right));
278+
self.add_operand(Self::expr_to_operand(&node.left));
279+
self.add_operand(Self::expr_to_operand(&node.right));
266280
visitor::walk_expr(self, expr);
267281
}
268282
Expr::UnaryOp(node) => {
269283
// Unary operator: 1 operator, 1 operand
270284
self.add_operator(Self::unaryop_name(&node.op));
271-
self.add_operand(&Self::expr_to_operand(&node.operand));
285+
self.add_operand(Self::expr_to_operand(&node.operand));
272286
visitor::walk_expr(self, expr);
273287
}
274288
Expr::BoolOp(node) => {
275289
// Boolean operator: 1 operator, N operands
276290
// Radon stores the entire sub-expressions as operands!
277291
self.add_operator(Self::boolop_name(&node.op));
278292
for value in &node.values {
279-
self.add_operand(&self.expr_repr(value));
293+
self.add_operand(self.expr_repr(value));
280294
}
281295
visitor::walk_expr(self, expr);
282296
}
@@ -285,9 +299,9 @@ impl<'a, 'src> Visitor<'a> for HalsteadVisitor<'src> {
285299
for op in &node.ops {
286300
self.add_operator(Self::cmpop_name(op));
287301
}
288-
self.add_operand(&Self::expr_to_operand(&node.left));
302+
self.add_operand(Self::expr_to_operand(&node.left));
289303
for comp in &node.comparators {
290-
self.add_operand(&Self::expr_to_operand(comp));
304+
self.add_operand(Self::expr_to_operand(comp));
291305
}
292306
visitor::walk_expr(self, expr);
293307
}
@@ -303,7 +317,7 @@ pub fn analyze(
303317
source: &str,
304318
parsed: &ruff_python_parser::Parsed<ModModule>
305319
) -> (HalsteadMetrics, Vec<FunctionHalstead>) {
306-
let mut visitor = HalsteadVisitor::new(source, None);
320+
let mut visitor = HalsteadVisitor::new(source, 0);
307321

308322
for stmt in parsed.suite() {
309323
visitor.visit_stmt(stmt);

backend/src/storage.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,8 +1129,7 @@ impl WilyIndex {
11291129
use crate::maintainability;
11301130
use crate::raw;
11311131
use rayon::prelude::*;
1132-
use std::collections::{HashSet};
1133-
use std::fs;
1132+
use std::collections::HashSet;
11341133

11351134
let operators = &self.operators;
11361135
let include_raw = operators.iter().any(|o| o == "raw");
@@ -1171,13 +1170,13 @@ impl WilyIndex {
11711170
mi: Option<(f64, MIRank)>,
11721171
}
11731172

1174-
// Phase 1: Parallel file analysis
1173+
// Phase 1: Parallel file I/O and analysis
11751174
let file_results: Vec<FileResult> = py.detach(|| {
11761175
paths
11771176
.par_iter()
11781177
.filter_map(|rel_path| {
11791178
let abs_path = base_path_buf.join(rel_path);
1180-
let content = fs::read_to_string(abs_path).ok()?;
1179+
let content = std::fs::read_to_string(&abs_path).ok()?;
11811180

11821181
let parsed = parse_module(&content).ok()?;
11831182
let line_index = LineIndex::from_source_text(&content);

0 commit comments

Comments
 (0)