|
1 | 1 | use tree_sitter::{Node, Tree}; |
2 | 2 |
|
3 | 3 | use crate::model::entity::{build_entity_id, SemanticEntity}; |
4 | | -use crate::utils::hash::{content_hash, structural_hash}; |
| 4 | +use crate::utils::hash::{content_hash, structural_hash, structural_hash_excluding_range}; |
5 | 5 | use super::languages::LanguageConfig; |
6 | 6 |
|
7 | 7 | pub fn extract_entities( |
@@ -39,7 +39,7 @@ fn visit_node( |
39 | 39 | if let Some((name, entity_type)) = extract_call_entity(node, config, source) { |
40 | 40 | let content_str = node_text(node, source); |
41 | 41 | let content = content_str.to_string(); |
42 | | - let struct_hash = structural_hash(node, source); |
| 42 | + let struct_hash = compute_structural_hash(node, source); |
43 | 43 | let entity = SemanticEntity { |
44 | 44 | id: build_entity_id(file_path, entity_type, &name, parent_id), |
45 | 45 | file_path: file_path.to_string(), |
@@ -92,7 +92,7 @@ fn visit_node( |
92 | 92 | let content_str = node_text(node, source); |
93 | 93 | let content = content_str.to_string(); |
94 | 94 |
|
95 | | - let struct_hash = structural_hash(node, source); |
| 95 | + let struct_hash = compute_structural_hash(node, source); |
96 | 96 | let entity = SemanticEntity { |
97 | 97 | id: build_entity_id(file_path, entity_type, &name, parent_id), |
98 | 98 | file_path: file_path.to_string(), |
@@ -157,6 +157,125 @@ fn visit_node( |
157 | 157 | } |
158 | 158 | } |
159 | 159 |
|
| 160 | +/// Compute the structural hash for an entity, excluding the name token so that |
| 161 | +/// renames of otherwise identical entities produce the same hash. |
| 162 | +fn compute_structural_hash(node: Node, source: &[u8]) -> String { |
| 163 | + match find_name_byte_range(node, source) { |
| 164 | + Some((start, end)) => structural_hash_excluding_range(node, source, start, end), |
| 165 | + None => structural_hash(node, source), |
| 166 | + } |
| 167 | +} |
| 168 | + |
| 169 | +/// Find the byte range of the name node, mirroring extract_name() logic. |
| 170 | +/// Returns (start_byte, end_byte) of the name token to exclude from hashing. |
| 171 | +fn find_name_byte_range(node: Node, _source: &[u8]) -> Option<(usize, usize)> { |
| 172 | + // Try 'name' field first (works for most languages) |
| 173 | + if let Some(name_node) = node.child_by_field_name("name") { |
| 174 | + return Some((name_node.start_byte(), name_node.end_byte())); |
| 175 | + } |
| 176 | + |
| 177 | + let node_type = node.kind(); |
| 178 | + |
| 179 | + // Variable/lexical declarations: name is inside variable_declarator |
| 180 | + if node_type == "lexical_declaration" || node_type == "variable_declaration" { |
| 181 | + let mut cursor = node.walk(); |
| 182 | + for child in node.named_children(&mut cursor) { |
| 183 | + if child.kind() == "variable_declarator" { |
| 184 | + if let Some(decl_name) = child.child_by_field_name("name") { |
| 185 | + return Some((decl_name.start_byte(), decl_name.end_byte())); |
| 186 | + } |
| 187 | + } |
| 188 | + } |
| 189 | + } |
| 190 | + |
| 191 | + // Decorated definitions (Python): look at the inner definition |
| 192 | + if node_type == "decorated_definition" { |
| 193 | + let mut cursor = node.walk(); |
| 194 | + for child in node.named_children(&mut cursor) { |
| 195 | + if child.kind() == "function_definition" || child.kind() == "class_definition" { |
| 196 | + if let Some(inner_name) = child.child_by_field_name("name") { |
| 197 | + return Some((inner_name.start_byte(), inner_name.end_byte())); |
| 198 | + } |
| 199 | + } |
| 200 | + } |
| 201 | + } |
| 202 | + |
| 203 | + // C/C++ function_definition: name is inside declarator |
| 204 | + if node_type == "function_definition" { |
| 205 | + if let Some(declarator) = node.child_by_field_name("declarator") { |
| 206 | + return find_declarator_name_range(declarator); |
| 207 | + } |
| 208 | + } |
| 209 | + |
| 210 | + // C++ template_declaration |
| 211 | + if node_type == "template_declaration" { |
| 212 | + let mut cursor = node.walk(); |
| 213 | + for child in node.named_children(&mut cursor) { |
| 214 | + if child.kind() != "template_parameter_list" { |
| 215 | + if let Some(name) = child.child_by_field_name("name") { |
| 216 | + return Some((name.start_byte(), name.end_byte())); |
| 217 | + } |
| 218 | + if let Some(declarator) = child.child_by_field_name("declarator") { |
| 219 | + return find_declarator_name_range(declarator); |
| 220 | + } |
| 221 | + } |
| 222 | + } |
| 223 | + } |
| 224 | + |
| 225 | + // C declarations |
| 226 | + if node_type == "declaration" || node_type == "type_definition" { |
| 227 | + if let Some(declarator) = node.child_by_field_name("declarator") { |
| 228 | + return find_declarator_name_range(declarator); |
| 229 | + } |
| 230 | + } |
| 231 | + |
| 232 | + // Fallback: first identifier child |
| 233 | + let mut cursor = node.walk(); |
| 234 | + for child in node.named_children(&mut cursor) { |
| 235 | + if child.kind() == "identifier" || child.kind() == "type_identifier" { |
| 236 | + return Some((child.start_byte(), child.end_byte())); |
| 237 | + } |
| 238 | + } |
| 239 | + |
| 240 | + None |
| 241 | +} |
| 242 | + |
| 243 | +/// Find the byte range of the name within a C-style declarator chain. |
| 244 | +fn find_declarator_name_range(node: Node) -> Option<(usize, usize)> { |
| 245 | + match node.kind() { |
| 246 | + "identifier" | "type_identifier" | "field_identifier" => { |
| 247 | + Some((node.start_byte(), node.end_byte())) |
| 248 | + } |
| 249 | + "qualified_identifier" | "scoped_identifier" => { |
| 250 | + Some((node.start_byte(), node.end_byte())) |
| 251 | + } |
| 252 | + "pointer_declarator" | "function_declarator" | "array_declarator" |
| 253 | + | "parenthesized_declarator" => { |
| 254 | + if let Some(inner) = node.child_by_field_name("declarator") { |
| 255 | + find_declarator_name_range(inner) |
| 256 | + } else { |
| 257 | + let mut cursor = node.walk(); |
| 258 | + let result = node |
| 259 | + .named_children(&mut cursor) |
| 260 | + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") |
| 261 | + .map(|c| (c.start_byte(), c.end_byte())); |
| 262 | + result |
| 263 | + } |
| 264 | + } |
| 265 | + _ => { |
| 266 | + if let Some(name) = node.child_by_field_name("name") { |
| 267 | + return Some((name.start_byte(), name.end_byte())); |
| 268 | + } |
| 269 | + let mut cursor = node.walk(); |
| 270 | + let result = node |
| 271 | + .named_children(&mut cursor) |
| 272 | + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") |
| 273 | + .map(|c| (c.start_byte(), c.end_byte())); |
| 274 | + result |
| 275 | + } |
| 276 | + } |
| 277 | +} |
| 278 | + |
160 | 279 | fn extract_name(node: Node, source: &[u8]) -> Option<String> { |
161 | 280 | // Try 'name' field first (works for most languages) |
162 | 281 | if let Some(name_node) = node.child_by_field_name("name") { |
|
0 commit comments