-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsemantic.rs
More file actions
179 lines (162 loc) · 7.55 KB
/
semantic.rs
File metadata and controls
179 lines (162 loc) · 7.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
//! Authoritative semantic types for Nexum Graph.
//!
//! These types are transcribed directly from the Implementation Specification
//! and constitute the API contract for all downstream crates. Codex (the code
//! generator) must not alter these type signatures.
//!
//! Reviewing this file:
//! - Treat it like a shared schema, not an implementation detail.
//! - `SemanticId` semantics must stay aligned with `CORE_INVARIANTS.md` and
//! the extractor implementation in `nex-parse`.
//! - If this file changes, expect coordinated updates in parsing, graph diff,
//! coordination, validation, event log replay, and tests across the repo.
use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::path::PathBuf;
// ─────────────────────────────────────────────────────────────────────────────
// Core Identity
// ─────────────────────────────────────────────────────────────────────────────
/// Content-addressed identity for semantic units.
///
/// BLAKE3 256-bit hash over the unit's qualified name, file path, and normalized body hash,
/// held as its 32 raw bytes.
///
/// This is a plain type alias, not a newtype: the compiler accepts any `[u8; 32]` wherever a
/// `SemanticId` is expected, so nothing at the type level proves a value was produced by the
/// hashing scheme described above.
pub type SemanticId = [u8; 32];
// ─────────────────────────────────────────────────────────────────────────────
// Semantic Unit (the fundamental node)
// ─────────────────────────────────────────────────────────────────────────────
/// A single semantic unit extracted from source code.
///
/// Represents a function, class, interface, module, or other named code entity
/// at a granularity suitable for semantic diffing and coordination.
///
/// Equality and hashing are derived, so two units compare equal only when every
/// field matches (including `byte_range` and `dependencies`), not merely when
/// their `id`s match.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct SemanticUnit {
/// Content-addressed hash (BLAKE3 over qualified_name + file_path + normalized body hash).
///
/// NOTE(review): because `file_path` is an input to this hash, a unit that moves to
/// another file presumably receives a different `id` -- confirm how `MovedUnit`
/// entries are matched across the move.
pub id: SemanticId,
/// What kind of code entity this is.
pub kind: UnitKind,
/// Short name, e.g. "validateToken".
pub name: String,
/// Fully qualified name, e.g. "auth::AuthManager::validateToken".
pub qualified_name: String,
/// Path to the source file.
pub file_path: PathBuf,
/// Byte range within the source file.
///
/// `Range` is half-open: `start` is inclusive and `end` is exclusive.
pub byte_range: Range<usize>,
/// Hash of parameter types + return type (for change detection).
///
/// NOTE(review): `ChangeKind::SignatureChanged` also mentions visibility; confirm
/// whether visibility is folded into this hash.
pub signature_hash: u64,
/// Hash of normalized body AST (for change detection).
///
/// NOTE(review): this is a 64-bit value, while `id` is described as covering a
/// "normalized body hash" -- confirm whether the two refer to the same digest.
pub body_hash: u64,
/// IDs of units this unit depends on (calls, imports, inherits, etc.).
///
/// Only the target IDs are stored here; the kind of each edge is not recorded
/// in this field.
pub dependencies: Vec<SemanticId>,
}
/// Classification of semantic unit kinds.
///
/// Stored on every unit as `SemanticUnit::kind`. The enum is `Copy`, so it can be
/// passed and compared by value.
///
/// NOTE(review): `Class`/`Struct` and `Interface`/`Trait` are separate variants,
/// presumably so each source language keeps its own terminology -- confirm the
/// mapping against the extractor.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum UnitKind {
Function,
Method,
Class,
Struct,
Interface,
Trait,
Enum,
Module,
Constant,
}
// ─────────────────────────────────────────────────────────────────────────────
// Semantic Diff (output of graph comparison)
// ─────────────────────────────────────────────────────────────────────────────
/// The result of comparing two CodeGraphs.
///
/// The comparison is directional: `before` is the older graph and `after` the
/// newer one, and each change is reported in exactly one of the four lists below
/// according to their field docs. (`CodeGraph` itself is declared elsewhere.)
///
/// Unlike `SemanticUnit`, this type does not derive `PartialEq`, so two diffs
/// cannot be compared with `==` directly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticDiff {
/// Units present in `after` but not `before`.
pub added: Vec<SemanticUnit>,
/// Units present in `before` but not `after`.
pub removed: Vec<SemanticUnit>,
/// Units present in both but with changed signature or body.
pub modified: Vec<ModifiedUnit>,
/// Units that moved to a different file path.
pub moved: Vec<MovedUnit>,
}
/// A unit that exists in both refs but has changed.
///
/// Carries full snapshots of both sides so a consumer can inspect either state
/// without going back to the graphs that produced the diff.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModifiedUnit {
/// The unit as it appeared before the change.
pub before: SemanticUnit,
/// The unit as it appears after the change.
pub after: SemanticUnit,
/// What specifically changed.
///
/// NOTE(review): presumably non-empty and free of duplicates, but the type does
/// not enforce either -- confirm with the diff producer.
pub changes: Vec<ChangeKind>,
}
/// Classification of what changed within a modified unit.
///
/// Several kinds can apply to one unit at once; `ModifiedUnit::changes` holds
/// them as a list.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ChangeKind {
/// Parameter types, return type, or visibility changed.
SignatureChanged,
/// Function/method body changed (but signature is the same).
BodyChanged,
/// Documentation or comments changed.
///
/// NOTE(review): `SemanticUnit` has a `signature_hash` and a `body_hash` but no
/// documentation hash in this file -- confirm how this variant is detected.
DocChanged,
}
/// A unit that moved from one file path to another.
///
/// Entries of this type populate `SemanticDiff::moved`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MovedUnit {
/// The unit (with its current state).
///
/// NOTE(review): presumably `unit.file_path` equals `new_path`; nothing in this
/// type enforces that -- confirm with the diff producer.
pub unit: SemanticUnit,
/// Where it was before.
pub old_path: PathBuf,
/// Where it is now.
pub new_path: PathBuf,
}
// ─────────────────────────────────────────────────────────────────────────────
// Dependency Classification
// ─────────────────────────────────────────────────────────────────────────────
/// Classification of dependency edges between semantic units.
///
/// In every variant the edge is directed from unit A (the dependent) to unit B
/// (the dependency).
///
/// NOTE(review): `SemanticUnit::dependencies` stores only target IDs, so this
/// classification is presumably attached to edges by the graph layer -- confirm
/// where the pairing of edge and `DepKind` lives.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum DepKind {
/// Unit A calls unit B.
Calls,
/// Unit A imports unit B.
Imports,
/// Unit A inherits from / extends unit B.
Inherits,
/// Unit A implements unit B (trait/interface).
Implements,
/// Unit A uses unit B (type reference, field access, etc.).
Uses,
}
// ─────────────────────────────────────────────────────────────────────────────
// File Identity (for git integration)
// ─────────────────────────────────────────────────────────────────────────────
/// Identifies a source file at a specific git revision.
///
/// Derives `Eq` and `Hash`, so values can be used as keys in hash-based
/// collections; two identities are equal only if path, blob OID and language
/// all match.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct FileIdentity {
/// Relative path within the repository.
pub path: PathBuf,
/// Git blob OID (if available).
///
/// `None` when no OID is available. The array is 20 bytes wide.
/// NOTE(review): 20 bytes matches a SHA-1 object id; a SHA-256 object id would
/// not fit -- confirm SHA-256 repositories are out of scope.
pub blob_oid: Option<[u8; 20]>,
/// Language detected from file extension.
pub language: Language,
}
/// Supported programming languages.
///
/// Values are normally obtained through `Language::from_extension`; the
/// per-variant notes below list the extension that maps to each one.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Language {
/// Files with the `ts` extension.
TypeScript,
/// Files with the `tsx` extension; kept distinct from `TypeScript`.
Tsx,
/// Files with the `py` extension.
Python,
/// Files with the `rs` extension.
Rust,
/// Files with the `go` extension.
Go,
/// Files with the `java` extension.
Java,
/// Fallback for any extension that is not recognized.
Unknown,
}
impl Language {
    /// Detect language from file extension.
    ///
    /// `ext` is the bare extension without a leading dot (the form produced by
    /// `std::path::Path::extension`), e.g. `"rs"` rather than `".rs"`.
    ///
    /// Matching ignores ASCII case, so `"RS"`, `"Py"` and `"JAVA"` resolve to
    /// the same language as their lowercase spellings. Files checked out on
    /// case-insensitive file systems regularly carry such extensions, and they
    /// should not be silently classified as `Unknown`.
    ///
    /// Returns `Language::Unknown` for the empty string and for any extension
    /// not in the table below.
    pub fn from_extension(ext: &str) -> Self {
        // Canonical lowercase spelling of every recognized extension.
        const KNOWN_EXTENSIONS: [(&str, Language); 6] = [
            ("ts", Language::TypeScript),
            ("tsx", Language::Tsx),
            ("py", Language::Python),
            ("rs", Language::Rust),
            ("go", Language::Go),
            ("java", Language::Java),
        ];

        // `eq_ignore_ascii_case` compares in place, so no lowercase copy of
        // `ext` is allocated per call.
        KNOWN_EXTENSIONS
            .iter()
            .find(|(known, _)| ext.eq_ignore_ascii_case(known))
            .map_or(Language::Unknown, |&(_, language)| language)
    }
}