Skip to content

Commit 66caa3c

Browse files
Merge pull request #58 from pythonbpf/vmlinux-handler
finish table construction for vmlinux symbol info transfer
2 parents 5512bf5 + 76d0dbf commit 66caa3c

File tree

7 files changed

+163
-19
lines changed

7 files changed

+163
-19
lines changed

pythonbpf/codegen.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ def processor(source_code, filename, module):
5555
for func_node in bpf_chunks:
5656
logger.info(f"Found BPF function/struct: {func_node.name}")
5757

58-
vmlinux_proc(tree, module)
58+
vmlinux_symtab = vmlinux_proc(tree, module)
5959
populate_global_symbol_table(tree, module)
6060
license_processing(tree, module)
6161
globals_processing(tree, module)
62-
62+
print("DEBUG:", vmlinux_symtab)
6363
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
6464
map_sym_tab = maps_proc(tree, module, bpf_chunks)
6565
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from enum import Enum, auto
2+
from typing import Any, Dict, List, Optional, TypedDict
3+
from dataclasses import dataclass
4+
import llvmlite.ir as ir
5+
6+
from pythonbpf.vmlinux_parser.dependency_node import Field
7+
8+
9+
class AssignmentType(Enum):
    """Kind of symbol recorded in the ``value_type`` key of an ``AssignmentInfo``.

    This is intentionally a plain ``Enum``. Decorating an ``Enum`` with
    ``@dataclass`` generates a field-less ``__eq__`` (so every member compares
    equal to every other member) and sets ``__hash__`` to ``None`` (so members
    can no longer be used in sets or as dict keys).
    """

    CONSTANT = auto()
    STRUCT = auto()
    ARRAY = auto()  # probably won't be used
    FUNCTION_POINTER = auto()
    POINTER = auto()  # again, probably won't be used
16+
17+
18+
class FunctionSignature(TypedDict):
    """Dictionary shape describing the signature of a function pointer.

    A ``TypedDict`` is an ordinary ``dict`` at runtime, so it is declared
    without ``@dataclass``: the decorator is not supported on ``TypedDict``
    classes and only attaches generated methods that instances never use.
    """

    return_type: str
    param_types: List[str]
    varargs: bool
23+
24+
25+
# The name of the assignment will be in the dict that uses this class
26+
class AssignmentInfo(TypedDict):
    """Dictionary shape for one entry of the vmlinux assignment table.

    The entry's name is not stored here; it is the key of the dict that
    holds these entries. Declared without ``@dataclass``: a ``TypedDict`` is
    a plain ``dict`` at runtime and the decorator is not supported on it.
    """

    # Which kind of symbol this entry describes.
    value_type: AssignmentType
    # Python-side type associated with the symbol.
    python_type: type
    # Literal value, when the symbol has one.
    value: Optional[Any]
    pointer_level: Optional[int]
    signature: Optional[FunctionSignature]  # For function pointers
    # The key of the dict is the name of the field.
    # Value is a tuple that contains the global variable representing that field
    # along with all the information about that field as a Field type.
    members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]]  # For structs.

pythonbpf/vmlinux_parser/class_handler.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
from functools import lru_cache
33
import importlib
4+
45
from .dependency_handler import DependencyHandler
56
from .dependency_node import DependencyNode
67
import ctypes
@@ -15,7 +16,11 @@ def get_module_symbols(module_name: str):
1516
return [name for name in dir(imported_module)], imported_module
1617

1718

18-
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
19+
def process_vmlinux_class(
20+
node,
21+
llvm_module,
22+
handler: DependencyHandler,
23+
):
1924
symbols_in_module, imported_module = get_module_symbols("vmlinux")
2025
if node.name in symbols_in_module:
2126
vmlinux_type = getattr(imported_module, node.name)
@@ -25,7 +30,10 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
2530

2631

2732
def process_vmlinux_post_ast(
28-
elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
33+
elem_type_class,
34+
llvm_handler,
35+
handler: DependencyHandler,
36+
processing_stack=None,
2937
):
3038
# Initialize processing stack on first call
3139
if processing_stack is None:
@@ -46,7 +54,7 @@ def process_vmlinux_post_ast(
4654
logger.debug(f"Node {current_symbol_name} already processed and ready")
4755
return True
4856

49-
# XXX:Check it's use. It's probably not being used.
57+
# XXX:Check its use. It's probably not being used.
5058
if current_symbol_name in processing_stack:
5159
logger.debug(
5260
f"Dependency already in processing stack for {current_symbol_name}, skipping"
@@ -98,6 +106,7 @@ def process_vmlinux_post_ast(
98106
[elem_type, elem_bitfield_size] = elem_temp_list
99107
local_module_name = getattr(elem_type, "__module__", None)
100108
new_dep_node.add_field(elem_name, elem_type, ready=False)
109+
101110
if local_module_name == ctypes.__name__:
102111
# TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
103112
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
@@ -226,7 +235,10 @@ def process_vmlinux_post_ast(
226235
else str(elem_type)
227236
)
228237
process_vmlinux_post_ast(
229-
elem_type, llvm_handler, handler, processing_stack
238+
elem_type,
239+
llvm_handler,
240+
handler,
241+
processing_stack,
230242
)
231243
new_dep_node.set_field_ready(elem_name, True)
232244
else:
@@ -237,7 +249,7 @@ def process_vmlinux_post_ast(
237249
else:
238250
raise ImportError("UNSUPPORTED Module")
239251

240-
logging.info(
252+
logger.info(
241253
f"{current_symbol_name} processed and handler readiness {handler.is_ready}"
242254
)
243255
return True

pythonbpf/vmlinux_parser/dependency_node.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,31 @@ class Field:
1818
value: Any = None
1919
ready: bool = False
2020

21+
def __hash__(self):
    """
    Return an identity-based hash so Field objects can be used as dictionary keys.

    Equality is identity (see ``__eq__``), so the hash must not be derived
    from attribute values: a hash built from attributes changes whenever one
    of them is reassigned, which strands the object inside any dict or set
    it was already stored in, and it raises ``TypeError`` when ``value``
    holds something unhashable.
    """
    return object.__hash__(self)

def __eq__(self, other):
    """
    Define equality consistent with the hash function.

    Two fields are equal only if they are the same object.
    """
    return self is other
45+
2146
def set_ready(self, is_ready: bool = True) -> None:
2247
"""Set the readiness state of this field."""
2348
self.ready = is_ready

pythonbpf/vmlinux_parser/import_detector.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import ast
22
import logging
3-
from typing import List, Tuple, Any
43
import importlib
54
import inspect
65

6+
from .assignment_info import AssignmentInfo, AssignmentType
77
from .dependency_handler import DependencyHandler
88
from .ir_gen import IRGenerator
99
from .class_handler import process_vmlinux_class
1010

1111
logger = logging.getLogger(__name__)
1212

1313

14-
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
14+
def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
1515
"""
1616
Parse AST and detect import statements from vmlinux.
1717
@@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
8282
# initialise dependency handler
8383
handler = DependencyHandler()
8484
# initialise assignment dictionary of name to type
85-
assignments: dict[str, tuple[type, Any]] = {}
85+
assignments: dict[str, AssignmentInfo] = {}
8686

8787
if not import_statements:
8888
logger.info("No vmlinux imports found")
@@ -128,20 +128,35 @@ def vmlinux_proc(tree: ast.AST, module):
128128
f"{imported_name} not found as ClassDef or Assign in vmlinux"
129129
)
130130

131-
IRGenerator(module, handler)
131+
IRGenerator(module, handler, assignments)
132132
return assignments
133133

134134

135-
def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]):
136-
# Check if this is a simple assignment with a constant value
135+
def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
    """Record a vmlinux module-level assignment in ``assignments``.

    Only assignments with exactly one plain-name target are accepted. When
    the assigned value is a constant, an ``AssignmentInfo`` entry of type
    ``CONSTANT`` is stored under the target name; any other kind of value is
    reported with a warning and left unrecorded.

    Raises:
        ValueError: if the assignment has several targets or a target that
            is not a simple name.
    """
    targets = node.targets
    if len(targets) != 1 or not isinstance(targets[0], ast.Name):
        raise ValueError("Not a simple assignment")

    target_name = targets[0].id

    # Anything other than a literal constant is not supported yet.
    if not isinstance(node.value, ast.Constant):
        logger.warning(
            f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}"
        )
        return

    constant = node.value.value
    assignments[target_name] = AssignmentInfo(
        value_type=AssignmentType.CONSTANT,
        python_type=type(constant),
        value=constant,
        pointer_level=None,
        signature=None,
        members=None,
    )
    logger.info(
        f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
    )

pythonbpf/vmlinux_parser/ir_gen/ir_generation.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import ctypes
22
import logging
3+
4+
from ..assignment_info import AssignmentInfo, AssignmentType
35
from ..dependency_handler import DependencyHandler
46
from .debug_info_gen import debug_info_generation
57
from ..dependency_node import DependencyNode
@@ -10,11 +12,14 @@
1012

1113
class IRGenerator:
1214
# get the assignments dict and add this stuff to it.
13-
def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
15+
def __init__(self, llvm_module, handler: DependencyHandler, assignments):
1416
self.llvm_module = llvm_module
1517
self.handler: DependencyHandler = handler
1618
self.generated: list[str] = []
1719
self.generated_debug_info: list = []
20+
# Use struct_name and field_name as key instead of Field object
21+
self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {}
22+
self.assignments: dict[str, AssignmentInfo] = assignments
1823
if not handler.is_ready:
1924
raise ImportError(
2025
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
@@ -67,10 +72,42 @@ def struct_processor(self, struct, processing_stack=None):
6772
f"Warning: Dependency {dependency} not found in handler"
6873
)
6974

70-
# Actual processor logic here after dependencies are resolved
75+
# Generate IR first to populate field names
7176
self.generated_debug_info.append(
7277
(struct, self.gen_ir(struct, self.generated_debug_info))
7378
)
79+
80+
# Fill the assignments dictionary with struct information
81+
if struct.name not in self.assignments:
82+
# Create a members dictionary for AssignmentInfo
83+
members_dict = {}
84+
for field_name, field in struct.fields.items():
85+
# Get the generated field name from our dictionary, or use field_name if not found
86+
if (
87+
struct.name in self.generated_field_names
88+
and field_name in self.generated_field_names[struct.name]
89+
):
90+
field_global_variable = self.generated_field_names[struct.name][
91+
field_name
92+
]
93+
members_dict[field_name] = (field_global_variable, field)
94+
else:
95+
raise ValueError(
96+
f"llvm global name not found for struct field {field_name}"
97+
)
98+
# members_dict[field_name] = (field_name, field)
99+
100+
# Add struct to assignments dictionary
101+
self.assignments[struct.name] = AssignmentInfo(
102+
value_type=AssignmentType.STRUCT,
103+
python_type=struct.ctype_struct,
104+
value=None,
105+
pointer_level=None,
106+
signature=None,
107+
members=members_dict,
108+
)
109+
logger.info(f"Added struct assignment info for {struct.name}")
110+
74111
self.generated.append(struct.name)
75112

76113
finally:
@@ -85,6 +122,11 @@ def gen_ir(self, struct, generated_debug_info):
85122
struct, self.llvm_module, generated_debug_info
86123
)
87124
field_index = 0
125+
126+
# Make sure the struct has an entry in our field names dictionary
127+
if struct.name not in self.generated_field_names:
128+
self.generated_field_names[struct.name] = {}
129+
88130
for field_name, field in struct.fields.items():
89131
# does not take arrays and similar types into consideration yet.
90132
if field.ctype_complex_type is not None and issubclass(
@@ -94,6 +136,18 @@ def gen_ir(self, struct, generated_debug_info):
94136
containing_type = field.containing_type
95137
if containing_type.__module__ == ctypes.__name__:
96138
containing_type_size = ctypes.sizeof(containing_type)
139+
if array_size == 0:
140+
field_co_re_name = self._struct_name_generator(
141+
struct, field, field_index, True, 0, containing_type_size
142+
)
143+
globvar = ir.GlobalVariable(
144+
self.llvm_module, ir.IntType(64), name=field_co_re_name
145+
)
146+
globvar.linkage = "external"
147+
globvar.set_metadata("llvm.preserve.access.index", debug_info)
148+
self.generated_field_names[struct.name][field_name] = globvar
149+
field_index += 1
150+
continue
97151
for i in range(0, array_size):
98152
field_co_re_name = self._struct_name_generator(
99153
struct, field, field_index, True, i, containing_type_size
@@ -103,6 +157,7 @@ def gen_ir(self, struct, generated_debug_info):
103157
)
104158
globvar.linkage = "external"
105159
globvar.set_metadata("llvm.preserve.access.index", debug_info)
160+
self.generated_field_names[struct.name][field_name] = globvar
106161
field_index += 1
107162
elif field.type_size is not None:
108163
array_size = field.type_size
@@ -120,6 +175,7 @@ def gen_ir(self, struct, generated_debug_info):
120175
)
121176
globvar.linkage = "external"
122177
globvar.set_metadata("llvm.preserve.access.index", debug_info)
178+
self.generated_field_names[struct.name][field_name] = globvar
123179
field_index += 1
124180
else:
125181
field_co_re_name = self._struct_name_generator(
@@ -131,6 +187,7 @@ def gen_ir(self, struct, generated_debug_info):
131187
)
132188
globvar.linkage = "external"
133189
globvar.set_metadata("llvm.preserve.access.index", debug_info)
190+
self.generated_field_names[struct.name][field_name] = globvar
134191
return debug_info
135192

136193
def _struct_name_generator(

tests/passing_tests/vmlinux/simple_struct_test.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
1+
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
22
from vmlinux import TASK_COMM_LEN # noqa: F401
33
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
44

@@ -27,4 +27,3 @@ def LICENSE() -> str:
2727

2828

2929
compile_to_ir("simple_struct_test.py", "simple_struct_test.ll")
30-
compile()

0 commit comments

Comments
 (0)