From 6b7842f5219d3219d5ad078c04403fff6c4949e2 Mon Sep 17 00:00:00 2001 From: Nicholas Sielicki Date: Fri, 2 Jan 2026 14:47:30 -0600 Subject: [PATCH 1/3] prov/cxi: add script to break-out headers Add some scripts to refactor prov/cxi headers. usage: $ ./prov/cxi/scripts/analyze_symbols.py | \ ./prov/cxi/scripts/generate_refactor_plan.py | \ ./prov/cxi/scripts/apply_refactor.py $ git add prov/cxi/include $ make -j Signed-off-by: Nicholas Sielicki --- prov/cxi/scripts/analyze_symbols.py | 500 +++++++++ prov/cxi/scripts/apply_refactor.py | 1182 ++++++++++++++++++++ prov/cxi/scripts/generate_refactor_plan.py | 610 ++++++++++ 3 files changed, 2292 insertions(+) create mode 100755 prov/cxi/scripts/analyze_symbols.py create mode 100755 prov/cxi/scripts/apply_refactor.py create mode 100755 prov/cxi/scripts/generate_refactor_plan.py diff --git a/prov/cxi/scripts/analyze_symbols.py b/prov/cxi/scripts/analyze_symbols.py new file mode 100755 index 00000000000..30f624b43b2 --- /dev/null +++ b/prov/cxi/scripts/analyze_symbols.py @@ -0,0 +1,500 @@ +#!/usr/bin/env -S uv run +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "ast-grep-py>=0.31.0", +# ] +# /// +""" +Analyze CXI provider source code to extract symbol information. + +This script extracts: +- Function declarations and definitions +- Type definitions (structs, enums, typedefs, unions) +- Macro definitions +- Usage sites for all of the above + +Output is a JSON report that can be used to plan header refactoring. 
+""" + +import json +import sys +from pathlib import Path +from dataclasses import dataclass, field, asdict +from typing import Literal +from ast_grep_py import SgRoot, SgNode + + +@dataclass +class Location: + file: str + line: int + column: int + + +@dataclass +class FunctionDecl: + name: str + location: Location + is_static: bool + is_inline: bool + is_definition: bool # True if this is a definition, False if just declaration + signature: str # Full function signature for matching + + +@dataclass +class TypeDef: + name: str + location: Location + kind: Literal["struct", "enum", "union", "typedef"] + is_forward_decl: bool + full_text: str # For complex types + + +@dataclass +class MacroDef: + name: str + location: Location + is_function_like: bool + full_text: str + + +@dataclass +class SymbolUsage: + symbol_name: str + location: Location + usage_kind: Literal["call", "type_ref", "macro_ref", "pointer_only"] + # pointer_only means we only use a pointer to this type, so forward decl suffices + + +@dataclass +class FileAnalysis: + path: str + function_decls: list[FunctionDecl] = field(default_factory=list) + type_defs: list[TypeDef] = field(default_factory=list) + macro_defs: list[MacroDef] = field(default_factory=list) + usages: list[SymbolUsage] = field(default_factory=list) + + +def get_location(node: SgNode, file_path: str) -> Location: + """Extract location from an ast-grep node.""" + range_info = node.range() + return Location( + file=file_path, + line=range_info.start.line + 1, # ast-grep uses 0-indexed lines + column=range_info.start.column, + ) + + +def find_child_by_kind(node: SgNode, kind: str) -> SgNode | None: + """Find first child with given kind.""" + for child in node.children(): + if child.kind() == kind: + return child + return None + + +def find_all_children_by_kind(node: SgNode, kind: str) -> list[SgNode]: + """Find all children with given kind.""" + return [child for child in node.children() if child.kind() == kind] + + +def 
find_identifier_in_declarator(node: SgNode) -> str | None: + """Recursively find the identifier in a declarator.""" + if node.kind() == "identifier": + return node.text() + + # Check field access first if available + field_result = node.field("declarator") + if field_result: + result = find_identifier_in_declarator(field_result) + if result: + return result + + # Then check children + for child in node.children(): + if child.kind() == "identifier": + return child.text() + elif child.kind() in ("function_declarator", "pointer_declarator", + "array_declarator", "parenthesized_declarator"): + result = find_identifier_in_declarator(child) + if result: + return result + + return None + + +def analyze_functions(root: SgRoot, file_path: str) -> list[FunctionDecl]: + """Extract function declarations and definitions.""" + functions = [] + + # Find function definitions + for node in root.root().find_all(kind="function_definition"): + declarator = node.field("declarator") + if declarator is None: + continue + + func_name = find_identifier_in_declarator(declarator) + if func_name is None: + continue + + full_text = node.text() + + # Check for static/inline by looking at storage_class_specifier children + # or by checking the text before the function name + prefix = full_text.split(func_name)[0] if func_name in full_text else "" + is_static = "static" in prefix + is_inline = "inline" in prefix + + # Get signature (everything before the body) + sig_end = full_text.find('{') + signature = full_text[:sig_end].strip() if sig_end > 0 else full_text + + functions.append(FunctionDecl( + name=func_name, + location=get_location(node, file_path), + is_static=is_static, + is_inline=is_inline, + is_definition=True, + signature=signature, + )) + + # Find function declarations (no body) + for node in root.root().find_all(kind="declaration"): + declarator = node.field("declarator") + if declarator is None: + continue + + # Check if this has a function_declarator somewhere + has_func_decl = 
False + check_node = declarator + while check_node: + if check_node.kind() == "function_declarator": + has_func_decl = True + break + check_node = find_child_by_kind(check_node, "function_declarator") + if check_node is None: + # Also check pointer_declarator + ptr = find_child_by_kind(declarator, "pointer_declarator") + if ptr: + check_node = find_child_by_kind(ptr, "function_declarator") + break + + if not has_func_decl: + continue + + func_name = find_identifier_in_declarator(declarator) + if func_name is None: + continue + + full_text = node.text() + prefix = full_text.split(func_name)[0] if func_name in full_text else "" + is_static = "static" in prefix + is_inline = "inline" in prefix + + functions.append(FunctionDecl( + name=func_name, + location=get_location(node, file_path), + is_static=is_static, + is_inline=is_inline, + is_definition=False, + signature=full_text.rstrip(';'), + )) + + return functions + + +def analyze_types(root: SgRoot, file_path: str) -> list[TypeDef]: + """Extract type definitions (struct, enum, union, typedef).""" + types = [] + + # Find struct/union/enum definitions + for kind, type_kind in [("struct_specifier", "struct"), + ("union_specifier", "union"), + ("enum_specifier", "enum")]: + for node in root.root().find_all(kind=kind): + # Get the name (type_identifier child) + name_node = find_child_by_kind(node, "type_identifier") + if name_node is None: + continue + + type_name = name_node.text() + full_text = node.text() + + # Check if this is a forward declaration (no field_declaration_list) + body = find_child_by_kind(node, "field_declaration_list") + if body is None: + body = find_child_by_kind(node, "enumerator_list") + is_forward = body is None + + types.append(TypeDef( + name=type_name, + location=get_location(node, file_path), + kind=type_kind, + is_forward_decl=is_forward, + full_text=full_text, + )) + + # Find typedefs + for node in root.root().find_all(kind="type_definition"): + declarator = node.field("declarator") + if 
declarator is None: + continue + + # Get the typedef'd name - look for type_identifier + type_name = None + if declarator.kind() == "type_identifier": + type_name = declarator.text() + else: + # Look in children + for child in declarator.children(): + if child.kind() == "type_identifier": + type_name = child.text() + break + # Also check if declarator itself contains the name + if type_name is None: + ti = find_child_by_kind(declarator, "type_identifier") + if ti: + type_name = ti.text() + + if type_name is None: + # Last resort: look for any identifier + for child in declarator.children(): + if child.kind() == "identifier": + type_name = child.text() + break + + if type_name is None: + continue + + types.append(TypeDef( + name=type_name, + location=get_location(node, file_path), + kind="typedef", + is_forward_decl=False, + full_text=node.text(), + )) + + return types + + +def analyze_macros(root: SgRoot, file_path: str) -> list[MacroDef]: + """Extract macro definitions.""" + macros = [] + + for node in root.root().find_all(kind="preproc_def"): + name_node = node.field("name") + if name_node is None: + continue + + macro_name = name_node.text() + full_text = node.text() + + macros.append(MacroDef( + name=macro_name, + location=get_location(node, file_path), + is_function_like=False, + full_text=full_text, + )) + + for node in root.root().find_all(kind="preproc_function_def"): + name_node = node.field("name") + if name_node is None: + continue + + macro_name = name_node.text() + full_text = node.text() + + macros.append(MacroDef( + name=macro_name, + location=get_location(node, file_path), + is_function_like=True, + full_text=full_text, + )) + + return macros + + +def analyze_usages(root: SgRoot, file_path: str, known_functions: set[str], + known_types: set[str], known_macros: set[str]) -> list[SymbolUsage]: + """Find usages of known symbols.""" + usages = [] + seen = set() # Avoid duplicates at same location + + # Find function calls + for node in 
root.root().find_all(kind="call_expression"): + func_node = node.field("function") + if func_node is None: + continue + + # Handle direct calls + if func_node.kind() == "identifier": + func_name = func_node.text() + if func_name in known_functions: + loc = get_location(node, file_path) + key = (func_name, loc.line, loc.column, "call") + if key not in seen: + seen.add(key) + usages.append(SymbolUsage( + symbol_name=func_name, + location=loc, + usage_kind="call", + )) + + # Find type references + for node in root.root().find_all(kind="type_identifier"): + type_name = node.text() + if type_name in known_types: + # Determine if this is pointer-only usage + parent = node.parent() + is_pointer_only = False + if parent: + gp = parent.parent() + if gp and "pointer" in gp.kind(): + is_pointer_only = True + + loc = get_location(node, file_path) + kind = "pointer_only" if is_pointer_only else "type_ref" + key = (type_name, loc.line, loc.column, kind) + if key not in seen: + seen.add(key) + usages.append(SymbolUsage( + symbol_name=type_name, + location=loc, + usage_kind=kind, + )) + + # Find struct/union/enum references (when used as types) + for kind in ["struct_specifier", "union_specifier", "enum_specifier"]: + for node in root.root().find_all(kind=kind): + name_node = find_child_by_kind(node, "type_identifier") + body = find_child_by_kind(node, "field_declaration_list") + if body is None: + body = find_child_by_kind(node, "enumerator_list") + + # Only count as usage if no body (reference, not definition) + if name_node and body is None: + type_name = name_node.text() + if type_name in known_types: + parent = node.parent() + is_pointer_only = parent and "pointer" in parent.kind() + + loc = get_location(node, file_path) + usage_kind = "pointer_only" if is_pointer_only else "type_ref" + key = (type_name, loc.line, loc.column, usage_kind) + if key not in seen: + seen.add(key) + usages.append(SymbolUsage( + symbol_name=type_name, + location=loc, + usage_kind=usage_kind, + )) + 
+ # Find macro usages + for node in root.root().find_all(kind="identifier"): + ident = node.text() + if ident in known_macros: + # Make sure this isn't the macro definition itself + parent = node.parent() + if parent and parent.kind() in ("preproc_def", "preproc_function_def"): + # Check if this is the name field + name_field = parent.field("name") + if name_field and name_field.text() == ident: + continue + + loc = get_location(node, file_path) + key = (ident, loc.line, loc.column, "macro_ref") + if key not in seen: + seen.add(key) + usages.append(SymbolUsage( + symbol_name=ident, + location=loc, + usage_kind="macro_ref", + )) + + return usages + + +def analyze_file(file_path: Path) -> FileAnalysis: + """Analyze a single C source file.""" + content = file_path.read_text() + + try: + root = SgRoot(content, "c") + except Exception as e: + print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr) + return FileAnalysis(path=str(file_path)) + + analysis = FileAnalysis(path=str(file_path)) + analysis.function_decls = analyze_functions(root, str(file_path)) + analysis.type_defs = analyze_types(root, str(file_path)) + analysis.macro_defs = analyze_macros(root, str(file_path)) + + return analysis + + +def main(): + # Find all C source and header files in prov/cxi + cxi_dir = Path("prov/cxi") + + if not cxi_dir.exists(): + print(f"Error: {cxi_dir} does not exist. 
Run from libfabric root.", + file=sys.stderr) + sys.exit(1) + + # Note: This script outputs to stdout, which can be piped to generate_refactor_plan.py + + c_files = list(cxi_dir.rglob("*.c")) + h_files = list(cxi_dir.rglob("*.h")) + all_files = c_files + h_files + + print(f"Found {len(c_files)} C files and {len(h_files)} header files", file=sys.stderr) + + # First pass: collect all definitions + all_analyses: list[FileAnalysis] = [] + known_functions: set[str] = set() + known_types: set[str] = set() + known_macros: set[str] = set() + + for file_path in all_files: + print(f"Analyzing {file_path}...", file=sys.stderr) + analysis = analyze_file(file_path) + all_analyses.append(analysis) + + for func in analysis.function_decls: + known_functions.add(func.name) + for typedef in analysis.type_defs: + known_types.add(typedef.name) + for macro in analysis.macro_defs: + known_macros.add(macro.name) + + print(f"Found {len(known_functions)} functions, {len(known_types)} types, " + f"{len(known_macros)} macros", file=sys.stderr) + + # Second pass: find usages + for file_path, analysis in zip(all_files, all_analyses): + content = file_path.read_text() + try: + root = SgRoot(content, "c") + analysis.usages = analyze_usages(root, str(file_path), + known_functions, known_types, known_macros) + except Exception as e: + print(f"Warning: Failed to analyze usages in {file_path}: {e}", file=sys.stderr) + + # Convert to JSON-serializable format + result = { + "files": [asdict(a) for a in all_analyses], + "summary": { + "total_functions": len(known_functions), + "total_types": len(known_types), + "total_macros": len(known_macros), + "files_analyzed": len(all_files), + } + } + + # Output JSON + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/prov/cxi/scripts/apply_refactor.py b/prov/cxi/scripts/apply_refactor.py new file mode 100755 index 00000000000..82c4b612467 --- /dev/null +++ b/prov/cxi/scripts/apply_refactor.py @@ -0,0 +1,1182 @@ 
+#!/usr/bin/env -S uv run +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "ast-grep-py>=0.31.0", +# "networkx>=3.0", +# ] +# /// +""" +Apply the refactoring plan to transform the CXI provider codebase. + +This script: +1. Reads the refactor plan (refactor_plan.json) +2. Extracts symbol definitions from cxip.h +3. Creates new header files under prov/cxi/include/cxip/ +4. Updates source files to include appropriate headers +5. Removes extracted content from cxip.h + +The transformation is done in multiple passes: +- Pass 1: Parse and extract all symbol definitions from cxip.h +- Pass 2: Group symbols by target header +- Pass 3: Generate new header files with proper include guards +- Pass 4: Update source file includes +- Pass 5: Clean up cxip.h to only include the new headers +""" + +import json +import re +import sys +from pathlib import Path +from dataclasses import dataclass, field +from collections import defaultdict +from ast_grep_py import SgRoot +import networkx as nx + + +# License header for new files +LICENSE_HEADER = """\ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ +""" + + +@dataclass +class ExtractedSymbol: + """A symbol extracted from the source.""" + name: str + kind: str # "function", "type", "macro" + text: str # Full text of the declaration/definition + start_line: int + end_line: int + dependencies: list[str] = field(default_factory=list) + + +@dataclass +class HeaderContent: + """Content for a new header file.""" + path: str + symbols: list[ExtractedSymbol] = field(default_factory=list) + includes: set[str] = field(default_factory=set) + forward_decls: set[str] = field(default_factory=set) + + +def load_refactor_plan(path: str | None = None) -> dict: + """Load the refactor plan JSON from file or stdin if in a pipeline.""" + if path: + with open(path) as f: + return json.load(f) + else: + return json.load(sys.stdin) + + +def 
get_include_guard(header_path: str) -> tuple[str, str]: + """Generate include guard macros for a header.""" + # Convert path like "prov/cxi/include/cxip/ep.h" to "_CXIP_EP_H_" + name = Path(header_path).stem.upper() + guard = f"_CXIP_{name}_H_" + return f"#ifndef {guard}\n#define {guard}\n", f"#endif /* {guard} */\n" + + +def extract_macro_definitions(content: str, macro_names: set[str]) -> dict[str, ExtractedSymbol]: + """Extract macro definitions using regex (more reliable for preprocessor).""" + macros = {} + lines = content.split('\n') + + i = 0 + while i < len(lines): + line = lines[i] + + # Match #define MACRO_NAME + match = re.match(r'^#define\s+(\w+)(?:\(|[^(]|\s|$)', line) + if match: + macro_name = match.group(1) + if macro_name in macro_names: + # Find the full extent (handle line continuations) + start_line = i + end_line = i + while end_line < len(lines) and lines[end_line].rstrip().endswith('\\'): + end_line += 1 + + macro_text = '\n'.join(lines[start_line:end_line + 1]) + macros[macro_name] = ExtractedSymbol( + name=macro_name, + kind="macro", + text=macro_text, + start_line=start_line + 1, # 1-indexed + end_line=end_line + 1, + ) + i += 1 + + return macros + + +def extract_type_definitions(root: SgRoot, content: str, type_names: set[str]) -> tuple[dict[str, ExtractedSymbol], dict[str, ExtractedSymbol]]: + """Extract type definitions using ast-grep. + + Returns: + Tuple of (enums, other_types) - enums are separated because they + need to be defined early (before structs that use them as fields). 
+ """ + enums = {} + types = {} + lines = content.split('\n') + + # Extract struct/union/enum definitions + for kind, type_kind in [("struct_specifier", "struct"), + ("union_specifier", "union"), + ("enum_specifier", "enum")]: + for node in root.root().find_all(kind=kind): + # Get the name + name_node = None + for child in node.children(): + if child.kind() == "type_identifier": + name_node = child + break + + if name_node is None: + continue + + type_name = name_node.text() + if type_name not in type_names: + continue + + # Check if this is a definition (has body) + has_body = False + for child in node.children(): + if child.kind() in ("field_declaration_list", "enumerator_list"): + has_body = True + break + + if not has_body: + continue # Skip forward declarations + + # Get the full text including potential typedef wrapper + parent = node.parent() + if parent and parent.kind() == "type_definition": + node_to_extract = parent + else: + # Check if this is part of a declaration + if parent and parent.kind() == "declaration": + node_to_extract = parent + else: + node_to_extract = node + + range_info = node_to_extract.range() + start = range_info.start.line + end = range_info.end.line + + # Extract the lines, including the semicolon if needed + text = '\n'.join(lines[start:end + 1]) + if not text.rstrip().endswith(';'): + # Look for semicolon on next line + if end + 1 < len(lines) and lines[end + 1].strip() == ';': + text += '\n;' + end += 1 + + sym = ExtractedSymbol( + name=type_name, + kind="type", + text=text, + start_line=start + 1, + end_line=end + 1, + ) + + # Enums go to a separate collection + if type_kind == "enum": + enums[type_name] = sym + else: + types[type_name] = sym + + # Extract typedefs + for node in root.root().find_all(kind="type_definition"): + declarator = node.field("declarator") + if declarator is None: + continue + + # Get the typedef'd name + type_name = None + if declarator.kind() == "type_identifier": + type_name = declarator.text() + else: 
+ for child in declarator.children(): + if child.kind() == "type_identifier": + type_name = child.text() + break + + if type_name is None or type_name not in type_names: + continue + + # Don't re-add if we already have this from struct extraction + if type_name in types: + continue + + # Skip typedefs of enums - they're already extracted into enums dict + # Check if this typedef contains an enum_specifier + is_enum_typedef = False + for child in node.children(): + if child.kind() == "enum_specifier": + is_enum_typedef = True + break + if is_enum_typedef: + continue + + range_info = node.range() + start = range_info.start.line + end = range_info.end.line + + text = '\n'.join(lines[start:end + 1]) + + types[type_name] = ExtractedSymbol( + name=type_name, + kind="type", + text=text, + start_line=start + 1, + end_line=end + 1, + ) + + return enums, types + + +def extract_function_declarations(root: SgRoot, content: str, func_names: set[str]) -> tuple[dict[str, ExtractedSymbol], dict[str, ExtractedSymbol]]: + """Extract function declarations from header. + + Returns: + Tuple of (non_inline_functions, inline_functions) + Inline functions are kept separate to be placed after all types are defined. 
+ """ + non_inline_functions = {} + inline_functions = {} + lines = content.split('\n') + + # Find function declarations (not definitions - those have bodies) + for node in root.root().find_all(kind="declaration"): + declarator = node.field("declarator") + if declarator is None: + continue + + # Check if this has a function_declarator + has_func_decl = False + check_node = declarator + while check_node: + if check_node.kind() == "function_declarator": + has_func_decl = True + break + # Look in children + found = None + for child in check_node.children(): + if child.kind() in ("function_declarator", "pointer_declarator"): + found = child + break + check_node = found + + if not has_func_decl: + continue + + # Get the function name + func_name = find_identifier_in_declarator(declarator) + if func_name is None or func_name not in func_names: + continue + + range_info = node.range() + start = range_info.start.line + end = range_info.end.line + + text = '\n'.join(lines[start:end + 1]) + + non_inline_functions[func_name] = ExtractedSymbol( + name=func_name, + kind="function", + text=text, + start_line=start + 1, + end_line=end + 1, + ) + + # Find static inline function definitions - these go to a separate collection + for node in root.root().find_all(kind="function_definition"): + declarator = node.field("declarator") + if declarator is None: + continue + + func_name = find_identifier_in_declarator(declarator) + if func_name is None or func_name not in func_names: + continue + + full_text = node.text() + prefix = full_text.split(func_name)[0] if func_name in full_text else "" + + # Only include if static inline + if "static" in prefix and "inline" in prefix: + range_info = node.range() + start = range_info.start.line + end = range_info.end.line + + text = '\n'.join(lines[start:end + 1]) + + inline_functions[func_name] = ExtractedSymbol( + name=func_name, + kind="inline_function", + text=text, + start_line=start + 1, + end_line=end + 1, + ) + + return non_inline_functions, 
inline_functions + + +def find_type_references(text: str, all_type_names: set[str]) -> dict[str, str]: + """Find all type references in a piece of code. + + Returns a dict mapping type_name -> kind ('struct', 'union', 'enum', or 'typedef') + for types that are referenced but might need forward declarations. + + We detect: + - struct foo * -> needs forward decl "struct foo;" + - union foo * -> needs forward decl "union foo;" + - struct foo field; -> needs full definition (embedded field - can't forward declare) + - enum foo field; -> needs full definition (embedded) + """ + references = {} + + # Pattern for struct/union/enum references + # Match: struct/union/enum type_name followed by * (pointer) or identifier (field) + for kind in ['struct', 'union', 'enum']: + # Find all occurrences of "struct typename" or "union typename" etc + pattern = rf'\b{kind}\s+(\w+)\s*(\*?)' + for match in re.finditer(pattern, text): + type_name = match.group(1) + is_pointer = bool(match.group(2)) + if type_name in all_type_names: + # For pointers, we can use forward declarations + # For embedded fields, we need the full type + if is_pointer: + if type_name not in references: + references[type_name] = kind + # If it's not a pointer, it's an embedded field - mark as needing full type + # We'll handle this differently (can't forward declare) + + return references + + +def find_embedded_type_references(text: str, all_type_names: set[str]) -> set[str]: + """Find types that are embedded (not pointers) and need full definitions. + + These CANNOT be forward declared - the including header must come first. 
+ """ + embedded = set() + + # Pattern for embedded struct/union/enum fields (not pointers) + # Match: struct/union/enum type_name identifier; (without *) + for kind in ['struct', 'union', 'enum']: + # Look for embedded fields: "struct foo bar;" or "struct foo bar[N];" + # NOT "struct foo *bar;" (pointer) + pattern = rf'\b{kind}\s+(\w+)\s+(?!\*)\w+[\s\[\];]' + for match in re.finditer(pattern, text): + type_name = match.group(1) + if type_name in all_type_names: + embedded.add(type_name) + + return embedded + + +def extract_function_pointer_typedefs(content: str) -> dict[str, ExtractedSymbol]: + """Extract function pointer typedefs that may not be caught by ast-grep. + + These have the form: typedef returntype (*name)(params); + """ + typedefs = {} + lines = content.split('\n') + + # Pattern: typedef (*)(); + pattern = r'typedef\s+\w+\s+\(\*(\w+)\)\s*\([^)]*\)\s*;' + + for i, line in enumerate(lines): + match = re.match(pattern, line) + if match: + name = match.group(1) + typedefs[name] = ExtractedSymbol( + name=name, + kind="type", + text=line, + start_line=i + 1, + end_line=i + 1, + ) + + return typedefs + + +def generate_forward_declarations(type_refs: dict[str, str]) -> list[str]: + """Generate forward declaration statements for the given type references. 
+ + Args: + type_refs: dict mapping type_name -> kind ('struct', 'union', 'enum') + + Returns: + List of forward declaration strings + """ + decls = [] + for type_name, kind in sorted(type_refs.items()): + if kind in ('struct', 'union'): + decls.append(f"{kind} {type_name};") + # Note: enums can't be forward declared in C + return decls + + +def find_identifier_in_declarator(node) -> str | None: + """Recursively find the identifier in a declarator.""" + if node.kind() == "identifier": + return node.text() + + # Check field access first + field_result = node.field("declarator") + if field_result: + result = find_identifier_in_declarator(field_result) + if result: + return result + + # Then check children + for child in node.children(): + if child.kind() == "identifier": + return child.text() + elif child.kind() in ("function_declarator", "pointer_declarator", + "array_declarator", "parenthesized_declarator"): + result = find_identifier_in_declarator(child) + if result: + return result + + return None + + +def detect_required_includes(text: str) -> list[str]: + """Detect which standard/ofi includes are needed based on types used in the code. + + Returns a list of include directives in the correct order. + """ + includes = [] + + # Map of type patterns to their required includes + # Order matters - more fundamental includes should come first + # NOTE: These includes are for documentation purposes since the wrapper + # cxip.h already includes all external dependencies. But they help + # make each split header more self-documenting. 
+    include_checks = [
+        # Standard C headers
+        (r'\b(uint8_t|uint16_t|uint32_t|uint64_t|int8_t|int16_t|int32_t|int64_t|uintptr_t)\b',
+         '<stdint.h>'),
+        (r'\bsize_t\b', '<stddef.h>'),
+        (r'\bbool\b', '<stdbool.h>'),
+
+        # POSIX headers
+        (r'\bpthread_(rwlock_t|mutex_t|cond_t|t)\b', '<pthread.h>'),
+        (r'\bsem_t\b', '<semaphore.h>'),
+
+        # OFI headers - order matters for dependencies
+        # Note: ofi_spin_t and ofi_mutex_t are both defined in ofi_lock.h
+        (r'\b(dlist_entry|slist_entry|slist|dlist_ts)\b', '<ofi_list.h>'),
+        (r'\bofi_atomic32_t\b', '<ofi_atom.h>'),
+        (r'\b(ofi_spin_t|ofi_mutex_t)\b', '<ofi_lock.h>'),
+    ]
+
+    seen = set()
+    for pattern, include in include_checks:
+        if include not in seen and re.search(pattern, text):
+            includes.append(f'#include {include}')
+            seen.add(include)
+
+    return includes
+
+
+def generate_header_file(header: HeaderContent, all_type_names: set[str],
+                         types_defined_in_header: set[str],
+                         enum_names_in_enums_h: set[str]) -> str:
+    """Generate the content of a new header file.
+
+    Note: This generates headers WITHOUT includes. The main cxip.h will
+    include everything in the correct order to handle dependencies.
+    Individual headers are not meant to be standalone.
+
+    IMPORTANT: Inline functions are NOT included in split headers - they
+    remain in cxip.h after all type definitions, because they often
+    access struct members from multiple modules.
+ + Args: + header: The header content to generate + all_type_names: Set of all known type names across all headers + types_defined_in_header: Set of type names defined in THIS header + enum_names_in_enums_h: Set of enum names that are in enums.h (skip these) + """ + guard_start, guard_end = get_include_guard(header.path) + + lines = [] + lines.append(LICENSE_HEADER) + lines.append(guard_start) + lines.append("") + + # Group symbols by kind, preserving original source order (by start_line) + # NOTE: inline_function kind is excluded - those stay in cxip.h + # NOTE: Skip enums that are already in enums.h + macros = [s for s in header.symbols if s.kind == "macro"] + types = [s for s in header.symbols if s.kind == "type" and s.name not in enum_names_in_enums_h] + functions = [s for s in header.symbols if s.kind == "function"] + + # Sort by original source line to preserve dependency order + macros.sort(key=lambda s: s.start_line) + types.sort(key=lambda s: s.start_line) + functions.sort(key=lambda s: s.start_line) + + # Collect all text to detect required includes + all_symbol_text = '\n'.join(sym.text for sym in header.symbols) + required_includes = detect_required_includes(all_symbol_text) + if required_includes: + for inc in required_includes: + lines.append(inc) + lines.append("") + + # Compute forward declarations needed for function declarations + # (types used as pointers in function signatures) + all_text = '\n'.join(sym.text for sym in functions) + type_refs = find_type_references(all_text, all_type_names) + + # Also scan type definitions for function pointer members + # These can have struct/union pointers in their parameter lists + # e.g., int (*callback)(struct cxip_req *req, const union c_event *event); + type_text = '\n'.join(sym.text for sym in types) + type_refs_from_types = find_type_references(type_text, all_type_names) + type_refs.update(type_refs_from_types) + + # Remove types that are defined in this header (no forward decl needed) + for 
defined_type in types_defined_in_header: + type_refs.pop(defined_type, None) + + # Generate forward declarations + forward_decls = generate_forward_declarations(type_refs) + if forward_decls: + lines.append("/* Forward declarations */") + for decl in forward_decls: + lines.append(decl) + lines.append("") + + # Add macros first + if macros: + lines.append("/* Macros */") + for sym in macros: + lines.append(sym.text) + lines.append("") + + # Add types - preserve original order for dependencies + if types: + lines.append("/* Type definitions */") + for sym in types: + lines.append(sym.text) + lines.append("") + + # Add function declarations (non-inline only) + if functions: + lines.append("/* Function declarations */") + for sym in functions: + lines.append(sym.text) + lines.append("") + + lines.append(guard_end) + + return '\n'.join(lines) + + +def generate_mr_lac_cache_header(mr_lac_cache_sym: ExtractedSymbol) -> str: + """Generate a dedicated header for cxip_mr_lac_cache to break the mr.h/ctrl.h cycle. + + This struct is used by ctrl.h but defined in mr.h, creating a circular dependency. + By moving it to its own header that comes before both, we break the cycle. + """ + guard_start, guard_end = get_include_guard("prov/cxi/include/cxip/mr_lac_cache.h") + + lines = [] + lines.append(LICENSE_HEADER) + lines.append(guard_start) + lines.append("") + lines.append("/* cxip_mr_lac_cache type definition */") + lines.append("/* This is in a separate header to break the circular dependency between mr.h and ctrl.h */") + lines.append("") + lines.append("/* Forward declarations */") + lines.append("struct cxip_ctrl_req;") + lines.append("") + lines.append(mr_lac_cache_sym.text) + lines.append("") + lines.append(guard_end) + + return '\n'.join(lines) + + +def find_macro_references(text: str, all_macro_names: set[str]) -> set[str]: + """Find all macros referenced in the code. 
    Macros can be used as:
    - Bit-field widths: uint32_t field:MACRO_NAME;
    - Array sizes: type arr[MACRO_NAME];
    - Initializers: .field = MACRO_NAME
    - etc.

    Args:
        text: C source text to scan.
        all_macro_names: Candidate macro names to look for.

    Returns:
        The subset of all_macro_names that appear as standalone tokens in text.
    """
    referenced = set()
    # NOTE(review): one full regex scan of `text` per known macro; fine at this
    # scale, but a single combined alternation pattern would do it in one pass.
    for macro_name in all_macro_names:
        # Look for the macro name as a standalone token
        pattern = rf'\b{re.escape(macro_name)}\b'
        if re.search(pattern, text):
            referenced.add(macro_name)
    return referenced


def generate_enums_header(enums: list[ExtractedSymbol]) -> str:
    """Generate a dedicated enums.h header with all enum definitions.

    This header is included first because enums are needed by many structs
    (like cxip_environment) that embed enum fields.

    Args:
        enums: Extracted enum symbols; emitted verbatim.

    Returns:
        The complete enums.h content as a single string.
    """
    guard_start, guard_end = get_include_guard("prov/cxi/include/cxip/enums.h")

    lines = []
    lines.append(LICENSE_HEADER)
    lines.append(guard_start)
    lines.append("")
    lines.append("/* All enum type definitions */")
    lines.append("/* Included first because many structs embed enum fields */")
    lines.append("")

    # Sort by original source line to preserve order
    sorted_enums = sorted(enums, key=lambda s: s.start_line)
    for sym in sorted_enums:
        lines.append(sym.text)
        lines.append("")

    lines.append(guard_end)

    return '\n'.join(lines)


def build_header_dependency_graph(
    headers: dict[str, 'HeaderContent'],
    type_to_header: dict[str, str],
    macro_to_header: dict[str, str],
    all_type_names: set[str],
    all_macro_names: set[str]
) -> nx.DiGraph:
    """Build a directed graph of header dependencies based on embedded type and macro usage.

    For each header, we analyze which types it uses as embedded fields (not pointers)
    and which macros it references. If a type is embedded or a macro is used,
    the header defining that symbol must be included first.
    Args:
        headers: Dict mapping header path -> HeaderContent
        type_to_header: Dict mapping type name -> header path where it's defined
        macro_to_header: Dict mapping macro name -> header path where it's defined
        all_type_names: Set of all known type names
        all_macro_names: Set of all known macro names

    Returns:
        A directed graph where edge A->B means A must be included before B
    """
    G = nx.DiGraph()

    # Add all headers as nodes
    for header_path in headers:
        header_name = Path(header_path).name
        G.add_node(header_name)

    # Always include enums.h first
    G.add_node("enums.h")

    # For each header, find embedded type and macro dependencies
    for header_path, header_content in headers.items():
        header_name = Path(header_path).name

        # Collect all text from types defined in this header
        type_texts = [s.text for s in header_content.symbols if s.kind == "type"]
        all_text = '\n'.join(type_texts)

        # Find embedded type references (types used as fields, not pointers)
        embedded_refs = find_embedded_type_references(all_text, all_type_names)

        for embedded_type in embedded_refs:
            # Find which header defines this type
            if embedded_type in type_to_header:
                dep_header_path = type_to_header[embedded_type]
                dep_header_name = Path(dep_header_path).name

                # Don't add self-edges
                if dep_header_name != header_name:
                    # Add edge: dependency must come before this header
                    G.add_edge(dep_header_name, header_name)
                    print(f" Dependency: {header_name} embeds type from {dep_header_name} ({embedded_type})",
                          file=sys.stderr)

        # Find macro references (used in bit-fields, array sizes, etc.)
        macro_refs = find_macro_references(all_text, all_macro_names)
        # Also include macros defined in this header (to exclude self-refs)
        macros_in_this_header = {s.name for s in header_content.symbols if s.kind == "macro"}

        for macro_name in macro_refs:
            if macro_name in macros_in_this_header:
                continue  # Skip self-references
            if macro_name in macro_to_header:
                dep_header_path = macro_to_header[macro_name]
                dep_header_name = Path(dep_header_path).name

                if dep_header_name != header_name:
                    G.add_edge(dep_header_name, header_name)
                    print(f" Dependency: {header_name} uses macro from {dep_header_name} ({macro_name})",
                          file=sys.stderr)

    # enums.h should come before everything else
    # Adding edges while iterating G.nodes() is safe here: both endpoints of
    # every new edge already exist, so the node set does not change mid-loop.
    for node in G.nodes():
        if node != "enums.h":
            G.add_edge("enums.h", node)

    # mr_lac_cache.h must come before ctrl.h (to break the circular dependency)
    # ctrl.h embeds cxip_mr_lac_cache which is defined in mr_lac_cache.h
    G.add_node("mr_lac_cache.h")
    G.add_edge("enums.h", "mr_lac_cache.h")  # enums must come first
    # mr_lac_cache.h embeds union cxip_match_bits from msg.h
    if "msg.h" in G.nodes():
        G.add_edge("msg.h", "mr_lac_cache.h")
        print(f" Dependency: mr_lac_cache.h embeds union from msg.h (cxip_match_bits)", file=sys.stderr)
    if "ctrl.h" in G.nodes():
        G.add_edge("mr_lac_cache.h", "ctrl.h")
        print(f" Dependency: ctrl.h needs mr_lac_cache.h (cxip_mr_lac_cache)", file=sys.stderr)
    # Also mr.h might reference it
    if "mr.h" in G.nodes():
        G.add_edge("mr_lac_cache.h", "mr.h")
        print(f" Dependency: mr.h needs mr_lac_cache.h (cxip_mr_lac_cache)", file=sys.stderr)

    return G


def compute_header_order(G: nx.DiGraph, fallback_order: list[str]) -> list[str]:
    """Compute the topological order of headers.
    Args:
        G: Dependency graph where edge A->B means A must come before B
        fallback_order: Default order to use for headers not in the graph

    Returns:
        List of header names in correct dependency order
    """
    try:
        # Use topological sort to get correct order
        topo_order = list(nx.topological_sort(G))
        print(f"Topological order computed: {len(topo_order)} headers", file=sys.stderr)

        # Add any headers from fallback_order that aren't in the graph.
        # topological_sort yields each node exactly once; the seen-set mainly
        # guards the merge with fallback_order below.
        result = []
        seen = set()
        for h in topo_order:
            if h not in seen:
                result.append(h)
                seen.add(h)

        for h in fallback_order:
            if h not in seen:
                result.append(h)
                seen.add(h)

        return result

    except nx.NetworkXUnfeasible as e:
        # Cycle detected - report it and fall back to manual order
        print(f"WARNING: Cycle detected in dependency graph: {e}", file=sys.stderr)
        try:
            cycle = nx.find_cycle(G)
            print(f" Cycle: {cycle}", file=sys.stderr)
        except nx.NetworkXNoCycle:
            pass
        return fallback_order


def generate_wrapper_cxip_h(headers: dict[str, 'HeaderContent'],
                            type_to_header: dict[str, str],
                            macro_to_header: dict[str, str],
                            all_type_names: set[str],
                            all_macro_names: set[str],
                            original_content: str,
                            inline_functions: list[ExtractedSymbol],
                            func_ptr_typedefs: list[ExtractedSymbol] | None = None) -> str:
    """Generate a new cxip.h that includes all the split headers.

    This preserves the original includes and external dependencies,
    then includes all the new split headers, followed by inline functions.

    The structure is:
    1. License header
    2. Include guard
    3. External includes (ofi, libcxi, etc.)
    4. Split headers (types, macros, non-inline function declarations)
    5. Inline function definitions (need all types to be defined first)
    6. 
End guard
    """
    lines = []

    # Extract the original license and includes section
    original_lines = original_content.split('\n')

    # Copy license header
    # NOTE(review): assumes the original file starts with "/*" on line 0 and the
    # license block ends within the first 10 lines — a longer block would be
    # silently truncated. TODO confirm against the real cxip.h.
    lines.append("/*")
    for line in original_lines[1:10]:  # Get the license block
        if line.startswith(" */"):
            lines.append(line)
            break
        lines.append(line)

    lines.append("")
    lines.append("#ifndef _CXIP_PROV_H_")
    lines.append("#define _CXIP_PROV_H_")
    lines.append("")

    # Copy all the original system/library includes
    # NOTE(review): a non-#include preprocessor line (e.g. a #define) inside the
    # include section matches neither branch below, so it is silently dropped.
    in_includes = False
    for line in original_lines:
        if line.startswith("#include"):
            in_includes = True
            # Skip only the new split headers (cxip/), keep other cxip includes
            if "cxip/" not in line:
                lines.append(line)
        elif in_includes and line.strip() == "":
            lines.append("")
        elif in_includes and not line.startswith("#"):
            break

    # Add function pointer typedefs that aren't in the plan
    # These need to come before the split headers that use them
    if func_ptr_typedefs:
        lines.append("")
        lines.append("/* Forward declarations for function pointer typedef parameters */")
        # Extract struct names referenced in the typedefs
        for typedef in func_ptr_typedefs:
            for match in re.finditer(r'struct\s+(\w+)', typedef.text):
                struct_name = match.group(1)
                lines.append(f"struct {struct_name};")
        lines.append("")
        lines.append("/* Function pointer typedefs (needed by split headers) */")
        sorted_typedefs = sorted(func_ptr_typedefs, key=lambda s: s.start_line)
        for typedef in sorted_typedefs:
            lines.append(typedef.text)
        lines.append("")

    # Add extern declarations for global variables used in source files
    lines.append("/* Extern declarations for global variables */")
    lines.append("extern struct cxip_environment cxip_env;")
    lines.append("extern struct fi_provider cxip_prov;")
    lines.append("extern struct util_prov cxip_util_prov;")
    lines.append("extern char cxip_prov_name[];")
    lines.append("extern struct fi_fabric_attr cxip_fabric_attr;")
    lines.append("extern struct fi_domain_attr cxip_domain_attr;")
    lines.append("extern bool cxip_collectives_supported;")
    lines.append("extern int sc_page_size;")
    lines.append("extern struct slist cxip_if_list;")
    lines.append("")
    lines.append("/* Coll trace globals used by inline trace functions */")
    lines.append("extern bool cxip_coll_trace_muted;")
    lines.append("extern bool cxip_coll_trace_append;")
    lines.append("extern bool cxip_coll_trace_linebuf;")
    lines.append("extern int cxip_coll_trace_rank;")
    lines.append("extern int cxip_coll_trace_numranks;")
    lines.append("extern FILE *cxip_coll_trace_fid;")
    lines.append("extern bool cxip_coll_prod_trace_initialized;")
    lines.append("extern uint64_t cxip_coll_trace_mask;")
    lines.append("")

    lines.append("/* Split headers - types, macros, and function declarations */")

    # Build dependency graph and compute topological order
    print("\nBuilding header dependency graph...", file=sys.stderr)
    dep_graph = build_header_dependency_graph(headers, type_to_header, macro_to_header,
                                              all_type_names, all_macro_names)

    # Fallback order in case of cycles or issues
    # NOTE: mr_lac_cache.h breaks the circular dependency between mr.h and ctrl.h
    # by defining cxip_mr_lac_cache in a separate header that comes before both.
    fallback_order = [
        "enums.h", "addr.h", "common.h", "log.h", "env.h", "if.h",
        "iomm.h", "evtq.h", "cmdq.h", "pte.h", "eq.h", "cq.h", "cntr.h",
        "msg.h",  # Must be before mr_lac_cache.h (defines cxip_match_bits)
        "mr_lac_cache.h",  # Contains cxip_mr_lac_cache (breaks mr.h/ctrl.h cycle)
        "mr.h",  # Uses cxip_mr_lac_cache from mr_lac_cache.h
        "ctrl.h",  # Uses cxip_mr_lac_cache from mr_lac_cache.h
        "dom.h", "av.h", "fabric.h", "auth.h",
        "req.h", "fc.h", "msg_hpc.h", "rma.h", "atomic.h", "txc.h", "rxc.h",
        "curl.h", "repsum.h", "coll_trace.h", "coll.h", "zbcoll.h", "ep.h",
        "req_buf.h", "ptelist_buf.h", "rdzv_pte.h", "portals_table.h",
        "info.h", "nic.h", "telemetry.h",
    ]

    header_order = compute_header_order(dep_graph, fallback_order)
    print(f"Header include order: {header_order}", file=sys.stderr)

    # Get list of header names that exist
    existing_headers = {Path(h).name for h in headers.keys()}
    existing_headers.add("enums.h")  # Always include enums.h
    existing_headers.add("mr_lac_cache.h")  # Include cycle-breaking header

    # Include in computed order
    for h in header_order:
        if h in existing_headers:
            lines.append(f'#include "cxip/{h}"')

    # Add inline functions after all types are defined
    if inline_functions:
        lines.append("")
        lines.append("/*")
        lines.append(" * Inline function definitions")
        lines.append(" *")
        lines.append(" * These are kept here (not in split headers) because they often")
        lines.append(" * access struct members from multiple modules, requiring all types")
        lines.append(" * to be fully defined first.")
        lines.append(" */")
        lines.append("")

        # Sort by original source line to preserve order
        sorted_inlines = sorted(inline_functions, key=lambda s: s.start_line)
        for func in sorted_inlines:
            lines.append(func.text)
            lines.append("")

    lines.append("#endif /* _CXIP_PROV_H_ */")
    lines.append("")

    return '\n'.join(lines)


def main():
    cxip_h_path = Path("prov/cxi/include/cxip.h")
output_dir = Path("prov/cxi/include/cxip") + + if not cxip_h_path.exists(): + print(f"Error: {cxip_h_path} not found.", file=sys.stderr) + sys.exit(1) + + # Check if we're receiving input from a pipeline + if not sys.stdin.isatty(): + print("Reading refactor plan from stdin (pipeline mode)...", file=sys.stderr) + plan = load_refactor_plan() + else: + plan_path = Path("prov/cxi/scripts/refactor_plan.json") + if not plan_path.exists(): + print(f"Error: {plan_path} not found. Run generate_refactor_plan.py first.", + file=sys.stderr) + print("Or pipe the output: ./generate_refactor_plan.py | ./apply_refactor.py", + file=sys.stderr) + sys.exit(1) + print("Loading refactor plan from file...", file=sys.stderr) + plan = load_refactor_plan(str(plan_path)) + + # Build a map of symbol name -> target header + symbol_to_header: dict[str, str] = {} + for header, info in plan["new_headers"].items(): + for func in info.get("functions", []): + symbol_to_header[func] = header + for typ in info.get("types", []): + symbol_to_header[typ] = header + for macro in info.get("macros", []): + symbol_to_header[macro] = header + + print(f"Found {len(symbol_to_header)} symbols to extract", file=sys.stderr) + + # Read and parse cxip.h - use the backup if it exists (original content) + backup_path = cxip_h_path.with_suffix('.h.orig') + if backup_path.exists(): + print("Using backup cxip.h.orig for symbol extraction...", file=sys.stderr) + content = backup_path.read_text() + else: + print("Parsing cxip.h...", file=sys.stderr) + content = cxip_h_path.read_text() + root = SgRoot(content, "c") + + # Collect symbol names by kind + macro_names = {name for name, header in symbol_to_header.items() + if any(name in info.get("macros", []) + for info in plan["new_headers"].values())} + type_names = {name for name, header in symbol_to_header.items() + if any(name in info.get("types", []) + for info in plan["new_headers"].values())} + func_names = {name for name, header in symbol_to_header.items() + if 
any(name in info.get("functions", []) + for info in plan["new_headers"].values())} + + print(f"Looking for: {len(macro_names)} macros, {len(type_names)} types, " + f"{len(func_names)} functions", file=sys.stderr) + + # Extract symbols + print("Extracting macros...", file=sys.stderr) + extracted_macros = extract_macro_definitions(content, macro_names) + print(f" Found {len(extracted_macros)} macros", file=sys.stderr) + + print("Extracting types...", file=sys.stderr) + extracted_enums, extracted_types = extract_type_definitions(root, content, type_names) + print(f" Found {len(extracted_enums)} enums (-> enums.h)", file=sys.stderr) + print(f" Found {len(extracted_types)} other types", file=sys.stderr) + + print("Extracting functions...", file=sys.stderr) + extracted_functions, extracted_inlines = extract_function_declarations(root, content, func_names) + print(f" Found {len(extracted_functions)} non-inline functions", file=sys.stderr) + print(f" Found {len(extracted_inlines)} inline functions (kept in cxip.h)", file=sys.stderr) + + # Extract function pointer typedefs (not caught by ast-grep) + # These are needed by structs that use them, even if not in the plan + print("Extracting function pointer typedefs...", file=sys.stderr) + func_ptr_typedefs = extract_function_pointer_typedefs(content) + # Remove any already extracted + for name in list(func_ptr_typedefs.keys()): + if name in extracted_types: + del func_ptr_typedefs[name] + if func_ptr_typedefs: + print(f" Found {len(func_ptr_typedefs)} function pointer typedefs: {list(func_ptr_typedefs.keys())}", + file=sys.stderr) + # Add these to extracted_types - they'll go to common.h since they're not in the plan + extracted_types.update(func_ptr_typedefs) + + # Combine all extracted symbols (excluding inline functions and enums - they have special handling) + # IMPORTANT: In C, the same name can be used for both a struct/union/enum and a function + # (e.g., struct cxip_domain and int cxip_domain(...)). 
We use composite keys to avoid collisions. + all_extracted = {} + for name, sym in extracted_macros.items(): + all_extracted[f"{name}:macro"] = sym + for name, sym in extracted_types.items(): + all_extracted[f"{name}:type"] = sym + for name, sym in extracted_functions.items(): + all_extracted[f"{name}:function"] = sym + + # Report symbols not found in cxip.h (they might be in .c files) + # Note: inline functions and enums are tracked separately + # Strip the :kind suffix for comparison with symbol_to_header + extracted_names = {key.rsplit(':', 1)[0] for key in all_extracted.keys()} + all_symbol_names = extracted_names | set(extracted_inlines.keys()) | set(extracted_enums.keys()) + not_found = set(symbol_to_header.keys()) - all_symbol_names + if not_found: + print(f"\nSymbols not found in cxip.h ({len(not_found)}):", file=sys.stderr) + for name in sorted(not_found)[:20]: + details = plan.get("symbol_details", {}).get(name, {}) + defined = details.get("defined_in", []) + print(f" {name}: defined in {defined}", file=sys.stderr) + if len(not_found) > 20: + print(f" ... 
and {len(not_found) - 20} more", file=sys.stderr) + + # Group extracted symbols by target header (excluding inline functions) + headers: dict[str, HeaderContent] = defaultdict(lambda: HeaderContent(path="")) + for key, sym in all_extracted.items(): + # Extract the original symbol name from composite key + name = key.rsplit(':', 1)[0] + target_header = symbol_to_header.get(name) + if target_header: + if headers[target_header].path == "": + headers[target_header].path = target_header + headers[target_header].symbols.append(sym) + + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + # Generate enums.h first (contains all enum definitions) + if extracted_enums: + enums_header_path = output_dir / "enums.h" + enums_content = generate_enums_header(list(extracted_enums.values())) + enums_header_path.write_text(enums_content) + print(f"\nCreated {enums_header_path} ({len(extracted_enums)} enums)", file=sys.stderr) + + # Generate mr_lac_cache.h to break the circular dependency between mr.h and ctrl.h + # cxip_mr_lac_cache is embedded in ctrl.h but defined in mr.h + mr_lac_cache_sym = extracted_types.pop("cxip_mr_lac_cache", None) + if mr_lac_cache_sym: + # Also remove from the mr.h header's symbols list + mr_h_path = "prov/cxi/include/cxip/mr.h" + if mr_h_path in headers: + headers[mr_h_path].symbols = [ + s for s in headers[mr_h_path].symbols + if s.name != "cxip_mr_lac_cache" + ] + + mr_lac_cache_header_path = output_dir / "mr_lac_cache.h" + mr_lac_cache_content = generate_mr_lac_cache_header(mr_lac_cache_sym) + mr_lac_cache_header_path.write_text(mr_lac_cache_content) + print(f"Created {mr_lac_cache_header_path} (breaks mr.h/ctrl.h cycle)", file=sys.stderr) + + # Build set of all type names for forward declaration analysis + all_type_names = set(extracted_types.keys()) | set(extracted_enums.keys()) + + # Set of enum names that are in enums.h (to skip in individual headers) + enum_names_in_enums_h = set(extracted_enums.keys()) + + # Build 
type_to_header mapping for dependency analysis + type_to_header: dict[str, str] = {} + for name, sym in extracted_types.items(): + target = symbol_to_header.get(name) + if target: + type_to_header[name] = target + for name, sym in extracted_enums.items(): + # Enums are in enums.h + type_to_header[name] = "enums.h" + # cxip_mr_lac_cache is in its own header (to break mr.h/ctrl.h cycle) + if mr_lac_cache_sym: + type_to_header["cxip_mr_lac_cache"] = "mr_lac_cache.h" + + # Build macro_to_header mapping for dependency analysis + macro_to_header: dict[str, str] = {} + for name, sym in extracted_macros.items(): + target = symbol_to_header.get(name) + if target: + macro_to_header[name] = target + all_macro_names = set(extracted_macros.keys()) + + # Generate and write new header files + print(f"\nGenerating {len(headers)} new headers...", file=sys.stderr) + for header_path, header_content in sorted(headers.items()): + if not header_content.symbols: + continue + + # Get types defined in this specific header + types_in_header = {s.name for s in header_content.symbols if s.kind == "type"} + + output_path = output_dir / Path(header_path).name + content = generate_header_file(header_content, all_type_names, types_in_header, enum_names_in_enums_h) + output_path.write_text(content) + print(f" Created {output_path} ({len(header_content.symbols)} symbols)", + file=sys.stderr) + + # Collect function pointer typedefs that aren't assigned to any header + unassigned_func_ptr_typedefs = [ + sym for name, sym in func_ptr_typedefs.items() + if name not in symbol_to_header + ] + + # Generate new wrapper cxip.h with inline functions at the end + original_cxip_content = cxip_h_path.read_text() + inline_func_list = list(extracted_inlines.values()) + new_cxip_h = generate_wrapper_cxip_h(headers, type_to_header, macro_to_header, + all_type_names, all_macro_names, + original_cxip_content, inline_func_list, + unassigned_func_ptr_typedefs) + + # Save original backup and overwrite cxip.h directly + 
backup_path = cxip_h_path.with_suffix('.h.orig') + if not backup_path.exists(): + backup_path.write_text(original_cxip_content) + print(f"\nBacked up original cxip.h to {backup_path}", file=sys.stderr) + + # Overwrite cxip.h directly + cxip_h_path.write_text(new_cxip_h) + print(f"Updated {cxip_h_path}", file=sys.stderr) + + # Report summary + print(f"\nSummary:", file=sys.stderr) + print(f" {len(extracted_enums)} enums -> enums.h (included first)", file=sys.stderr) + if mr_lac_cache_sym: + print(f" 1 type (cxip_mr_lac_cache) -> mr_lac_cache.h (breaks cycle)", file=sys.stderr) + print(f" {len(extracted_macros)} macros -> split headers", file=sys.stderr) + print(f" {len(extracted_types)} other types -> split headers", file=sys.stderr) + print(f" {len(extracted_functions)} non-inline functions -> split headers", file=sys.stderr) + print(f" {len(extracted_inlines)} inline functions -> kept in cxip.h (after all types)", file=sys.stderr) + + print("\nDone! Run 'make' to build.", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/prov/cxi/scripts/generate_refactor_plan.py b/prov/cxi/scripts/generate_refactor_plan.py new file mode 100755 index 00000000000..e62e8c341da --- /dev/null +++ b/prov/cxi/scripts/generate_refactor_plan.py @@ -0,0 +1,610 @@ +#!/usr/bin/env -S uv run +# /// script +# requires-python = ">=3.11" +# dependencies = [] +# /// +""" +Generate a refactoring plan from symbol analysis. + +Improved heuristics: +1. Use naming conventions first - cxip_foo_* belongs in foo.h +2. Detect callback functions - functions assigned to struct fields aren't dead +3. Handle fundamental types - widely-used types go to logical home based on name +4. 
Track type dependencies for proper header ordering +""" + +import json +import re +import sys +from pathlib import Path +from dataclasses import dataclass, field +from collections import defaultdict, Counter +from typing import Literal + + +@dataclass +class SymbolInfo: + name: str + kind: Literal["function", "type", "macro"] + defined_in: list[str] + declared_in: list[str] + used_in: list[str] + is_static: bool = False + is_inline: bool = False + is_definition: bool = False + full_text: str = "" + signature: str = "" + + +@dataclass +class RefactorPlan: + symbol_locations: dict[str, str] = field(default_factory=dict) + new_headers: dict[str, list[str]] = field(default_factory=dict) + private_symbols: dict[str, list[str]] = field(default_factory=dict) + inline_handling: dict[str, str] = field(default_factory=dict) + likely_callbacks: list[str] = field(default_factory=list) + + +def is_src_file(path: str) -> bool: + return "prov/cxi/src/" in path and path.endswith(".c") + + +def is_test_file(path: str) -> bool: + return "prov/cxi/test/" in path + + +def is_header_file(path: str) -> bool: + return path.endswith(".h") + + +def is_main_header(path: str) -> bool: + return path.endswith("cxip.h") and "include/cxip.h" in path + + +def get_tu_name(path: str) -> str: + stem = Path(path).stem + if stem.startswith("cxip_"): + return stem[5:] + return stem + + +def get_header_for_tu(tu_name: str) -> str: + return f"prov/cxi/include/cxip/{tu_name}.h" + + +# Naming convention patterns - order matters (more specific first) +NAMING_PATTERNS = [ + # Specific subsystem patterns + (r'^cxip_txc_hpc', 'txc'), + (r'^cxip_txc_rnr', 'txc'), + (r'^cxip_rxc_hpc', 'rxc'), + (r'^cxip_rxc_rnr', 'rxc'), + (r'^cxip_msg_hpc', 'msg_hpc'), + (r'^cxip_msg_rnr', 'msg_rnr'), + (r'^cxip_rdzv_pte', 'rdzv_pte'), + (r'^cxip_rdzv_match', 'rdzv_pte'), + (r'^cxip_rdzv_nomatch', 'rdzv_pte'), + (r'^cxip_ptelist_buf', 'ptelist_buf'), + (r'^cxip_req_buf', 'req_buf'), + (r'^cxip_coll_trace', 'coll_trace'), + 
(r'^cxip_ep_obj', 'ep'), + (r'^cxip_ep_zbcoll', 'zbcoll'), + (r'^cxip_ep_coll', 'coll'), + + # General patterns + (r'^cxip_txc_', 'txc'), + (r'^cxip_rxc_', 'rxc'), + (r'^cxip_mr_', 'mr'), + (r'^cxip_cq_', 'cq'), + (r'^cxip_eq_', 'eq'), + (r'^cxip_ep_', 'ep'), + (r'^cxip_av_', 'av'), + (r'^cxip_cntr_', 'cntr'), + (r'^cxip_domain_', 'dom'), + (r'^cxip_dom_', 'dom'), + (r'^cxip_pte_', 'pte'), + (r'^cxip_cmdq_', 'cmdq'), + (r'^cxip_evtq_', 'evtq'), + (r'^cxip_ctrl_', 'ctrl'), + (r'^cxip_coll_', 'coll'), + (r'^cxip_zbcoll_', 'zbcoll'), + (r'^cxip_curl_', 'curl'), + (r'^cxip_rma_', 'rma'), + (r'^cxip_atomic_', 'atomic'), + (r'^cxip_msg_', 'msg'), + (r'^cxip_if_', 'if'), + (r'^cxip_iomm_', 'iomm'), + (r'^cxip_fabric_', 'fabric'), + (r'^cxip_telemetry_', 'telemetry'), + (r'^cxip_nic_', 'nic'), + (r'^cxip_info_', 'info'), + (r'^cxip_rep_', 'repsum'), + (r'^cxip_faults_', 'faults'), + (r'^cxip_portals_table', 'portals_table'), + (r'^cxip_lni', 'if'), + (r'^cxip_recv_', 'msg'), + (r'^cxip_send_', 'msg'), + (r'^cxip_ux_', 'msg'), + (r'^cxip_fc_', 'fc'), + (r'^cxip_map', 'iomm'), + (r'^cxip_unmap', 'iomm'), + (r'^cxip_copy_', 'mr'), + (r'^cxip_generic_', 'mr'), + (r'^cxip_tree_', 'zbcoll'), + (r'^cxip_check_auth', 'auth'), + (r'^cxip_gen_auth', 'auth'), + + # Type-specific patterns (for struct names) + (r'^cxip_txc$', 'txc'), + (r'^cxip_rxc$', 'rxc'), + (r'^cxip_ep$', 'ep'), + (r'^cxip_mr$', 'mr'), + (r'^cxip_cq$', 'cq'), + (r'^cxip_eq$', 'eq'), + (r'^cxip_cntr$', 'cntr'), + (r'^cxip_av$', 'av'), + (r'^cxip_domain$', 'dom'), + (r'^cxip_fabric$', 'fabric'), + (r'^cxip_pte$', 'pte'), + (r'^cxip_cmdq$', 'cmdq'), + (r'^cxip_evtq$', 'evtq'), + (r'^cxip_req$', 'req'), + (r'^cxip_md$', 'mr'), + (r'^cxip_if$', 'if'), + (r'^cxip_addr$', 'addr'), + (r'^cxip_environment$', 'env'), + (r'^cxip_env$', 'env'), + + # Additional type patterns to reduce common.h usage + (r'^cxip_req_', 'req'), # cxip_req_send, cxip_req_recv, etc. 
+ (r'^cxip_repsum$', 'repsum'), + (r'^cxip_dbl_bits$', 'repsum'), + (r'^_bits2dbl$', 'repsum'), + (r'^_dbl2bits$', 'repsum'), + (r'^_decompose_dbl$', 'repsum'), + (r'^cxip_ctrl$', 'ctrl'), + (r'^cxip_ptelist_req$', 'ptelist_buf'), + (r'^cxip_fltval$', 'coll'), + (r'^cxip_fltminmax$', 'coll'), + (r'^cxip_intval$', 'coll'), + (r'^cxip_iminmax$', 'coll'), + (r'^curl_ops$', 'curl'), + (r'^cxip_match_bits$', 'msg'), + (r'^cxip_llring_mode$', 'cmdq'), + (r'^cxip_le_type$', 'pte'), + (r'^cxip_amo_req_type$', 'atomic'), + (r'^cxip_ats_mlock_mode$', 'iomm'), + (r'^cxip_fid_list$', 'cq'), + (r'^cxip_remap_cp$', 'if'), + (r'^def_event_ht$', 'evtq'), + + # Inline utility function patterns + (r'^is_netsim$', 'ep'), + (r'^cxip_txq_ring$', 'cmdq'), + (r'^cxip_mac_to_nic$', 'if'), + (r'^cxip_cacheline_size$', 'if'), + (r'^cxip_adjust_remote_offset$', 'mr'), + (r'^single_to_double_quote$', 'curl'), + (r'^cxip_json_', 'curl'), + (r'^cxip_set_env_', 'env'), + (r'^cxip_set_recv_', 'rxc'), + (r'^cxip_get_owner_srx$', 'rxc'), + (r'^cxip_is_trig_req$', 'req'), + (r'^cxip_no_discard$', 'msg'), + (r'^cxip_software_pte_allowed$', 'pte'), + (r'^cxip_stx_alloc$', 'txc'), + (r'^fls64$', 'if'), + (r'^cxi_tc_str$', 'if'), + + # Macro patterns for common macros + (r'^CXIP_ADDR_', 'addr'), + (r'^CXIP_TAG_', 'msg'), + (r'^CXIP_ALIGN', 'common'), + (r'^ARRAY_SIZE$', 'common'), + (r'^CEILING$', 'common'), + (r'^FLOOR$', 'common'), + (r'^CXIP_DBG$', 'log'), + (r'^CXIP_INFO$', 'log'), + (r'^CXIP_WARN', 'log'), + (r'^CXIP_LOG$', 'log'), + (r'^CXIP_FATAL$', 'log'), + (r'^TXC_', 'txc'), + (r'^RXC_', 'rxc'), + (r'^DOM_', 'dom'), +] + +# Callback function patterns - functions that are likely used as callbacks +CALLBACK_PATTERNS = [ + r'_cb$', # Ends with _cb + r'_callback$', # Ends with _callback + r'_handler$', # Ends with _handler + r'_progress$', # Progress functions + r'_recv$', # Receive callbacks + r'_send$', # Send callbacks + r'_complete$', # Completion callbacks + r'_ops$', # Operation tables +] + 

def infer_home_from_name(name: str) -> str | None:
    """Infer the home TU from the symbol name using patterns.

    Returns the first matching TU from NAMING_PATTERNS (order matters —
    more specific patterns are listed first), or None if nothing matches.
    """
    for pattern, tu in NAMING_PATTERNS:
        if re.match(pattern, name, re.IGNORECASE):
            return tu
    return None


def is_likely_callback(name: str, sym: SymbolInfo) -> bool:
    """Check if a function is likely a callback based on naming patterns.

    Only functions are considered; all other symbol kinds return False.
    """
    if sym.kind != "function":
        return False

    for pattern in CALLBACK_PATTERNS:
        if re.search(pattern, name):
            return True

    # Also check if function signature suggests callback (returns int, has specific params)
    # This is a heuristic based on common callback patterns
    return False


def find_home_tu_by_definition(sym: SymbolInfo) -> str | None:
    """Find home TU based on where the symbol is defined.

    Uses the first .c definition site; returns None if no src definition.
    """
    src_definitions = [f for f in sym.defined_in if is_src_file(f)]
    if src_definitions:
        return get_tu_name(src_definitions[0])
    return None


def find_home_tu_by_usage(sym: SymbolInfo) -> str | None:
    """Find home TU based on usage patterns (fallback).

    Picks the TU with the most using .c files; ties broken by Counter order.
    """
    src_users = [f for f in sym.used_in if is_src_file(f)]
    if not src_users:
        return None

    tu_counts = Counter(get_tu_name(f) for f in src_users)
    if tu_counts:
        return tu_counts.most_common(1)[0][0]
    return None


def load_analysis(path: str | None = None) -> dict:
    """Load analysis from file or stdin if in a pipeline."""
    if path:
        with open(path) as f:
            return json.load(f)
    else:
        return json.load(sys.stdin)


def extract_symbols(analysis: dict) -> dict[str, SymbolInfo]:
    """Extract and aggregate symbol information from analysis.

    Note: C allows the same name for a struct/union/enum and a function
    (e.g., struct cxip_domain and int cxip_domain(...)). We use composite
    keys like "name:type" and "name:function" to track both.
    """
    symbols: dict[str, SymbolInfo] = {}

    for file_info in analysis["files"]:
        file_path = file_info["path"]

        for func in file_info.get("function_decls", []):
            name = func["name"]
            # Use composite key to allow same name as type
            func_key = f"{name}:function"
            if func_key not in symbols:
                symbols[func_key] = SymbolInfo(
                    name=name,
                    kind="function",
                    defined_in=[],
                    declared_in=[],
                    used_in=[],
                    is_static=func.get("is_static", False),
                    is_inline=func.get("is_inline", False),
                    signature=func.get("signature", ""),
                )

            sym = symbols[func_key]
            if func.get("is_definition", False):
                if file_path not in sym.defined_in:
                    sym.defined_in.append(file_path)
                sym.is_definition = True
                # A definition overwrites static/inline/signature recorded
                # from any earlier declaration of the same function.
                sym.is_static = func.get("is_static", False)
                sym.is_inline = func.get("is_inline", False)
                sym.signature = func.get("signature", sym.signature)
            else:
                if file_path not in sym.declared_in:
                    sym.declared_in.append(file_path)

        for typedef in file_info.get("type_defs", []):
            name = typedef["name"]
            # Use composite key to allow same name as function
            type_key = f"{name}:type"
            if type_key not in symbols:
                symbols[type_key] = SymbolInfo(
                    name=name,
                    kind="type",
                    defined_in=[],
                    declared_in=[],
                    used_in=[],
                    full_text=typedef.get("full_text", ""),
                )

            sym = symbols[type_key]
            if not typedef.get("is_forward_decl", False):
                if file_path not in sym.defined_in:
                    sym.defined_in.append(file_path)
                sym.full_text = typedef.get("full_text", "")

        for macro in file_info.get("macro_defs", []):
            name = macro["name"]
            # Macros don't share namespace with types/functions, but use key for consistency
            macro_key = f"{name}:macro"
            if macro_key not in symbols:
                symbols[macro_key] = SymbolInfo(
                    name=name,
                    kind="macro",
                    defined_in=[],
                    declared_in=[],
                    used_in=[],
                    full_text=macro.get("full_text", ""),
                )

            sym = symbols[macro_key]
            if file_path not in sym.defined_in:
                sym.defined_in.append(file_path)

        for usage in file_info.get("usages", 
# Fallback header for symbols whose home translation unit cannot be inferred.
COMMON_HEADER = "prov/cxi/include/cxip/common.h"


def _record_header_symbol(plan, key, name, home_tu):
    """Assign *key*/*name* to its home TU's header (or common.h fallback)."""
    header = get_header_for_tu(home_tu) if home_tu else COMMON_HEADER
    plan.symbol_locations[key] = f"header:{header}"
    plan.new_headers.setdefault(header, []).append(name)


def _record_private_symbol(plan, key, name, src_file):
    """Mark *key*/*name* as private to the single translation unit *src_file*."""
    plan.symbol_locations[key] = f"private:{src_file}"
    plan.private_symbols.setdefault(src_file, []).append(name)


def analyze_symbol_visibility(symbols: dict[str, SymbolInfo]) -> RefactorPlan:
    """Determine where each symbol should live using improved heuristics.

    Args:
        symbols: Mapping of composite key ("name:kind") to symbol info.

    Returns:
        A RefactorPlan with per-symbol placement decisions, header/private
        groupings, likely-callback flags, and inline-function handling.
    """
    plan = RefactorPlan()

    for key, sym in symbols.items():
        # sym.name drives naming-based heuristics; key is the storage handle,
        # so 'cxip_domain:type' and 'cxip_domain:function' can coexist.
        name = sym.name

        # Skip symbols that come from neither the main cxip.h nor src files.
        from_main_header = any(is_main_header(f)
                               for f in sym.defined_in + sym.declared_in)
        from_src = any(is_src_file(f) for f in sym.defined_in)
        if not from_main_header and not from_src:
            plan.symbol_locations[key] = "external"
            continue

        src_users = {f for f in sym.used_in if is_src_file(f)}
        test_users = {f for f in sym.used_in if is_test_file(f)}

        # Track likely callback functions for later reporting.
        if is_likely_callback(name, sym):
            plan.likely_callbacks.append(key)

        # HEURISTIC 1: static symbols stay private to their defining TU.
        if sym.is_static and sym.defined_in:
            src_defs = [f for f in sym.defined_in if is_src_file(f)]
            if src_defs:
                _record_private_symbol(plan, key, name, src_defs[0])
                continue

        # HEURISTIC 2: use naming convention first.
        home_tu = infer_home_from_name(name)

        # HEURISTIC 3: for functions, fall back to where they're defined.
        if home_tu is None and sym.kind == "function":
            home_tu = find_home_tu_by_definition(sym)

        # HEURISTIC 4: types/macros defined in the header stay name-based;
        # falling back to usage-based placement for types leads to poor
        # placement, so no extra fallback here.

        # Determine if the symbol needs to be exported.
        needs_export = (len(src_users) > 1 or
                        len(test_users) > 0 or
                        is_likely_callback(name, sym))

        # CRITICAL: a type/macro DEFINED in the main header is public API —
        # it must land in a header even if only one place uses it today.
        if from_main_header and sym.kind in ("type", "macro"):
            needs_export = True

        # Even single-use non-static functions might be callbacks, and
        # anything declared in the header was intended to be public.
        if sym.kind == "function" and not sym.is_static and len(src_users) <= 1:
            if is_likely_callback(name, sym):
                needs_export = True
            elif from_main_header:
                needs_export = True

        if needs_export:
            _record_header_symbol(plan, key, name, home_tu)
        elif len(src_users) == 1:
            _record_private_symbol(plan, key, name, next(iter(src_users)))
        elif len(src_users) == 0 and not test_users:
            # Unused symbols survive only when they look like callbacks or
            # were already exposed in the main header; otherwise dead code.
            if is_likely_callback(name, sym) or from_main_header:
                _record_header_symbol(plan, key, name, home_tu)
            else:
                plan.symbol_locations[key] = "dead_code"
        else:
            plan.symbol_locations[key] = "unknown"

    # Second pass: decide how inline functions should be handled.
    for key, sym in symbols.items():
        if sym.kind != "function" or not sym.is_inline:
            continue
        src_users = {f for f in sym.used_in if is_src_file(f)}

        if not src_users:
            if is_likely_callback(sym.name, sym):
                plan.inline_handling[key] = "keep_for_callback"
            else:
                plan.inline_handling[key] = "possibly_dead"
        elif len(src_users) == 1:
            plan.inline_handling[key] = f"private:{next(iter(src_users))}"
        else:
            loc = plan.symbol_locations.get(key, "")
            if loc.startswith("header:"):
                plan.inline_handling[key] = f"keep_inline:{loc.split(':', 1)[1]}"
            else:
                plan.inline_handling[key] = "make_regular_function"

    return plan
plan.new_headers["prov/cxi/include/cxip/common.h"].append(name) + else: + plan.symbol_locations[key] = "dead_code" + else: + plan.symbol_locations[key] = "unknown" + + # Analyze inline functions + for key, sym in symbols.items(): + name = sym.name + if sym.kind == "function" and sym.is_inline: + src_users = set(f for f in sym.used_in if is_src_file(f)) + + if len(src_users) == 0: + if is_likely_callback(name, sym): + plan.inline_handling[key] = "keep_for_callback" + else: + plan.inline_handling[key] = "possibly_dead" + elif len(src_users) == 1: + plan.inline_handling[key] = f"private:{list(src_users)[0]}" + else: + loc = plan.symbol_locations.get(key, "") + if loc.startswith("header:"): + plan.inline_handling[key] = f"keep_inline:{loc.split(':', 1)[1]}" + else: + plan.inline_handling[key] = "make_regular_function" + + return plan + + +def generate_report(symbols: dict[str, SymbolInfo], plan: RefactorPlan) -> dict: + """Generate a structured report.""" + # Build a reverse lookup: name -> list of keys (to handle name collisions) + name_to_keys: dict[str, list[str]] = defaultdict(list) + for key, sym in symbols.items(): + name_to_keys[sym.name].append(key) + + report = { + "summary": { + "total_symbols": len(symbols), + "functions": sum(1 for s in symbols.values() if s.kind == "function"), + "types": sum(1 for s in symbols.values() if s.kind == "type"), + "macros": sum(1 for s in symbols.values() if s.kind == "macro"), + "likely_callbacks": len(plan.likely_callbacks), + }, + "new_headers": {}, + "private_symbols": {}, + "inline_functions": plan.inline_handling, + "likely_callbacks": sorted(plan.likely_callbacks), + "location_summary": defaultdict(int), + } + + for key, loc in plan.symbol_locations.items(): + if loc.startswith("header:"): + report["location_summary"]["needs_header"] += 1 + elif loc.startswith("private:"): + report["location_summary"]["private"] += 1 + elif loc == "dead_code": + report["location_summary"]["dead_code"] += 1 + elif loc == "external": + 
def main():
    """Drive the pipeline: load the analysis, plan the refactor, emit JSON."""

    def status(msg):
        # Progress chatter goes to stderr so stdout stays pure JSON.
        print(msg, file=sys.stderr)

    if sys.stdin.isatty():
        # No pipeline input: fall back to the on-disk analysis file.
        analysis_path = Path("prov/cxi/scripts/symbol_analysis.json")
        if not analysis_path.exists():
            status(f"Error: {analysis_path} not found. Run analyze_symbols.py first.")
            status("Or pipe the output: ./analyze_symbols.py | ./generate_refactor_plan.py")
            sys.exit(1)
        status("Loading symbol analysis from file...")
        analysis = load_analysis(str(analysis_path))
    else:
        status("Reading symbol analysis from stdin (pipeline mode)...")
        analysis = load_analysis()

    status("Extracting symbols...")
    symbols = extract_symbols(analysis)
    status(f"Found {len(symbols)} unique symbols")

    status("Analyzing symbol visibility with improved heuristics...")
    plan = analyze_symbol_visibility(symbols)

    status("Generating report...")
    report = generate_report(symbols, plan)

    # Detailed per-symbol info keyed by the readable name where it is unique;
    # on a collision (same name, different kinds) fall back to the composite
    # key so both entries survive in the output.
    details = {}
    for key, sym in symbols.items():
        name = sym.name
        details[key if name in details else name] = {
            "kind": sym.kind,
            "defined_in": sym.defined_in,
            "declared_in": sym.declared_in,
            "used_in_count": len(sym.used_in),
            "used_in_src": [f for f in sym.used_in if is_src_file(f)],
            "used_in_test": [f for f in sym.used_in if is_test_file(f)],
            "is_static": sym.is_static,
            "is_inline": sym.is_inline,
            "is_likely_callback": is_likely_callback(name, sym),
            "inferred_home": infer_home_from_name(name),
            "recommended_location": plan.symbol_locations.get(key, "unknown"),
        }
    report["symbol_details"] = details

    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()
Signed-off-by: Nicholas Sielicki --- prov/cxi/include/cxip.h | 3410 +------------------- prov/cxi/include/cxip/addr.h | 36 + prov/cxi/include/cxip/atomic.h | 33 + prov/cxi/include/cxip/auth.h | 16 + prov/cxi/include/cxip/av.h | 133 + prov/cxi/include/cxip/cmdq.h | 71 + prov/cxi/include/cxip/cntr.h | 59 + prov/cxi/include/cxip/coll.h | 307 ++ prov/cxi/include/cxip/coll_trace.h | 30 + prov/cxi/include/cxip/common.h | 313 ++ prov/cxi/include/cxip/cq.h | 86 + prov/cxi/include/cxip/ctrl.h | 84 + prov/cxi/include/cxip/curl.h | 61 + prov/cxi/include/cxip/dom.h | 236 ++ prov/cxi/include/cxip/enums.h | 305 ++ prov/cxi/include/cxip/env.h | 101 + prov/cxi/include/cxip/ep.h | 184 ++ prov/cxi/include/cxip/eq.h | 31 + prov/cxi/include/cxip/evtq.h | 72 + prov/cxi/include/cxip/fabric.h | 23 + prov/cxi/include/cxip/fc.h | 55 + prov/cxi/include/cxip/if.h | 78 + prov/cxi/include/cxip/info.h | 14 + prov/cxi/include/cxip/iomm.h | 27 + prov/cxi/include/cxip/log.h | 23 + prov/cxi/include/cxip/mr.h | 170 + prov/cxi/include/cxip/mr_lac_cache.h | 26 + prov/cxi/include/cxip/msg.h | 198 ++ prov/cxi/include/cxip/msg_hpc.h | 28 + prov/cxi/include/cxip/nic.h | 17 + prov/cxi/include/cxip/portals_table.h | 32 + prov/cxi/include/cxip/pte.h | 82 + prov/cxi/include/cxip/ptelist_buf.h | 126 + prov/cxi/include/cxip/rdzv_pte.h | 66 + prov/cxi/include/cxip/repsum.h | 46 + prov/cxi/include/cxip/req.h | 230 ++ prov/cxi/include/cxip/req_buf.h | 35 + prov/cxi/include/cxip/rma.h | 29 + prov/cxi/include/cxip/rxc.h | 245 ++ prov/cxi/include/cxip/telemetry.h | 37 + prov/cxi/include/cxip/txc.h | 242 ++ prov/cxi/include/cxip/zbcoll.h | 117 + prov/cxi/scripts/analyze_symbols.py | 500 --- prov/cxi/scripts/apply_refactor.py | 1182 ------- prov/cxi/scripts/generate_refactor_plan.py | 610 ---- 45 files changed, 4179 insertions(+), 5627 deletions(-) create mode 100644 prov/cxi/include/cxip/addr.h create mode 100644 prov/cxi/include/cxip/atomic.h create mode 100644 prov/cxi/include/cxip/auth.h create mode 100644 
prov/cxi/include/cxip/av.h create mode 100644 prov/cxi/include/cxip/cmdq.h create mode 100644 prov/cxi/include/cxip/cntr.h create mode 100644 prov/cxi/include/cxip/coll.h create mode 100644 prov/cxi/include/cxip/coll_trace.h create mode 100644 prov/cxi/include/cxip/common.h create mode 100644 prov/cxi/include/cxip/cq.h create mode 100644 prov/cxi/include/cxip/ctrl.h create mode 100644 prov/cxi/include/cxip/curl.h create mode 100644 prov/cxi/include/cxip/dom.h create mode 100644 prov/cxi/include/cxip/enums.h create mode 100644 prov/cxi/include/cxip/env.h create mode 100644 prov/cxi/include/cxip/ep.h create mode 100644 prov/cxi/include/cxip/eq.h create mode 100644 prov/cxi/include/cxip/evtq.h create mode 100644 prov/cxi/include/cxip/fabric.h create mode 100644 prov/cxi/include/cxip/fc.h create mode 100644 prov/cxi/include/cxip/if.h create mode 100644 prov/cxi/include/cxip/info.h create mode 100644 prov/cxi/include/cxip/iomm.h create mode 100644 prov/cxi/include/cxip/log.h create mode 100644 prov/cxi/include/cxip/mr.h create mode 100644 prov/cxi/include/cxip/mr_lac_cache.h create mode 100644 prov/cxi/include/cxip/msg.h create mode 100644 prov/cxi/include/cxip/msg_hpc.h create mode 100644 prov/cxi/include/cxip/nic.h create mode 100644 prov/cxi/include/cxip/portals_table.h create mode 100644 prov/cxi/include/cxip/pte.h create mode 100644 prov/cxi/include/cxip/ptelist_buf.h create mode 100644 prov/cxi/include/cxip/rdzv_pte.h create mode 100644 prov/cxi/include/cxip/repsum.h create mode 100644 prov/cxi/include/cxip/req.h create mode 100644 prov/cxi/include/cxip/req_buf.h create mode 100644 prov/cxi/include/cxip/rma.h create mode 100644 prov/cxi/include/cxip/rxc.h create mode 100644 prov/cxi/include/cxip/telemetry.h create mode 100644 prov/cxi/include/cxip/txc.h create mode 100644 prov/cxi/include/cxip/zbcoll.h delete mode 100755 prov/cxi/scripts/analyze_symbols.py delete mode 100755 prov/cxi/scripts/apply_refactor.py delete mode 100755 
prov/cxi/scripts/generate_refactor_plan.py diff --git a/prov/cxi/include/cxip.h b/prov/cxi/include/cxip.h index abacb5289a5..2eb102a7a4c 100644 --- a/prov/cxi/include/cxip.h +++ b/prov/cxi/include/cxip.h @@ -48,521 +48,97 @@ #include "cxip_faults.h" #include "fi_cxi_ext.h" -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -#ifndef FLOOR -#define FLOOR(a, b) ((long long)(a) - (((long long)(a)) % (b))) -#endif -#ifndef CEILING -#define CEILING(a, b) ((long long)(a) <= 0LL ? 0 : (FLOOR((a)-1, b) + (b))) -#endif -#define CXIP_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -#define CXIP_ALIGN(x, a) CXIP_ALIGN_MASK(x, (typeof(x))(a) - 1) -#define CXIP_ALIGN_DOWN(x, a) CXIP_ALIGN((x) - ((a) - 1), (a)) -#define CXIP_REQ_CLEANUP_TO 3000 -/* PATH_MAX is generally 4K, when not required and variables - * are stack based use CXIP_PATH_MAX - */ -#define CXIP_PATH_MAX 256 - -#define CXIP_BUFFER_ID_MAX (1 << 16) - -/* Scalable EP not supported */ -#define CXIP_EP_MAX_CTX_BITS 0 -#define CXIP_EP_MAX_TX_CNT (1 << CXIP_EP_MAX_CTX_BITS) -#define CXIP_EP_MAX_RX_CNT (1 << CXIP_EP_MAX_CTX_BITS) -#define CXIP_EP_MAX_MSG_SZ ((1ULL << 32) - 1) -#define CXIP_EP_MIN_MULTI_RECV 64 -#define CXIP_EP_MAX_MULTI_RECV ((1 << 24) - 1) - -#define CXIP_TX_COMP_MODES (FI_INJECT_COMPLETE | \ - FI_TRANSMIT_COMPLETE | \ - FI_DELIVERY_COMPLETE | \ - FI_MATCH_COMPLETE) -#define CXIP_TX_OP_FLAGS (FI_INJECT | \ - FI_COMPLETION | \ - CXIP_TX_COMP_MODES | \ - FI_REMOTE_CQ_DATA | \ - FI_MORE | \ - FI_FENCE) -#define CXIP_RX_OP_FLAGS (FI_COMPLETION | \ - FI_MULTI_RECV | \ - FI_MORE) -/* Invalid OP flags for RX that can be silently ignored */ -#define CXIP_RX_IGNORE_OP_FLAGS (FI_REMOTE_CQ_DATA | \ - FI_INJECT) -#define CXIP_WRITEMSG_ALLOWED_FLAGS (FI_INJECT | \ - FI_COMPLETION | \ - FI_MORE | \ - FI_FENCE | \ - CXIP_TX_COMP_MODES) -#define CXIP_READMSG_ALLOWED_FLAGS (FI_COMPLETION | \ - FI_MORE | \ - FI_FENCE | \ - CXIP_TX_COMP_MODES) - -#define CXIP_AMO_MAX_IOV 1 -#define 
CXIP_EQ_DEF_SZ (1 << 8) -#define CXIP_CQ_DEF_SZ 131072U -#define CXIP_REMOTE_CQ_DATA_SZ 8 - -#define CXIP_PTE_IGNORE_DROPS ((1 << 24) - 1) -#define CXIP_RDZV_THRESHOLD 16384 -#define CXIP_OFLOW_BUF_SIZE (12*1024*1024) -#define CXIP_OFLOW_BUF_MIN_POSTED 3 -#define CXIP_OFLOW_BUF_MAX_CACHED (CXIP_OFLOW_BUF_MIN_POSTED * 3) -#define CXIP_REQ_BUF_SIZE (12*1024*1024) -#define CXIP_REQ_BUF_MIN_POSTED 6 -#define CXIP_REQ_BUF_MAX_CACHED 0 - -#define CXIP_DEFAULT_MR_CACHE_MAX_CNT 4096 -#define CXIP_DEFAULT_MR_CACHE_MAX_SIZE -1 - -#define CXIP_MR_CACHE_EVENTS_DISABLE_POLL_NSECS 100000U -#define CXIP_MR_CACHE_EVENTS_DISABLE_LE_POLL_NSECS 1000000000U - -/* When device memory is safe to access via load/store then the - * CPU will be used to move data below this threshold. - */ -#define CXIP_SAFE_DEVMEM_COPY_THRESH 4096 - -#define CXIP_EP_PRI_CAPS \ - (FI_RMA | FI_ATOMICS | FI_TAGGED | FI_RECV | FI_SEND | \ - FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | \ - FI_DIRECTED_RECV | FI_MSG | FI_NAMED_RX_CTX | FI_HMEM | \ - FI_COLLECTIVE) -#define CXIP_EP_SEC_CAPS \ - (FI_SOURCE | FI_SOURCE_ERR | FI_LOCAL_COMM | \ - FI_REMOTE_COMM | FI_RMA_EVENT | FI_MULTI_RECV | FI_FENCE | FI_TRIGGER) -#define CXIP_EP_CAPS (CXIP_EP_PRI_CAPS | CXIP_EP_SEC_CAPS) -#define CXIP_DOM_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM | FI_AV_USER_ID | FI_PEER) -#define CXIP_CAPS (CXIP_DOM_CAPS | CXIP_EP_CAPS) -#define CXIP_MSG_ORDER (FI_ORDER_SAS | \ - FI_ORDER_WAW | \ - FI_ORDER_RMA_WAW | \ - FI_ORDER_RMA_RAR | \ - FI_ORDER_ATOMIC_WAW | \ - FI_ORDER_ATOMIC_WAR | \ - FI_ORDER_ATOMIC_RAW | \ - FI_ORDER_ATOMIC_RAR) - -#define CXIP_EP_CQ_FLAGS \ - (FI_SEND | FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION) -#define CXIP_EP_CNTR_FLAGS \ - (FI_SEND | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | \ - FI_REMOTE_WRITE) - -#define CXIP_INJECT_SIZE C_MAX_IDC_PAYLOAD_UNR - -/* Max TX size of 16,384 translate to a 4MiB command queue buffer. 
*/ -#define CXIP_MAX_TX_SIZE 16384U -#define CXIP_DEFAULT_TX_SIZE 1024U - -/* Some LEs need to be used for internally by CXI provider. The user facing - * RX size should be updated to reflect this. - * - * Note: This value is an estimation and may be too high. - */ -#define CXI_PROV_LE_PER_EP 1024U - -/* Maximum number of LEs per endpoint. */ -#define LES_PER_EP_MAX 16384U - -#define CXIP_MAX_RX_SIZE (LES_PER_EP_MAX - CXI_PROV_LE_PER_EP) -#define CXIP_DEFAULT_RX_SIZE 1024U - -#define CXIP_MAJOR_VERSION 0 -#define CXIP_MINOR_VERSION 1 -#define CXIP_PROV_VERSION FI_VERSION(CXIP_MAJOR_VERSION, \ - CXIP_MINOR_VERSION) -#define CXIP_FI_VERSION FI_VERSION(2, 4) -#define CXIP_WIRE_PROTO_VERSION 1 - -#define CXIP_COLL_MAX_CONCUR 8 -#define CXIP_COLL_MIN_RX_BUFS 8 -#define CXIP_COLL_MIN_RX_SIZE 131072 -#define CXIP_COLL_MIN_MULTI_RECV 64 -#define CXIP_COLL_MAX_DATA_SIZE 32 -#define CXIP_COLL_MAX_SEQNO ((1 << 10) - 1) -#define CXIP_COLL_MOD_SEQNO (CXIP_COLL_MAX_SEQNO - 1) - -// TODO adjust based on performance testing -#define CXIP_COLL_MIN_RETRY_USEC 1 -#define CXIP_COLL_MAX_RETRY_USEC 32000 -/* multiplier used to calc leaf rdma get trigger - * this allows the trigger to scale with - * CXIP_COLL_MAX_RETRY_USEC - */ -#define CXIP_COLL_MAX_LEAF_TIMEOUT_MULT 50 -#define CXIP_COLL_MIN_TIMEOUT_USEC 1 -#define CXIP_COLL_MAX_TIMEOUT_USEC 20000000 - -#define CXIP_REQ_BUF_HEADER_MAX_SIZE (sizeof(struct c_port_fab_hdr) + \ - sizeof(struct c_port_unrestricted_hdr)) -#define CXIP_REQ_BUF_HEADER_MIN_SIZE (sizeof(struct c_port_fab_hdr) + \ - sizeof(struct c_port_small_msg_hdr)) - -// Hints for spinloops -#if defined(__aarch64__) -#define CXIP_PAUSE() __asm__ __volatile__ ("YIELD" ::: "memory") -#elif defined(__x86_64__) -#define CXIP_PAUSE() __asm__ __volatile__ ("pause" ::: "memory") -#else -#define CXIP_PAUSE() -#endif -extern int sc_page_size; -extern char cxip_prov_name[]; -extern struct fi_provider cxip_prov; -extern struct util_prov cxip_util_prov; -extern int cxip_cq_def_sz; 
-extern int cxip_eq_def_sz; +/* Forward declarations for function pointer typedef parameters */ +struct cxip_zbcoll_obj; +struct cxip_curl_handle; -extern struct slist cxip_if_list; +/* Function pointer typedefs (needed by split headers) */ +typedef void (*zbcomplete_t)(struct cxip_zbcoll_obj *zb, void *usrptr); +typedef void (*curlcomplete_t)(struct cxip_curl_handle *); +/* Extern declarations for global variables */ +extern struct cxip_environment cxip_env; +extern struct fi_provider cxip_prov; +extern struct util_prov cxip_util_prov; +extern char cxip_prov_name[]; extern struct fi_fabric_attr cxip_fabric_attr; extern struct fi_domain_attr cxip_domain_attr; -extern struct fi_ep_attr cxip_ep_attr; -extern struct fi_tx_attr cxip_tx_attr; -extern struct fi_rx_attr cxip_rx_attr; - -enum cxip_ats_mlock_mode { - CXIP_ATS_MLOCK_OFF, - CXIP_ATS_MLOCK_CACHE, - CXIP_ATS_MLOCK_ALL, -}; - -enum cxip_llring_mode { - CXIP_LLRING_NEVER, - CXIP_LLRING_IDLE, - CXIP_LLRING_ALWAYS, -}; - -enum cxip_ep_ptle_mode { - CXIP_PTLTE_HARDWARE_MODE, - CXIP_PTLTE_DEFAULT_MODE = CXIP_PTLTE_HARDWARE_MODE, - CXIP_PTLTE_SOFTWARE_MODE, - CXIP_PTLTE_HYBRID_MODE, -}; - -enum cxip_rdzv_proto { - CXIP_RDZV_PROTO_DEFAULT, /* unrestricted gets */ - CXIP_RDZV_PROTO_ALT_READ, /* restricted gets */ - CXIP_RDZV_PROTO_ALT_WRITE, /* restricted puts */ -}; - -const char *cxip_rdzv_proto_to_str(enum cxip_rdzv_proto proto); - -enum cxip_mr_target_ordering { - /* Sets MR target ordering based on message and target RMA ordering - * options. - */ - MR_ORDER_DEFAULT, - - /* Force ordering to always be strict. */ - MR_ORDER_STRICT, - - /* Force ordering to always be relaxed. 
*/ - MR_ORDER_RELAXED, -}; - -struct cxip_environment { - /* Translation */ - int odp; - int force_odp; - int ats; - int iotlb; - int disable_dmabuf_cuda; - int disable_dmabuf_rocr; - enum cxip_ats_mlock_mode ats_mlock_mode; - - /* Messaging */ - int fork_safe_requested; - enum cxip_ep_ptle_mode rx_match_mode; - int msg_offload; - int trunc_ok; - int hybrid_preemptive; - int hybrid_recv_preemptive; - size_t rdzv_threshold; - size_t rdzv_get_min; - size_t rdzv_eager_size; - int rdzv_aligned_sw_rget; - int rnr_max_timeout_us; - int disable_non_inject_msg_idc; - int disable_non_inject_rma_idc; - int disable_non_inject_amo_idc; - int disable_host_register; - size_t oflow_buf_size; - size_t oflow_buf_min_posted; - size_t oflow_buf_max_cached; - size_t safe_devmem_copy_threshold; - size_t req_buf_size; - size_t req_buf_min_posted; - size_t req_buf_max_cached; - int sw_rx_tx_init_max; - int msg_lossless; - size_t default_cq_size; - size_t default_tx_size; - size_t default_rx_size; - int optimized_mrs; - int prov_key_cache; - int mr_match_events; - int disable_eq_hugetlb; - int zbcoll_radix; - - enum cxip_llring_mode llring_mode; - - int cq_policy; - - size_t default_vni; - - size_t eq_ack_batch_size; - int fc_retry_usec_delay; - int cntr_spin_before_yield; - size_t ctrl_rx_eq_max_size; - char *device_name; - size_t cq_fill_percent; - int rget_tc; - int cacheline_size; - - char *coll_job_id; - char *coll_job_step_id; - size_t coll_retry_usec; - size_t coll_timeout_usec; - char *coll_fabric_mgr_url; - char *coll_mcast_token; - size_t hwcoll_addrs_per_job; - size_t hwcoll_min_nodes; - int coll_use_dma_put; - - char hostname[255]; - char *telemetry; - int telemetry_rgid; - int disable_hmem_dev_register; - int ze_hmem_supported; - enum cxip_rdzv_proto rdzv_proto; - int disable_alt_read_cmdq; - int cntr_trig_cmdq; - int enable_trig_op_limit; - int hybrid_posted_recv_preemptive; - int hybrid_unexpected_msg_preemptive; - size_t mr_cache_events_disable_poll_nsecs; - size_t 
mr_cache_events_disable_le_poll_nsecs; - int force_dev_reg_copy; - enum cxip_mr_target_ordering mr_target_ordering; - int disable_cuda_sync_memops; -}; +extern bool cxip_collectives_supported; +extern int sc_page_size; +extern struct slist cxip_if_list; -extern struct cxip_environment cxip_env; +/* Coll trace globals used by inline trace functions */ +extern bool cxip_coll_trace_muted; +extern bool cxip_coll_trace_append; +extern bool cxip_coll_trace_linebuf; +extern int cxip_coll_trace_rank; +extern int cxip_coll_trace_numranks; +extern FILE *cxip_coll_trace_fid; +extern bool cxip_coll_prod_trace_initialized; +extern uint64_t cxip_coll_trace_mask; + +/* Split headers - types, macros, and function declarations */ +#include "cxip/enums.h" +#include "cxip/env.h" +#include "cxip/cmdq.h" +#include "cxip/ptelist_buf.h" +#include "cxip/eq.h" +#include "cxip/cq.h" +#include "cxip/pte.h" +#include "cxip/req_buf.h" +#include "cxip/addr.h" +#include "cxip/coll_trace.h" +#include "cxip/log.h" +#include "cxip/portals_table.h" +#include "cxip/fabric.h" +#include "cxip/cntr.h" +#include "cxip/zbcoll.h" +#include "cxip/repsum.h" +#include "cxip/curl.h" +#include "cxip/info.h" +#include "cxip/msg_hpc.h" +#include "cxip/iomm.h" +#include "cxip/auth.h" +#include "cxip/rma.h" +#include "cxip/atomic.h" +#include "cxip/nic.h" +#include "cxip/common.h" +#include "cxip/req.h" +#include "cxip/av.h" +#include "cxip/coll.h" +#include "cxip/msg.h" +#include "cxip/if.h" +#include "cxip/telemetry.h" +#include "cxip/evtq.h" +#include "cxip/rdzv_pte.h" +#include "cxip/mr_lac_cache.h" +#include "cxip/rxc.h" +#include "cxip/txc.h" +#include "cxip/ctrl.h" +#include "cxip/ep.h" +#include "cxip/mr.h" +#include "cxip/fc.h" +#include "cxip/dom.h" + +/* + * Inline function definitions + * + * These are kept here (not in split headers) because they often + * access struct members from multiple modules, requiring all types + * to be fully defined first. 
+ */ static inline bool cxip_software_pte_allowed(enum cxip_ep_ptle_mode rx_match_mode) { return rx_match_mode != CXIP_PTLTE_HARDWARE_MODE; } -/* - * The CXI Provider Address format. - * - * A Cassini NIC Address and PID identify a libfabric Endpoint. Cassini - * borrows the name 'PID' from Portals. In CXI, a process can allocate several - * PID values. - * - * The PID value C_PID_ANY is reserved. When used, the library auto-assigns - * a free PID value. A PID value is assigned when network resources are - * allocated. Libfabric clients can achieve this by not specifying a 'service' - * in a call to fi_getinfo() or by not setting src_addr in the fi_info - * structure used to allocate an Endpoint. - */ -struct cxip_addr { - uint32_t pid : C_DFA_PID_BITS_MAX; - uint32_t nic : C_DFA_NIC_BITS; - uint32_t pad : 3; - uint16_t vni; -}; - -#define CXIP_ADDR_EQUAL(a, b) ((a).nic == (b).nic && (a).pid == (b).pid) -#define CXIP_ADDR_VNI_EQUAL(a, b) (CXIP_ADDR_EQUAL(a, b) && (a).vni == (b).vni) - -/* - * A PID contains "pid_granule" logical endpoints. The PID granule is set per - * device and can be found in libCXI devinfo. The default pid_granule is 256. 
- * These endpoints are partitioned by the provider for the following use: - * - * 0 RX Queue PtlTE - * 16 Collective PtlTE entry - * 17-116 Optimized write MR PtlTEs 0-99 - * For Client specified keys: - * 17-116 Non-cached optimized write MR PtlTEs 0-99 - * For Provider specified keys: - * 17-24 Cached optimized write MR PtlTEs 0-7 - * 25-116 Non-cached optimized write MR PtlTEs 8-99 - * 117 Standard client/provider cached/non-cached write MR - * PtlTE / Control messaging - * 127 Rendezvous destination write PtlTE - * 128-227 Optimized read MR PtlTEs 0-99 - * For Client specified keys: - * 128-227 Non-cached optimized read MR PtlTEs 0-99 - * For Provider specified keys: - * 128-135 Cached optimized read MR PtlTEs 0-7 - * 136-227 Non-cached optimized read MR PtlTEs 8-99 - * 228 Standard client or provider cached/non-cached read MR - * PtlTE - * 229-237 Rendezvous restricted read PtlTE (TODO consider merge with MR) - * 255 Rendezvous source PtlTE - * - * Note: Any logical endpoint within a PID granule that issues unrestricted Puts - * MUST be within the logical endpoint range 0 - 127 and unrestricted Gets MUST - * be within the logical endpoint range 128 - 255. - */ -#define CXIP_PTL_IDX_RXQ 0 -#define CXIP_PTL_IDX_RNR_RXQ 1 -#define CXIP_PTL_IDX_WRITE_MR_OPT_BASE 17 -#define CXIP_PTL_IDX_READ_MR_OPT_BASE 128 -#define CXIP_PTL_IDX_MR_OPT_CNT 100 -#define CXIP_PTL_IDX_PROV_NUM_CACHE_IDX 8 -#define CXIP_PTL_IDX_PROV_MR_OPT_CNT \ - (CXIP_PTL_IDX_MR_OPT_CNT - CXIP_PTL_IDX_PROV_NUM_CACHE_IDX) - -/* Map non-cached optimized MR keys (client or FI_MR_PROV_KEY) - * to appropriate PTL index. 
- */ -#define CXIP_MR_PROV_KEY_MASK ((1ULL << 61) - 1) -#define CXIP_MR_PROV_KEY_ID_MASK ((1ULL << 16) - 1) -#define CXIP_MR_UNCACHED_KEY_TO_IDX(key) ((key) & CXIP_MR_PROV_KEY_ID_MASK) -#define CXIP_PTL_IDX_WRITE_MR_OPT(key) \ - (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + \ - CXIP_MR_UNCACHED_KEY_TO_IDX(key)) -#define CXIP_PTL_IDX_READ_MR_OPT(key) \ - (CXIP_PTL_IDX_READ_MR_OPT_BASE + \ - CXIP_MR_UNCACHED_KEY_TO_IDX(key)) - -/* Map cached FI_MR_PROV_KEY optimized MR LAC to Index */ -#define CXIP_PTL_IDX_WRITE_PROV_CACHE_MR_OPT(lac) \ - (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + (lac)) -#define CXIP_PTL_IDX_READ_PROV_CACHE_MR_OPT(lac) \ - (CXIP_PTL_IDX_READ_MR_OPT_BASE + (lac)) - -#define CXIP_PTL_IDX_WRITE_MR_STD 117 -#define CXIP_PTL_IDX_RDZV_DEST 127 -#define CXIP_PTL_IDX_COLL 6 -#define CXIP_PTL_IDX_CTRL CXIP_PTL_IDX_WRITE_MR_STD -#define CXIP_PTL_IDX_READ_MR_STD 228 -#define CXIP_PTL_IDX_RDZV_RESTRICTED_BASE 229 -#define CXIP_PTL_IDX_RDZV_RESTRICTED(lac) \ - (CXIP_PTL_IDX_RDZV_RESTRICTED_BASE + (lac)) - -#define CXIP_PTL_IDX_RDZV_SRC 255 - -/* The CXI provider supports both provider specified MR keys - * (FI_MR_PROV_KEY MR mode) and client specified keys on a per-domain - * basis. - * - * User specified keys: - * Hardware resources limit the number of active keys to 16 bits. - * Key size is 32-bit so there are only 64K unique keys. - * - * Provider specified keys: - * The key size is 64-bits and is separated from the MR hardware - * resources such that the associated MR can be cached if the - * following criteria are met: - * - * - The associated memory region is non-zero in length - * - The associated memory region mapping is cached - * - The MR is not bound to a counter - * - * Optimized caching is preferred by default. - * TODO: Fallback to standard optimized if PTE can not be allocated. - * - * FI_MR_PROV_KEY MR are associated with a unique domain wide - * 16-bit buffer ID, reducing the overhead of maintaining keys. 
- * Provider keys should always be preferred over client keys - * unless well known keys are not exchanged between peers. - */ -#define CXIP_MR_KEY_SIZE sizeof(uint32_t) -#define CXIP_MR_KEY_MASK ((1ULL << (8 * CXIP_MR_KEY_SIZE)) - 1) -#define CXIP_MR_VALID_OFFSET_MASK ((1ULL << 56) - 1) - -/* For provider defined keys we define a 64 bit MR key that maps - * to provider required information. - */ -struct cxip_mr_key { - union { - /* Provider generated standard cached */ - struct { - uint64_t lac : 3; - uint64_t lac_off: 58; - uint64_t opt : 1; - uint64_t cached : 1; - uint64_t unused1: 1; - /* shares CXIP_CTRL_LE_TYPE_MR */ - }; - /* Client or Provider non-cached */ - struct { - uint64_t key : 61; - uint64_t unused2: 3; - /* Provider shares opt */ - /* Provider shares cached == 0 */ - /* Provider shares CXIP_CTRL_LE_TYPE_MR */ - }; - /* Provider Key Only */ - struct { - /* Non-cached key consists of unique MR ID and sequence - * number. The same MR ID can be used with sequence - * number to create 2^44 unique keys. That is, a - * single standard MR repeatedly created and destroyed - * every micro-second, would take months before - * it repeated. - */ - uint64_t id : 16; /* Unique - 64K MR */ - uint64_t seqnum : 44; /* Sequence with random seed */ - uint64_t events : 1; /* Requires event generation */ - uint64_t unused3: 2; - uint64_t is_prov: 1; - /* Overloads CXIP_CTRL_LE_TYPE_MR and must be cleared - * before appending MR LE or TX using in match bits. - */ - }; - uint64_t raw; - }; -}; - -#define CXIP_MR_PROV_KEY_SIZE sizeof(struct cxip_mr_key) -#define CXIP_NUM_CACHED_KEY_LE 8 - -struct cxip_domain; -struct cxip_mr_domain; -struct cxip_mr; - -/* CXI provider MR operations that are specific for the MR - * based on MR key type and caching. 
- */ -struct cxip_mr_util_ops { - bool is_cached; - int (*init_key)(struct cxip_mr *mr, uint64_t req_key); - int (*enable_opt)(struct cxip_mr *mr); - int (*disable_opt)(struct cxip_mr *mr); - int (*enable_std)(struct cxip_mr *mr); - int (*disable_std)(struct cxip_mr *mr); -}; - -struct cxip_ep_obj; - -/* - * cxip_ctrl_mr_cache_flush() - Flush LE associated with remote MR cache. - */ -void cxip_ctrl_mr_cache_flush(struct cxip_ep_obj *ep_obj); - -/* - * cxip_adjust_remote_offset() - Update address with the appropriate offset - * for key. - */ static inline uint64_t cxip_adjust_remote_offset(uint64_t *addr, uint64_t key) { @@ -578,473 +154,6 @@ uint64_t cxip_adjust_remote_offset(uint64_t *addr, uint64_t key) return FI_SUCCESS; } -int cxip_generic_mr_key_to_ptl_idx(struct cxip_domain *dom, - uint64_t key, bool write); -bool cxip_generic_is_mr_key_opt(uint64_t key); -bool cxip_generic_is_mr_key_events(uint64_t caps, uint64_t key); -bool cxip_generic_is_valid_mr_key(uint64_t key); - -/* Messaging Match Bit layout */ -#define CXIP_TX_ID_WIDTH 11 -#define CXIP_TAG_WIDTH 48 -#define CXIP_RDZV_ID_CMD_WIDTH 8 -#define CXIP_RDZV_ID_HIGH_WIDTH 7 -#define CXIP_TOTAL_RDZV_ID_WIDTH (CXIP_RDZV_ID_CMD_WIDTH + \ - CXIP_RDZV_ID_HIGH_WIDTH) -#define CXIP_TAG_MASK ((1UL << CXIP_TAG_WIDTH) - 1) - -#define CXIP_CS_TAG_WIDTH 40 -#define CXIP_VNI_WIDTH 16 -#define CXIP_CS_TAG_MASK ((1UL << CXIP_CS_TAG_WIDTH) - 1) - -/* Define several types of LEs */ -enum cxip_le_type { - CXIP_LE_TYPE_RX = 0, /* RX data LE */ - CXIP_LE_TYPE_ZBP, /* Zero-byte Put control message LE. Used to - * exchange data in the EQ header_data and - * match_bits fields. Unexpected headers are - * disabled. 
- */ -}; - -enum cxip_ctrl_le_type { - CXIP_CTRL_LE_TYPE_MR = 0, /* Memory Region LE */ - CXIP_CTRL_LE_TYPE_CTRL_MSG, /* Control Message LE */ -}; - -enum cxip_ctrl_msg_type { - CXIP_CTRL_MSG_FC_NOTIFY = 0, - CXIP_CTRL_MSG_FC_RESUME, - CXIP_CTRL_MSG_ZB_DATA, - CXIP_CTRL_MSG_ZB_DATA_RDMA_LAC, -}; - -union cxip_match_bits { - struct { - uint64_t tag : CXIP_TAG_WIDTH; /* User tag value */ - uint64_t tx_id : CXIP_TX_ID_WIDTH; /* Prov. tracked ID */ - uint64_t cq_data : 1; /* Header data is valid */ - uint64_t tagged : 1; /* Tagged API */ - uint64_t match_comp : 1; /* Notify initiator on match */ - uint64_t rdzv_done : 1; /* Notify initiator when rdzv done */ - uint64_t le_type : 1; - }; - /* Rendezvous protocol request, overloads match_comp and rdzv_done - * to specify requested protocol. - */ - struct { - uint64_t pad0 : 61; - uint64_t rdzv_proto : 2; - uint64_t pad1 : 1; - }; - /* Split TX ID for rendezvous operations. */ - struct { - uint64_t pad2 : (CXIP_TAG_WIDTH - 1); /* User tag value */ - uint64_t coll_get : 1; /* leaf rdma get */ - uint64_t rdzv_id_hi : CXIP_RDZV_ID_HIGH_WIDTH; - uint64_t rdzv_lac : 4; /* Rendezvous Get LAC */ - }; - struct { - uint64_t rdzv_id_lo : CXIP_RDZV_ID_CMD_WIDTH; - }; - /* Client/Server messaging match bits */ - struct { - uint64_t rnr_tag : CXIP_CS_TAG_WIDTH; /* User tag value */ - uint64_t rnr_rsvd : 6; /* Unused, set to 0 */ - uint64_t rnr_cq_data : 1; /* Header data valid */ - uint64_t rnr_tagged : 1; /* Tagged API */ - uint64_t rnr_vni : CXIP_VNI_WIDTH; /* Source VNI */ - }; - /* Control LE match bit format for notify/resume */ - struct { - uint64_t txc_id : 8; - uint64_t rxc_id : 8; - uint64_t drops : 16; - uint64_t pad3 : 29; - uint64_t ctrl_msg_type: 2; - uint64_t ctrl_le_type : 1; - }; - /* Control LE match bit format for zbcollectives */ - struct { - uint64_t zb_data :61; - uint64_t zb_pad : 3; - /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_CTRL_MSG - * shares ctrl_msg_type == CXIP_CTRL_MSG_ZB_BCAST - */ - }; - /* Control LE 
match bit format for cached MR */ - struct { - uint64_t mr_lac : 3; - uint64_t mr_lac_off : 58; - uint64_t mr_opt : 1; - uint64_t mr_cached : 1; - uint64_t mr_unused : 1; - /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR */ - }; - struct { - uint64_t mr_key : 61; - uint64_t mr_pad : 3; - /* shares mr_opt - * shares mr_cached == 0 - * shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR - */ - }; - struct { - uint64_t unused2 : 63; - uint64_t is_prov : 1; - /* Indicates provider generated key and shares ctrl_le_type == - * CXIP_CTRL_LE_TYPE_MR so it must be cleared before matching. - */ - }; - uint64_t raw; -}; -#define CXIP_IS_PROV_MR_KEY_BIT (1ULL << 63) -#define CXIP_KEY_MATCH_BITS(key) ((key) & ~CXIP_IS_PROV_MR_KEY_BIT) - -/* libcxi Wrapper Structures */ - -#define CXI_PLATFORM_ASIC 0 -#define CXI_PLATFORM_NETSIM 1 -#define CXI_PLATFORM_Z1 2 -#define CXI_PLATFORM_FPGA 3 - -#define MAX_HW_CPS 16 -/* - * CXI Device wrapper - * - * There will be one of these for every local Cassini device on the node. - */ -struct cxip_if { - struct slist_entry if_entry; - - /* Device description */ - struct cxil_devinfo *info; - int speed; - int link; - - struct cxil_dev *dev; - - /* PtlTEs (searched during state change events) */ - struct dlist_entry ptes; - - ofi_atomic32_t ref; - ofi_spin_t lock; -}; - -/* - * CXI communication profile wrapper. - * - * The wrapper is used to remap user requested traffic class to a communication - * profile which actually can be allocated. - */ -struct cxip_remap_cp { - struct dlist_entry remap_entry; - struct cxi_cp remap_cp; - struct cxi_cp *hw_cp; -}; - -/* - * CXI Logical Network Interface (LNI) wrapper - * - * An LNI is a container used allocate resources from a NIC. - */ -struct cxip_lni { - struct cxip_if *iface; - struct cxil_lni *lni; - - /* Hardware communication profiles */ - struct cxi_cp *hw_cps[MAX_HW_CPS]; - int n_cps; - - /* Software remapped communication profiles. 
*/ - struct dlist_entry remap_cps; - - pthread_rwlock_t cp_lock; -}; - -/* A portals table define a network endpoint address. The endpoint address is - * a {NIC + PID} and this can be configured against multiple VNIs - */ -struct cxip_portals_table { - struct cxip_lni *lni; - uint32_t pid; - struct cxil_domain **doms; - size_t doms_count; -}; - -int cxip_portals_table_alloc(struct cxip_lni *lni, uint16_t *vni, - size_t vni_count, uint32_t pid, - struct cxip_portals_table **ptable); -void cxip_portals_table_free(struct cxip_portals_table *ptable); - -struct cxip_pte_map_entry { - struct dlist_entry entry; - struct cxil_pte_map *map; -}; - -/* - * CXI Portal Table Entry (PtlTE) wrapper - * - * Represents PtlTE mapped in a CXI domain. - */ -struct cxip_pte { - struct dlist_entry pte_entry; - struct cxip_portals_table *ptable; - struct cxil_pte *pte; - enum c_ptlte_state state; - struct dlist_entry map_list; - - void (*state_change_cb)(struct cxip_pte *pte, - const union c_event *event); - void *ctx; -}; - -/* - * CXI Command Queue wrapper - */ -struct cxip_cmdq { - struct cxi_cq *dev_cmdq; - struct c_cstate_cmd c_state; - enum cxip_llring_mode llring_mode; - - struct cxi_cp *cur_cp; - struct cxi_cp *prev_cp; - struct cxip_lni *lni; -}; - -int cxip_cmdq_emit_idc_put(struct cxip_cmdq *cmdq, - const struct c_cstate_cmd *c_state, - const struct c_idc_put_cmd *put, const void *buf, - size_t len, uint64_t flags); -int cxip_cmdq_emit_dma(struct cxip_cmdq *cmdq, struct c_full_dma_cmd *dma, - uint64_t flags); -int cxip_cmdq_emic_idc_amo(struct cxip_cmdq *cmdq, - const struct c_cstate_cmd *c_state, - const struct c_idc_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); -int cxip_cmdq_emit_dma_amo(struct cxip_cmdq *cmdq, struct c_dma_amo_cmd *amo, - uint64_t flags, bool fetching, bool flush); -int cxip_cmdq_emit_idc_msg(struct cxip_cmdq *cmdq, - const struct c_cstate_cmd *c_state, - const struct c_idc_msg_hdr *msg, const void *buf, - size_t len, uint64_t flags); - -/* 
OFI Provider Structures */ - -/* - * CXI Provider Fabric object - */ -struct cxip_fabric { - struct util_fabric util_fabric; - ofi_atomic32_t ref; -}; - -/* - * CXI Provider Memory Descriptor - */ -struct cxip_md { - struct cxip_domain *dom; - struct cxi_md *md; - struct ofi_mr_info info; - uint64_t map_flags; - uint64_t handle; - int dmabuf_fd; - bool handle_valid; - bool cached; - bool dmabuf_fd_valid; -}; - -#define CXIP_MR_DOMAIN_HT_BUCKETS 16 - -struct cxip_mr_domain { - struct dlist_entry buckets[CXIP_MR_DOMAIN_HT_BUCKETS]; - ofi_spin_t lock; -}; - -void cxip_mr_domain_init(struct cxip_mr_domain *mr_domain); -void cxip_mr_domain_fini(struct cxip_mr_domain *mr_domain); - -struct cxip_telemetry { - struct cxip_domain *dom; - - /* List of telemetry entries to being monitored. */ - struct dlist_entry telemetry_list; -}; - -void cxip_telemetry_dump_delta(struct cxip_telemetry *telemetry); -void cxip_telemetry_free(struct cxip_telemetry *telemetry); -int cxip_telemetry_alloc(struct cxip_domain *dom, - struct cxip_telemetry **telemetry); - -#define TELEMETRY_ENTRY_NAME_SIZE 64U - -struct cxip_telemetry_entry { - struct cxip_telemetry *telemetry; - struct dlist_entry telemetry_entry; - - /* Telemetry name. */ - char name[TELEMETRY_ENTRY_NAME_SIZE]; - - /* Telemetry value. 
*/ - unsigned long value; -}; - -struct cxip_domain_cmdq { - struct dlist_entry entry; - struct cxip_cmdq *cmdq; -}; - -/* - * CXI Provider Domain object - */ -struct cxip_domain { - struct util_domain util_domain; - struct cxip_fabric *fab; - ofi_spin_t lock; - ofi_atomic32_t ref; - - struct fid_ep rx_ep; - struct fid_peer_srx *owner_srx; - - uint32_t tclass; - - struct cxip_eq *eq; //unused - struct cxip_eq *mr_eq; //unused - - /* Assigned NIC address */ - uint32_t nic_addr; - - /* Device info */ - struct cxip_if *iface; - - /* Device partition */ - struct cxip_lni *lni; - - /* Trigger and CT support */ - struct cxip_cmdq *trig_cmdq; - struct ofi_genlock trig_cmdq_lock; - bool cntr_init; - - /* Provider generated RKEYs, else client */ - bool is_prov_key; - - /* Can disable caching of provider generated RKEYs */ - bool prov_key_cache; - - /* Provider generated RKEYs optimized MR disablement/enablement */ - bool optimized_mrs; - - /* Enable MR match event counting enables a more robust - * MR when using FI_MR_PROV_KEY. It disables hardware cached - * MR keys and ensures memory backing a MR cannot be - * remotely accessed even if that memory remains in the - * libfabric MR cache. - */ - bool mr_match_events; - - /* Domain wide MR resources. - * Req IDs are control buffer IDs to map MR or MR cache to an LE. - * MR IDs are used by non-cached provider key MR to decouple the - * MR and Req ID, and do not map directly to the MR LE. - */ - ofi_spin_t ctrl_id_lock; - struct indexer req_ids; - struct indexer mr_ids; - - /* If FI_MR_PROV_KEY is not cached, keys include a sequence number - * to reduce the likelyhood of a stale key being used to access - * a recycled MR key. 
- */ - uint32_t prov_key_seqnum; - - /* Translation cache */ - struct ofi_mr_cache iomm; - bool odp; - bool ats; - bool hmem; - - /* ATS translation support */ - struct cxip_md scalable_md; - bool scalable_iomm; - bool rocr_dev_mem_only; - - /* Domain state */ - bool enabled; - - /* List of allocated resources used for deferred work queue processing. - */ - struct dlist_entry txc_list; - struct dlist_entry cntr_list; - struct dlist_entry cq_list; - - struct fi_hmem_override_ops hmem_ops; - bool hybrid_mr_desc; - - /* Container of in-use MRs against this domain. */ - struct cxip_mr_domain mr_domain; - - /* Counters collected for the duration of the domain existence. */ - struct cxip_telemetry *telemetry; - - /* NIC AMO operation which is remapped to a PCIe operation. */ - int amo_remap_to_pcie_fadd; - - /* Maximum number of triggered operations configured for the service - * ID. - */ - int max_trig_op_in_use; - sem_t *trig_op_lock; - - /* Domain has been configured with FI_AV_AUTH_KEY. */ - bool av_auth_key; - - /* This is only valid if FI_AV_AUTH_KEY is false. */ - struct cxi_auth_key auth_key; - - /* Maximum number of auth keys requested by user. */ - size_t auth_key_entry_max; - - /* Domain has been configured with FI_AV_USER_ID. */ - bool av_user_id; - - /* Domain level TX command queues used when number of authorization - * keys exceeds LCID limit. 
- */ - struct dlist_entry cmdq_list; - unsigned int cmdq_cnt; - struct ofi_genlock cmdq_lock; - size_t tx_size; - - /* domain level match mode override */ - enum cxip_ep_ptle_mode rx_match_mode; - bool msg_offload; - size_t req_buf_size; - -}; - -int cxip_domain_emit_idc_put(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - const struct c_cstate_cmd *c_state, - const struct c_idc_put_cmd *put, const void *buf, - size_t len, uint64_t flags); -int cxip_domain_emit_dma(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, struct c_full_dma_cmd *dma, - uint64_t flags); -int cxip_domain_emit_idc_amo(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - const struct c_cstate_cmd *c_state, - const struct c_idc_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); -int cxip_domain_emit_dma_amo(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - struct c_dma_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); -int cxip_domain_emit_idc_msg(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - const struct c_cstate_cmd *c_state, - const struct c_idc_msg_hdr *msg, const void *buf, - size_t len, uint64_t flags); - static inline bool cxip_domain_mr_cache_enabled(struct cxip_domain *dom) { return dom->iomm.domain == &dom->util_domain; @@ -1056,36 +165,6 @@ static inline bool cxip_domain_mr_cache_iface_enabled(struct cxip_domain *dom, return cxip_domain_mr_cache_enabled(dom) && dom->iomm.monitors[iface]; } -int cxip_domain_valid_vni(struct cxip_domain *dom, struct cxi_auth_key *key); - - -/* This structure implies knowledge about the breakdown of the NIC address, - * which is taken from the AMA, that the provider does not know in a flexible - * way. However, the domain fi_open_ops() API includes a topology function - * that requires knowledge of the address breakdown into topology components. - * TODO: Research a less restricted way to get this information. 
- */ -#define CXIP_ADDR_PORT_BITS 6 -#define CXIP_ADDR_SWITCH_BITS 5 -#define CXIP_ADDR_GROUP_BITS 9 -#define CXIP_ADDR_FATTREE_PORT_BITS 6 -#define CXIP_ADDR_FATTREE_SWITCH_BITS 14 - -struct cxip_topo_addr { - union { - uint32_t addr; - struct { - uint32_t port_num:CXIP_ADDR_PORT_BITS; - uint32_t switch_num:CXIP_ADDR_SWITCH_BITS; - uint32_t group_num:CXIP_ADDR_GROUP_BITS; - } dragonfly; - struct { - uint32_t port_num:CXIP_ADDR_FATTREE_PORT_BITS; - uint32_t switch_num:CXIP_ADDR_FATTREE_SWITCH_BITS; - } fat_tree; - }; -}; - static inline ssize_t cxip_copy_to_hmem_iov(struct cxip_domain *domain, enum fi_hmem_iface hmem_iface, uint64_t device, const struct iovec *hmem_iov, @@ -1097,422 +176,16 @@ cxip_copy_to_hmem_iov(struct cxip_domain *domain, enum fi_hmem_iface hmem_iface, hmem_iov_offset, src, size); } -/* - * Event Queue - * - * libfabric fi_eq implementation. - * - * Created in cxip_eq_open(). - */ -struct cxip_eq { - struct util_eq util_eq; - struct fi_eq_attr attr; - struct dlist_entry ep_list; - ofi_mutex_t list_lock; -}; - -#ifdef CXI_MAP_IOVA_ALLOC -#define CXIP_EQ_MAP_FLAGS (CXI_MAP_WRITE | CXI_MAP_PIN | CXI_MAP_IOVA_ALLOC) -#else -#define CXIP_EQ_MAP_FLAGS (CXI_MAP_WRITE | CXI_MAP_PIN) -#endif - -/* - * RMA request - * - * Support structures, accumulated in a union. - */ -struct cxip_req_rma { - struct cxip_txc *txc; - struct cxip_md *local_md; // RMA target buffer - void *ibuf; - struct cxip_cntr *cntr; - /* collectives leaf_rdma_get_callback context data */ - struct cxip_coll_reduction *reduction; -}; - -struct cxip_req_amo { - struct cxip_txc *txc; - struct cxip_md *result_md; - struct cxip_md *oper1_md; - char result[16]; - char oper1[16]; - bool tmp_result; - bool tmp_oper1; - void *ibuf; - bool fetching_amo_flush; - uint8_t fetching_amo_flush_event_count; - unsigned int fetching_amo_flush_event_rc; - struct cxip_cntr *cntr; -}; - -/* Used with receive request to maintain state associated - * with MQD support for dumping unexpected messages. 
- */ -struct cxip_ux_dump_state { - bool done; - - size_t max_count; /* Number entries/src_addr provided */ - size_t ret_count; /* Number of UX entries returned */ - size_t ux_count; /* Total UX entries available */ - - struct fi_cq_tagged_entry *entry; - fi_addr_t *src_addr; -}; - -struct cxip_req_recv { - /* Receive parameters */ - struct dlist_entry rxc_entry; - union { - struct cxip_rxc *rxc; - struct cxip_rxc_hpc *rxc_hpc; - struct cxip_rxc_rnr *rxc_rnr; - }; - - struct cxip_cntr *cntr; - void *recv_buf; // local receive buffer - struct cxip_md *recv_md; // local receive MD - bool hybrid_md; // True if MD was provided - bool success_disable; - uint32_t ulen; // User buffer length - bool tagged; - uint64_t tag; - uint64_t ignore; - uint32_t match_id; - uint64_t flags; - - /* FI_CLAIM work around to hold UX remote offsets for duration of - * H/W UX entry matching and deletion. Array of 8-byte unexpected - * headers remote offsets, and current remote offset used when - * processing search results to match remote offsets. - */ - uint64_t *ule_offsets; - uint64_t ule_offset; - unsigned int num_ule_offsets; - unsigned int cur_ule_offsets; - bool offset_found; - - /* UX list dump state */ - struct cxip_ux_dump_state *ux_dump; - - /* Control info */ - int rc; // DMA return code - uint32_t rlen; // Send length - uint64_t oflow_start; // Overflow buffer address - uint16_t vni; // VNI operation came in on - uint32_t initiator; // DMA initiator address - uint32_t rdzv_id; // DMA initiator rendezvous ID - uint8_t rdzv_lac; // Rendezvous source LAC - bool done_notify; // Must send done notification - enum cxip_rdzv_proto rdzv_proto; - int rdzv_events; // Processed rdzv event count - enum c_event_type rdzv_event_types[4]; - uint32_t rdzv_initiator; // Rendezvous initiator used for mrecvs - uint32_t rget_nic; - uint32_t rget_pid; - int multirecv_inflight; // SW EP Multi-receives in progress - bool canceled; // Request canceled? 
- bool unlinked; - bool multi_recv; - bool tgt_event; - uint64_t start_offset; - uint64_t mrecv_bytes; - uint64_t mrecv_unlink_bytes; - bool auto_unlinked; - bool hw_offloaded; - struct cxip_req *parent; - struct dlist_entry children; - uint64_t src_offset; - uint16_t rdzv_mlen; -}; - -struct cxip_req_send { - /* Send parameters */ - union { - struct cxip_txc *txc; - struct cxip_txc_hpc *txc_hpc; - struct cxip_txc_rnr *txc_rnr; - }; - struct cxip_cntr *cntr; - const void *buf; // local send buffer - size_t len; // request length - struct cxip_md *send_md; // send buffer memory descriptor - struct cxip_addr caddr; - fi_addr_t dest_addr; - bool tagged; - bool hybrid_md; - bool success_disable; - uint32_t tclass; - uint64_t tag; - uint64_t data; - uint64_t flags; - void *ibuf; - - /* Control info */ - struct dlist_entry txc_entry; - struct cxip_fc_peer *fc_peer; - union { - int rdzv_id; // SW RDZV ID for long messages - int tx_id; - }; - int rc; // DMA return code - int rdzv_send_events; // Processed event count - uint64_t max_rnr_time; - uint64_t retry_rnr_time; - struct dlist_entry rnr_entry; - int retries; - bool canceled; -}; - -struct cxip_req_rdzv_src { - struct dlist_entry list; - struct cxip_txc *txc; - uint32_t lac; - int rc; -}; - -struct cxip_req_search { - struct cxip_rxc_hpc *rxc; - bool complete; - int puts_pending; -}; - -struct cxip_req_coll { - struct cxip_coll_pte *coll_pte; - struct cxip_coll_buf *coll_buf; - uint32_t mrecv_space; - size_t hw_req_len; - bool isred; - enum c_return_code cxi_rc; -}; - -enum cxip_req_type { - CXIP_REQ_RMA, - CXIP_REQ_AMO, - CXIP_REQ_OFLOW, - CXIP_REQ_RECV, - CXIP_REQ_SEND, - CXIP_REQ_RDZV_SRC, - CXIP_REQ_SEARCH, - CXIP_REQ_COLL, - CXIP_REQ_RBUF, -}; - -/* - * Async Request - * - * Support structure. - * - * Created in cxip_cq_req_alloc(). - * - * This implements an async-request/callback mechanism. 
It uses the libfabric - * utility pool, which provides a pool of reusable memory objects that supports - * a fast lookup through the req_id index value, and can be bound to a CQ. - * - * The request is allocated and bound to the CQ, and then the command is - * issued. When the completion queue signals completion, this request is found, - * and the callback function is called. - */ -struct cxip_req { - /* Control info */ - struct dlist_entry evtq_entry; - void *req_ctx; - struct cxip_cq *cq; // request CQ - struct cxip_evtq *evtq; // request event queue - int req_id; // fast lookup in index table - int (*cb)(struct cxip_req *req, const union c_event *evt); - // completion event callback - bool discard; - - /* Triggered related fields. */ - bool triggered; - uint64_t trig_thresh; - struct cxip_cntr *trig_cntr; - - struct fi_peer_rx_entry *rx_entry; - - /* CQ event fields, set according to fi_cq.3 - * - set by provider - * - returned to user in completion event - */ - uint64_t context; - uint64_t flags; - uint64_t data_len; - uint64_t buf; - uint64_t data; - uint64_t tag; - fi_addr_t addr; - - /* Request parameters */ - enum cxip_req_type type; - union { - struct cxip_req_rma rma; - struct cxip_req_amo amo; - struct cxip_req_recv recv; - struct cxip_req_send send; - struct cxip_req_rdzv_src rdzv_src; - struct cxip_req_search search; - struct cxip_req_coll coll; - }; -}; - static inline bool cxip_is_trig_req(struct cxip_req *req) { return req->trig_cntr != NULL; } -struct cxip_ctrl_req_mr { - struct cxip_mr *mr; -}; - -struct cxip_ctrl_send { - uint32_t nic_addr; - uint32_t pid; - uint16_t vni; - union cxip_match_bits mb; -}; - -struct cxip_ctrl_req { - struct dlist_entry ep_entry; - struct cxip_ep_obj *ep_obj; - int req_id; - int (*cb)(struct cxip_ctrl_req *req, const union c_event *evt); - - union { - struct cxip_ctrl_req_mr mr; - struct cxip_ctrl_send send; - }; -}; - -struct cxip_mr_lac_cache { - /* MR referencing the associated MR cache LE, can only - * be flushed 
if reference count is 0. - */ - ofi_atomic32_t ref; - union cxip_match_bits mb; - struct cxip_ctrl_req *ctrl_req; -}; - -struct cxip_fc_peer { - struct dlist_entry txc_entry; - struct cxip_txc_hpc *txc; - struct cxip_ctrl_req req; - struct cxip_addr caddr; - struct dlist_entry msg_queue; - uint16_t pending; - uint16_t dropped; - uint16_t pending_acks; - bool replayed; - unsigned int retry_count; -}; - -struct cxip_fc_drops { - struct dlist_entry rxc_entry; - struct cxip_rxc_hpc *rxc; - struct cxip_ctrl_req req; - uint32_t nic_addr; - uint32_t pid; - uint16_t vni; - uint16_t drops; - unsigned int retry_count; -}; - -/* Completion queue specific wrapper around CXI event queue. */ -struct cxip_cq_eq { - struct cxi_eq *eq; - void *buf; - size_t len; - struct cxi_md *md; - bool mmap; - unsigned int unacked_events; - struct c_eq_status prev_eq_status; - bool eq_saturated; -}; - -struct cxip_evtq { - struct cxi_eq *eq; - void *buf; - size_t len; - struct cxi_md *md; - bool mmap; - unsigned int unacked_events; - unsigned int ack_batch_size; - struct c_eq_status prev_eq_status; - bool eq_saturated; - /* Reference to wait_obj allocated outside scope of event queue */ - struct cxil_wait_obj *event_wait_obj; - struct cxil_wait_obj *status_wait_obj; - - /* Point back to CQ */ - struct cxip_cq *cq; - - /* Protected with ep_ob->lock */ - struct ofi_bufpool *req_pool; - struct indexer req_table; - struct dlist_entry req_list; -}; - -/* - * CXI Libfbric software completion queue - */ -struct cxip_cq { - struct util_cq util_cq; - struct fi_cq_attr attr; - - /* Implement our own CQ ep_list_lock since common code util_cq - * implementation is a mutex and can not be optimized. This lock - * is always taken walking the CQ EP, but can be optimized to no-op. - */ - struct ofi_genlock ep_list_lock; - - /* CXI CQ wait object EPs are maintained in epoll FD */ - int ep_fd; - - /* CXI specific fields. 
*/ - struct cxip_domain *domain; - unsigned int ack_batch_size; - struct dlist_entry dom_entry; -}; - static inline uint16_t cxip_evtq_eqn(struct cxip_evtq *evtq) { return evtq->eq->eqn; } -/* - * CXI libfabric completion counter - */ -struct cxip_cntr { - struct fid_cntr cntr_fid; - struct cxip_domain *domain; // parent domain - ofi_atomic32_t ref; - struct fi_cntr_attr attr; // copy of user or default attributes - struct fid_wait *wait; - /* Contexts to which counter is bound */ - struct dlist_entry ctx_list; - - /* Triggered cmdq for bound counters */ - struct cxip_cmdq *trig_cmdq; - - struct ofi_genlock lock; - - struct cxi_ct *ct; - struct c_ct_writeback *wb; - uint64_t wb_device; - enum fi_hmem_iface wb_iface; - uint64_t wb_handle; - bool wb_handle_valid; - struct c_ct_writeback lwb; - - struct dlist_entry dom_entry; - - /* Counter for number of operations which need progress. A separate lock - * is needed since these functions may be called without counter lock held. - */ - struct ofi_genlock progress_count_lock; - int progress_count; -}; - static inline void cxip_cntr_progress_inc(struct cxip_cntr *cntr) { ofi_genlock_lock(&cntr->progress_count_lock); @@ -1540,319 +213,6 @@ static inline unsigned int cxip_cntr_progress_get(struct cxip_cntr *cntr) return count; } -struct cxip_ux_send { - struct dlist_entry rxc_entry; - struct cxip_req *req; - struct cxip_rxc *rxc; - struct fi_peer_rx_entry *rx_entry; - union c_event put_ev; - bool claimed; /* Reserved with FI_PEEK | FI_CLAIM */ -}; - -/* Key used to associate PUT and PUT_OVERFLOW events */ -union cxip_def_event_key { - struct { - uint64_t initiator : 32; - uint64_t rdzv_id : 15; - uint64_t pad0 : 16; - uint64_t rdzv : 1; - }; - struct { - uint64_t start_addr : 57; - uint64_t pad1 : 7; - }; - uint64_t raw; -}; - -struct cxip_deferred_event { - struct dlist_entry rxc_entry; - union cxip_def_event_key key; - struct cxip_req *req; - union c_event ev; - uint64_t mrecv_start; - uint32_t mrecv_len; - - struct 
cxip_ux_send *ux_send; -}; - -/* A very specific (non-generic) hash table is used to map - * deferred CXI events to associate PUT and PUT_OVERFLOW events. - * Hash entries are added and removed at a high rate and the - * overhead of generic implementations is insufficient. - */ -#define CXIP_DEF_EVENT_HT_BUCKETS 256 - -struct def_event_ht { - struct dlist_entry bh[CXIP_DEF_EVENT_HT_BUCKETS]; -}; - -/* - * Zero-buffer collectives. - */ -#define ZB_NOSIM -1 -#define ZB_ALLSIM -2 - -struct cxip_zbcoll_obj; -typedef void (*zbcomplete_t)(struct cxip_zbcoll_obj *zb, void *usrptr); - -struct cxip_zbcoll_cb_obj { - zbcomplete_t usrfunc; // callback function - void *usrptr; // callback data -}; - -/* Used to track state for one or more zbcoll endpoints */ -struct cxip_zbcoll_state { - struct cxip_zbcoll_obj *zb; // backpointer to zbcoll_obj - uint64_t *dataptr; // user-supplied target - uint64_t dataval; // collective data - int num_relatives; // number of nearest relatives - int *relatives; // nearest relative indices - int contribs; // contribution count - int grp_rank; // local rank within group -}; - -/* Used to track concurrent zbcoll operations */ -struct cxip_zbcoll_obj { - struct dlist_entry ready_link; // link to zb_coll ready_list - struct cxip_ep_obj *ep_obj; // backpointer to endpoint - struct cxip_zbcoll_state *state;// state array - struct cxip_addr *caddrs; // cxip addresses in collective - int num_caddrs; // number of cxip addresses - zbcomplete_t userfunc; // completion callback function - void *userptr; // completion callback data - uint64_t *grpmskp; // pointer to global group mask - uint32_t *shuffle; // TEST shuffle array - int simcount; // TEST count of states - int simrank; // TEST simulated rank - int simref; // TEST zb0 reference count - int busy; // serialize collectives in zb - int grpid; // zb collective grpid - int error; // error code - int reduce; // set to report reduction data -}; - -/* zbcoll extension to struct cxip_ep_obj */ -struct 
cxip_ep_zbcoll_obj { - struct dlist_entry ready_list; // zbcoll ops ready to advance - struct cxip_zbcoll_obj **grptbl;// group lookup table - uint64_t grpmsk; // mask of used grptbl entries - int refcnt; // grptbl reference count - bool disable; // low level tests - ofi_spin_t lock; // group ID negotiation lock - ofi_atomic32_t dsc_count; // cumulative RCV discard count - ofi_atomic32_t err_count; // cumulative ACK error count - ofi_atomic32_t ack_count; // cumulative ACK success count - ofi_atomic32_t rcv_count; // cumulative RCV success count -}; - -/* - * Collectives context. - * - * Extension to cxip_ep_obj for collectives. - * - * Initialized in cxip_coll_init() during EP creation. - */ -struct cxip_ep_coll_obj { - struct index_map mcast_map; // mc address -> object - struct dlist_entry root_retry_list; - struct dlist_entry mc_list; // list of mcast addresses - struct cxip_coll_pte *coll_pte; // PTE extensions - struct dlist_ts sched_list; // scheduled actions - struct cxip_cmdq *rx_cmdq; // shared with STD EP - struct cxip_cmdq *tx_cmdq; // shared with STD EP - struct cxip_cntr *rx_cntr; // shared with STD EP - struct cxip_cntr *tx_cntr; // shared with STD EP - struct cxip_evtq *rx_evtq; // shared with STD EP - struct cxip_evtq *tx_evtq; // shared with STD EP - struct cxip_eq *eq; // shared with STD EP - ofi_atomic32_t num_mc; // count of MC objects - ofi_atomic32_t join_cnt; // advanced on every join - size_t min_multi_recv; // trigger value to rotate bufs - size_t buffer_size; // size of receive buffers - size_t buffer_count; // count of receive buffers - bool join_busy; // serialize joins on a node - bool is_hwroot; // set if ep is hw_root - bool enabled; // enabled - /* needed for progress after leaf sends its contribution */ - struct dlist_entry leaf_rdma_get_list; - /* used to change ctrl_msg_type to CXIP_CTRL_MSG_ZB_DATA_RDMA_LAC */ - bool leaf_save_root_lac; - /* Logical address context for leaf rdma get */ - uint64_t rdma_get_lac_va_tx; - /* pointer 
to the source buffer base used in the RDMA */ - uint8_t *root_rdma_get_data_p; - /* root rdma get memory descriptor, for entire root src buffer */ - struct cxip_md *root_rdma_get_md; -}; - -/* Receive context state machine. - * TODO: Handle unexpected RMA. - */ -enum cxip_rxc_state { - /* Initial state of an RXC. All user posted receives are rejected until - * the RXC has been enabled. - * - * Note that an RXC can be transitioned from any state into - * RXC_DISABLED. - * - * Validate state changes: - * RXC_ENABLED: User has successfully enabled the RXC. - * RXC_ENABLED_SOFTWARE: User has successfully initialized the RXC - * in a software only RX matching mode. - */ - RXC_DISABLED = 0, - - /* User posted receives are matched against the software unexpected - * list before being offloaded to hardware. Hardware matches against - * the corresponding PtlTE priority and overflow list. - * - * Validate state changes: - * RXC_ONLOAD_FLOW_CONTROL: Several scenarios can initiate this state - * change. - * 1. Hardware fails to allocate an LE for an unexpected message - * or a priority list LE append fails, and hybrid mode is not - * enabled. Hardware transitions the PtlTE from enabled to disabled. - * 2. Hardware fails to allocate an LE during an overflow list - * append. The PtlTE remains in the enabled state but appends to - * the overflow list are disabled. Software manually disables - * the PtlTE. - * 3. Hardware fails to successfully match on the overflow list. - * Hardware automatically transitions the PtlTE from enabled to - * disabled. - * RXC_ONLOAD_FLOW_CONTROL_REENABLE: Several scenarios can initiate - * it this state change: - * 1. The hardware EQ is full, hardware transitions the PtlTE from - * enabled/software managed to disabled to recover drops, but it - * can re-enable if an LE resource is not recovered. - * 2. Running "hardware" RX match mode and matching failed because - * the overflow list buffers were full. 
Hardware transitions the - * PtlTE from enabled to disabled. The overflow list must be - * replenished and processing can continue if an LE resource is not - * recovered. - * 3. Running "hybrid" or "software" RX match mode and a message - * is received, but there is not a buffer available on the request - * list. Hardware transitions the PtlTE from software managed to - * disabled. The request list must be replenished and processing - * can continue if an LE resource is not recovered. - * RXC_PENDING_PTLTE_SOFTWARE_MANAGED: When the provider is configured - * to run in "hybrid" RX match mode and hardware fails to allocate an - * LE for an unexpected message match or an priority list append fails. - * Hardware will automatically transition the PtlTE from enabled to - * software managed and onload of UX messages will be initiated. - */ - RXC_ENABLED, - - /* The NIC has initiated a transition to software managed EP matching. - * - * Software must onload/reonload the hardware unexpected list while - * creating a pending unexpected list from entries received on the PtlTE - * request list. Any in flight appends will fail and be added to - * a receive replay list, further attempts to post receive operations - * will return -FI_EAGAIN. When onloading completes, the pending - * UX list is appended to the onloaded UX list and then failed appends - * are replayed prior to enabling the posting of receive operations. - * - * Validate state changes: - * RXC_ENABLED_SOFTWARE: The HW to SW transition onloading has - * completed and the onloaded and pending request UX list have been - * combined. - */ - RXC_PENDING_PTLTE_SOFTWARE_MANAGED, - - /* Executing as a software managed PtlTE either due to hybrid - * transition from hardware or initial startup in software - * RX matching mode. - * - * Validate state changes: - * RXC_PENDING_PTLTE_HARDWARE: TODO: When able, software may - * initiate a transition from software managed mode back to - * fully offloaded operation. 
- * RXC_ONLODAD_FLOW_CONTROL_REENABLE: Hardware was unable to match - * on the request list or the EQ is full. Hardware has disabled the - * PtlTE initiating flow control. Operation can continue if LE - * resources are not recovered as long as request buffers can be - * replenished. - */ - RXC_ENABLED_SOFTWARE, - - /* TODO: Hybrid RX match mode PtlTE is transitioning from software - * managed operation back to fully offloaded operation. - * - * Validate state changes: - * RXC_ENABLED: Hybrid software managed PtlTE successfully - * transitions back to fully offloaded operation. - * RXC_ENABLED_SOFTWARE: Hybrid software managed PtlTE was - * not able to transition to fully offloaded operation. - */ - RXC_PENDING_PTLTE_HARDWARE, - - /* Software has encountered a condition which requires manual transition - * of the PtlTE into disable. This state change occurs when a posted - * receive could not be appended due to LE exhaustion and software - * managed EP PtlTE operation has been disabled or is not possible. - * - * Validate state changes: - * RXC_ONLOAD_FLOW_CONTROL: PtlTE disabled event has successfully been - * received and onloading can begin. - */ - RXC_PENDING_PTLTE_DISABLE, - - /* Flow control has occurred and the PtlTE is disabled. Software is - * in the process of onloading the hardware unexpected headers to free - * up LEs. User posted receives are matched against the software - * unexpected list. If a match is not found on the software unexpected - * list, -FI_EAGAIN is returned to the user. Hardware matching is - * disabled. - * - * Validate state changes: - * RXC_ONLOAD_FLOW_CONTROL_REENABLE: An unexpected list entry matched - * a user posted receive, the search and delete command free a - * unexpected list entry, or a transition to software managed EP is - * occuring. - */ - RXC_ONLOAD_FLOW_CONTROL, - - /* PtlTE is in the same state as RXC_ONLOAD_FLOW_CONTROL, but the RXC - * should attempt to be re-enabled. 
- * - * Validate state changes: - * RXC_FLOW_CONTROL: Onloading of the unexpected headers has completed. - */ - RXC_ONLOAD_FLOW_CONTROL_REENABLE, - - /* Software is performing sideband communication to recover the dropped - * messages. User posted receives are matched against the software - * unexpected list. If a match is not found on the software unexpected - * list, -FI_EAGAIN is returned to the user. Hardware matching is - * disabled. - * - * If an append fails due to RC_NO_SPACE while in the RXC_FLOW_CONTROL - * state, hardware LEs are exhausted and no more LEs can be freed by - * onloading unexpected headers into software. This is a fatal event - * which requires software endpoint mode to workaround. - * - * Validate state changes: - * RXC_ENABLED: Sideband communication is complete and PtlTE is - * successfully re-enabled. - * RXC_SOFTWARE_MANAGED: When executing in "hybrid" or "software" - * RX match mode and processing has requested to re-enable as a - * software managed EP. - */ - RXC_FLOW_CONTROL, -}; - -#define CXIP_COUNTER_BUCKETS 31U -#define CXIP_BUCKET_MAX (CXIP_COUNTER_BUCKETS - 1) -#define CXIP_LIST_COUNTS 3U - -struct cxip_msg_counters { - /* Histogram counting the number of messages based on priority, buffer - * type (HMEM), and message size. - */ - ofi_atomic32_t msg_count[CXIP_LIST_COUNTS][OFI_HMEM_MAX][CXIP_COUNTER_BUCKETS]; -}; - -/* Returns the most significant bit set (indexed from 1 - the LSB) */ static inline int fls64(uint64_t x) { if (!x) @@ -1901,176 +261,6 @@ cxip_msg_counters_msg_record(struct cxip_msg_counters *cntrs, ofi_atomic_add32(&cntrs->msg_count[list][buf_type][bucket], 1); } -/* - * The default for the number of SW initiated TX operation that may - * be initiated by RX processing and be outstanding. This has no - * impact on hardware initiated rendezvous gets. This value can be - * adjusted if necessary with FI_CXI_SW_RX_TX_INIT_MAX=#. 
- */ -#define CXIP_SW_RX_TX_INIT_MAX_DEFAULT 1024 -#define CXIP_SW_RX_TX_INIT_MIN 64 - -/* If a restricted rendezvous protocol notify done message - * cannot be delivered due to EQ full, delay before retrying. - */ -#define CXIP_DONE_NOTIFY_RETRY_DELAY_US 100 - -#define RXC_RESERVED_FC_SLOTS 1 - -/* RXC specialization API support */ -struct cxip_rxc_ops { - ssize_t (*recv_common)(struct cxip_rxc *rxc, void *buf, size_t len, - void *desc, fi_addr_t src_add, uint64_t tag, - uint64_t ignore, void *context, uint64_t flags, - bool tagged, struct cxip_cntr *comp_cntr); - void (*progress)(struct cxip_rxc *rxc, bool internal); - void (*recv_req_tgt_event)(struct cxip_req *req, - const union c_event *event); - int (*cancel_msg_recv)(struct cxip_req *req); - int (*ctrl_msg_cb)(struct cxip_ctrl_req *req, - const union c_event *event); - void (*init_struct)(struct cxip_rxc *rxc, struct cxip_ep_obj *ep_obj); - void (*fini_struct)(struct cxip_rxc *rxc); - void (*cleanup)(struct cxip_rxc *rxc); - int (*msg_init)(struct cxip_rxc *rxc); - int (*msg_fini)(struct cxip_rxc *rxc); -}; - -/* - * Receive context base object - */ -struct cxip_rxc { - void *context; - uint32_t protocol; - - struct fi_rx_attr attr; - bool selective_completion; - bool hmem; - bool trunc_ok; - bool sw_ep_only; - bool msg_offload; - uint8_t pid_bits; // Zero without SEP - uint8_t recv_ptl_idx; - - enum cxip_rxc_state state; - - /* Reverse link to EP object that owns this context */ - struct cxip_ep_obj *ep_obj; - - struct cxip_cq *recv_cq; - struct cxip_cntr *recv_cntr; - - struct cxip_rxc_ops ops; - - struct cxip_domain *domain; - - /* RXC receive portal table, event queue and hardware - * command queue. - */ - struct cxip_evtq rx_evtq; - struct cxip_pte *rx_pte; - struct cxip_cmdq *rx_cmdq; - int orx_reqs; - - /* If FI_MULTI_RECV is supported, minimum receive size required - * for buffers posted. 
- */ - size_t min_multi_recv; - - /* If TX events are required by specialization, the maximum - * credits that can be used. - */ - int32_t max_tx; - unsigned int recv_appends; - - struct cxip_msg_counters cntrs; -}; - -/* Receive context specialization for supporting HPC messaging - * that requires SAS implemented in a Portals environment. - */ -struct cxip_rxc_hpc { - /* Must be first */ - struct cxip_rxc base; - - int max_eager_size; - uint64_t rget_align_mask; - - /* Window when FI_CLAIM mutual exclusive access is required */ - bool hw_claim_in_progress; - - int sw_ux_list_len; - int sw_pending_ux_list_len; - - /* Number of unexpected list entries in HW. */ - ofi_atomic32_t orx_hw_ule_cnt; - - /* RX context transmit queue is separated into two logical - * queues, one used for rendezvous get initiation and one - * used for notifications. Depending on the messaging protocols - * and traffic classes in use, the two logical queues could - * point to the same hardware queue or be distinct. - */ - struct cxip_cmdq *tx_rget_cmdq; - struct cxip_cmdq *tx_cmdq; - ofi_atomic32_t orx_tx_reqs; - - /* Software receive queue. User posted requests are queued here instead - * of on hardware if the RXC is in software endpoint mode. - */ - struct dlist_entry sw_recv_queue; - - /* Defer events to wait for both put and put overflow */ - struct def_event_ht deferred_events; - - /* Unexpected message handling */ - struct cxip_ptelist_bufpool *req_list_bufpool; - struct cxip_ptelist_bufpool *oflow_list_bufpool; - - enum cxip_rxc_state prev_state; - enum cxip_rxc_state new_state; - enum c_sc_reason fc_reason; - - /* RXC drop count used for FC accounting. */ - int drop_count; - - /* Array of 8-byte of unexpected headers remote offsets. */ - uint64_t *ule_offsets; - unsigned int num_ule_offsets; - - /* Current remote offset to be processed. Incremented after processing - * a search and delete put event. 
- */ - unsigned int cur_ule_offsets; - - struct dlist_entry fc_drops; - struct dlist_entry replay_queue; - struct dlist_entry sw_ux_list; - struct dlist_entry sw_pending_ux_list; - - /* Flow control/software state change metrics */ - int num_fc_eq_full; - int num_fc_no_match; - int num_fc_unexp; - int num_fc_append_fail; - int num_fc_req_full; - int num_sc_nic_hw2sw_append_fail; - int num_sc_nic_hw2sw_unexp; -}; - -/* Receive context specialization for supporting client/server - * messaging. - */ -struct cxip_rxc_rnr { - /* Must be first */ - struct cxip_rxc base; - - bool hybrid_mr_desc; - /* Used when success events are not required */ - struct cxip_req *req_selective_comp_msg; - struct cxip_req *req_selective_comp_tag; -}; - static inline void cxip_copy_to_md(struct cxip_md *md, void *dest, const void *src, size_t size, bool require_dev_reg_copy) @@ -2124,455 +314,6 @@ static inline void cxip_copy_from_md(struct cxip_md *md, void *dest, } } -/* PtlTE buffer pool - Common PtlTE request/overflow list buffer - * management. - * - * Only C_PTL_LIST_REQUEST and C_PTL_LIST_OVERFLOW are supported. - */ -struct cxip_ptelist_bufpool_attr { - enum c_ptl_list list_type; - - /* Callback to handle PtlTE link error/unlink events */ - int (*ptelist_cb)(struct cxip_req *req, const union c_event *event); - size_t buf_size; - size_t min_space_avail; - size_t min_posted; - size_t max_posted; - size_t max_cached; -}; - -struct cxip_ptelist_bufpool { - struct cxip_ptelist_bufpool_attr attr; - struct cxip_rxc_hpc *rxc; - size_t buf_alignment; - - /* Ordered list of buffers emitted to hardware */ - struct dlist_entry active_bufs; - - /* List of consumed buffers which cannot be reposted yet - * since unexpected entries have not been matched. - */ - struct dlist_entry consumed_bufs; - - /* List of available buffers that may be appended to the list. - * These could be from a previous append failure or be cached - * from previous message processing to avoid map/unmap of - * list buffer. 
- */ - struct dlist_entry free_bufs; - - ofi_atomic32_t bufs_linked; - ofi_atomic32_t bufs_allocated; - ofi_atomic32_t bufs_free; -}; - -struct cxip_ptelist_req { - /* Pending list of unexpected header entries which could not be placed - * on the RX context unexpected header list due to put events being - * received out-of-order. - */ - struct dlist_entry pending_ux_list; -}; - -struct cxip_ptelist_buf { - struct cxip_ptelist_bufpool *pool; - - /* RX context the request buffer is posted on. */ - struct cxip_rxc_hpc *rxc; - enum cxip_le_type le_type; - struct dlist_entry buf_entry; - struct cxip_req *req; - - /* Memory mapping of req_buf field. */ - struct cxip_md *md; - - /* The number of bytes consume by hardware when the request buffer was - * unlinked. - */ - size_t unlink_length; - - /* Current offset into the buffer where packets/data are landing. When - * the cur_offset is equal to unlink_length, software has completed - * event processing for the buffer. - */ - size_t cur_offset; - - /* Request list specific control information */ - struct cxip_ptelist_req request; - - /* The number of unexpected headers posted placed on the RX context - * unexpected header list which have not been matched. - */ - ofi_atomic32_t refcount; - - /* Buffer used to land packets. 
*/ - char *data; -}; - -int cxip_ptelist_bufpool_init(struct cxip_rxc_hpc *rxc, - struct cxip_ptelist_bufpool **pool, - struct cxip_ptelist_bufpool_attr *attr); -void cxip_ptelist_bufpool_fini(struct cxip_ptelist_bufpool *pool); -int cxip_ptelist_buf_replenish(struct cxip_ptelist_bufpool *pool, - bool seq_restart); -void cxip_ptelist_buf_link_err(struct cxip_ptelist_buf *buf, - int rc_link_error); -void cxip_ptelist_buf_unlink(struct cxip_ptelist_buf *buf); -void cxip_ptelist_buf_put(struct cxip_ptelist_buf *buf, bool repost); -void cxip_ptelist_buf_get(struct cxip_ptelist_buf *buf); -void cxip_ptelist_buf_consumed(struct cxip_ptelist_buf *buf); - -/* - * cxip_req_bufpool_init() - Initialize PtlTE request list buffer management - * object. - */ -int cxip_req_bufpool_init(struct cxip_rxc_hpc *rxc); -void cxip_req_bufpool_fini(struct cxip_rxc_hpc *rxc); - -/* - * cxip_oflow_bufpool_init() - Initialize PtlTE overflow list buffer management - * object. - */ -int cxip_oflow_bufpool_init(struct cxip_rxc_hpc *rxc); -void cxip_oflow_bufpool_fini(struct cxip_rxc_hpc *rxc); - -void _cxip_req_buf_ux_free(struct cxip_ux_send *ux, bool repost); -void cxip_req_buf_ux_free(struct cxip_ux_send *ux); - -#define CXIP_RDZV_IDS (1 << CXIP_TOTAL_RDZV_ID_WIDTH) -#define CXIP_RDZV_IDS_MULTI_RECV (1 << CXIP_RDZV_ID_CMD_WIDTH) -#define CXIP_TX_IDS (1 << CXIP_TX_ID_WIDTH) - -/* One per LAC */ -#define RDZV_SRC_LES 8U -#define RDZV_NO_MATCH_PTES 8U - -/* Base rendezvous PtlTE object */ -struct cxip_rdzv_pte { - struct cxip_txc_hpc *txc; - struct cxip_pte *pte; - - /* Count of the number of buffers successfully linked on this PtlTE. */ - ofi_atomic32_t le_linked_success_count; - - /* Count of the number of buffers failed to link on this PtlTE. 
*/ - ofi_atomic32_t le_linked_failure_count; -}; - -/* Matching PtlTE for user generated unrestricted get DMA */ -struct cxip_rdzv_match_pte { - struct cxip_rdzv_pte base_pte; - - /* Request structure used to handle zero byte puts used for match - * complete. - */ - struct cxip_req *zbp_req; - - /* Request structures used to handle rendezvous source/data transfers. - * There is one request structure (and LE) for each LAC. - */ - struct cxip_req *src_reqs[RDZV_SRC_LES]; -}; - -/* Matching PtlTE for user generated restricted get DMA. One PtlTE - * per LAC used. - */ -struct cxip_rdzv_nomatch_pte { - struct cxip_rdzv_pte base_pte; - struct cxip_req *le_req; -}; - -#if ENABLE_DEBUG -/* Defines to force hard to test TXC error path failures; - * only valid for debug unit testing. See txc->force_err. - */ -#define CXIP_TXC_FORCE_ERR_ALT_READ_PROTO_ALLOC (1 << 0) -#endif - -/* TXC specialization API support */ -struct cxip_txc_ops { - ssize_t (*send_common)(struct cxip_txc *txc, uint32_t tclass, - const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, uint64_t tag, - void *context, uint64_t flags, bool tagged, - bool triggered, uint64_t trig_thresh, - struct cxip_cntr *trig_cntr, - struct cxip_cntr *comp_cntr); - void (*progress)(struct cxip_txc *txc, bool internal); - int (*cancel_msg_send)(struct cxip_req *req); - void (*init_struct)(struct cxip_txc *txc, struct cxip_ep_obj *ep_obj); - void (*fini_struct)(struct cxip_txc *txc); - void (*cleanup)(struct cxip_txc *txc); - int (*msg_init)(struct cxip_txc *txc); - int (*msg_fini)(struct cxip_txc *txc); -}; - -/* - * Endpoint object transmit context - */ -struct cxip_txc { - void *context; - - uint32_t protocol; - bool enabled; - bool hrp_war_req; // Non-fetching 32-bit HRP - bool hmem; - bool trunc_ok; - - struct cxip_cq *send_cq; - struct cxip_cntr *send_cntr; - struct cxip_cntr *read_cntr; - struct cxip_cntr *write_cntr; - - struct cxip_txc_ops ops; - - struct cxip_ep_obj *ep_obj; // parent EP 
object - struct cxip_domain *domain; // parent domain - uint8_t pid_bits; - uint8_t recv_ptl_idx; - - struct fi_tx_attr attr; // attributes - bool selective_completion; - uint32_t tclass; - - /* TX H/W Event Queue */ - struct cxip_evtq tx_evtq; - - /* Inject buffers for EP, protected by ep_obj->lock */ - struct ofi_bufpool *ibuf_pool; - - struct cxip_cmdq *tx_cmdq; // added during cxip_txc_enable() - int otx_reqs; // outstanding transmit requests - - /* Queue of TX messages in flight for the context */ - struct dlist_entry msg_queue; - - struct cxip_req *rma_write_selective_completion_req; - struct cxip_req *rma_read_selective_completion_req; - struct cxip_req *amo_selective_completion_req; - struct cxip_req *amo_fetch_selective_completion_req; - - struct dlist_entry dom_entry; -}; - -/* Default HPC SAS TXC specialization */ -struct cxip_txc_hpc { - /* Must remain first */ - struct cxip_txc base; - - int max_eager_size; - int rdzv_eager_size; - - /* Rendezvous messaging support */ - struct cxip_rdzv_match_pte *rdzv_pte; - struct cxip_rdzv_nomatch_pte *rdzv_nomatch_pte[RDZV_NO_MATCH_PTES]; - struct indexer rdzv_ids; - struct indexer msg_rdzv_ids; - enum cxip_rdzv_proto rdzv_proto; - - struct cxip_cmdq *rx_cmdq; // Target cmdq for Rendezvous buffers - -#if ENABLE_DEBUG - uint64_t force_err; -#endif - /* Flow Control recovery */ - struct dlist_entry fc_peers; - - /* Match complete IDs */ - struct indexer tx_ids; - -}; - -/* Client/server derived TXC, does not support SAS ordering - * or remotely buffered unexpected messages. 
- */ -#define CXIP_RNR_TIMEOUT_US 500000 -#define CXIP_NUM_RNR_WAIT_QUEUE 5 - -struct cxip_txc_rnr { - /* Must remain first */ - struct cxip_txc base; - - uint64_t max_retry_wait_us; /* Maximum time to retry any request */ - ofi_atomic32_t time_wait_reqs; /* Number of RNR time wait reqs */ - uint64_t next_retry_wait_us; /* Time of next retry in all queues */ - uint64_t total_retries; - uint64_t total_rnr_nacks; - bool hybrid_mr_desc; - - /* Used when success events are not required */ - struct cxip_req *req_selective_comp_msg; - struct cxip_req *req_selective_comp_tag; - - /* There are CXIP_NUM_RNR_WAIT_QUEUE queues where each queue has - * a specified time wait value and where the last queue is has the - * maximum time wait value before retrying (and is used for all - * subsequent retries). This implementation allows each queue to - * be maintained in retry order with a simple append of the request. - */ - struct dlist_entry time_wait_queue[CXIP_NUM_RNR_WAIT_QUEUE]; -}; - -int cxip_txc_emit_idc_put(struct cxip_txc *txc, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - const struct c_cstate_cmd *c_state, - const struct c_idc_put_cmd *put, const void *buf, - size_t len, uint64_t flags); -int cxip_txc_emit_dma(struct cxip_txc *txc, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct cxip_cntr *trig_cntr, size_t trig_thresh, - struct c_full_dma_cmd *dma, uint64_t flags); -int cxip_txc_emit_idc_amo(struct cxip_txc *txc, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - const struct c_cstate_cmd *c_state, - const struct c_idc_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); -int cxip_txc_emit_dma_amo(struct cxip_txc *txc, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct cxip_cntr *trig_cntr, size_t trig_thresh, - struct c_dma_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); -int 
cxip_txc_emit_idc_msg(struct cxip_txc *txc, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - const struct c_cstate_cmd *c_state, - const struct c_idc_msg_hdr *msg, const void *buf, - size_t len, uint64_t flags); - -void cxip_txc_flush_msg_trig_reqs(struct cxip_txc *txc); - -/* - * Endpoint Control Object - * - * Groups control MR and messaging structures that can be exclusively used - * for a standard EP or globally shared in a SEP by all RX/TX context. - */ -struct cxip_ctrl { - /* wait object is required to wake up CQ waiters - * when control progress is required. - */ - struct cxil_wait_obj *wait; - - struct cxi_eq *tgt_evtq; - struct cxi_eq *tx_evtq; - - /* TX command queue is used to initiate side-band messaging - * and is TX credit based. - */ - struct cxip_cmdq *txq; - unsigned int tx_credits; - - /* Target command queue is used for appending RX side-band - * messaging control LE and managing standard MR LE. - */ - struct cxip_cmdq *tgq; - struct cxip_pte *pte; - struct cxip_ctrl_req msg_req; - - /* FI_MR_PROV_KEY caching, protected with ep_obj->lock */ - struct cxip_mr_lac_cache std_mr_cache[CXIP_NUM_CACHED_KEY_LE]; - struct cxip_mr_lac_cache opt_mr_cache[CXIP_NUM_CACHED_KEY_LE]; - - struct dlist_entry mr_list; - - /* Event queue buffers */ - void *tgt_evtq_buf; - struct cxi_md *tgt_evtq_buf_md; - void *tx_evtq_buf; - struct cxi_md *tx_evtq_buf_md; -}; - -/* - * Base Endpoint Object - * - * Support structure, libfabric fi_endpoint implementation. - * - * This is the meat of the endpoint object. It has been separated from cxip_ep - * to support aliasing. - */ -struct cxip_ep_obj { - /* Allow lock to be optimized out with FI_THREAD_DOMAIN */ - struct ofi_genlock lock; - struct cxip_domain *domain; - struct cxip_av *av; - - struct fid_peer_srx *owner_srx; - - /* Domain has been configured with FI_AV_AUTH_KEY. */ - bool av_auth_key; - - /* This is only valid if FI_AV_AUTH_KEY is false. 
*/ - struct cxi_auth_key auth_key; - - /* Array of VNIs if FI_AV_AUTH_KEY is true. */ - uint16_t *vnis; - size_t vni_count; - - struct cxip_addr src_addr; - fi_addr_t fi_addr; - - bool enabled; - - /* Endpoint protocol implementations. - * FI_PROTO_CXI - Portals SAS protocol - */ - uint32_t protocol; - struct cxip_txc *txc; - struct cxip_rxc *rxc; - - /* Internal support for CQ wait object */ - struct cxil_wait_obj *priv_wait; - int wait_fd; - - /* ASIC version associated with EP/Domain */ - enum cassini_version asic_ver; - - /* Information that might be owned by an EP (or a SEP - * when implemented). Should ultimately be a pointer - * to a base/specialization. - */ - struct cxip_ctrl ctrl; - - /* Command queues. Each EP has 1 transmit and 1 target - * command queue that can be shared. An optional 2nd transmit - * command queue may be created for RX initiated rgets. - */ - struct cxip_cmdq *txq; - ofi_atomic32_t txq_ref; - struct cxip_cmdq *tgq; - ofi_atomic32_t tgq_ref; - struct cxip_cmdq *rx_txq; - - /* Libfabric software EQ resource */ - struct cxip_eq *eq; - struct dlist_entry eq_link; - - /* Values at base EP creation */ - uint64_t caps; - struct fi_ep_attr ep_attr; - struct fi_tx_attr tx_attr; - struct fi_rx_attr rx_attr; - - /* Require memcpy's via the dev reg APIs. */ - bool require_dev_reg_copy[OFI_HMEM_MAX]; - - /* Collectives support */ - struct cxip_ep_coll_obj coll; - struct cxip_ep_zbcoll_obj zbcoll; - - size_t txq_size; - size_t tgq_size; - ofi_atomic32_t ref; - struct cxip_portals_table *ptable; -}; - -int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len, - uint64_t access, uint64_t flags, struct cxip_md **md); - static inline void cxip_ep_obj_copy_to_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, const void *src, size_t size) @@ -2656,200 +397,11 @@ static inline void cxip_rxc_orx_reqs_init(struct cxip_rxc *rxc) rxc->orx_reqs = 0; } -/* - * CXI endpoint implementations to support FI_CLASS_EP. 
- */ -struct cxip_ep { - struct fid_ep ep; - struct fi_tx_attr tx_attr; - struct fi_rx_attr rx_attr; - struct cxip_ep_obj *ep_obj; - int is_alias; -}; - -size_t cxip_ep_get_unexp_msgs(struct fid_ep *fid_ep, - struct fi_cq_tagged_entry *entry, size_t count, - fi_addr_t *src_addr, size_t *ux_count); -int cxip_build_ux_entry_info(struct cxip_ep *ep, - struct fi_cq_tagged_entry *entry, size_t count, - fi_addr_t *src_addr, size_t *ux_count); - -enum cxip_mr_state { - CXIP_MR_DISABLED = 1, - CXIP_MR_ENABLED, - CXIP_MR_LINKED, - CXIP_MR_UNLINKED, - CXIP_MR_LINK_ERR, -}; - -/* - * Memory Region - * - * libfabric fi_mr implementation. - * - * Created in cxip_regattr(). - */ -struct cxip_mr { - struct fid_mr mr_fid; - struct cxip_domain *domain; // parent domain - struct cxip_ep *ep; // endpoint for remote memory - uint64_t key; // memory key - uint64_t flags; // special flags - struct fi_mr_attr attr; // attributes - struct cxip_cntr *cntr; // if bound to cntr - - /* Indicates if FI_RMA_EVENT was specified at creation and - * will be used to enable fi_writedata() and fi_inject_writedata() - * support for this MR (TODO). - */ - bool rma_events; - - /* If requested then count MR events to determine if RMA are in - * progress. At close if no RMA are in progress bypass the invalidate - * of the PTLTE LE. This improves non-cached key close performance, - * enabling their use so that after closing the MR the associated - * memory cannot be remotely accessed, even if it remains in the - * libfabric MR cache. 
- */ - bool count_events; - ofi_atomic32_t match_events; - ofi_atomic32_t access_events; - - ofi_spin_t lock; - - struct cxip_mr_util_ops *mr_util; - bool enabled; - struct cxip_pte *pte; - enum cxip_mr_state mr_state; - int64_t mr_id; // Non-cached provider key uniqueness - struct cxip_ctrl_req req; - bool optimized; - - void *buf; // memory buffer VA - uint64_t len; // memory length - struct cxip_md *md; // buffer IO descriptor - struct dlist_entry ep_entry; - - struct dlist_entry mr_domain_entry; -}; - -struct cxip_av_auth_key_entry { - ofi_atomic32_t use_cnt; - ofi_atomic32_t ref_cnt; - UT_hash_handle hh; - struct dlist_entry entry; - struct cxi_auth_key key; - fi_addr_t fi_addr; -}; - -struct cxip_av_entry { - ofi_atomic32_t use_cnt; - UT_hash_handle hh; - struct cxip_addr addr; - fi_addr_t fi_addr; - struct cxip_av_auth_key_entry *auth_key; -}; - -struct cxip_av { - struct fid_av av_fid; - struct cxip_domain *domain; - - /* List of endpoints bound to this AV. Each bind takes a reference - * as well. - */ - struct dlist_entry ep_list; - ofi_atomic32_t ref; - - /* Memory used to implement lookups. Two data structures are used. - * 1. ibuf pool for O(1) lookup on the data path - * 2. hash table for O(1) on the receive path - */ - struct cxip_av_entry *av_entry_hash; - struct ofi_bufpool *av_entry_pool; - ofi_atomic32_t av_entry_cnt; - - /* Memory used to support AV authorization key. Three data structures - * are needed. - * 1. ibuf pool for memory allocation and lookup O(1) access. - * 2. hash table for O(1) reverse lookup - * 3. List for iterating - */ - struct cxip_av_auth_key_entry *auth_key_entry_hash; - struct ofi_bufpool *auth_key_entry_pool; - struct dlist_entry auth_key_entry_list; - ofi_atomic32_t auth_key_entry_cnt; - size_t auth_key_entry_max; - - /* Single lock is used to protect entire AV. With domain level - * threading, this lock is not used. - */ - bool lockless; - pthread_rwlock_t lock; - - /* AV is configured as symmetric. 
This is an optimization which enables - * endpoints to use logical address. - */ - bool symmetric; - - /* Address vector type. */ - enum fi_av_type type; - - /* Whether or not the AV is operating in FI_AV_AUTH_KEY mode. */ - bool av_auth_key; - - /* Whether or not the AV was opened with FI_AV_USER_ID. */ - bool av_user_id; -}; - -int cxip_av_auth_key_get_vnis(struct cxip_av *av, uint16_t **vni, - size_t *vni_count); -void cxip_av_auth_key_put_vnis(struct cxip_av *av, uint16_t *vni, - size_t vni_count); -extern struct cxip_addr *(*cxip_av_addr_in)(const void *addr); -extern void (*cxip_av_addr_out)(struct cxip_addr *addr_out, - struct cxip_addr *addr); -int cxip_av_lookup_addr(struct cxip_av *av, fi_addr_t fi_addr, - struct cxip_addr *addr); -fi_addr_t cxip_av_lookup_fi_addr(struct cxip_av *av, - const struct cxip_addr *addr); -fi_addr_t cxip_av_lookup_auth_key_fi_addr(struct cxip_av *av, unsigned int vni); -int cxip_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context); -int cxip_av_bind_ep(struct cxip_av *av, struct cxip_ep *ep); -void cxip_av_unbind_ep(struct cxip_av *av, struct cxip_ep *ep); static inline int cxip_av_entry_count(struct cxip_av *av) { return ofi_atomic_get32(&av->av_entry_cnt); } -/* - * AV Set - * - * libfabric fi_av_set implementation. - * - * Created in cxip_av_set(). 
- */ -struct cxip_av_set { - struct fid_av_set av_set_fid; - struct cxip_av *cxi_av; // associated AV - struct cxip_coll_mc *mc_obj; // reference MC - fi_addr_t *fi_addr_ary; // addresses in set - size_t fi_addr_cnt; // count of addresses - struct cxip_comm_key comm_key; // communication key - uint64_t flags; -}; - -/* Needed for math functions */ -union cxip_dbl_bits { - struct { - uint64_t mantissa:52; - uint64_t exponent:11; - uint64_t sign:1; - } __attribute__((__packed__)); - double dval; - uint64_t ival; -}; - static inline uint64_t _dbl2bits(double d) { #if (BYTE_ORDER == LITTLE_ENDIAN) @@ -2883,383 +435,10 @@ static inline void _decompose_dbl(double d, int *sgn, int *exp, #endif } -/* data structures for reduction support */ -enum cxip_coll_redtype { - REDTYPE_BYT, - REDTYPE_INT, - REDTYPE_FLT, - REDTYPE_IMINMAX, - REDTYPE_FMINMAX, - REDTYPE_REPSUM -}; - -/* int AND, OR, XOR, MIN, MAX, SUM */ -struct cxip_intval { - int64_t ival[4]; -}; - -/* flt MIN, MAX, SUM */ -struct cxip_fltval { - double fval[4]; -}; - -/* int MINMAXLOC */ -struct cxip_iminmax { - int64_t iminval; - uint64_t iminidx; - int64_t imaxval; - uint64_t imaxidx; -}; - -/* flt MINMAXLOC */ -struct cxip_fltminmax { - double fminval; - uint64_t fminidx; - double fmaxval; - uint64_t fmaxidx; -}; - -/* repsum SUM */ -struct cxip_repsum { - int64_t T[4]; - int32_t M; - int8_t overflow_id; - bool inexact; - bool overflow; - bool invalid; -}; - -/* Collective operation states */ -enum cxip_coll_state { - CXIP_COLL_STATE_NONE, - CXIP_COLL_STATE_READY, - CXIP_COLL_STATE_FAULT, -}; - -const char *cxip_strerror(int prov_errno); - -/* Rosetta reduction engine error codes */ -typedef enum cxip_coll_rc { - CXIP_COLL_RC_SUCCESS = 0, // good - CXIP_COLL_RC_FLT_INEXACT = 1, // result was rounded - CXIP_COLL_RC_FLT_OVERFLOW = 3, // result too large to represent - CXIP_COLL_RC_FLT_INVALID = 4, // op was signalling NaN, or - // infinities subtracted - CXIP_COLL_RC_REP_INEXACT = 5, // reproducible sum was rounded 
- CXIP_COLL_RC_INT_OVERFLOW = 6, // reproducible sum overflow - CXIP_COLL_RC_CONTR_OVERFLOW = 7, // too many contributions seen - CXIP_COLL_RC_OP_MISMATCH = 8, // conflicting opcodes - CXIP_COLL_RC_TX_FAILURE = 9, // internal send error - CXIP_COLL_RC_RDMA_FAILURE = 10, // leaf rdma read error - CXIP_COLL_RC_RDMA_DATA_FAILURE = 11, // leaf rdma read data misc - CXIP_COLL_RC_MAX = 12 -} cxip_coll_rc_t; - -struct cxip_coll_buf { - struct dlist_entry buf_entry; // linked list of buffers - struct cxip_req *req; // associated LINK request - struct cxip_md *cxi_md; // buffer memory descriptor - size_t bufsiz; // buffer size in bytes - uint8_t buffer[]; // buffer space itself -}; - -struct cxip_coll_pte { - struct cxip_pte *pte; // Collectives PTE - struct cxip_ep_obj *ep_obj; // Associated endpoint - struct cxip_coll_mc *mc_obj; // Associated multicast object - struct dlist_entry buf_list; // PTE receive buffers - ofi_atomic32_t buf_cnt; // count of linked buffers - ofi_atomic32_t buf_swap_cnt; // for diagnostics - ofi_atomic32_t recv_cnt; // for diagnostics - int buf_low_water; // for diagnostics - bool enabled; // enabled -}; - -/* REQUIRED: - * sizeof(struct cxip_coll_accumulator) >= sizeof(struct cxip_coll_data) - * (opaque) struct cxip_coll_accumulator exported in fi_cxi_ext.h - */ -struct cxip_coll_data { - union { - uint8_t databuf[32]; // raw data buffer - struct cxip_intval intval; // 4 integer values + flags - struct cxip_fltval fltval; // 4 double values + flags - struct cxip_iminmax intminmax; // 1 intminmax structure + flags - struct cxip_fltminmax fltminmax;// 1 fltminmax structure + flags - struct cxip_repsum repsum; // 1 repsum structure + flags - }; - cxip_coll_op_t red_op; // reduction opcode - cxip_coll_rc_t red_rc; // reduction return code - int red_cnt; // reduction contrib count - bool initialized; -}; - -struct coll_counters { - int32_t coll_recv_cnt; - int32_t send_cnt; - int32_t recv_cnt; - int32_t pkt_cnt; - int32_t seq_err_cnt; - int32_t 
tmout_cnt; -}; - -struct cxip_coll_metrics_ep { - int myrank; - bool isroot; -}; -struct cxip_coll_metrics { - long red_count_bad; - long red_count_full; - long red_count_partial; - long red_count_unreduced; - struct cxip_coll_metrics_ep ep_data; -}; - -void cxip_coll_reset_mc_ctrs(struct fid_mc *mc); -void cxip_coll_get_mc_ctrs(struct fid_mc *mc, struct coll_counters *counters); - -void cxip_coll_init_metrics(void); -void cxip_coll_get_metrics(struct cxip_coll_metrics *metrics); - -struct cxip_coll_reduction { - struct cxip_coll_mc *mc_obj; // parent mc_obj - uint32_t red_id; // reduction id - uint16_t seqno; // reduction sequence number - uint16_t resno; // reduction result number - struct cxip_req *op_inject_req; // active operation request - enum cxip_coll_state coll_state; // reduction state on node - struct cxip_coll_data accum; // reduction accumulator - struct cxip_coll_data backup; // copy of above - void *op_rslt_data; // user recv buffer (or NULL) - int op_data_bytcnt; // bytes in send/recv buffers - void *op_context; // caller's context - bool in_use; // reduction is in-use - bool pktsent; // reduction packet sent - bool completed; // reduction is completed - bool rdma_get_sent; // rdma get from leaf to root - bool rdma_get_completed; // rdma get completed - int rdma_get_cb_rc; // rdma get status - uint64_t leaf_contrib_start_us; // leaf ts after contrib send - bool drop_send; // drop the next send operation - bool drop_recv; // drop the next recv operation - enum cxip_coll_rc red_rc; // set by first error - struct timespec tv_expires; // need to retry? 
- struct timespec arm_expires; // RE expiration time for this red_id - struct dlist_entry tmout_link; // link to timeout list - uint8_t tx_msg[64]; // static packet memory -}; - -struct cxip_coll_mc { - struct fid_mc mc_fid; - struct dlist_entry entry; // Link to mc object list - struct cxip_ep_obj *ep_obj; // Associated endpoint - struct cxip_av_set *av_set_obj; // associated AV set - struct cxip_zbcoll_obj *zb; // zb object for zbcol - struct cxip_coll_pte *coll_pte; // collective PTE - struct timespec rootexpires; // root wait expiration timeout - struct timespec leafexpires; // leaf wait expiration timeout - struct timespec curlexpires; // CURL delete expiration timeout - fi_addr_t mynode_fiaddr; // fi_addr of this node - int mynode_idx; // av_set index of this node - uint32_t hwroot_idx; // av_set index of hwroot node - uint32_t mcast_addr; // multicast target address - int tail_red_id; // tail active red_id - int next_red_id; // next available red_id - int max_red_id; // limit total concurrency - int seqno; // rolling seqno for packets - int close_state; // the state of the close operation - bool has_closed; // true after a mc close call - bool has_error; // true if any error - bool is_multicast; // true if multicast address - bool arm_disable; // arm-disable for testing - bool retry_disable; // retry-disable for testing - bool is_joined; // true if joined - bool rx_discard; // true to discard RX events - enum cxi_traffic_class tc; // traffic class - enum cxi_traffic_class_type tc_type; // traffic class type - ofi_atomic32_t send_cnt; // for diagnostics - ofi_atomic32_t recv_cnt; // for diagnostics - ofi_atomic32_t pkt_cnt; // for diagnostics - ofi_atomic32_t seq_err_cnt; // for diagnostics - ofi_atomic32_t tmout_cnt; // for diagnostics - ofi_spin_t lock; - - struct cxi_md *reduction_md; // memory descriptor for DMA - struct cxip_coll_reduction reduction[CXIP_COLL_MAX_CONCUR]; - /* Logical address context for leaf rdma get */ - uint64_t rdma_get_lac_va_tx; - 
/* Logical address context recieved by the leaf */ - uint64_t rdma_get_lac_va_rx; - /* pointer to the source buffer base used in the RDMA */ - uint8_t *root_rdma_get_data_p; - /* pointer to the dest buffer base used in the RDMA */ - uint8_t *leaf_rdma_get_data_p; - /* root rdma get memory descriptor, for entire root src buffer */ - struct cxip_md *root_rdma_get_md; - /* leaf rdma get memory descriptor, for entire leaf dest buffer */ - struct cxip_md *leaf_rdma_get_md; -}; - -struct cxip_curl_handle; - -typedef void (*curlcomplete_t)(struct cxip_curl_handle *); - -struct cxip_curl_handle { - long status; // HTTP status, 0 for no server, -1 busy - const char *endpoint; // HTTP server endpoint address - const char *request; // HTTP request data - const char *response; // HTTP response data, NULL until complete - curlcomplete_t usrfunc; // user completion function - void *usrptr; // user function argument - void *recv; // opaque - void *headers; // opaque -}; - -/* Low-level CURL POST/DELETE async wrappers */ -enum curl_ops { - CURL_GET, - CURL_PUT, - CURL_POST, - CURL_PATCH, - CURL_DELETE, - CURL_MAX -}; -extern bool cxip_collectives_supported; -int cxip_curl_init(void); -void cxip_curl_fini(void); -const char *cxip_curl_opname(enum curl_ops op); -int cxip_curl_perform(const char *endpoint, const char *request, - const char *sessionToken, size_t rsp_init_size, - enum curl_ops op, bool verbose, - curlcomplete_t usrfunc, void *usrptr); -int cxip_curl_progress(struct cxip_curl_handle **handleptr); -void cxip_curl_free(struct cxip_curl_handle *handle); - static inline void single_to_double_quote(char *str) { do {if (*str == '\'') *str = '"';} while (*(++str)); } -enum json_type cxip_json_obj(const char *desc, struct json_object *jobj, - struct json_object **jval); -int cxip_json_bool(const char *desc, struct json_object *jobj, bool *val); -int cxip_json_int(const char *desc, struct json_object *jobj, int *val); -int cxip_json_int64(const char *desc, struct json_object 
*jobj, int64_t *val); -int cxip_json_double(const char *desc, struct json_object *jobj, double *val); -int cxip_json_string(const char *desc, struct json_object *jobj, - const char **val); -struct json_object *cxip_json_tokener_parse(const char *str); -int cxip_json_object_put(struct json_object *obj); - -/* Perform zero-buffer collectives */ -void cxip_tree_rowcol(int radix, int nodeidx, int *row, int *col, int *siz); -void cxip_tree_nodeidx(int radix, int row, int col, int *nodeidx); -int cxip_tree_relatives(int radix, int nodeidx, int maxnodes, int *rels); - -int cxip_zbcoll_recv_cb(struct cxip_ep_obj *ep_obj, uint32_t init_nic, - uint32_t init_pid, uint64_t mbv, uint64_t data); -void cxip_zbcoll_send(struct cxip_zbcoll_obj *zb, int srcidx, int dstidx, - uint64_t payload); -void cxip_zbcoll_free(struct cxip_zbcoll_obj *zb); -int cxip_zbcoll_alloc(struct cxip_ep_obj *ep_obj, int num_addrs, - fi_addr_t *fiaddrs, int simrank, - struct cxip_zbcoll_obj **zbp); -int cxip_zbcoll_simlink(struct cxip_zbcoll_obj *zb0, - struct cxip_zbcoll_obj *zb); -void cxip_zbcoll_set_user_cb(struct cxip_zbcoll_obj *zb, - zbcomplete_t userfunc, void *userptr); - -int cxip_zbcoll_max_grps(bool sim); -int cxip_zbcoll_getgroup(struct cxip_zbcoll_obj *zb); -void cxip_zbcoll_rlsgroup(struct cxip_zbcoll_obj *zb); -int cxip_zbcoll_broadcast(struct cxip_zbcoll_obj *zb, uint64_t *dataptr); -int cxip_zbcoll_reduce(struct cxip_zbcoll_obj *zb, uint64_t *dataptr); -int cxip_zbcoll_barrier(struct cxip_zbcoll_obj *zb); -void cxip_ep_zbcoll_progress(struct cxip_ep_obj *ep_obj); - -void cxip_zbcoll_reset_counters(struct cxip_ep_obj *ep_obj); -void cxip_zbcoll_get_counters(struct cxip_ep_obj *ep_obj, uint32_t *dsc, - uint32_t *err, uint32_t *ack, uint32_t *rcv); -void cxip_zbcoll_fini(struct cxip_ep_obj *ep_obj); -int cxip_zbcoll_init(struct cxip_ep_obj *ep_obj); - -/* - * CNTR/CQ wait object file list element - * - * Support structure. - * - * Created in cxip_cntr_open(), cxip_cq_open(). 
- */ -struct cxip_fid_list { - struct dlist_entry entry; - struct fid *fid; -}; - -int cxip_rdzv_match_pte_alloc(struct cxip_txc_hpc *txc, - struct cxip_rdzv_match_pte **rdzv_pte); -int cxip_rdzv_nomatch_pte_alloc(struct cxip_txc_hpc *txc, int lac, - struct cxip_rdzv_nomatch_pte **rdzv_pte); -int cxip_rdzv_pte_src_req_alloc(struct cxip_rdzv_match_pte *pte, int lac); -void cxip_rdzv_match_pte_free(struct cxip_rdzv_match_pte *pte); -void cxip_rdzv_nomatch_pte_free(struct cxip_rdzv_nomatch_pte *pte); -int cxip_rdzv_pte_zbp_cb(struct cxip_req *req, const union c_event *event); -int cxip_rdzv_pte_src_cb(struct cxip_req *req, const union c_event *event); - -struct cxip_if *cxip_if_lookup_addr(uint32_t nic_addr); -struct cxip_if *cxip_if_lookup_name(const char *name); -int cxip_get_if(uint32_t nic_addr, struct cxip_if **dev_if); -void cxip_put_if(struct cxip_if *dev_if); -int cxip_if_valid_rgroup_vni(struct cxip_if *iface, unsigned int rgroup_id, - unsigned int vni); -int cxip_alloc_lni(struct cxip_if *iface, uint32_t svc_id, - struct cxip_lni **if_lni); -void cxip_free_lni(struct cxip_lni *lni); -const char *cxi_tc_str(enum cxi_traffic_class tc); -enum cxi_traffic_class cxip_ofi_to_cxi_tc(uint32_t ofi_tclass); -int cxip_cmdq_cp_set(struct cxip_cmdq *cmdq, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type); -int cxip_cmdq_cp_modify(struct cxip_cmdq *cmdq, uint16_t vni, - enum cxi_traffic_class tc); -void cxip_if_init(void); -void cxip_if_fini(void); - -int cxip_pte_set_state(struct cxip_pte *pte, struct cxip_cmdq *cmdq, - enum c_ptlte_state new_state, uint32_t drop_count); -int cxip_pte_set_state_wait(struct cxip_pte *pte, struct cxip_cmdq *cmdq, - struct cxip_evtq *evtq, - enum c_ptlte_state new_state, uint32_t drop_count); -int cxip_pte_append(struct cxip_pte *pte, uint64_t iova, size_t len, - unsigned int lac, enum c_ptl_list list, - uint32_t buffer_id, uint64_t match_bits, - uint64_t ignore_bits, uint32_t match_id, - uint64_t min_free, 
uint32_t flags, - struct cxip_cntr *cntr, struct cxip_cmdq *cmdq, - bool ring); -int cxip_pte_unlink(struct cxip_pte *pte, enum c_ptl_list list, - int buffer_id, struct cxip_cmdq *cmdq); -int cxip_pte_map(struct cxip_pte *pte, uint64_t pid_idx, bool is_multicast); -int cxip_pte_alloc_nomap(struct cxip_portals_table *ptable, struct cxi_eq *evtq, - struct cxi_pt_alloc_opts *opts, - void (*state_change_cb)(struct cxip_pte *pte, - const union c_event *event), - void *ctx, struct cxip_pte **pte); -int cxip_pte_alloc(struct cxip_portals_table *ptable, struct cxi_eq *evtq, - uint64_t pid_idx, bool is_multicast, - struct cxi_pt_alloc_opts *opts, - void (*state_change_cb)(struct cxip_pte *pte, - const union c_event *event), - void *ctx, struct cxip_pte **pte); -void cxip_pte_free(struct cxip_pte *pte); -int cxip_pte_state_change(struct cxip_if *dev_if, const union c_event *event); - -int cxip_cmdq_alloc(struct cxip_lni *lni, struct cxi_eq *evtq, - struct cxi_cq_alloc_opts *cq_opts, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct cxip_cmdq **cmdq); -void cxip_cmdq_free(struct cxip_cmdq *cmdq); -int cxip_cmdq_emit_c_state(struct cxip_cmdq *cmdq, - const struct c_cstate_cmd *cmd); static inline bool cxip_cmdq_empty(struct cxip_cmdq *cmdq) { @@ -3282,194 +461,11 @@ static inline bool cxip_cmdq_prev_match(struct cxip_cmdq *cmdq, uint16_t vni, (cmdq->prev_cp->tc_type == tc_type); } - -int cxip_evtq_init(struct cxip_evtq *evtq, struct cxip_cq *cq, - size_t num_events, size_t num_fc_events, - struct cxil_wait_obj *priv_wait); -void cxip_evtq_fini(struct cxip_evtq *eq); - -int cxip_domain(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **dom, void *context); - -int cxip_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, - void *context); - -int cxip_endpoint(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); - -int cxip_tx_id_alloc(struct cxip_txc_hpc *txc, void *ctx); 
-int cxip_tx_id_free(struct cxip_txc_hpc *txc, int id); -void *cxip_tx_id_lookup(struct cxip_txc_hpc *txc, int id); -int cxip_rdzv_id_alloc(struct cxip_txc_hpc *txc, struct cxip_req *req); -int cxip_rdzv_id_free(struct cxip_txc_hpc *txc, int id); -void *cxip_rdzv_id_lookup(struct cxip_txc_hpc *txc, int id); -int cxip_ep_cmdq(struct cxip_ep_obj *ep_obj, bool transmit, uint32_t tclass, - struct cxi_eq *evtq, struct cxip_cmdq **cmdq); -void cxip_ep_cmdq_put(struct cxip_ep_obj *ep_obj, bool transmit); - -int cxip_recv_ux_sw_matcher(struct cxip_ux_send *ux); -int cxip_recv_req_sw_matcher(struct cxip_req *req); -int cxip_recv_cancel(struct cxip_req *req); -int cxip_fc_process_drops(struct cxip_ep_obj *ep_obj, uint32_t nic_addr, - uint32_t pid, uint16_t vni, uint16_t drops); -void cxip_recv_pte_cb(struct cxip_pte *pte, const union c_event *event); -void cxip_rxc_req_fini(struct cxip_rxc *rxc); -int cxip_rxc_oflow_init(struct cxip_rxc *rxc); -void cxip_rxc_oflow_fini(struct cxip_rxc *rxc); -int cxip_fc_resume(struct cxip_ep_obj *ep_obj, uint32_t nic_addr, uint32_t pid, - uint16_t vni); - -void cxip_txc_struct_init(struct cxip_txc *txc, const struct fi_tx_attr *attr, - void *context); -struct cxip_txc *cxip_txc_calloc(struct cxip_ep_obj *ep_obj, void *context); -void cxip_txc_free(struct cxip_txc *txc); -int cxip_txc_enable(struct cxip_txc *txc); -void cxip_txc_disable(struct cxip_txc *txc); -struct cxip_txc *cxip_stx_alloc(const struct fi_tx_attr *attr, void *context); -int cxip_rxc_msg_enable(struct cxip_rxc_hpc *rxc, uint32_t drop_count); - -struct cxip_rxc *cxip_rxc_calloc(struct cxip_ep_obj *ep_obj, void *context); -void cxip_rxc_free(struct cxip_rxc *rxc); -int cxip_rxc_enable(struct cxip_rxc *rxc); -void cxip_rxc_disable(struct cxip_rxc *rxc); -void cxip_rxc_struct_init(struct cxip_rxc *rxc, const struct fi_rx_attr *attr, - void *context); -void cxip_rxc_recv_req_cleanup(struct cxip_rxc *rxc); - -int cxip_rxc_emit_dma(struct cxip_rxc_hpc *rxc, struct cxip_cmdq *cmdq, 
- uint16_t vni, enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct c_full_dma_cmd *dma, uint64_t flags); -int cxip_rxc_emit_idc_msg(struct cxip_rxc_hpc *rxc, struct cxip_cmdq *cmdq, - uint16_t vni, enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - const struct c_cstate_cmd *c_state, - const struct c_idc_msg_hdr *msg, const void *buf, - size_t len, uint64_t flags); - -int cxip_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context); - -bool cxip_evtq_saturated(struct cxip_evtq *evtq); -struct cxip_md *cxip_txc_ibuf_md(void *ibuf); -void *cxip_txc_ibuf_alloc(struct cxip_txc *txc); -void cxip_txc_ibuf_free(struct cxip_txc *txc, void *ibuf); -int cxip_ibuf_chunk_init(struct ofi_bufpool_region *region); -void cxip_ibuf_chunk_fini(struct ofi_bufpool_region *region); -int cxip_evtq_req_cancel(struct cxip_evtq *evtq, void *req_ctx, - void *op_ctx, bool match); -void cxip_evtq_req_discard(struct cxip_evtq *evtq, void *req_ctx); -void cxip_evtq_flush_trig_reqs(struct cxip_evtq *evtq); -int cxip_cq_req_complete(struct cxip_req *req); -int cxip_cq_req_complete_addr(struct cxip_req *req, fi_addr_t src); -int cxip_cq_req_error(struct cxip_req *req, size_t olen, - int err, int prov_errno, void *err_data, - size_t err_data_size, fi_addr_t src_addr); -int cxip_cq_add_wait_fd(struct cxip_cq *cq, int wait_fd, int events); -void cxip_cq_del_wait_fd(struct cxip_cq *cq, int wait_fd); - -int proverr2errno(int err); -struct cxip_req *cxip_evtq_req_alloc(struct cxip_evtq *evtq, - int remap, void *req_ctx); -void cxip_evtq_req_free(struct cxip_req *req); -void cxip_evtq_progress(struct cxip_evtq *evtq, bool internal); - -void cxip_ep_progress(struct fid *fid); -void cxip_ep_flush_trig_reqs(struct cxip_ep_obj *ep_obj); - -int cxip_cq_trywait(struct cxip_cq *cq); -void cxip_cq_progress(struct cxip_cq *cq); -void cxip_util_cq_progress(struct util_cq *util_cq); -int cxip_cq_open(struct fid_domain *domain, struct 
fi_cq_attr *attr, - struct fid_cq **cq, void *context); -int cxip_evtq_adjust_reserved_fc_event_slots(struct cxip_evtq *evtq, int value); -void cxip_cq_flush_trig_reqs(struct cxip_cq *cq); - -void cxip_dom_cntr_disable(struct cxip_domain *dom); -int cxip_cntr_mod(struct cxip_cntr *cxi_cntr, uint64_t value, bool set, - bool err); -int cxip_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context); - -int cxip_iomm_init(struct cxip_domain *dom); -void cxip_iomm_fini(struct cxip_domain *dom); -int cxip_map(struct cxip_domain *dom, const void *buf, unsigned long len, - uint64_t access, uint64_t flags, struct cxip_md **md); -void cxip_unmap(struct cxip_md *md); - -int cxip_ctrl_msg_send(struct cxip_ctrl_req *req, uint64_t data); -void cxip_ep_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); -void cxip_ep_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, bool internal); -void cxip_ep_tx_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); -void cxip_ep_tx_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, bool internal); -void cxip_ep_tgt_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); -void cxip_ep_tgt_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, - bool internal); -int cxip_ep_ctrl_init(struct cxip_ep_obj *ep_obj); -void cxip_ep_ctrl_fini(struct cxip_ep_obj *ep_obj); -int cxip_ep_trywait(struct cxip_ep_obj *ep_obj, struct cxip_cq *cq); - -int cxip_av_set(struct fid_av *av, struct fi_av_set_attr *attr, - struct fid_av_set **av_set_fid, void * context); - -// TODO: naming convention for testing hooks -void cxip_coll_init(struct cxip_ep_obj *ep_obj); -int cxip_coll_enable(struct cxip_ep *ep); -int cxip_coll_disable(struct cxip_ep_obj *ep_obj); -void cxip_coll_close(struct cxip_ep_obj *ep_obj); -void cxip_coll_populate_opcodes(void); -int cxip_coll_send(struct cxip_coll_reduction *reduction, - int av_set_idx, const void *buffer, size_t buflen, - struct cxi_md *md); -int cxip_coll_send_red_pkt(struct 
cxip_coll_reduction *reduction, - const struct cxip_coll_data *coll_data, - bool arm, bool retry, bool root_result_pkt); - -void cxip_capture_red_id(int *red_id_buf); -ssize_t cxip_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context); -ssize_t cxip_broadcast(struct fid_ep *ep, void *buf, size_t count, - void *desc, fi_addr_t coll_addr, fi_addr_t root_addr, - enum fi_datatype datatype, uint64_t flags, - void *context); -ssize_t cxip_reduce(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - fi_addr_t coll_addr, fi_addr_t root_addr, - enum fi_datatype datatype, enum fi_op op, uint64_t flags, - void *context); -ssize_t cxip_allreduce(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - fi_addr_t coll_addr, enum fi_datatype datatype, - enum fi_op op, uint64_t flags, void *context); -int cxip_join_collective(struct fid_ep *ep, fi_addr_t coll_addr, - const struct fid_av_set *coll_av_set, - uint64_t flags, struct fid_mc **mc, void *context); -void cxip_coll_progress_join(struct cxip_ep_obj *ep_obj); -void cxip_coll_progress_cq_poll(struct cxip_ep_obj *ep_obj); - -int cxip_coll_arm_disable(struct fid_mc *mc, bool disable); -void cxip_coll_limit_red_id(struct fid_mc *mc, int max_red_id); -void cxip_coll_drop_send(struct cxip_coll_reduction *reduction); -void cxip_coll_drop_recv(struct cxip_coll_reduction *reduction); - -void cxip_dbl_to_rep(struct cxip_repsum *x, double d); -void cxip_rep_to_dbl(double *d, const struct cxip_repsum *x); -void cxip_rep_add(struct cxip_repsum *x, const struct cxip_repsum *y); -double cxip_rep_add_dbl(double d1, double d2); -double cxip_rep_sum(size_t count, double *values); - -int cxip_check_auth_key_info(struct fi_info *info); -int cxip_gen_auth_key(struct fi_info *info, struct cxi_auth_key *key); - static inline struct fid_peer_srx *cxip_get_owner_srx(struct cxip_rxc *rxc) { return rxc->ep_obj->owner_srx; } -#define 
CXIP_FC_SOFTWARE_INITIATED -1 - -/* cxip_fc_reason() - Returns the event reason for portal state - * change (FC reason or SC reason). - */ static inline int cxip_fc_reason(const union c_event *event) { if (!event->tgt_long.initiator.state_change.sc_nic_auto) @@ -3500,66 +496,11 @@ static inline void cxip_txq_ring(struct cxip_cmdq *cmdq, bool more, } } -ssize_t cxip_rma_common(enum fi_op_type op, struct cxip_txc *txc, - const void *buf, size_t len, void *desc, - fi_addr_t tgt_addr, uint64_t addr, - uint64_t key, uint64_t data, uint64_t flags, - uint32_t tclass, uint64_t msg_order, void *context, - bool triggered, uint64_t trig_thresh, - struct cxip_cntr *trig_cntr, - struct cxip_cntr *comp_cntr); - static inline int cxip_no_discard(struct fi_peer_rx_entry *rx_entry) { return -FI_ENOSYS; } -int cxip_unexp_start(struct fi_peer_rx_entry *entry); - -/* - * Request variants: - * CXIP_RQ_AMO - * Passes one argument (operand1), and applies that to a remote memory - * address content. - * - * CXIP_RQ_AMO_FETCH - * Passes two arguments (operand1, resultptr), applies operand1 to a - * remote memory address content, and returns the prior content of the - * remote memory in resultptr. - * - * CXIP_RQ_AMO_SWAP - * Passes three arguments (operand1, compare, resultptr). If remote memory - * address content satisfies the comparison operation with compare, - * replaces the remote memory content with operand1, and returns the prior - * content of the remote memory in resultptr. - * - * CXIP_RQ_AMO_PCIE_FETCH - * Passes two arguments (operand1, resultptr), applies operand1 to a - * remote memory address content, and returns the prior content of the - * remote memory in resultptr. - * - * The resulting operation should be a PCIe AMO instead of NIC AMO. 
- */ -enum cxip_amo_req_type { - CXIP_RQ_AMO, - CXIP_RQ_AMO_FETCH, - CXIP_RQ_AMO_SWAP, - CXIP_RQ_AMO_PCIE_FETCH, - CXIP_RQ_AMO_LAST, -}; - -int cxip_amo_common(enum cxip_amo_req_type req_type, struct cxip_txc *txc, - uint32_t tclass, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **comparedesc, - size_t compare_count, const struct fi_ioc *resultv, - void **resultdesc, size_t result_count, uint64_t flags, - bool triggered, uint64_t trig_thresh, - struct cxip_cntr *trig_cntr, struct cxip_cntr *comp_cntr); -int _cxip_atomic_opcode(enum cxip_amo_req_type req_type, enum fi_datatype dt, - enum fi_op op, int amo_remap_to_pcie_fadd, - enum c_atomic_op *cop, enum c_atomic_type *cdt, - enum c_cswap_op *copswp, unsigned int *cdtlen); - static inline void cxip_domain_add_txc(struct cxip_domain *dom, struct cxip_txc *txc) { @@ -3612,15 +553,6 @@ cxip_domain_remove_cq(struct cxip_domain *dom, struct cxip_cq *cq) ofi_spin_unlock(&dom->lock); } -int cxip_domain_ctrl_id_alloc(struct cxip_domain *dom, - struct cxip_ctrl_req *req); -void cxip_domain_ctrl_id_free(struct cxip_domain *dom, - struct cxip_ctrl_req *req); -int cxip_domain_prov_mr_id_alloc(struct cxip_domain *dom, - struct cxip_mr *mr); -void cxip_domain_prov_mr_id_free(struct cxip_domain *dom, - struct cxip_mr *mr); - static inline struct cxip_ctrl_req *cxip_domain_ctrl_id_at(struct cxip_domain *dom, int buffer_id) @@ -3643,41 +575,6 @@ static inline bool is_netsim(struct cxip_ep_obj *ep_obj) CXI_PLATFORM_NETSIM); } -/* debugging TRACE functions */ -#define cxip_coll_trace_attr __attribute__((format(__printf__, 1, 2))) -extern bool cxip_coll_trace_muted; // suppress output if true -extern bool cxip_coll_trace_append; // append open for trace file -extern bool cxip_coll_trace_linebuf; // set line buffering for trace -extern int cxip_coll_trace_rank; // tracing rank -extern int cxip_coll_trace_numranks; // tracing number of ranks -extern FILE *cxip_coll_trace_fid; // trace output file descriptor 
-extern bool cxip_coll_prod_trace_initialized; // turn on tracing in non-debug - // build -extern char **cxip_coll_prod_trace_buffer; // production trace buffer -extern int cxip_coll_prod_trace_current; // current index in trace buffer -extern int cxip_coll_prod_trace_max_idx; // max lines in trace buffer -extern int cxip_coll_prod_trace_ln_max; // max trace line length - -int cxip_coll_trace_attr cxip_coll_trace(const char *fmt, ...); -int cxip_coll_trace_attr cxip_coll_prod_trace(const char *fmt, ...); -void cxip_coll_trace_flush(void); -void cxip_coll_trace_close(void); -void cxip_coll_trace_init(struct cxip_ep_obj *ep_obj); -void cxip_coll_print_prod_trace(void); - -/* debugging TRACE filtering control */ -enum cxip_coll_trace_module { - CXIP_TRC_CTRL, - CXIP_TRC_ZBCOLL, - CXIP_TRC_COLL_CURL, - CXIP_TRC_COLL_PKT, - CXIP_TRC_COLL_JOIN, - CXIP_TRC_COLL_DEBUG, - CXIP_TRC_TEST_CODE, - CXIP_TRC_MAX -}; -extern uint64_t cxip_coll_trace_mask; - static inline void cxip_coll_trace_set(int mod) { cxip_coll_trace_mask |= (1L << mod); @@ -3698,109 +595,6 @@ static inline bool cxip_coll_prod_trace_true(void) return cxip_coll_prod_trace_initialized; } -#if ENABLE_DEBUG -#define CXIP_COLL_TRACE(mod, fmt, ...) \ - do {if (cxip_coll_trace_true(mod)) \ - cxip_coll_trace(fmt, ##__VA_ARGS__);} while (0) -#else -#define CXIP_COLL_TRACE(mod, fmt, ...) \ - do {if (cxip_coll_prod_trace_true()) \ - cxip_coll_prod_trace(fmt, ##__VA_ARGS__); } while (0) -#endif - -/* fabric logging implementation functions */ -#define _CXIP_DBG(subsys, fmt, ...) \ - FI_DBG(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) -#define _CXIP_INFO(subsys, fmt, ...) \ - FI_INFO(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) -#define _CXIP_WARN(subsys, fmt, ...) \ - FI_WARN(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) -#define _CXIP_WARN_ONCE(subsys, fmt, ...) 
\ - FI_WARN_ONCE(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) -#define CXIP_LOG(fmt, ...) \ - fi_log(&cxip_prov, FI_LOG_WARN, FI_LOG_CORE, \ - __func__, __LINE__, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) - -#define CXIP_FATAL(fmt, ...) \ - do { \ - CXIP_LOG(fmt, ##__VA_ARGS__); \ - abort(); \ - } while (0) - -#define TXC_BASE(txc) ((struct cxip_txc *)(void *)(txc)) -#define TXC_DBG(txc, fmt, ...) \ - _CXIP_DBG(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ - TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) -#define TXC_INFO(txc, fmt, ...) \ - _CXIP_INFO(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ - TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) -#define TXC_WARN(txc, fmt, ...) \ - _CXIP_WARN(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ - TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) -#define TXC_WARN_RET(txc, ret, fmt, ...) \ - TXC_WARN(txc, "%d:%s: " fmt "", ret, fi_strerror(-ret), ##__VA_ARGS__) -#define TXC_FATAL(txc, fmt, ...) \ - CXIP_FATAL("TXC (%#x:%u):: " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ - TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) - -#define RXC_BASE(rxc) ((struct cxip_rxc *)(void *)(rxc)) -#define RXC_DBG(rxc, fmt, ...) \ - _CXIP_DBG(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ - RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_INFO(rxc, fmt, ...) \ - _CXIP_INFO(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ - RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_WARN(rxc, fmt, ...) 
\ - _CXIP_WARN(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ - RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_WARN_ONCE(rxc, fmt, ...) \ - _CXIP_WARN_ONCE(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ - RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_FATAL(rxc, fmt, ...) \ - CXIP_FATAL("RXC (%#x:%u) PtlTE %u:[Fatal] " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ - RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) - -#define DOM_INFO(dom, fmt, ...) \ - _CXIP_INFO(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ - (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ - (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ - (dom)->nic_addr, ##__VA_ARGS__) -#define DOM_WARN(dom, fmt, ...) \ - _CXIP_WARN(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ - (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ - (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ - (dom)->nic_addr, ##__VA_ARGS__) - -#define CXIP_UNEXPECTED_EVENT_STS "Unexpected event status, %s rc = %s\n" -#define CXIP_UNEXPECTED_EVENT "Unexpected event %s, rc = %s\n" - -#define CXIP_DEFAULT_CACHE_LINE_SIZE 64 - -#define CXIP_SYSFS_CACHE_LINE_SIZE \ - "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" - -/* cxip_cacheline_size() - Return the CPU cache-line size, if unable to - * read then return the assumed cache size. 
- */ static inline int cxip_cacheline_size(void) { FILE *f; @@ -3944,60 +738,6 @@ int cxip_set_recv_match_id(struct cxip_rxc *rxc, fi_addr_t src_addr, return FI_SUCCESS; } -fi_addr_t cxip_recv_req_src_addr(struct cxip_rxc *rxc, - uint32_t init, uint16_t vni, - bool force); -int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len, - struct cxip_md *md, struct cxip_req **cxip_req, - int (*recv_cb)(struct cxip_req *req, - const union c_event *event)); -void cxip_recv_req_free(struct cxip_req *req); -void cxip_recv_req_report(struct cxip_req *req); -void cxip_recv_req_peek_complete(struct cxip_req *req, - struct cxip_ux_send *ux_send); -struct cxip_req *cxip_mrecv_req_dup(struct cxip_req *mrecv_req); -int cxip_complete_put(struct cxip_req *req, const union c_event *event); -/* XXXX TODO: Remove */ -/* Defines the posted receive interval for checking LE allocation if - * in hybrid RX match mode and preemptive transitions to software - * managed EP are requested. - */ -#define CXIP_HYBRID_RECV_CHECK_INTERVAL (64-1) -#define FC_SW_LE_MSG_FATAL "LE exhaustion during flow control, "\ - "FI_CXI_RX_MATCH_MODE=[hybrid|software] is required\n" -int cxip_recv_pending_ptlte_disable(struct cxip_rxc *rxc, bool check_fc); -int cxip_flush_appends(struct cxip_rxc_hpc *rxc, - int (*flush_cb)(struct cxip_req *req, - const union c_event *event)); -int cxip_recv_req_dropped(struct cxip_req *req); -void cxip_rxc_record_req_stat(struct cxip_rxc *rxc, enum c_ptl_list list, - size_t rlength, struct cxip_req *req); -bool tag_match(uint64_t init_mb, uint64_t mb, uint64_t ib); -bool init_match(struct cxip_rxc *rxc, uint32_t init, uint32_t match_id); -uint32_t cxip_msg_match_id(struct cxip_txc *txc); -void cxip_report_send_completion(struct cxip_req *req, bool sw_cntr); -bool cxip_send_eager_idc(struct cxip_req *req); -void cxip_send_buf_fini(struct cxip_req *req); -int cxip_send_buf_init(struct cxip_req *req); - -size_t cxip_ep_get_unexp_msgs(struct fid_ep *fid_ep, - struct 
fi_cq_tagged_entry *entry, size_t count, - fi_addr_t *src_addr, size_t *ux_count); - -int cxip_nic_alloc(struct cxip_if *nic_if, struct fid_nic **fid_nic); - -int cxip_domain_dwq_emit_dma(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct cxip_cntr *trig_cntr, size_t trig_thresh, - struct c_full_dma_cmd *dma, uint64_t flags); -int cxip_domain_dwq_emit_amo(struct cxip_domain *dom, uint16_t vni, - enum cxi_traffic_class tc, - enum cxi_traffic_class_type tc_type, - struct cxip_cntr *trig_cntr, size_t trig_thresh, - struct c_dma_amo_cmd *amo, uint64_t flags, - bool fetching, bool flush); - static inline void cxip_set_env_rx_match_mode(void) { char *param_str = NULL; @@ -4068,4 +808,4 @@ static inline void cxip_set_env_rx_match_mode(void) } } -#endif +#endif /* _CXIP_PROV_H_ */ diff --git a/prov/cxi/include/cxip/addr.h b/prov/cxi/include/cxip/addr.h new file mode 100644 index 00000000000..fa74e155172 --- /dev/null +++ b/prov/cxi/include/cxip/addr.h @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_ADDR_H_ +#define _CXIP_ADDR_H_ + + +#include + +/* Macros */ +#define CXIP_ADDR_EQUAL(a, b) ((a).nic == (b).nic && (a).pid == (b).pid) + +#define CXIP_ADDR_VNI_EQUAL(a, b) (CXIP_ADDR_EQUAL(a, b) && (a).vni == (b).vni) + +#define CXIP_ADDR_PORT_BITS 6 + +#define CXIP_ADDR_SWITCH_BITS 5 + +#define CXIP_ADDR_GROUP_BITS 9 + +#define CXIP_ADDR_FATTREE_PORT_BITS 6 + +#define CXIP_ADDR_FATTREE_SWITCH_BITS 14 + +/* Type definitions */ +struct cxip_addr { + uint32_t pid : C_DFA_PID_BITS_MAX; + uint32_t nic : C_DFA_NIC_BITS; + uint32_t pad : 3; + uint16_t vni; +}; + +#endif /* _CXIP_ADDR_H_ */ diff --git a/prov/cxi/include/cxip/atomic.h b/prov/cxi/include/cxip/atomic.h new file mode 100644 index 00000000000..e7b4259388c --- /dev/null +++ b/prov/cxi/include/cxip/atomic.h @@ -0,0 +1,33 @@ +/* + * 
SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_ATOMIC_H_ +#define _CXIP_ATOMIC_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_cntr; +struct cxip_txc; + +/* Function declarations */ +int cxip_amo_common(enum cxip_amo_req_type req_type, struct cxip_txc *txc, + uint32_t tclass, const struct fi_msg_atomic *msg, + const struct fi_ioc *comparev, void **comparedesc, + size_t compare_count, const struct fi_ioc *resultv, + void **resultdesc, size_t result_count, uint64_t flags, + bool triggered, uint64_t trig_thresh, + struct cxip_cntr *trig_cntr, struct cxip_cntr *comp_cntr); + +int _cxip_atomic_opcode(enum cxip_amo_req_type req_type, enum fi_datatype dt, + enum fi_op op, int amo_remap_to_pcie_fadd, + enum c_atomic_op *cop, enum c_atomic_type *cdt, + enum c_cswap_op *copswp, unsigned int *cdtlen); + +#endif /* _CXIP_ATOMIC_H_ */ diff --git a/prov/cxi/include/cxip/auth.h b/prov/cxi/include/cxip/auth.h new file mode 100644 index 00000000000..6d730527971 --- /dev/null +++ b/prov/cxi/include/cxip/auth.h @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_AUTH_H_ +#define _CXIP_AUTH_H_ + + +/* Function declarations */ +int cxip_check_auth_key_info(struct fi_info *info); + +int cxip_gen_auth_key(struct fi_info *info, struct cxi_auth_key *key); + +#endif /* _CXIP_AUTH_H_ */ diff --git a/prov/cxi/include/cxip/av.h b/prov/cxi/include/cxip/av.h new file mode 100644 index 00000000000..ea4527f64c4 --- /dev/null +++ b/prov/cxi/include/cxip/av.h @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_AV_H_ +#define _CXIP_AV_H_ + + +#include +#include +#include +#include +#include +#include + +/* Forward 
declarations */ +struct cxip_addr; +struct cxip_coll_mc; +struct cxip_domain; +struct cxip_ep; + +/* Type definitions */ +struct cxip_av_auth_key_entry { + ofi_atomic32_t use_cnt; + ofi_atomic32_t ref_cnt; + UT_hash_handle hh; + struct dlist_entry entry; + struct cxi_auth_key key; + fi_addr_t fi_addr; +}; + +struct cxip_av_entry { + ofi_atomic32_t use_cnt; + UT_hash_handle hh; + struct cxip_addr addr; + fi_addr_t fi_addr; + struct cxip_av_auth_key_entry *auth_key; +}; + +struct cxip_av { + struct fid_av av_fid; + struct cxip_domain *domain; + + /* List of endpoints bound to this AV. Each bind takes a reference + * as well. + */ + struct dlist_entry ep_list; + ofi_atomic32_t ref; + + /* Memory used to implement lookups. Two data structures are used. + * 1. ibuf pool for O(1) lookup on the data path + * 2. hash table for O(1) on the receive path + */ + struct cxip_av_entry *av_entry_hash; + struct ofi_bufpool *av_entry_pool; + ofi_atomic32_t av_entry_cnt; + + /* Memory used to support AV authorization key. Three data structures + * are needed. + * 1. ibuf pool for memory allocation and lookup O(1) access. + * 2. hash table for O(1) reverse lookup + * 3. List for iterating + */ + struct cxip_av_auth_key_entry *auth_key_entry_hash; + struct ofi_bufpool *auth_key_entry_pool; + struct dlist_entry auth_key_entry_list; + ofi_atomic32_t auth_key_entry_cnt; + size_t auth_key_entry_max; + + /* Single lock is used to protect entire AV. With domain level + * threading, this lock is not used. + */ + bool lockless; + pthread_rwlock_t lock; + + /* AV is configured as symmetric. This is an optimization which enables + * endpoints to use logical address. + */ + bool symmetric; + + /* Address vector type. */ + enum fi_av_type type; + + /* Whether or not the AV is operating in FI_AV_AUTH_KEY mode. */ + bool av_auth_key; + + /* Whether or not the AV was opened with FI_AV_USER_ID. 
*/ + bool av_user_id; +}; + +struct cxip_av_set { + struct fid_av_set av_set_fid; + struct cxip_av *cxi_av; // associated AV + struct cxip_coll_mc *mc_obj; // reference MC + fi_addr_t *fi_addr_ary; // addresses in set + size_t fi_addr_cnt; // count of addresses + struct cxip_comm_key comm_key; // communication key + uint64_t flags; +}; + +/* Function declarations */ +int cxip_av_auth_key_get_vnis(struct cxip_av *av, uint16_t **vni, + size_t *vni_count); + +void cxip_av_auth_key_put_vnis(struct cxip_av *av, uint16_t *vni, + size_t vni_count); + +extern struct cxip_addr *(*cxip_av_addr_in)(const void *addr); + +extern void (*cxip_av_addr_out)(struct cxip_addr *addr_out, + struct cxip_addr *addr); + +int cxip_av_lookup_addr(struct cxip_av *av, fi_addr_t fi_addr, + struct cxip_addr *addr); + +fi_addr_t cxip_av_lookup_fi_addr(struct cxip_av *av, + const struct cxip_addr *addr); + +fi_addr_t cxip_av_lookup_auth_key_fi_addr(struct cxip_av *av, unsigned int vni); + +int cxip_av_open(struct fid_domain *domain, struct fi_av_attr *attr, + struct fid_av **av, void *context); + +int cxip_av_bind_ep(struct cxip_av *av, struct cxip_ep *ep); + +void cxip_av_unbind_ep(struct cxip_av *av, struct cxip_ep *ep); + +int cxip_av_set(struct fid_av *av, struct fi_av_set_attr *attr, + struct fid_av_set **av_set_fid, void * context); + +#endif /* _CXIP_AV_H_ */ diff --git a/prov/cxi/include/cxip/cmdq.h b/prov/cxi/include/cxip/cmdq.h new file mode 100644 index 00000000000..a21b80aa172 --- /dev/null +++ b/prov/cxi/include/cxip/cmdq.h @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_CMDQ_H_ +#define _CXIP_CMDQ_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_lni; + +/* Type definitions */ +struct cxip_cmdq { + struct cxi_cq *dev_cmdq; + struct c_cstate_cmd c_state; + enum cxip_llring_mode llring_mode; + + struct cxi_cp *cur_cp; + struct 
cxi_cp *prev_cp; + struct cxip_lni *lni; +}; + +/* Function declarations */ +int cxip_cmdq_emit_idc_put(struct cxip_cmdq *cmdq, + const struct c_cstate_cmd *c_state, + const struct c_idc_put_cmd *put, const void *buf, + size_t len, uint64_t flags); + +int cxip_cmdq_emit_dma(struct cxip_cmdq *cmdq, struct c_full_dma_cmd *dma, + uint64_t flags); + +int cxip_cmdq_emic_idc_amo(struct cxip_cmdq *cmdq, + const struct c_cstate_cmd *c_state, + const struct c_idc_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +int cxip_cmdq_emit_dma_amo(struct cxip_cmdq *cmdq, struct c_dma_amo_cmd *amo, + uint64_t flags, bool fetching, bool flush); + +int cxip_cmdq_emit_idc_msg(struct cxip_cmdq *cmdq, + const struct c_cstate_cmd *c_state, + const struct c_idc_msg_hdr *msg, const void *buf, + size_t len, uint64_t flags); + +enum cxi_traffic_class cxip_ofi_to_cxi_tc(uint32_t ofi_tclass); + +int cxip_cmdq_cp_set(struct cxip_cmdq *cmdq, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type); + +int cxip_cmdq_cp_modify(struct cxip_cmdq *cmdq, uint16_t vni, + enum cxi_traffic_class tc); + +int cxip_cmdq_alloc(struct cxip_lni *lni, struct cxi_eq *evtq, + struct cxi_cq_alloc_opts *cq_opts, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct cxip_cmdq **cmdq); + +void cxip_cmdq_free(struct cxip_cmdq *cmdq); + +int cxip_cmdq_emit_c_state(struct cxip_cmdq *cmdq, + const struct c_cstate_cmd *cmd); + +#endif /* _CXIP_CMDQ_H_ */ diff --git a/prov/cxi/include/cxip/cntr.h b/prov/cxi/include/cxip/cntr.h new file mode 100644 index 00000000000..73de961ed84 --- /dev/null +++ b/prov/cxi/include/cxip/cntr.h @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_CNTR_H_ +#define _CXIP_CNTR_H_ + + +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_domain; + +/* Type 
definitions */ +struct cxip_cntr { + struct fid_cntr cntr_fid; + struct cxip_domain *domain; // parent domain + ofi_atomic32_t ref; + struct fi_cntr_attr attr; // copy of user or default attributes + struct fid_wait *wait; + /* Contexts to which counter is bound */ + struct dlist_entry ctx_list; + + /* Triggered cmdq for bound counters */ + struct cxip_cmdq *trig_cmdq; + + struct ofi_genlock lock; + + struct cxi_ct *ct; + struct c_ct_writeback *wb; + uint64_t wb_device; + enum fi_hmem_iface wb_iface; + uint64_t wb_handle; + bool wb_handle_valid; + struct c_ct_writeback lwb; + + struct dlist_entry dom_entry; + + /* Counter for number of operations which need progress. A separate lock + * is needed since these functions may be called without counter lock held. + */ + struct ofi_genlock progress_count_lock; + int progress_count; +}; + +/* Function declarations */ +int cxip_cntr_mod(struct cxip_cntr *cxi_cntr, uint64_t value, bool set, + bool err); + +int cxip_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, + struct fid_cntr **cntr, void *context); + +#endif /* _CXIP_CNTR_H_ */ diff --git a/prov/cxi/include/cxip/coll.h b/prov/cxi/include/cxip/coll.h new file mode 100644 index 00000000000..a3c8052cced --- /dev/null +++ b/prov/cxi/include/cxip/coll.h @@ -0,0 +1,307 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_COLL_H_ +#define _CXIP_COLL_H_ + + +#include +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct coll_counters; +struct cxip_av_set; +struct cxip_cmdq; +struct cxip_cntr; +struct cxip_ep; +struct cxip_ep_obj; +struct cxip_eq; +struct cxip_evtq; +struct cxip_md; +struct cxip_pte; +struct cxip_req; +struct cxip_zbcoll_obj; + +/* Macros */ +#define CXIP_COLL_MAX_CONCUR 8 + +#define CXIP_COLL_MIN_RX_BUFS 8 + +#define CXIP_COLL_MIN_RX_SIZE 131072 + +#define CXIP_COLL_MIN_MULTI_RECV 64 + +#define 
CXIP_COLL_MAX_DATA_SIZE 32 + +#define CXIP_COLL_MAX_SEQNO ((1 << 10) - 1) + +#define CXIP_COLL_MOD_SEQNO (CXIP_COLL_MAX_SEQNO - 1) + +#define CXIP_COLL_MIN_RETRY_USEC 1 + +#define CXIP_COLL_MAX_RETRY_USEC 32000 + +#define CXIP_COLL_MAX_LEAF_TIMEOUT_MULT 50 + +#define CXIP_COLL_MIN_TIMEOUT_USEC 1 + +#define CXIP_COLL_MAX_TIMEOUT_USEC 20000000 + +/* Type definitions */ +struct cxip_ep_coll_obj { + struct index_map mcast_map; // mc address -> object + struct dlist_entry root_retry_list; + struct dlist_entry mc_list; // list of mcast addresses + struct cxip_coll_pte *coll_pte; // PTE extensions + struct dlist_ts sched_list; // scheduled actions + struct cxip_cmdq *rx_cmdq; // shared with STD EP + struct cxip_cmdq *tx_cmdq; // shared with STD EP + struct cxip_cntr *rx_cntr; // shared with STD EP + struct cxip_cntr *tx_cntr; // shared with STD EP + struct cxip_evtq *rx_evtq; // shared with STD EP + struct cxip_evtq *tx_evtq; // shared with STD EP + struct cxip_eq *eq; // shared with STD EP + ofi_atomic32_t num_mc; // count of MC objects + ofi_atomic32_t join_cnt; // advanced on every join + size_t min_multi_recv; // trigger value to rotate bufs + size_t buffer_size; // size of receive buffers + size_t buffer_count; // count of receive buffers + bool join_busy; // serialize joins on a node + bool is_hwroot; // set if ep is hw_root + bool enabled; // enabled + /* needed for progress after leaf sends its contribution */ + struct dlist_entry leaf_rdma_get_list; + /* used to change ctrl_msg_type to CXIP_CTRL_MSG_ZB_DATA_RDMA_LAC */ + bool leaf_save_root_lac; + /* Logical address context for leaf rdma get */ + uint64_t rdma_get_lac_va_tx; + /* pointer to the source buffer base used in the RDMA */ + uint8_t *root_rdma_get_data_p; + /* root rdma get memory descriptor, for entire root src buffer */ + struct cxip_md *root_rdma_get_md; +}; + +struct cxip_intval { + int64_t ival[4]; +}; + +struct cxip_fltval { + double fval[4]; +}; + +struct cxip_iminmax { + int64_t iminval; + 
uint64_t iminidx; + int64_t imaxval; + uint64_t imaxidx; +}; + +struct cxip_fltminmax { + double fminval; + uint64_t fminidx; + double fmaxval; + uint64_t fmaxidx; +}; + +struct cxip_coll_buf { + struct dlist_entry buf_entry; // linked list of buffers + struct cxip_req *req; // associated LINK request + struct cxip_md *cxi_md; // buffer memory descriptor + size_t bufsiz; // buffer size in bytes + uint8_t buffer[]; // buffer space itself +}; + +struct cxip_coll_pte { + struct cxip_pte *pte; // Collectives PTE + struct cxip_ep_obj *ep_obj; // Associated endpoint + struct cxip_coll_mc *mc_obj; // Associated multicast object + struct dlist_entry buf_list; // PTE receive buffers + ofi_atomic32_t buf_cnt; // count of linked buffers + ofi_atomic32_t buf_swap_cnt; // for diagnostics + ofi_atomic32_t recv_cnt; // for diagnostics + int buf_low_water; // for diagnostics + bool enabled; // enabled +}; + +struct cxip_coll_data { + union { + uint8_t databuf[32]; // raw data buffer + struct cxip_intval intval; // 4 integer values + flags + struct cxip_fltval fltval; // 4 double values + flags + struct cxip_iminmax intminmax; // 1 intminmax structure + flags + struct cxip_fltminmax fltminmax;// 1 fltminmax structure + flags + struct cxip_repsum repsum; // 1 repsum structure + flags + }; + cxip_coll_op_t red_op; // reduction opcode + cxip_coll_rc_t red_rc; // reduction return code + int red_cnt; // reduction contrib count + bool initialized; +}; + +struct cxip_coll_metrics_ep { + int myrank; + bool isroot; +}; + +struct cxip_coll_metrics { + long red_count_bad; + long red_count_full; + long red_count_partial; + long red_count_unreduced; + struct cxip_coll_metrics_ep ep_data; +}; + +struct cxip_coll_reduction { + struct cxip_coll_mc *mc_obj; // parent mc_obj + uint32_t red_id; // reduction id + uint16_t seqno; // reduction sequence number + uint16_t resno; // reduction result number + struct cxip_req *op_inject_req; // active operation request + enum cxip_coll_state coll_state; // 
reduction state on node + struct cxip_coll_data accum; // reduction accumulator + struct cxip_coll_data backup; // copy of above + void *op_rslt_data; // user recv buffer (or NULL) + int op_data_bytcnt; // bytes in send/recv buffers + void *op_context; // caller's context + bool in_use; // reduction is in-use + bool pktsent; // reduction packet sent + bool completed; // reduction is completed + bool rdma_get_sent; // rdma get from leaf to root + bool rdma_get_completed; // rdma get completed + int rdma_get_cb_rc; // rdma get status + uint64_t leaf_contrib_start_us; // leaf ts after contrib send + bool drop_send; // drop the next send operation + bool drop_recv; // drop the next recv operation + enum cxip_coll_rc red_rc; // set by first error + struct timespec tv_expires; // need to retry? + struct timespec arm_expires; // RE expiration time for this red_id + struct dlist_entry tmout_link; // link to timeout list + uint8_t tx_msg[64]; // static packet memory +}; + +struct cxip_coll_mc { + struct fid_mc mc_fid; + struct dlist_entry entry; // Link to mc object list + struct cxip_ep_obj *ep_obj; // Associated endpoint + struct cxip_av_set *av_set_obj; // associated AV set + struct cxip_zbcoll_obj *zb; // zb object for zbcol + struct cxip_coll_pte *coll_pte; // collective PTE + struct timespec rootexpires; // root wait expiration timeout + struct timespec leafexpires; // leaf wait expiration timeout + struct timespec curlexpires; // CURL delete expiration timeout + fi_addr_t mynode_fiaddr; // fi_addr of this node + int mynode_idx; // av_set index of this node + uint32_t hwroot_idx; // av_set index of hwroot node + uint32_t mcast_addr; // multicast target address + int tail_red_id; // tail active red_id + int next_red_id; // next available red_id + int max_red_id; // limit total concurrency + int seqno; // rolling seqno for packets + int close_state; // the state of the close operation + bool has_closed; // true after a mc close call + bool has_error; // true if any 
error + bool is_multicast; // true if multicast address + bool arm_disable; // arm-disable for testing + bool retry_disable; // retry-disable for testing + bool is_joined; // true if joined + bool rx_discard; // true to discard RX events + enum cxi_traffic_class tc; // traffic class + enum cxi_traffic_class_type tc_type; // traffic class type + ofi_atomic32_t send_cnt; // for diagnostics + ofi_atomic32_t recv_cnt; // for diagnostics + ofi_atomic32_t pkt_cnt; // for diagnostics + ofi_atomic32_t seq_err_cnt; // for diagnostics + ofi_atomic32_t tmout_cnt; // for diagnostics + ofi_spin_t lock; + + struct cxi_md *reduction_md; // memory descriptor for DMA + struct cxip_coll_reduction reduction[CXIP_COLL_MAX_CONCUR]; + /* Logical address context for leaf rdma get */ + uint64_t rdma_get_lac_va_tx; + /* Logical address context received by the leaf */ + uint64_t rdma_get_lac_va_rx; + /* pointer to the source buffer base used in the RDMA */ + uint8_t *root_rdma_get_data_p; + /* pointer to the dest buffer base used in the RDMA */ + uint8_t *leaf_rdma_get_data_p; + /* root rdma get memory descriptor, for entire root src buffer */ + struct cxip_md *root_rdma_get_md; + /* leaf rdma get memory descriptor, for entire leaf dest buffer */ + struct cxip_md *leaf_rdma_get_md; +}; + +/* Function declarations */ +void cxip_coll_reset_mc_ctrs(struct fid_mc *mc); + +void cxip_coll_get_mc_ctrs(struct fid_mc *mc, struct coll_counters *counters); + +void cxip_coll_init_metrics(void); + +void cxip_coll_get_metrics(struct cxip_coll_metrics *metrics); + +void cxip_coll_init(struct cxip_ep_obj *ep_obj); + +int cxip_coll_enable(struct cxip_ep *ep); + +int cxip_coll_disable(struct cxip_ep_obj *ep_obj); + +void cxip_coll_close(struct cxip_ep_obj *ep_obj); + +void cxip_coll_populate_opcodes(void); + +int cxip_coll_send(struct cxip_coll_reduction *reduction, + int av_set_idx, const void *buffer, size_t buflen, + struct cxi_md *md); + +int cxip_coll_send_red_pkt(struct cxip_coll_reduction *reduction, 
+ const struct cxip_coll_data *coll_data, + bool arm, bool retry, bool root_result_pkt); + +void cxip_capture_red_id(int *red_id_buf); + +ssize_t cxip_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context); + +ssize_t cxip_broadcast(struct fid_ep *ep, void *buf, size_t count, + void *desc, fi_addr_t coll_addr, fi_addr_t root_addr, + enum fi_datatype datatype, uint64_t flags, + void *context); + +ssize_t cxip_reduce(struct fid_ep *ep, const void *buf, size_t count, + void *desc, void *result, void *result_desc, + fi_addr_t coll_addr, fi_addr_t root_addr, + enum fi_datatype datatype, enum fi_op op, uint64_t flags, + void *context); + +ssize_t cxip_allreduce(struct fid_ep *ep, const void *buf, size_t count, + void *desc, void *result, void *result_desc, + fi_addr_t coll_addr, enum fi_datatype datatype, + enum fi_op op, uint64_t flags, void *context); + +int cxip_join_collective(struct fid_ep *ep, fi_addr_t coll_addr, + const struct fid_av_set *coll_av_set, + uint64_t flags, struct fid_mc **mc, void *context); + +void cxip_coll_progress_join(struct cxip_ep_obj *ep_obj); + +void cxip_coll_progress_cq_poll(struct cxip_ep_obj *ep_obj); + +int cxip_coll_arm_disable(struct fid_mc *mc, bool disable); + +void cxip_coll_limit_red_id(struct fid_mc *mc, int max_red_id); + +void cxip_coll_drop_send(struct cxip_coll_reduction *reduction); + +void cxip_coll_drop_recv(struct cxip_coll_reduction *reduction); + +int cxip_coll_trace_attr cxip_coll_prod_trace(const char *fmt, ...); + +void cxip_coll_print_prod_trace(void); + +#endif /* _CXIP_COLL_H_ */ diff --git a/prov/cxi/include/cxip/coll_trace.h b/prov/cxi/include/cxip/coll_trace.h new file mode 100644 index 00000000000..8bb7fbdbebd --- /dev/null +++ b/prov/cxi/include/cxip/coll_trace.h @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_COLL_TRACE_H_ +#define _CXIP_COLL_TRACE_H_ + + +/* Forward 
declarations */ +struct cxip_ep_obj; + +/* Macros */ +#define cxip_coll_trace_attr __attribute__((format(__printf__, 1, 2))) + +#define CXIP_COLL_TRACE(mod, fmt, ...) \ + do {if (cxip_coll_prod_trace_true()) \ + cxip_coll_prod_trace(fmt, ##__VA_ARGS__); } while (0) + +/* Function declarations */ +int cxip_coll_trace_attr cxip_coll_trace(const char *fmt, ...); + +void cxip_coll_trace_flush(void); + +void cxip_coll_trace_close(void); + +void cxip_coll_trace_init(struct cxip_ep_obj *ep_obj); + +#endif /* _CXIP_COLL_TRACE_H_ */ diff --git a/prov/cxi/include/cxip/common.h b/prov/cxi/include/cxip/common.h new file mode 100644 index 00000000000..803fd5c1744 --- /dev/null +++ b/prov/cxi/include/cxip/common.h @@ -0,0 +1,313 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_COMMON_H_ +#define _CXIP_COMMON_H_ + + +#include +#include + +/* Forward declarations */ +struct cxip_domain; +struct cxip_req; +struct cxip_ux_send; + +/* Macros */ +#define _CXIP_PROV_H_ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define FLOOR(a, b) ((long long)(a) - (((long long)(a)) % (b))) + +#define CEILING(a, b) ((long long)(a) <= 0LL ? 
0 : (FLOOR((a)-1, b) + (b))) + +#define CXIP_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define CXIP_ALIGN(x, a) CXIP_ALIGN_MASK(x, (typeof(x))(a) - 1) + +#define CXIP_ALIGN_DOWN(x, a) CXIP_ALIGN((x) - ((a) - 1), (a)) + +#define CXIP_PATH_MAX 256 + +#define CXIP_BUFFER_ID_MAX (1 << 16) + +#define CXIP_TX_COMP_MODES (FI_INJECT_COMPLETE | \ + FI_TRANSMIT_COMPLETE | \ + FI_DELIVERY_COMPLETE | \ + FI_MATCH_COMPLETE) + +#define CXIP_TX_OP_FLAGS (FI_INJECT | \ + FI_COMPLETION | \ + CXIP_TX_COMP_MODES | \ + FI_REMOTE_CQ_DATA | \ + FI_MORE | \ + FI_FENCE) + +#define CXIP_RX_OP_FLAGS (FI_COMPLETION | \ + FI_MULTI_RECV | \ + FI_MORE) + +#define CXIP_RX_IGNORE_OP_FLAGS (FI_REMOTE_CQ_DATA | \ + FI_INJECT) + +#define CXIP_WRITEMSG_ALLOWED_FLAGS (FI_INJECT | \ + FI_COMPLETION | \ + FI_MORE | \ + FI_FENCE | \ + CXIP_TX_COMP_MODES) + +#define CXIP_READMSG_ALLOWED_FLAGS (FI_COMPLETION | \ + FI_MORE | \ + FI_FENCE | \ + CXIP_TX_COMP_MODES) + +#define CXIP_AMO_MAX_IOV 1 + +#define CXIP_REMOTE_CQ_DATA_SZ 8 + +#define CXIP_RDZV_THRESHOLD 16384 + +#define CXIP_OFLOW_BUF_SIZE (12*1024*1024) + +#define CXIP_OFLOW_BUF_MIN_POSTED 3 + +#define CXIP_OFLOW_BUF_MAX_CACHED (CXIP_OFLOW_BUF_MIN_POSTED * 3) + +#define CXIP_DEFAULT_MR_CACHE_MAX_CNT 4096 + +#define CXIP_DEFAULT_MR_CACHE_MAX_SIZE -1 + +#define CXIP_SAFE_DEVMEM_COPY_THRESH 4096 + +#define CXIP_CAPS (CXIP_DOM_CAPS | CXIP_EP_CAPS) + +#define CXIP_INJECT_SIZE C_MAX_IDC_PAYLOAD_UNR + +#define CXIP_MAX_TX_SIZE 16384U + +#define CXIP_DEFAULT_TX_SIZE 1024U + +#define CXI_PROV_LE_PER_EP 1024U + +#define LES_PER_EP_MAX 16384U + +#define CXIP_MAX_RX_SIZE (LES_PER_EP_MAX - CXI_PROV_LE_PER_EP) + +#define CXIP_DEFAULT_RX_SIZE 1024U + +#define CXIP_MAJOR_VERSION 0 + +#define CXIP_MINOR_VERSION 1 + +#define CXIP_PROV_VERSION FI_VERSION(CXIP_MAJOR_VERSION, \ + CXIP_MINOR_VERSION) + +#define CXIP_FI_VERSION FI_VERSION(2, 4) + +#define CXIP_WIRE_PROTO_VERSION 1 + +#define CXIP_PAUSE() + +#define CXIP_PTL_IDX_RXQ 0 + +#define CXIP_PTL_IDX_RNR_RXQ 
1 + +#define CXIP_PTL_IDX_WRITE_MR_OPT_BASE 17 + +#define CXIP_PTL_IDX_READ_MR_OPT_BASE 128 + +#define CXIP_PTL_IDX_MR_OPT_CNT 100 + +#define CXIP_PTL_IDX_PROV_NUM_CACHE_IDX 8 + +#define CXIP_PTL_IDX_PROV_MR_OPT_CNT \ + (CXIP_PTL_IDX_MR_OPT_CNT - CXIP_PTL_IDX_PROV_NUM_CACHE_IDX) + +#define CXIP_PTL_IDX_WRITE_MR_OPT(key) \ + (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + \ + CXIP_MR_UNCACHED_KEY_TO_IDX(key)) + +#define CXIP_PTL_IDX_READ_MR_OPT(key) \ + (CXIP_PTL_IDX_READ_MR_OPT_BASE + \ + CXIP_MR_UNCACHED_KEY_TO_IDX(key)) + +#define CXIP_PTL_IDX_WRITE_PROV_CACHE_MR_OPT(lac) \ + (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + (lac)) + +#define CXIP_PTL_IDX_READ_PROV_CACHE_MR_OPT(lac) \ + (CXIP_PTL_IDX_READ_MR_OPT_BASE + (lac)) + +#define CXIP_PTL_IDX_WRITE_MR_STD 117 + +#define CXIP_PTL_IDX_RDZV_DEST 127 + +#define CXIP_PTL_IDX_COLL 6 + +#define CXIP_PTL_IDX_CTRL CXIP_PTL_IDX_WRITE_MR_STD + +#define CXIP_PTL_IDX_READ_MR_STD 228 + +#define CXIP_PTL_IDX_RDZV_RESTRICTED_BASE 229 + +#define CXIP_PTL_IDX_RDZV_RESTRICTED(lac) \ + (CXIP_PTL_IDX_RDZV_RESTRICTED_BASE + (lac)) + +#define CXIP_PTL_IDX_RDZV_SRC 255 + +#define CXIP_NUM_CACHED_KEY_LE 8 + +#define CXIP_TX_ID_WIDTH 11 + +#define CXIP_RDZV_ID_CMD_WIDTH 8 + +#define CXIP_RDZV_ID_HIGH_WIDTH 7 + +#define CXIP_TOTAL_RDZV_ID_WIDTH (CXIP_RDZV_ID_CMD_WIDTH + \ + CXIP_RDZV_ID_HIGH_WIDTH) + +#define CXIP_CS_TAG_WIDTH 40 + +#define CXIP_VNI_WIDTH 16 + +#define CXIP_CS_TAG_MASK ((1UL << CXIP_CS_TAG_WIDTH) - 1) + +#define CXIP_IS_PROV_MR_KEY_BIT (1ULL << 63) + +#define CXIP_KEY_MATCH_BITS(key) ((key) & ~CXIP_IS_PROV_MR_KEY_BIT) + +#define CXI_PLATFORM_ASIC 0 + +#define CXI_PLATFORM_NETSIM 1 + +#define CXI_PLATFORM_Z1 2 + +#define CXI_PLATFORM_FPGA 3 + +#define MAX_HW_CPS 16 + +#define TELEMETRY_ENTRY_NAME_SIZE 64U + +#define CXIP_DEF_EVENT_HT_BUCKETS 256 + +#define ZB_NOSIM -1 + +#define ZB_ALLSIM -2 + +#define CXIP_COUNTER_BUCKETS 31U + +#define CXIP_BUCKET_MAX (CXIP_COUNTER_BUCKETS - 1) + +#define CXIP_LIST_COUNTS 3U + +#define 
CXIP_SW_RX_TX_INIT_MAX_DEFAULT 1024 + +#define CXIP_SW_RX_TX_INIT_MIN 64 + +#define CXIP_DONE_NOTIFY_RETRY_DELAY_US 100 + +#define CXIP_RDZV_IDS (1 << CXIP_TOTAL_RDZV_ID_WIDTH) + +#define CXIP_RDZV_IDS_MULTI_RECV (1 << CXIP_RDZV_ID_CMD_WIDTH) + +#define CXIP_TX_IDS (1 << CXIP_TX_ID_WIDTH) + +#define RDZV_SRC_LES 8U + +#define RDZV_NO_MATCH_PTES 8U + +#define CXIP_RNR_TIMEOUT_US 500000 + +#define CXIP_NUM_RNR_WAIT_QUEUE 5 + +#define _CXIP_DBG(subsys, fmt, ...) \ + FI_DBG(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ + ##__VA_ARGS__) + +#define _CXIP_INFO(subsys, fmt, ...) \ + FI_INFO(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ + ##__VA_ARGS__) + +#define _CXIP_WARN(subsys, fmt, ...) \ + FI_WARN(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ + ##__VA_ARGS__) + +#define _CXIP_WARN_ONCE(subsys, fmt, ...) \ + FI_WARN_ONCE(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ + ##__VA_ARGS__) + +#define CXIP_UNEXPECTED_EVENT_STS "Unexpected event status, %s rc = %s\n" + +#define CXIP_UNEXPECTED_EVENT "Unexpected event %s, rc = %s\n" + +#define CXIP_DEFAULT_CACHE_LINE_SIZE 64 + +#define CXIP_SYSFS_CACHE_LINE_SIZE \ + "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" + +#define CXIP_HYBRID_RECV_CHECK_INTERVAL (64-1) + +#define FC_SW_LE_MSG_FATAL "LE exhaustion during flow control, "\ + "FI_CXI_RX_MATCH_MODE=[hybrid|software] is required\n" + +/* Type definitions */ +struct cxip_telemetry { + struct cxip_domain *dom; + + /* List of telemetry entries to being monitored. 
*/ + struct dlist_entry telemetry_list; +}; + +struct cxip_topo_addr { + union { + uint32_t addr; + struct { + uint32_t port_num:CXIP_ADDR_PORT_BITS; + uint32_t switch_num:CXIP_ADDR_SWITCH_BITS; + uint32_t group_num:CXIP_ADDR_GROUP_BITS; + } dragonfly; + struct { + uint32_t port_num:CXIP_ADDR_FATTREE_PORT_BITS; + uint32_t switch_num:CXIP_ADDR_FATTREE_SWITCH_BITS; + } fat_tree; + }; +}; + +union cxip_def_event_key { + struct { + uint64_t initiator : 32; + uint64_t rdzv_id : 15; + uint64_t pad0 : 16; + uint64_t rdzv : 1; + }; + struct { + uint64_t start_addr : 57; + uint64_t pad1 : 7; + }; + uint64_t raw; +}; + +struct cxip_deferred_event { + struct dlist_entry rxc_entry; + union cxip_def_event_key key; + struct cxip_req *req; + union c_event ev; + uint64_t mrecv_start; + uint32_t mrecv_len; + + struct cxip_ux_send *ux_send; +}; + +struct coll_counters { + int32_t coll_recv_cnt; + int32_t send_cnt; + int32_t recv_cnt; + int32_t pkt_cnt; + int32_t seq_err_cnt; + int32_t tmout_cnt; +}; + +#endif /* _CXIP_COMMON_H_ */ diff --git a/prov/cxi/include/cxip/cq.h b/prov/cxi/include/cxip/cq.h new file mode 100644 index 00000000000..d1dd7b84d55 --- /dev/null +++ b/prov/cxi/include/cxip/cq.h @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_CQ_H_ +#define _CXIP_CQ_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_domain; +struct cxip_req; + +/* Macros */ +#define CXIP_CQ_DEF_SZ 131072U + +/* Type definitions */ +struct cxip_cq_eq { + struct cxi_eq *eq; + void *buf; + size_t len; + struct cxi_md *md; + bool mmap; + unsigned int unacked_events; + struct c_eq_status prev_eq_status; + bool eq_saturated; +}; + +struct cxip_cq { + struct util_cq util_cq; + struct fi_cq_attr attr; + + /* Implement our own CQ ep_list_lock since common code util_cq + * implementation is a mutex and can not be optimized. 
This lock + * is always taken walking the CQ EP, but can be optimized to no-op. + */ + struct ofi_genlock ep_list_lock; + + /* CXI CQ wait object EPs are maintained in epoll FD */ + int ep_fd; + + /* CXI specific fields. */ + struct cxip_domain *domain; + unsigned int ack_batch_size; + struct dlist_entry dom_entry; +}; + +struct cxip_fid_list { + struct dlist_entry entry; + struct fid *fid; +}; + +/* Function declarations */ +const char *cxip_strerror(int prov_errno); + +int cxip_cq_req_complete(struct cxip_req *req); + +int cxip_cq_req_complete_addr(struct cxip_req *req, fi_addr_t src); + +int cxip_cq_req_error(struct cxip_req *req, size_t olen, + int err, int prov_errno, void *err_data, + size_t err_data_size, fi_addr_t src_addr); + +int cxip_cq_add_wait_fd(struct cxip_cq *cq, int wait_fd, int events); + +void cxip_cq_del_wait_fd(struct cxip_cq *cq, int wait_fd); + +int proverr2errno(int err); + +int cxip_cq_trywait(struct cxip_cq *cq); + +void cxip_cq_progress(struct cxip_cq *cq); + +void cxip_util_cq_progress(struct util_cq *util_cq); + +int cxip_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq, void *context); + +void cxip_cq_flush_trig_reqs(struct cxip_cq *cq); + +#endif /* _CXIP_CQ_H_ */ diff --git a/prov/cxi/include/cxip/ctrl.h b/prov/cxi/include/cxip/ctrl.h new file mode 100644 index 00000000000..c6228b77b1d --- /dev/null +++ b/prov/cxi/include/cxip/ctrl.h @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_CTRL_H_ +#define _CXIP_CTRL_H_ + + +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_ep_obj; +struct cxip_mr; +struct cxip_pte; + +/* Type definitions */ +struct cxip_ctrl_req_mr { + struct cxip_mr *mr; +}; + +struct cxip_ctrl_send { + uint32_t nic_addr; + uint32_t pid; + uint16_t vni; + union cxip_match_bits mb; +}; + +struct cxip_ctrl_req { + struct dlist_entry 
ep_entry; + struct cxip_ep_obj *ep_obj; + int req_id; + int (*cb)(struct cxip_ctrl_req *req, const union c_event *evt); + + union { + struct cxip_ctrl_req_mr mr; + struct cxip_ctrl_send send; + }; +}; + +struct cxip_ctrl { + /* wait object is required to wake up CQ waiters + * when control progress is required. + */ + struct cxil_wait_obj *wait; + + struct cxi_eq *tgt_evtq; + struct cxi_eq *tx_evtq; + + /* TX command queue is used to initiate side-band messaging + * and is TX credit based. + */ + struct cxip_cmdq *txq; + unsigned int tx_credits; + + /* Target command queue is used for appending RX side-band + * messaging control LE and managing standard MR LE. + */ + struct cxip_cmdq *tgq; + struct cxip_pte *pte; + struct cxip_ctrl_req msg_req; + + /* FI_MR_PROV_KEY caching, protected with ep_obj->lock */ + struct cxip_mr_lac_cache std_mr_cache[CXIP_NUM_CACHED_KEY_LE]; + struct cxip_mr_lac_cache opt_mr_cache[CXIP_NUM_CACHED_KEY_LE]; + + struct dlist_entry mr_list; + + /* Event queue buffers */ + void *tgt_evtq_buf; + struct cxi_md *tgt_evtq_buf_md; + void *tx_evtq_buf; + struct cxi_md *tx_evtq_buf_md; +}; + +/* Function declarations */ +void cxip_ctrl_mr_cache_flush(struct cxip_ep_obj *ep_obj); + +int cxip_ctrl_msg_send(struct cxip_ctrl_req *req, uint64_t data); + +#endif /* _CXIP_CTRL_H_ */ diff --git a/prov/cxi/include/cxip/curl.h b/prov/cxi/include/cxip/curl.h new file mode 100644 index 00000000000..bed007eead7 --- /dev/null +++ b/prov/cxi/include/cxip/curl.h @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_CURL_H_ +#define _CXIP_CURL_H_ + + +#include +#include +#include + +/* Type definitions */ +struct cxip_curl_handle { + long status; // HTTP status, 0 for no server, -1 busy + const char *endpoint; // HTTP server endpoint address + const char *request; // HTTP request data + const char *response; // HTTP response data, NULL until 
complete + curlcomplete_t usrfunc; // user completion function + void *usrptr; // user function argument + void *recv; // opaque + void *headers; // opaque +}; + +/* Function declarations */ +int cxip_curl_init(void); + +void cxip_curl_fini(void); + +const char *cxip_curl_opname(enum curl_ops op); + +int cxip_curl_perform(const char *endpoint, const char *request, + const char *sessionToken, size_t rsp_init_size, + enum curl_ops op, bool verbose, + curlcomplete_t usrfunc, void *usrptr); + +int cxip_curl_progress(struct cxip_curl_handle **handleptr); + +void cxip_curl_free(struct cxip_curl_handle *handle); + +enum json_type cxip_json_obj(const char *desc, struct json_object *jobj, + struct json_object **jval); + +int cxip_json_bool(const char *desc, struct json_object *jobj, bool *val); + +int cxip_json_int(const char *desc, struct json_object *jobj, int *val); + +int cxip_json_int64(const char *desc, struct json_object *jobj, int64_t *val); + +int cxip_json_double(const char *desc, struct json_object *jobj, double *val); + +int cxip_json_string(const char *desc, struct json_object *jobj, + const char **val); + +struct json_object *cxip_json_tokener_parse(const char *str); + +int cxip_json_object_put(struct json_object *obj); + +#endif /* _CXIP_CURL_H_ */ diff --git a/prov/cxi/include/cxip/dom.h b/prov/cxi/include/cxip/dom.h new file mode 100644 index 00000000000..32174f8cb64 --- /dev/null +++ b/prov/cxi/include/cxip/dom.h @@ -0,0 +1,236 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_DOM_H_ +#define _CXIP_DOM_H_ + + +#include +#include +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_cntr; +struct cxip_ctrl_req; +struct cxip_eq; +struct cxip_fabric; +struct cxip_if; +struct cxip_lni; +struct cxip_mr; +struct cxip_telemetry; + +/* Macros */ +#define CXIP_DOM_CAPS (FI_LOCAL_COMM | 
FI_REMOTE_COMM | FI_AV_USER_ID | FI_PEER) + +#define DOM_INFO(dom, fmt, ...) \ + _CXIP_INFO(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ + (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ + (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ + (dom)->nic_addr, ##__VA_ARGS__) + +#define DOM_WARN(dom, fmt, ...) \ + _CXIP_WARN(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ + (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ + (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ + (dom)->nic_addr, ##__VA_ARGS__) + +/* Type definitions */ +struct cxip_domain_cmdq { + struct dlist_entry entry; + struct cxip_cmdq *cmdq; +}; + +struct cxip_domain { + struct util_domain util_domain; + struct cxip_fabric *fab; + ofi_spin_t lock; + ofi_atomic32_t ref; + + struct fid_ep rx_ep; + struct fid_peer_srx *owner_srx; + + uint32_t tclass; + + struct cxip_eq *eq; //unused + struct cxip_eq *mr_eq; //unused + + /* Assigned NIC address */ + uint32_t nic_addr; + + /* Device info */ + struct cxip_if *iface; + + /* Device partition */ + struct cxip_lni *lni; + + /* Trigger and CT support */ + struct cxip_cmdq *trig_cmdq; + struct ofi_genlock trig_cmdq_lock; + bool cntr_init; + + /* Provider generated RKEYs, else client */ + bool is_prov_key; + + /* Can disable caching of provider generated RKEYs */ + bool prov_key_cache; + + /* Provider generated RKEYs optimized MR disablement/enablement */ + bool optimized_mrs; + + /* Enable MR match event counting enables a more robust + * MR when using FI_MR_PROV_KEY. It disables hardware cached + * MR keys and ensures memory backing a MR cannot be + * remotely accessed even if that memory remains in the + * libfabric MR cache. + */ + bool mr_match_events; + + /* Domain wide MR resources. + * Req IDs are control buffer IDs to map MR or MR cache to an LE. + * MR IDs are used by non-cached provider key MR to decouple the + * MR and Req ID, and do not map directly to the MR LE. 
+ */ + ofi_spin_t ctrl_id_lock; + struct indexer req_ids; + struct indexer mr_ids; + + /* If FI_MR_PROV_KEY is not cached, keys include a sequence number + * to reduce the likelihood of a stale key being used to access + * a recycled MR key. + */ + uint32_t prov_key_seqnum; + + /* Translation cache */ + struct ofi_mr_cache iomm; + bool odp; + bool ats; + bool hmem; + + /* ATS translation support */ + struct cxip_md scalable_md; + bool scalable_iomm; + bool rocr_dev_mem_only; + + /* Domain state */ + bool enabled; + + /* List of allocated resources used for deferred work queue processing. + */ + struct dlist_entry txc_list; + struct dlist_entry cntr_list; + struct dlist_entry cq_list; + + struct fi_hmem_override_ops hmem_ops; + bool hybrid_mr_desc; + + /* Container of in-use MRs against this domain. */ + struct cxip_mr_domain mr_domain; + + /* Counters collected for the duration of the domain existence. */ + struct cxip_telemetry *telemetry; + + /* NIC AMO operation which is remapped to a PCIe operation. */ + int amo_remap_to_pcie_fadd; + + /* Maximum number of triggered operations configured for the service + * ID. + */ + int max_trig_op_in_use; + sem_t *trig_op_lock; + + /* Domain has been configured with FI_AV_AUTH_KEY. */ + bool av_auth_key; + + /* This is only valid if FI_AV_AUTH_KEY is false. */ + struct cxi_auth_key auth_key; + + /* Maximum number of auth keys requested by user. */ + size_t auth_key_entry_max; + + /* Domain has been configured with FI_AV_USER_ID. */ + bool av_user_id; + + /* Domain level TX command queues used when number of authorization + * keys exceeds LCID limit. 
+ */ + struct dlist_entry cmdq_list; + unsigned int cmdq_cnt; + struct ofi_genlock cmdq_lock; + size_t tx_size; + + /* domain level match mode override */ + enum cxip_ep_ptle_mode rx_match_mode; + bool msg_offload; + size_t req_buf_size; + +}; + +/* Function declarations */ +int cxip_domain_emit_idc_put(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + const struct c_cstate_cmd *c_state, + const struct c_idc_put_cmd *put, const void *buf, + size_t len, uint64_t flags); + +int cxip_domain_emit_dma(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, struct c_full_dma_cmd *dma, + uint64_t flags); + +int cxip_domain_emit_idc_amo(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + const struct c_cstate_cmd *c_state, + const struct c_idc_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +int cxip_domain_emit_dma_amo(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + struct c_dma_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +int cxip_domain_emit_idc_msg(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + const struct c_cstate_cmd *c_state, + const struct c_idc_msg_hdr *msg, const void *buf, + size_t len, uint64_t flags); + +int cxip_domain_valid_vni(struct cxip_domain *dom, struct cxi_auth_key *key); + +int cxip_domain(struct fid_fabric *fabric, struct fi_info *info, + struct fid_domain **dom, void *context); + +void cxip_dom_cntr_disable(struct cxip_domain *dom); + +int cxip_domain_ctrl_id_alloc(struct cxip_domain *dom, + struct cxip_ctrl_req *req); + +void cxip_domain_ctrl_id_free(struct cxip_domain *dom, + struct cxip_ctrl_req *req); + +int cxip_domain_prov_mr_id_alloc(struct cxip_domain *dom, + struct cxip_mr *mr); + +void cxip_domain_prov_mr_id_free(struct cxip_domain *dom, + struct cxip_mr *mr); + +int cxip_domain_dwq_emit_dma(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct 
cxip_cntr *trig_cntr, size_t trig_thresh, + struct c_full_dma_cmd *dma, uint64_t flags); + +int cxip_domain_dwq_emit_amo(struct cxip_domain *dom, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct cxip_cntr *trig_cntr, size_t trig_thresh, + struct c_dma_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +#endif /* _CXIP_DOM_H_ */ diff --git a/prov/cxi/include/cxip/enums.h b/prov/cxi/include/cxip/enums.h new file mode 100644 index 00000000000..45746191744 --- /dev/null +++ b/prov/cxi/include/cxip/enums.h @@ -0,0 +1,305 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_ENUMS_H_ +#define _CXIP_ENUMS_H_ + + +/* All enum type definitions */ +/* Included first because many structs embed enum fields */ + +enum cxip_ats_mlock_mode { + CXIP_ATS_MLOCK_OFF, + CXIP_ATS_MLOCK_CACHE, + CXIP_ATS_MLOCK_ALL, +}; + +enum cxip_llring_mode { + CXIP_LLRING_NEVER, + CXIP_LLRING_IDLE, + CXIP_LLRING_ALWAYS, +}; + +enum cxip_ep_ptle_mode { + CXIP_PTLTE_HARDWARE_MODE, + CXIP_PTLTE_DEFAULT_MODE = CXIP_PTLTE_HARDWARE_MODE, + CXIP_PTLTE_SOFTWARE_MODE, + CXIP_PTLTE_HYBRID_MODE, +}; + +enum cxip_rdzv_proto { + CXIP_RDZV_PROTO_DEFAULT, /* unrestricted gets */ + CXIP_RDZV_PROTO_ALT_READ, /* restricted gets */ + CXIP_RDZV_PROTO_ALT_WRITE, /* restricted puts */ +}; + +enum cxip_mr_target_ordering { + /* Sets MR target ordering based on message and target RMA ordering + * options. + */ + MR_ORDER_DEFAULT, + + /* Force ordering to always be strict. */ + MR_ORDER_STRICT, + + /* Force ordering to always be relaxed. */ + MR_ORDER_RELAXED, +}; + +enum cxip_le_type { + CXIP_LE_TYPE_RX = 0, /* RX data LE */ + CXIP_LE_TYPE_ZBP, /* Zero-byte Put control message LE. Used to + * exchange data in the EQ header_data and + * match_bits fields. Unexpected headers are + * disabled. 
+ */ +}; + +enum cxip_ctrl_le_type { + CXIP_CTRL_LE_TYPE_MR = 0, /* Memory Region LE */ + CXIP_CTRL_LE_TYPE_CTRL_MSG, /* Control Message LE */ +}; + +enum cxip_ctrl_msg_type { + CXIP_CTRL_MSG_FC_NOTIFY = 0, + CXIP_CTRL_MSG_FC_RESUME, + CXIP_CTRL_MSG_ZB_DATA, + CXIP_CTRL_MSG_ZB_DATA_RDMA_LAC, +}; + +enum cxip_req_type { + CXIP_REQ_RMA, + CXIP_REQ_AMO, + CXIP_REQ_OFLOW, + CXIP_REQ_RECV, + CXIP_REQ_SEND, + CXIP_REQ_RDZV_SRC, + CXIP_REQ_SEARCH, + CXIP_REQ_COLL, + CXIP_REQ_RBUF, +}; + +enum cxip_rxc_state { + /* Initial state of an RXC. All user posted receives are rejected until + * the RXC has been enabled. + * + * Note that an RXC can be transitioned from any state into + * RXC_DISABLED. + * + * Validate state changes: + * RXC_ENABLED: User has successfully enabled the RXC. + * RXC_ENABLED_SOFTWARE: User has successfully initialized the RXC + * in a software only RX matching mode. + */ + RXC_DISABLED = 0, + + /* User posted receives are matched against the software unexpected + * list before being offloaded to hardware. Hardware matches against + * the corresponding PtlTE priority and overflow list. + * + * Validate state changes: + * RXC_ONLOAD_FLOW_CONTROL: Several scenarios can initiate this state + * change. + * 1. Hardware fails to allocate an LE for an unexpected message + * or a priority list LE append fails, and hybrid mode is not + * enabled. Hardware transitions the PtlTE from enabled to disabled. + * 2. Hardware fails to allocate an LE during an overflow list + * append. The PtlTE remains in the enabled state but appends to + * the overflow list are disabled. Software manually disables + * the PtlTE. + * 3. Hardware fails to successfully match on the overflow list. + * Hardware automatically transitions the PtlTE from enabled to + * disabled. + * RXC_ONLOAD_FLOW_CONTROL_REENABLE: Several scenarios can initiate + * this state change: + * 1. 
The hardware EQ is full, hardware transitions the PtlTE from + * enabled/software managed to disabled to recover drops, but it + * can re-enable if an LE resource is not recovered. + * 2. Running "hardware" RX match mode and matching failed because + * the overflow list buffers were full. Hardware transitions the + * PtlTE from enabled to disabled. The overflow list must be + * replenished and processing can continue if an LE resource is not + * recovered. + * 3. Running "hybrid" or "software" RX match mode and a message + * is received, but there is not a buffer available on the request + * list. Hardware transitions the PtlTE from software managed to + * disabled. The request list must be replenished and processing + * can continue if an LE resource is not recovered. + * RXC_PENDING_PTLTE_SOFTWARE_MANAGED: When the provider is configured + * to run in "hybrid" RX match mode and hardware fails to allocate an + * LE for an unexpected message match or an priority list append fails. + * Hardware will automatically transition the PtlTE from enabled to + * software managed and onload of UX messages will be initiated. + */ + RXC_ENABLED, + + /* The NIC has initiated a transition to software managed EP matching. + * + * Software must onload/reonload the hardware unexpected list while + * creating a pending unexpected list from entries received on the PtlTE + * request list. Any in flight appends will fail and be added to + * a receive replay list, further attempts to post receive operations + * will return -FI_EAGAIN. When onloading completes, the pending + * UX list is appended to the onloaded UX list and then failed appends + * are replayed prior to enabling the posting of receive operations. + * + * Validate state changes: + * RXC_ENABLED_SOFTWARE: The HW to SW transition onloading has + * completed and the onloaded and pending request UX list have been + * combined. 
+ */ + RXC_PENDING_PTLTE_SOFTWARE_MANAGED, + + /* Executing as a software managed PtlTE either due to hybrid + * transition from hardware or initial startup in software + * RX matching mode. + * + * Validate state changes: + * RXC_PENDING_PTLTE_HARDWARE: TODO: When able, software may + * initiate a transition from software managed mode back to + * fully offloaded operation. + * RXC_ONLOAD_FLOW_CONTROL_REENABLE: Hardware was unable to match + * on the request list or the EQ is full. Hardware has disabled the + * PtlTE initiating flow control. Operation can continue if LE + * resources are not recovered as long as request buffers can be + * replenished. + */ + RXC_ENABLED_SOFTWARE, + + /* TODO: Hybrid RX match mode PtlTE is transitioning from software + * managed operation back to fully offloaded operation. + * + * Validate state changes: + * RXC_ENABLED: Hybrid software managed PtlTE successfully + * transitions back to fully offloaded operation. + * RXC_ENABLED_SOFTWARE: Hybrid software managed PtlTE was + * not able to transition to fully offloaded operation. + */ + RXC_PENDING_PTLTE_HARDWARE, + + /* Software has encountered a condition which requires manual transition + * of the PtlTE into disable. This state change occurs when a posted + * receive could not be appended due to LE exhaustion and software + * managed EP PtlTE operation has been disabled or is not possible. + * + * Validate state changes: + * RXC_ONLOAD_FLOW_CONTROL: PtlTE disabled event has successfully been + * received and onloading can begin. + */ + RXC_PENDING_PTLTE_DISABLE, + + /* Flow control has occurred and the PtlTE is disabled. Software is + * in the process of onloading the hardware unexpected headers to free + * up LEs. User posted receives are matched against the software + * unexpected list. If a match is not found on the software unexpected + * list, -FI_EAGAIN is returned to the user. Hardware matching is + * disabled. 
+ * + * Validate state changes: + * RXC_ONLOAD_FLOW_CONTROL_REENABLE: An unexpected list entry matched + * a user posted receive, the search and delete command free an + * unexpected list entry, or a transition to software managed EP is + * occurring. + */ + RXC_ONLOAD_FLOW_CONTROL, + + /* PtlTE is in the same state as RXC_ONLOAD_FLOW_CONTROL, but the RXC + * should attempt to be re-enabled. + * + * Validate state changes: + * RXC_FLOW_CONTROL: Onloading of the unexpected headers has completed. + */ + RXC_ONLOAD_FLOW_CONTROL_REENABLE, + + /* Software is performing sideband communication to recover the dropped + * messages. User posted receives are matched against the software + * unexpected list. If a match is not found on the software unexpected + * list, -FI_EAGAIN is returned to the user. Hardware matching is + * disabled. + * + * If an append fails due to RC_NO_SPACE while in the RXC_FLOW_CONTROL + * state, hardware LEs are exhausted and no more LEs can be freed by + * onloading unexpected headers into software. This is a fatal event + * which requires software endpoint mode to work around. + * + * Validate state changes: + * RXC_ENABLED: Sideband communication is complete and PtlTE is + * successfully re-enabled. + * RXC_SOFTWARE_MANAGED: When executing in "hybrid" or "software" + * RX match mode and processing has requested to re-enable as a + * software managed EP. 
+ */ + RXC_FLOW_CONTROL, +}; + +enum cxip_mr_state { + CXIP_MR_DISABLED = 1, + CXIP_MR_ENABLED, + CXIP_MR_LINKED, + CXIP_MR_UNLINKED, + CXIP_MR_LINK_ERR, +}; + +enum cxip_coll_redtype { + REDTYPE_BYT, + REDTYPE_INT, + REDTYPE_FLT, + REDTYPE_IMINMAX, + REDTYPE_FMINMAX, + REDTYPE_REPSUM +}; + +enum cxip_coll_state { + CXIP_COLL_STATE_NONE, + CXIP_COLL_STATE_READY, + CXIP_COLL_STATE_FAULT, +}; + +typedef enum cxip_coll_rc { + CXIP_COLL_RC_SUCCESS = 0, // good + CXIP_COLL_RC_FLT_INEXACT = 1, // result was rounded + CXIP_COLL_RC_FLT_OVERFLOW = 3, // result too large to represent + CXIP_COLL_RC_FLT_INVALID = 4, // op was signalling NaN, or + // infinities subtracted + CXIP_COLL_RC_REP_INEXACT = 5, // reproducible sum was rounded + CXIP_COLL_RC_INT_OVERFLOW = 6, // reproducible sum overflow + CXIP_COLL_RC_CONTR_OVERFLOW = 7, // too many contributions seen + CXIP_COLL_RC_OP_MISMATCH = 8, // conflicting opcodes + CXIP_COLL_RC_TX_FAILURE = 9, // internal send error + CXIP_COLL_RC_RDMA_FAILURE = 10, // leaf rdma read error + CXIP_COLL_RC_RDMA_DATA_FAILURE = 11, // leaf rdma read data misc + CXIP_COLL_RC_MAX = 12 +} cxip_coll_rc_t; + +enum curl_ops { + CURL_GET, + CURL_PUT, + CURL_POST, + CURL_PATCH, + CURL_DELETE, + CURL_MAX +}; + +enum cxip_amo_req_type { + CXIP_RQ_AMO, + CXIP_RQ_AMO_FETCH, + CXIP_RQ_AMO_SWAP, + CXIP_RQ_AMO_PCIE_FETCH, + CXIP_RQ_AMO_LAST, +}; + +enum cxip_coll_trace_module { + CXIP_TRC_CTRL, + CXIP_TRC_ZBCOLL, + CXIP_TRC_COLL_CURL, + CXIP_TRC_COLL_PKT, + CXIP_TRC_COLL_JOIN, + CXIP_TRC_COLL_DEBUG, + CXIP_TRC_TEST_CODE, + CXIP_TRC_MAX +}; + +#endif /* _CXIP_ENUMS_H_ */ diff --git a/prov/cxi/include/cxip/env.h b/prov/cxi/include/cxip/env.h new file mode 100644 index 00000000000..e055442bc7e --- /dev/null +++ b/prov/cxi/include/cxip/env.h @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_ENV_H_ +#define _CXIP_ENV_H_ + + +#include + +/* 
Type definitions */ +struct cxip_environment { + /* Translation */ + int odp; + int force_odp; + int ats; + int iotlb; + int disable_dmabuf_cuda; + int disable_dmabuf_rocr; + enum cxip_ats_mlock_mode ats_mlock_mode; + + /* Messaging */ + int fork_safe_requested; + enum cxip_ep_ptle_mode rx_match_mode; + int msg_offload; + int trunc_ok; + int hybrid_preemptive; + int hybrid_recv_preemptive; + size_t rdzv_threshold; + size_t rdzv_get_min; + size_t rdzv_eager_size; + int rdzv_aligned_sw_rget; + int rnr_max_timeout_us; + int disable_non_inject_msg_idc; + int disable_non_inject_rma_idc; + int disable_non_inject_amo_idc; + int disable_host_register; + size_t oflow_buf_size; + size_t oflow_buf_min_posted; + size_t oflow_buf_max_cached; + size_t safe_devmem_copy_threshold; + size_t req_buf_size; + size_t req_buf_min_posted; + size_t req_buf_max_cached; + int sw_rx_tx_init_max; + int msg_lossless; + size_t default_cq_size; + size_t default_tx_size; + size_t default_rx_size; + int optimized_mrs; + int prov_key_cache; + int mr_match_events; + int disable_eq_hugetlb; + int zbcoll_radix; + + enum cxip_llring_mode llring_mode; + + int cq_policy; + + size_t default_vni; + + size_t eq_ack_batch_size; + int fc_retry_usec_delay; + int cntr_spin_before_yield; + size_t ctrl_rx_eq_max_size; + char *device_name; + size_t cq_fill_percent; + int rget_tc; + int cacheline_size; + + char *coll_job_id; + char *coll_job_step_id; + size_t coll_retry_usec; + size_t coll_timeout_usec; + char *coll_fabric_mgr_url; + char *coll_mcast_token; + size_t hwcoll_addrs_per_job; + size_t hwcoll_min_nodes; + int coll_use_dma_put; + + char hostname[255]; + char *telemetry; + int telemetry_rgid; + int disable_hmem_dev_register; + int ze_hmem_supported; + enum cxip_rdzv_proto rdzv_proto; + int disable_alt_read_cmdq; + int cntr_trig_cmdq; + int enable_trig_op_limit; + int hybrid_posted_recv_preemptive; + int hybrid_unexpected_msg_preemptive; + size_t mr_cache_events_disable_poll_nsecs; + size_t 
mr_cache_events_disable_le_poll_nsecs; + int force_dev_reg_copy; + enum cxip_mr_target_ordering mr_target_ordering; + int disable_cuda_sync_memops; +}; + +#endif /* _CXIP_ENV_H_ */ diff --git a/prov/cxi/include/cxip/ep.h b/prov/cxi/include/cxip/ep.h new file mode 100644 index 00000000000..0e7d7086262 --- /dev/null +++ b/prov/cxi/include/cxip/ep.h @@ -0,0 +1,184 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_EP_H_ +#define _CXIP_EP_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_av; +struct cxip_cmdq; +struct cxip_cq; +struct cxip_domain; +struct cxip_eq; +struct cxip_md; +struct cxip_portals_table; +struct cxip_rxc; +struct cxip_txc; + +/* Macros */ +#define CXIP_EP_MAX_CTX_BITS 0 + +#define CXIP_EP_MAX_TX_CNT (1 << CXIP_EP_MAX_CTX_BITS) + +#define CXIP_EP_MAX_RX_CNT (1 << CXIP_EP_MAX_CTX_BITS) + +#define CXIP_EP_MAX_MSG_SZ ((1ULL << 32) - 1) + +#define CXIP_EP_MIN_MULTI_RECV 64 + +#define CXIP_EP_MAX_MULTI_RECV ((1 << 24) - 1) + +#define CXIP_EP_PRI_CAPS \ + (FI_RMA | FI_ATOMICS | FI_TAGGED | FI_RECV | FI_SEND | \ + FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | \ + FI_DIRECTED_RECV | FI_MSG | FI_NAMED_RX_CTX | FI_HMEM | \ + FI_COLLECTIVE) + +#define CXIP_EP_SEC_CAPS \ + (FI_SOURCE | FI_SOURCE_ERR | FI_LOCAL_COMM | \ + FI_REMOTE_COMM | FI_RMA_EVENT | FI_MULTI_RECV | FI_FENCE | FI_TRIGGER) + +#define CXIP_EP_CAPS (CXIP_EP_PRI_CAPS | CXIP_EP_SEC_CAPS) + +#define CXIP_EP_CQ_FLAGS \ + (FI_SEND | FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION) + +#define CXIP_EP_CNTR_FLAGS \ + (FI_SEND | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | \ + FI_REMOTE_WRITE) + +/* Type definitions */ +struct cxip_ep_obj { + /* Allow lock to be optimized out with FI_THREAD_DOMAIN */ + struct ofi_genlock lock; + struct cxip_domain *domain; + struct cxip_av *av; + + struct fid_peer_srx *owner_srx; + + /* Domain has 
been configured with FI_AV_AUTH_KEY. */ + bool av_auth_key; + + /* This is only valid if FI_AV_AUTH_KEY is false. */ + struct cxi_auth_key auth_key; + + /* Array of VNIs if FI_AV_AUTH_KEY is true. */ + uint16_t *vnis; + size_t vni_count; + + struct cxip_addr src_addr; + fi_addr_t fi_addr; + + bool enabled; + + /* Endpoint protocol implementations. + * FI_PROTO_CXI - Portals SAS protocol + */ + uint32_t protocol; + struct cxip_txc *txc; + struct cxip_rxc *rxc; + + /* Internal support for CQ wait object */ + struct cxil_wait_obj *priv_wait; + int wait_fd; + + /* ASIC version associated with EP/Domain */ + enum cassini_version asic_ver; + + /* Information that might be owned by an EP (or a SEP + * when implemented). Should ultimately be a pointer + * to a base/specialization. + */ + struct cxip_ctrl ctrl; + + /* Command queues. Each EP has 1 transmit and 1 target + * command queue that can be shared. An optional 2nd transmit + * command queue may be created for RX initiated rgets. + */ + struct cxip_cmdq *txq; + ofi_atomic32_t txq_ref; + struct cxip_cmdq *tgq; + ofi_atomic32_t tgq_ref; + struct cxip_cmdq *rx_txq; + + /* Libfabric software EQ resource */ + struct cxip_eq *eq; + struct dlist_entry eq_link; + + /* Values at base EP creation */ + uint64_t caps; + struct fi_ep_attr ep_attr; + struct fi_tx_attr tx_attr; + struct fi_rx_attr rx_attr; + + /* Require memcpy's via the dev reg APIs. 
*/ + bool require_dev_reg_copy[OFI_HMEM_MAX]; + + /* Collectives support */ + struct cxip_ep_coll_obj coll; + struct cxip_ep_zbcoll_obj zbcoll; + + size_t txq_size; + size_t tgq_size; + ofi_atomic32_t ref; + struct cxip_portals_table *ptable; +}; + +struct cxip_ep { + struct fid_ep ep; + struct fi_tx_attr tx_attr; + struct fi_rx_attr rx_attr; + struct cxip_ep_obj *ep_obj; + int is_alias; +}; + +/* Function declarations */ +int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len, + uint64_t access, uint64_t flags, struct cxip_md **md); + +int cxip_endpoint(struct fid_domain *domain, struct fi_info *info, + struct fid_ep **ep, void *context); + +int cxip_ep_cmdq(struct cxip_ep_obj *ep_obj, bool transmit, uint32_t tclass, + struct cxi_eq *evtq, struct cxip_cmdq **cmdq); + +void cxip_ep_cmdq_put(struct cxip_ep_obj *ep_obj, bool transmit); + +void cxip_ep_progress(struct fid *fid); + +void cxip_ep_flush_trig_reqs(struct cxip_ep_obj *ep_obj); + +void cxip_ep_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); + +void cxip_ep_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, bool internal); + +void cxip_ep_tx_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); + +void cxip_ep_tx_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, bool internal); + +void cxip_ep_tgt_ctrl_progress(struct cxip_ep_obj *ep_obj, bool internal); + +void cxip_ep_tgt_ctrl_progress_locked(struct cxip_ep_obj *ep_obj, + bool internal); + +int cxip_ep_ctrl_init(struct cxip_ep_obj *ep_obj); + +void cxip_ep_ctrl_fini(struct cxip_ep_obj *ep_obj); + +int cxip_ep_trywait(struct cxip_ep_obj *ep_obj, struct cxip_cq *cq); + +size_t cxip_ep_get_unexp_msgs(struct fid_ep *fid_ep, + struct fi_cq_tagged_entry *entry, size_t count, + fi_addr_t *src_addr, size_t *ux_count); + +#endif /* _CXIP_EP_H_ */ diff --git a/prov/cxi/include/cxip/eq.h b/prov/cxi/include/cxip/eq.h new file mode 100644 index 00000000000..7796e4ce573 --- /dev/null +++ b/prov/cxi/include/cxip/eq.h @@ -0,0 +1,31 @@ 
+/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_EQ_H_ +#define _CXIP_EQ_H_ + + +#include +#include + +/* Macros */ +#define CXIP_EQ_DEF_SZ (1 << 8) + +#define CXIP_EQ_MAP_FLAGS (CXI_MAP_WRITE | CXI_MAP_PIN) + +/* Type definitions */ +struct cxip_eq { + struct util_eq util_eq; + struct fi_eq_attr attr; + struct dlist_entry ep_list; + ofi_mutex_t list_lock; +}; + +/* Function declarations */ +int cxip_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, + struct fid_eq **eq, void *context); + +#endif /* _CXIP_EQ_H_ */ diff --git a/prov/cxi/include/cxip/evtq.h b/prov/cxi/include/cxip/evtq.h new file mode 100644 index 00000000000..79cca0b4252 --- /dev/null +++ b/prov/cxi/include/cxip/evtq.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_EVTQ_H_ +#define _CXIP_EVTQ_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_cq; +struct cxip_req; + +/* Type definitions */ +struct cxip_evtq { + struct cxi_eq *eq; + void *buf; + size_t len; + struct cxi_md *md; + bool mmap; + unsigned int unacked_events; + unsigned int ack_batch_size; + struct c_eq_status prev_eq_status; + bool eq_saturated; + /* Reference to wait_obj allocated outside scope of event queue */ + struct cxil_wait_obj *event_wait_obj; + struct cxil_wait_obj *status_wait_obj; + + /* Point back to CQ */ + struct cxip_cq *cq; + + /* Protected with ep_ob->lock */ + struct ofi_bufpool *req_pool; + struct indexer req_table; + struct dlist_entry req_list; +}; + +struct def_event_ht { + struct dlist_entry bh[CXIP_DEF_EVENT_HT_BUCKETS]; +}; + +/* Function declarations */ +int cxip_evtq_init(struct cxip_evtq *evtq, struct cxip_cq *cq, + size_t num_events, size_t num_fc_events, + struct cxil_wait_obj *priv_wait); + +void cxip_evtq_fini(struct cxip_evtq *eq); + 
+bool cxip_evtq_saturated(struct cxip_evtq *evtq); + +int cxip_evtq_req_cancel(struct cxip_evtq *evtq, void *req_ctx, + void *op_ctx, bool match); + +void cxip_evtq_req_discard(struct cxip_evtq *evtq, void *req_ctx); + +void cxip_evtq_flush_trig_reqs(struct cxip_evtq *evtq); + +struct cxip_req *cxip_evtq_req_alloc(struct cxip_evtq *evtq, + int remap, void *req_ctx); + +void cxip_evtq_req_free(struct cxip_req *req); + +void cxip_evtq_progress(struct cxip_evtq *evtq, bool internal); + +int cxip_evtq_adjust_reserved_fc_event_slots(struct cxip_evtq *evtq, int value); + +#endif /* _CXIP_EVTQ_H_ */ diff --git a/prov/cxi/include/cxip/fabric.h b/prov/cxi/include/cxip/fabric.h new file mode 100644 index 00000000000..6b28544a1bf --- /dev/null +++ b/prov/cxi/include/cxip/fabric.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_FABRIC_H_ +#define _CXIP_FABRIC_H_ + + +#include + +/* Type definitions */ +struct cxip_fabric { + struct util_fabric util_fabric; + ofi_atomic32_t ref; +}; + +/* Function declarations */ +int cxip_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, + void *context); + +#endif /* _CXIP_FABRIC_H_ */ diff --git a/prov/cxi/include/cxip/fc.h b/prov/cxi/include/cxip/fc.h new file mode 100644 index 00000000000..92c15ac0370 --- /dev/null +++ b/prov/cxi/include/cxip/fc.h @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_FC_H_ +#define _CXIP_FC_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_ep_obj; +struct cxip_rxc_hpc; +struct cxip_txc_hpc; + +/* Macros */ +#define CXIP_FC_SOFTWARE_INITIATED -1 + +/* Type definitions */ +struct cxip_fc_peer { + struct dlist_entry txc_entry; + struct cxip_txc_hpc *txc; + struct cxip_ctrl_req req; + struct cxip_addr caddr; + struct 
dlist_entry msg_queue; + uint16_t pending; + uint16_t dropped; + uint16_t pending_acks; + bool replayed; + unsigned int retry_count; +}; + +struct cxip_fc_drops { + struct dlist_entry rxc_entry; + struct cxip_rxc_hpc *rxc; + struct cxip_ctrl_req req; + uint32_t nic_addr; + uint32_t pid; + uint16_t vni; + uint16_t drops; + unsigned int retry_count; +}; + +/* Function declarations */ +int cxip_fc_process_drops(struct cxip_ep_obj *ep_obj, uint32_t nic_addr, + uint32_t pid, uint16_t vni, uint16_t drops); + +int cxip_fc_resume(struct cxip_ep_obj *ep_obj, uint32_t nic_addr, uint32_t pid, + uint16_t vni); + +#endif /* _CXIP_FC_H_ */ diff --git a/prov/cxi/include/cxip/if.h b/prov/cxi/include/cxip/if.h new file mode 100644 index 00000000000..7e591b6feb7 --- /dev/null +++ b/prov/cxi/include/cxip/if.h @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_IF_H_ +#define _CXIP_IF_H_ + + +#include +#include +#include +#include +#include + +/* Type definitions */ +struct cxip_if { + struct slist_entry if_entry; + + /* Device description */ + struct cxil_devinfo *info; + int speed; + int link; + + struct cxil_dev *dev; + + /* PtlTEs (searched during state change events) */ + struct dlist_entry ptes; + + ofi_atomic32_t ref; + ofi_spin_t lock; +}; + +struct cxip_remap_cp { + struct dlist_entry remap_entry; + struct cxi_cp remap_cp; + struct cxi_cp *hw_cp; +}; + +struct cxip_lni { + struct cxip_if *iface; + struct cxil_lni *lni; + + /* Hardware communication profiles */ + struct cxi_cp *hw_cps[MAX_HW_CPS]; + int n_cps; + + /* Software remapped communication profiles. 
*/ + struct dlist_entry remap_cps; + + pthread_rwlock_t cp_lock; +}; + +/* Function declarations */ +struct cxip_if *cxip_if_lookup_addr(uint32_t nic_addr); + +struct cxip_if *cxip_if_lookup_name(const char *name); + +int cxip_get_if(uint32_t nic_addr, struct cxip_if **dev_if); + +void cxip_put_if(struct cxip_if *dev_if); + +int cxip_if_valid_rgroup_vni(struct cxip_if *iface, unsigned int rgroup_id, + unsigned int vni); + +int cxip_alloc_lni(struct cxip_if *iface, uint32_t svc_id, + struct cxip_lni **if_lni); + +void cxip_free_lni(struct cxip_lni *lni); + +const char *cxi_tc_str(enum cxi_traffic_class tc); + +void cxip_if_init(void); + +void cxip_if_fini(void); + +#endif /* _CXIP_IF_H_ */ diff --git a/prov/cxi/include/cxip/info.h b/prov/cxi/include/cxip/info.h new file mode 100644 index 00000000000..abd9024cb91 --- /dev/null +++ b/prov/cxi/include/cxip/info.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_INFO_H_ +#define _CXIP_INFO_H_ + + +/* Function declarations */ +const char *cxip_rdzv_proto_to_str(enum cxip_rdzv_proto proto); + +#endif /* _CXIP_INFO_H_ */ diff --git a/prov/cxi/include/cxip/iomm.h b/prov/cxi/include/cxip/iomm.h new file mode 100644 index 00000000000..507a9569211 --- /dev/null +++ b/prov/cxi/include/cxip/iomm.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_IOMM_H_ +#define _CXIP_IOMM_H_ + + +#include + +/* Forward declarations */ +struct cxip_domain; +struct cxip_md; + +/* Function declarations */ +int cxip_iomm_init(struct cxip_domain *dom); + +void cxip_iomm_fini(struct cxip_domain *dom); + +int cxip_map(struct cxip_domain *dom, const void *buf, unsigned long len, + uint64_t access, uint64_t flags, struct cxip_md **md); + +void cxip_unmap(struct cxip_md *md); + +#endif /* _CXIP_IOMM_H_ */ 
diff --git a/prov/cxi/include/cxip/log.h b/prov/cxi/include/cxip/log.h new file mode 100644 index 00000000000..82cf491b0dc --- /dev/null +++ b/prov/cxi/include/cxip/log.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_LOG_H_ +#define _CXIP_LOG_H_ + + +/* Macros */ +#define CXIP_LOG(fmt, ...) \ + fi_log(&cxip_prov, FI_LOG_WARN, FI_LOG_CORE, \ + __func__, __LINE__, "%s: " fmt "", cxip_env.hostname, \ + ##__VA_ARGS__) + +#define CXIP_FATAL(fmt, ...) \ + do { \ + CXIP_LOG(fmt, ##__VA_ARGS__); \ + abort(); \ + } while (0) + +#endif /* _CXIP_LOG_H_ */ diff --git a/prov/cxi/include/cxip/mr.h b/prov/cxi/include/cxip/mr.h new file mode 100644 index 00000000000..4fab151844d --- /dev/null +++ b/prov/cxi/include/cxip/mr.h @@ -0,0 +1,170 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_MR_H_ +#define _CXIP_MR_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cntr; +struct cxip_domain; +struct cxip_ep; +struct cxip_pte; + +/* Macros */ +#define CXIP_MR_CACHE_EVENTS_DISABLE_POLL_NSECS 100000U + +#define CXIP_MR_CACHE_EVENTS_DISABLE_LE_POLL_NSECS 1000000000U + +#define CXIP_MR_PROV_KEY_MASK ((1ULL << 61) - 1) + +#define CXIP_MR_PROV_KEY_ID_MASK ((1ULL << 16) - 1) + +#define CXIP_MR_UNCACHED_KEY_TO_IDX(key) ((key) & CXIP_MR_PROV_KEY_ID_MASK) + +#define CXIP_MR_KEY_SIZE sizeof(uint32_t) + +#define CXIP_MR_KEY_MASK ((1ULL << (8 * CXIP_MR_KEY_SIZE)) - 1) + +#define CXIP_MR_VALID_OFFSET_MASK ((1ULL << 56) - 1) + +#define CXIP_MR_PROV_KEY_SIZE sizeof(struct cxip_mr_key) + +#define CXIP_MR_DOMAIN_HT_BUCKETS 16 + +/* Type definitions */ +struct cxip_mr_key { + union { + /* Provider generated standard cached */ + struct { + uint64_t lac : 3; + uint64_t lac_off: 58; + uint64_t opt : 1; + uint64_t cached : 
1; + uint64_t unused1: 1; + /* shares CXIP_CTRL_LE_TYPE_MR */ + }; + /* Client or Provider non-cached */ + struct { + uint64_t key : 61; + uint64_t unused2: 3; + /* Provider shares opt */ + /* Provider shares cached == 0 */ + /* Provider shares CXIP_CTRL_LE_TYPE_MR */ + }; + /* Provider Key Only */ + struct { + /* Non-cached key consists of unique MR ID and sequence + * number. The same MR ID can be used with sequence + * number to create 2^44 unique keys. That is, a + * single standard MR repeatedly created and destroyed + * every micro-second, would take months before + * it repeated. + */ + uint64_t id : 16; /* Unique - 64K MR */ + uint64_t seqnum : 44; /* Sequence with random seed */ + uint64_t events : 1; /* Requires event generation */ + uint64_t unused3: 2; + uint64_t is_prov: 1; + /* Overloads CXIP_CTRL_LE_TYPE_MR and must be cleared + * before appending MR LE or TX using in match bits. + */ + }; + uint64_t raw; + }; +}; + +struct cxip_mr_util_ops { + bool is_cached; + int (*init_key)(struct cxip_mr *mr, uint64_t req_key); + int (*enable_opt)(struct cxip_mr *mr); + int (*disable_opt)(struct cxip_mr *mr); + int (*enable_std)(struct cxip_mr *mr); + int (*disable_std)(struct cxip_mr *mr); +}; + +struct cxip_md { + struct cxip_domain *dom; + struct cxi_md *md; + struct ofi_mr_info info; + uint64_t map_flags; + uint64_t handle; + int dmabuf_fd; + bool handle_valid; + bool cached; + bool dmabuf_fd_valid; +}; + +struct cxip_mr_domain { + struct dlist_entry buckets[CXIP_MR_DOMAIN_HT_BUCKETS]; + ofi_spin_t lock; +}; + +struct cxip_mr { + struct fid_mr mr_fid; + struct cxip_domain *domain; // parent domain + struct cxip_ep *ep; // endpoint for remote memory + uint64_t key; // memory key + uint64_t flags; // special flags + struct fi_mr_attr attr; // attributes + struct cxip_cntr *cntr; // if bound to cntr + + /* Indicates if FI_RMA_EVENT was specified at creation and + * will be used to enable fi_writedata() and fi_inject_writedata() + * support for this MR (TODO). 
+ */ + bool rma_events; + + /* If requested then count MR events to determine if RMA are in + * progress. At close if no RMA are in progress bypass the invalidate + * of the PTLTE LE. This improves non-cached key close performance, + * enabling their use so that after closing the MR the associated + * memory cannot be remotely accessed, even if it remains in the + * libfabric MR cache. + */ + bool count_events; + ofi_atomic32_t match_events; + ofi_atomic32_t access_events; + + ofi_spin_t lock; + + struct cxip_mr_util_ops *mr_util; + bool enabled; + struct cxip_pte *pte; + enum cxip_mr_state mr_state; + int64_t mr_id; // Non-cached provider key uniqueness + struct cxip_ctrl_req req; + bool optimized; + + void *buf; // memory buffer VA + uint64_t len; // memory length + struct cxip_md *md; // buffer IO descriptor + struct dlist_entry ep_entry; + + struct dlist_entry mr_domain_entry; +}; + +/* Function declarations */ +int cxip_generic_mr_key_to_ptl_idx(struct cxip_domain *dom, + uint64_t key, bool write); + +bool cxip_generic_is_mr_key_opt(uint64_t key); + +bool cxip_generic_is_mr_key_events(uint64_t caps, uint64_t key); + +bool cxip_generic_is_valid_mr_key(uint64_t key); + +void cxip_mr_domain_init(struct cxip_mr_domain *mr_domain); + +void cxip_mr_domain_fini(struct cxip_mr_domain *mr_domain); + +#endif /* _CXIP_MR_H_ */ diff --git a/prov/cxi/include/cxip/mr_lac_cache.h b/prov/cxi/include/cxip/mr_lac_cache.h new file mode 100644 index 00000000000..702ae12579d --- /dev/null +++ b/prov/cxi/include/cxip/mr_lac_cache.h @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_MR_LAC_CACHE_H_ +#define _CXIP_MR_LAC_CACHE_H_ + + +/* cxip_mr_lac_cache type definition */ +/* This is in a separate header to break the circular dependency between mr.h and ctrl.h */ + +/* Forward declarations */ +struct cxip_ctrl_req; + +struct cxip_mr_lac_cache { + /* MR 
referencing the associated MR cache LE, can only + * be flushed if reference count is 0. + */ + ofi_atomic32_t ref; + union cxip_match_bits mb; + struct cxip_ctrl_req *ctrl_req; +}; + +#endif /* _CXIP_MR_LAC_CACHE_H_ */ diff --git a/prov/cxi/include/cxip/msg.h b/prov/cxi/include/cxip/msg.h new file mode 100644 index 00000000000..f685c627aa4 --- /dev/null +++ b/prov/cxi/include/cxip/msg.h @@ -0,0 +1,198 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_MSG_H_ +#define _CXIP_MSG_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_md; +struct cxip_pte; +struct cxip_req; +struct cxip_rxc; +struct cxip_rxc_hpc; +struct cxip_txc; + +/* Macros */ +#define CXIP_MSG_ORDER (FI_ORDER_SAS | \ + FI_ORDER_WAW | \ + FI_ORDER_RMA_WAW | \ + FI_ORDER_RMA_RAR | \ + FI_ORDER_ATOMIC_WAW | \ + FI_ORDER_ATOMIC_WAR | \ + FI_ORDER_ATOMIC_RAW | \ + FI_ORDER_ATOMIC_RAR) + +#define CXIP_TAG_WIDTH 48 + +#define CXIP_TAG_MASK ((1UL << CXIP_TAG_WIDTH) - 1) + +/* Type definitions */ +union cxip_match_bits { + struct { + uint64_t tag : CXIP_TAG_WIDTH; /* User tag value */ + uint64_t tx_id : CXIP_TX_ID_WIDTH; /* Prov. tracked ID */ + uint64_t cq_data : 1; /* Header data is valid */ + uint64_t tagged : 1; /* Tagged API */ + uint64_t match_comp : 1; /* Notify initiator on match */ + uint64_t rdzv_done : 1; /* Notify initiator when rdzv done */ + uint64_t le_type : 1; + }; + /* Rendezvous protocol request, overloads match_comp and rdzv_done + * to specify requested protocol. + */ + struct { + uint64_t pad0 : 61; + uint64_t rdzv_proto : 2; + uint64_t pad1 : 1; + }; + /* Split TX ID for rendezvous operations. 
*/ + struct { + uint64_t pad2 : (CXIP_TAG_WIDTH - 1); /* User tag value */ + uint64_t coll_get : 1; /* leaf rdma get */ + uint64_t rdzv_id_hi : CXIP_RDZV_ID_HIGH_WIDTH; + uint64_t rdzv_lac : 4; /* Rendezvous Get LAC */ + }; + struct { + uint64_t rdzv_id_lo : CXIP_RDZV_ID_CMD_WIDTH; + }; + /* Client/Server messaging match bits */ + struct { + uint64_t rnr_tag : CXIP_CS_TAG_WIDTH; /* User tag value */ + uint64_t rnr_rsvd : 6; /* Unused, set to 0 */ + uint64_t rnr_cq_data : 1; /* Header data valid */ + uint64_t rnr_tagged : 1; /* Tagged API */ + uint64_t rnr_vni : CXIP_VNI_WIDTH; /* Source VNI */ + }; + /* Control LE match bit format for notify/resume */ + struct { + uint64_t txc_id : 8; + uint64_t rxc_id : 8; + uint64_t drops : 16; + uint64_t pad3 : 29; + uint64_t ctrl_msg_type: 2; + uint64_t ctrl_le_type : 1; + }; + /* Control LE match bit format for zbcollectives */ + struct { + uint64_t zb_data :61; + uint64_t zb_pad : 3; + /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_CTRL_MSG + * shares ctrl_msg_type == CXIP_CTRL_MSG_ZB_BCAST + */ + }; + /* Control LE match bit format for cached MR */ + struct { + uint64_t mr_lac : 3; + uint64_t mr_lac_off : 58; + uint64_t mr_opt : 1; + uint64_t mr_cached : 1; + uint64_t mr_unused : 1; + /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR */ + }; + struct { + uint64_t mr_key : 61; + uint64_t mr_pad : 3; + /* shares mr_opt + * shares mr_cached == 0 + * shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR + */ + }; + struct { + uint64_t unused2 : 63; + uint64_t is_prov : 1; + /* Indicates provider generated key and shares ctrl_le_type == + * CXIP_CTRL_LE_TYPE_MR so it must be cleared before matching. 
+ */ + }; + uint64_t raw; +}; + +struct cxip_ux_dump_state { + bool done; + + size_t max_count; /* Number entries/src_addr provided */ + size_t ret_count; /* Number of UX entries returned */ + size_t ux_count; /* Total UX entries available */ + + struct fi_cq_tagged_entry *entry; + fi_addr_t *src_addr; +}; + +struct cxip_ux_send { + struct dlist_entry rxc_entry; + struct cxip_req *req; + struct cxip_rxc *rxc; + struct fi_peer_rx_entry *rx_entry; + union c_event put_ev; + bool claimed; /* Reserved with FI_PEEK | FI_CLAIM */ +}; + +struct cxip_msg_counters { + /* Histogram counting the number of messages based on priority, buffer + * type (HMEM), and message size. + */ + ofi_atomic32_t msg_count[CXIP_LIST_COUNTS][OFI_HMEM_MAX][CXIP_COUNTER_BUCKETS]; +}; + +/* Function declarations */ +int cxip_recv_ux_sw_matcher(struct cxip_ux_send *ux); + +int cxip_recv_req_sw_matcher(struct cxip_req *req); + +int cxip_recv_cancel(struct cxip_req *req); + +void cxip_recv_pte_cb(struct cxip_pte *pte, const union c_event *event); + +fi_addr_t cxip_recv_req_src_addr(struct cxip_rxc *rxc, + uint32_t init, uint16_t vni, + bool force); + +int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len, + struct cxip_md *md, struct cxip_req **cxip_req, + int (*recv_cb)(struct cxip_req *req, + const union c_event *event)); + +void cxip_recv_req_free(struct cxip_req *req); + +void cxip_recv_req_report(struct cxip_req *req); + +void cxip_recv_req_peek_complete(struct cxip_req *req, + struct cxip_ux_send *ux_send); + +struct cxip_req *cxip_mrecv_req_dup(struct cxip_req *mrecv_req); + +int cxip_complete_put(struct cxip_req *req, const union c_event *event); + +int cxip_recv_pending_ptlte_disable(struct cxip_rxc *rxc, bool check_fc); + +int cxip_flush_appends(struct cxip_rxc_hpc *rxc, + int (*flush_cb)(struct cxip_req *req, + const union c_event *event)); + +int cxip_recv_req_dropped(struct cxip_req *req); + +bool tag_match(uint64_t init_mb, uint64_t mb, uint64_t ib); + +bool 
init_match(struct cxip_rxc *rxc, uint32_t init, uint32_t match_id); + +uint32_t cxip_msg_match_id(struct cxip_txc *txc); + +void cxip_report_send_completion(struct cxip_req *req, bool sw_cntr); + +bool cxip_send_eager_idc(struct cxip_req *req); + +void cxip_send_buf_fini(struct cxip_req *req); + +int cxip_send_buf_init(struct cxip_req *req); + +#endif /* _CXIP_MSG_H_ */ diff --git a/prov/cxi/include/cxip/msg_hpc.h b/prov/cxi/include/cxip/msg_hpc.h new file mode 100644 index 00000000000..cab5f1fbc07 --- /dev/null +++ b/prov/cxi/include/cxip/msg_hpc.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_MSG_HPC_H_ +#define _CXIP_MSG_HPC_H_ + + +#include + +/* Forward declarations */ +struct cxip_ep; +struct cxip_rxc_hpc; + +/* Function declarations */ +int cxip_oflow_bufpool_init(struct cxip_rxc_hpc *rxc); + +void cxip_oflow_bufpool_fini(struct cxip_rxc_hpc *rxc); + +int cxip_build_ux_entry_info(struct cxip_ep *ep, + struct fi_cq_tagged_entry *entry, size_t count, + fi_addr_t *src_addr, size_t *ux_count); + +int cxip_unexp_start(struct fi_peer_rx_entry *entry); + +#endif /* _CXIP_MSG_HPC_H_ */ diff --git a/prov/cxi/include/cxip/nic.h b/prov/cxi/include/cxip/nic.h new file mode 100644 index 00000000000..bc6cfe7c372 --- /dev/null +++ b/prov/cxi/include/cxip/nic.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_NIC_H_ +#define _CXIP_NIC_H_ + + +/* Forward declarations */ +struct cxip_if; + +/* Function declarations */ +int cxip_nic_alloc(struct cxip_if *nic_if, struct fid_nic **fid_nic); + +#endif /* _CXIP_NIC_H_ */ diff --git a/prov/cxi/include/cxip/portals_table.h b/prov/cxi/include/cxip/portals_table.h new file mode 100644 index 00000000000..31af8e7cb14 --- /dev/null +++ b/prov/cxi/include/cxip/portals_table.h @@ -0,0 
+1,32 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_PORTALS_TABLE_H_ +#define _CXIP_PORTALS_TABLE_H_ + + +#include +#include + +/* Forward declarations */ +struct cxip_lni; + +/* Type definitions */ +struct cxip_portals_table { + struct cxip_lni *lni; + uint32_t pid; + struct cxil_domain **doms; + size_t doms_count; +}; + +/* Function declarations */ +int cxip_portals_table_alloc(struct cxip_lni *lni, uint16_t *vni, + size_t vni_count, uint32_t pid, + struct cxip_portals_table **ptable); + +void cxip_portals_table_free(struct cxip_portals_table *ptable); + +#endif /* _CXIP_PORTALS_TABLE_H_ */ diff --git a/prov/cxi/include/cxip/pte.h b/prov/cxi/include/cxip/pte.h new file mode 100644 index 00000000000..884607df308 --- /dev/null +++ b/prov/cxi/include/cxip/pte.h @@ -0,0 +1,82 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_PTE_H_ +#define _CXIP_PTE_H_ + + +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_cntr; +struct cxip_evtq; +struct cxip_if; +struct cxip_portals_table; + +/* Macros */ +#define CXIP_PTE_IGNORE_DROPS ((1 << 24) - 1) + +/* Type definitions */ +struct cxip_pte_map_entry { + struct dlist_entry entry; + struct cxil_pte_map *map; +}; + +struct cxip_pte { + struct dlist_entry pte_entry; + struct cxip_portals_table *ptable; + struct cxil_pte *pte; + enum c_ptlte_state state; + struct dlist_entry map_list; + + void (*state_change_cb)(struct cxip_pte *pte, + const union c_event *event); + void *ctx; +}; + +/* Function declarations */ +int cxip_pte_set_state(struct cxip_pte *pte, struct cxip_cmdq *cmdq, + enum c_ptlte_state new_state, uint32_t drop_count); + +int cxip_pte_set_state_wait(struct cxip_pte *pte, struct cxip_cmdq *cmdq, + struct cxip_evtq *evtq, + enum c_ptlte_state 
new_state, uint32_t drop_count); + +int cxip_pte_append(struct cxip_pte *pte, uint64_t iova, size_t len, + unsigned int lac, enum c_ptl_list list, + uint32_t buffer_id, uint64_t match_bits, + uint64_t ignore_bits, uint32_t match_id, + uint64_t min_free, uint32_t flags, + struct cxip_cntr *cntr, struct cxip_cmdq *cmdq, + bool ring); + +int cxip_pte_unlink(struct cxip_pte *pte, enum c_ptl_list list, + int buffer_id, struct cxip_cmdq *cmdq); + +int cxip_pte_map(struct cxip_pte *pte, uint64_t pid_idx, bool is_multicast); + +int cxip_pte_alloc_nomap(struct cxip_portals_table *ptable, struct cxi_eq *evtq, + struct cxi_pt_alloc_opts *opts, + void (*state_change_cb)(struct cxip_pte *pte, + const union c_event *event), + void *ctx, struct cxip_pte **pte); + +int cxip_pte_alloc(struct cxip_portals_table *ptable, struct cxi_eq *evtq, + uint64_t pid_idx, bool is_multicast, + struct cxi_pt_alloc_opts *opts, + void (*state_change_cb)(struct cxip_pte *pte, + const union c_event *event), + void *ctx, struct cxip_pte **pte); + +void cxip_pte_free(struct cxip_pte *pte); + +int cxip_pte_state_change(struct cxip_if *dev_if, const union c_event *event); + +#endif /* _CXIP_PTE_H_ */ diff --git a/prov/cxi/include/cxip/ptelist_buf.h b/prov/cxi/include/cxip/ptelist_buf.h new file mode 100644 index 00000000000..f33e71436c6 --- /dev/null +++ b/prov/cxi/include/cxip/ptelist_buf.h @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_PTELIST_BUF_H_ +#define _CXIP_PTELIST_BUF_H_ + + +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_md; +struct cxip_req; +struct cxip_rxc_hpc; +struct cxip_ux_send; + +/* Type definitions */ +struct cxip_ptelist_bufpool_attr { + enum c_ptl_list list_type; + + /* Callback to handle PtlTE link error/unlink events */ + int (*ptelist_cb)(struct cxip_req *req, const union c_event *event); + size_t buf_size; + size_t 
min_space_avail; + size_t min_posted; + size_t max_posted; + size_t max_cached; +}; + +struct cxip_ptelist_bufpool { + struct cxip_ptelist_bufpool_attr attr; + struct cxip_rxc_hpc *rxc; + size_t buf_alignment; + + /* Ordered list of buffers emitted to hardware */ + struct dlist_entry active_bufs; + + /* List of consumed buffers which cannot be reposted yet + * since unexpected entries have not been matched. + */ + struct dlist_entry consumed_bufs; + + /* List of available buffers that may be appended to the list. + * These could be from a previous append failure or be cached + * from previous message processing to avoid map/unmap of + * list buffer. + */ + struct dlist_entry free_bufs; + + ofi_atomic32_t bufs_linked; + ofi_atomic32_t bufs_allocated; + ofi_atomic32_t bufs_free; +}; + +struct cxip_ptelist_req { + /* Pending list of unexpected header entries which could not be placed + * on the RX context unexpected header list due to put events being + * received out-of-order. + */ + struct dlist_entry pending_ux_list; +}; + +struct cxip_ptelist_buf { + struct cxip_ptelist_bufpool *pool; + + /* RX context the request buffer is posted on. */ + struct cxip_rxc_hpc *rxc; + enum cxip_le_type le_type; + struct dlist_entry buf_entry; + struct cxip_req *req; + + /* Memory mapping of req_buf field. */ + struct cxip_md *md; + + /* The number of bytes consumed by hardware when the request buffer was + * unlinked. + */ + size_t unlink_length; + + /* Current offset into the buffer where packets/data are landing. When + * the cur_offset is equal to unlink_length, software has completed + * event processing for the buffer. + */ + size_t cur_offset; + + /* Request list specific control information */ + struct cxip_ptelist_req request; + + /* The number of unexpected headers placed on the RX context + * unexpected header list which have not been matched. + */ + ofi_atomic32_t refcount; + + /* Buffer used to land packets.
*/ + char *data; +}; + +/* Function declarations */ +int cxip_ptelist_bufpool_init(struct cxip_rxc_hpc *rxc, + struct cxip_ptelist_bufpool **pool, + struct cxip_ptelist_bufpool_attr *attr); + +void cxip_ptelist_bufpool_fini(struct cxip_ptelist_bufpool *pool); + +int cxip_ptelist_buf_replenish(struct cxip_ptelist_bufpool *pool, + bool seq_restart); + +void cxip_ptelist_buf_link_err(struct cxip_ptelist_buf *buf, + int rc_link_error); + +void cxip_ptelist_buf_unlink(struct cxip_ptelist_buf *buf); + +void cxip_ptelist_buf_put(struct cxip_ptelist_buf *buf, bool repost); + +void cxip_ptelist_buf_get(struct cxip_ptelist_buf *buf); + +void cxip_ptelist_buf_consumed(struct cxip_ptelist_buf *buf); + +void _cxip_req_buf_ux_free(struct cxip_ux_send *ux, bool repost); + +#endif /* _CXIP_PTELIST_BUF_H_ */ diff --git a/prov/cxi/include/cxip/rdzv_pte.h b/prov/cxi/include/cxip/rdzv_pte.h new file mode 100644 index 00000000000..adf1229dd1b --- /dev/null +++ b/prov/cxi/include/cxip/rdzv_pte.h @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_RDZV_PTE_H_ +#define _CXIP_RDZV_PTE_H_ + + +#include + +/* Forward declarations */ +struct cxip_pte; +struct cxip_req; +struct cxip_txc_hpc; + +/* Type definitions */ +struct cxip_rdzv_pte { + struct cxip_txc_hpc *txc; + struct cxip_pte *pte; + + /* Count of the number of buffers successfully linked on this PtlTE. */ + ofi_atomic32_t le_linked_success_count; + + /* Count of the number of buffers failed to link on this PtlTE. */ + ofi_atomic32_t le_linked_failure_count; +}; + +struct cxip_rdzv_match_pte { + struct cxip_rdzv_pte base_pte; + + /* Request structure used to handle zero byte puts used for match + * complete. + */ + struct cxip_req *zbp_req; + + /* Request structures used to handle rendezvous source/data transfers. + * There is one request structure (and LE) for each LAC. 
+ */ + struct cxip_req *src_reqs[RDZV_SRC_LES]; +}; + +struct cxip_rdzv_nomatch_pte { + struct cxip_rdzv_pte base_pte; + struct cxip_req *le_req; +}; + +/* Function declarations */ +int cxip_rdzv_match_pte_alloc(struct cxip_txc_hpc *txc, + struct cxip_rdzv_match_pte **rdzv_pte); + +int cxip_rdzv_nomatch_pte_alloc(struct cxip_txc_hpc *txc, int lac, + struct cxip_rdzv_nomatch_pte **rdzv_pte); + +int cxip_rdzv_pte_src_req_alloc(struct cxip_rdzv_match_pte *pte, int lac); + +void cxip_rdzv_match_pte_free(struct cxip_rdzv_match_pte *pte); + +void cxip_rdzv_nomatch_pte_free(struct cxip_rdzv_nomatch_pte *pte); + +int cxip_rdzv_pte_zbp_cb(struct cxip_req *req, const union c_event *event); + +int cxip_rdzv_pte_src_cb(struct cxip_req *req, const union c_event *event); + +#endif /* _CXIP_RDZV_PTE_H_ */ diff --git a/prov/cxi/include/cxip/repsum.h b/prov/cxi/include/cxip/repsum.h new file mode 100644 index 00000000000..b746eb95a28 --- /dev/null +++ b/prov/cxi/include/cxip/repsum.h @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_REPSUM_H_ +#define _CXIP_REPSUM_H_ + + +#include +#include +#include + +/* Type definitions */ +union cxip_dbl_bits { + struct { + uint64_t mantissa:52; + uint64_t exponent:11; + uint64_t sign:1; + } __attribute__((__packed__)); + double dval; + uint64_t ival; +}; + +struct cxip_repsum { + int64_t T[4]; + int32_t M; + int8_t overflow_id; + bool inexact; + bool overflow; + bool invalid; +}; + +/* Function declarations */ +void cxip_dbl_to_rep(struct cxip_repsum *x, double d); + +void cxip_rep_to_dbl(double *d, const struct cxip_repsum *x); + +void cxip_rep_add(struct cxip_repsum *x, const struct cxip_repsum *y); + +double cxip_rep_add_dbl(double d1, double d2); + +double cxip_rep_sum(size_t count, double *values); + +#endif /* _CXIP_REPSUM_H_ */ diff --git a/prov/cxi/include/cxip/req.h b/prov/cxi/include/cxip/req.h new file mode 
100644 index 00000000000..62f22841366 --- /dev/null +++ b/prov/cxi/include/cxip/req.h @@ -0,0 +1,230 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_REQ_H_ +#define _CXIP_REQ_H_ + + +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cntr; +struct cxip_coll_buf; +struct cxip_coll_pte; +struct cxip_coll_reduction; +struct cxip_cq; +struct cxip_evtq; +struct cxip_fc_peer; +struct cxip_md; +struct cxip_rxc; +struct cxip_rxc_hpc; +struct cxip_rxc_rnr; +struct cxip_txc; +struct cxip_txc_hpc; +struct cxip_txc_rnr; +struct cxip_ux_dump_state; + +/* Macros */ +#define CXIP_REQ_CLEANUP_TO 3000 + +/* Type definitions */ +struct cxip_req_rma { + struct cxip_txc *txc; + struct cxip_md *local_md; // RMA target buffer + void *ibuf; + struct cxip_cntr *cntr; + /* collectives leaf_rdma_get_callback context data */ + struct cxip_coll_reduction *reduction; +}; + +struct cxip_req_amo { + struct cxip_txc *txc; + struct cxip_md *result_md; + struct cxip_md *oper1_md; + char result[16]; + char oper1[16]; + bool tmp_result; + bool tmp_oper1; + void *ibuf; + bool fetching_amo_flush; + uint8_t fetching_amo_flush_event_count; + unsigned int fetching_amo_flush_event_rc; + struct cxip_cntr *cntr; +}; + +struct cxip_req_recv { + /* Receive parameters */ + struct dlist_entry rxc_entry; + union { + struct cxip_rxc *rxc; + struct cxip_rxc_hpc *rxc_hpc; + struct cxip_rxc_rnr *rxc_rnr; + }; + + struct cxip_cntr *cntr; + void *recv_buf; // local receive buffer + struct cxip_md *recv_md; // local receive MD + bool hybrid_md; // True if MD was provided + bool success_disable; + uint32_t ulen; // User buffer length + bool tagged; + uint64_t tag; + uint64_t ignore; + uint32_t match_id; + uint64_t flags; + + /* FI_CLAIM work around to hold UX remote offsets for duration of + * H/W UX entry matching and deletion. 
Array of 8-byte unexpected + * headers remote offsets, and current remote offset used when + * processing search results to match remote offsets. + */ + uint64_t *ule_offsets; + uint64_t ule_offset; + unsigned int num_ule_offsets; + unsigned int cur_ule_offsets; + bool offset_found; + + /* UX list dump state */ + struct cxip_ux_dump_state *ux_dump; + + /* Control info */ + int rc; // DMA return code + uint32_t rlen; // Send length + uint64_t oflow_start; // Overflow buffer address + uint16_t vni; // VNI operation came in on + uint32_t initiator; // DMA initiator address + uint32_t rdzv_id; // DMA initiator rendezvous ID + uint8_t rdzv_lac; // Rendezvous source LAC + bool done_notify; // Must send done notification + enum cxip_rdzv_proto rdzv_proto; + int rdzv_events; // Processed rdzv event count + enum c_event_type rdzv_event_types[4]; + uint32_t rdzv_initiator; // Rendezvous initiator used for mrecvs + uint32_t rget_nic; + uint32_t rget_pid; + int multirecv_inflight; // SW EP Multi-receives in progress + bool canceled; // Request canceled? 
+ bool unlinked; + bool multi_recv; + bool tgt_event; + uint64_t start_offset; + uint64_t mrecv_bytes; + uint64_t mrecv_unlink_bytes; + bool auto_unlinked; + bool hw_offloaded; + struct cxip_req *parent; + struct dlist_entry children; + uint64_t src_offset; + uint16_t rdzv_mlen; +}; + +struct cxip_req_send { + /* Send parameters */ + union { + struct cxip_txc *txc; + struct cxip_txc_hpc *txc_hpc; + struct cxip_txc_rnr *txc_rnr; + }; + struct cxip_cntr *cntr; + const void *buf; // local send buffer + size_t len; // request length + struct cxip_md *send_md; // send buffer memory descriptor + struct cxip_addr caddr; + fi_addr_t dest_addr; + bool tagged; + bool hybrid_md; + bool success_disable; + uint32_t tclass; + uint64_t tag; + uint64_t data; + uint64_t flags; + void *ibuf; + + /* Control info */ + struct dlist_entry txc_entry; + struct cxip_fc_peer *fc_peer; + union { + int rdzv_id; // SW RDZV ID for long messages + int tx_id; + }; + int rc; // DMA return code + int rdzv_send_events; // Processed event count + uint64_t max_rnr_time; + uint64_t retry_rnr_time; + struct dlist_entry rnr_entry; + int retries; + bool canceled; +}; + +struct cxip_req_rdzv_src { + struct dlist_entry list; + struct cxip_txc *txc; + uint32_t lac; + int rc; +}; + +struct cxip_req_search { + struct cxip_rxc_hpc *rxc; + bool complete; + int puts_pending; +}; + +struct cxip_req_coll { + struct cxip_coll_pte *coll_pte; + struct cxip_coll_buf *coll_buf; + uint32_t mrecv_space; + size_t hw_req_len; + bool isred; + enum c_return_code cxi_rc; +}; + +struct cxip_req { + /* Control info */ + struct dlist_entry evtq_entry; + void *req_ctx; + struct cxip_cq *cq; // request CQ + struct cxip_evtq *evtq; // request event queue + int req_id; // fast lookup in index table + int (*cb)(struct cxip_req *req, const union c_event *evt); + // completion event callback + bool discard; + + /* Triggered related fields. 
*/ + bool triggered; + uint64_t trig_thresh; + struct cxip_cntr *trig_cntr; + + struct fi_peer_rx_entry *rx_entry; + + /* CQ event fields, set according to fi_cq.3 + * - set by provider + * - returned to user in completion event + */ + uint64_t context; + uint64_t flags; + uint64_t data_len; + uint64_t buf; + uint64_t data; + uint64_t tag; + fi_addr_t addr; + + /* Request parameters */ + enum cxip_req_type type; + union { + struct cxip_req_rma rma; + struct cxip_req_amo amo; + struct cxip_req_recv recv; + struct cxip_req_send send; + struct cxip_req_rdzv_src rdzv_src; + struct cxip_req_search search; + struct cxip_req_coll coll; + }; +}; + +#endif /* _CXIP_REQ_H_ */ diff --git a/prov/cxi/include/cxip/req_buf.h b/prov/cxi/include/cxip/req_buf.h new file mode 100644 index 00000000000..1dd4f4df348 --- /dev/null +++ b/prov/cxi/include/cxip/req_buf.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_REQ_BUF_H_ +#define _CXIP_REQ_BUF_H_ + + +/* Forward declarations */ +struct cxip_rxc_hpc; +struct cxip_ux_send; + +/* Macros */ +#define CXIP_REQ_BUF_SIZE (12*1024*1024) + +#define CXIP_REQ_BUF_MIN_POSTED 6 + +#define CXIP_REQ_BUF_MAX_CACHED 0 + +#define CXIP_REQ_BUF_HEADER_MAX_SIZE (sizeof(struct c_port_fab_hdr) + \ + sizeof(struct c_port_unrestricted_hdr)) + +#define CXIP_REQ_BUF_HEADER_MIN_SIZE (sizeof(struct c_port_fab_hdr) + \ + sizeof(struct c_port_small_msg_hdr)) + +/* Function declarations */ +int cxip_req_bufpool_init(struct cxip_rxc_hpc *rxc); + +void cxip_req_bufpool_fini(struct cxip_rxc_hpc *rxc); + +void cxip_req_buf_ux_free(struct cxip_ux_send *ux); + +#endif /* _CXIP_REQ_BUF_H_ */ diff --git a/prov/cxi/include/cxip/rma.h b/prov/cxi/include/cxip/rma.h new file mode 100644 index 00000000000..f218bf5786d --- /dev/null +++ b/prov/cxi/include/cxip/rma.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * 
Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_RMA_H_ +#define _CXIP_RMA_H_ + + +#include +#include +#include + +/* Forward declarations */ +struct cxip_cntr; +struct cxip_txc; + +/* Function declarations */ +ssize_t cxip_rma_common(enum fi_op_type op, struct cxip_txc *txc, + const void *buf, size_t len, void *desc, + fi_addr_t tgt_addr, uint64_t addr, + uint64_t key, uint64_t data, uint64_t flags, + uint32_t tclass, uint64_t msg_order, void *context, + bool triggered, uint64_t trig_thresh, + struct cxip_cntr *trig_cntr, + struct cxip_cntr *comp_cntr); + +#endif /* _CXIP_RMA_H_ */ diff --git a/prov/cxi/include/cxip/rxc.h b/prov/cxi/include/cxip/rxc.h new file mode 100644 index 00000000000..628d4e088b2 --- /dev/null +++ b/prov/cxi/include/cxip/rxc.h @@ -0,0 +1,245 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_RXC_H_ +#define _CXIP_RXC_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_cntr; +struct cxip_cq; +struct cxip_ctrl_req; +struct cxip_domain; +struct cxip_ep_obj; +struct cxip_pte; +struct cxip_ptelist_bufpool; +struct cxip_req; + +/* Macros */ +#define RXC_RESERVED_FC_SLOTS 1 + +#define RXC_BASE(rxc) ((struct cxip_rxc *)(void *)(rxc)) + +#define RXC_DBG(rxc, fmt, ...) \ + _CXIP_DBG(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) + +#define RXC_INFO(rxc, fmt, ...) \ + _CXIP_INFO(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) + +#define RXC_WARN(rxc, fmt, ...) 
\ + _CXIP_WARN(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) + +#define RXC_WARN_ONCE(rxc, fmt, ...) \ + _CXIP_WARN_ONCE(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) + +#define RXC_FATAL(rxc, fmt, ...) \ + CXIP_FATAL("RXC (%#x:%u) PtlTE %u:[Fatal] " fmt "", \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) + +/* Type definitions */ +struct cxip_rxc_ops { + ssize_t (*recv_common)(struct cxip_rxc *rxc, void *buf, size_t len, + void *desc, fi_addr_t src_add, uint64_t tag, + uint64_t ignore, void *context, uint64_t flags, + bool tagged, struct cxip_cntr *comp_cntr); + void (*progress)(struct cxip_rxc *rxc, bool internal); + void (*recv_req_tgt_event)(struct cxip_req *req, + const union c_event *event); + int (*cancel_msg_recv)(struct cxip_req *req); + int (*ctrl_msg_cb)(struct cxip_ctrl_req *req, + const union c_event *event); + void (*init_struct)(struct cxip_rxc *rxc, struct cxip_ep_obj *ep_obj); + void (*fini_struct)(struct cxip_rxc *rxc); + void (*cleanup)(struct cxip_rxc *rxc); + int (*msg_init)(struct cxip_rxc *rxc); + int (*msg_fini)(struct cxip_rxc *rxc); +}; + +struct cxip_rxc { + void *context; + uint32_t protocol; + + struct fi_rx_attr attr; + bool selective_completion; + bool hmem; + bool trunc_ok; + bool sw_ep_only; + bool msg_offload; + uint8_t pid_bits; // Zero without SEP + uint8_t recv_ptl_idx; + + enum cxip_rxc_state state; + + /* Reverse link to EP object that owns this context */ + struct cxip_ep_obj *ep_obj; + + struct cxip_cq *recv_cq; + struct cxip_cntr *recv_cntr; + + struct cxip_rxc_ops ops; + + struct cxip_domain *domain; + + /* RXC receive portal table, event queue and hardware + * 
command queue. + */ + struct cxip_evtq rx_evtq; + struct cxip_pte *rx_pte; + struct cxip_cmdq *rx_cmdq; + int orx_reqs; + + /* If FI_MULTI_RECV is supported, minimum receive size required + * for buffers posted. + */ + size_t min_multi_recv; + + /* If TX events are required by specialization, the maximum + * credits that can be used. + */ + int32_t max_tx; + unsigned int recv_appends; + + struct cxip_msg_counters cntrs; +}; + +struct cxip_rxc_hpc { + /* Must be first */ + struct cxip_rxc base; + + int max_eager_size; + uint64_t rget_align_mask; + + /* Window when FI_CLAIM mutual exclusive access is required */ + bool hw_claim_in_progress; + + int sw_ux_list_len; + int sw_pending_ux_list_len; + + /* Number of unexpected list entries in HW. */ + ofi_atomic32_t orx_hw_ule_cnt; + + /* RX context transmit queue is separated into two logical + * queues, one used for rendezvous get initiation and one + * used for notifications. Depending on the messaging protocols + * and traffic classes in use, the two logical queues could + * point to the same hardware queue or be distinct. + */ + struct cxip_cmdq *tx_rget_cmdq; + struct cxip_cmdq *tx_cmdq; + ofi_atomic32_t orx_tx_reqs; + + /* Software receive queue. User posted requests are queued here instead + * of on hardware if the RXC is in software endpoint mode. + */ + struct dlist_entry sw_recv_queue; + + /* Defer events to wait for both put and put overflow */ + struct def_event_ht deferred_events; + + /* Unexpected message handling */ + struct cxip_ptelist_bufpool *req_list_bufpool; + struct cxip_ptelist_bufpool *oflow_list_bufpool; + + enum cxip_rxc_state prev_state; + enum cxip_rxc_state new_state; + enum c_sc_reason fc_reason; + + /* RXC drop count used for FC accounting. */ + int drop_count; + + /* Array of 8-byte unexpected header remote offsets. */ + uint64_t *ule_offsets; + unsigned int num_ule_offsets; + + /* Current remote offset to be processed. Incremented after processing + * a search and delete put event.
+ */ + unsigned int cur_ule_offsets; + + struct dlist_entry fc_drops; + struct dlist_entry replay_queue; + struct dlist_entry sw_ux_list; + struct dlist_entry sw_pending_ux_list; + + /* Flow control/software state change metrics */ + int num_fc_eq_full; + int num_fc_no_match; + int num_fc_unexp; + int num_fc_append_fail; + int num_fc_req_full; + int num_sc_nic_hw2sw_append_fail; + int num_sc_nic_hw2sw_unexp; +}; + +struct cxip_rxc_rnr { + /* Must be first */ + struct cxip_rxc base; + + bool hybrid_mr_desc; + /* Used when success events are not required */ + struct cxip_req *req_selective_comp_msg; + struct cxip_req *req_selective_comp_tag; +}; + +/* Function declarations */ +void cxip_rxc_req_fini(struct cxip_rxc *rxc); + +int cxip_rxc_oflow_init(struct cxip_rxc *rxc); + +void cxip_rxc_oflow_fini(struct cxip_rxc *rxc); + +int cxip_rxc_msg_enable(struct cxip_rxc_hpc *rxc, uint32_t drop_count); + +struct cxip_rxc *cxip_rxc_calloc(struct cxip_ep_obj *ep_obj, void *context); + +void cxip_rxc_free(struct cxip_rxc *rxc); + +int cxip_rxc_enable(struct cxip_rxc *rxc); + +void cxip_rxc_disable(struct cxip_rxc *rxc); + +void cxip_rxc_struct_init(struct cxip_rxc *rxc, const struct fi_rx_attr *attr, + void *context); + +void cxip_rxc_recv_req_cleanup(struct cxip_rxc *rxc); + +int cxip_rxc_emit_dma(struct cxip_rxc_hpc *rxc, struct cxip_cmdq *cmdq, + uint16_t vni, enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct c_full_dma_cmd *dma, uint64_t flags); + +int cxip_rxc_emit_idc_msg(struct cxip_rxc_hpc *rxc, struct cxip_cmdq *cmdq, + uint16_t vni, enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + const struct c_cstate_cmd *c_state, + const struct c_idc_msg_hdr *msg, const void *buf, + size_t len, uint64_t flags); + +void cxip_rxc_record_req_stat(struct cxip_rxc *rxc, enum c_ptl_list list, + size_t rlength, struct cxip_req *req); + +#endif /* _CXIP_RXC_H_ */ diff --git a/prov/cxi/include/cxip/telemetry.h b/prov/cxi/include/cxip/telemetry.h 
new file mode 100644 index 00000000000..62fed6298b2 --- /dev/null +++ b/prov/cxi/include/cxip/telemetry.h @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_TELEMETRY_H_ +#define _CXIP_TELEMETRY_H_ + + +#include + +/* Forward declarations */ +struct cxip_domain; +struct cxip_telemetry; + +/* Type definitions */ +struct cxip_telemetry_entry { + struct cxip_telemetry *telemetry; + struct dlist_entry telemetry_entry; + + /* Telemetry name. */ + char name[TELEMETRY_ENTRY_NAME_SIZE]; + + /* Telemetry value. */ + unsigned long value; +}; + +/* Function declarations */ +void cxip_telemetry_dump_delta(struct cxip_telemetry *telemetry); + +void cxip_telemetry_free(struct cxip_telemetry *telemetry); + +int cxip_telemetry_alloc(struct cxip_domain *dom, + struct cxip_telemetry **telemetry); + +#endif /* _CXIP_TELEMETRY_H_ */ diff --git a/prov/cxi/include/cxip/txc.h b/prov/cxi/include/cxip/txc.h new file mode 100644 index 00000000000..feefb197180 --- /dev/null +++ b/prov/cxi/include/cxip/txc.h @@ -0,0 +1,242 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_TXC_H_ +#define _CXIP_TXC_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_cmdq; +struct cxip_cntr; +struct cxip_cq; +struct cxip_domain; +struct cxip_ep_obj; +struct cxip_md; +struct cxip_rdzv_match_pte; +struct cxip_rdzv_nomatch_pte; +struct cxip_req; + +/* Macros */ +#define CXIP_TXC_FORCE_ERR_ALT_READ_PROTO_ALLOC (1 << 0) + +#define TXC_BASE(txc) ((struct cxip_txc *)(void *)(txc)) + +#define TXC_DBG(txc, fmt, ...) \ + _CXIP_DBG(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) + +#define TXC_INFO(txc, fmt, ...) 
\ + _CXIP_INFO(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) + +#define TXC_WARN(txc, fmt, ...) \ + _CXIP_WARN(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) + +#define TXC_WARN_RET(txc, ret, fmt, ...) \ + TXC_WARN(txc, "%d:%s: " fmt "", ret, fi_strerror(-ret), ##__VA_ARGS__) + +#define TXC_FATAL(txc, fmt, ...) \ + CXIP_FATAL("TXC (%#x:%u):: " fmt "", \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) + +/* Type definitions */ +struct cxip_txc_ops { + ssize_t (*send_common)(struct cxip_txc *txc, uint32_t tclass, + const void *buf, size_t len, void *desc, + uint64_t data, fi_addr_t dest_addr, uint64_t tag, + void *context, uint64_t flags, bool tagged, + bool triggered, uint64_t trig_thresh, + struct cxip_cntr *trig_cntr, + struct cxip_cntr *comp_cntr); + void (*progress)(struct cxip_txc *txc, bool internal); + int (*cancel_msg_send)(struct cxip_req *req); + void (*init_struct)(struct cxip_txc *txc, struct cxip_ep_obj *ep_obj); + void (*fini_struct)(struct cxip_txc *txc); + void (*cleanup)(struct cxip_txc *txc); + int (*msg_init)(struct cxip_txc *txc); + int (*msg_fini)(struct cxip_txc *txc); +}; + +struct cxip_txc { + void *context; + + uint32_t protocol; + bool enabled; + bool hrp_war_req; // Non-fetching 32-bit HRP + bool hmem; + bool trunc_ok; + + struct cxip_cq *send_cq; + struct cxip_cntr *send_cntr; + struct cxip_cntr *read_cntr; + struct cxip_cntr *write_cntr; + + struct cxip_txc_ops ops; + + struct cxip_ep_obj *ep_obj; // parent EP object + struct cxip_domain *domain; // parent domain + uint8_t pid_bits; + uint8_t recv_ptl_idx; + + struct fi_tx_attr attr; // attributes + bool selective_completion; + uint32_t tclass; + + /* TX H/W Event Queue */ + struct cxip_evtq tx_evtq; + + /* Inject buffers for EP, protected by ep_obj->lock */ + 
struct ofi_bufpool *ibuf_pool; + + struct cxip_cmdq *tx_cmdq; // added during cxip_txc_enable() + int otx_reqs; // outstanding transmit requests + + /* Queue of TX messages in flight for the context */ + struct dlist_entry msg_queue; + + struct cxip_req *rma_write_selective_completion_req; + struct cxip_req *rma_read_selective_completion_req; + struct cxip_req *amo_selective_completion_req; + struct cxip_req *amo_fetch_selective_completion_req; + + struct dlist_entry dom_entry; +}; + +struct cxip_txc_hpc { + /* Must remain first */ + struct cxip_txc base; + + int max_eager_size; + int rdzv_eager_size; + + /* Rendezvous messaging support */ + struct cxip_rdzv_match_pte *rdzv_pte; + struct cxip_rdzv_nomatch_pte *rdzv_nomatch_pte[RDZV_NO_MATCH_PTES]; + struct indexer rdzv_ids; + struct indexer msg_rdzv_ids; + enum cxip_rdzv_proto rdzv_proto; + + struct cxip_cmdq *rx_cmdq; // Target cmdq for Rendezvous buffers + +#if ENABLE_DEBUG + uint64_t force_err; +#endif + /* Flow Control recovery */ + struct dlist_entry fc_peers; + + /* Match complete IDs */ + struct indexer tx_ids; + +}; + +struct cxip_txc_rnr { + /* Must remain first */ + struct cxip_txc base; + + uint64_t max_retry_wait_us; /* Maximum time to retry any request */ + ofi_atomic32_t time_wait_reqs; /* Number of RNR time wait reqs */ + uint64_t next_retry_wait_us; /* Time of next retry in all queues */ + uint64_t total_retries; + uint64_t total_rnr_nacks; + bool hybrid_mr_desc; + + /* Used when success events are not required */ + struct cxip_req *req_selective_comp_msg; + struct cxip_req *req_selective_comp_tag; + + /* There are CXIP_NUM_RNR_WAIT_QUEUE queues where each queue has + * a specified time wait value and where the last queue is has the + * maximum time wait value before retrying (and is used for all + * subsequent retries). This implementation allows each queue to + * be maintained in retry order with a simple append of the request. 
+ */ + struct dlist_entry time_wait_queue[CXIP_NUM_RNR_WAIT_QUEUE]; +}; + +/* Function declarations */ +int cxip_txc_emit_idc_put(struct cxip_txc *txc, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + const struct c_cstate_cmd *c_state, + const struct c_idc_put_cmd *put, const void *buf, + size_t len, uint64_t flags); + +int cxip_txc_emit_dma(struct cxip_txc *txc, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct cxip_cntr *trig_cntr, size_t trig_thresh, + struct c_full_dma_cmd *dma, uint64_t flags); + +int cxip_txc_emit_idc_amo(struct cxip_txc *txc, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + const struct c_cstate_cmd *c_state, + const struct c_idc_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +int cxip_txc_emit_dma_amo(struct cxip_txc *txc, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + struct cxip_cntr *trig_cntr, size_t trig_thresh, + struct c_dma_amo_cmd *amo, uint64_t flags, + bool fetching, bool flush); + +int cxip_txc_emit_idc_msg(struct cxip_txc *txc, uint16_t vni, + enum cxi_traffic_class tc, + enum cxi_traffic_class_type tc_type, + const struct c_cstate_cmd *c_state, + const struct c_idc_msg_hdr *msg, const void *buf, + size_t len, uint64_t flags); + +void cxip_txc_flush_msg_trig_reqs(struct cxip_txc *txc); + +int cxip_tx_id_alloc(struct cxip_txc_hpc *txc, void *ctx); + +int cxip_tx_id_free(struct cxip_txc_hpc *txc, int id); + +void *cxip_tx_id_lookup(struct cxip_txc_hpc *txc, int id); + +int cxip_rdzv_id_alloc(struct cxip_txc_hpc *txc, struct cxip_req *req); + +int cxip_rdzv_id_free(struct cxip_txc_hpc *txc, int id); + +void *cxip_rdzv_id_lookup(struct cxip_txc_hpc *txc, int id); + +void cxip_txc_struct_init(struct cxip_txc *txc, const struct fi_tx_attr *attr, + void *context); + +struct cxip_txc *cxip_txc_calloc(struct cxip_ep_obj *ep_obj, void *context); + +void cxip_txc_free(struct 
cxip_txc *txc); + +int cxip_txc_enable(struct cxip_txc *txc); + +void cxip_txc_disable(struct cxip_txc *txc); + +struct cxip_txc *cxip_stx_alloc(const struct fi_tx_attr *attr, void *context); + +struct cxip_md *cxip_txc_ibuf_md(void *ibuf); + +void *cxip_txc_ibuf_alloc(struct cxip_txc *txc); + +void cxip_txc_ibuf_free(struct cxip_txc *txc, void *ibuf); + +int cxip_ibuf_chunk_init(struct ofi_bufpool_region *region); + +void cxip_ibuf_chunk_fini(struct ofi_bufpool_region *region); + +#endif /* _CXIP_TXC_H_ */ diff --git a/prov/cxi/include/cxip/zbcoll.h b/prov/cxi/include/cxip/zbcoll.h new file mode 100644 index 00000000000..a568b8cb669 --- /dev/null +++ b/prov/cxi/include/cxip/zbcoll.h @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only + * + * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP + */ + +#ifndef _CXIP_ZBCOLL_H_ +#define _CXIP_ZBCOLL_H_ + + +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct cxip_addr; +struct cxip_ep_obj; + +/* Type definitions */ +struct cxip_zbcoll_cb_obj { + zbcomplete_t usrfunc; // callback function + void *usrptr; // callback data +}; + +struct cxip_zbcoll_state { + struct cxip_zbcoll_obj *zb; // backpointer to zbcoll_obj + uint64_t *dataptr; // user-supplied target + uint64_t dataval; // collective data + int num_relatives; // number of nearest relatives + int *relatives; // nearest relative indices + int contribs; // contribution count + int grp_rank; // local rank within group +}; + +struct cxip_zbcoll_obj { + struct dlist_entry ready_link; // link to zb_coll ready_list + struct cxip_ep_obj *ep_obj; // backpointer to endpoint + struct cxip_zbcoll_state *state;// state array + struct cxip_addr *caddrs; // cxip addresses in collective + int num_caddrs; // number of cxip addresses + zbcomplete_t userfunc; // completion callback function + void *userptr; // completion callback data + uint64_t *grpmskp; // pointer to global group mask + uint32_t *shuffle; 
// TEST shuffle array + int simcount; // TEST count of states + int simrank; // TEST simulated rank + int simref; // TEST zb0 reference count + int busy; // serialize collectives in zb + int grpid; // zb collective grpid + int error; // error code + int reduce; // set to report reduction data +}; + +struct cxip_ep_zbcoll_obj { + struct dlist_entry ready_list; // zbcoll ops ready to advance + struct cxip_zbcoll_obj **grptbl;// group lookup table + uint64_t grpmsk; // mask of used grptbl entries + int refcnt; // grptbl reference count + bool disable; // low level tests + ofi_spin_t lock; // group ID negotiation lock + ofi_atomic32_t dsc_count; // cumulative RCV discard count + ofi_atomic32_t err_count; // cumulative ACK error count + ofi_atomic32_t ack_count; // cumulative ACK success count + ofi_atomic32_t rcv_count; // cumulative RCV success count +}; + +/* Function declarations */ +void cxip_tree_rowcol(int radix, int nodeidx, int *row, int *col, int *siz); + +void cxip_tree_nodeidx(int radix, int row, int col, int *nodeidx); + +int cxip_tree_relatives(int radix, int nodeidx, int maxnodes, int *rels); + +int cxip_zbcoll_recv_cb(struct cxip_ep_obj *ep_obj, uint32_t init_nic, + uint32_t init_pid, uint64_t mbv, uint64_t data); + +void cxip_zbcoll_send(struct cxip_zbcoll_obj *zb, int srcidx, int dstidx, + uint64_t payload); + +void cxip_zbcoll_free(struct cxip_zbcoll_obj *zb); + +int cxip_zbcoll_alloc(struct cxip_ep_obj *ep_obj, int num_addrs, + fi_addr_t *fiaddrs, int simrank, + struct cxip_zbcoll_obj **zbp); + +int cxip_zbcoll_simlink(struct cxip_zbcoll_obj *zb0, + struct cxip_zbcoll_obj *zb); + +void cxip_zbcoll_set_user_cb(struct cxip_zbcoll_obj *zb, + zbcomplete_t userfunc, void *userptr); + +int cxip_zbcoll_max_grps(bool sim); + +int cxip_zbcoll_getgroup(struct cxip_zbcoll_obj *zb); + +void cxip_zbcoll_rlsgroup(struct cxip_zbcoll_obj *zb); + +int cxip_zbcoll_broadcast(struct cxip_zbcoll_obj *zb, uint64_t *dataptr); + +int cxip_zbcoll_reduce(struct 
cxip_zbcoll_obj *zb, uint64_t *dataptr); + +int cxip_zbcoll_barrier(struct cxip_zbcoll_obj *zb); + +void cxip_ep_zbcoll_progress(struct cxip_ep_obj *ep_obj); + +void cxip_zbcoll_reset_counters(struct cxip_ep_obj *ep_obj); + +void cxip_zbcoll_get_counters(struct cxip_ep_obj *ep_obj, uint32_t *dsc, + uint32_t *err, uint32_t *ack, uint32_t *rcv); + +void cxip_zbcoll_fini(struct cxip_ep_obj *ep_obj); + +int cxip_zbcoll_init(struct cxip_ep_obj *ep_obj); + +#endif /* _CXIP_ZBCOLL_H_ */ diff --git a/prov/cxi/scripts/analyze_symbols.py b/prov/cxi/scripts/analyze_symbols.py deleted file mode 100755 index 30f624b43b2..00000000000 --- a/prov/cxi/scripts/analyze_symbols.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env -S uv run -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "ast-grep-py>=0.31.0", -# ] -# /// -""" -Analyze CXI provider source code to extract symbol information. - -This script extracts: -- Function declarations and definitions -- Type definitions (structs, enums, typedefs, unions) -- Macro definitions -- Usage sites for all of the above - -Output is a JSON report that can be used to plan header refactoring. 
-""" - -import json -import sys -from pathlib import Path -from dataclasses import dataclass, field, asdict -from typing import Literal -from ast_grep_py import SgRoot, SgNode - - -@dataclass -class Location: - file: str - line: int - column: int - - -@dataclass -class FunctionDecl: - name: str - location: Location - is_static: bool - is_inline: bool - is_definition: bool # True if this is a definition, False if just declaration - signature: str # Full function signature for matching - - -@dataclass -class TypeDef: - name: str - location: Location - kind: Literal["struct", "enum", "union", "typedef"] - is_forward_decl: bool - full_text: str # For complex types - - -@dataclass -class MacroDef: - name: str - location: Location - is_function_like: bool - full_text: str - - -@dataclass -class SymbolUsage: - symbol_name: str - location: Location - usage_kind: Literal["call", "type_ref", "macro_ref", "pointer_only"] - # pointer_only means we only use a pointer to this type, so forward decl suffices - - -@dataclass -class FileAnalysis: - path: str - function_decls: list[FunctionDecl] = field(default_factory=list) - type_defs: list[TypeDef] = field(default_factory=list) - macro_defs: list[MacroDef] = field(default_factory=list) - usages: list[SymbolUsage] = field(default_factory=list) - - -def get_location(node: SgNode, file_path: str) -> Location: - """Extract location from an ast-grep node.""" - range_info = node.range() - return Location( - file=file_path, - line=range_info.start.line + 1, # ast-grep uses 0-indexed lines - column=range_info.start.column, - ) - - -def find_child_by_kind(node: SgNode, kind: str) -> SgNode | None: - """Find first child with given kind.""" - for child in node.children(): - if child.kind() == kind: - return child - return None - - -def find_all_children_by_kind(node: SgNode, kind: str) -> list[SgNode]: - """Find all children with given kind.""" - return [child for child in node.children() if child.kind() == kind] - - -def 
find_identifier_in_declarator(node: SgNode) -> str | None: - """Recursively find the identifier in a declarator.""" - if node.kind() == "identifier": - return node.text() - - # Check field access first if available - field_result = node.field("declarator") - if field_result: - result = find_identifier_in_declarator(field_result) - if result: - return result - - # Then check children - for child in node.children(): - if child.kind() == "identifier": - return child.text() - elif child.kind() in ("function_declarator", "pointer_declarator", - "array_declarator", "parenthesized_declarator"): - result = find_identifier_in_declarator(child) - if result: - return result - - return None - - -def analyze_functions(root: SgRoot, file_path: str) -> list[FunctionDecl]: - """Extract function declarations and definitions.""" - functions = [] - - # Find function definitions - for node in root.root().find_all(kind="function_definition"): - declarator = node.field("declarator") - if declarator is None: - continue - - func_name = find_identifier_in_declarator(declarator) - if func_name is None: - continue - - full_text = node.text() - - # Check for static/inline by looking at storage_class_specifier children - # or by checking the text before the function name - prefix = full_text.split(func_name)[0] if func_name in full_text else "" - is_static = "static" in prefix - is_inline = "inline" in prefix - - # Get signature (everything before the body) - sig_end = full_text.find('{') - signature = full_text[:sig_end].strip() if sig_end > 0 else full_text - - functions.append(FunctionDecl( - name=func_name, - location=get_location(node, file_path), - is_static=is_static, - is_inline=is_inline, - is_definition=True, - signature=signature, - )) - - # Find function declarations (no body) - for node in root.root().find_all(kind="declaration"): - declarator = node.field("declarator") - if declarator is None: - continue - - # Check if this has a function_declarator somewhere - has_func_decl = 
False - check_node = declarator - while check_node: - if check_node.kind() == "function_declarator": - has_func_decl = True - break - check_node = find_child_by_kind(check_node, "function_declarator") - if check_node is None: - # Also check pointer_declarator - ptr = find_child_by_kind(declarator, "pointer_declarator") - if ptr: - check_node = find_child_by_kind(ptr, "function_declarator") - break - - if not has_func_decl: - continue - - func_name = find_identifier_in_declarator(declarator) - if func_name is None: - continue - - full_text = node.text() - prefix = full_text.split(func_name)[0] if func_name in full_text else "" - is_static = "static" in prefix - is_inline = "inline" in prefix - - functions.append(FunctionDecl( - name=func_name, - location=get_location(node, file_path), - is_static=is_static, - is_inline=is_inline, - is_definition=False, - signature=full_text.rstrip(';'), - )) - - return functions - - -def analyze_types(root: SgRoot, file_path: str) -> list[TypeDef]: - """Extract type definitions (struct, enum, union, typedef).""" - types = [] - - # Find struct/union/enum definitions - for kind, type_kind in [("struct_specifier", "struct"), - ("union_specifier", "union"), - ("enum_specifier", "enum")]: - for node in root.root().find_all(kind=kind): - # Get the name (type_identifier child) - name_node = find_child_by_kind(node, "type_identifier") - if name_node is None: - continue - - type_name = name_node.text() - full_text = node.text() - - # Check if this is a forward declaration (no field_declaration_list) - body = find_child_by_kind(node, "field_declaration_list") - if body is None: - body = find_child_by_kind(node, "enumerator_list") - is_forward = body is None - - types.append(TypeDef( - name=type_name, - location=get_location(node, file_path), - kind=type_kind, - is_forward_decl=is_forward, - full_text=full_text, - )) - - # Find typedefs - for node in root.root().find_all(kind="type_definition"): - declarator = node.field("declarator") - if 
declarator is None: - continue - - # Get the typedef'd name - look for type_identifier - type_name = None - if declarator.kind() == "type_identifier": - type_name = declarator.text() - else: - # Look in children - for child in declarator.children(): - if child.kind() == "type_identifier": - type_name = child.text() - break - # Also check if declarator itself contains the name - if type_name is None: - ti = find_child_by_kind(declarator, "type_identifier") - if ti: - type_name = ti.text() - - if type_name is None: - # Last resort: look for any identifier - for child in declarator.children(): - if child.kind() == "identifier": - type_name = child.text() - break - - if type_name is None: - continue - - types.append(TypeDef( - name=type_name, - location=get_location(node, file_path), - kind="typedef", - is_forward_decl=False, - full_text=node.text(), - )) - - return types - - -def analyze_macros(root: SgRoot, file_path: str) -> list[MacroDef]: - """Extract macro definitions.""" - macros = [] - - for node in root.root().find_all(kind="preproc_def"): - name_node = node.field("name") - if name_node is None: - continue - - macro_name = name_node.text() - full_text = node.text() - - macros.append(MacroDef( - name=macro_name, - location=get_location(node, file_path), - is_function_like=False, - full_text=full_text, - )) - - for node in root.root().find_all(kind="preproc_function_def"): - name_node = node.field("name") - if name_node is None: - continue - - macro_name = name_node.text() - full_text = node.text() - - macros.append(MacroDef( - name=macro_name, - location=get_location(node, file_path), - is_function_like=True, - full_text=full_text, - )) - - return macros - - -def analyze_usages(root: SgRoot, file_path: str, known_functions: set[str], - known_types: set[str], known_macros: set[str]) -> list[SymbolUsage]: - """Find usages of known symbols.""" - usages = [] - seen = set() # Avoid duplicates at same location - - # Find function calls - for node in 
root.root().find_all(kind="call_expression"): - func_node = node.field("function") - if func_node is None: - continue - - # Handle direct calls - if func_node.kind() == "identifier": - func_name = func_node.text() - if func_name in known_functions: - loc = get_location(node, file_path) - key = (func_name, loc.line, loc.column, "call") - if key not in seen: - seen.add(key) - usages.append(SymbolUsage( - symbol_name=func_name, - location=loc, - usage_kind="call", - )) - - # Find type references - for node in root.root().find_all(kind="type_identifier"): - type_name = node.text() - if type_name in known_types: - # Determine if this is pointer-only usage - parent = node.parent() - is_pointer_only = False - if parent: - gp = parent.parent() - if gp and "pointer" in gp.kind(): - is_pointer_only = True - - loc = get_location(node, file_path) - kind = "pointer_only" if is_pointer_only else "type_ref" - key = (type_name, loc.line, loc.column, kind) - if key not in seen: - seen.add(key) - usages.append(SymbolUsage( - symbol_name=type_name, - location=loc, - usage_kind=kind, - )) - - # Find struct/union/enum references (when used as types) - for kind in ["struct_specifier", "union_specifier", "enum_specifier"]: - for node in root.root().find_all(kind=kind): - name_node = find_child_by_kind(node, "type_identifier") - body = find_child_by_kind(node, "field_declaration_list") - if body is None: - body = find_child_by_kind(node, "enumerator_list") - - # Only count as usage if no body (reference, not definition) - if name_node and body is None: - type_name = name_node.text() - if type_name in known_types: - parent = node.parent() - is_pointer_only = parent and "pointer" in parent.kind() - - loc = get_location(node, file_path) - usage_kind = "pointer_only" if is_pointer_only else "type_ref" - key = (type_name, loc.line, loc.column, usage_kind) - if key not in seen: - seen.add(key) - usages.append(SymbolUsage( - symbol_name=type_name, - location=loc, - usage_kind=usage_kind, - )) - 
- # Find macro usages - for node in root.root().find_all(kind="identifier"): - ident = node.text() - if ident in known_macros: - # Make sure this isn't the macro definition itself - parent = node.parent() - if parent and parent.kind() in ("preproc_def", "preproc_function_def"): - # Check if this is the name field - name_field = parent.field("name") - if name_field and name_field.text() == ident: - continue - - loc = get_location(node, file_path) - key = (ident, loc.line, loc.column, "macro_ref") - if key not in seen: - seen.add(key) - usages.append(SymbolUsage( - symbol_name=ident, - location=loc, - usage_kind="macro_ref", - )) - - return usages - - -def analyze_file(file_path: Path) -> FileAnalysis: - """Analyze a single C source file.""" - content = file_path.read_text() - - try: - root = SgRoot(content, "c") - except Exception as e: - print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr) - return FileAnalysis(path=str(file_path)) - - analysis = FileAnalysis(path=str(file_path)) - analysis.function_decls = analyze_functions(root, str(file_path)) - analysis.type_defs = analyze_types(root, str(file_path)) - analysis.macro_defs = analyze_macros(root, str(file_path)) - - return analysis - - -def main(): - # Find all C source and header files in prov/cxi - cxi_dir = Path("prov/cxi") - - if not cxi_dir.exists(): - print(f"Error: {cxi_dir} does not exist. 
Run from libfabric root.", - file=sys.stderr) - sys.exit(1) - - # Note: This script outputs to stdout, which can be piped to generate_refactor_plan.py - - c_files = list(cxi_dir.rglob("*.c")) - h_files = list(cxi_dir.rglob("*.h")) - all_files = c_files + h_files - - print(f"Found {len(c_files)} C files and {len(h_files)} header files", file=sys.stderr) - - # First pass: collect all definitions - all_analyses: list[FileAnalysis] = [] - known_functions: set[str] = set() - known_types: set[str] = set() - known_macros: set[str] = set() - - for file_path in all_files: - print(f"Analyzing {file_path}...", file=sys.stderr) - analysis = analyze_file(file_path) - all_analyses.append(analysis) - - for func in analysis.function_decls: - known_functions.add(func.name) - for typedef in analysis.type_defs: - known_types.add(typedef.name) - for macro in analysis.macro_defs: - known_macros.add(macro.name) - - print(f"Found {len(known_functions)} functions, {len(known_types)} types, " - f"{len(known_macros)} macros", file=sys.stderr) - - # Second pass: find usages - for file_path, analysis in zip(all_files, all_analyses): - content = file_path.read_text() - try: - root = SgRoot(content, "c") - analysis.usages = analyze_usages(root, str(file_path), - known_functions, known_types, known_macros) - except Exception as e: - print(f"Warning: Failed to analyze usages in {file_path}: {e}", file=sys.stderr) - - # Convert to JSON-serializable format - result = { - "files": [asdict(a) for a in all_analyses], - "summary": { - "total_functions": len(known_functions), - "total_types": len(known_types), - "total_macros": len(known_macros), - "files_analyzed": len(all_files), - } - } - - # Output JSON - print(json.dumps(result, indent=2)) - - -if __name__ == "__main__": - main() diff --git a/prov/cxi/scripts/apply_refactor.py b/prov/cxi/scripts/apply_refactor.py deleted file mode 100755 index 82c4b612467..00000000000 --- a/prov/cxi/scripts/apply_refactor.py +++ /dev/null @@ -1,1182 +0,0 @@ 
-#!/usr/bin/env -S uv run -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "ast-grep-py>=0.31.0", -# "networkx>=3.0", -# ] -# /// -""" -Apply the refactoring plan to transform the CXI provider codebase. - -This script: -1. Reads the refactor plan (refactor_plan.json) -2. Extracts symbol definitions from cxip.h -3. Creates new header files under prov/cxi/include/cxip/ -4. Updates source files to include appropriate headers -5. Removes extracted content from cxip.h - -The transformation is done in multiple passes: -- Pass 1: Parse and extract all symbol definitions from cxip.h -- Pass 2: Group symbols by target header -- Pass 3: Generate new header files with proper include guards -- Pass 4: Update source file includes -- Pass 5: Clean up cxip.h to only include the new headers -""" - -import json -import re -import sys -from pathlib import Path -from dataclasses import dataclass, field -from collections import defaultdict -from ast_grep_py import SgRoot -import networkx as nx - - -# License header for new files -LICENSE_HEADER = """\ -/* - * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only - * - * Copyright (c) 2018-2024 Hewlett Packard Enterprise Development LP - */ -""" - - -@dataclass -class ExtractedSymbol: - """A symbol extracted from the source.""" - name: str - kind: str # "function", "type", "macro" - text: str # Full text of the declaration/definition - start_line: int - end_line: int - dependencies: list[str] = field(default_factory=list) - - -@dataclass -class HeaderContent: - """Content for a new header file.""" - path: str - symbols: list[ExtractedSymbol] = field(default_factory=list) - includes: set[str] = field(default_factory=set) - forward_decls: set[str] = field(default_factory=set) - - -def load_refactor_plan(path: str | None = None) -> dict: - """Load the refactor plan JSON from file or stdin if in a pipeline.""" - if path: - with open(path) as f: - return json.load(f) - else: - return json.load(sys.stdin) - - -def 
get_include_guard(header_path: str) -> tuple[str, str]: - """Generate include guard macros for a header.""" - # Convert path like "prov/cxi/include/cxip/ep.h" to "_CXIP_EP_H_" - name = Path(header_path).stem.upper() - guard = f"_CXIP_{name}_H_" - return f"#ifndef {guard}\n#define {guard}\n", f"#endif /* {guard} */\n" - - -def extract_macro_definitions(content: str, macro_names: set[str]) -> dict[str, ExtractedSymbol]: - """Extract macro definitions using regex (more reliable for preprocessor).""" - macros = {} - lines = content.split('\n') - - i = 0 - while i < len(lines): - line = lines[i] - - # Match #define MACRO_NAME - match = re.match(r'^#define\s+(\w+)(?:\(|[^(]|\s|$)', line) - if match: - macro_name = match.group(1) - if macro_name in macro_names: - # Find the full extent (handle line continuations) - start_line = i - end_line = i - while end_line < len(lines) and lines[end_line].rstrip().endswith('\\'): - end_line += 1 - - macro_text = '\n'.join(lines[start_line:end_line + 1]) - macros[macro_name] = ExtractedSymbol( - name=macro_name, - kind="macro", - text=macro_text, - start_line=start_line + 1, # 1-indexed - end_line=end_line + 1, - ) - i += 1 - - return macros - - -def extract_type_definitions(root: SgRoot, content: str, type_names: set[str]) -> tuple[dict[str, ExtractedSymbol], dict[str, ExtractedSymbol]]: - """Extract type definitions using ast-grep. - - Returns: - Tuple of (enums, other_types) - enums are separated because they - need to be defined early (before structs that use them as fields). 
- """ - enums = {} - types = {} - lines = content.split('\n') - - # Extract struct/union/enum definitions - for kind, type_kind in [("struct_specifier", "struct"), - ("union_specifier", "union"), - ("enum_specifier", "enum")]: - for node in root.root().find_all(kind=kind): - # Get the name - name_node = None - for child in node.children(): - if child.kind() == "type_identifier": - name_node = child - break - - if name_node is None: - continue - - type_name = name_node.text() - if type_name not in type_names: - continue - - # Check if this is a definition (has body) - has_body = False - for child in node.children(): - if child.kind() in ("field_declaration_list", "enumerator_list"): - has_body = True - break - - if not has_body: - continue # Skip forward declarations - - # Get the full text including potential typedef wrapper - parent = node.parent() - if parent and parent.kind() == "type_definition": - node_to_extract = parent - else: - # Check if this is part of a declaration - if parent and parent.kind() == "declaration": - node_to_extract = parent - else: - node_to_extract = node - - range_info = node_to_extract.range() - start = range_info.start.line - end = range_info.end.line - - # Extract the lines, including the semicolon if needed - text = '\n'.join(lines[start:end + 1]) - if not text.rstrip().endswith(';'): - # Look for semicolon on next line - if end + 1 < len(lines) and lines[end + 1].strip() == ';': - text += '\n;' - end += 1 - - sym = ExtractedSymbol( - name=type_name, - kind="type", - text=text, - start_line=start + 1, - end_line=end + 1, - ) - - # Enums go to a separate collection - if type_kind == "enum": - enums[type_name] = sym - else: - types[type_name] = sym - - # Extract typedefs - for node in root.root().find_all(kind="type_definition"): - declarator = node.field("declarator") - if declarator is None: - continue - - # Get the typedef'd name - type_name = None - if declarator.kind() == "type_identifier": - type_name = declarator.text() - else: 
def extract_function_declarations(root: "SgRoot", content: str, func_names: set[str]) -> "tuple[dict[str, ExtractedSymbol], dict[str, ExtractedSymbol]]":
    """Pull function declarations out of the header text.

    Plain (non-inline) declarations and static-inline definitions are
    returned separately: inline bodies must be emitted only after every
    type is fully defined, so the caller keeps them in cxip.h.

    Returns:
        Tuple of (non_inline_functions, inline_functions), each mapping
        function name -> ExtractedSymbol.
    """
    plain = {}
    inlines = {}
    src_lines = content.split('\n')

    # Pass 1: declarations (no body).  Walk down the declarator chain to
    # confirm a function_declarator appears somewhere below it.
    for decl in root.root().find_all(kind="declaration"):
        declarator = decl.field("declarator")
        if declarator is None:
            continue

        cursor = declarator
        is_function = False
        while cursor is not None:
            if cursor.kind() == "function_declarator":
                is_function = True
                break
            cursor = next(
                (c for c in cursor.children()
                 if c.kind() in ("function_declarator", "pointer_declarator")),
                None,
            )
        if not is_function:
            continue

        name = find_identifier_in_declarator(declarator)
        if name is None or name not in func_names:
            continue

        rng = decl.range()
        first, last = rng.start.line, rng.end.line
        plain[name] = ExtractedSymbol(
            name=name,
            kind="function",
            text='\n'.join(src_lines[first:last + 1]),
            start_line=first + 1,
            end_line=last + 1,
        )

    # Pass 2: static-inline definitions (these have bodies) go to the
    # separate collection.
    for defn in root.root().find_all(kind="function_definition"):
        declarator = defn.field("declarator")
        if declarator is None:
            continue

        name = find_identifier_in_declarator(declarator)
        if name is None or name not in func_names:
            continue

        # Everything before the function name is its prefix (storage
        # class + return type); require both qualifiers.
        full_text = defn.text()
        prefix = full_text.split(name)[0] if name in full_text else ""
        if "static" not in prefix or "inline" not in prefix:
            continue

        rng = defn.range()
        first, last = rng.start.line, rng.end.line
        inlines[name] = ExtractedSymbol(
            name=name,
            kind="inline_function",
            text='\n'.join(src_lines[first:last + 1]),
            start_line=first + 1,
            end_line=last + 1,
        )

    return plain, inlines


def find_type_references(text: str, all_type_names: set[str]) -> dict[str, str]:
    """Collect struct/union/enum tags referenced through pointers.

    Only pointer uses ("struct foo *") are recorded because those can be
    satisfied with a forward declaration.  By-value (embedded) uses need
    the full definition and are handled by find_embedded_type_references().

    Returns:
        Mapping of type name -> tag kind ('struct', 'union' or 'enum').
    """
    refs: dict[str, str] = {}
    for tag in ('struct', 'union', 'enum'):
        # "struct name" optionally followed by '*' (pointer use).
        for m in re.finditer(rf'\b{tag}\s+(\w+)\s*(\*?)', text):
            name, star = m.group(1), m.group(2)
            # Non-pointer matches are embedded fields; skip them here.
            if star and name in all_type_names and name not in refs:
                refs[name] = tag
    return refs


def find_embedded_type_references(text: str, all_type_names: set[str]) -> set[str]:
    """Collect tags used by value (embedded fields, not pointers).

    Such uses CANNOT be forward declared — the defining header has to be
    included before the header containing the embedding type.
    """
    hits: set[str] = set()
    for tag in ('struct', 'union', 'enum'):
        # "struct foo bar;" / "struct foo bar[N];" but not "struct foo *bar;"
        for m in re.finditer(rf'\b{tag}\s+(\w+)\s+(?!\*)\w+[\s\[\];]', text):
            if m.group(1) in all_type_names:
                hits.add(m.group(1))
    return hits


def extract_function_pointer_typedefs(content: str) -> "dict[str, ExtractedSymbol]":
    """Extract single-line function-pointer typedefs missed by ast-grep.

    Matches the shape ``typedef ret (*name)(params);`` at the start of a
    line, where the return type is a single word.
    """
    found = {}
    pattern = re.compile(r'typedef\s+\w+\s+\(\*(\w+)\)\s*\([^)]*\)\s*;')

    for lineno, line in enumerate(content.split('\n')):
        m = pattern.match(line)
        if m is None:
            continue
        alias = m.group(1)
        found[alias] = ExtractedSymbol(
            name=alias,
            kind="type",
            text=line,
            start_line=lineno + 1,
            end_line=lineno + 1,
        )

    return found


def generate_forward_declarations(type_refs: dict[str, str]) -> list[str]:
    """Render forward-declaration lines for pointer-only type references.

    Args:
        type_refs: mapping of type name -> kind ('struct', 'union', 'enum').

    Returns:
        Sorted list of declaration strings.  Enums are skipped because C
        does not allow forward-declaring an enum.
    """
    return [f"{tag} {name};"
            for name, tag in sorted(type_refs.items())
            if tag in ('struct', 'union')]
- - Args: - type_refs: dict mapping type_name -> kind ('struct', 'union', 'enum') - - Returns: - List of forward declaration strings - """ - decls = [] - for type_name, kind in sorted(type_refs.items()): - if kind in ('struct', 'union'): - decls.append(f"{kind} {type_name};") - # Note: enums can't be forward declared in C - return decls - - -def find_identifier_in_declarator(node) -> str | None: - """Recursively find the identifier in a declarator.""" - if node.kind() == "identifier": - return node.text() - - # Check field access first - field_result = node.field("declarator") - if field_result: - result = find_identifier_in_declarator(field_result) - if result: - return result - - # Then check children - for child in node.children(): - if child.kind() == "identifier": - return child.text() - elif child.kind() in ("function_declarator", "pointer_declarator", - "array_declarator", "parenthesized_declarator"): - result = find_identifier_in_declarator(child) - if result: - return result - - return None - - -def detect_required_includes(text: str) -> list[str]: - """Detect which standard/ofi includes are needed based on types used in the code. - - Returns a list of include directives in the correct order. - """ - includes = [] - - # Map of type patterns to their required includes - # Order matters - more fundamental includes should come first - # NOTE: These includes are for documentation purposes since the wrapper - # cxip.h already includes all external dependencies. But they help - # make each split header more self-documenting. 
- include_checks = [ - # Standard C headers - (r'\b(uint8_t|uint16_t|uint32_t|uint64_t|int8_t|int16_t|int32_t|int64_t|uintptr_t)\b', - ''), - (r'\bsize_t\b', ''), - (r'\bbool\b', ''), - - # POSIX headers - (r'\bpthread_(rwlock_t|mutex_t|cond_t|t)\b', ''), - (r'\bsem_t\b', ''), - - # OFI headers - order matters for dependencies - # Note: ofi_spin_t and ofi_mutex_t are both defined in ofi_lock.h - (r'\b(dlist_entry|slist_entry|slist|dlist_ts)\b', ''), - (r'\bofi_atomic32_t\b', ''), - (r'\b(ofi_spin_t|ofi_mutex_t)\b', ''), - ] - - seen = set() - for pattern, include in include_checks: - if include not in seen and re.search(pattern, text): - includes.append(f'#include {include}') - seen.add(include) - - return includes - - -def generate_header_file(header: HeaderContent, all_type_names: set[str], - types_defined_in_header: set[str], - enum_names_in_enums_h: set[str]) -> str: - """Generate the content of a new header file. - - Note: This generates headers WITHOUT includes. The main cxip.h will - include everything in the correct order to handle dependencies. - Individual headers are not meant to be standalone. - - IMPORTANT: Inline functions are NOT included in split headers - they - remain in cxip.h after all type definitions, because they often - access struct members from multiple modules. 
- - Args: - header: The header content to generate - all_type_names: Set of all known type names across all headers - types_defined_in_header: Set of type names defined in THIS header - enum_names_in_enums_h: Set of enum names that are in enums.h (skip these) - """ - guard_start, guard_end = get_include_guard(header.path) - - lines = [] - lines.append(LICENSE_HEADER) - lines.append(guard_start) - lines.append("") - - # Group symbols by kind, preserving original source order (by start_line) - # NOTE: inline_function kind is excluded - those stay in cxip.h - # NOTE: Skip enums that are already in enums.h - macros = [s for s in header.symbols if s.kind == "macro"] - types = [s for s in header.symbols if s.kind == "type" and s.name not in enum_names_in_enums_h] - functions = [s for s in header.symbols if s.kind == "function"] - - # Sort by original source line to preserve dependency order - macros.sort(key=lambda s: s.start_line) - types.sort(key=lambda s: s.start_line) - functions.sort(key=lambda s: s.start_line) - - # Collect all text to detect required includes - all_symbol_text = '\n'.join(sym.text for sym in header.symbols) - required_includes = detect_required_includes(all_symbol_text) - if required_includes: - for inc in required_includes: - lines.append(inc) - lines.append("") - - # Compute forward declarations needed for function declarations - # (types used as pointers in function signatures) - all_text = '\n'.join(sym.text for sym in functions) - type_refs = find_type_references(all_text, all_type_names) - - # Also scan type definitions for function pointer members - # These can have struct/union pointers in their parameter lists - # e.g., int (*callback)(struct cxip_req *req, const union c_event *event); - type_text = '\n'.join(sym.text for sym in types) - type_refs_from_types = find_type_references(type_text, all_type_names) - type_refs.update(type_refs_from_types) - - # Remove types that are defined in this header (no forward decl needed) - for 
defined_type in types_defined_in_header: - type_refs.pop(defined_type, None) - - # Generate forward declarations - forward_decls = generate_forward_declarations(type_refs) - if forward_decls: - lines.append("/* Forward declarations */") - for decl in forward_decls: - lines.append(decl) - lines.append("") - - # Add macros first - if macros: - lines.append("/* Macros */") - for sym in macros: - lines.append(sym.text) - lines.append("") - - # Add types - preserve original order for dependencies - if types: - lines.append("/* Type definitions */") - for sym in types: - lines.append(sym.text) - lines.append("") - - # Add function declarations (non-inline only) - if functions: - lines.append("/* Function declarations */") - for sym in functions: - lines.append(sym.text) - lines.append("") - - lines.append(guard_end) - - return '\n'.join(lines) - - -def generate_mr_lac_cache_header(mr_lac_cache_sym: ExtractedSymbol) -> str: - """Generate a dedicated header for cxip_mr_lac_cache to break the mr.h/ctrl.h cycle. - - This struct is used by ctrl.h but defined in mr.h, creating a circular dependency. - By moving it to its own header that comes before both, we break the cycle. - """ - guard_start, guard_end = get_include_guard("prov/cxi/include/cxip/mr_lac_cache.h") - - lines = [] - lines.append(LICENSE_HEADER) - lines.append(guard_start) - lines.append("") - lines.append("/* cxip_mr_lac_cache type definition */") - lines.append("/* This is in a separate header to break the circular dependency between mr.h and ctrl.h */") - lines.append("") - lines.append("/* Forward declarations */") - lines.append("struct cxip_ctrl_req;") - lines.append("") - lines.append(mr_lac_cache_sym.text) - lines.append("") - lines.append(guard_end) - - return '\n'.join(lines) - - -def find_macro_references(text: str, all_macro_names: set[str]) -> set[str]: - """Find all macros referenced in the code. 
def generate_enums_header(enums: "list[ExtractedSymbol]") -> str:
    """Build the content of enums.h, holding every enum definition.

    This header comes first in the include order: many structs (like
    cxip_environment) embed enum fields, and C cannot forward-declare an
    enum.
    """
    guard_start, guard_end = get_include_guard("prov/cxi/include/cxip/enums.h")

    out = [
        LICENSE_HEADER,
        guard_start,
        "",
        "/* All enum type definitions */",
        "/* Included first because many structs embed enum fields */",
        "",
    ]

    # Original source order is preserved.
    for sym in sorted(enums, key=lambda s: s.start_line):
        out.append(sym.text)
        out.append("")

    out.append(guard_end)
    return '\n'.join(out)


def build_header_dependency_graph(
    headers: "dict[str, HeaderContent]",
    type_to_header: dict[str, str],
    macro_to_header: dict[str, str],
    all_type_names: set[str],
    all_macro_names: set[str],
) -> "nx.DiGraph":
    """Build a directed graph of header dependencies.

    A header depends on another when it embeds a type (by value, not by
    pointer) or uses a macro defined there.  An edge A->B means A must be
    included before B.  enums.h is forced first, and mr_lac_cache.h is
    wired in to break the mr.h/ctrl.h cycle.
    """
    graph = nx.DiGraph()

    for path in headers:
        graph.add_node(Path(path).name)

    # enums.h always participates and must precede everything.
    graph.add_node("enums.h")

    for path, content in headers.items():
        this = Path(path).name

        # Only type definitions can embed other types / use macros in
        # bit-fields and array sizes.
        combined = '\n'.join(s.text for s in content.symbols if s.kind == "type")

        # Embedded (by-value) type fields force an ordering.
        for embedded in find_embedded_type_references(combined, all_type_names):
            provider_path = type_to_header.get(embedded)
            if provider_path is None:
                continue
            provider = Path(provider_path).name
            if provider != this:
                graph.add_edge(provider, this)
                print(f" Dependency: {this} embeds type from {provider} ({embedded})",
                      file=sys.stderr)

        # Macro uses force an ordering too; skip macros this header defines.
        own_macros = {s.name for s in content.symbols if s.kind == "macro"}
        for macro in find_macro_references(combined, all_macro_names):
            if macro in own_macros:
                continue
            provider_path = macro_to_header.get(macro)
            if provider_path is None:
                continue
            provider = Path(provider_path).name
            if provider != this:
                graph.add_edge(provider, this)
                print(f" Dependency: {this} uses macro from {provider} ({macro})",
                      file=sys.stderr)

    # enums.h precedes every other header.
    for node in graph.nodes():
        if node != "enums.h":
            graph.add_edge("enums.h", node)

    # mr_lac_cache.h breaks the mr.h/ctrl.h circular dependency:
    # ctrl.h embeds cxip_mr_lac_cache, which lives in mr_lac_cache.h.
    graph.add_node("mr_lac_cache.h")
    graph.add_edge("enums.h", "mr_lac_cache.h")  # enums must come first
    # mr_lac_cache.h embeds union cxip_match_bits from msg.h
    if "msg.h" in graph.nodes():
        graph.add_edge("msg.h", "mr_lac_cache.h")
        print(f" Dependency: mr_lac_cache.h embeds union from msg.h (cxip_match_bits)", file=sys.stderr)
    if "ctrl.h" in graph.nodes():
        graph.add_edge("mr_lac_cache.h", "ctrl.h")
        print(f" Dependency: ctrl.h needs mr_lac_cache.h (cxip_mr_lac_cache)", file=sys.stderr)
    # mr.h might reference it as well.
    if "mr.h" in graph.nodes():
        graph.add_edge("mr_lac_cache.h", "mr.h")
        print(f" Dependency: mr.h needs mr_lac_cache.h (cxip_mr_lac_cache)", file=sys.stderr)

    return graph


def compute_header_order(G: "nx.DiGraph", fallback_order: list[str]) -> list[str]:
    """Topologically sort the headers; fall back to a manual order on cycles.

    Args:
        G: Dependency graph where edge A->B means A comes before B.
        fallback_order: Default order for headers absent from the graph,
            also returned wholesale if the graph has a cycle.
    """
    try:
        ordered = list(nx.topological_sort(G))
        print(f"Topological order computed: {len(ordered)} headers", file=sys.stderr)
    except nx.NetworkXUnfeasible as e:
        # Cycle detected — report it and use the hand-maintained order.
        print(f"WARNING: Cycle detected in dependency graph: {e}", file=sys.stderr)
        try:
            cycle = nx.find_cycle(G)
            print(f" Cycle: {cycle}", file=sys.stderr)
        except nx.NetworkXNoCycle:
            pass
        return fallback_order

    # Dedup while appending any fallback headers missing from the graph.
    result: list[str] = []
    seen: set[str] = set()
    for name in ordered + fallback_order:
        if name not in seen:
            result.append(name)
            seen.add(name)
    return result
def generate_wrapper_cxip_h(headers: "dict[str, HeaderContent]",
                            type_to_header: dict[str, str],
                            macro_to_header: dict[str, str],
                            all_type_names: set[str],
                            all_macro_names: set[str],
                            original_content: str,
                            inline_functions: "list[ExtractedSymbol]",
                            func_ptr_typedefs: "list[ExtractedSymbol]" = None) -> str:
    """Generate a new cxip.h that includes all the split headers.

    Preserves the original license and external includes, then pulls in
    the split headers in dependency order, followed by the inline
    function definitions (which need every type defined first):

      1. License header
      2. Include guard
      3. External includes (ofi, libcxi, etc.)
      4. Split headers (types, macros, non-inline declarations)
      5. Inline function definitions
      6. End guard
    """
    out: list[str] = []
    emit = out.append
    src = original_content.split('\n')

    # 1. License block — copy the original comment header verbatim.
    emit("/*")
    for line in src[1:10]:
        if line.startswith(" */"):
            emit(line)
            break
        emit(line)
    emit("")

    # 2. Include guard.
    emit("#ifndef _CXIP_PROV_H_")
    emit("#define _CXIP_PROV_H_")
    emit("")

    # 3. Original system/library includes, minus the split cxip/ headers
    # themselves (other cxip includes are kept).
    in_includes = False
    for line in src:
        if line.startswith("#include"):
            in_includes = True
            if "cxip/" not in line:
                emit(line)
        elif in_includes and line.strip() == "":
            emit("")
        elif in_includes and not line.startswith("#"):
            break

    # Function pointer typedefs not covered by the plan must precede the
    # split headers that use them.
    if func_ptr_typedefs:
        emit("")
        emit("/* Forward declarations for function pointer typedef parameters */")
        for typedef in func_ptr_typedefs:
            for match in re.finditer(r'struct\s+(\w+)', typedef.text):
                emit(f"struct {match.group(1)};")
        emit("")
        emit("/* Function pointer typedefs (needed by split headers) */")
        for typedef in sorted(func_ptr_typedefs, key=lambda s: s.start_line):
            emit(typedef.text)
        emit("")

    # Extern declarations for globals referenced from source files.
    emit("/* Extern declarations for global variables */")
    for decl in (
        "extern struct cxip_environment cxip_env;",
        "extern struct fi_provider cxip_prov;",
        "extern struct util_prov cxip_util_prov;",
        "extern char cxip_prov_name[];",
        "extern struct fi_fabric_attr cxip_fabric_attr;",
        "extern struct fi_domain_attr cxip_domain_attr;",
        "extern bool cxip_collectives_supported;",
        "extern int sc_page_size;",
        "extern struct slist cxip_if_list;",
    ):
        emit(decl)
    emit("")
    emit("/* Coll trace globals used by inline trace functions */")
    for decl in (
        "extern bool cxip_coll_trace_muted;",
        "extern bool cxip_coll_trace_append;",
        "extern bool cxip_coll_trace_linebuf;",
        "extern int cxip_coll_trace_rank;",
        "extern int cxip_coll_trace_numranks;",
        "extern FILE *cxip_coll_trace_fid;",
        "extern bool cxip_coll_prod_trace_initialized;",
        "extern uint64_t cxip_coll_trace_mask;",
    ):
        emit(decl)
    emit("")

    emit("/* Split headers - types, macros, and function declarations */")

    # 4. Split headers in topological order.
    print("\nBuilding header dependency graph...", file=sys.stderr)
    dep_graph = build_header_dependency_graph(headers, type_to_header, macro_to_header,
                                              all_type_names, all_macro_names)

    # Manual order used when the graph has a cycle.  mr_lac_cache.h breaks
    # the mr.h/ctrl.h circular dependency by hoisting cxip_mr_lac_cache
    # into a header that precedes both.
    fallback_order = [
        "enums.h", "addr.h", "common.h", "log.h", "env.h", "if.h",
        "iomm.h", "evtq.h", "cmdq.h", "pte.h", "eq.h", "cq.h", "cntr.h",
        "msg.h",            # must precede mr_lac_cache.h (defines cxip_match_bits)
        "mr_lac_cache.h",   # contains cxip_mr_lac_cache
        "mr.h",             # uses cxip_mr_lac_cache
        "ctrl.h",           # uses cxip_mr_lac_cache
        "dom.h", "av.h", "fabric.h", "auth.h",
        "req.h", "fc.h", "msg_hpc.h", "rma.h", "atomic.h", "txc.h", "rxc.h",
        "curl.h", "repsum.h", "coll_trace.h", "coll.h", "zbcoll.h", "ep.h",
        "req_buf.h", "ptelist_buf.h", "rdzv_pte.h", "portals_table.h",
        "info.h", "nic.h", "telemetry.h",
    ]

    header_order = compute_header_order(dep_graph, fallback_order)
    print(f"Header include order: {header_order}", file=sys.stderr)

    # Only emit includes for headers that actually exist.
    available = {Path(h).name for h in headers.keys()}
    available.add("enums.h")
    available.add("mr_lac_cache.h")

    for name in header_order:
        if name in available:
            emit(f'#include "cxip/{name}"')

    # 5. Inline definitions come last: they often touch struct members
    # from several modules, so every type must already be defined.
    if inline_functions:
        emit("")
        for banner in ("/*",
                       " * Inline function definitions",
                       " *",
                       " * These are kept here (not in split headers) because they often",
                       " * access struct members from multiple modules, requiring all types",
                       " * to be fully defined first.",
                       " */"):
            emit(banner)
        emit("")
        for func in sorted(inline_functions, key=lambda s: s.start_line):
            emit(func.text)
            emit("")

    # 6. End guard.
    emit("#endif /* _CXIP_PROV_H_ */")
    emit("")

    return '\n'.join(out)
output_dir = Path("prov/cxi/include/cxip") - - if not cxip_h_path.exists(): - print(f"Error: {cxip_h_path} not found.", file=sys.stderr) - sys.exit(1) - - # Check if we're receiving input from a pipeline - if not sys.stdin.isatty(): - print("Reading refactor plan from stdin (pipeline mode)...", file=sys.stderr) - plan = load_refactor_plan() - else: - plan_path = Path("prov/cxi/scripts/refactor_plan.json") - if not plan_path.exists(): - print(f"Error: {plan_path} not found. Run generate_refactor_plan.py first.", - file=sys.stderr) - print("Or pipe the output: ./generate_refactor_plan.py | ./apply_refactor.py", - file=sys.stderr) - sys.exit(1) - print("Loading refactor plan from file...", file=sys.stderr) - plan = load_refactor_plan(str(plan_path)) - - # Build a map of symbol name -> target header - symbol_to_header: dict[str, str] = {} - for header, info in plan["new_headers"].items(): - for func in info.get("functions", []): - symbol_to_header[func] = header - for typ in info.get("types", []): - symbol_to_header[typ] = header - for macro in info.get("macros", []): - symbol_to_header[macro] = header - - print(f"Found {len(symbol_to_header)} symbols to extract", file=sys.stderr) - - # Read and parse cxip.h - use the backup if it exists (original content) - backup_path = cxip_h_path.with_suffix('.h.orig') - if backup_path.exists(): - print("Using backup cxip.h.orig for symbol extraction...", file=sys.stderr) - content = backup_path.read_text() - else: - print("Parsing cxip.h...", file=sys.stderr) - content = cxip_h_path.read_text() - root = SgRoot(content, "c") - - # Collect symbol names by kind - macro_names = {name for name, header in symbol_to_header.items() - if any(name in info.get("macros", []) - for info in plan["new_headers"].values())} - type_names = {name for name, header in symbol_to_header.items() - if any(name in info.get("types", []) - for info in plan["new_headers"].values())} - func_names = {name for name, header in symbol_to_header.items() - if 
any(name in info.get("functions", []) - for info in plan["new_headers"].values())} - - print(f"Looking for: {len(macro_names)} macros, {len(type_names)} types, " - f"{len(func_names)} functions", file=sys.stderr) - - # Extract symbols - print("Extracting macros...", file=sys.stderr) - extracted_macros = extract_macro_definitions(content, macro_names) - print(f" Found {len(extracted_macros)} macros", file=sys.stderr) - - print("Extracting types...", file=sys.stderr) - extracted_enums, extracted_types = extract_type_definitions(root, content, type_names) - print(f" Found {len(extracted_enums)} enums (-> enums.h)", file=sys.stderr) - print(f" Found {len(extracted_types)} other types", file=sys.stderr) - - print("Extracting functions...", file=sys.stderr) - extracted_functions, extracted_inlines = extract_function_declarations(root, content, func_names) - print(f" Found {len(extracted_functions)} non-inline functions", file=sys.stderr) - print(f" Found {len(extracted_inlines)} inline functions (kept in cxip.h)", file=sys.stderr) - - # Extract function pointer typedefs (not caught by ast-grep) - # These are needed by structs that use them, even if not in the plan - print("Extracting function pointer typedefs...", file=sys.stderr) - func_ptr_typedefs = extract_function_pointer_typedefs(content) - # Remove any already extracted - for name in list(func_ptr_typedefs.keys()): - if name in extracted_types: - del func_ptr_typedefs[name] - if func_ptr_typedefs: - print(f" Found {len(func_ptr_typedefs)} function pointer typedefs: {list(func_ptr_typedefs.keys())}", - file=sys.stderr) - # Add these to extracted_types - they'll go to common.h since they're not in the plan - extracted_types.update(func_ptr_typedefs) - - # Combine all extracted symbols (excluding inline functions and enums - they have special handling) - # IMPORTANT: In C, the same name can be used for both a struct/union/enum and a function - # (e.g., struct cxip_domain and int cxip_domain(...)). 
We use composite keys to avoid collisions. - all_extracted = {} - for name, sym in extracted_macros.items(): - all_extracted[f"{name}:macro"] = sym - for name, sym in extracted_types.items(): - all_extracted[f"{name}:type"] = sym - for name, sym in extracted_functions.items(): - all_extracted[f"{name}:function"] = sym - - # Report symbols not found in cxip.h (they might be in .c files) - # Note: inline functions and enums are tracked separately - # Strip the :kind suffix for comparison with symbol_to_header - extracted_names = {key.rsplit(':', 1)[0] for key in all_extracted.keys()} - all_symbol_names = extracted_names | set(extracted_inlines.keys()) | set(extracted_enums.keys()) - not_found = set(symbol_to_header.keys()) - all_symbol_names - if not_found: - print(f"\nSymbols not found in cxip.h ({len(not_found)}):", file=sys.stderr) - for name in sorted(not_found)[:20]: - details = plan.get("symbol_details", {}).get(name, {}) - defined = details.get("defined_in", []) - print(f" {name}: defined in {defined}", file=sys.stderr) - if len(not_found) > 20: - print(f" ... 
and {len(not_found) - 20} more", file=sys.stderr) - - # Group extracted symbols by target header (excluding inline functions) - headers: dict[str, HeaderContent] = defaultdict(lambda: HeaderContent(path="")) - for key, sym in all_extracted.items(): - # Extract the original symbol name from composite key - name = key.rsplit(':', 1)[0] - target_header = symbol_to_header.get(name) - if target_header: - if headers[target_header].path == "": - headers[target_header].path = target_header - headers[target_header].symbols.append(sym) - - # Create output directory - output_dir.mkdir(parents=True, exist_ok=True) - - # Generate enums.h first (contains all enum definitions) - if extracted_enums: - enums_header_path = output_dir / "enums.h" - enums_content = generate_enums_header(list(extracted_enums.values())) - enums_header_path.write_text(enums_content) - print(f"\nCreated {enums_header_path} ({len(extracted_enums)} enums)", file=sys.stderr) - - # Generate mr_lac_cache.h to break the circular dependency between mr.h and ctrl.h - # cxip_mr_lac_cache is embedded in ctrl.h but defined in mr.h - mr_lac_cache_sym = extracted_types.pop("cxip_mr_lac_cache", None) - if mr_lac_cache_sym: - # Also remove from the mr.h header's symbols list - mr_h_path = "prov/cxi/include/cxip/mr.h" - if mr_h_path in headers: - headers[mr_h_path].symbols = [ - s for s in headers[mr_h_path].symbols - if s.name != "cxip_mr_lac_cache" - ] - - mr_lac_cache_header_path = output_dir / "mr_lac_cache.h" - mr_lac_cache_content = generate_mr_lac_cache_header(mr_lac_cache_sym) - mr_lac_cache_header_path.write_text(mr_lac_cache_content) - print(f"Created {mr_lac_cache_header_path} (breaks mr.h/ctrl.h cycle)", file=sys.stderr) - - # Build set of all type names for forward declaration analysis - all_type_names = set(extracted_types.keys()) | set(extracted_enums.keys()) - - # Set of enum names that are in enums.h (to skip in individual headers) - enum_names_in_enums_h = set(extracted_enums.keys()) - - # Build 
type_to_header mapping for dependency analysis - type_to_header: dict[str, str] = {} - for name, sym in extracted_types.items(): - target = symbol_to_header.get(name) - if target: - type_to_header[name] = target - for name, sym in extracted_enums.items(): - # Enums are in enums.h - type_to_header[name] = "enums.h" - # cxip_mr_lac_cache is in its own header (to break mr.h/ctrl.h cycle) - if mr_lac_cache_sym: - type_to_header["cxip_mr_lac_cache"] = "mr_lac_cache.h" - - # Build macro_to_header mapping for dependency analysis - macro_to_header: dict[str, str] = {} - for name, sym in extracted_macros.items(): - target = symbol_to_header.get(name) - if target: - macro_to_header[name] = target - all_macro_names = set(extracted_macros.keys()) - - # Generate and write new header files - print(f"\nGenerating {len(headers)} new headers...", file=sys.stderr) - for header_path, header_content in sorted(headers.items()): - if not header_content.symbols: - continue - - # Get types defined in this specific header - types_in_header = {s.name for s in header_content.symbols if s.kind == "type"} - - output_path = output_dir / Path(header_path).name - content = generate_header_file(header_content, all_type_names, types_in_header, enum_names_in_enums_h) - output_path.write_text(content) - print(f" Created {output_path} ({len(header_content.symbols)} symbols)", - file=sys.stderr) - - # Collect function pointer typedefs that aren't assigned to any header - unassigned_func_ptr_typedefs = [ - sym for name, sym in func_ptr_typedefs.items() - if name not in symbol_to_header - ] - - # Generate new wrapper cxip.h with inline functions at the end - original_cxip_content = cxip_h_path.read_text() - inline_func_list = list(extracted_inlines.values()) - new_cxip_h = generate_wrapper_cxip_h(headers, type_to_header, macro_to_header, - all_type_names, all_macro_names, - original_cxip_content, inline_func_list, - unassigned_func_ptr_typedefs) - - # Save original backup and overwrite cxip.h directly - 
backup_path = cxip_h_path.with_suffix('.h.orig') - if not backup_path.exists(): - backup_path.write_text(original_cxip_content) - print(f"\nBacked up original cxip.h to {backup_path}", file=sys.stderr) - - # Overwrite cxip.h directly - cxip_h_path.write_text(new_cxip_h) - print(f"Updated {cxip_h_path}", file=sys.stderr) - - # Report summary - print(f"\nSummary:", file=sys.stderr) - print(f" {len(extracted_enums)} enums -> enums.h (included first)", file=sys.stderr) - if mr_lac_cache_sym: - print(f" 1 type (cxip_mr_lac_cache) -> mr_lac_cache.h (breaks cycle)", file=sys.stderr) - print(f" {len(extracted_macros)} macros -> split headers", file=sys.stderr) - print(f" {len(extracted_types)} other types -> split headers", file=sys.stderr) - print(f" {len(extracted_functions)} non-inline functions -> split headers", file=sys.stderr) - print(f" {len(extracted_inlines)} inline functions -> kept in cxip.h (after all types)", file=sys.stderr) - - print("\nDone! Run 'make' to build.", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/prov/cxi/scripts/generate_refactor_plan.py b/prov/cxi/scripts/generate_refactor_plan.py deleted file mode 100755 index e62e8c341da..00000000000 --- a/prov/cxi/scripts/generate_refactor_plan.py +++ /dev/null @@ -1,610 +0,0 @@ -#!/usr/bin/env -S uv run -# /// script -# requires-python = ">=3.11" -# dependencies = [] -# /// -""" -Generate a refactoring plan from symbol analysis. - -Improved heuristics: -1. Use naming conventions first - cxip_foo_* belongs in foo.h -2. Detect callback functions - functions assigned to struct fields aren't dead -3. Handle fundamental types - widely-used types go to logical home based on name -4. 
Track type dependencies for proper header ordering -""" - -import json -import re -import sys -from pathlib import Path -from dataclasses import dataclass, field -from collections import defaultdict, Counter -from typing import Literal - - -@dataclass -class SymbolInfo: - name: str - kind: Literal["function", "type", "macro"] - defined_in: list[str] - declared_in: list[str] - used_in: list[str] - is_static: bool = False - is_inline: bool = False - is_definition: bool = False - full_text: str = "" - signature: str = "" - - -@dataclass -class RefactorPlan: - symbol_locations: dict[str, str] = field(default_factory=dict) - new_headers: dict[str, list[str]] = field(default_factory=dict) - private_symbols: dict[str, list[str]] = field(default_factory=dict) - inline_handling: dict[str, str] = field(default_factory=dict) - likely_callbacks: list[str] = field(default_factory=list) - - -def is_src_file(path: str) -> bool: - return "prov/cxi/src/" in path and path.endswith(".c") - - -def is_test_file(path: str) -> bool: - return "prov/cxi/test/" in path - - -def is_header_file(path: str) -> bool: - return path.endswith(".h") - - -def is_main_header(path: str) -> bool: - return path.endswith("cxip.h") and "include/cxip.h" in path - - -def get_tu_name(path: str) -> str: - stem = Path(path).stem - if stem.startswith("cxip_"): - return stem[5:] - return stem - - -def get_header_for_tu(tu_name: str) -> str: - return f"prov/cxi/include/cxip/{tu_name}.h" - - -# Naming convention patterns - order matters (more specific first) -NAMING_PATTERNS = [ - # Specific subsystem patterns - (r'^cxip_txc_hpc', 'txc'), - (r'^cxip_txc_rnr', 'txc'), - (r'^cxip_rxc_hpc', 'rxc'), - (r'^cxip_rxc_rnr', 'rxc'), - (r'^cxip_msg_hpc', 'msg_hpc'), - (r'^cxip_msg_rnr', 'msg_rnr'), - (r'^cxip_rdzv_pte', 'rdzv_pte'), - (r'^cxip_rdzv_match', 'rdzv_pte'), - (r'^cxip_rdzv_nomatch', 'rdzv_pte'), - (r'^cxip_ptelist_buf', 'ptelist_buf'), - (r'^cxip_req_buf', 'req_buf'), - (r'^cxip_coll_trace', 'coll_trace'), - 
(r'^cxip_ep_obj', 'ep'), - (r'^cxip_ep_zbcoll', 'zbcoll'), - (r'^cxip_ep_coll', 'coll'), - - # General patterns - (r'^cxip_txc_', 'txc'), - (r'^cxip_rxc_', 'rxc'), - (r'^cxip_mr_', 'mr'), - (r'^cxip_cq_', 'cq'), - (r'^cxip_eq_', 'eq'), - (r'^cxip_ep_', 'ep'), - (r'^cxip_av_', 'av'), - (r'^cxip_cntr_', 'cntr'), - (r'^cxip_domain_', 'dom'), - (r'^cxip_dom_', 'dom'), - (r'^cxip_pte_', 'pte'), - (r'^cxip_cmdq_', 'cmdq'), - (r'^cxip_evtq_', 'evtq'), - (r'^cxip_ctrl_', 'ctrl'), - (r'^cxip_coll_', 'coll'), - (r'^cxip_zbcoll_', 'zbcoll'), - (r'^cxip_curl_', 'curl'), - (r'^cxip_rma_', 'rma'), - (r'^cxip_atomic_', 'atomic'), - (r'^cxip_msg_', 'msg'), - (r'^cxip_if_', 'if'), - (r'^cxip_iomm_', 'iomm'), - (r'^cxip_fabric_', 'fabric'), - (r'^cxip_telemetry_', 'telemetry'), - (r'^cxip_nic_', 'nic'), - (r'^cxip_info_', 'info'), - (r'^cxip_rep_', 'repsum'), - (r'^cxip_faults_', 'faults'), - (r'^cxip_portals_table', 'portals_table'), - (r'^cxip_lni', 'if'), - (r'^cxip_recv_', 'msg'), - (r'^cxip_send_', 'msg'), - (r'^cxip_ux_', 'msg'), - (r'^cxip_fc_', 'fc'), - (r'^cxip_map', 'iomm'), - (r'^cxip_unmap', 'iomm'), - (r'^cxip_copy_', 'mr'), - (r'^cxip_generic_', 'mr'), - (r'^cxip_tree_', 'zbcoll'), - (r'^cxip_check_auth', 'auth'), - (r'^cxip_gen_auth', 'auth'), - - # Type-specific patterns (for struct names) - (r'^cxip_txc$', 'txc'), - (r'^cxip_rxc$', 'rxc'), - (r'^cxip_ep$', 'ep'), - (r'^cxip_mr$', 'mr'), - (r'^cxip_cq$', 'cq'), - (r'^cxip_eq$', 'eq'), - (r'^cxip_cntr$', 'cntr'), - (r'^cxip_av$', 'av'), - (r'^cxip_domain$', 'dom'), - (r'^cxip_fabric$', 'fabric'), - (r'^cxip_pte$', 'pte'), - (r'^cxip_cmdq$', 'cmdq'), - (r'^cxip_evtq$', 'evtq'), - (r'^cxip_req$', 'req'), - (r'^cxip_md$', 'mr'), - (r'^cxip_if$', 'if'), - (r'^cxip_addr$', 'addr'), - (r'^cxip_environment$', 'env'), - (r'^cxip_env$', 'env'), - - # Additional type patterns to reduce common.h usage - (r'^cxip_req_', 'req'), # cxip_req_send, cxip_req_recv, etc. 
- (r'^cxip_repsum$', 'repsum'), - (r'^cxip_dbl_bits$', 'repsum'), - (r'^_bits2dbl$', 'repsum'), - (r'^_dbl2bits$', 'repsum'), - (r'^_decompose_dbl$', 'repsum'), - (r'^cxip_ctrl$', 'ctrl'), - (r'^cxip_ptelist_req$', 'ptelist_buf'), - (r'^cxip_fltval$', 'coll'), - (r'^cxip_fltminmax$', 'coll'), - (r'^cxip_intval$', 'coll'), - (r'^cxip_iminmax$', 'coll'), - (r'^curl_ops$', 'curl'), - (r'^cxip_match_bits$', 'msg'), - (r'^cxip_llring_mode$', 'cmdq'), - (r'^cxip_le_type$', 'pte'), - (r'^cxip_amo_req_type$', 'atomic'), - (r'^cxip_ats_mlock_mode$', 'iomm'), - (r'^cxip_fid_list$', 'cq'), - (r'^cxip_remap_cp$', 'if'), - (r'^def_event_ht$', 'evtq'), - - # Inline utility function patterns - (r'^is_netsim$', 'ep'), - (r'^cxip_txq_ring$', 'cmdq'), - (r'^cxip_mac_to_nic$', 'if'), - (r'^cxip_cacheline_size$', 'if'), - (r'^cxip_adjust_remote_offset$', 'mr'), - (r'^single_to_double_quote$', 'curl'), - (r'^cxip_json_', 'curl'), - (r'^cxip_set_env_', 'env'), - (r'^cxip_set_recv_', 'rxc'), - (r'^cxip_get_owner_srx$', 'rxc'), - (r'^cxip_is_trig_req$', 'req'), - (r'^cxip_no_discard$', 'msg'), - (r'^cxip_software_pte_allowed$', 'pte'), - (r'^cxip_stx_alloc$', 'txc'), - (r'^fls64$', 'if'), - (r'^cxi_tc_str$', 'if'), - - # Macro patterns for common macros - (r'^CXIP_ADDR_', 'addr'), - (r'^CXIP_TAG_', 'msg'), - (r'^CXIP_ALIGN', 'common'), - (r'^ARRAY_SIZE$', 'common'), - (r'^CEILING$', 'common'), - (r'^FLOOR$', 'common'), - (r'^CXIP_DBG$', 'log'), - (r'^CXIP_INFO$', 'log'), - (r'^CXIP_WARN', 'log'), - (r'^CXIP_LOG$', 'log'), - (r'^CXIP_FATAL$', 'log'), - (r'^TXC_', 'txc'), - (r'^RXC_', 'rxc'), - (r'^DOM_', 'dom'), -] - -# Callback function patterns - functions that are likely used as callbacks -CALLBACK_PATTERNS = [ - r'_cb$', # Ends with _cb - r'_callback$', # Ends with _callback - r'_handler$', # Ends with _handler - r'_progress$', # Progress functions - r'_recv$', # Receive callbacks - r'_send$', # Send callbacks - r'_complete$', # Completion callbacks - r'_ops$', # Operation tables -] - 
- -def infer_home_from_name(name: str) -> str | None: - """Infer the home TU from the symbol name using patterns.""" - for pattern, tu in NAMING_PATTERNS: - if re.match(pattern, name, re.IGNORECASE): - return tu - return None - - -def is_likely_callback(name: str, sym: SymbolInfo) -> bool: - """Check if a function is likely a callback based on naming patterns.""" - if sym.kind != "function": - return False - - for pattern in CALLBACK_PATTERNS: - if re.search(pattern, name): - return True - - # Also check if function signature suggests callback (returns int, has specific params) - # This is a heuristic based on common callback patterns - return False - - -def find_home_tu_by_definition(sym: SymbolInfo) -> str | None: - """Find home TU based on where the symbol is defined.""" - src_definitions = [f for f in sym.defined_in if is_src_file(f)] - if src_definitions: - return get_tu_name(src_definitions[0]) - return None - - -def find_home_tu_by_usage(sym: SymbolInfo) -> str | None: - """Find home TU based on usage patterns (fallback).""" - src_users = [f for f in sym.used_in if is_src_file(f)] - if not src_users: - return None - - tu_counts = Counter(get_tu_name(f) for f in src_users) - if tu_counts: - return tu_counts.most_common(1)[0][0] - return None - - -def load_analysis(path: str | None = None) -> dict: - """Load analysis from file or stdin if in a pipeline.""" - if path: - with open(path) as f: - return json.load(f) - else: - return json.load(sys.stdin) - - -def extract_symbols(analysis: dict) -> dict[str, SymbolInfo]: - """Extract and aggregate symbol information from analysis. - - Note: C allows the same name for a struct/union/enum and a function - (e.g., struct cxip_domain and int cxip_domain(...)). We use composite - keys like "name:type" and "name:function" to track both. 
- """ - symbols: dict[str, SymbolInfo] = {} - - for file_info in analysis["files"]: - file_path = file_info["path"] - - for func in file_info.get("function_decls", []): - name = func["name"] - # Use composite key to allow same name as type - func_key = f"{name}:function" - if func_key not in symbols: - symbols[func_key] = SymbolInfo( - name=name, - kind="function", - defined_in=[], - declared_in=[], - used_in=[], - is_static=func.get("is_static", False), - is_inline=func.get("is_inline", False), - signature=func.get("signature", ""), - ) - - sym = symbols[func_key] - if func.get("is_definition", False): - if file_path not in sym.defined_in: - sym.defined_in.append(file_path) - sym.is_definition = True - sym.is_static = func.get("is_static", False) - sym.is_inline = func.get("is_inline", False) - sym.signature = func.get("signature", sym.signature) - else: - if file_path not in sym.declared_in: - sym.declared_in.append(file_path) - - for typedef in file_info.get("type_defs", []): - name = typedef["name"] - # Use composite key to allow same name as function - type_key = f"{name}:type" - if type_key not in symbols: - symbols[type_key] = SymbolInfo( - name=name, - kind="type", - defined_in=[], - declared_in=[], - used_in=[], - full_text=typedef.get("full_text", ""), - ) - - sym = symbols[type_key] - if not typedef.get("is_forward_decl", False): - if file_path not in sym.defined_in: - sym.defined_in.append(file_path) - sym.full_text = typedef.get("full_text", "") - - for macro in file_info.get("macro_defs", []): - name = macro["name"] - # Macros don't share namespace with types/functions, but use key for consistency - macro_key = f"{name}:macro" - if macro_key not in symbols: - symbols[macro_key] = SymbolInfo( - name=name, - kind="macro", - defined_in=[], - declared_in=[], - used_in=[], - full_text=macro.get("full_text", ""), - ) - - sym = symbols[macro_key] - if file_path not in sym.defined_in: - sym.defined_in.append(file_path) - - for usage in file_info.get("usages", 
[]): - name = usage["symbol_name"] - # Try to find the symbol in any kind - for key in [f"{name}:function", f"{name}:type", f"{name}:macro"]: - if key in symbols: - if file_path not in symbols[key].used_in: - symbols[key].used_in.append(file_path) - - return symbols - - -def analyze_symbol_visibility(symbols: dict[str, SymbolInfo]) -> RefactorPlan: - """Determine where each symbol should live using improved heuristics.""" - plan = RefactorPlan() - - for key, sym in symbols.items(): - # Use sym.name for naming-based heuristics, but key for storage - # This allows both 'cxip_domain:type' and 'cxip_domain:function' to coexist - name = sym.name - # Skip symbols not from main cxip.h or src files - from_main_header = any(is_main_header(f) for f in sym.defined_in + sym.declared_in) - from_src = any(is_src_file(f) for f in sym.defined_in) - - if not from_main_header and not from_src: - plan.symbol_locations[key] = "external" - continue - - src_users = set(f for f in sym.used_in if is_src_file(f)) - test_users = set(f for f in sym.used_in if is_test_file(f)) - - # Check if this is likely a callback function - if is_likely_callback(name, sym): - plan.likely_callbacks.append(key) - - # HEURISTIC 1: Static symbols stay private - if sym.is_static and sym.defined_in: - src_defs = [f for f in sym.defined_in if is_src_file(f)] - if src_defs: - plan.symbol_locations[key] = f"private:{src_defs[0]}" - if src_defs[0] not in plan.private_symbols: - plan.private_symbols[src_defs[0]] = [] - plan.private_symbols[src_defs[0]].append(name) - continue - - # HEURISTIC 2: Use naming convention first - home_tu = infer_home_from_name(name) - - # HEURISTIC 3: For functions, prefer where they're defined - if home_tu is None and sym.kind == "function": - home_tu = find_home_tu_by_definition(sym) - - # HEURISTIC 4: For types/macros defined in header, use name-based - # Don't fall back to usage-based for types - that leads to poor placement - - # Determine if symbol needs to be exported - 
needs_export = (len(src_users) > 1 or - len(test_users) > 0 or - is_likely_callback(name, sym)) - - # CRITICAL: If a type/macro is DEFINED in the main header, it must go to a header - # Even if it's only used in one place - it's part of the public API - if from_main_header and sym.kind in ("type", "macro"): - needs_export = True - - # Even single-use non-static functions might be callbacks - if sym.kind == "function" and not sym.is_static and len(src_users) <= 1: - if is_likely_callback(name, sym): - needs_export = True - elif from_main_header: # Declared in header = intended to be public - needs_export = True - - if needs_export: - if home_tu: - header = get_header_for_tu(home_tu) - plan.symbol_locations[key] = f"header:{header}" - if header not in plan.new_headers: - plan.new_headers[header] = [] - plan.new_headers[header].append(name) - else: - # Can't determine - put in common.h for now - plan.symbol_locations[key] = "header:prov/cxi/include/cxip/common.h" - if "prov/cxi/include/cxip/common.h" not in plan.new_headers: - plan.new_headers["prov/cxi/include/cxip/common.h"] = [] - plan.new_headers["prov/cxi/include/cxip/common.h"].append(name) - elif len(src_users) == 1: - # Private to one file - src_file = list(src_users)[0] - plan.symbol_locations[key] = f"private:{src_file}" - if src_file not in plan.private_symbols: - plan.private_symbols[src_file] = [] - plan.private_symbols[src_file].append(name) - elif len(src_users) == 0 and not test_users: - # Check if it's a callback or has declaration in header - if is_likely_callback(name, sym) or from_main_header: - if home_tu: - header = get_header_for_tu(home_tu) - plan.symbol_locations[key] = f"header:{header}" - if header not in plan.new_headers: - plan.new_headers[header] = [] - plan.new_headers[header].append(name) - else: - plan.symbol_locations[key] = "header:prov/cxi/include/cxip/common.h" - if "prov/cxi/include/cxip/common.h" not in plan.new_headers: - plan.new_headers["prov/cxi/include/cxip/common.h"] = [] - 
plan.new_headers["prov/cxi/include/cxip/common.h"].append(name) - else: - plan.symbol_locations[key] = "dead_code" - else: - plan.symbol_locations[key] = "unknown" - - # Analyze inline functions - for key, sym in symbols.items(): - name = sym.name - if sym.kind == "function" and sym.is_inline: - src_users = set(f for f in sym.used_in if is_src_file(f)) - - if len(src_users) == 0: - if is_likely_callback(name, sym): - plan.inline_handling[key] = "keep_for_callback" - else: - plan.inline_handling[key] = "possibly_dead" - elif len(src_users) == 1: - plan.inline_handling[key] = f"private:{list(src_users)[0]}" - else: - loc = plan.symbol_locations.get(key, "") - if loc.startswith("header:"): - plan.inline_handling[key] = f"keep_inline:{loc.split(':', 1)[1]}" - else: - plan.inline_handling[key] = "make_regular_function" - - return plan - - -def generate_report(symbols: dict[str, SymbolInfo], plan: RefactorPlan) -> dict: - """Generate a structured report.""" - # Build a reverse lookup: name -> list of keys (to handle name collisions) - name_to_keys: dict[str, list[str]] = defaultdict(list) - for key, sym in symbols.items(): - name_to_keys[sym.name].append(key) - - report = { - "summary": { - "total_symbols": len(symbols), - "functions": sum(1 for s in symbols.values() if s.kind == "function"), - "types": sum(1 for s in symbols.values() if s.kind == "type"), - "macros": sum(1 for s in symbols.values() if s.kind == "macro"), - "likely_callbacks": len(plan.likely_callbacks), - }, - "new_headers": {}, - "private_symbols": {}, - "inline_functions": plan.inline_handling, - "likely_callbacks": sorted(plan.likely_callbacks), - "location_summary": defaultdict(int), - } - - for key, loc in plan.symbol_locations.items(): - if loc.startswith("header:"): - report["location_summary"]["needs_header"] += 1 - elif loc.startswith("private:"): - report["location_summary"]["private"] += 1 - elif loc == "dead_code": - report["location_summary"]["dead_code"] += 1 - elif loc == "external": - 
report["location_summary"]["external"] += 1 - else: - report["location_summary"]["other"] += 1 - - for header, sym_names in sorted(plan.new_headers.items()): - # Categorize symbols by kind - need to look up by name, handling collisions - funcs = [] - types = [] - macros = [] - for name in sym_names: - # Find all keys for this name and categorize - for key in name_to_keys.get(name, []): - sym = symbols[key] - if sym.kind == "function" and name not in funcs: - funcs.append(name) - elif sym.kind == "type" and name not in types: - types.append(name) - elif sym.kind == "macro" and name not in macros: - macros.append(name) - - report["new_headers"][header] = { - "count": len(sym_names), - "functions": sorted(set(funcs)), - "types": sorted(set(types)), - "macros": sorted(set(macros)), - } - - for file, syms in sorted(plan.private_symbols.items()): - report["private_symbols"][file] = { - "count": len(syms), - "symbols": sorted(syms), - } - - report["location_summary"] = dict(report["location_summary"]) - - return report - - -def main(): - # Check if we're receiving input from a pipeline - if not sys.stdin.isatty(): - print("Reading symbol analysis from stdin (pipeline mode)...", file=sys.stderr) - analysis = load_analysis() - else: - analysis_path = Path("prov/cxi/scripts/symbol_analysis.json") - if not analysis_path.exists(): - print(f"Error: {analysis_path} not found. 
Run analyze_symbols.py first.", - file=sys.stderr) - print("Or pipe the output: ./analyze_symbols.py | ./generate_refactor_plan.py", - file=sys.stderr) - sys.exit(1) - print("Loading symbol analysis from file...", file=sys.stderr) - analysis = load_analysis(str(analysis_path)) - - print("Extracting symbols...", file=sys.stderr) - symbols = extract_symbols(analysis) - print(f"Found {len(symbols)} unique symbols", file=sys.stderr) - - print("Analyzing symbol visibility with improved heuristics...", file=sys.stderr) - plan = analyze_symbol_visibility(symbols) - - print("Generating report...", file=sys.stderr) - report = generate_report(symbols, plan) - - # Add detailed symbol info - use sym.name as key in output for readability - # but track collisions (same name with different kinds) - report["symbol_details"] = {} - for key, sym in symbols.items(): - name = sym.name - # If there's already an entry for this name, append the kind to distinguish - output_key = name if name not in report["symbol_details"] else key - report["symbol_details"][output_key] = { - "kind": sym.kind, - "defined_in": sym.defined_in, - "declared_in": sym.declared_in, - "used_in_count": len(sym.used_in), - "used_in_src": [f for f in sym.used_in if is_src_file(f)], - "used_in_test": [f for f in sym.used_in if is_test_file(f)], - "is_static": sym.is_static, - "is_inline": sym.is_inline, - "is_likely_callback": is_likely_callback(name, sym), - "inferred_home": infer_home_from_name(name), - "recommended_location": plan.symbol_locations.get(key, "unknown"), - } - - print(json.dumps(report, indent=2)) - - -if __name__ == "__main__": - main() From 75ab956c9967fb1b2dac6ec398307176fc83ac6d Mon Sep 17 00:00:00 2001 From: Nicholas Sielicki Date: Fri, 2 Jan 2026 16:16:28 -0600 Subject: [PATCH 3/3] prov/cxi: run clang-format on all headers. Add clang-format disable/enables in cxip.h to ensure include-order stays valid. Run clang-format over all the newly generated headers. 
Signed-off-by: Nicholas Sielicki --- prov/cxi/include/cxip.h | 187 +++++++++---------- prov/cxi/include/cxip/addr.h | 7 +- prov/cxi/include/cxip/atomic.h | 5 +- prov/cxi/include/cxip/auth.h | 1 - prov/cxi/include/cxip/av.h | 23 ++- prov/cxi/include/cxip/cmdq.h | 5 +- prov/cxi/include/cxip/cntr.h | 16 +- prov/cxi/include/cxip/coll.h | 249 +++++++++++++------------- prov/cxi/include/cxip/coll_trace.h | 11 +- prov/cxi/include/cxip/common.h | 205 ++++++++++----------- prov/cxi/include/cxip/cq.h | 13 +- prov/cxi/include/cxip/ctrl.h | 3 +- prov/cxi/include/cxip/curl.h | 25 ++- prov/cxi/include/cxip/dom.h | 34 ++-- prov/cxi/include/cxip/enums.h | 47 +++-- prov/cxi/include/cxip/env.h | 3 +- prov/cxi/include/cxip/ep.h | 38 ++-- prov/cxi/include/cxip/eq.h | 3 +- prov/cxi/include/cxip/evtq.h | 13 +- prov/cxi/include/cxip/fabric.h | 1 - prov/cxi/include/cxip/fc.h | 5 +- prov/cxi/include/cxip/if.h | 7 +- prov/cxi/include/cxip/info.h | 1 - prov/cxi/include/cxip/iomm.h | 1 - prov/cxi/include/cxip/log.h | 16 +- prov/cxi/include/cxip/mr.h | 59 +++--- prov/cxi/include/cxip/mr_lac_cache.h | 4 +- prov/cxi/include/cxip/msg.h | 107 ++++++----- prov/cxi/include/cxip/msg_hpc.h | 1 - prov/cxi/include/cxip/nic.h | 1 - prov/cxi/include/cxip/portals_table.h | 3 +- prov/cxi/include/cxip/pte.h | 29 ++- prov/cxi/include/cxip/ptelist_buf.h | 10 +- prov/cxi/include/cxip/rdzv_pte.h | 1 - prov/cxi/include/cxip/repsum.h | 11 +- prov/cxi/include/cxip/req.h | 63 ++++--- prov/cxi/include/cxip/req_buf.h | 15 +- prov/cxi/include/cxip/rma.h | 14 +- prov/cxi/include/cxip/rxc.h | 43 +++-- prov/cxi/include/cxip/telemetry.h | 1 - prov/cxi/include/cxip/txc.h | 50 +++--- prov/cxi/include/cxip/zbcoll.h | 81 +++++---- prov/cxi/include/cxip_faults.h | 20 +-- prov/cxi/include/fi_cxi_ext.h | 130 +++++++------- 44 files changed, 748 insertions(+), 814 deletions(-) diff --git a/prov/cxi/include/cxip.h b/prov/cxi/include/cxip.h index 2eb102a7a4c..b46e57c7fd8 100644 --- a/prov/cxi/include/cxip.h +++ 
b/prov/cxi/include/cxip.h @@ -10,11 +10,11 @@ #ifndef _CXIP_PROV_H_ #define _CXIP_PROV_H_ -#include #include "config.h" +#include -#include #include +#include #include #include @@ -31,28 +31,22 @@ #include #include #include -#include #include +#include +#include #include -#include -#include #include -#include +#include +#include +#include #include +#include #include -#include -#include #include -#include "libcxi/libcxi.h" #include "cxip_faults.h" #include "fi_cxi_ext.h" - - - - - - +#include "libcxi/libcxi.h" /* Forward declarations for function pointer typedef parameters */ struct cxip_zbcoll_obj; @@ -84,6 +78,9 @@ extern bool cxip_coll_prod_trace_initialized; extern uint64_t cxip_coll_trace_mask; /* Split headers - types, macros, and function declarations */ +// clang-format off +// These headers are order-specific due to circular dependencies, don't reorder them. +// FIXME: make every header freestanding. #include "cxip/enums.h" #include "cxip/env.h" #include "cxip/cmdq.h" @@ -125,6 +122,7 @@ extern uint64_t cxip_coll_trace_mask; #include "cxip/mr.h" #include "cxip/fc.h" #include "cxip/dom.h" +// clang-format on /* * Inline function definitions @@ -134,13 +132,13 @@ extern uint64_t cxip_coll_trace_mask; * to be fully defined first. 
*/ -static inline bool cxip_software_pte_allowed(enum cxip_ep_ptle_mode rx_match_mode) +static inline bool +cxip_software_pte_allowed(enum cxip_ep_ptle_mode rx_match_mode) { return rx_match_mode != CXIP_PTLTE_HARDWARE_MODE; } -static inline -uint64_t cxip_adjust_remote_offset(uint64_t *addr, uint64_t key) +static inline uint64_t cxip_adjust_remote_offset(uint64_t *addr, uint64_t key) { struct cxip_mr_key cxip_key = { .raw = key, @@ -230,15 +228,16 @@ static inline void cxip_msg_counters_init(struct cxip_msg_counters *cntrs) for (i = 0; i < CXIP_LIST_COUNTS; i++) { for (j = 0; j < OFI_HMEM_MAX; j++) { for (k = 0; k < CXIP_COUNTER_BUCKETS; k++) - ofi_atomic_initialize32(&cntrs->msg_count[i][j][k], 0); + ofi_atomic_initialize32( + &cntrs->msg_count[i][j][k], 0); } } } -static inline void -cxip_msg_counters_msg_record(struct cxip_msg_counters *cntrs, - enum c_ptl_list list, enum fi_hmem_iface buf_type, - size_t msg_size) +static inline void cxip_msg_counters_msg_record(struct cxip_msg_counters *cntrs, + enum c_ptl_list list, + enum fi_hmem_iface buf_type, + size_t msg_size) { unsigned int bucket; @@ -268,7 +267,8 @@ static inline void cxip_copy_to_md(struct cxip_md *md, void *dest, ssize_t ret __attribute__((unused)); struct iovec iov; bool dev_reg_copy = require_dev_reg_copy || - (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold); + (md->handle_valid && + size <= cxip_env.safe_devmem_copy_threshold); /* Favor dev reg access instead of relying on HMEM copy functions. 
*/ if (dev_reg_copy) { @@ -279,9 +279,8 @@ static inline void cxip_copy_to_md(struct cxip_md *md, void *dest, iov.iov_base = dest; iov.iov_len = size; - ret = md->dom->hmem_ops.copy_to_hmem_iov(md->info.iface, - md->info.device, &iov, - 1, 0, src, size); + ret = md->dom->hmem_ops.copy_to_hmem_iov( + md->info.iface, md->info.device, &iov, 1, 0, src, size); assert(ret == size); } } @@ -293,38 +292,36 @@ static inline void cxip_copy_from_md(struct cxip_md *md, void *dest, ssize_t ret __attribute__((unused)); struct iovec iov; bool dev_reg_copy = require_dev_reg_copy || - (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold); + (md->handle_valid && + size <= cxip_env.safe_devmem_copy_threshold); /* Favor dev reg access instead of relying on HMEM copy functions. */ if (dev_reg_copy) { - ret = ofi_hmem_dev_reg_copy_from_hmem(md->info.iface, - md->handle, - dest, src, size); + ret = ofi_hmem_dev_reg_copy_from_hmem( + md->info.iface, md->handle, dest, src, size); assert(ret == FI_SUCCESS); } else { - iov.iov_base = (void *)src; + iov.iov_base = (void *) src; iov.iov_len = size; - - ret = md->dom->hmem_ops.copy_from_hmem_iov(dest, size, - md->info.iface, - md->info.device, - &iov, 1, 0); + ret = md->dom->hmem_ops.copy_from_hmem_iov( + dest, size, md->info.iface, md->info.device, &iov, 1, + 0); assert(ret == size); } } -static inline void -cxip_ep_obj_copy_to_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, - const void *src, size_t size) +static inline void cxip_ep_obj_copy_to_md(struct cxip_ep_obj *ep, + struct cxip_md *md, void *dest, + const void *src, size_t size) { cxip_copy_to_md(md, dest, src, size, ep->require_dev_reg_copy[md->info.iface]); } -static inline void -cxip_ep_obj_copy_from_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, - const void *src, size_t size) +static inline void cxip_ep_obj_copy_from_md(struct cxip_ep_obj *ep, + struct cxip_md *md, void *dest, + const void *src, size_t size) { cxip_copy_from_md(md, dest, src, 
size, ep->require_dev_reg_copy[md->info.iface]); @@ -332,14 +329,14 @@ cxip_ep_obj_copy_from_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, static inline bool cxip_ep_obj_mr_relaxed_order(struct cxip_ep_obj *ep) { - if (cxip_env.mr_target_ordering == MR_ORDER_STRICT) + if (cxip_env.mr_target_ordering == MR_ORDER_STRICT) return false; - if (cxip_env.mr_target_ordering == MR_ORDER_RELAXED) + if (cxip_env.mr_target_ordering == MR_ORDER_RELAXED) return true; if ((ep->rx_attr.msg_order & FI_ORDER_RMA_WAW) && - ep->ep_attr.max_order_waw_size != 0) + ep->ep_attr.max_order_waw_size != 0) return false; if ((ep->rx_attr.msg_order & FI_ORDER_WAW) && @@ -437,7 +434,10 @@ static inline void _decompose_dbl(double d, int *sgn, int *exp, static inline void single_to_double_quote(char *str) { - do {if (*str == '\'') *str = '"';} while (*(++str)); + do { + if (*str == '\'') + *str = '"'; + } while (*(++str)); } static inline bool cxip_cmdq_empty(struct cxip_cmdq *cmdq) @@ -450,7 +450,7 @@ static inline bool cxip_cmdq_match(struct cxip_cmdq *cmdq, uint16_t vni, enum cxi_traffic_class_type tc_type) { return (cmdq->cur_cp->vni == vni) && (cmdq->cur_cp->tc == tc) && - (cmdq->cur_cp->tc_type == tc_type); + (cmdq->cur_cp->tc_type == tc_type); } static inline bool cxip_cmdq_prev_match(struct cxip_cmdq *cmdq, uint16_t vni, @@ -458,7 +458,7 @@ static inline bool cxip_cmdq_prev_match(struct cxip_cmdq *cmdq, uint16_t vni, enum cxi_traffic_class_type tc_type) { return (cmdq->prev_cp->vni == vni) && (cmdq->prev_cp->tc == tc) && - (cmdq->prev_cp->tc_type == tc_type); + (cmdq->prev_cp->tc_type == tc_type); } static inline struct fid_peer_srx *cxip_get_owner_srx(struct cxip_rxc *rxc) @@ -501,24 +501,24 @@ static inline int cxip_no_discard(struct fi_peer_rx_entry *rx_entry) return -FI_ENOSYS; } -static inline void -cxip_domain_add_txc(struct cxip_domain *dom, struct cxip_txc *txc) +static inline void cxip_domain_add_txc(struct cxip_domain *dom, + struct cxip_txc *txc) { 
ofi_spin_lock(&dom->lock); dlist_insert_tail(&txc->dom_entry, &dom->txc_list); ofi_spin_unlock(&dom->lock); } -static inline void -cxip_domain_remove_txc(struct cxip_domain *dom, struct cxip_txc *txc) +static inline void cxip_domain_remove_txc(struct cxip_domain *dom, + struct cxip_txc *txc) { ofi_spin_lock(&dom->lock); dlist_remove(&txc->dom_entry); ofi_spin_unlock(&dom->lock); } -static inline void -cxip_domain_add_cntr(struct cxip_domain *dom, struct cxip_cntr *cntr) +static inline void cxip_domain_add_cntr(struct cxip_domain *dom, + struct cxip_cntr *cntr) { ofi_spin_lock(&dom->lock); dlist_insert_tail(&cntr->dom_entry, &dom->cntr_list); @@ -526,8 +526,8 @@ cxip_domain_add_cntr(struct cxip_domain *dom, struct cxip_cntr *cntr) ofi_spin_unlock(&dom->lock); } -static inline void -cxip_domain_remove_cntr(struct cxip_domain *dom, struct cxip_cntr *cntr) +static inline void cxip_domain_remove_cntr(struct cxip_domain *dom, + struct cxip_cntr *cntr) { ofi_spin_lock(&dom->lock); dlist_remove(&cntr->dom_entry); @@ -535,8 +535,8 @@ cxip_domain_remove_cntr(struct cxip_domain *dom, struct cxip_cntr *cntr) ofi_spin_unlock(&dom->lock); } -static inline void -cxip_domain_add_cq(struct cxip_domain *dom, struct cxip_cq *cq) +static inline void cxip_domain_add_cq(struct cxip_domain *dom, + struct cxip_cq *cq) { ofi_spin_lock(&dom->lock); dlist_insert_tail(&cq->dom_entry, &dom->cq_list); @@ -544,8 +544,8 @@ cxip_domain_add_cq(struct cxip_domain *dom, struct cxip_cq *cq) ofi_spin_unlock(&dom->lock); } -static inline void -cxip_domain_remove_cq(struct cxip_domain *dom, struct cxip_cq *cq) +static inline void cxip_domain_remove_cq(struct cxip_domain *dom, + struct cxip_cq *cq) { ofi_spin_lock(&dom->lock); dlist_remove(&cq->dom_entry); @@ -553,9 +553,8 @@ cxip_domain_remove_cq(struct cxip_domain *dom, struct cxip_cq *cq) ofi_spin_unlock(&dom->lock); } -static inline -struct cxip_ctrl_req *cxip_domain_ctrl_id_at(struct cxip_domain *dom, - int buffer_id) +static inline struct 
cxip_ctrl_req * +cxip_domain_ctrl_id_at(struct cxip_domain *dom, int buffer_id) { if (ofi_idx_is_valid(&dom->req_ids, buffer_id)) return ofi_idx_at(&dom->req_ids, buffer_id); @@ -564,9 +563,8 @@ struct cxip_ctrl_req *cxip_domain_ctrl_id_at(struct cxip_domain *dom, static inline uint32_t cxip_mac_to_nic(struct ether_addr *mac) { - return mac->ether_addr_octet[5] | - (mac->ether_addr_octet[4] << 8) | - ((mac->ether_addr_octet[3] & 0xF) << 16); + return mac->ether_addr_octet[5] | (mac->ether_addr_octet[4] << 8) | + ((mac->ether_addr_octet[3] & 0xF) << 16); } static inline bool is_netsim(struct cxip_ep_obj *ep_obj) @@ -603,8 +601,7 @@ static inline int cxip_cacheline_size(void) f = fopen(CXIP_SYSFS_CACHE_LINE_SIZE, "r"); if (!f) { - _CXIP_WARN(FI_LOG_CORE, - "Error %d determining cacheline size\n", + _CXIP_WARN(FI_LOG_CORE, "Error %d determining cacheline size\n", errno); cache_line_size = CXIP_DEFAULT_CACHE_LINE_SIZE; } else { @@ -621,9 +618,9 @@ static inline int cxip_cacheline_size(void) return cache_line_size; } -static inline int -cxip_txc_copy_from_hmem(struct cxip_txc *txc, struct cxip_md *hmem_md, - void *dest, const void *hmem_src, size_t size) +static inline int cxip_txc_copy_from_hmem(struct cxip_txc *txc, + struct cxip_md *hmem_md, void *dest, + const void *hmem_src, size_t size) { enum fi_hmem_iface iface; uint64_t device; @@ -675,7 +672,7 @@ cxip_txc_copy_from_hmem(struct cxip_txc *txc, struct cxip_md *hmem_md, /* Slow path HMEM copy path.*/ iface = ofi_get_hmem_iface(hmem_src, &device, &flags); - hmem_iov.iov_base = (void *)hmem_src; + hmem_iov.iov_base = (void *) hmem_src; hmem_iov.iov_len = size; ret = domain->hmem_ops.copy_from_hmem_iov(dest, size, iface, device, @@ -696,9 +693,9 @@ cxip_txc_copy_from_hmem(struct cxip_txc *txc, struct cxip_md *hmem_md, return FI_SUCCESS; } -static inline -int cxip_set_recv_match_id(struct cxip_rxc *rxc, fi_addr_t src_addr, - bool auth_key, uint32_t *match_id, uint16_t *vni) +static inline int 
cxip_set_recv_match_id(struct cxip_rxc *rxc, + fi_addr_t src_addr, bool auth_key, + uint32_t *match_id, uint16_t *vni) { struct cxip_addr caddr; int ret; @@ -707,12 +704,11 @@ int cxip_set_recv_match_id(struct cxip_rxc *rxc, fi_addr_t src_addr, * in the LE for matching. If application AVs are symmetric, use * logical FI address for matching. Otherwise, use physical address. */ - if (rxc->attr.caps & FI_DIRECTED_RECV && - src_addr != FI_ADDR_UNSPEC) { + if (rxc->attr.caps & FI_DIRECTED_RECV && src_addr != FI_ADDR_UNSPEC) { if (rxc->ep_obj->av->symmetric) { /* PID is not used for matching */ - *match_id = CXI_MATCH_ID(rxc->pid_bits, - C_PID_ANY, src_addr); + *match_id = CXI_MATCH_ID(rxc->pid_bits, C_PID_ANY, + src_addr); *vni = rxc->ep_obj->auth_key.vni; } else { ret = cxip_av_lookup_addr(rxc->ep_obj->av, src_addr, @@ -754,14 +750,16 @@ static inline void cxip_set_env_rx_match_mode(void) "Enable/Disable low LE preemptive recv transitions."); fi_param_get_bool(&cxip_prov, "hybrid_recv_preemptive", &cxip_env.hybrid_recv_preemptive); - fi_param_define(&cxip_prov, "hybrid_unexpected_msg_preemptive", - FI_PARAM_BOOL, - "Enable preemptive transition to software endpoint when number of hardware unexpected messages exceeds RX attribute size"); + fi_param_define( + &cxip_prov, "hybrid_unexpected_msg_preemptive", FI_PARAM_BOOL, + "Enable preemptive transition to software endpoint when number " + "of hardware unexpected messages exceeds RX attribute size"); fi_param_get_bool(&cxip_prov, "hybrid_unexpected_msg_preemptive", &cxip_env.hybrid_unexpected_msg_preemptive); - fi_param_define(&cxip_prov, "hybrid_posted_recv_preemptive", - FI_PARAM_BOOL, - "Enable preemptive transition to software endpoint when number of posted receives exceeds RX attribute size"); + fi_param_define( + &cxip_prov, "hybrid_posted_recv_preemptive", FI_PARAM_BOOL, + "Enable preemptive transition to software endpoint when number " + "of posted receives exceeds RX attribute size"); 
fi_param_get_bool(&cxip_prov, "hybrid_posted_recv_preemptive", &cxip_env.hybrid_posted_recv_preemptive); @@ -776,8 +774,9 @@ static inline void cxip_set_env_rx_match_mode(void) cxip_env.rx_match_mode = CXIP_PTLTE_HYBRID_MODE; cxip_env.msg_offload = true; } else { - _CXIP_WARN(FI_LOG_FABRIC, "Unrecognized rx_match_mode: %s\n", - param_str); + _CXIP_WARN(FI_LOG_FABRIC, + "Unrecognized rx_match_mode: %s\n", + param_str); cxip_env.rx_match_mode = CXIP_PTLTE_HARDWARE_MODE; cxip_env.msg_offload = true; } @@ -786,24 +785,28 @@ static inline void cxip_set_env_rx_match_mode(void) if (cxip_env.rx_match_mode != CXIP_PTLTE_HYBRID_MODE && cxip_env.hybrid_preemptive) { cxip_env.hybrid_preemptive = false; - _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignoring preemptive\n"); + _CXIP_WARN(FI_LOG_FABRIC, + "Not in hybrid mode, ignoring preemptive\n"); } if (cxip_env.rx_match_mode != CXIP_PTLTE_HYBRID_MODE && cxip_env.hybrid_recv_preemptive) { - _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignore LE recv preemptive\n"); + _CXIP_WARN(FI_LOG_FABRIC, + "Not in hybrid mode, ignore LE recv preemptive\n"); cxip_env.hybrid_recv_preemptive = 0; } if (cxip_env.rx_match_mode != CXIP_PTLTE_HYBRID_MODE && cxip_env.hybrid_posted_recv_preemptive) { - _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignore hybrid_posted_recv_preemptive\n"); + _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignore " + "hybrid_posted_recv_preemptive\n"); cxip_env.hybrid_posted_recv_preemptive = 0; } if (cxip_env.rx_match_mode != CXIP_PTLTE_HYBRID_MODE && cxip_env.hybrid_unexpected_msg_preemptive) { - _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignore hybrid_unexpected_msg_preemptive\n"); + _CXIP_WARN(FI_LOG_FABRIC, "Not in hybrid mode, ignore " + "hybrid_unexpected_msg_preemptive\n"); cxip_env.hybrid_unexpected_msg_preemptive = 0; } } diff --git a/prov/cxi/include/cxip/addr.h b/prov/cxi/include/cxip/addr.h index fa74e155172..d86f57072f6 100644 --- a/prov/cxi/include/cxip/addr.h +++ 
b/prov/cxi/include/cxip/addr.h @@ -7,7 +7,6 @@ #ifndef _CXIP_ADDR_H_ #define _CXIP_ADDR_H_ - #include /* Macros */ @@ -27,9 +26,9 @@ /* Type definitions */ struct cxip_addr { - uint32_t pid : C_DFA_PID_BITS_MAX; - uint32_t nic : C_DFA_NIC_BITS; - uint32_t pad : 3; + uint32_t pid : C_DFA_PID_BITS_MAX; + uint32_t nic : C_DFA_NIC_BITS; + uint32_t pad : 3; uint16_t vni; }; diff --git a/prov/cxi/include/cxip/atomic.h b/prov/cxi/include/cxip/atomic.h index e7b4259388c..9d5454c5573 100644 --- a/prov/cxi/include/cxip/atomic.h +++ b/prov/cxi/include/cxip/atomic.h @@ -7,10 +7,9 @@ #ifndef _CXIP_ATOMIC_H_ #define _CXIP_ATOMIC_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_cntr; diff --git a/prov/cxi/include/cxip/auth.h b/prov/cxi/include/cxip/auth.h index 6d730527971..58ae68d6e7c 100644 --- a/prov/cxi/include/cxip/auth.h +++ b/prov/cxi/include/cxip/auth.h @@ -7,7 +7,6 @@ #ifndef _CXIP_AUTH_H_ #define _CXIP_AUTH_H_ - /* Function declarations */ int cxip_check_auth_key_info(struct fi_info *info); diff --git a/prov/cxi/include/cxip/av.h b/prov/cxi/include/cxip/av.h index ea4527f64c4..a2dd753a127 100644 --- a/prov/cxi/include/cxip/av.h +++ b/prov/cxi/include/cxip/av.h @@ -7,13 +7,12 @@ #ifndef _CXIP_AV_H_ #define _CXIP_AV_H_ - -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include /* Forward declarations */ struct cxip_addr; @@ -92,11 +91,11 @@ struct cxip_av { struct cxip_av_set { struct fid_av_set av_set_fid; - struct cxip_av *cxi_av; // associated AV - struct cxip_coll_mc *mc_obj; // reference MC - fi_addr_t *fi_addr_ary; // addresses in set - size_t fi_addr_cnt; // count of addresses - struct cxip_comm_key comm_key; // communication key + struct cxip_av *cxi_av; // associated AV + struct cxip_coll_mc *mc_obj; // reference MC + fi_addr_t *fi_addr_ary; // addresses in set + size_t fi_addr_cnt; // count of addresses + struct cxip_comm_key comm_key; // communication key uint64_t 
flags; }; @@ -128,6 +127,6 @@ int cxip_av_bind_ep(struct cxip_av *av, struct cxip_ep *ep); void cxip_av_unbind_ep(struct cxip_av *av, struct cxip_ep *ep); int cxip_av_set(struct fid_av *av, struct fi_av_set_attr *attr, - struct fid_av_set **av_set_fid, void * context); + struct fid_av_set **av_set_fid, void *context); #endif /* _CXIP_AV_H_ */ diff --git a/prov/cxi/include/cxip/cmdq.h b/prov/cxi/include/cxip/cmdq.h index a21b80aa172..958314e6fd1 100644 --- a/prov/cxi/include/cxip/cmdq.h +++ b/prov/cxi/include/cxip/cmdq.h @@ -7,10 +7,9 @@ #ifndef _CXIP_CMDQ_H_ #define _CXIP_CMDQ_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_lni; diff --git a/prov/cxi/include/cxip/cntr.h b/prov/cxi/include/cxip/cntr.h index 73de961ed84..457d8c7949d 100644 --- a/prov/cxi/include/cxip/cntr.h +++ b/prov/cxi/include/cxip/cntr.h @@ -7,11 +7,10 @@ #ifndef _CXIP_CNTR_H_ #define _CXIP_CNTR_H_ - -#include -#include -#include #include +#include +#include +#include /* Forward declarations */ struct cxip_cmdq; @@ -20,14 +19,14 @@ struct cxip_domain; /* Type definitions */ struct cxip_cntr { struct fid_cntr cntr_fid; - struct cxip_domain *domain; // parent domain + struct cxip_domain *domain; // parent domain ofi_atomic32_t ref; - struct fi_cntr_attr attr; // copy of user or default attributes + struct fi_cntr_attr attr; // copy of user or default attributes struct fid_wait *wait; /* Contexts to which counter is bound */ struct dlist_entry ctx_list; - /* Triggered cmdq for bound counters */ + /* Triggered cmdq for bound counters */ struct cxip_cmdq *trig_cmdq; struct ofi_genlock lock; @@ -43,7 +42,8 @@ struct cxip_cntr { struct dlist_entry dom_entry; /* Counter for number of operations which need progress. A separate lock - * is needed since these functions may be called without counter lock held. + * is needed since these functions may be called without counter lock + * held. 
*/ struct ofi_genlock progress_count_lock; int progress_count; diff --git a/prov/cxi/include/cxip/coll.h b/prov/cxi/include/cxip/coll.h index a3c8052cced..40a598b5318 100644 --- a/prov/cxi/include/cxip/coll.h +++ b/prov/cxi/include/cxip/coll.h @@ -7,13 +7,12 @@ #ifndef _CXIP_COLL_H_ #define _CXIP_COLL_H_ - -#include -#include -#include -#include #include +#include #include +#include +#include +#include /* Forward declarations */ struct coll_counters; @@ -30,52 +29,52 @@ struct cxip_req; struct cxip_zbcoll_obj; /* Macros */ -#define CXIP_COLL_MAX_CONCUR 8 +#define CXIP_COLL_MAX_CONCUR 8 -#define CXIP_COLL_MIN_RX_BUFS 8 +#define CXIP_COLL_MIN_RX_BUFS 8 -#define CXIP_COLL_MIN_RX_SIZE 131072 +#define CXIP_COLL_MIN_RX_SIZE 131072 -#define CXIP_COLL_MIN_MULTI_RECV 64 +#define CXIP_COLL_MIN_MULTI_RECV 64 -#define CXIP_COLL_MAX_DATA_SIZE 32 +#define CXIP_COLL_MAX_DATA_SIZE 32 -#define CXIP_COLL_MAX_SEQNO ((1 << 10) - 1) +#define CXIP_COLL_MAX_SEQNO ((1 << 10) - 1) -#define CXIP_COLL_MOD_SEQNO (CXIP_COLL_MAX_SEQNO - 1) +#define CXIP_COLL_MOD_SEQNO (CXIP_COLL_MAX_SEQNO - 1) -#define CXIP_COLL_MIN_RETRY_USEC 1 +#define CXIP_COLL_MIN_RETRY_USEC 1 -#define CXIP_COLL_MAX_RETRY_USEC 32000 +#define CXIP_COLL_MAX_RETRY_USEC 32000 -#define CXIP_COLL_MAX_LEAF_TIMEOUT_MULT 50 +#define CXIP_COLL_MAX_LEAF_TIMEOUT_MULT 50 -#define CXIP_COLL_MIN_TIMEOUT_USEC 1 +#define CXIP_COLL_MIN_TIMEOUT_USEC 1 -#define CXIP_COLL_MAX_TIMEOUT_USEC 20000000 +#define CXIP_COLL_MAX_TIMEOUT_USEC 20000000 /* Type definitions */ struct cxip_ep_coll_obj { - struct index_map mcast_map; // mc address -> object + struct index_map mcast_map; // mc address -> object struct dlist_entry root_retry_list; - struct dlist_entry mc_list; // list of mcast addresses - struct cxip_coll_pte *coll_pte; // PTE extensions - struct dlist_ts sched_list; // scheduled actions - struct cxip_cmdq *rx_cmdq; // shared with STD EP - struct cxip_cmdq *tx_cmdq; // shared with STD EP - struct cxip_cntr *rx_cntr; // shared with STD EP - 
struct cxip_cntr *tx_cntr; // shared with STD EP - struct cxip_evtq *rx_evtq; // shared with STD EP - struct cxip_evtq *tx_evtq; // shared with STD EP - struct cxip_eq *eq; // shared with STD EP - ofi_atomic32_t num_mc; // count of MC objects - ofi_atomic32_t join_cnt; // advanced on every join - size_t min_multi_recv; // trigger value to rotate bufs - size_t buffer_size; // size of receive buffers - size_t buffer_count; // count of receive buffers - bool join_busy; // serialize joins on a node - bool is_hwroot; // set if ep is hw_root - bool enabled; // enabled + struct dlist_entry mc_list; // list of mcast addresses + struct cxip_coll_pte *coll_pte; // PTE extensions + struct dlist_ts sched_list; // scheduled actions + struct cxip_cmdq *rx_cmdq; // shared with STD EP + struct cxip_cmdq *tx_cmdq; // shared with STD EP + struct cxip_cntr *rx_cntr; // shared with STD EP + struct cxip_cntr *tx_cntr; // shared with STD EP + struct cxip_evtq *rx_evtq; // shared with STD EP + struct cxip_evtq *tx_evtq; // shared with STD EP + struct cxip_eq *eq; // shared with STD EP + ofi_atomic32_t num_mc; // count of MC objects + ofi_atomic32_t join_cnt; // advanced on every join + size_t min_multi_recv; // trigger value to rotate bufs + size_t buffer_size; // size of receive buffers + size_t buffer_count; // count of receive buffers + bool join_busy; // serialize joins on a node + bool is_hwroot; // set if ep is hw_root + bool enabled; // enabled /* needed for progress after leaf sends its contribution */ struct dlist_entry leaf_rdma_get_list; /* used to change ctrl_msg_type to CXIP_CTRL_MSG_ZB_DATA_RDMA_LAC */ @@ -111,37 +110,38 @@ struct cxip_fltminmax { }; struct cxip_coll_buf { - struct dlist_entry buf_entry; // linked list of buffers - struct cxip_req *req; // associated LINK request - struct cxip_md *cxi_md; // buffer memory descriptor - size_t bufsiz; // buffer size in bytes - uint8_t buffer[]; // buffer space itself + struct dlist_entry buf_entry; // linked list of buffers + 
struct cxip_req *req; // associated LINK request + struct cxip_md *cxi_md; // buffer memory descriptor + size_t bufsiz; // buffer size in bytes + uint8_t buffer[]; // buffer space itself }; struct cxip_coll_pte { - struct cxip_pte *pte; // Collectives PTE - struct cxip_ep_obj *ep_obj; // Associated endpoint - struct cxip_coll_mc *mc_obj; // Associated multicast object - struct dlist_entry buf_list; // PTE receive buffers - ofi_atomic32_t buf_cnt; // count of linked buffers - ofi_atomic32_t buf_swap_cnt; // for diagnostics - ofi_atomic32_t recv_cnt; // for diagnostics - int buf_low_water; // for diagnostics - bool enabled; // enabled + struct cxip_pte *pte; // Collectives PTE + struct cxip_ep_obj *ep_obj; // Associated endpoint + struct cxip_coll_mc *mc_obj; // Associated multicast object + struct dlist_entry buf_list; // PTE receive buffers + ofi_atomic32_t buf_cnt; // count of linked buffers + ofi_atomic32_t buf_swap_cnt; // for diagnostics + ofi_atomic32_t recv_cnt; // for diagnostics + int buf_low_water; // for diagnostics + bool enabled; // enabled }; struct cxip_coll_data { union { - uint8_t databuf[32]; // raw data buffer - struct cxip_intval intval; // 4 integer values + flags - struct cxip_fltval fltval; // 4 double values + flags - struct cxip_iminmax intminmax; // 1 intminmax structure + flags - struct cxip_fltminmax fltminmax;// 1 fltminmax structure + flags - struct cxip_repsum repsum; // 1 repsum structure + flags + uint8_t databuf[32]; // raw data buffer + struct cxip_intval intval; // 4 integer values + flags + struct cxip_fltval fltval; // 4 double values + flags + struct cxip_iminmax intminmax; // 1 intminmax structure + flags + struct cxip_fltminmax + fltminmax; // 1 fltminmax structure + flags + struct cxip_repsum repsum; // 1 repsum structure + flags }; - cxip_coll_op_t red_op; // reduction opcode - cxip_coll_rc_t red_rc; // reduction return code - int red_cnt; // reduction contrib count + cxip_coll_op_t red_op; // reduction opcode + 
cxip_coll_rc_t red_rc; // reduction return code + int red_cnt; // reduction contrib count bool initialized; }; @@ -159,69 +159,69 @@ struct cxip_coll_metrics { }; struct cxip_coll_reduction { - struct cxip_coll_mc *mc_obj; // parent mc_obj - uint32_t red_id; // reduction id - uint16_t seqno; // reduction sequence number - uint16_t resno; // reduction result number - struct cxip_req *op_inject_req; // active operation request - enum cxip_coll_state coll_state; // reduction state on node - struct cxip_coll_data accum; // reduction accumulator - struct cxip_coll_data backup; // copy of above - void *op_rslt_data; // user recv buffer (or NULL) - int op_data_bytcnt; // bytes in send/recv buffers - void *op_context; // caller's context - bool in_use; // reduction is in-use - bool pktsent; // reduction packet sent - bool completed; // reduction is completed - bool rdma_get_sent; // rdma get from leaf to root - bool rdma_get_completed; // rdma get completed - int rdma_get_cb_rc; // rdma get status - uint64_t leaf_contrib_start_us; // leaf ts after contrib send - bool drop_send; // drop the next send operation - bool drop_recv; // drop the next recv operation - enum cxip_coll_rc red_rc; // set by first error - struct timespec tv_expires; // need to retry? 
- struct timespec arm_expires; // RE expiration time for this red_id - struct dlist_entry tmout_link; // link to timeout list - uint8_t tx_msg[64]; // static packet memory + struct cxip_coll_mc *mc_obj; // parent mc_obj + uint32_t red_id; // reduction id + uint16_t seqno; // reduction sequence number + uint16_t resno; // reduction result number + struct cxip_req *op_inject_req; // active operation request + enum cxip_coll_state coll_state; // reduction state on node + struct cxip_coll_data accum; // reduction accumulator + struct cxip_coll_data backup; // copy of above + void *op_rslt_data; // user recv buffer (or NULL) + int op_data_bytcnt; // bytes in send/recv buffers + void *op_context; // caller's context + bool in_use; // reduction is in-use + bool pktsent; // reduction packet sent + bool completed; // reduction is completed + bool rdma_get_sent; // rdma get from leaf to root + bool rdma_get_completed; // rdma get completed + int rdma_get_cb_rc; // rdma get status + uint64_t leaf_contrib_start_us; // leaf ts after contrib send + bool drop_send; // drop the next send operation + bool drop_recv; // drop the next recv operation + enum cxip_coll_rc red_rc; // set by first error + struct timespec tv_expires; // need to retry? 
+ struct timespec arm_expires; // RE expiration time for this red_id + struct dlist_entry tmout_link; // link to timeout list + uint8_t tx_msg[64]; // static packet memory }; struct cxip_coll_mc { struct fid_mc mc_fid; - struct dlist_entry entry; // Link to mc object list - struct cxip_ep_obj *ep_obj; // Associated endpoint - struct cxip_av_set *av_set_obj; // associated AV set - struct cxip_zbcoll_obj *zb; // zb object for zbcol - struct cxip_coll_pte *coll_pte; // collective PTE - struct timespec rootexpires; // root wait expiration timeout - struct timespec leafexpires; // leaf wait expiration timeout - struct timespec curlexpires; // CURL delete expiration timeout - fi_addr_t mynode_fiaddr; // fi_addr of this node - int mynode_idx; // av_set index of this node - uint32_t hwroot_idx; // av_set index of hwroot node - uint32_t mcast_addr; // multicast target address - int tail_red_id; // tail active red_id - int next_red_id; // next available red_id - int max_red_id; // limit total concurrency - int seqno; // rolling seqno for packets - int close_state; // the state of the close operation - bool has_closed; // true after a mc close call - bool has_error; // true if any error - bool is_multicast; // true if multicast address - bool arm_disable; // arm-disable for testing - bool retry_disable; // retry-disable for testing - bool is_joined; // true if joined - bool rx_discard; // true to discard RX events - enum cxi_traffic_class tc; // traffic class - enum cxi_traffic_class_type tc_type; // traffic class type - ofi_atomic32_t send_cnt; // for diagnostics - ofi_atomic32_t recv_cnt; // for diagnostics - ofi_atomic32_t pkt_cnt; // for diagnostics - ofi_atomic32_t seq_err_cnt; // for diagnostics - ofi_atomic32_t tmout_cnt; // for diagnostics + struct dlist_entry entry; // Link to mc object list + struct cxip_ep_obj *ep_obj; // Associated endpoint + struct cxip_av_set *av_set_obj; // associated AV set + struct cxip_zbcoll_obj *zb; // zb object for zbcol + struct 
cxip_coll_pte *coll_pte; // collective PTE + struct timespec rootexpires; // root wait expiration timeout + struct timespec leafexpires; // leaf wait expiration timeout + struct timespec curlexpires; // CURL delete expiration timeout + fi_addr_t mynode_fiaddr; // fi_addr of this node + int mynode_idx; // av_set index of this node + uint32_t hwroot_idx; // av_set index of hwroot node + uint32_t mcast_addr; // multicast target address + int tail_red_id; // tail active red_id + int next_red_id; // next available red_id + int max_red_id; // limit total concurrency + int seqno; // rolling seqno for packets + int close_state; // the state of the close operation + bool has_closed; // true after a mc close call + bool has_error; // true if any error + bool is_multicast; // true if multicast address + bool arm_disable; // arm-disable for testing + bool retry_disable; // retry-disable for testing + bool is_joined; // true if joined + bool rx_discard; // true to discard RX events + enum cxi_traffic_class tc; // traffic class + enum cxi_traffic_class_type tc_type; // traffic class type + ofi_atomic32_t send_cnt; // for diagnostics + ofi_atomic32_t recv_cnt; // for diagnostics + ofi_atomic32_t pkt_cnt; // for diagnostics + ofi_atomic32_t seq_err_cnt; // for diagnostics + ofi_atomic32_t tmout_cnt; // for diagnostics ofi_spin_t lock; - struct cxi_md *reduction_md; // memory descriptor for DMA + struct cxi_md *reduction_md; // memory descriptor for DMA struct cxip_coll_reduction reduction[CXIP_COLL_MAX_CONCUR]; /* Logical address context for leaf rdma get */ uint64_t rdma_get_lac_va_tx; @@ -256,20 +256,19 @@ void cxip_coll_close(struct cxip_ep_obj *ep_obj); void cxip_coll_populate_opcodes(void); -int cxip_coll_send(struct cxip_coll_reduction *reduction, - int av_set_idx, const void *buffer, size_t buflen, - struct cxi_md *md); +int cxip_coll_send(struct cxip_coll_reduction *reduction, int av_set_idx, + const void *buffer, size_t buflen, struct cxi_md *md); int 
cxip_coll_send_red_pkt(struct cxip_coll_reduction *reduction, - const struct cxip_coll_data *coll_data, - bool arm, bool retry, bool root_result_pkt); + const struct cxip_coll_data *coll_data, bool arm, + bool retry, bool root_result_pkt); void cxip_capture_red_id(int *red_id_buf); ssize_t cxip_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context); -ssize_t cxip_broadcast(struct fid_ep *ep, void *buf, size_t count, - void *desc, fi_addr_t coll_addr, fi_addr_t root_addr, +ssize_t cxip_broadcast(struct fid_ep *ep, void *buf, size_t count, void *desc, + fi_addr_t coll_addr, fi_addr_t root_addr, enum fi_datatype datatype, uint64_t flags, void *context); @@ -285,8 +284,8 @@ ssize_t cxip_allreduce(struct fid_ep *ep, const void *buf, size_t count, enum fi_op op, uint64_t flags, void *context); int cxip_join_collective(struct fid_ep *ep, fi_addr_t coll_addr, - const struct fid_av_set *coll_av_set, - uint64_t flags, struct fid_mc **mc, void *context); + const struct fid_av_set *coll_av_set, uint64_t flags, + struct fid_mc **mc, void *context); void cxip_coll_progress_join(struct cxip_ep_obj *ep_obj); diff --git a/prov/cxi/include/cxip/coll_trace.h b/prov/cxi/include/cxip/coll_trace.h index 8bb7fbdbebd..7163829caba 100644 --- a/prov/cxi/include/cxip/coll_trace.h +++ b/prov/cxi/include/cxip/coll_trace.h @@ -7,16 +7,17 @@ #ifndef _CXIP_COLL_TRACE_H_ #define _CXIP_COLL_TRACE_H_ - /* Forward declarations */ struct cxip_ep_obj; /* Macros */ -#define cxip_coll_trace_attr __attribute__((format(__printf__, 1, 2))) +#define cxip_coll_trace_attr __attribute__((format(__printf__, 1, 2))) -#define CXIP_COLL_TRACE(mod, fmt, ...) \ - do {if (cxip_coll_prod_trace_true()) \ - cxip_coll_prod_trace(fmt, ##__VA_ARGS__); } while (0) +#define CXIP_COLL_TRACE(mod, fmt, ...) 
\ + do { \ + if (cxip_coll_prod_trace_true()) \ + cxip_coll_prod_trace(fmt, ##__VA_ARGS__); \ + } while (0) /* Function declarations */ int cxip_coll_trace_attr cxip_coll_trace(const char *fmt, ...); diff --git a/prov/cxi/include/cxip/common.h b/prov/cxi/include/cxip/common.h index 803fd5c1744..e301f7cc871 100644 --- a/prov/cxi/include/cxip/common.h +++ b/prov/cxi/include/cxip/common.h @@ -7,9 +7,8 @@ #ifndef _CXIP_COMMON_H_ #define _CXIP_COMMON_H_ - -#include #include +#include /* Forward declarations */ struct cxip_domain; @@ -21,159 +20,144 @@ struct cxip_ux_send; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define FLOOR(a, b) ((long long)(a) - (((long long)(a)) % (b))) +#define FLOOR(a, b) ((long long) (a) - (((long long) (a)) % (b))) -#define CEILING(a, b) ((long long)(a) <= 0LL ? 0 : (FLOOR((a)-1, b) + (b))) +#define CEILING(a, b) ((long long) (a) <= 0LL ? 0 : (FLOOR((a) - 1, b) + (b))) #define CXIP_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -#define CXIP_ALIGN(x, a) CXIP_ALIGN_MASK(x, (typeof(x))(a) - 1) +#define CXIP_ALIGN(x, a) CXIP_ALIGN_MASK(x, (typeof(x)) (a) - 1) #define CXIP_ALIGN_DOWN(x, a) CXIP_ALIGN((x) - ((a) - 1), (a)) -#define CXIP_PATH_MAX 256 +#define CXIP_PATH_MAX 256 -#define CXIP_BUFFER_ID_MAX (1 << 16) +#define CXIP_BUFFER_ID_MAX (1 << 16) -#define CXIP_TX_COMP_MODES (FI_INJECT_COMPLETE | \ - FI_TRANSMIT_COMPLETE | \ - FI_DELIVERY_COMPLETE | \ - FI_MATCH_COMPLETE) +#define CXIP_TX_COMP_MODES \ + (FI_INJECT_COMPLETE | FI_TRANSMIT_COMPLETE | FI_DELIVERY_COMPLETE | \ + FI_MATCH_COMPLETE) -#define CXIP_TX_OP_FLAGS (FI_INJECT | \ - FI_COMPLETION | \ - CXIP_TX_COMP_MODES | \ - FI_REMOTE_CQ_DATA | \ - FI_MORE | \ - FI_FENCE) +#define CXIP_TX_OP_FLAGS \ + (FI_INJECT | FI_COMPLETION | CXIP_TX_COMP_MODES | FI_REMOTE_CQ_DATA | \ + FI_MORE | FI_FENCE) -#define CXIP_RX_OP_FLAGS (FI_COMPLETION | \ - FI_MULTI_RECV | \ - FI_MORE) +#define CXIP_RX_OP_FLAGS (FI_COMPLETION | FI_MULTI_RECV | FI_MORE) -#define CXIP_RX_IGNORE_OP_FLAGS 
(FI_REMOTE_CQ_DATA | \ - FI_INJECT) +#define CXIP_RX_IGNORE_OP_FLAGS (FI_REMOTE_CQ_DATA | FI_INJECT) -#define CXIP_WRITEMSG_ALLOWED_FLAGS (FI_INJECT | \ - FI_COMPLETION | \ - FI_MORE | \ - FI_FENCE | \ - CXIP_TX_COMP_MODES) +#define CXIP_WRITEMSG_ALLOWED_FLAGS \ + (FI_INJECT | FI_COMPLETION | FI_MORE | FI_FENCE | CXIP_TX_COMP_MODES) -#define CXIP_READMSG_ALLOWED_FLAGS (FI_COMPLETION | \ - FI_MORE | \ - FI_FENCE | \ - CXIP_TX_COMP_MODES) +#define CXIP_READMSG_ALLOWED_FLAGS \ + (FI_COMPLETION | FI_MORE | FI_FENCE | CXIP_TX_COMP_MODES) -#define CXIP_AMO_MAX_IOV 1 +#define CXIP_AMO_MAX_IOV 1 -#define CXIP_REMOTE_CQ_DATA_SZ 8 +#define CXIP_REMOTE_CQ_DATA_SZ 8 -#define CXIP_RDZV_THRESHOLD 16384 +#define CXIP_RDZV_THRESHOLD 16384 -#define CXIP_OFLOW_BUF_SIZE (12*1024*1024) +#define CXIP_OFLOW_BUF_SIZE (12 * 1024 * 1024) -#define CXIP_OFLOW_BUF_MIN_POSTED 3 +#define CXIP_OFLOW_BUF_MIN_POSTED 3 -#define CXIP_OFLOW_BUF_MAX_CACHED (CXIP_OFLOW_BUF_MIN_POSTED * 3) +#define CXIP_OFLOW_BUF_MAX_CACHED (CXIP_OFLOW_BUF_MIN_POSTED * 3) -#define CXIP_DEFAULT_MR_CACHE_MAX_CNT 4096 +#define CXIP_DEFAULT_MR_CACHE_MAX_CNT 4096 -#define CXIP_DEFAULT_MR_CACHE_MAX_SIZE -1 +#define CXIP_DEFAULT_MR_CACHE_MAX_SIZE -1 -#define CXIP_SAFE_DEVMEM_COPY_THRESH 4096 +#define CXIP_SAFE_DEVMEM_COPY_THRESH 4096 #define CXIP_CAPS (CXIP_DOM_CAPS | CXIP_EP_CAPS) -#define CXIP_INJECT_SIZE C_MAX_IDC_PAYLOAD_UNR +#define CXIP_INJECT_SIZE C_MAX_IDC_PAYLOAD_UNR -#define CXIP_MAX_TX_SIZE 16384U +#define CXIP_MAX_TX_SIZE 16384U -#define CXIP_DEFAULT_TX_SIZE 1024U +#define CXIP_DEFAULT_TX_SIZE 1024U -#define CXI_PROV_LE_PER_EP 1024U +#define CXI_PROV_LE_PER_EP 1024U -#define LES_PER_EP_MAX 16384U +#define LES_PER_EP_MAX 16384U -#define CXIP_MAX_RX_SIZE (LES_PER_EP_MAX - CXI_PROV_LE_PER_EP) +#define CXIP_MAX_RX_SIZE (LES_PER_EP_MAX - CXI_PROV_LE_PER_EP) -#define CXIP_DEFAULT_RX_SIZE 1024U +#define CXIP_DEFAULT_RX_SIZE 1024U -#define CXIP_MAJOR_VERSION 0 +#define CXIP_MAJOR_VERSION 0 -#define CXIP_MINOR_VERSION 1 
+#define CXIP_MINOR_VERSION 1 -#define CXIP_PROV_VERSION FI_VERSION(CXIP_MAJOR_VERSION, \ - CXIP_MINOR_VERSION) +#define CXIP_PROV_VERSION FI_VERSION(CXIP_MAJOR_VERSION, CXIP_MINOR_VERSION) -#define CXIP_FI_VERSION FI_VERSION(2, 4) +#define CXIP_FI_VERSION FI_VERSION(2, 4) -#define CXIP_WIRE_PROTO_VERSION 1 +#define CXIP_WIRE_PROTO_VERSION 1 #define CXIP_PAUSE() -#define CXIP_PTL_IDX_RXQ 0 +#define CXIP_PTL_IDX_RXQ 0 -#define CXIP_PTL_IDX_RNR_RXQ 1 +#define CXIP_PTL_IDX_RNR_RXQ 1 -#define CXIP_PTL_IDX_WRITE_MR_OPT_BASE 17 +#define CXIP_PTL_IDX_WRITE_MR_OPT_BASE 17 -#define CXIP_PTL_IDX_READ_MR_OPT_BASE 128 +#define CXIP_PTL_IDX_READ_MR_OPT_BASE 128 -#define CXIP_PTL_IDX_MR_OPT_CNT 100 +#define CXIP_PTL_IDX_MR_OPT_CNT 100 -#define CXIP_PTL_IDX_PROV_NUM_CACHE_IDX 8 +#define CXIP_PTL_IDX_PROV_NUM_CACHE_IDX 8 -#define CXIP_PTL_IDX_PROV_MR_OPT_CNT \ +#define CXIP_PTL_IDX_PROV_MR_OPT_CNT \ (CXIP_PTL_IDX_MR_OPT_CNT - CXIP_PTL_IDX_PROV_NUM_CACHE_IDX) -#define CXIP_PTL_IDX_WRITE_MR_OPT(key) \ - (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + \ - CXIP_MR_UNCACHED_KEY_TO_IDX(key)) +#define CXIP_PTL_IDX_WRITE_MR_OPT(key) \ + (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + CXIP_MR_UNCACHED_KEY_TO_IDX(key)) -#define CXIP_PTL_IDX_READ_MR_OPT(key) \ - (CXIP_PTL_IDX_READ_MR_OPT_BASE + \ - CXIP_MR_UNCACHED_KEY_TO_IDX(key)) +#define CXIP_PTL_IDX_READ_MR_OPT(key) \ + (CXIP_PTL_IDX_READ_MR_OPT_BASE + CXIP_MR_UNCACHED_KEY_TO_IDX(key)) -#define CXIP_PTL_IDX_WRITE_PROV_CACHE_MR_OPT(lac) \ +#define CXIP_PTL_IDX_WRITE_PROV_CACHE_MR_OPT(lac) \ (CXIP_PTL_IDX_WRITE_MR_OPT_BASE + (lac)) -#define CXIP_PTL_IDX_READ_PROV_CACHE_MR_OPT(lac) \ +#define CXIP_PTL_IDX_READ_PROV_CACHE_MR_OPT(lac) \ (CXIP_PTL_IDX_READ_MR_OPT_BASE + (lac)) -#define CXIP_PTL_IDX_WRITE_MR_STD 117 +#define CXIP_PTL_IDX_WRITE_MR_STD 117 -#define CXIP_PTL_IDX_RDZV_DEST 127 +#define CXIP_PTL_IDX_RDZV_DEST 127 -#define CXIP_PTL_IDX_COLL 6 +#define CXIP_PTL_IDX_COLL 6 -#define CXIP_PTL_IDX_CTRL CXIP_PTL_IDX_WRITE_MR_STD +#define CXIP_PTL_IDX_CTRL 
CXIP_PTL_IDX_WRITE_MR_STD -#define CXIP_PTL_IDX_READ_MR_STD 228 +#define CXIP_PTL_IDX_READ_MR_STD 228 -#define CXIP_PTL_IDX_RDZV_RESTRICTED_BASE 229 +#define CXIP_PTL_IDX_RDZV_RESTRICTED_BASE 229 -#define CXIP_PTL_IDX_RDZV_RESTRICTED(lac) \ +#define CXIP_PTL_IDX_RDZV_RESTRICTED(lac) \ (CXIP_PTL_IDX_RDZV_RESTRICTED_BASE + (lac)) -#define CXIP_PTL_IDX_RDZV_SRC 255 +#define CXIP_PTL_IDX_RDZV_SRC 255 #define CXIP_NUM_CACHED_KEY_LE 8 -#define CXIP_TX_ID_WIDTH 11 +#define CXIP_TX_ID_WIDTH 11 -#define CXIP_RDZV_ID_CMD_WIDTH 8 +#define CXIP_RDZV_ID_CMD_WIDTH 8 #define CXIP_RDZV_ID_HIGH_WIDTH 7 -#define CXIP_TOTAL_RDZV_ID_WIDTH (CXIP_RDZV_ID_CMD_WIDTH + \ - CXIP_RDZV_ID_HIGH_WIDTH) +#define CXIP_TOTAL_RDZV_ID_WIDTH \ + (CXIP_RDZV_ID_CMD_WIDTH + CXIP_RDZV_ID_HIGH_WIDTH) -#define CXIP_CS_TAG_WIDTH 40 +#define CXIP_CS_TAG_WIDTH 40 -#define CXIP_VNI_WIDTH 16 +#define CXIP_VNI_WIDTH 16 -#define CXIP_CS_TAG_MASK ((1UL << CXIP_CS_TAG_WIDTH) - 1) +#define CXIP_CS_TAG_MASK ((1UL << CXIP_CS_TAG_WIDTH) - 1) #define CXIP_IS_PROV_MR_KEY_BIT (1ULL << 63) @@ -191,11 +175,11 @@ struct cxip_ux_send; #define TELEMETRY_ENTRY_NAME_SIZE 64U -#define CXIP_DEF_EVENT_HT_BUCKETS 256 +#define CXIP_DEF_EVENT_HT_BUCKETS 256 -#define ZB_NOSIM -1 +#define ZB_NOSIM -1 -#define ZB_ALLSIM -2 +#define ZB_ALLSIM -2 #define CXIP_COUNTER_BUCKETS 31U @@ -203,39 +187,39 @@ struct cxip_ux_send; #define CXIP_LIST_COUNTS 3U -#define CXIP_SW_RX_TX_INIT_MAX_DEFAULT 1024 +#define CXIP_SW_RX_TX_INIT_MAX_DEFAULT 1024 -#define CXIP_SW_RX_TX_INIT_MIN 64 +#define CXIP_SW_RX_TX_INIT_MIN 64 #define CXIP_DONE_NOTIFY_RETRY_DELAY_US 100 -#define CXIP_RDZV_IDS (1 << CXIP_TOTAL_RDZV_ID_WIDTH) +#define CXIP_RDZV_IDS (1 << CXIP_TOTAL_RDZV_ID_WIDTH) #define CXIP_RDZV_IDS_MULTI_RECV (1 << CXIP_RDZV_ID_CMD_WIDTH) -#define CXIP_TX_IDS (1 << CXIP_TX_ID_WIDTH) +#define CXIP_TX_IDS (1 << CXIP_TX_ID_WIDTH) #define RDZV_SRC_LES 8U #define RDZV_NO_MATCH_PTES 8U -#define CXIP_RNR_TIMEOUT_US 500000 +#define CXIP_RNR_TIMEOUT_US 500000 -#define 
CXIP_NUM_RNR_WAIT_QUEUE 5 +#define CXIP_NUM_RNR_WAIT_QUEUE 5 -#define _CXIP_DBG(subsys, fmt, ...) \ +#define _CXIP_DBG(subsys, fmt, ...) \ FI_DBG(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ ##__VA_ARGS__) -#define _CXIP_INFO(subsys, fmt, ...) \ +#define _CXIP_INFO(subsys, fmt, ...) \ FI_INFO(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ ##__VA_ARGS__) -#define _CXIP_WARN(subsys, fmt, ...) \ +#define _CXIP_WARN(subsys, fmt, ...) \ FI_WARN(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ ##__VA_ARGS__) -#define _CXIP_WARN_ONCE(subsys, fmt, ...) \ +#define _CXIP_WARN_ONCE(subsys, fmt, ...) \ FI_WARN_ONCE(&cxip_prov, subsys, "%s: " fmt "", cxip_env.hostname, \ ##__VA_ARGS__) @@ -245,12 +229,13 @@ struct cxip_ux_send; #define CXIP_DEFAULT_CACHE_LINE_SIZE 64 -#define CXIP_SYSFS_CACHE_LINE_SIZE \ +#define CXIP_SYSFS_CACHE_LINE_SIZE \ "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" -#define CXIP_HYBRID_RECV_CHECK_INTERVAL (64-1) +#define CXIP_HYBRID_RECV_CHECK_INTERVAL (64 - 1) -#define FC_SW_LE_MSG_FATAL "LE exhaustion during flow control, "\ +#define FC_SW_LE_MSG_FATAL \ + "LE exhaustion during flow control, " \ "FI_CXI_RX_MATCH_MODE=[hybrid|software] is required\n" /* Type definitions */ @@ -265,27 +250,27 @@ struct cxip_topo_addr { union { uint32_t addr; struct { - uint32_t port_num:CXIP_ADDR_PORT_BITS; - uint32_t switch_num:CXIP_ADDR_SWITCH_BITS; - uint32_t group_num:CXIP_ADDR_GROUP_BITS; + uint32_t port_num : CXIP_ADDR_PORT_BITS; + uint32_t switch_num : CXIP_ADDR_SWITCH_BITS; + uint32_t group_num : CXIP_ADDR_GROUP_BITS; } dragonfly; struct { - uint32_t port_num:CXIP_ADDR_FATTREE_PORT_BITS; - uint32_t switch_num:CXIP_ADDR_FATTREE_SWITCH_BITS; + uint32_t port_num : CXIP_ADDR_FATTREE_PORT_BITS; + uint32_t switch_num : CXIP_ADDR_FATTREE_SWITCH_BITS; } fat_tree; }; }; union cxip_def_event_key { struct { - uint64_t initiator : 32; - uint64_t rdzv_id : 15; - uint64_t pad0 : 16; - uint64_t rdzv : 1; + uint64_t initiator : 32; + 
uint64_t rdzv_id : 15; + uint64_t pad0 : 16; + uint64_t rdzv : 1; }; struct { - uint64_t start_addr : 57; - uint64_t pad1 : 7; + uint64_t start_addr : 57; + uint64_t pad1 : 7; }; uint64_t raw; }; diff --git a/prov/cxi/include/cxip/cq.h b/prov/cxi/include/cxip/cq.h index d1dd7b84d55..485b752dc49 100644 --- a/prov/cxi/include/cxip/cq.h +++ b/prov/cxi/include/cxip/cq.h @@ -7,17 +7,16 @@ #ifndef _CXIP_CQ_H_ #define _CXIP_CQ_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_domain; struct cxip_req; /* Macros */ -#define CXIP_CQ_DEF_SZ 131072U +#define CXIP_CQ_DEF_SZ 131072U /* Type definitions */ struct cxip_cq_eq { @@ -62,9 +61,9 @@ int cxip_cq_req_complete(struct cxip_req *req); int cxip_cq_req_complete_addr(struct cxip_req *req, fi_addr_t src); -int cxip_cq_req_error(struct cxip_req *req, size_t olen, - int err, int prov_errno, void *err_data, - size_t err_data_size, fi_addr_t src_addr); +int cxip_cq_req_error(struct cxip_req *req, size_t olen, int err, + int prov_errno, void *err_data, size_t err_data_size, + fi_addr_t src_addr); int cxip_cq_add_wait_fd(struct cxip_cq *cq, int wait_fd, int events); diff --git a/prov/cxi/include/cxip/ctrl.h b/prov/cxi/include/cxip/ctrl.h index c6228b77b1d..b1ed7385b85 100644 --- a/prov/cxi/include/cxip/ctrl.h +++ b/prov/cxi/include/cxip/ctrl.h @@ -7,9 +7,8 @@ #ifndef _CXIP_CTRL_H_ #define _CXIP_CTRL_H_ - -#include #include +#include /* Forward declarations */ struct cxip_cmdq; diff --git a/prov/cxi/include/cxip/curl.h b/prov/cxi/include/cxip/curl.h index bed007eead7..f8e2e92f100 100644 --- a/prov/cxi/include/cxip/curl.h +++ b/prov/cxi/include/cxip/curl.h @@ -7,21 +7,20 @@ #ifndef _CXIP_CURL_H_ #define _CXIP_CURL_H_ - -#include -#include #include +#include +#include /* Type definitions */ struct cxip_curl_handle { - long status; // HTTP status, 0 for no server, -1 busy - const char *endpoint; // HTTP server endpoint address - const char *request; // HTTP request data - const char *response; 
// HTTP response data, NULL until complete - curlcomplete_t usrfunc; // user completion function - void *usrptr; // user function argument - void *recv; // opaque - void *headers; // opaque + long status; // HTTP status, 0 for no server, -1 busy + const char *endpoint; // HTTP server endpoint address + const char *request; // HTTP request data + const char *response; // HTTP response data, NULL until complete + curlcomplete_t usrfunc; // user completion function + void *usrptr; // user function argument + void *recv; // opaque + void *headers; // opaque }; /* Function declarations */ @@ -33,8 +32,8 @@ const char *cxip_curl_opname(enum curl_ops op); int cxip_curl_perform(const char *endpoint, const char *request, const char *sessionToken, size_t rsp_init_size, - enum curl_ops op, bool verbose, - curlcomplete_t usrfunc, void *usrptr); + enum curl_ops op, bool verbose, curlcomplete_t usrfunc, + void *usrptr); int cxip_curl_progress(struct cxip_curl_handle **handleptr); diff --git a/prov/cxi/include/cxip/dom.h b/prov/cxi/include/cxip/dom.h index 32174f8cb64..b2c14a42bba 100644 --- a/prov/cxi/include/cxip/dom.h +++ b/prov/cxi/include/cxip/dom.h @@ -7,14 +7,13 @@ #ifndef _CXIP_DOM_H_ #define _CXIP_DOM_H_ - -#include -#include -#include -#include -#include #include +#include #include +#include +#include +#include +#include /* Forward declarations */ struct cxip_cmdq; @@ -30,16 +29,16 @@ struct cxip_telemetry; /* Macros */ #define CXIP_DOM_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM | FI_AV_USER_ID | FI_PEER) -#define DOM_INFO(dom, fmt, ...) \ +#define DOM_INFO(dom, fmt, ...) \ _CXIP_INFO(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ - (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ - (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ + (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ + (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ (dom)->nic_addr, ##__VA_ARGS__) -#define DOM_WARN(dom, fmt, ...) \ +#define DOM_WARN(dom, fmt, ...) 
\ _CXIP_WARN(FI_LOG_DOMAIN, "DOM (cxi%u:%u:%u:%u:%#x): " fmt "", \ - (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ - (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ + (dom)->iface->info->dev_id, (dom)->lni->lni->id, \ + (dom)->auth_key.svc_id, (dom)->auth_key.vni, \ (dom)->nic_addr, ##__VA_ARGS__) /* Type definitions */ @@ -59,8 +58,8 @@ struct cxip_domain { uint32_t tclass; - struct cxip_eq *eq; //unused - struct cxip_eq *mr_eq; //unused + struct cxip_eq *eq; // unused + struct cxip_eq *mr_eq; // unused /* Assigned NIC address */ uint32_t nic_addr; @@ -170,7 +169,6 @@ struct cxip_domain { enum cxip_ep_ptle_mode rx_match_mode; bool msg_offload; size_t req_buf_size; - }; /* Function declarations */ @@ -214,11 +212,9 @@ int cxip_domain_ctrl_id_alloc(struct cxip_domain *dom, void cxip_domain_ctrl_id_free(struct cxip_domain *dom, struct cxip_ctrl_req *req); -int cxip_domain_prov_mr_id_alloc(struct cxip_domain *dom, - struct cxip_mr *mr); +int cxip_domain_prov_mr_id_alloc(struct cxip_domain *dom, struct cxip_mr *mr); -void cxip_domain_prov_mr_id_free(struct cxip_domain *dom, - struct cxip_mr *mr); +void cxip_domain_prov_mr_id_free(struct cxip_domain *dom, struct cxip_mr *mr); int cxip_domain_dwq_emit_dma(struct cxip_domain *dom, uint16_t vni, enum cxi_traffic_class tc, diff --git a/prov/cxi/include/cxip/enums.h b/prov/cxi/include/cxip/enums.h index 45746191744..7ef13178963 100644 --- a/prov/cxi/include/cxip/enums.h +++ b/prov/cxi/include/cxip/enums.h @@ -7,7 +7,6 @@ #ifndef _CXIP_ENUMS_H_ #define _CXIP_ENUMS_H_ - /* All enum type definitions */ /* Included first because many structs embed enum fields */ @@ -31,9 +30,9 @@ enum cxip_ep_ptle_mode { }; enum cxip_rdzv_proto { - CXIP_RDZV_PROTO_DEFAULT, /* unrestricted gets */ - CXIP_RDZV_PROTO_ALT_READ, /* restricted gets */ - CXIP_RDZV_PROTO_ALT_WRITE, /* restricted puts */ + CXIP_RDZV_PROTO_DEFAULT, /* unrestricted gets */ + CXIP_RDZV_PROTO_ALT_READ, /* restricted gets */ + CXIP_RDZV_PROTO_ALT_WRITE, /* restricted puts 
*/ }; enum cxip_mr_target_ordering { @@ -50,17 +49,17 @@ enum cxip_mr_target_ordering { }; enum cxip_le_type { - CXIP_LE_TYPE_RX = 0, /* RX data LE */ - CXIP_LE_TYPE_ZBP, /* Zero-byte Put control message LE. Used to - * exchange data in the EQ header_data and - * match_bits fields. Unexpected headers are - * disabled. - */ + CXIP_LE_TYPE_RX = 0, /* RX data LE */ + CXIP_LE_TYPE_ZBP, /* Zero-byte Put control message LE. Used to + * exchange data in the EQ header_data and + * match_bits fields. Unexpected headers are + * disabled. + */ }; enum cxip_ctrl_le_type { - CXIP_CTRL_LE_TYPE_MR = 0, /* Memory Region LE */ - CXIP_CTRL_LE_TYPE_CTRL_MSG, /* Control Message LE */ + CXIP_CTRL_LE_TYPE_MR = 0, /* Memory Region LE */ + CXIP_CTRL_LE_TYPE_CTRL_MSG, /* Control Message LE */ }; enum cxip_ctrl_msg_type { @@ -259,18 +258,18 @@ enum cxip_coll_state { }; typedef enum cxip_coll_rc { - CXIP_COLL_RC_SUCCESS = 0, // good - CXIP_COLL_RC_FLT_INEXACT = 1, // result was rounded - CXIP_COLL_RC_FLT_OVERFLOW = 3, // result too large to represent - CXIP_COLL_RC_FLT_INVALID = 4, // op was signalling NaN, or - // infinities subtracted - CXIP_COLL_RC_REP_INEXACT = 5, // reproducible sum was rounded - CXIP_COLL_RC_INT_OVERFLOW = 6, // reproducible sum overflow - CXIP_COLL_RC_CONTR_OVERFLOW = 7, // too many contributions seen - CXIP_COLL_RC_OP_MISMATCH = 8, // conflicting opcodes - CXIP_COLL_RC_TX_FAILURE = 9, // internal send error - CXIP_COLL_RC_RDMA_FAILURE = 10, // leaf rdma read error - CXIP_COLL_RC_RDMA_DATA_FAILURE = 11, // leaf rdma read data misc + CXIP_COLL_RC_SUCCESS = 0, // good + CXIP_COLL_RC_FLT_INEXACT = 1, // result was rounded + CXIP_COLL_RC_FLT_OVERFLOW = 3, // result too large to represent + CXIP_COLL_RC_FLT_INVALID = 4, // op was signalling NaN, or + // infinities subtracted + CXIP_COLL_RC_REP_INEXACT = 5, // reproducible sum was rounded + CXIP_COLL_RC_INT_OVERFLOW = 6, // reproducible sum overflow + CXIP_COLL_RC_CONTR_OVERFLOW = 7, // too many contributions seen + 
CXIP_COLL_RC_OP_MISMATCH = 8, // conflicting opcodes + CXIP_COLL_RC_TX_FAILURE = 9, // internal send error + CXIP_COLL_RC_RDMA_FAILURE = 10, // leaf rdma read error + CXIP_COLL_RC_RDMA_DATA_FAILURE = 11, // leaf rdma read data misc CXIP_COLL_RC_MAX = 12 } cxip_coll_rc_t; diff --git a/prov/cxi/include/cxip/env.h b/prov/cxi/include/cxip/env.h index e055442bc7e..36ab493f022 100644 --- a/prov/cxi/include/cxip/env.h +++ b/prov/cxi/include/cxip/env.h @@ -7,7 +7,6 @@ #ifndef _CXIP_ENV_H_ #define _CXIP_ENV_H_ - #include /* Type definitions */ @@ -85,7 +84,7 @@ struct cxip_environment { int telemetry_rgid; int disable_hmem_dev_register; int ze_hmem_supported; - enum cxip_rdzv_proto rdzv_proto; + enum cxip_rdzv_proto rdzv_proto; int disable_alt_read_cmdq; int cntr_trig_cmdq; int enable_trig_op_limit; diff --git a/prov/cxi/include/cxip/ep.h b/prov/cxi/include/cxip/ep.h index 0e7d7086262..01abc466daa 100644 --- a/prov/cxi/include/cxip/ep.h +++ b/prov/cxi/include/cxip/ep.h @@ -7,12 +7,11 @@ #ifndef _CXIP_EP_H_ #define _CXIP_EP_H_ - -#include -#include -#include -#include #include +#include +#include +#include +#include /* Forward declarations */ struct cxip_av; @@ -26,34 +25,33 @@ struct cxip_rxc; struct cxip_txc; /* Macros */ -#define CXIP_EP_MAX_CTX_BITS 0 +#define CXIP_EP_MAX_CTX_BITS 0 -#define CXIP_EP_MAX_TX_CNT (1 << CXIP_EP_MAX_CTX_BITS) +#define CXIP_EP_MAX_TX_CNT (1 << CXIP_EP_MAX_CTX_BITS) -#define CXIP_EP_MAX_RX_CNT (1 << CXIP_EP_MAX_CTX_BITS) +#define CXIP_EP_MAX_RX_CNT (1 << CXIP_EP_MAX_CTX_BITS) -#define CXIP_EP_MAX_MSG_SZ ((1ULL << 32) - 1) +#define CXIP_EP_MAX_MSG_SZ ((1ULL << 32) - 1) -#define CXIP_EP_MIN_MULTI_RECV 64 +#define CXIP_EP_MIN_MULTI_RECV 64 -#define CXIP_EP_MAX_MULTI_RECV ((1 << 24) - 1) +#define CXIP_EP_MAX_MULTI_RECV ((1 << 24) - 1) -#define CXIP_EP_PRI_CAPS \ - (FI_RMA | FI_ATOMICS | FI_TAGGED | FI_RECV | FI_SEND | \ - FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | \ - FI_DIRECTED_RECV | FI_MSG | FI_NAMED_RX_CTX | FI_HMEM | \ - 
FI_COLLECTIVE) +#define CXIP_EP_PRI_CAPS \ + (FI_RMA | FI_ATOMICS | FI_TAGGED | FI_RECV | FI_SEND | FI_READ | \ + FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_DIRECTED_RECV | \ + FI_MSG | FI_NAMED_RX_CTX | FI_HMEM | FI_COLLECTIVE) -#define CXIP_EP_SEC_CAPS \ - (FI_SOURCE | FI_SOURCE_ERR | FI_LOCAL_COMM | \ - FI_REMOTE_COMM | FI_RMA_EVENT | FI_MULTI_RECV | FI_FENCE | FI_TRIGGER) +#define CXIP_EP_SEC_CAPS \ + (FI_SOURCE | FI_SOURCE_ERR | FI_LOCAL_COMM | FI_REMOTE_COMM | \ + FI_RMA_EVENT | FI_MULTI_RECV | FI_FENCE | FI_TRIGGER) #define CXIP_EP_CAPS (CXIP_EP_PRI_CAPS | CXIP_EP_SEC_CAPS) #define CXIP_EP_CQ_FLAGS \ (FI_SEND | FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION) -#define CXIP_EP_CNTR_FLAGS \ +#define CXIP_EP_CNTR_FLAGS \ (FI_SEND | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | \ FI_REMOTE_WRITE) diff --git a/prov/cxi/include/cxip/eq.h b/prov/cxi/include/cxip/eq.h index 7796e4ce573..3670eda2e81 100644 --- a/prov/cxi/include/cxip/eq.h +++ b/prov/cxi/include/cxip/eq.h @@ -7,12 +7,11 @@ #ifndef _CXIP_EQ_H_ #define _CXIP_EQ_H_ - #include #include /* Macros */ -#define CXIP_EQ_DEF_SZ (1 << 8) +#define CXIP_EQ_DEF_SZ (1 << 8) #define CXIP_EQ_MAP_FLAGS (CXI_MAP_WRITE | CXI_MAP_PIN) diff --git a/prov/cxi/include/cxip/evtq.h b/prov/cxi/include/cxip/evtq.h index 79cca0b4252..81f4a3d96d6 100644 --- a/prov/cxi/include/cxip/evtq.h +++ b/prov/cxi/include/cxip/evtq.h @@ -7,10 +7,9 @@ #ifndef _CXIP_EVTQ_H_ #define _CXIP_EVTQ_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_cq; @@ -53,15 +52,15 @@ void cxip_evtq_fini(struct cxip_evtq *eq); bool cxip_evtq_saturated(struct cxip_evtq *evtq); -int cxip_evtq_req_cancel(struct cxip_evtq *evtq, void *req_ctx, - void *op_ctx, bool match); +int cxip_evtq_req_cancel(struct cxip_evtq *evtq, void *req_ctx, void *op_ctx, + bool match); void cxip_evtq_req_discard(struct cxip_evtq *evtq, void *req_ctx); void cxip_evtq_flush_trig_reqs(struct cxip_evtq *evtq); -struct cxip_req 
*cxip_evtq_req_alloc(struct cxip_evtq *evtq, - int remap, void *req_ctx); +struct cxip_req *cxip_evtq_req_alloc(struct cxip_evtq *evtq, int remap, + void *req_ctx); void cxip_evtq_req_free(struct cxip_req *req); diff --git a/prov/cxi/include/cxip/fabric.h b/prov/cxi/include/cxip/fabric.h index 6b28544a1bf..536af95a6e1 100644 --- a/prov/cxi/include/cxip/fabric.h +++ b/prov/cxi/include/cxip/fabric.h @@ -7,7 +7,6 @@ #ifndef _CXIP_FABRIC_H_ #define _CXIP_FABRIC_H_ - #include /* Type definitions */ diff --git a/prov/cxi/include/cxip/fc.h b/prov/cxi/include/cxip/fc.h index 92c15ac0370..0e8fa9df00b 100644 --- a/prov/cxi/include/cxip/fc.h +++ b/prov/cxi/include/cxip/fc.h @@ -7,10 +7,9 @@ #ifndef _CXIP_FC_H_ #define _CXIP_FC_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_ep_obj; diff --git a/prov/cxi/include/cxip/if.h b/prov/cxi/include/cxip/if.h index 7e591b6feb7..bfed2b0f2b2 100644 --- a/prov/cxi/include/cxip/if.h +++ b/prov/cxi/include/cxip/if.h @@ -7,12 +7,11 @@ #ifndef _CXIP_IF_H_ #define _CXIP_IF_H_ - -#include -#include -#include #include +#include #include +#include +#include /* Type definitions */ struct cxip_if { diff --git a/prov/cxi/include/cxip/info.h b/prov/cxi/include/cxip/info.h index abd9024cb91..c92d794a95f 100644 --- a/prov/cxi/include/cxip/info.h +++ b/prov/cxi/include/cxip/info.h @@ -7,7 +7,6 @@ #ifndef _CXIP_INFO_H_ #define _CXIP_INFO_H_ - /* Function declarations */ const char *cxip_rdzv_proto_to_str(enum cxip_rdzv_proto proto); diff --git a/prov/cxi/include/cxip/iomm.h b/prov/cxi/include/cxip/iomm.h index 507a9569211..4507a99713d 100644 --- a/prov/cxi/include/cxip/iomm.h +++ b/prov/cxi/include/cxip/iomm.h @@ -7,7 +7,6 @@ #ifndef _CXIP_IOMM_H_ #define _CXIP_IOMM_H_ - #include /* Forward declarations */ diff --git a/prov/cxi/include/cxip/log.h b/prov/cxi/include/cxip/log.h index 82cf491b0dc..98e69525089 100644 --- a/prov/cxi/include/cxip/log.h +++ b/prov/cxi/include/cxip/log.h @@ -7,17 +7,15 @@ #ifndef 
_CXIP_LOG_H_ #define _CXIP_LOG_H_ - /* Macros */ -#define CXIP_LOG(fmt, ...) \ - fi_log(&cxip_prov, FI_LOG_WARN, FI_LOG_CORE, \ - __func__, __LINE__, "%s: " fmt "", cxip_env.hostname, \ - ##__VA_ARGS__) +#define CXIP_LOG(fmt, ...) \ + fi_log(&cxip_prov, FI_LOG_WARN, FI_LOG_CORE, __func__, __LINE__, \ + "%s: " fmt "", cxip_env.hostname, ##__VA_ARGS__) -#define CXIP_FATAL(fmt, ...) \ - do { \ - CXIP_LOG(fmt, ##__VA_ARGS__); \ - abort(); \ +#define CXIP_FATAL(fmt, ...) \ + do { \ + CXIP_LOG(fmt, ##__VA_ARGS__); \ + abort(); \ } while (0) #endif /* _CXIP_LOG_H_ */ diff --git a/prov/cxi/include/cxip/mr.h b/prov/cxi/include/cxip/mr.h index 4fab151844d..afa62b066e8 100644 --- a/prov/cxi/include/cxip/mr.h +++ b/prov/cxi/include/cxip/mr.h @@ -7,12 +7,11 @@ #ifndef _CXIP_MR_H_ #define _CXIP_MR_H_ - -#include -#include -#include #include +#include #include +#include +#include /* Forward declarations */ struct cxip_cntr; @@ -46,17 +45,17 @@ struct cxip_mr_key { union { /* Provider generated standard cached */ struct { - uint64_t lac : 3; - uint64_t lac_off: 58; - uint64_t opt : 1; - uint64_t cached : 1; - uint64_t unused1: 1; + uint64_t lac : 3; + uint64_t lac_off : 58; + uint64_t opt : 1; + uint64_t cached : 1; + uint64_t unused1 : 1; /* shares CXIP_CTRL_LE_TYPE_MR */ }; /* Client or Provider non-cached */ struct { - uint64_t key : 61; - uint64_t unused2: 3; + uint64_t key : 61; + uint64_t unused2 : 3; /* Provider shares opt */ /* Provider shares cached == 0 */ /* Provider shares CXIP_CTRL_LE_TYPE_MR */ @@ -70,11 +69,11 @@ struct cxip_mr_key { * every micro-second, would take months before * it repeated. 
*/ - uint64_t id : 16; /* Unique - 64K MR */ - uint64_t seqnum : 44; /* Sequence with random seed */ - uint64_t events : 1; /* Requires event generation */ - uint64_t unused3: 2; - uint64_t is_prov: 1; + uint64_t id : 16; /* Unique - 64K MR */ + uint64_t seqnum : 44; /* Sequence with random seed */ + uint64_t events : 1; /* Requires event generation */ + uint64_t unused3 : 2; + uint64_t is_prov : 1; /* Overloads CXIP_CTRL_LE_TYPE_MR and must be cleared * before appending MR LE or TX using in match bits. */ @@ -111,12 +110,12 @@ struct cxip_mr_domain { struct cxip_mr { struct fid_mr mr_fid; - struct cxip_domain *domain; // parent domain - struct cxip_ep *ep; // endpoint for remote memory - uint64_t key; // memory key - uint64_t flags; // special flags - struct fi_mr_attr attr; // attributes - struct cxip_cntr *cntr; // if bound to cntr + struct cxip_domain *domain; // parent domain + struct cxip_ep *ep; // endpoint for remote memory + uint64_t key; // memory key + uint64_t flags; // special flags + struct fi_mr_attr attr; // attributes + struct cxip_cntr *cntr; // if bound to cntr /* Indicates if FI_RMA_EVENT was specified at creation and * will be used to enable fi_writedata() and fi_inject_writedata() @@ -132,8 +131,8 @@ struct cxip_mr { * libfabric MR cache. 
*/ bool count_events; - ofi_atomic32_t match_events; - ofi_atomic32_t access_events; + ofi_atomic32_t match_events; + ofi_atomic32_t access_events; ofi_spin_t lock; @@ -141,21 +140,21 @@ struct cxip_mr { bool enabled; struct cxip_pte *pte; enum cxip_mr_state mr_state; - int64_t mr_id; // Non-cached provider key uniqueness + int64_t mr_id; // Non-cached provider key uniqueness struct cxip_ctrl_req req; bool optimized; - void *buf; // memory buffer VA - uint64_t len; // memory length - struct cxip_md *md; // buffer IO descriptor + void *buf; // memory buffer VA + uint64_t len; // memory length + struct cxip_md *md; // buffer IO descriptor struct dlist_entry ep_entry; struct dlist_entry mr_domain_entry; }; /* Function declarations */ -int cxip_generic_mr_key_to_ptl_idx(struct cxip_domain *dom, - uint64_t key, bool write); +int cxip_generic_mr_key_to_ptl_idx(struct cxip_domain *dom, uint64_t key, + bool write); bool cxip_generic_is_mr_key_opt(uint64_t key); diff --git a/prov/cxi/include/cxip/mr_lac_cache.h b/prov/cxi/include/cxip/mr_lac_cache.h index 702ae12579d..59d3ed813d5 100644 --- a/prov/cxi/include/cxip/mr_lac_cache.h +++ b/prov/cxi/include/cxip/mr_lac_cache.h @@ -7,9 +7,9 @@ #ifndef _CXIP_MR_LAC_CACHE_H_ #define _CXIP_MR_LAC_CACHE_H_ - /* cxip_mr_lac_cache type definition */ -/* This is in a separate header to break the circular dependency between mr.h and ctrl.h */ +/* This is in a separate header to break the circular dependency between mr.h + * and ctrl.h */ /* Forward declarations */ struct cxip_ctrl_req; diff --git a/prov/cxi/include/cxip/msg.h b/prov/cxi/include/cxip/msg.h index f685c627aa4..2e630a99e10 100644 --- a/prov/cxi/include/cxip/msg.h +++ b/prov/cxi/include/cxip/msg.h @@ -7,12 +7,11 @@ #ifndef _CXIP_MSG_H_ #define _CXIP_MSG_H_ - -#include -#include -#include -#include #include +#include +#include +#include +#include /* Forward declarations */ struct cxip_md; @@ -23,93 +22,89 @@ struct cxip_rxc_hpc; struct cxip_txc; /* Macros */ -#define 
CXIP_MSG_ORDER (FI_ORDER_SAS | \ - FI_ORDER_WAW | \ - FI_ORDER_RMA_WAW | \ - FI_ORDER_RMA_RAR | \ - FI_ORDER_ATOMIC_WAW | \ - FI_ORDER_ATOMIC_WAR | \ - FI_ORDER_ATOMIC_RAW | \ - FI_ORDER_ATOMIC_RAR) +#define CXIP_MSG_ORDER \ + (FI_ORDER_SAS | FI_ORDER_WAW | FI_ORDER_RMA_WAW | FI_ORDER_RMA_RAR | \ + FI_ORDER_ATOMIC_WAW | FI_ORDER_ATOMIC_WAR | FI_ORDER_ATOMIC_RAW | \ + FI_ORDER_ATOMIC_RAR) -#define CXIP_TAG_WIDTH 48 +#define CXIP_TAG_WIDTH 48 -#define CXIP_TAG_MASK ((1UL << CXIP_TAG_WIDTH) - 1) +#define CXIP_TAG_MASK ((1UL << CXIP_TAG_WIDTH) - 1) /* Type definitions */ union cxip_match_bits { struct { - uint64_t tag : CXIP_TAG_WIDTH; /* User tag value */ - uint64_t tx_id : CXIP_TX_ID_WIDTH; /* Prov. tracked ID */ - uint64_t cq_data : 1; /* Header data is valid */ - uint64_t tagged : 1; /* Tagged API */ - uint64_t match_comp : 1; /* Notify initiator on match */ - uint64_t rdzv_done : 1; /* Notify initiator when rdzv done */ - uint64_t le_type : 1; + uint64_t tag : CXIP_TAG_WIDTH; /* User tag value */ + uint64_t tx_id : CXIP_TX_ID_WIDTH; /* Prov. tracked ID */ + uint64_t cq_data : 1; /* Header data is valid */ + uint64_t tagged : 1; /* Tagged API */ + uint64_t match_comp : 1; /* Notify initiator on match */ + uint64_t rdzv_done : 1; /* Notify initiator when rdzv done */ + uint64_t le_type : 1; }; /* Rendezvous protocol request, overloads match_comp and rdzv_done * to specify requested protocol. */ struct { - uint64_t pad0 : 61; + uint64_t pad0 : 61; uint64_t rdzv_proto : 2; - uint64_t pad1 : 1; + uint64_t pad1 : 1; }; /* Split TX ID for rendezvous operations. 
*/ struct { - uint64_t pad2 : (CXIP_TAG_WIDTH - 1); /* User tag value */ - uint64_t coll_get : 1; /* leaf rdma get */ + uint64_t pad2 : (CXIP_TAG_WIDTH - 1); /* User tag value */ + uint64_t coll_get : 1; /* leaf rdma get */ uint64_t rdzv_id_hi : CXIP_RDZV_ID_HIGH_WIDTH; - uint64_t rdzv_lac : 4; /* Rendezvous Get LAC */ + uint64_t rdzv_lac : 4; /* Rendezvous Get LAC */ }; struct { uint64_t rdzv_id_lo : CXIP_RDZV_ID_CMD_WIDTH; }; /* Client/Server messaging match bits */ struct { - uint64_t rnr_tag : CXIP_CS_TAG_WIDTH; /* User tag value */ - uint64_t rnr_rsvd : 6; /* Unused, set to 0 */ - uint64_t rnr_cq_data : 1; /* Header data valid */ - uint64_t rnr_tagged : 1; /* Tagged API */ - uint64_t rnr_vni : CXIP_VNI_WIDTH; /* Source VNI */ + uint64_t rnr_tag : CXIP_CS_TAG_WIDTH; /* User tag value */ + uint64_t rnr_rsvd : 6; /* Unused, set to 0 */ + uint64_t rnr_cq_data : 1; /* Header data valid */ + uint64_t rnr_tagged : 1; /* Tagged API */ + uint64_t rnr_vni : CXIP_VNI_WIDTH; /* Source VNI */ }; /* Control LE match bit format for notify/resume */ struct { - uint64_t txc_id : 8; - uint64_t rxc_id : 8; - uint64_t drops : 16; - uint64_t pad3 : 29; - uint64_t ctrl_msg_type: 2; + uint64_t txc_id : 8; + uint64_t rxc_id : 8; + uint64_t drops : 16; + uint64_t pad3 : 29; + uint64_t ctrl_msg_type : 2; uint64_t ctrl_le_type : 1; }; /* Control LE match bit format for zbcollectives */ struct { - uint64_t zb_data :61; - uint64_t zb_pad : 3; + uint64_t zb_data : 61; + uint64_t zb_pad : 3; /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_CTRL_MSG * shares ctrl_msg_type == CXIP_CTRL_MSG_ZB_BCAST */ }; /* Control LE match bit format for cached MR */ struct { - uint64_t mr_lac : 3; - uint64_t mr_lac_off : 58; - uint64_t mr_opt : 1; - uint64_t mr_cached : 1; - uint64_t mr_unused : 1; + uint64_t mr_lac : 3; + uint64_t mr_lac_off : 58; + uint64_t mr_opt : 1; + uint64_t mr_cached : 1; + uint64_t mr_unused : 1; /* shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR */ }; struct { - uint64_t mr_key : 61; - 
uint64_t mr_pad : 3; + uint64_t mr_key : 61; + uint64_t mr_pad : 3; /* shares mr_opt * shares mr_cached == 0 * shares ctrl_le_type == CXIP_CTRL_LE_TYPE_MR */ }; struct { - uint64_t unused2 : 63; - uint64_t is_prov : 1; + uint64_t unused2 : 63; + uint64_t is_prov : 1; /* Indicates provider generated key and shares ctrl_le_type == * CXIP_CTRL_LE_TYPE_MR so it must be cleared before matching. */ @@ -120,9 +115,9 @@ union cxip_match_bits { struct cxip_ux_dump_state { bool done; - size_t max_count; /* Number entries/src_addr provided */ - size_t ret_count; /* Number of UX entries returned */ - size_t ux_count; /* Total UX entries available */ + size_t max_count; /* Number entries/src_addr provided */ + size_t ret_count; /* Number of UX entries returned */ + size_t ux_count; /* Total UX entries available */ struct fi_cq_tagged_entry *entry; fi_addr_t *src_addr; @@ -134,14 +129,15 @@ struct cxip_ux_send { struct cxip_rxc *rxc; struct fi_peer_rx_entry *rx_entry; union c_event put_ev; - bool claimed; /* Reserved with FI_PEEK | FI_CLAIM */ + bool claimed; /* Reserved with FI_PEEK | FI_CLAIM */ }; struct cxip_msg_counters { /* Histogram counting the number of messages based on priority, buffer * type (HMEM), and message size. 
*/ - ofi_atomic32_t msg_count[CXIP_LIST_COUNTS][OFI_HMEM_MAX][CXIP_COUNTER_BUCKETS]; + ofi_atomic32_t msg_count[CXIP_LIST_COUNTS][OFI_HMEM_MAX] + [CXIP_COUNTER_BUCKETS]; }; /* Function declarations */ @@ -153,9 +149,8 @@ int cxip_recv_cancel(struct cxip_req *req); void cxip_recv_pte_cb(struct cxip_pte *pte, const union c_event *event); -fi_addr_t cxip_recv_req_src_addr(struct cxip_rxc *rxc, - uint32_t init, uint16_t vni, - bool force); +fi_addr_t cxip_recv_req_src_addr(struct cxip_rxc *rxc, uint32_t init, + uint16_t vni, bool force); int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len, struct cxip_md *md, struct cxip_req **cxip_req, diff --git a/prov/cxi/include/cxip/msg_hpc.h b/prov/cxi/include/cxip/msg_hpc.h index cab5f1fbc07..1922a8d8955 100644 --- a/prov/cxi/include/cxip/msg_hpc.h +++ b/prov/cxi/include/cxip/msg_hpc.h @@ -7,7 +7,6 @@ #ifndef _CXIP_MSG_HPC_H_ #define _CXIP_MSG_HPC_H_ - #include /* Forward declarations */ diff --git a/prov/cxi/include/cxip/nic.h b/prov/cxi/include/cxip/nic.h index bc6cfe7c372..80efac23312 100644 --- a/prov/cxi/include/cxip/nic.h +++ b/prov/cxi/include/cxip/nic.h @@ -7,7 +7,6 @@ #ifndef _CXIP_NIC_H_ #define _CXIP_NIC_H_ - /* Forward declarations */ struct cxip_if; diff --git a/prov/cxi/include/cxip/portals_table.h b/prov/cxi/include/cxip/portals_table.h index 31af8e7cb14..6c5069e8a72 100644 --- a/prov/cxi/include/cxip/portals_table.h +++ b/prov/cxi/include/cxip/portals_table.h @@ -7,9 +7,8 @@ #ifndef _CXIP_PORTALS_TABLE_H_ #define _CXIP_PORTALS_TABLE_H_ - -#include #include +#include /* Forward declarations */ struct cxip_lni; diff --git a/prov/cxi/include/cxip/pte.h b/prov/cxi/include/cxip/pte.h index 884607df308..2e55422d838 100644 --- a/prov/cxi/include/cxip/pte.h +++ b/prov/cxi/include/cxip/pte.h @@ -7,11 +7,10 @@ #ifndef _CXIP_PTE_H_ #define _CXIP_PTE_H_ - -#include -#include -#include #include +#include +#include +#include /* Forward declarations */ struct cxip_cmdq; @@ -21,12 +20,12 @@ struct cxip_if; struct 
cxip_portals_table; /* Macros */ -#define CXIP_PTE_IGNORE_DROPS ((1 << 24) - 1) +#define CXIP_PTE_IGNORE_DROPS ((1 << 24) - 1) /* Type definitions */ struct cxip_pte_map_entry { - struct dlist_entry entry; - struct cxil_pte_map *map; + struct dlist_entry entry; + struct cxil_pte_map *map; }; struct cxip_pte { @@ -50,15 +49,13 @@ int cxip_pte_set_state_wait(struct cxip_pte *pte, struct cxip_cmdq *cmdq, enum c_ptlte_state new_state, uint32_t drop_count); int cxip_pte_append(struct cxip_pte *pte, uint64_t iova, size_t len, - unsigned int lac, enum c_ptl_list list, - uint32_t buffer_id, uint64_t match_bits, - uint64_t ignore_bits, uint32_t match_id, - uint64_t min_free, uint32_t flags, - struct cxip_cntr *cntr, struct cxip_cmdq *cmdq, - bool ring); - -int cxip_pte_unlink(struct cxip_pte *pte, enum c_ptl_list list, - int buffer_id, struct cxip_cmdq *cmdq); + unsigned int lac, enum c_ptl_list list, uint32_t buffer_id, + uint64_t match_bits, uint64_t ignore_bits, + uint32_t match_id, uint64_t min_free, uint32_t flags, + struct cxip_cntr *cntr, struct cxip_cmdq *cmdq, bool ring); + +int cxip_pte_unlink(struct cxip_pte *pte, enum c_ptl_list list, int buffer_id, + struct cxip_cmdq *cmdq); int cxip_pte_map(struct cxip_pte *pte, uint64_t pid_idx, bool is_multicast); diff --git a/prov/cxi/include/cxip/ptelist_buf.h b/prov/cxi/include/cxip/ptelist_buf.h index f33e71436c6..0bef0d14af5 100644 --- a/prov/cxi/include/cxip/ptelist_buf.h +++ b/prov/cxi/include/cxip/ptelist_buf.h @@ -7,11 +7,10 @@ #ifndef _CXIP_PTELIST_BUF_H_ #define _CXIP_PTELIST_BUF_H_ - -#include -#include -#include #include +#include +#include +#include /* Forward declarations */ struct cxip_md; @@ -110,8 +109,7 @@ void cxip_ptelist_bufpool_fini(struct cxip_ptelist_bufpool *pool); int cxip_ptelist_buf_replenish(struct cxip_ptelist_bufpool *pool, bool seq_restart); -void cxip_ptelist_buf_link_err(struct cxip_ptelist_buf *buf, - int rc_link_error); +void cxip_ptelist_buf_link_err(struct cxip_ptelist_buf *buf, int 
rc_link_error); void cxip_ptelist_buf_unlink(struct cxip_ptelist_buf *buf); diff --git a/prov/cxi/include/cxip/rdzv_pte.h b/prov/cxi/include/cxip/rdzv_pte.h index adf1229dd1b..38599e263eb 100644 --- a/prov/cxi/include/cxip/rdzv_pte.h +++ b/prov/cxi/include/cxip/rdzv_pte.h @@ -7,7 +7,6 @@ #ifndef _CXIP_RDZV_PTE_H_ #define _CXIP_RDZV_PTE_H_ - #include /* Forward declarations */ diff --git a/prov/cxi/include/cxip/repsum.h b/prov/cxi/include/cxip/repsum.h index b746eb95a28..4a9daf5ddbf 100644 --- a/prov/cxi/include/cxip/repsum.h +++ b/prov/cxi/include/cxip/repsum.h @@ -7,17 +7,16 @@ #ifndef _CXIP_REPSUM_H_ #define _CXIP_REPSUM_H_ - -#include -#include #include +#include +#include /* Type definitions */ union cxip_dbl_bits { struct { - uint64_t mantissa:52; - uint64_t exponent:11; - uint64_t sign:1; + uint64_t mantissa : 52; + uint64_t exponent : 11; + uint64_t sign : 1; } __attribute__((__packed__)); double dval; uint64_t ival; diff --git a/prov/cxi/include/cxip/req.h b/prov/cxi/include/cxip/req.h index 62f22841366..5a0ac945d0e 100644 --- a/prov/cxi/include/cxip/req.h +++ b/prov/cxi/include/cxip/req.h @@ -7,11 +7,10 @@ #ifndef _CXIP_REQ_H_ #define _CXIP_REQ_H_ - -#include -#include -#include #include +#include +#include +#include /* Forward declarations */ struct cxip_cntr; @@ -31,12 +30,12 @@ struct cxip_txc_rnr; struct cxip_ux_dump_state; /* Macros */ -#define CXIP_REQ_CLEANUP_TO 3000 +#define CXIP_REQ_CLEANUP_TO 3000 /* Type definitions */ struct cxip_req_rma { struct cxip_txc *txc; - struct cxip_md *local_md; // RMA target buffer + struct cxip_md *local_md; // RMA target buffer void *ibuf; struct cxip_cntr *cntr; /* collectives leaf_rdma_get_callback context data */ @@ -68,11 +67,11 @@ struct cxip_req_recv { }; struct cxip_cntr *cntr; - void *recv_buf; // local receive buffer - struct cxip_md *recv_md; // local receive MD - bool hybrid_md; // True if MD was provided + void *recv_buf; // local receive buffer + struct cxip_md *recv_md; // local receive MD + bool 
hybrid_md; // True if MD was provided bool success_disable; - uint32_t ulen; // User buffer length + uint32_t ulen; // User buffer length bool tagged; uint64_t tag; uint64_t ignore; @@ -94,22 +93,22 @@ struct cxip_req_recv { struct cxip_ux_dump_state *ux_dump; /* Control info */ - int rc; // DMA return code - uint32_t rlen; // Send length - uint64_t oflow_start; // Overflow buffer address - uint16_t vni; // VNI operation came in on - uint32_t initiator; // DMA initiator address - uint32_t rdzv_id; // DMA initiator rendezvous ID - uint8_t rdzv_lac; // Rendezvous source LAC - bool done_notify; // Must send done notification + int rc; // DMA return code + uint32_t rlen; // Send length + uint64_t oflow_start; // Overflow buffer address + uint16_t vni; // VNI operation came in on + uint32_t initiator; // DMA initiator address + uint32_t rdzv_id; // DMA initiator rendezvous ID + uint8_t rdzv_lac; // Rendezvous source LAC + bool done_notify; // Must send done notification enum cxip_rdzv_proto rdzv_proto; - int rdzv_events; // Processed rdzv event count + int rdzv_events; // Processed rdzv event count enum c_event_type rdzv_event_types[4]; - uint32_t rdzv_initiator; // Rendezvous initiator used for mrecvs + uint32_t rdzv_initiator; // Rendezvous initiator used for mrecvs uint32_t rget_nic; uint32_t rget_pid; - int multirecv_inflight; // SW EP Multi-receives in progress - bool canceled; // Request canceled? + int multirecv_inflight; // SW EP Multi-receives in progress + bool canceled; // Request canceled? 
bool unlinked; bool multi_recv; bool tgt_event; @@ -132,9 +131,9 @@ struct cxip_req_send { struct cxip_txc_rnr *txc_rnr; }; struct cxip_cntr *cntr; - const void *buf; // local send buffer - size_t len; // request length - struct cxip_md *send_md; // send buffer memory descriptor + const void *buf; // local send buffer + size_t len; // request length + struct cxip_md *send_md; // send buffer memory descriptor struct cxip_addr caddr; fi_addr_t dest_addr; bool tagged; @@ -150,11 +149,11 @@ struct cxip_req_send { struct dlist_entry txc_entry; struct cxip_fc_peer *fc_peer; union { - int rdzv_id; // SW RDZV ID for long messages + int rdzv_id; // SW RDZV ID for long messages int tx_id; }; - int rc; // DMA return code - int rdzv_send_events; // Processed event count + int rc; // DMA return code + int rdzv_send_events; // Processed event count uint64_t max_rnr_time; uint64_t retry_rnr_time; struct dlist_entry rnr_entry; @@ -188,11 +187,11 @@ struct cxip_req { /* Control info */ struct dlist_entry evtq_entry; void *req_ctx; - struct cxip_cq *cq; // request CQ - struct cxip_evtq *evtq; // request event queue - int req_id; // fast lookup in index table + struct cxip_cq *cq; // request CQ + struct cxip_evtq *evtq; // request event queue + int req_id; // fast lookup in index table int (*cb)(struct cxip_req *req, const union c_event *evt); - // completion event callback + // completion event callback bool discard; /* Triggered related fields. 
*/ diff --git a/prov/cxi/include/cxip/req_buf.h b/prov/cxi/include/cxip/req_buf.h index 1dd4f4df348..0d517ba0e7a 100644 --- a/prov/cxi/include/cxip/req_buf.h +++ b/prov/cxi/include/cxip/req_buf.h @@ -7,23 +7,22 @@ #ifndef _CXIP_REQ_BUF_H_ #define _CXIP_REQ_BUF_H_ - /* Forward declarations */ struct cxip_rxc_hpc; struct cxip_ux_send; /* Macros */ -#define CXIP_REQ_BUF_SIZE (12*1024*1024) +#define CXIP_REQ_BUF_SIZE (12 * 1024 * 1024) -#define CXIP_REQ_BUF_MIN_POSTED 6 +#define CXIP_REQ_BUF_MIN_POSTED 6 -#define CXIP_REQ_BUF_MAX_CACHED 0 +#define CXIP_REQ_BUF_MAX_CACHED 0 -#define CXIP_REQ_BUF_HEADER_MAX_SIZE (sizeof(struct c_port_fab_hdr) + \ - sizeof(struct c_port_unrestricted_hdr)) +#define CXIP_REQ_BUF_HEADER_MAX_SIZE \ + (sizeof(struct c_port_fab_hdr) + sizeof(struct c_port_unrestricted_hdr)) -#define CXIP_REQ_BUF_HEADER_MIN_SIZE (sizeof(struct c_port_fab_hdr) + \ - sizeof(struct c_port_small_msg_hdr)) +#define CXIP_REQ_BUF_HEADER_MIN_SIZE \ + (sizeof(struct c_port_fab_hdr) + sizeof(struct c_port_small_msg_hdr)) /* Function declarations */ int cxip_req_bufpool_init(struct cxip_rxc_hpc *rxc); diff --git a/prov/cxi/include/cxip/rma.h b/prov/cxi/include/cxip/rma.h index f218bf5786d..054422f850b 100644 --- a/prov/cxi/include/cxip/rma.h +++ b/prov/cxi/include/cxip/rma.h @@ -7,10 +7,9 @@ #ifndef _CXIP_RMA_H_ #define _CXIP_RMA_H_ - -#include -#include #include +#include +#include /* Forward declarations */ struct cxip_cntr; @@ -19,11 +18,10 @@ struct cxip_txc; /* Function declarations */ ssize_t cxip_rma_common(enum fi_op_type op, struct cxip_txc *txc, const void *buf, size_t len, void *desc, - fi_addr_t tgt_addr, uint64_t addr, - uint64_t key, uint64_t data, uint64_t flags, - uint32_t tclass, uint64_t msg_order, void *context, - bool triggered, uint64_t trig_thresh, - struct cxip_cntr *trig_cntr, + fi_addr_t tgt_addr, uint64_t addr, uint64_t key, + uint64_t data, uint64_t flags, uint32_t tclass, + uint64_t msg_order, void *context, bool triggered, + uint64_t 
trig_thresh, struct cxip_cntr *trig_cntr, struct cxip_cntr *comp_cntr); #endif /* _CXIP_RMA_H_ */ diff --git a/prov/cxi/include/cxip/rxc.h b/prov/cxi/include/cxip/rxc.h index 628d4e088b2..9a61f094f12 100644 --- a/prov/cxi/include/cxip/rxc.h +++ b/prov/cxi/include/cxip/rxc.h @@ -7,12 +7,11 @@ #ifndef _CXIP_RXC_H_ #define _CXIP_RXC_H_ - -#include -#include -#include -#include #include +#include +#include +#include +#include /* Forward declarations */ struct cxip_cmdq; @@ -28,36 +27,36 @@ struct cxip_req; /* Macros */ #define RXC_RESERVED_FC_SLOTS 1 -#define RXC_BASE(rxc) ((struct cxip_rxc *)(void *)(rxc)) +#define RXC_BASE(rxc) ((struct cxip_rxc *) (void *) (rxc)) -#define RXC_DBG(rxc, fmt, ...) \ +#define RXC_DBG(rxc, fmt, ...) \ _CXIP_DBG(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_INFO(rxc, fmt, ...) \ +#define RXC_INFO(rxc, fmt, ...) \ _CXIP_INFO(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_WARN(rxc, fmt, ...) \ +#define RXC_WARN(rxc, fmt, ...) \ _CXIP_WARN(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_WARN_ONCE(rxc, fmt, ...) \ +#define RXC_WARN_ONCE(rxc, fmt, ...) 
\ _CXIP_WARN_ONCE(FI_LOG_EP_DATA, "RXC (%#x:%u) PtlTE %u: " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) -#define RXC_FATAL(rxc, fmt, ...) \ +#define RXC_FATAL(rxc, fmt, ...) \ CXIP_FATAL("RXC (%#x:%u) PtlTE %u:[Fatal] " fmt "", \ - RXC_BASE(rxc)->ep_obj->src_addr.nic, \ - RXC_BASE(rxc)->ep_obj->src_addr.pid, \ + RXC_BASE(rxc)->ep_obj->src_addr.nic, \ + RXC_BASE(rxc)->ep_obj->src_addr.pid, \ RXC_BASE(rxc)->rx_pte->pte->ptn, ##__VA_ARGS__) /* Type definitions */ @@ -89,7 +88,7 @@ struct cxip_rxc { bool trunc_ok; bool sw_ep_only; bool msg_offload; - uint8_t pid_bits; // Zero without SEP + uint8_t pid_bits; // Zero without SEP uint8_t recv_ptl_idx; enum cxip_rxc_state state; diff --git a/prov/cxi/include/cxip/telemetry.h b/prov/cxi/include/cxip/telemetry.h index 62fed6298b2..6e4a240d5b6 100644 --- a/prov/cxi/include/cxip/telemetry.h +++ b/prov/cxi/include/cxip/telemetry.h @@ -7,7 +7,6 @@ #ifndef _CXIP_TELEMETRY_H_ #define _CXIP_TELEMETRY_H_ - #include /* Forward declarations */ diff --git a/prov/cxi/include/cxip/txc.h b/prov/cxi/include/cxip/txc.h index feefb197180..2291588af2e 100644 --- a/prov/cxi/include/cxip/txc.h +++ b/prov/cxi/include/cxip/txc.h @@ -7,12 +7,11 @@ #ifndef _CXIP_TXC_H_ #define _CXIP_TXC_H_ - -#include -#include -#include -#include #include +#include +#include +#include +#include /* Forward declarations */ struct cxip_cmdq; @@ -26,30 +25,30 @@ struct cxip_rdzv_nomatch_pte; struct cxip_req; /* Macros */ -#define CXIP_TXC_FORCE_ERR_ALT_READ_PROTO_ALLOC (1 << 0) +#define CXIP_TXC_FORCE_ERR_ALT_READ_PROTO_ALLOC (1 << 0) -#define TXC_BASE(txc) ((struct cxip_txc *)(void *)(txc)) +#define TXC_BASE(txc) ((struct cxip_txc *) (void *) (txc)) -#define TXC_DBG(txc, fmt, ...) \ +#define TXC_DBG(txc, fmt, ...) 
\ _CXIP_DBG(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) -#define TXC_INFO(txc, fmt, ...) \ +#define TXC_INFO(txc, fmt, ...) \ _CXIP_INFO(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) -#define TXC_WARN(txc, fmt, ...) \ +#define TXC_WARN(txc, fmt, ...) \ _CXIP_WARN(FI_LOG_EP_DATA, "TXC (%#x:%u): " fmt "", \ - TXC_BASE(txc)->ep_obj->src_addr.nic, \ + TXC_BASE(txc)->ep_obj->src_addr.nic, \ TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) #define TXC_WARN_RET(txc, ret, fmt, ...) \ TXC_WARN(txc, "%d:%s: " fmt "", ret, fi_strerror(-ret), ##__VA_ARGS__) -#define TXC_FATAL(txc, fmt, ...) \ - CXIP_FATAL("TXC (%#x:%u):: " fmt "", \ +#define TXC_FATAL(txc, fmt, ...) \ + CXIP_FATAL("TXC (%#x:%u):: " fmt "", \ TXC_BASE(txc)->ep_obj->src_addr.nic, \ TXC_BASE(txc)->ep_obj->src_addr.pid, ##__VA_ARGS__) @@ -76,7 +75,7 @@ struct cxip_txc { uint32_t protocol; bool enabled; - bool hrp_war_req; // Non-fetching 32-bit HRP + bool hrp_war_req; // Non-fetching 32-bit HRP bool hmem; bool trunc_ok; @@ -87,12 +86,12 @@ struct cxip_txc { struct cxip_txc_ops ops; - struct cxip_ep_obj *ep_obj; // parent EP object - struct cxip_domain *domain; // parent domain + struct cxip_ep_obj *ep_obj; // parent EP object + struct cxip_domain *domain; // parent domain uint8_t pid_bits; uint8_t recv_ptl_idx; - struct fi_tx_attr attr; // attributes + struct fi_tx_attr attr; // attributes bool selective_completion; uint32_t tclass; @@ -102,8 +101,8 @@ struct cxip_txc { /* Inject buffers for EP, protected by ep_obj->lock */ struct ofi_bufpool *ibuf_pool; - struct cxip_cmdq *tx_cmdq; // added during cxip_txc_enable() - int otx_reqs; // outstanding transmit requests + struct cxip_cmdq *tx_cmdq; // added during cxip_txc_enable() + int otx_reqs; // 
outstanding transmit requests /* Queue of TX messages in flight for the context */ struct dlist_entry msg_queue; @@ -130,7 +129,7 @@ struct cxip_txc_hpc { struct indexer msg_rdzv_ids; enum cxip_rdzv_proto rdzv_proto; - struct cxip_cmdq *rx_cmdq; // Target cmdq for Rendezvous buffers + struct cxip_cmdq *rx_cmdq; // Target cmdq for Rendezvous buffers #if ENABLE_DEBUG uint64_t force_err; @@ -140,16 +139,15 @@ struct cxip_txc_hpc { /* Match complete IDs */ struct indexer tx_ids; - }; struct cxip_txc_rnr { /* Must remain first */ struct cxip_txc base; - uint64_t max_retry_wait_us; /* Maximum time to retry any request */ - ofi_atomic32_t time_wait_reqs; /* Number of RNR time wait reqs */ - uint64_t next_retry_wait_us; /* Time of next retry in all queues */ + uint64_t max_retry_wait_us; /* Maximum time to retry any request */ + ofi_atomic32_t time_wait_reqs; /* Number of RNR time wait reqs */ + uint64_t next_retry_wait_us; /* Time of next retry in all queues */ uint64_t total_retries; uint64_t total_rnr_nacks; bool hybrid_mr_desc; diff --git a/prov/cxi/include/cxip/zbcoll.h b/prov/cxi/include/cxip/zbcoll.h index a568b8cb669..07a1be749c4 100644 --- a/prov/cxi/include/cxip/zbcoll.h +++ b/prov/cxi/include/cxip/zbcoll.h @@ -7,12 +7,11 @@ #ifndef _CXIP_ZBCOLL_H_ #define _CXIP_ZBCOLL_H_ - -#include -#include -#include #include +#include #include +#include +#include /* Forward declarations */ struct cxip_addr; @@ -20,50 +19,50 @@ struct cxip_ep_obj; /* Type definitions */ struct cxip_zbcoll_cb_obj { - zbcomplete_t usrfunc; // callback function - void *usrptr; // callback data + zbcomplete_t usrfunc; // callback function + void *usrptr; // callback data }; struct cxip_zbcoll_state { - struct cxip_zbcoll_obj *zb; // backpointer to zbcoll_obj - uint64_t *dataptr; // user-supplied target - uint64_t dataval; // collective data - int num_relatives; // number of nearest relatives - int *relatives; // nearest relative indices - int contribs; // contribution count - int grp_rank; // 
local rank within group + struct cxip_zbcoll_obj *zb; // backpointer to zbcoll_obj + uint64_t *dataptr; // user-supplied target + uint64_t dataval; // collective data + int num_relatives; // number of nearest relatives + int *relatives; // nearest relative indices + int contribs; // contribution count + int grp_rank; // local rank within group }; struct cxip_zbcoll_obj { - struct dlist_entry ready_link; // link to zb_coll ready_list - struct cxip_ep_obj *ep_obj; // backpointer to endpoint - struct cxip_zbcoll_state *state;// state array - struct cxip_addr *caddrs; // cxip addresses in collective - int num_caddrs; // number of cxip addresses - zbcomplete_t userfunc; // completion callback function - void *userptr; // completion callback data - uint64_t *grpmskp; // pointer to global group mask - uint32_t *shuffle; // TEST shuffle array - int simcount; // TEST count of states - int simrank; // TEST simulated rank - int simref; // TEST zb0 reference count - int busy; // serialize collectives in zb - int grpid; // zb collective grpid - int error; // error code - int reduce; // set to report reduction data + struct dlist_entry ready_link; // link to zb_coll ready_list + struct cxip_ep_obj *ep_obj; // backpointer to endpoint + struct cxip_zbcoll_state *state; // state array + struct cxip_addr *caddrs; // cxip addresses in collective + int num_caddrs; // number of cxip addresses + zbcomplete_t userfunc; // completion callback function + void *userptr; // completion callback data + uint64_t *grpmskp; // pointer to global group mask + uint32_t *shuffle; // TEST shuffle array + int simcount; // TEST count of states + int simrank; // TEST simulated rank + int simref; // TEST zb0 reference count + int busy; // serialize collectives in zb + int grpid; // zb collective grpid + int error; // error code + int reduce; // set to report reduction data }; struct cxip_ep_zbcoll_obj { - struct dlist_entry ready_list; // zbcoll ops ready to advance - struct cxip_zbcoll_obj **grptbl;// 
group lookup table - uint64_t grpmsk; // mask of used grptbl entries - int refcnt; // grptbl reference count - bool disable; // low level tests - ofi_spin_t lock; // group ID negotiation lock - ofi_atomic32_t dsc_count; // cumulative RCV discard count - ofi_atomic32_t err_count; // cumulative ACK error count - ofi_atomic32_t ack_count; // cumulative ACK success count - ofi_atomic32_t rcv_count; // cumulative RCV success count + struct dlist_entry ready_list; // zbcoll ops ready to advance + struct cxip_zbcoll_obj **grptbl; // group lookup table + uint64_t grpmsk; // mask of used grptbl entries + int refcnt; // grptbl reference count + bool disable; // low level tests + ofi_spin_t lock; // group ID negotiation lock + ofi_atomic32_t dsc_count; // cumulative RCV discard count + ofi_atomic32_t err_count; // cumulative ACK error count + ofi_atomic32_t ack_count; // cumulative ACK success count + ofi_atomic32_t rcv_count; // cumulative RCV success count }; /* Function declarations */ @@ -88,8 +87,8 @@ int cxip_zbcoll_alloc(struct cxip_ep_obj *ep_obj, int num_addrs, int cxip_zbcoll_simlink(struct cxip_zbcoll_obj *zb0, struct cxip_zbcoll_obj *zb); -void cxip_zbcoll_set_user_cb(struct cxip_zbcoll_obj *zb, - zbcomplete_t userfunc, void *userptr); +void cxip_zbcoll_set_user_cb(struct cxip_zbcoll_obj *zb, zbcomplete_t userfunc, + void *userptr); int cxip_zbcoll_max_grps(bool sim); diff --git a/prov/cxi/include/cxip_faults.h b/prov/cxi/include/cxip_faults.h index 503a178e5dd..a0b32b991df 100644 --- a/prov/cxi/include/cxip_faults.h +++ b/prov/cxi/include/cxip_faults.h @@ -7,9 +7,9 @@ /* Fault injection. */ struct cxip_fault { - char *env; /* Configuration env. var. name */ - int prop; /* Proportion of rand() values */ - size_t count; /* Count of injected faults */ + char *env; /* Configuration env. var. 
name */ + int prop; /* Proportion of rand() values */ + size_t count; /* Count of injected faults */ }; extern struct cxip_fault dma_fault; @@ -25,17 +25,13 @@ void cxip_fault_inject_init(void); #define INJECT_FAULT(fault) 0 #endif -#define cxi_cq_emit_dma_f(...) \ - (INJECT_FAULT(dma_fault) ? -ENOSPC : \ - cxi_cq_emit_dma(__VA_ARGS__)) +#define cxi_cq_emit_dma_f(...) \ + (INJECT_FAULT(dma_fault) ? -ENOSPC : cxi_cq_emit_dma(__VA_ARGS__)) -#define cxip_pte_unlink_f(...) \ - (INJECT_FAULT(dma_fault) ? -FI_EAGAIN : \ - cxip_pte_unlink(__VA_ARGS__)) +#define cxip_pte_unlink_f(...) \ + (INJECT_FAULT(dma_fault) ? -FI_EAGAIN : cxip_pte_unlink(__VA_ARGS__)) -#define malloc_f(...) \ - (INJECT_FAULT(malloc_fault) ? NULL : \ - malloc(__VA_ARGS__)) +#define malloc_f(...) (INJECT_FAULT(malloc_fault) ? NULL : malloc(__VA_ARGS__)) /** * Collective traps, can be extended for other uses. diff --git a/prov/cxi/include/fi_cxi_ext.h b/prov/cxi/include/fi_cxi_ext.h index f5d25217280..a7a3aba657e 100644 --- a/prov/cxi/include/fi_cxi_ext.h +++ b/prov/cxi/include/fi_cxi_ext.h @@ -41,26 +41,26 @@ struct cxip_nic_attr { * TODO: The following should be integrated into the include/rdma/fi_ext.h * and are use for provider specific fi_control() operations. */ -#define FI_PROV_SPECIFIC_CXI (0xccc << 16) +#define FI_PROV_SPECIFIC_CXI (0xccc << 16) enum { - FI_OPT_CXI_SET_TCLASS = -FI_PROV_SPECIFIC_CXI, /* uint32_t */ - FI_OPT_CXI_SET_MSG_ORDER, /* uint64_t */ + FI_OPT_CXI_SET_TCLASS = -FI_PROV_SPECIFIC_CXI, /* uint32_t */ + FI_OPT_CXI_SET_MSG_ORDER, /* uint64_t */ /* fid_nic control operation to refresh NIC attributes. 
*/ FI_OPT_CXI_NIC_REFRESH_ATTR, - FI_OPT_CXI_SET_MR_MATCH_EVENTS, /* bool */ - FI_OPT_CXI_GET_MR_MATCH_EVENTS, /* bool */ - FI_OPT_CXI_SET_OPTIMIZED_MRS, /* bool */ - FI_OPT_CXI_GET_OPTIMIZED_MRS, /* bool */ - FI_OPT_CXI_SET_PROV_KEY_CACHE, /* bool */ - FI_OPT_CXI_GET_PROV_KEY_CACHE, /* bool */ - FI_OPT_CXI_SET_RNR_MAX_RETRY_TIME, /* uint64_t */ - FI_OPT_CXI_SET_RX_MATCH_MODE_OVERRIDE, /* char string */ - FI_OPT_CXI_GET_RX_MATCH_MODE_OVERRIDE, /* char string */ - FI_OPT_CXI_SET_REQ_BUF_SIZE_OVERRIDE, /* size_t */ - FI_OPT_CXI_GET_REQ_BUF_SIZE_OVERRIDE, /* size_t */ + FI_OPT_CXI_SET_MR_MATCH_EVENTS, /* bool */ + FI_OPT_CXI_GET_MR_MATCH_EVENTS, /* bool */ + FI_OPT_CXI_SET_OPTIMIZED_MRS, /* bool */ + FI_OPT_CXI_GET_OPTIMIZED_MRS, /* bool */ + FI_OPT_CXI_SET_PROV_KEY_CACHE, /* bool */ + FI_OPT_CXI_GET_PROV_KEY_CACHE, /* bool */ + FI_OPT_CXI_SET_RNR_MAX_RETRY_TIME, /* uint64_t */ + FI_OPT_CXI_SET_RX_MATCH_MODE_OVERRIDE, /* char string */ + FI_OPT_CXI_GET_RX_MATCH_MODE_OVERRIDE, /* char string */ + FI_OPT_CXI_SET_REQ_BUF_SIZE_OVERRIDE, /* size_t */ + FI_OPT_CXI_GET_REQ_BUF_SIZE_OVERRIDE, /* size_t */ }; @@ -70,7 +70,7 @@ enum { * included here should map exactly to the value established in the * main branch (enum or define) and this CXI equivalent will exist forever. */ -#define FI_CXI_CNTR_EVENTS_BYTES 1 /* FI_CNTR_EVENTS_BYTES */ +#define FI_CXI_CNTR_EVENTS_BYTES 1 /* FI_CNTR_EVENTS_BYTES */ /* * CXI provider specific counter flag to return current/cached counter value @@ -79,14 +79,14 @@ enum { * the updated counter value. The normal behavior is to wait for a memory update * to complete (or to use the domain ops counter routines). */ -#define FI_CXI_CNTR_CACHED (1ULL << 32) +#define FI_CXI_CNTR_CACHED (1ULL << 32) /* * TODO: Set this to the upstream value prior to releasing software. * This flag returned in a completion and indicates that the message was * truncated and that the length indicates the truncated message length. 
*/ -#define FI_CXI_TRUNC (1ULL << 56) +#define FI_CXI_TRUNC (1ULL << 56) /* * Execute a given libfabric atomic memory operation as a PCIe operation as @@ -109,7 +109,7 @@ enum { * Note: This flag overloads FI_CXI_PCIE_AMO. Accelerated collectives do not * use FI_CXI_PCIE_AMO or FI_SOURCE. */ -#define FI_CXI_PRE_REDUCED (1ULL << 57) +#define FI_CXI_PRE_REDUCED (1ULL << 57) /* * Use CXI High Rate Puts (HRP). Increases message rate performance. Applies to @@ -124,9 +124,9 @@ enum { #define FI_CXI_UNRELIABLE (1ULL << 61) /* Depreciated. */ -#define FI_CXI_WEAK_FENCE \ - _Pragma ("GCC warning \"'FI_CXI_WEAK_FENCE' macro is deprecated\"") \ - (1ULL << 63) +#define FI_CXI_WEAK_FENCE \ + _Pragma("GCC warning \"'FI_CXI_WEAK_FENCE' macro is deprecated\"")( \ + 1ULL << 63) /* * Used in conjunction with the deferred work queue API. If a deferred work @@ -135,7 +135,7 @@ enum { * Note: Addition hardware resources will be used to ensure a counter writeback * occurs at the completion of the deferred work queue operation. */ -#define FI_CXI_CNTR_WB (1ULL << 62) +#define FI_CXI_CNTR_WB (1ULL << 62) #define FI_CXI_COUNTER_OPS "cxi_counter_ops" struct fi_cxi_cntr_ops { @@ -155,13 +155,13 @@ struct fi_cxi_cntr_ops { /* fi_cntr_read() equivalent but for the writeback buffer. */ static inline uint64_t fi_cxi_cntr_wb_read(const void *wb_buf) { - return (*(uint64_t *)wb_buf) & FI_CXI_CNTR_SUCCESS_MAX; + return (*(uint64_t *) wb_buf) & FI_CXI_CNTR_SUCCESS_MAX; }; /* fi_cntr_reader() equivalent but for the writeback buffer. */ static inline uint64_t fi_cxi_cntr_wb_readerr(const void *wb_buf) { - return ((*(uint64_t *)wb_buf) >> 48) & FI_CXI_CNTR_FAILURE_MAX; + return ((*(uint64_t *) wb_buf) >> 48) & FI_CXI_CNTR_FAILURE_MAX; }; /* Generate a counter success value which can be polled on. 
*/ @@ -181,7 +181,7 @@ static inline int fi_cxi_cntr_add(void *cntr_mmio, uint64_t value) if (value > FI_CXI_CNTR_SUCCESS_MAX) return -FI_EINVAL; - *((uint64_t *)cntr_mmio) = value; + *((uint64_t *) cntr_mmio) = value; return FI_SUCCESS; } @@ -192,7 +192,7 @@ static inline int fi_cxi_cntr_adderr(void *cntr_mmio, uint64_t value) if (value > FI_CXI_CNTR_FAILURE_MAX) return -FI_EINVAL; - *((uint64_t *)cntr_mmio + 8) = value; + *((uint64_t *) cntr_mmio + 8) = value; return FI_SUCCESS; } @@ -203,7 +203,7 @@ static inline int fi_cxi_cntr_set(void *cntr_mmio, uint64_t value) if (value > 0) return -FI_EINVAL; - *((uint64_t *)cntr_mmio + 16) = 0; + *((uint64_t *) cntr_mmio + 16) = 0; return FI_SUCCESS; } @@ -214,7 +214,7 @@ static inline int fi_cxi_cntr_seterr(void *cntr_mmio, uint64_t value) if (value > 0) return -FI_EINVAL; - *((uint64_t *)cntr_mmio + 24) = 0; + *((uint64_t *) cntr_mmio + 24) = 0; return FI_SUCCESS; } @@ -227,7 +227,7 @@ static inline void *fi_cxi_get_cntr_add_addr(void *cntr_mmio) /* fi_cntr_adderr() equivalent but for the MMIO region. */ static inline void *fi_cxi_get_cntr_adderr_addr(void *cntr_mmio) { - return (void *)((uint64_t *)cntr_mmio + 8); + return (void *) ((uint64_t *) cntr_mmio + 8); } /* fi_cntr_set() equivalent but for the MMIO region reset. @@ -235,7 +235,7 @@ static inline void *fi_cxi_get_cntr_adderr_addr(void *cntr_mmio) */ static inline void *fi_cxi_get_cntr_reset_addr(void *cntr_mmio) { - return (void *)((uint64_t *)cntr_mmio + 16); + return (void *) ((uint64_t *) cntr_mmio + 16); } /* fi_cntr_seterr() equivalent but for MMIO region reset. 
@@ -243,7 +243,7 @@ static inline void *fi_cxi_get_cntr_reset_addr(void *cntr_mmio) */ static inline void *fi_cxi_get_cntr_reseterr_addr(void *cntr_mmio) { - return (void *)((uint64_t *)cntr_mmio + 24); + return (void *) ((uint64_t *) cntr_mmio + 24); } #define FI_CXI_DOM_OPS_1 "dom_ops_v1" @@ -256,7 +256,7 @@ static inline void *fi_cxi_get_cntr_reseterr_addr(void *cntr_mmio) /* v1 to v6 can use the same struct since they only appended a routine */ struct fi_cxi_dom_ops { int (*cntr_read)(struct fid *fid, unsigned int cntr, uint64_t *value, - struct timespec *ts); + struct timespec *ts); int (*topology)(struct fid *fid, unsigned int *group_id, unsigned int *switch_id, unsigned int *port_id); @@ -419,23 +419,23 @@ enum cxip_coll_prov_errno { */ FI_CXI_ERRNO_RED_FIRST = 1024, FI_CXI_ERRNO_RED_FLT_OVERFLOW = 1024, - /* double precision value overflow */ + /* double precision value overflow */ FI_CXI_ERRNO_RED_FLT_INVALID = 1025, - /* double precision sNAN/inf value */ + /* double precision sNAN/inf value */ FI_CXI_ERRNO_RED_INT_OVERFLOW = 1026, - /* reproducible sum overflow */ + /* reproducible sum overflow */ FI_CXI_ERRNO_RED_CONTR_OVERFLOW = 1027, - /* reduction contribution overflow */ + /* reduction contribution overflow */ FI_CXI_ERRNO_RED_OP_MISMATCH = 1028, - /* reduction opcode mismatch */ + /* reduction opcode mismatch */ FI_CXI_ERRNO_RED_MC_FAILURE = 1029, - /* unused */ + /* unused */ FI_CXI_COLL_RC_RDMA_FAILURE = 1030, - /* leaf rdma read error */ + /* leaf rdma read error */ FI_CXI_COLL_RC_RDMA_DATA_FAILURE = 1031, - /* leaf rdma read data miscompare, unexpected packet data */ + /* leaf rdma read data miscompare, unexpected packet data */ FI_CXI_ERRNO_RED_OTHER = 1032, - /* non-specific reduction error, fatal */ + /* non-specific reduction error, fatal */ FI_CXI_ERRNO_RED_LAST = 1033, /* collectives EQ join error codes @@ -443,48 +443,48 @@ enum cxip_coll_prov_errno { */ FI_CXI_ERRNO_JOIN_FIRST = 2048, FI_CXI_ERRNO_JOIN_MCAST_INUSE = 2048, - /* endpoint 
already using mcast address */ + /* endpoint already using mcast address */ FI_CXI_ERRNO_JOIN_HWROOT_INUSE = 2049, - /* endpoint already serving as HWRoot */ + /* endpoint already serving as HWRoot */ FI_CXI_ERRNO_JOIN_MCAST_INVALID = 2050, - /* mcast address from FM is invalid */ + /* mcast address from FM is invalid */ FI_CXI_ERRNO_JOIN_HWROOT_INVALID = 2051, - /* HWRoot address from FM is invalid */ + /* HWRoot address from FM is invalid */ FI_CXI_ERRNO_JOIN_CURL_FAILED = 2052, - /* libcurl initiation failed */ + /* libcurl initiation failed */ FI_CXI_ERRNO_JOIN_CURL_TIMEOUT = 2053, - /* libcurl timed out */ + /* libcurl timed out */ FI_CXI_ERRNO_JOIN_SERVER_ERR = 2054, - /* unhandled CURL response code */ + /* unhandled CURL response code */ FI_CXI_ERRNO_JOIN_FAIL_PTE = 2055, - /* libfabric PTE allocation failed */ + /* libfabric PTE allocation failed */ FI_CXI_ERRNO_JOIN_OTHER = 2056, - /* non-specific JOIN error, fatal */ + /* non-specific JOIN error, fatal */ FI_CXI_ERRNO_JOIN_FAIL_RDMA = 2057, - /* root or leaf rdma init failure */ + /* root or leaf rdma init failure */ FI_CXI_ERRNO_JOIN_LAST = FI_CXI_ERRNO_JOIN_FIRST + 43, - /* LAST is determined by the 43-bit error mask . - * Result is the OR of all bits set by different endpoints. - * This reserves space for all 43 bits for new errors. - */ + /* LAST is determined by the 43-bit error mask . + * Result is the OR of all bits set by different endpoints. + * This reserves space for all 43 bits for new errors. 
+ */ }; -typedef unsigned int cxip_coll_op_t; // CXI collective opcode +typedef unsigned int cxip_coll_op_t; // CXI collective opcode struct cxip_coll_mcast_key { - uint32_t hwroot_idx; // index of hwroot in av_set list - uint32_t mcast_addr; // 13-bit multicast address id + uint32_t hwroot_idx; // index of hwroot in av_set list + uint32_t mcast_addr; // 13-bit multicast address id }; struct cxip_coll_unicast_key { - uint32_t hwroot_idx; // index of hwroot in av_set list - uint32_t mcast_addr; // 13-bit simulated multcast address + uint32_t hwroot_idx; // index of hwroot in av_set list + uint32_t mcast_addr; // 13-bit simulated multcast address }; struct cxip_coll_rank_key { - uint32_t hwroot_idx; // index of hwroot in av_set list - uint32_t rank; // rank of this object - bool rx_discard; // clear to report RX events + uint32_t hwroot_idx; // index of hwroot in av_set list + uint32_t rank; // rank of this object + bool rx_discard; // clear to report RX events }; struct cxip_comm_key { @@ -515,8 +515,8 @@ struct cxip_comm_key { * overlap during initialization. */ enum cxip_coll_op { - FI_CXI_MINMAXLOC = 32, // FLT or INT - FI_CXI_REPSUM, // FLT only + FI_CXI_MINMAXLOC = 32, // FLT or INT + FI_CXI_REPSUM, // FLT only FI_CXI_OP_LAST };