diff --git a/backends/generators/binutils/gas_test_generator.py b/backends/generators/binutils/gas_test_generator.py new file mode 100644 index 000000000..0efcfa6f0 --- /dev/null +++ b/backends/generators/binutils/gas_test_generator.py @@ -0,0 +1,1418 @@ +""" +GNU Assembler Test Generator for RISC-V + +Generates GNU Assembler test files (.s, .d, .l) from RISC-V unified database. + +Generated Test Files: +- Assembly source files (.s) containing assembly instructions +- Dump files (.d) containing expected disassembly patterns +- Error files (.l) for negative tests +- Fail test sets (-fail.s, -fail.d, -fail.l) +- Architecture-specific tests (currently only rv64) +The generator automatically discovers extension patterns from the unified database +and generates tests that should integrate seamlessly with the existing gas test suite. +""" + +import os +import sys +import argparse +import logging +import yaml +import glob +import re +from pathlib import Path +from typing import Dict, List, Tuple, Set + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from generator import (parse_extension_requirements, load_csrs) + +# Named constants for fallback 12-bit signed immediate range. +# The 12-bit signed immediate range is from -2048 to 2047 (inclusive). +DEFAULT_12BIT_SIGNED_IMM_MIN = -2048 +DEFAULT_12BIT_SIGNED_IMM_MAX = 2047 + +# Named constants for fallback 12-bit unsigned immediate range. +# The 12-bit unsigned immediate range is from 0 to 4095 (inclusive). +DEFAULT_12BIT_UNSIGNED_IMM_MIN = 0 +DEFAULT_12BIT_UNSIGNED_IMM_MAX = 4095 + +# Maximum number of CSR example names to keep for replacements +MAX_CSR_EXAMPLES = 10 + +# Maximum length for sanitized extension filenames +MAX_EXTENSION_NAME_LENGTH = 20 + + +def calculate_location_width(location) -> int: + """Calculate the total bit width from a location string or integer.""" + if not location: + return 0 + + # Handle case where location is an integer (single bit) + if isinstance(location, int): + return 1 + + location_str = str(location) + total_width = 0 + parts = location_str.split('|') + + for part in parts: + part = part.strip() + if '-' in part: + try: + a, b = map(int, part.split('-')) + total_width += abs(a - b) + 1 + except ValueError: + logging.debug(f"Could not parse bit range '{part}' in location '{location_str}'") + continue + else: + try: + int(part) + total_width += 1 + except ValueError: + logging.debug(f"Could not parse bit '{part}' in location '{location_str}'") + continue + + return total_width + + +def extract_instruction_constraints(name: str, data: dict) -> dict: + """Extract constraints from instruction YAML data.""" + constraints = {} + + encoding = data.get('encoding', {}) + variables = encoding.get('variables', []) + + register_constraints = {} + immediate_constraints = {} + + for var in variables: + var_name = var.get('name', '') + location = var.get('location', '') + not_value = var.get('not') + left_shift = var.get('left_shift', 0) + sign_extend = var.get('sign_extend', False) + + if not var_name or not location: + continue + + width = calculate_location_width(location) + + # Determine if this is a register or immediate field + if var_name in ['xd', 'xs1', 'xs2', 'xs3', 'rd', 'rs1', 'rs2', 'rs3']: + register_constraints[var_name] = { + 'width': width, + 'not_value': not_value, + 'location': location + } + elif var_name in ['imm', 'simm'] or var_name.startswith('zimm') or var_name.startswith('simm'): + # Calculate the logical immediate range + # Determine if signed or unsigned immediate + 
is_signed = (sign_extend or + var_name.startswith('simm') or + (width == 12 and var_name == 'imm')) # I-type pattern + + if is_signed: + if width > 0: + max_val = (1 << (width - 1)) - 1 + min_val = -(1 << (width - 1)) + else: + max_val, min_val = DEFAULT_12BIT_SIGNED_IMM_MAX, DEFAULT_12BIT_SIGNED_IMM_MIN + else: + # Unsigned immediate - use full width + if width > 0: + max_val = (1 << width) - 1 + min_val = 0 + else: + # Fallback to 12-bit unsigned immediate range when width is unknown. + # Use named constants to make intent explicit. + max_val, min_val = DEFAULT_12BIT_UNSIGNED_IMM_MAX, DEFAULT_12BIT_UNSIGNED_IMM_MIN + + immediate_constraints[var_name] = { + 'range': (min_val, max_val), + 'not_value': not_value, + 'left_shift': left_shift, + 'sign_extend': sign_extend, + 'width': width + } + + if register_constraints or immediate_constraints: + constraints['registers'] = register_constraints + constraints['immediates'] = immediate_constraints + + base = data.get('base') + if base: + constraints['architecture'] = base + + if name.startswith('c.'): + constraints['compressed'] = True + if 'rs1\'' in str(data) or 'rd\'' in str(data): + constraints['limited_registers'] = True + + return constraints + + +def sanitize_extension_name(name: str) -> str: + """Sanitize extension name to be a valid filename.""" + sanitized = name.lower() + sanitized = re.sub(r'[{}\[\]\'",\s:]+', '-', sanitized) + sanitized = sanitized.strip('-')[:MAX_EXTENSION_NAME_LENGTH] + return sanitized if sanitized else 'unknown' + + +def _normalize_extension_token(token: str) -> List[str]: + token = (token or "").strip().lower() + if not token: + return [] + + if token.startswith("rv32") or token.startswith("rv64"): + token = token[4:] + elif token.startswith("rv"): + token = token[2:] + + token = token.strip() + if not token: + return [] + + if token.startswith("z") or token.startswith("s") or token.startswith("x"): + return [token] + + if len(token) > 1 and token.isalpha(): + return list(token) + + return [token] + + +def extract_extension_names(defined_by) -> Set[str]: + extensions: Set[str] = set() + + if isinstance(defined_by, str): + extensions.update(_normalize_extension_token(defined_by)) + elif isinstance(defined_by, dict) and defined_by: + name = defined_by.get("name") + if name: + extensions.update(_normalize_extension_token(name)) + + for key in ("anyOf", "allOf", "oneOf"): + value = defined_by.get(key) + if isinstance(value, list): + for item in value: + extensions.update(extract_extension_names(item)) + elif value is not None: + extensions.update(extract_extension_names(value)) + + extensions.discard("i") + extensions.discard("") + extensions.discard("unknown") + return extensions + + +# RISC-V ABI register definitions +# TODO: Move to UDB specs +RISCV_ABI_REGISTERS = { + 'gpr': { + 'arg_ret': ["a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7"], + 'temp': ["t0", "t1", "t2", "t3", "t4", "t5", "t6"], + 'saved': ["s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11"], + 'special': ["zero", "ra", "sp", "gp", "tp"] + }, + 'fpr': { + 'arg_ret': ["fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7"], + 'temp': ["ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11"], + 'saved': ["fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", "fs8", "fs9", "fs10", "fs11"] + }, + 'vpr': { + 'general': ["v0", "v1", "v2", "v3", "v4", "v8", "v12", "v16", "v20", "v24", "v28"] + } +} + +RISCV_FP_ROUNDING_MODES = ["rne", "rtz", "rdn", "rup", "rmm"] +RISCV_FENCE_ORDERING = ["rw", "r", "w", 
"iorw", "ior", "iow"] + +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +class TestInstructionGroup: + """Represents a group of related instructions for test generation.""" + + def __init__(self, extension: str): + self.extension = extension + self.instructions = [] + self.error_cases: Dict[str, dict] = {} + self.arch_specific = {"rv32": [], "rv64": []} + self.required_extensions: Set[str] = set() + + def add_instruction(self, name: str, info: dict): + """Add an instruction to this group.""" + self.instructions.append((name, info)) + + base = info.get('base') + if base == 32: + self.arch_specific["rv32"].append((name, info)) + elif base == 64: + self.arch_specific["rv64"].append((name, info)) + + defined_by = info.get('definedBy') + if defined_by is not None: + self.required_extensions.update(extract_extension_names(defined_by)) + if self.extension: + ext_lower = self.extension.lower() + if ext_lower != 'unknown': + # Split hyphenated extension group names into individual extensions + for part in ext_lower.split('-'): + normalized = _normalize_extension_token(part) + self.required_extensions.update(normalized) + + def add_error_case( + self, + instruction: str, + invalid_assembly: str, + error_msg: str, + *, + reason: str | None = None, + assembly: str | None = None, + display_instruction: str | None = None, + ) -> None: + + entry = self.error_cases.setdefault( + instruction, + { + "assembly": assembly, + "display_instruction": display_instruction or instruction, + "cases": [], + }, + ) + + if assembly and not entry.get("assembly"): + entry["assembly"] = assembly + if display_instruction: + entry["display_instruction"] = display_instruction + + entry["cases"].append( + { + "line": invalid_assembly, + "error_msg": error_msg, + "reason": reason, + } + ) + + +class AssemblyExampleGenerator: + """Generates assembly examples""" + + def __init__(self, csr_dir: str = "../../../spec/std/isa/csr/", inst_dir: str = "../../../spec/std/isa/inst/"): + self.csr_dir = csr_dir + self.inst_dir = inst_dir + + self._load_operand_definitions() + self._load_csr_examples() + self.all_instruction_data, self.instruction_constraints = self._load_all_instruction_data() + self.extension_classification = self._classify_extensions() + + def _load_operand_definitions(self): + """Load operand type definitions from RISC-V ABI and architecture specs.""" + + abi_regs = RISCV_ABI_REGISTERS + + self.gpr_examples = ( + abi_regs['gpr']['arg_ret'][:4] + + abi_regs['gpr']['saved'][:4] + ) + + # Compressed instruction register set per RISC-V spec + # 3-bit register fields (rs1', rs2', rd') encode registers x8-x15 + # x8=s0, x9=s1, x10=a0, x11=a1, x12=a2, x13=a3, x14=a4, x15=a5 + self.compressed_gpr_examples = ( + abi_regs['gpr']['saved'][:2] + # s0, s1 (x8, x9) + abi_regs['gpr']['arg_ret'][:6] # a0-a5 (x10-x15) + ) + + self.fpr_examples = ( + abi_regs['fpr']['arg_ret'][:4] + + abi_regs['fpr']['temp'][:3] + + abi_regs['fpr']['saved'][:2] + ) + + self.vpr_examples = abi_regs['vpr']['general'] + + self.vector_mask_examples = ["", "v0.t"] + + self.rounding_mode_examples = RISCV_FP_ROUNDING_MODES + self.fence_examples = RISCV_FENCE_ORDERING + + def _load_csr_examples(self): + """Load CSR examples from the unified database.""" + try: + csr_dict = load_csrs(self.csr_dir, enabled_extensions=[], include_all=True, target_arch="BOTH") + self.csr_examples = list(set(name.lower().replace('.rv32', '') for name in csr_dict.values()))[:MAX_CSR_EXAMPLES] + except Exception as e: + logging.warning(f"Failed to 
load CSRs from {self.csr_dir}: {e}. Using fallback CSR list.") + self.csr_examples = ["mstatus", "mtvec", "mscratch", "cycle", "time"] + + def _load_all_instruction_data(self) -> Tuple[Dict[str, dict], Dict[str, dict]]: + instruction_data = {} + instruction_constraints = {} + + yaml_files = glob.glob(os.path.join(self.inst_dir, "**/*.yaml"), recursive=True) + + for yaml_file in yaml_files: + try: + with open(yaml_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if not isinstance(data, dict) or data.get('kind') != 'instruction': + continue + + name = data.get('name') + if not name: + continue + + instruction_data[name] = data + + constraints = extract_instruction_constraints(name, data) + if constraints: + instruction_constraints[name] = constraints + + except Exception as e: + logging.debug(f"Error loading {yaml_file}: {e}") + continue + + logging.debug(f"Single-pass loaded {len(instruction_data)} instructions, {len(instruction_constraints)} constraints") + return instruction_data, instruction_constraints + + def _classify_extensions(self) -> dict: + """Classify extensions based on actual data from the unified database, not hardcoded patterns.""" + classification = { + 'standard': set(), + 'multi_standard': set(), + 'z_extensions': set(), + 's_extensions': set(), + 'x_extensions': set(), + 'other': set() + } + + all_extensions = set() + for name, data in self.all_instruction_data.items(): + defined_by = data.get('definedBy') + if defined_by: + if isinstance(defined_by, str): + all_extensions.add(defined_by.lower()) + elif isinstance(defined_by, dict): + self._extract_extensions_from_complex(defined_by, all_extensions) + + for ext in all_extensions: + ext_clean = ext.lower().strip() + if not ext_clean: + continue + if len(ext_clean) == 1 and ext_clean.isalpha(): + classification['standard'].add(ext_clean) + elif ext_clean.startswith('z'): + classification['z_extensions'].add(ext_clean) + elif ext_clean.startswith('s'): + classification['s_extensions'].add(ext_clean) + elif ext_clean.startswith('x'): + classification['x_extensions'].add(ext_clean) + elif ext_clean.startswith('rv32') or ext_clean.startswith('rv64'): + base_ext = ext_clean[4:] if len(ext_clean) > 4 else 'i' + if len(base_ext) == 1: + classification['standard'].add(base_ext) + else: + classification['multi_standard'].add(base_ext) + elif len(ext_clean) > 1: + classification['multi_standard'].add(ext_clean) + else: + classification['other'].add(ext_clean) + + return classification + + def _extract_extensions_from_complex(self, defined_by: dict, all_extensions: set): + if 'anyOf' in defined_by: + for item in defined_by['anyOf']: + if isinstance(item, str): + all_extensions.add(item.lower()) + elif isinstance(item, dict): + self._extract_extensions_from_complex(item, all_extensions) + + if 'allOf' in defined_by: + for item in defined_by['allOf']: + if isinstance(item, str): + all_extensions.add(item.lower()) + elif isinstance(item, dict): + self._extract_extensions_from_complex(item, all_extensions) + + if 'oneOf' in defined_by: + for item in defined_by['oneOf']: + if isinstance(item, str): + all_extensions.add(item.lower()) + elif isinstance(item, dict): + self._extract_extensions_from_complex(item, all_extensions) + + def _get_operand_replacements(self, inst_name: str, assembly: str, variant_index: int) -> Dict[str, str]: + """Generate operand replacements based on instruction requirements""" + i = variant_index + + constraints = self._get_instruction_constraints(inst_name) + if 
constraints.get('uses_compressed_regs') or constraints.get('limited_registers'): + reg_examples = self.compressed_gpr_examples + else: + reg_examples = self.gpr_examples + + replacements = { + # GPR register patterns + 'xd': reg_examples[i % len(reg_examples)], + 'xs1': reg_examples[(i + 1) % len(reg_examples)], + 'xs2': reg_examples[(i + 2) % len(reg_examples)], + 'xs3': reg_examples[(i + 3) % len(reg_examples)], + 'rd': reg_examples[i % len(reg_examples)], + 'rs1': reg_examples[(i + 1) % len(reg_examples)], + 'rs2': reg_examples[(i + 2) % len(reg_examples)], + 'rs3': reg_examples[(i + 3) % len(reg_examples)], + # FPR register patterns + 'fd': self.fpr_examples[i % len(self.fpr_examples)], + 'fs1': self.fpr_examples[(i + 1) % len(self.fpr_examples)], + 'fs2': self.fpr_examples[(i + 2) % len(self.fpr_examples)], + 'fs3': self.fpr_examples[(i + 3) % len(self.fpr_examples)], + # Vector register patterns + 'vd': self.vpr_examples[i % len(self.vpr_examples)], + 'vs1': self.vpr_examples[(i + 1) % len(self.vpr_examples)], + 'vs2': self.vpr_examples[(i + 2) % len(self.vpr_examples)], + 'vs3': self.vpr_examples[(i + 3) % len(self.vpr_examples)], + # Vector mask + 'vm': self.vector_mask_examples[i % len(self.vector_mask_examples)], + # CSR patterns + 'csr': self.csr_examples[i % len(self.csr_examples)], + # Immediate patterns + 'imm': str(self._get_safe_immediate(inst_name, self._get_instruction_constraints(inst_name))), + 'simm': str(self._get_safe_immediate(inst_name, self._get_instruction_constraints(inst_name))), + 'zimm': str(abs(self._get_safe_immediate(inst_name, self._get_instruction_constraints(inst_name)))), + 'shamt': str(1 + i), + 'offset': str((i + 1) * 4), + } + + constraints = self._get_instruction_constraints(inst_name) + + # Use constraint-based immediate generation for all instructions + if 'imm_range' in constraints: + min_val, max_val = constraints['imm_range'] + imm_multiple = constraints.get('imm_multiple', 1) + imm_not_zero = constraints.get('imm_not_zero', False) + + safe_imm = self._get_safe_immediate_from_constraints(min_val, max_val, imm_multiple, imm_not_zero, i) + + for key in ['imm', 'simm', 'zimm']: + if key in replacements: + replacements[key] = str(safe_imm) + + return replacements + + def _get_safe_immediate_from_constraints(self, min_val: int, max_val: int, multiple: int, not_zero: bool, variant: int) -> int: + """Generate a safe immediate value that satisfies the given constraints.""" + candidates = [1, 2, 4, 8, 16, 32, -1, -2, -4] + + candidates = [c + variant for c in candidates] + candidates + + for candidate in candidates: + if (min_val <= candidate <= max_val and + candidate % multiple == 0 and + (not not_zero or candidate != 0)): + return candidate + + if multiple > 1: + start = ((min_val + multiple - 1) // multiple) * multiple + if not_zero and start == 0: + start = multiple + if start <= max_val: + return start + + return min_val if not not_zero or min_val != 0 else (min_val + 1 if min_val + 1 <= max_val else max_val) + + def generate_examples(self, name: str, assembly: str) -> List[str]: + """Generate assembly examples using YAML assembly field as the authoritative source.""" + instruction_data = self.all_instruction_data.get(name, {}) + actual_assembly = instruction_data.get('assembly', assembly) + + if actual_assembly: + assembly = actual_assembly + + if not assembly or not assembly.strip(): + return [] + + examples = [] + + if ',' in assembly or any(reg in assembly for reg in ['rd', 'rs1', 'rs2', 'imm']): + 
examples.extend(self._generate_variants(name, assembly)) + else: + variants = self._generate_variants(name, assembly) + if variants: + examples.extend(variants) + else: + examples.append(f"{name}") + + return examples + + def _generate_variants(self, name: str, assembly: str) -> List[str]: + """Generate multiple assembly variants using the YAML assembly field.""" + variants = [] + + instruction_data = self.all_instruction_data.get(name, {}) + actual_assembly = instruction_data.get('assembly', assembly) + + if actual_assembly and actual_assembly != assembly: + assembly = actual_assembly + + if not assembly or not assembly.strip(): + return [] + + reg_set = self.compressed_gpr_examples if name.startswith('c.') else self.gpr_examples + + for i in range(min(3, len(reg_set) - 1)): + example = f"{name}\t{assembly}" + + replacements = self._get_operand_replacements(name, assembly, i) + + operands = self._parse_assembly_operands(assembly) + for operand in operands: + operand_type = operand.get("type") + operand_raw = operand.get("raw") + + if operand_type == "rounding_mode" or operand_raw == "rm": + replacements['rm'] = self.rounding_mode_examples[i % len(self.rounding_mode_examples)] + elif operand_type == "fence_ordering" or operand_raw in ["pred", "succ"]: + if operand_raw == "pred": + replacements['pred'] = self.fence_examples[i % len(self.fence_examples)] + elif operand_raw == "succ": + replacements['succ'] = self.fence_examples[(i + 1) % len(self.fence_examples)] + + operands = self._parse_assembly_operands(assembly) + + for placeholder, value in replacements.items(): + operand_found = any(op.get("raw") == placeholder or + op.get("type") in ["csr", "vector_mask"] and placeholder in ["csr", "vm"] + for op in operands) + + if not operand_found and placeholder not in assembly: + continue + + if placeholder == 'csr': + example = re.sub(r'\bcsr\b', value, example) + elif placeholder == 'vm': + # Vector mask is special because it's either empty (unmasked) or v0.t (masked) + if value: + example = re.sub(r'\bvm\b', value, example) + else: + example = re.sub(r',\s*\bvm\b', '', example) + example = re.sub(r'\bvm\b,?\s*', '', example) + else: + example = example.replace(placeholder, value) + + for operand in operands: + if operand.get("type") == "memory": + base_placeholder = operand.get("raw") + if '(base)' in base_placeholder: + base_reg = reg_set[i % len(reg_set)] + example = example.replace('(base)', f'({base_reg})') + elif operand.get("type") == "memory_sp": + if '(sp)' in example: + continue + + variants.append(example) + + return variants + + def _parse_assembly_operands(self, assembly: str) -> List[Dict]: + """Parse assembly string to identify operand types.""" + operands = [] + + if not assembly or not assembly.strip(): + return [] + parts = [p.strip() for p in assembly.split(',') if p.strip()] + + for part in parts: + operand_info = {"raw": part} + if '(' in part and ')' in part: + match = re.match(r'([^(]*)\(([^)]+)\)', part) + if match: + offset, base = match.groups() + base_reg = base.strip() + + if base_reg == "sp": + operand_info.update({ + "type": "memory_sp", + "offset": "4" + }) + else: + operand_info.update({ + "type": "memory", + "offset": "0", + "base": self.gpr_examples[1] + }) + else: + operand_info["type"] = "unknown" + elif part in ["imm", "zimm", "simm"]: + operand_info["type"] = "immediate" + elif part in ["rd", "rs1", "rs2", "rs3", "xd", "xs1", "xs2", "xs3"]: + operand_info["type"] = "gpr" + elif part in ["fd", "fs1", "fs2", "fs3"]: + operand_info["type"] = "fpr" + elif part 
in ["vd", "vs1", "vs2", "vs3"]: + operand_info["type"] = "vpr" + elif part == "vm": + operand_info["type"] = "vector_mask" + elif part == "csr": + operand_info["type"] = "csr" + elif part in ["pred", "succ", "aq", "rl"]: + operand_info["type"] = "fence_ordering" + elif part in ["rm"]: + operand_info["type"] = "rounding_mode" + elif part in ["shamt", "shamtw"] or part.startswith("shamt"): + operand_info["type"] = "shift_amount" + elif part in ["zimm5", "zimm6", "zimm10", "zimm11", "zimm12"] or part.startswith(("zimm", "simm")): + operand_info["type"] = "immediate" + else: + if part.startswith(('x', 'a', 't', 's')): + operand_info["type"] = "gpr" + elif part.startswith(('f', 'fa', 'ft', 'fs')): + operand_info["type"] = "fpr" + elif part.startswith(('v')): + operand_info["type"] = "vpr" + else: + operand_info["type"] = "unknown" + + operands.append(operand_info) + + return operands + + def _get_instruction_constraints(self, name: str) -> dict: + """Get instruction-specific constraints from loaded database.""" + raw_constraints = self.instruction_constraints.get(name, {}) + + processed_constraints = {} + + immediates = raw_constraints.get('immediates', {}) + for imm_name, imm_data in immediates.items(): + if imm_name == 'imm' or imm_name.startswith(('simm', 'zimm')): + min_val, max_val = imm_data['range'] + processed_constraints['imm_range'] = (min_val, max_val) + + if imm_data.get('not_value') == 0: + processed_constraints['imm_not_zero'] = True + + left_shift = imm_data.get('left_shift', 0) + if left_shift > 0: + processed_constraints['imm_multiple'] = 1 << left_shift + + break + + registers = raw_constraints.get('registers', {}) + if registers: + processed_constraints['registers'] = registers + + for reg_name, reg_data in registers.items(): + if reg_name.endswith("'") or reg_data.get('width') == 3: + processed_constraints['uses_compressed_regs'] = True + break + + if raw_constraints.get('compressed'): + processed_constraints['compressed'] = True + + if raw_constraints.get('limited_registers'): + processed_constraints['limited_registers'] = True + + return processed_constraints + + + def _get_safe_immediate(self, name: str, constraints: dict) -> int: + """Get a safe immediate value that satisfies instruction constraints.""" + imm_range = constraints.get('imm_range', (DEFAULT_12BIT_SIGNED_IMM_MIN, DEFAULT_12BIT_SIGNED_IMM_MAX)) + imm_multiple = constraints.get('imm_multiple', 1) + imm_not_zero = constraints.get('imm_not_zero', False) + + min_val, max_val = imm_range + + candidates = [] + + if imm_multiple > 1: + start = ((min_val + imm_multiple - 1) // imm_multiple) * imm_multiple + if imm_not_zero and start == 0: + start = imm_multiple + + for i in range(6): + candidate = start + (i * imm_multiple) + if candidate <= max_val: + candidates.append(candidate) + neg_candidate = start - ((i + 1) * imm_multiple) + if neg_candidate >= min_val and neg_candidate != 0: + candidates.append(neg_candidate) + else: + if min_val >= 0: + # Unsigned range + candidates = [min_val, min_val + 1, min_val + 2, min_val + 4, min_val + 8] + candidates.extend([max_val, max_val - 1, max_val - 2]) + else: + # Signed range + candidates = [1, 2, 4, 8, 16, -1, -2, -4, -8] + candidates.extend([max_val, max_val - 1, min_val, min_val + 1]) + + candidates = list(set(candidates)) + + for candidate in candidates: + if (min_val <= candidate <= max_val and + candidate % imm_multiple == 0 and + (not imm_not_zero or candidate != 0)): + return candidate + + if imm_multiple > 1: + start = ((min_val + imm_multiple - 1) // imm_multiple) 
* imm_multiple + if imm_not_zero and start == 0: + start = imm_multiple + if start <= max_val: + return start + + return min_val if not imm_not_zero or min_val != 0 else min_val + 1 + +class GasTestGenerator: + """Main class for generating GNU Assembler test files.""" + + def __init__(self, output_dir: str = "gas_tests", csr_dir: str = "../../../spec/std/isa/csr/", inst_dir: str = "../../../spec/std/isa/inst/"): + self.output_dir = Path(output_dir) + self.example_generator = AssemblyExampleGenerator(csr_dir, inst_dir) + self.output_dir.mkdir(exist_ok=True) + + def load_instructions(self, inst_dir: str, enabled_extensions: List[str] = None, + include_all: bool = False) -> Dict[str, dict]: + """Load instructions from the unified database using precomputed data""" + if enabled_extensions is None: + enabled_extensions = [] + + all_instructions = self.example_generator.all_instruction_data + + if include_all: + logging.info(f"Using all {len(all_instructions)} precomputed instructions") + return all_instructions + + filtered_instructions = {} + + for name, data in all_instructions.items(): + defined_by = data.get('definedBy') + if defined_by: + try: + meets_req = parse_extension_requirements(defined_by) + if meets_req(enabled_extensions): + filtered_instructions[name] = data + except Exception: + continue + else: + filtered_instructions[name] = data + + logging.info(f"Filtered to {len(filtered_instructions)} instructions from precomputed data") + return filtered_instructions + + def group_instructions_by_extension(self, instructions: Dict[str, dict]) -> Dict[str, TestInstructionGroup]: + """Group instructions by their defining extension.""" + groups = {} + + for name, info in instructions.items(): + defined_by = info.get('definedBy', 'I') + ext_name = self._extract_extension_name(defined_by) + if ext_name not in groups: + groups[ext_name] = TestInstructionGroup(ext_name) + + groups[ext_name].add_instruction(name, info) + + return groups + + def _extract_extension_name(self, defined_by) -> str: + """Extract a clean extension name from definedBy field using consistent logic.""" + if isinstance(defined_by, str): + if defined_by.startswith("RV"): + if defined_by.startswith("RV32") or defined_by.startswith("RV64"): + return defined_by[4:].lower() if len(defined_by) > 4 else "i" + else: + return defined_by[2:].lower() if len(defined_by) > 2 else "i" + return defined_by.lower() + elif isinstance(defined_by, dict): + return self._extract_from_complex_definition(defined_by) + else: + return sanitize_extension_name(str(defined_by)) + + def _extract_from_complex_definition(self, defined_by: dict) -> str: + """Extract extension name from complex definedBy structures.""" + if 'anyOf' in defined_by: + any_of_list = defined_by['anyOf'] + if any_of_list and len(any_of_list) > 0: + first_item = any_of_list[0] + if isinstance(first_item, str): + return first_item.lower() + elif isinstance(first_item, dict) and 'allOf' in first_item: + all_of_list = first_item['allOf'] + if all_of_list and len(all_of_list) > 0: + + extensions = [ext.lower() for ext in all_of_list if isinstance(ext, str)] + return '-'.join(extensions) if extensions else 'unknown' + return sanitize_extension_name(str(first_item)) + + elif 'allOf' in defined_by: + all_of_list = defined_by['allOf'] + if all_of_list and len(all_of_list) > 0: + extensions = [ext.lower() for ext in all_of_list if isinstance(ext, str)] + return '-'.join(extensions) if extensions else 'unknown' + + elif 'oneOf' in defined_by: + one_of_list = defined_by['oneOf'] + if 
one_of_list and len(one_of_list) > 0: + first_ext = one_of_list[0] + if isinstance(first_ext, str): + return first_ext.lower() + return sanitize_extension_name(str(first_ext)) + + elif 'name' in defined_by: + return defined_by['name'].lower() + + return sanitize_extension_name(str(defined_by)) + + def generate_tests_for_group(self, group: TestInstructionGroup) -> None: + """Generate test files for a group of related instructions.""" + if not group.instructions: + return + + self._generate_main_tests(group) + + if group.arch_specific["rv64"]: + self._generate_arch_specific_tests(group, "rv64") + + self._generate_error_tests(group) + + if len(group.instructions) > 5: + self._generate_no_alias_tests(group) + + def _generate_main_tests(self, group: TestInstructionGroup) -> None: + """Generate the main .s and .d test files for a group.""" + ext_name = self._get_binutils_filename(group.extension) + + main_instructions = [] + for name, info in group.instructions: + base = info.get('base') + if base is None or base == 32: + main_instructions.append((name, info)) + + # Generate assembly source file + source_file = self.output_dir / f"{ext_name}.s" + dump_file = self.output_dir / f"{ext_name}.d" + + instruction_examples: List[Tuple[str, str, str]] = [] + for name, info in main_instructions: + assembly = info.get('assembly', '') + examples = self.example_generator.generate_examples(name, assembly) + primary_example = self._select_primary_example(examples) + if not primary_example: + continue + instruction_examples.append((name, assembly, primary_example)) + + with open(source_file, 'w') as f: + f.write("target:\n") + + for name, assembly, example in instruction_examples: + mnemonic, _ = self._split_example_line(example) + signature = assembly.strip() if assembly else "n/a" + f.write( + f"\t# Auto-generated pass test for `{mnemonic}` (assembly: {signature})\n" + ) + f.write("\t# This source should assemble successfully.\n") + f.write(f"\t{example}\n\n") + + base_arch = "rv32i" + march = self._build_march_string(base_arch, group.extension, group.required_extensions) + + with open(dump_file, 'w') as f: + f.write(f"#as: -march={march}\n") + f.write(f"#source: {source_file.name}\n") + f.write("#objdump: -d -M no-aliases\n") + f.write("\n") + f.write(".*:[ \t]+file format .*\n") + f.write("\n") + f.write("\n") + f.write("Disassembly of section .text:\n") + f.write("\n") + f.write("0+000 :\n") + + addr = 0 + for name, _, example in instruction_examples: + pattern = self._create_disasm_pattern(addr, name, example) + f.write(f"{pattern}\n") + addr += self._get_instruction_size(name) + + def _generate_arch_specific_tests(self, group: TestInstructionGroup, arch: str) -> None: + """Generate architecture-specific test files.""" + ext_name = self._get_binutils_filename(group.extension) + + source_file = self.output_dir / f"{ext_name}-{arch[2:]}.s" + dump_file = self.output_dir / f"{ext_name}-{arch[2:]}.d" + + arch_instructions = group.arch_specific[arch] + if not arch_instructions: + return + + instruction_examples: List[Tuple[str, str, str]] = [] + for name, info in arch_instructions: + assembly = info.get('assembly', '') + examples = self.example_generator.generate_examples(name, assembly) + primary_example = self._select_primary_example(examples) + if not primary_example: + continue + instruction_examples.append((name, assembly, primary_example)) + + with open(source_file, 'w') as f: + f.write("target:\n") + + for name, assembly, example in instruction_examples: + mnemonic, _ = self._split_example_line(example) 
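+                # Each entry in the generated .s file is preceded by a header comment recording the mnemonic and its YAML assembly signature.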
+ signature = assembly.strip() if assembly else "n/a" + f.write( + f"\t# Auto-generated pass test for `{mnemonic}` (assembly: {signature})\n" + ) + f.write(f"\t# This source should assemble successfully on {arch.upper()}.\n") + f.write(f"\t{example}\n\n") + + base_arch = f"{arch}i" + march = self._build_march_string(base_arch, group.extension, group.required_extensions) + + with open(dump_file, 'w') as f: + f.write(f"#as: -march={march}\n") + f.write(f"#source: {source_file.name}\n") + f.write("#objdump: -d -M no-aliases\n") + f.write("\n") + f.write(".*:[ \t]+file format .*\n") + f.write("\n") + f.write("\n") + f.write("Disassembly of section .text:\n") + f.write("\n") + f.write("0+000 :\n") + + addr = 0 + for name, _, example in instruction_examples: + pattern = self._create_disasm_pattern(addr, name, example) + f.write(f"{pattern}\n") + addr += self._get_instruction_size(name) + + def _generate_error_tests(self, group: TestInstructionGroup) -> None: + """Generate negative test cases for error conditions.""" + ext_name = self._get_binutils_filename(group.extension) + + source_file = self.output_dir / f"{ext_name}-fail.s" + dump_file = self.output_dir / f"{ext_name}-fail.d" + error_file = self.output_dir / f"{ext_name}-fail.l" + + self._generate_common_error_cases(group) + + if not group.error_cases: + logging.debug(f"No error cases generated for extension {group.extension}") + return + + with open(source_file, 'w') as f: + f.write("target:\n") + + for name, _ in group.instructions: + entry = group.error_cases.get(name) + if not entry or not entry.get("cases"): + continue + + mnemonic = entry.get("display_instruction", name) + signature = entry.get("assembly") or "n/a" + f.write( + f"\t# Auto-generated FAIL tests for `{mnemonic}` (assembly: {signature})\n" + ) + f.write( + "\t# Each line below is intended to fail assembly for a distinct reason.\n" + ) + + for case in entry["cases"]: + reason = case.get("reason") or "generated error case" + f.write(f"\t# FAIL: {reason}\n") + f.write(f"\t{case['line']}\n") + + f.write("\n") + + with open(dump_file, 'w') as f: + march = self._build_march_string("rv32i", group.extension, group.required_extensions) + f.write(f"#as: -march={march}\n") + f.write(f"#source: {source_file.name}\n") + f.write(f"#error_output: {error_file.name}\n") + + with open(error_file, 'w') as f: + f.write(".*: Assembler messages:\n") + for name, _ in group.instructions: + entry = group.error_cases.get(name) + if not entry: + continue + for case in entry["cases"]: + f.write(f".*: Error: {case['error_msg']}\n") + + def _generate_no_alias_tests(self, group: TestInstructionGroup) -> None: + """Generate tests with no-aliases option for detailed disassembly.""" + ext_name = self._get_binutils_filename(group.extension) + + dump_file = self.output_dir / f"{ext_name}-na.d" + source_file = f"{ext_name}.s" + + main_instructions = [] + for name, info in group.instructions: + base = info.get('base') + if base is None: + main_instructions.append((name, info)) + + instruction_examples: List[Tuple[str, str]] = [] + for name, info in main_instructions: + assembly = info.get('assembly', '') + examples = self.example_generator.generate_examples(name, assembly) + primary_example = self._select_primary_example(examples) + if not primary_example: + continue + instruction_examples.append((name, primary_example)) + + with open(dump_file, 'w') as f: + march = self._build_march_string("rv32i", group.extension, group.required_extensions) + f.write(f"#as: -march={march}\n") + f.write(f"#source: 
{source_file}\n") + f.write("#objdump: -d -M no-aliases\n") + f.write("\n") + f.write(".*:[ \t]+file format .*\n") + f.write("\n") + f.write("Disassembly of section .text:\n") + f.write("\n") + f.write("0+000 :\n") + + addr = 0 + for name, example in instruction_examples: + pattern = self._create_disasm_pattern(addr, name, example, no_aliases=True) + f.write(f"{pattern}\n") + addr += self._get_instruction_size(name) + + def _format_error_operands(self, operands: str) -> str: + sanitized = re.sub(r"\s+", " ", operands.strip()) + sanitized = re.sub(r"\s*,\s*", ",", sanitized) + return sanitized + + def _generate_common_error_cases(self, group: TestInstructionGroup) -> None: + + group.error_cases = {} + + for name, info in group.instructions: + assembly = info.get('assembly', '') + examples = self.example_generator.generate_examples(name, assembly) + primary_example = self._select_primary_example(examples) + if not primary_example: + continue + + mnemonic, operands = self._split_example_line(primary_example) + assembly_tokens = [token.strip() for token in assembly.split(',')] if assembly else [] + + cases = self._create_standard_fail_cases(mnemonic, operands, assembly_tokens) + if not cases: + continue + + for case in cases: + group.add_error_case( + name, + case["line"], + case["error_msg"], + reason=case["reason"], + assembly=assembly, + display_instruction=mnemonic, + ) + + if not group.error_cases and group.instructions: + fallback_name, info = group.instructions[0] + assembly = info.get('assembly', '') + mnemonic = fallback_name + operands = ["x32", "x0"] + line = self._format_instruction_line(mnemonic, operands) + formatted_operands = self._format_error_operands(", ".join(operands)) + error_msg = f"illegal operands `{mnemonic} {formatted_operands}'" + group.add_error_case( + fallback_name, + line, + error_msg, + reason="generic invalid operand", + assembly=assembly, + display_instruction=mnemonic, + ) + + def _select_primary_example(self, examples: List[str]) -> str | None: + for example in examples: + if example and example.strip(): + return example.strip() + return None + + def _split_example_line(self, example: str) -> Tuple[str, List[str]]: + stripped = example.strip() + if '\t' in stripped: + mnemonic, operand_str = stripped.split('\t', 1) + elif ' ' in stripped: + mnemonic, operand_str = stripped.split(' ', 1) + else: + return stripped, [] + + operands = [op.strip() for op in operand_str.split(',') if op.strip()] + return mnemonic.strip(), operands + + def _format_instruction_line(self, mnemonic: str, operands: List[str]) -> str: + if operands: + return f"{mnemonic} {', '.join(operands)}" + return mnemonic + + def _operand_is_register(self, operand: str) -> bool: + op = operand.strip() + if not op: + return False + if op.startswith(('-', '0x', '0b', '0d')): + return False + if op[0].isdigit(): + return False + if '(' in op or ')' in op: + return False + if op.startswith('%'): + return False + return bool(re.match(r"[A-Za-z_][A-Za-z0-9_'.]*$", op)) + + def _choose_extra_operand(self, exemplar: str, role: str) -> str: + if role and 'csr' in role.lower(): + return 'nonexistent' + return "x0" if self._operand_is_register(exemplar) else "1" + + def _make_wrong_operand(self, operand: str, role: str) -> str | None: + if role and 'csr' in role.lower(): + return 'nonexistent' + if self._operand_is_register(operand): + return '1' + return 'x0' + + def _create_standard_fail_cases( + self, + mnemonic: str, + operands: List[str], + assembly_tokens: List[str], + ) -> List[Dict[str, str]]: + 
cases: List[Dict[str, str]] = [] + seen_lines: Set[str] = set() + + def add_case(reason: str, new_operands: List[str], *, custom_error: str | None = None) -> None: + line = self._format_instruction_line(mnemonic, new_operands) + if line in seen_lines: + return + seen_lines.add(line) + + if custom_error is not None: + error_msg = custom_error + else: + operands_str = ", ".join(new_operands) + if operands_str: + formatted = self._format_error_operands(operands_str) + error_msg = f"illegal operands `{mnemonic} {formatted}'" + else: + error_msg = f"illegal operands `{mnemonic}'" + + cases.append({ + "reason": reason, + "line": line, + "error_msg": error_msg, + }) + + if operands: + few_operands = operands[:1] if len(operands) > 1 else [] + add_case("wrong number of operands (too few)", few_operands) + + last_role = assembly_tokens[-1] if assembly_tokens else "" + extra_operand = self._choose_extra_operand(operands[-1], last_role) + add_case("wrong number of operands (too many)", operands + [extra_operand]) + + for idx, operand in enumerate(operands): + role = assembly_tokens[idx] if idx < len(assembly_tokens) else "" + wrong_operand = self._make_wrong_operand(operand, role) + if not wrong_operand or wrong_operand == operand: + continue + + replaced = operands.copy() + replaced[idx] = wrong_operand + + if role and 'csr' in role.lower(): + add_case( + "unknown CSR operand", + replaced, + custom_error="unknown CSR `nonexistent'", + ) + else: + add_case( + f"wrong operand type at position {idx + 1}", + replaced, + ) + else: + add_case("wrong number of operands (too many)", ["x0"]) + + return cases + + def _get_instruction_size(self, name: str) -> int: + """ + Determine instruction size in bytes. + Compressed instructions (C extension) are 2 bytes, standard instructions are 4 bytes. + """ + # Compressed instructions start with 'c.' + if name.startswith('c.'): + return 2 + return 4 + + def _get_binutils_filename(self, extension: str) -> str: + """Get binutils-style filename for extension.""" + ext = extension.lower() + if '-' in ext: + ext_parts = ext.split('-') + classification = self.example_generator.extension_classification + standard_exts = classification['standard'] + + if all(part in standard_exts for part in ext_parts): + return '-'.join(sorted(ext_parts)) + else: + return ext + + return ext + + def _build_march_string(self, base_arch: str, extension: str, extra_extensions: Set[str] | None = None) -> str: + classification = self.example_generator.extension_classification + standard_exts = classification['standard'] + + # Extensions that should not appear in march strings + # These work with any base ISA and don't need explicit march flags + excluded_march_extensions = { + 's', # Supervisor mode (sfence.vma, etc.) 
- part of base privileged spec + 'sm', # Supervisor mode (empty group, parent of Sm* extensions) + 'sdext', # Debug extension (dret) - doesn't need march flag + 'xmock', # Test/mock extension - not real + } + + extensions: Set[str] = set() + + for part in extension.lower().split('-'): + extensions.update(_normalize_extension_token(part)) + + if extra_extensions: + for extra in extra_extensions: + extensions.update(_normalize_extension_token(extra)) + + # Filter out 'i' and excluded privileged extensions + extensions = {ext for ext in extensions + if ext and ext != 'i' and ext not in excluded_march_extensions} + + standard_parts = sorted(ext for ext in extensions if ext in standard_exts and len(ext) == 1) + non_standard_parts = sorted(ext for ext in extensions if ext not in standard_exts or len(ext) > 1) + + march = base_arch + + if standard_parts: + march += ''.join(standard_parts) + + if non_standard_parts: + march += '_' + '_'.join(non_standard_parts) + + return march + + def _create_disasm_pattern(self, addr: int, name: str, example: str, no_aliases: bool = False) -> str: + """Create a regex pattern for expected disassembly output.""" + line = example.strip() + + while line.startswith('\t'): + line = line[1:] + + if not line: + instr = name + operands = "" + else: + parts = re.split(r'\s+', line, maxsplit=1) + instr = parts[0] + operands = parts[1] if len(parts) > 1 else "" + + # Format: address: hex_code instruction operands + pattern = f"[ \t]+[0-9a-f]+:[ \t]+[0-9a-f]+[ \t]+{re.escape(instr)}" + + if operands: + operands_clean = re.sub(r'\s+', ' ', operands.strip()) + + def _escape_with_whitespace(token: str) -> str: + escaped = re.escape(token) + return escaped.replace(r'\ ', r'\\s+') + + if ',' in operands_clean: + pieces = [_escape_with_whitespace(part.strip()) for part in operands_clean.split(',')] + operands_pattern = r'\s*,\s*'.join(pieces) + else: + operands_pattern = _escape_with_whitespace(operands_clean) + + pattern += f"[ \t]+{operands_pattern}" + + return pattern + + def generate_all_tests(self, instructions: Dict[str, dict]) -> None: + groups = self.group_instructions_by_extension(instructions) + + logging.info(f"Generating tests for {len(groups)} instruction groups") + + for ext_name, group in groups.items(): + logging.info(f"Generating tests for extension: {ext_name}") + self.generate_tests_for_group(group) + + logging.info(f"Test generation complete. 
Files written to: {self.output_dir}")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Generate GNU Assembler test files from RISC-V unified database"
+    )
+    parser.add_argument("--inst-dir", default="../../../spec/std/isa/inst/",
+                        help="Directory containing instruction YAML files")
+    parser.add_argument("--csr-dir", default="../../../spec/std/isa/csr/",
+                        help="Directory containing CSR YAML files")
+    parser.add_argument("--output-dir", default="gas_tests",
+                        help="Output directory for generated test files")
+    parser.add_argument("--extensions",
+                        help="Comma-separated list of enabled extensions (default: all)")
+    parser.add_argument("--verbose", "-v", action="store_true",
+                        help="Enable verbose logging")
+    parser.add_argument("--include-all", "-a", action="store_true",
+                        help="Include all instructions, ignoring extension filtering")
+
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    if args.include_all or not args.extensions:
+        enabled_extensions = []
+        include_all = True
+        logging.info("Including all instructions")
+    else:
+        enabled_extensions = [ext.strip() for ext in args.extensions.split(",") if ext.strip()]
+        include_all = False
+        logging.info(f"Enabled extensions: {', '.join(enabled_extensions)}")
+
+    if not os.path.isdir(args.inst_dir):
+        logging.error(f"Instruction directory not found: {args.inst_dir}")
+        sys.exit(1)
+
+    if not os.path.isdir(args.csr_dir):
+        logging.warning(f"CSR directory not found: {args.csr_dir}. Using fallback CSR list.")
+
+    generator = GasTestGenerator(args.output_dir, args.csr_dir, args.inst_dir)
+
+    instructions = generator.load_instructions(args.inst_dir, enabled_extensions, include_all)
+
+    if not instructions:
+        logging.error("No instructions found or all were filtered out.")
+        sys.exit(1)
+
+    generator.generate_all_tests(instructions)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/backends/generators/binutils/gas_test_generator_readme.md b/backends/generators/binutils/gas_test_generator_readme.md
new file mode 100644
index 000000000..9a88c57b7
--- /dev/null
+++ b/backends/generators/binutils/gas_test_generator_readme.md
@@ -0,0 +1,150 @@
+# GNU Assembler Test Generator for RISC-V
+
+This tool automatically generates binutils test files for the GNU Assembler (gas) from the RISC-V unified database (UDB). It creates assembly source files (`.s`), dump files (`.d`), and error files (`.l`).
+
+## Overview
+
+The generator automates RISC-V extension testing by:
+
+- **Automatically discovering** extension patterns from the unified database
+- **Following binutils conventions** for test file naming and layout
+- **Generating realistic assembly examples** with multiple operand combinations
+- **Creating comprehensive error cases** for negative testing
+- **Reducing manual test creation**, especially for new RISC-V extensions
+
+### Generated Test Files
+
+For each extension, the generator creates:
+
+1. **Assembly Source Files (`.s`)**: Contain actual assembly instructions with various operand combinations
+2. **Dump Files (`.d`)**: Define test parameters and expected disassembly output patterns
+3. **Error Files (`.l`)**: Expected error messages for negative test cases
+4. **Architecture-specific variants**: Architecture-specific tests when applicable (currently RV64 only)
+
+## Usage
+
+### Basic Usage
+
+Generate tests for all instructions in the unified database:
+
+```bash
+python3 gas_test_generator.py --include-all
+```
+
+### Generate Tests for Specific Extensions
+
+```bash
+python3 gas_test_generator.py --extensions "i,m,a,f,d,zba,zbb"
+```
+
+### Custom Output Directory
+
+```bash
+python3 gas_test_generator.py --include-all --output-dir my_riscv_gas_tests
+```
+
+### Verbose Output
+
+```bash
+python3 gas_test_generator.py --include-all --verbose
+```
+
+## Command Line Options
+
+- `--inst-dir`: Directory containing instruction YAML files (default: `../../../spec/std/isa/inst/`)
+- `--csr-dir`: Directory containing CSR YAML files (default: `../../../spec/std/isa/csr/`)
+- `--output-dir`: Output directory for generated test files (default: `gas_tests`)
+- `--extensions`: Comma-separated list of enabled extensions (default: all)
+- `--include-all`: Include all instructions, ignoring extension filtering
+- `--verbose`: Enable verbose logging
+
+## Integration with Binutils Test Suite
+
+The generated files follow the same format and conventions as the existing binutils gas test suite and can be directly integrated:
+
+1. Copy generated files to `binutils-gdb/gas/testsuite/gas/riscv/`
+2. Register the new tests in the DejaGnu driver (`riscv.exp`) if needed
+3. Run tests with `make check`
+
+## Features
+
+### Assembly Generation
+
+- **Multiple Operand Combinations**: Generates realistic assembly examples with different register and immediate combinations
+- **Constraint-Aware Generation**: Respects instruction-specific constraints from encoding definitions
+- **Edge Case Testing**: Creates boundary value tests for immediate operands
+- **Memory Operand Variants**: Handles `offset(base)` memory operands with various offsets
+- **Register Type Awareness**: Uses appropriate register names (x/a/t/s for GPR, f/fa/ft/fs for FPR)
+- **Compressed Instruction Support**: Handles C extension register constraints properly
+
+### Error Case Generation
+
+- **Invalid Registers**: Tests with out-of-range register numbers
+- **Invalid Immediates**: Tests with out-of-bounds immediate values
+- **Malformed Assembly**: Common syntax error cases
+
+### Test Organization
+
+- **Extension Grouping**: Groups related instructions by defining extension
+- **Consistent Naming**: Follows existing binutils test naming conventions
+- **Regex Patterns**: Generates robust regex patterns for disassembly matching
+
+## Architecture
+
+The generator uses a modular architecture with three main components:
+
+### TestInstructionGroup
+Groups related instructions by extension and categorizes them:
+- Main instructions (architecture-neutral)
+- Compressed variants (C extension)
+- Architecture-specific instructions (RV32/RV64 only)
+- Error cases for negative testing
+
+### AssemblyExampleGenerator
+Creates realistic assembly examples using a data-driven approach:
+- Loads and classifies all extensions from the unified database
+- Parses assembly format strings from YAML definitions
+- Generates constraint-aware operand combinations
+- Creates realistic immediate values respecting encoding constraints
+- Handles different operand types (registers, immediates, memory, CSRs)
+- Manages compressed instruction register constraints
+
+### GasTestGenerator
+Main orchestrator implementing binutils conventions:
+- Loads and filters instructions from the unified database
+- Groups instructions by extension
+- Generates RV32-default tests matching binutils patterns
+- Creates architecture-specific variants when needed
+- Builds march strings
+- Manages binutils-compatible output directory structure
+
+## Extending the Generator
+
+### Adding New Operand Types
+
+To support new operand types, extend the `_parse_assembly_operands` method in `AssemblyExampleGenerator`:
+
+```python
+elif part == "new_operand_type":
+    operand_info["type"] = "new_type"
+```
+
+### Custom Error Cases
+
+Add extension-specific error cases by extending `_generate_common_error_cases`, for example with a helper such as:
+
+```python
+def _generate_custom_error_cases(self, group: TestInstructionGroup):
+    # Add custom error scenarios
+    group.add_error_case("instruction", "invalid_assembly", "error_message")
+```
+
+### Architecture-Specific Logic
+
+Modify `_build_march_string` to handle new architecture requirements:
+
+```python
+def _build_march_string(self, base_arch: str, extension: str, extra_extensions: Set[str] | None = None) -> str:
+    # Custom march string logic
+    return f"{base_arch}_{extension}"
+```
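+
+### Driving the Generator from Python
+
+The command-line entry point in `main()` is a thin wrapper around `GasTestGenerator`. The sketch below shows one way to drive the generator programmatically; it assumes the script is run with `backends/generators/binutils/` on `PYTHONPATH`, and the extension list and directories are illustrative, not requirements.
+
+```python
+from gas_test_generator import GasTestGenerator
+
+# Paths mirror the CLI defaults; adjust them to your checkout layout.
+generator = GasTestGenerator(
+    output_dir="gas_tests",
+    csr_dir="../../../spec/std/isa/csr/",
+    inst_dir="../../../spec/std/isa/inst/",
+)
+
+# Load only the instructions defined by the listed extensions
+# (pass include_all=True to skip extension filtering entirely).
+instructions = generator.load_instructions(
+    "../../../spec/std/isa/inst/",
+    enabled_extensions=["m", "zba", "zbb"],
+    include_all=False,
+)
+
+# Write the .s/.d/.l files, grouped by defining extension.
+generator.generate_all_tests(instructions)
+```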