Skip to content

src/literals.h is stored in .rodata section (RAM) on ESP8266 #277

@bdraco

Description

@bdraco

Platform

ESP8266

IDE / Tooling

Arduino (IDE/CLI)

What happened?

Hi! First, I want to thank you for creating and maintaining ESPAsyncWebServer - it's an excellent library that has been invaluable for countless ESP8266 and ESP32 projects. The async architecture and feature set are fantastic.

I've been doing some memory profiling on ESP8266 projects and noticed that the string literals defined in src/literals.h (HTTP status messages, MIME types, and header names) are currently stored in RAM via the .rodata section, consuming approximately 2KB.

Would you be open to considering moving these string constants to PROGMEM on ESP8266 builds? This could free up nearly 2KB of RAM on a platform where every byte counts, while having minimal performance impact since these strings are accessed relatively infrequently.

Perhaps a hybrid approach could work well - keeping frequently-used strings (like "OK", "text/html", common headers) in RAM for performance, while moving the less common HTTP status messages and MIME types to PROGMEM. Of course, you know the library's usage patterns best, so any approach you think makes sense would be wonderful.

I understand this would require some refactoring to handle the different string access methods between platforms, but the RAM savings on ESP8266 could be quite beneficial for memory-constrained applications.

Thanks again for all your work on this library - it really is appreciated by the community!

Stack Trace

n/a

Minimal Reproducible Example (MRE)

python3 analyze_ram_strings.py .esphome/build/log8266/.pioenvs/log8266/firmware.elf --min-length 10

#!/usr/bin/env python3
"""
Analyze ESP8266 firmware ELF file to find strings stored in RAM.

This script identifies strings that are in RAM sections (.data, .bss, .rodata)
rather than in flash sections (.irom0.text, .irom.text).
"""

import argparse
import re
import subprocess
import sys
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple


def run_command(cmd: List[str]) -> str:
    """Execute *cmd* and hand back whatever it wrote to stdout.

    The process is run with output captured as text and ``check=True``.
    If the executable cannot be found, or it exits with a non-zero status,
    a two-line diagnostic is written to stderr and the interpreter is
    terminated through ``sys.exit(1)``.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except FileNotFoundError:
        complaints = [
            f"Command not found: {cmd[0]}",
            "Please ensure you have the xtensa toolchain installed",
        ]
    except subprocess.CalledProcessError as failure:
        complaints = [
            f"Error running command: {' '.join(cmd)}",
            f"Error: {failure.stderr}",
        ]
    else:
        return completed.stdout

    # Only reached on failure: report, then abort the whole script.
    for complaint in complaints:
        print(complaint, file=sys.stderr)
    sys.exit(1)


def get_sections(elf_file: str) -> Dict[str, Tuple[int, int]]:
    """Map every section name in *elf_file* to its ``(vma, size)`` pair.

    The first objdump binary that answers ``--version`` is used to print the
    section headers (``objdump -h``); each header row is then reduced to the
    section name, its size and its VMA, all parsed from hexadecimal.
    Exits the process with status 1 when no objdump binary can be found.
    """
    candidates = ('xtensa-lx106-elf-objdump', 'xtensa-esp32-elf-objdump', 'objdump')

    # Probe the candidates in order of preference and keep the first that runs.
    objdump_cmd = None
    for candidate in candidates:
        try:
            subprocess.run([candidate, '--version'], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            continue
        objdump_cmd = candidate
        break

    if objdump_cmd is None:
        print("Error: Could not find objdump command", file=sys.stderr)
        sys.exit(1)

    listing = run_command([objdump_cmd, '-h', elf_file])

    # Header rows look like:
    #   Idx Name          Size      VMA       LMA       File off  Algn
    # Only the index, name, size and VMA columns are matched.
    header_row = re.compile(r'^\s*\d+\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)')

    sections = {}
    for row in listing.split('\n'):
        hit = header_row.match(row)
        if hit is None:
            continue
        name, size_hex, vma_hex = hit.groups()
        sections[name] = (int(vma_hex, 16), int(size_hex, 16))

    return sections


# One data row of `objdump -s` output:
#   " 3ffef8a0 00000000 00000000 00000000 00000000  ................"
# Hex groups are separated by exactly ONE space, whereas the ASCII preview
# column is preceded by at least two. Requiring single-space separators
# therefore keeps the preview (which can itself look like hex, e.g. "defa" or
# "0123") from being mistaken for section data.
_HEX_DUMP_ROW_RE = re.compile(
    r'^\s+([0-9a-fA-F]+)((?: (?:[0-9a-fA-F]{2}){1,4})+)'
)


def _parse_hex_dump(output: str, min_length: int) -> List[Tuple[int, str]]:
    """Collect NUL-terminated printable-ASCII runs from `objdump -s` text."""
    found = []
    run = bytearray()   # printable bytes of the string currently being read
    run_start = 0       # address of the first byte in `run`

    for line in output.split('\n'):
        row = _HEX_DUMP_ROW_RE.match(line)
        if row is None:
            continue  # file-format banner, "Contents of section", blank line

        row_addr = int(row.group(1), 16)
        data = bytes.fromhex(row.group(2).replace(' ', ''))

        # The dump lists bytes in address order, so the offset inside the row
        # is also the offset from the row's address.
        for offset, byte_val in enumerate(data):
            if 0x20 <= byte_val <= 0x7e:  # printable ASCII
                if not run:
                    run_start = row_addr + offset
                run.append(byte_val)
                continue

            # Only a NUL ends a string; any other byte discards the run.
            if byte_val == 0 and run and len(run) >= min_length:
                found.append((run_start, run.decode('ascii')))
            run = bytearray()

    return found


def get_strings_from_section(
    elf_file: str,
    section: str,
    min_length: int = 4,
    *,
    dump_output: Optional[str] = None,
) -> List[Tuple[int, str]]:
    """Extract NUL-terminated printable-ASCII strings from one ELF section.

    Args:
        elf_file: Path of the ELF image handed to objdump. Not used when
            ``dump_output`` is supplied.
        section: Name of the section to dump (passed to ``objdump -j``).
        min_length: Shortest string, in characters, that is reported.
        dump_output: Optional text of an ``objdump -s -j <section>`` run that
            was captured earlier. When given, it is parsed as-is and no
            external tool is invoked.

    Returns:
        ``(address, text)`` tuples in the order the strings occur in the
        dump. ``address`` is the address of the string's first byte. A run of
        printable bytes is reported only if it is terminated by a NUL byte;
        runs may continue across dump rows. An empty list is returned when no
        objdump binary is available or the dump command fails.
    """
    if dump_output is None:
        # Use the first objdump flavour that answers `--version`.
        objdump_cmd = None
        for cmd in ['xtensa-lx106-elf-objdump', 'xtensa-esp32-elf-objdump', 'objdump']:
            try:
                subprocess.run([cmd, '--version'], capture_output=True, check=True)
            except (subprocess.CalledProcessError, FileNotFoundError):
                continue
            objdump_cmd = cmd
            break

        if not objdump_cmd:
            return []

        try:
            dump_output = run_command([objdump_cmd, '-s', '-j', section, elf_file])
        except (Exception, SystemExit):
            # Best effort: run_command reports failures via sys.exit(1), and a
            # section that cannot be dumped simply contributes no strings.
            # KeyboardInterrupt is deliberately left to propagate.
            return []

    return _parse_hex_dump(dump_output, min_length)


def analyze_symbols(elf_file: str) -> Dict[str, Tuple[str, int, int]]:
    """Return the data symbols of *elf_file* keyed by symbol name.

    The first nm binary that answers ``--version`` is run with
    ``-S --size-sort``. Each output line with at least four
    whitespace-separated fields is read as ``address size type name...``;
    symbols whose type letter is one of D/d, R/r or B/b are kept as
    ``name -> (type, address, size)``. A size printed as ``?`` is stored
    as 0. An empty dict is returned when no nm binary is available.
    """
    nm_cmd = None
    for candidate in ('xtensa-lx106-elf-nm', 'xtensa-esp32-elf-nm', 'nm'):
        try:
            subprocess.run([candidate, '--version'], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            continue
        nm_cmd = candidate
        break

    if nm_cmd is None:
        return {}

    data_types = ('D', 'd', 'R', 'r', 'B', 'b')
    symbols = {}

    listing = run_command([nm_cmd, '-S', '--size-sort', elf_file])
    for entry in listing.split('\n'):
        fields = entry.split()
        if len(fields) < 4:
            continue

        addr_hex, size_hex, sym_type = fields[:3]
        addr = int(addr_hex, 16)
        size = 0 if size_hex == '?' else int(size_hex, 16)

        if sym_type not in data_types:
            continue

        # Everything after the type column is the name; it is re-joined
        # because it may have been split on spaces.
        symbols[' '.join(fields[3:])] = (sym_type, addr, size)

    return symbols


def main():
    """Command-line entry point: report what an ELF image keeps in RAM.

    Parses the command line (``elf_file``, ``--min-length``, ``--show-all``),
    exits with status 1 if the file does not exist, and then prints five
    report parts to stdout: a section table, the strings found in the RAM
    sections, the largest data symbols located in those sections, a summary,
    and optimisation hints (repeated strings and strings of 20+ characters).
    """
    parser = argparse.ArgumentParser(description='Analyze ESP8266 firmware for RAM-stored strings')
    parser.add_argument('elf_file', help='Path to firmware.elf file')
    parser.add_argument('--min-length', type=int, default=8, help='Minimum string length to report (default: 8)')
    parser.add_argument('--show-all', action='store_true', help='Show all sections, not just RAM')
    args = parser.parse_args()

    if not Path(args.elf_file).exists():
        print(f"Error: File not found: {args.elf_file}", file=sys.stderr)
        sys.exit(1)

    print(f"Analyzing: {args.elf_file}\n")

    # Get sections
    sections = get_sections(args.elf_file)

    # Define RAM sections for ESP8266
    ram_sections = {'.data', '.rodata', '.bss'}
    flash_sections = {'.irom0.text', '.irom.text', '.text'}
    # Iterating the set directly would make the report order depend on string
    # hash randomisation; a sorted list keeps the output identical across runs.
    ram_section_order = sorted(ram_sections)

    print("=" * 80)
    print("SECTION ANALYSIS")
    print("=" * 80)
    print(f"{'Section':<20} {'Address':<12} {'Size':<12} {'Location'}")
    print("-" * 80)

    total_ram_usage = 0
    total_flash_usage = 0

    for name, (addr, size) in sorted(sections.items()):
        if name in ram_sections:
            location = "RAM"
            total_ram_usage += size
        elif name in flash_sections:
            location = "FLASH"
            total_flash_usage += size
        else:
            location = "OTHER"

        # Totals cover every section; only the listing is filtered.
        if args.show_all or name in ram_sections:
            print(f"{name:<20} 0x{addr:08x}   {size:>8} B   {location}")

    print("-" * 80)
    print(f"Total RAM sections size: {total_ram_usage:,} bytes")
    print(f"Total Flash sections size: {total_flash_usage:,} bytes")

    # Analyze strings in RAM sections
    print("\n" + "=" * 80)
    print("STRINGS IN RAM SECTIONS")
    print("=" * 80)

    all_ram_strings = []  # (section, address, text)

    for section in ram_section_order:
        if section not in sections:
            continue
        strings = get_strings_from_section(args.elf_file, section, args.min_length)
        if not strings:
            continue
        print(f"\nSection: {section}")
        print("-" * 40)
        for addr, string in sorted(strings):
            if len(string) >= args.min_length:
                # Clean up string for display
                clean_string = string[:100] + ('...' if len(string) > 100 else '')
                print(f"  0x{addr:08x}: \"{clean_string}\" (len={len(string)})")
                all_ram_strings.append((section, addr, string))

    # Analyze symbols
    print("\n" + "=" * 80)
    print("LARGE DATA SYMBOLS IN RAM")
    print("=" * 80)

    symbols = analyze_symbols(args.elf_file)

    # Keep only symbols whose address falls inside one of the RAM sections
    ram_symbols = []
    for name, (sym_type, addr, size) in symbols.items():
        for section_name in ram_section_order:
            if section_name in sections:
                section_start, section_size = sections[section_name]
                if section_start <= addr < section_start + section_size:
                    ram_symbols.append((name, sym_type, addr, size, section_name))
                    break

    # Sort by size (largest first)
    ram_symbols.sort(key=lambda x: x[3], reverse=True)

    print(f"\n{'Symbol':<40} {'Type':<6} {'Size':<10} {'Section'}")
    print("-" * 80)

    # Show top 20 largest symbols
    for name, sym_type, addr, size, section in ram_symbols[:20]:
        if size > 0:  # Only show symbols with known size
            print(f"{name[:39]:<40} {sym_type:<6} {size:>8} B  {section}")

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print(f"Total strings found in RAM: {len(all_ram_strings)}")
    total_string_bytes = sum(len(s) + 1 for _, _, s in all_ram_strings)  # +1 for null terminator
    print(f"Total bytes used by strings: {total_string_bytes:,}")

    # Identify potential optimization targets
    print("\n" + "=" * 80)
    print("POTENTIAL OPTIMIZATION TARGETS")
    print("=" * 80)

    # Find commonly repeated strings
    string_counts = Counter(string for _, _, string in all_ram_strings)

    repeated_strings = [(s, c) for s, c in string_counts.items() if c > 1]
    if repeated_strings:
        print("\nRepeated strings (could be deduplicated):")
        # Rank by total bytes occupied (occurrences * length), largest first.
        for string, count in sorted(repeated_strings, key=lambda x: x[1] * len(x[0]), reverse=True)[:10]:
            savings = (count - 1) * (len(string) + 1)
            clean_string = string[:50] + ('...' if len(string) > 50 else '')
            print(f"  \"{clean_string}\" - appears {count} times (potential savings: {savings} bytes)")

    # Find long strings that could be moved to PROGMEM
    long_strings = [entry for entry in all_ram_strings if len(entry[2]) >= 20]
    if long_strings:
        print("\nLong strings that could be moved to PROGMEM (>= 20 chars):")
        for section, addr, string in sorted(long_strings, key=lambda x: len(x[2]), reverse=True)[:10]:
            clean_string = string[:60] + ('...' if len(string) > 60 else '')
            print(f"  {section} @ 0x{addr:08x}: \"{clean_string}\" ({len(string)} bytes)")

# Run the report only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

I confirm that:

  • I have read the documentation.
  • I have searched for similar discussions.
  • I have searched for similar issues.
  • I have looked at the examples.
  • I have upgraded to the latest version of ESPAsyncWebServer (and AsyncTCP for ESP32).

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions