Skip to content

Experience report: shrinking Python for lint issues (and the Worst Hello World)ย #26

@Zac-HD

Description

@Zac-HD

I've been pointing shrinkray at some linter issues lately, and so have some notes on shrinking Python files I thought I'd share, in the spirit of #8 and generally hoping that feedback is helpful ๐Ÿ™‚

  • initial progress is very fast (๐ŸŽ‰), but it's common to see multi-minute stalls where a whole pass makes ~no progress
    • it seems to me like proposing reductions from a mixture of passes would often make steadier (and thus faster) progress than completing passes sequentially.
      • dynamically reweighting passes (within reasonable bounds) with a multi-arm bandit style trick might also help a lot
    • the variable renaming creates collisions which make future reductions harder because they apply to 'multiple things' which happen to share a name. Seems tricky but useful if we can avoid that.
      • more aggressive inlining would help a lot in Python, but that's usually a pump rather than a reduction per se. Same for moving class-scope vars etc to global scope
  • adding some problem-specific smarts seems pretty valuable, e.g. by
    • make reduction passes or pumps pluggable so I can experiment downstream. But it'd be easier to instead...
    • have another copy (or copies) of the input which can be live-edited, watching for saves, and trying out those diffs as reductions/pumps. This seems like the easiest way to put a human in the loop
      • I've found asking Claude Code to propose edits checked by the interestingness test is very very effective, and so this would be a neat integration point for shrinkray+Claude too.
World's Worst Hello World
#!/bin/bash
# Test if the program outputs "hello world"
python hello.py 2>/dev/null | grep -q "^hello world$"
#!/usr/bin/env python3
"""
An enterprise-grade, scalable, fault-tolerant, cloud-native Hello World implementation.
"""

import abc
import asyncio
import base64
import functools
import hashlib
import inspect
import itertools
import json
import logging
import os
import random
import sys
import time
import typing
from collections import defaultdict, namedtuple
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum, auto
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Protocol, TypeVar, Union

# Configure logging
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class CharacterEncodingStrategy(Enum):
    """Enumeration of supported character encoding strategies."""

    ASCII = auto()
    UTF8 = auto()
    UTF16 = auto()
    BASE64 = auto()
    ROT13 = auto()


class OutputMedium(Enum):
    """Supported output media for message delivery."""

    STDOUT = auto()
    STDERR = auto()
    FILE = auto()
    MEMORY = auto()


@dataclass
class CharacterMetadata:
    """Metadata for individual character processing."""

    char: str
    position: int
    timestamp: float
    encoding: CharacterEncodingStrategy
    checksum: str

    def __post_init__(self):
        self.checksum = hashlib.md5(self.char.encode()).hexdigest()


class AbstractCharacterFactory(abc.ABC):
    """Abstract factory for character creation."""

    @abc.abstractmethod
    def create_character(self, char: str, position: int) -> CharacterMetadata:
        """Create a character with metadata."""
        pass


class ConcreteCharacterFactory(AbstractCharacterFactory):
    """Concrete implementation of character factory."""

    def __init__(
        self, encoding: CharacterEncodingStrategy = CharacterEncodingStrategy.UTF8
    ):
        self.encoding = encoding
        self._cache: Dict[tuple, CharacterMetadata] = {}

    def create_character(self, char: str, position: int) -> CharacterMetadata:
        """Create a character with caching for performance."""
        cache_key = (char, position)
        if cache_key not in self._cache:
            self._cache[cache_key] = CharacterMetadata(
                char=char,
                position=position,
                timestamp=time.time(),
                encoding=self.encoding,
            )
        return self._cache[cache_key]


class MessageBuilder:
    """Builder pattern for constructing messages."""

    def __init__(self):
        self._components: List[str] = []
        self._metadata: Dict[str, Any] = {}

    def add_word(self, word: str) -> "MessageBuilder":
        """Add a word to the message."""
        self._components.append(word)
        return self

    def add_separator(self, separator: str = " ") -> "MessageBuilder":
        """Add a separator between words."""
        if self._components:
            self._components.append(separator)
        return self

    def with_metadata(self, key: str, value: Any) -> "MessageBuilder":
        """Add metadata to the message."""
        self._metadata[key] = value
        return self

    def build(self) -> str:
        """Build the final message."""
        return "".join(self._components)


class OutputStrategyProtocol(Protocol):
    """Protocol for output strategies."""

    def output(self, message: str) -> None:
        """Output the message."""
        ...


class StdoutOutputStrategy:
    """Strategy for outputting to stdout."""

    def output(self, message: str) -> None:
        """Output to stdout."""
        print(message, file=sys.stdout)


class MessageProcessor:
    """Processes messages with various transformations."""

    def __init__(self):
        self._preprocessors: List[Callable[[str], str]] = []
        self._postprocessors: List[Callable[[str], str]] = []

    def add_preprocessor(self, func: Callable[[str], str]) -> None:
        """Add a preprocessing function."""
        self._preprocessors.append(func)

    def add_postprocessor(self, func: Callable[[str], str]) -> None:
        """Add a postprocessing function."""
        self._postprocessors.append(func)

    def process(self, message: str) -> str:
        """Process the message through all transformations."""
        # Apply preprocessors
        for preprocessor in self._preprocessors:
            message = preprocessor(message)

        # Identity transformation (the most complex operation)
        message = self._apply_identity_transformation(message)

        # Apply postprocessors
        for postprocessor in self._postprocessors:
            message = postprocessor(message)

        return message

    def _apply_identity_transformation(self, message: str) -> str:
        """Apply the identity transformation (returns input unchanged)."""
        # Decompose into characters
        chars = list(message)

        # Reconstruct using a generator expression with unnecessary complexity
        reconstructed = "".join(
            char
            for i, char in enumerate(chars)
            if self._validate_character_at_position(char, i)
        )

        return reconstructed

    def _validate_character_at_position(self, char: str, position: int) -> bool:
        """Validate that a character can exist at a given position."""
        # Always returns True, but with extra steps
        validations = [
            lambda c, p: c is not None,
            lambda c, p: isinstance(c, str),
            lambda c, p: len(c) <= 1,
            lambda c, p: p >= 0,
            lambda c, p: True,  # Final validation always passes
        ]

        return all(validation(char, position) for validation in validations)


class MessageOrchestrator:
    """Orchestrates the entire message generation process."""

    def __init__(
        self,
        character_factory: AbstractCharacterFactory,
        output_strategy: OutputStrategyProtocol,
        processor: MessageProcessor,
    ):
        self.character_factory = character_factory
        self.output_strategy = output_strategy
        self.processor = processor
        self._performance_metrics: Dict[str, float] = defaultdict(float)

    @contextmanager
    def _measure_performance(self, operation: str):
        """Measure performance of an operation."""
        start = time.perf_counter()
        yield
        self._performance_metrics[operation] += time.perf_counter() - start

    def generate_and_output_message(self, word1: str, word2: str) -> None:
        """Generate and output the message."""
        with self._measure_performance("message_building"):
            # Build the message using the builder pattern
            builder = MessageBuilder()
            builder.add_word(word1).add_separator().add_word(word2)
            message = builder.build()

        with self._measure_performance("message_processing"):
            # Process the message
            processed_message = self.processor.process(message)

        with self._measure_performance("message_output"):
            # Output the message
            self.output_strategy.output(processed_message)

        # Log performance metrics (but suppress them)
        logger.debug(f"Performance metrics: {dict(self._performance_metrics)}")


class SingletonMetaclass(type):
    """Metaclass for implementing singleton pattern."""

    _instances: Dict[type, Any] = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]


class ApplicationContext(metaclass=SingletonMetaclass):
    """Singleton application context."""

    def __init__(self):
        self.start_time = datetime.now()
        self.configuration = self._load_configuration()

    def _load_configuration(self) -> Dict[str, Any]:
        """Load application configuration."""
        return {
            "encoding": CharacterEncodingStrategy.UTF8,
            "output_medium": OutputMedium.STDOUT,
            "enable_caching": True,
            "performance_monitoring": True,
            "word1": "hello",
            "word2": "world",
        }


class DependencyInjector:
    """Manages dependency injection for the application."""

    def __init__(self):
        self._registry: Dict[type, Callable[[], Any]] = {}

    def register(self, interface: type, factory: Callable[[], Any]) -> None:
        """Register a factory for an interface."""
        self._registry[interface] = factory

    def resolve(self, interface: type) -> Any:
        """Resolve an interface to an implementation."""
        if interface not in self._registry:
            raise ValueError(f"No factory registered for {interface}")
        return self._registry[interface]()


class HelloWorldApplication:
    """Main application class."""

    def __init__(self):
        self.context = ApplicationContext()
        self.injector = self._configure_dependencies()

    def _configure_dependencies(self) -> DependencyInjector:
        """Configure dependency injection."""
        injector = DependencyInjector()

        injector.register(
            AbstractCharacterFactory,
            lambda: ConcreteCharacterFactory(self.context.configuration["encoding"]),
        )

        injector.register(OutputStrategyProtocol, lambda: StdoutOutputStrategy())

        injector.register(MessageProcessor, lambda: MessageProcessor())

        return injector

    async def _async_initialization(self) -> None:
        """Perform async initialization tasks."""
        await asyncio.sleep(0)  # Simulate async work
        logger.debug("Async initialization complete")

    def run(self) -> None:
        """Run the application."""
        # Perform async initialization
        asyncio.run(self._async_initialization())

        # Resolve dependencies
        character_factory = self.injector.resolve(AbstractCharacterFactory)
        output_strategy = self.injector.resolve(OutputStrategyProtocol)
        processor = self.injector.resolve(MessageProcessor)

        # Create orchestrator
        orchestrator = MessageOrchestrator(
            character_factory, output_strategy, processor
        )

        # Generate and output the message
        orchestrator.generate_and_output_message(
            self.context.configuration["word1"], self.context.configuration["word2"]
        )


def main() -> int:
    """Main entry point."""
    try:
        app = HelloWorldApplication()
        app.run()
        return 0
    except Exception as e:
        logger.error(f"Application failed: {e}")
        return 1


if __name__ == "__main__":
    sys.exit(main())

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions