# Source code for nlsq.global_optimization.config

"""
Global Optimization Configuration
=================================

Configuration dataclass for multi-start optimization with Latin Hypercube Sampling.

This module provides the GlobalOptimizationConfig dataclass which controls all
aspects of multi-start global optimization, including sampling strategy, starting
point generation, and tournament selection for large datasets.

Examples
--------
Basic configuration with defaults:

>>> from nlsq.global_optimization import GlobalOptimizationConfig
>>> config = GlobalOptimizationConfig()
>>> config.n_starts
10

Using presets:

>>> config = GlobalOptimizationConfig.from_preset('robust')
>>> config.n_starts
5

>>> config = GlobalOptimizationConfig.from_preset('global')
>>> config.n_starts
20

Custom configuration:

>>> config = GlobalOptimizationConfig(
...     n_starts=30,
...     sampler='sobol',
...     center_on_p0=True,
...     scale_factor=0.5,
... )
"""

import math
from dataclasses import dataclass, field, fields
from typing import Any, Literal

# Import CMA-ES presets to include in global PRESETS
from nlsq.global_optimization.cmaes_config import CMAES_PRESETS

# Multi-start presets (standard optimization), one per common use case.
# Every preset uses the 'lhs' sampler, a scale factor of 1.0 and an
# elimination fraction of 0.5; only the values listed per row below vary.
_MULTISTART_PRESETS: dict[str, dict[str, Any]] = {
    preset_name: {
        "n_starts": start_count,
        "sampler": "lhs",
        "center_on_p0": centered,
        "scale_factor": 1.0,
        "elimination_rounds": round_count,
        "elimination_fraction": 0.5,
        "batches_per_round": batch_count,
    }
    for preset_name, start_count, centered, round_count, batch_count in (
        # name, n_starts, center_on_p0, elimination_rounds, batches_per_round
        ("fast", 0, False, 0, 50),
        ("robust", 5, True, 2, 50),
        ("global", 20, True, 3, 100),
        ("thorough", 50, True, 4, 150),
        ("streaming", 10, True, 3, 50),
    )
}

# Public lookup table covering both preset families:
#   - multi-start presets: 'fast', 'robust', 'global', 'thorough', 'streaming'
#   - CMA-ES presets: 'cmaes-fast', 'cmaes', 'cmaes-global'
# The CMA-ES entries are applied last, so they win on any name collision.
PRESETS: dict[str, dict[str, Any]] = dict(_MULTISTART_PRESETS)
PRESETS.update(CMAES_PRESETS)



[docs]
@dataclass(slots=True)
class GlobalOptimizationConfig:
    """Configuration for multi-start global optimization.

    This configuration class controls all aspects of multi-start optimization
    with Latin Hypercube Sampling or other quasi-random samplers.

    Parameters
    ----------
    n_starts : int, default=10
        Number of starting points to generate. Set to 0 to disable multi-start.

    sampler : {'lhs', 'sobol', 'halton'}, default='lhs'
        Sampling strategy for generating starting points:

        - 'lhs': Latin Hypercube Sampling (recommended, stratified random)
        - 'sobol': Sobol quasi-random sequence (deterministic, low-discrepancy)
        - 'halton': Halton quasi-random sequence (deterministic, prime bases)

    center_on_p0 : bool, default=True
        Whether to center starting points around the initial parameter guess (p0).
        When True, samples are generated in a region around p0 rather than
        uniformly in the full parameter bounds.

    scale_factor : float, default=1.0
        Scale factor for exploration region when center_on_p0=True.
        Multiplier for the exploration range around p0.
        Smaller values (0.5) = tighter exploration around p0.
        Larger values (2.0) = wider exploration.

    elimination_rounds : int, default=3
        Number of tournament elimination rounds for large datasets.
        Each round eliminates a fraction of candidates based on loss.

    elimination_fraction : float, default=0.5
        Fraction of candidates to eliminate in each tournament round.
        Must be in (0, 1). Default 0.5 eliminates half in each round.

    batches_per_round : int, default=50
        Number of data batches to use for evaluation in each tournament round.
        More batches = more reliable selection but slower.

    Examples
    --------
    >>> config = GlobalOptimizationConfig(n_starts=20, sampler='sobol')
    >>> config.n_starts
    20

    >>> config = GlobalOptimizationConfig.from_preset('global')
    >>> config.n_starts
    20

    Notes
    -----
    - When n_starts=0, multi-start is disabled and standard single-start
      optimization is used.
    - Tournament selection (elimination_rounds > 0) is designed for streaming
      datasets where evaluating all candidates on the full dataset is impractical.
    - LHS provides better coverage guarantees than Sobol/Halton for small N,
      while Sobol/Halton are deterministic and may be preferred for reproducibility.

    See Also
    --------
    MultiStartOrchestrator : Orchestrates multi-start optimization
    TournamentSelector : Implements tournament selection for large datasets
    """

    # Sampling configuration
    n_starts: int = 10
    sampler: Literal["lhs", "sobol", "halton"] = "lhs"

    # Centering configuration
    center_on_p0: bool = True
    scale_factor: float = 1.0

    # Tournament selection for large datasets
    elimination_rounds: int = 3
    elimination_fraction: float = 0.5
    batches_per_round: int = 50

    # Private field for tracking preset origin (not user-configurable)
    _preset: str | None = field(default=None, repr=False)


[docs]
    def __post_init__(self):
        """Validate configuration after initialization."""
        # Validate n_starts
        if self.n_starts < 0:
            raise ValueError(f"n_starts must be non-negative, got {self.n_starts}")

        # Validate sampler
        valid_samplers = ("lhs", "sobol", "halton")
        if self.sampler.lower() not in valid_samplers:
            raise ValueError(
                f"sampler must be one of {valid_samplers}, got '{self.sampler}'"
            )
        # Normalize sampler to lowercase
        object.__setattr__(self, "sampler", self.sampler.lower())

        # Validate scale_factor
        if self.scale_factor <= 0:
            raise ValueError(f"scale_factor must be positive, got {self.scale_factor}")

        # Validate elimination_fraction
        if not 0 < self.elimination_fraction < 1:
            raise ValueError(
                f"elimination_fraction must be in (0, 1), got {self.elimination_fraction}"
            )

        # Validate elimination_rounds
        if self.elimination_rounds < 0:
            raise ValueError(
                f"elimination_rounds must be non-negative, got {self.elimination_rounds}"
            )

        # Validate batches_per_round
        if self.batches_per_round <= 0:
            raise ValueError(
                f"batches_per_round must be positive, got {self.batches_per_round}"
            )

        # Validate parameter combinations
        if self.n_starts == 0 and self.elimination_rounds > 0:
            # Tournament selection makes no sense with 0 starts
            # Silently set elimination_rounds to 0
            object.__setattr__(self, "elimination_rounds", 0)



[docs]
    @classmethod
    def from_preset(cls, preset_name: str) -> "GlobalOptimizationConfig":
        """Create configuration from a named preset.

        Parameters
        ----------
        preset_name : str
            Name of the preset. One of: 'fast', 'robust', 'global',
            'thorough', 'streaming'.

        Returns
        -------
        GlobalOptimizationConfig
            Configuration instance with preset values.

        Raises
        ------
        ValueError
            If preset_name is not a known preset.

        Examples
        --------
        >>> config = GlobalOptimizationConfig.from_preset('robust')
        >>> config.n_starts
        5

        >>> config = GlobalOptimizationConfig.from_preset('global')
        >>> config.n_starts
        20
        """
        preset_name_lower = preset_name.lower()
        if preset_name_lower not in _MULTISTART_PRESETS:
            valid_presets = list(_MULTISTART_PRESETS.keys())
            raise ValueError(
                f"Unknown preset '{preset_name}'. Valid presets: {valid_presets}"
            )

        preset_values = _MULTISTART_PRESETS[preset_name_lower].copy()
        preset_values["_preset"] = preset_name_lower
        return cls(**preset_values)


    @property
    def is_multistart_enabled(self) -> bool:
        """Whether multi-start optimization is enabled.

        Returns
        -------
        bool
            True if n_starts > 0, False otherwise.
        """
        return self.n_starts > 0

    @property
    def preset(self) -> str | None:
        """The preset name if this config was created from a preset.

        Returns
        -------
        str or None
            Preset name ('fast', 'robust', etc.) or None if custom.
        """
        return self._preset


[docs]
    def to_dict(self) -> dict[str, Any]:
        """Serialize configuration to a dictionary.

        Returns
        -------
        dict
            Dictionary representation suitable for JSON serialization
            or checkpoint saving.

        Examples
        --------
        >>> config = GlobalOptimizationConfig(n_starts=20)
        >>> d = config.to_dict()
        >>> d['n_starts']
        20
        """
        return {
            "n_starts": self.n_starts,
            "sampler": self.sampler,
            "center_on_p0": self.center_on_p0,
            "scale_factor": self.scale_factor,
            "elimination_rounds": self.elimination_rounds,
            "elimination_fraction": self.elimination_fraction,
            "batches_per_round": self.batches_per_round,
            "_preset": self._preset,
        }



[docs]
    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "GlobalOptimizationConfig":
        """Deserialize configuration from a dictionary.

        Parameters
        ----------
        d : dict
            Dictionary with configuration values.

        Returns
        -------
        GlobalOptimizationConfig
            Configuration instance.

        Examples
        --------
        >>> d = {'n_starts': 20, 'sampler': 'sobol'}
        >>> config = GlobalOptimizationConfig.from_dict(d)
        >>> config.n_starts
        20
        """
        # Filter to known fields
        known_fields = {
            "n_starts",
            "sampler",
            "center_on_p0",
            "scale_factor",
            "elimination_rounds",
            "elimination_fraction",
            "batches_per_round",
            "_preset",
        }
        filtered = {k: v for k, v in d.items() if k in known_fields}
        return cls(**filtered)



[docs]
    def with_overrides(self, **kwargs: Any) -> "GlobalOptimizationConfig":
        """Create a new config with specified overrides.

        Parameters
        ----------
        **kwargs
            Configuration fields to override.

        Returns
        -------
        GlobalOptimizationConfig
            New configuration with overrides applied.

        Examples
        --------
        >>> config = GlobalOptimizationConfig.from_preset('robust')
        >>> config2 = config.with_overrides(n_starts=10)
        >>> config2.n_starts
        10
        """
        d = self.to_dict()
        d.update(kwargs)
        # Clear preset if we're overriding values
        if kwargs and "_preset" not in kwargs:
            d["_preset"] = None
        return self.from_dict(d)