Source code for nlsq.caching.memory_pool

"""Memory pool for optimization algorithms.

This module provides memory pool allocation to reduce overhead from
repeated array allocations in optimization loops.

Key Features (Task Group 5):
- Size-class bucketing: Round shapes to nearest 1KB/10KB/100KB for 5x reuse increase
- Reuse statistics tracking: Monitor reuse_rate = reused_allocations / total_allocations
- Adaptive sizing: Small arrays (1KB buckets), medium (10KB), large (100KB)
"""

import warnings
from typing import Any

import jax.numpy as jnp
import numpy as np



[docs]
def round_to_bucket(nbytes: int) -> int:
    """Round memory size to nearest bucket for better pool reuse.

    Uses tiered bucketing strategy (Task 5.4):
    - Small arrays (<10KB): Round to nearest 1KB
    - Medium arrays (10KB-100KB): Round to nearest 10KB
    - Large arrays (>100KB): Round to nearest 100KB

    Parameters
    ----------
    nbytes : int
        Memory size in bytes

    Returns
    -------
    bucketed_bytes : int
        Rounded memory size for bucketing

    Examples
    --------
    >>> round_to_bucket(800)    # Small array
    1024                         # Rounded to 1KB
    >>> round_to_bucket(8500)   # Medium array
    10240                        # Rounded to 10KB
    >>> round_to_bucket(85000)  # Large array
    102400                       # Rounded to 100KB
    """
    KB = 1024
    BUCKET_1KB = 1 * KB
    BUCKET_10KB = 10 * KB
    BUCKET_100KB = 100 * KB

    if nbytes < 10 * KB:
        # Small arrays: round to nearest 1KB
        return ((nbytes + BUCKET_1KB - 1) // BUCKET_1KB) * BUCKET_1KB
    elif nbytes < 100 * KB:
        # Medium arrays: round to nearest 10KB
        return ((nbytes + BUCKET_10KB - 1) // BUCKET_10KB) * BUCKET_10KB
    else:
        # Large arrays: round to nearest 100KB
        return ((nbytes + BUCKET_100KB - 1) // BUCKET_100KB) * BUCKET_100KB




[docs]
class MemoryPool:
    """Memory pool for reusable array buffers.

    Pre-allocates buffers for common array shapes to avoid repeated
    allocations during optimization iterations.

    Attributes
    ----------
    pools : dict
        Dictionary mapping (shape, dtype) to list of available buffers
    allocated : dict
        Dictionary tracking allocated buffers
    max_pool_size : int
        Maximum number of buffers per shape/dtype combination
    stats : dict
        Statistics on pool usage
    """


[docs]
    def __init__(
        self,
        max_pool_size: int = 10,
        enable_stats: bool = False,
        enable_bucketing: bool = True,
    ):
        """Initialize memory pool.

        Parameters
        ----------
        max_pool_size : int
            Maximum number of buffers to keep per shape/dtype
        enable_stats : bool
            Track allocation statistics
        enable_bucketing : bool
            Enable size-class bucketing for better reuse (Task 5.4)
        """
        self.pools: dict[tuple, list[Any]] = {}
        self.allocated: dict[int, tuple] = {}
        self.max_pool_size = max_pool_size
        self.enable_stats = enable_stats
        self.enable_bucketing = enable_bucketing

        if enable_stats:
            self.stats = {
                "allocations": 0,
                "reuses": 0,
                "releases": 0,
                "peak_memory": 0,
                "total_operations": 0,
            }


    def _get_pool_key(self, shape: tuple, dtype: type) -> tuple:
        """Get pool key with optional size-class bucketing.

        Parameters
        ----------
        shape : tuple
            Array shape
        dtype : type
            Array data type

        Returns
        -------
        key : tuple
            Pool key (bucketed_shape, dtype) or (shape, dtype)
        """
        if not self.enable_bucketing:
            return (shape, dtype)

        # Calculate total bytes
        nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize

        # Round to bucket
        bucketed_bytes = round_to_bucket(nbytes)

        # Calculate bucketed shape (maintain dimensions, scale proportionally)
        itemsize = np.dtype(dtype).itemsize
        bucketed_elements = bucketed_bytes // itemsize

        # For simplicity, keep same number of dimensions
        # but adjust total size to match bucket
        bucketed_shape: tuple[int, ...]
        if len(shape) == 1:
            bucketed_shape = (bucketed_elements,)
        else:
            # Scale all dimensions proportionally
            scale_factor = (bucketed_elements / np.prod(shape)) ** (1 / len(shape))
            bucketed_shape = tuple(max(1, int(dim * scale_factor)) for dim in shape)

        return (bucketed_shape, dtype)


[docs]
    def allocate(self, shape: tuple, dtype: type = jnp.float64) -> jnp.ndarray:
        """Allocate array from pool or create new one.

        Parameters
        ----------
        shape : tuple
            Shape of array to allocate
        dtype : type
            Data type of array

        Returns
        -------
        array : jnp.ndarray
            Allocated array (may be reused from pool)

        Notes
        -----
        When bucketing is enabled, arrays are pooled by size classes (1KB/10KB/100KB)
        for better reuse rates (Task 5.4).
        """
        pool_key = self._get_pool_key(shape, dtype)

        if self.enable_stats:
            self.stats["total_operations"] += 1

        # Try to reuse from pool
        if self.pools.get(pool_key):
            pooled_arr = self.pools[pool_key].pop()

            if self.enable_stats:
                self.stats["reuses"] += 1

            # Reuse the pooled array if its shape matches exactly;
            # otherwise allocate with the exact requested shape
            # (bucketed pool keys may group different shapes together)
            if pooled_arr.shape == shape and pooled_arr.dtype == dtype:
                arr = jnp.zeros_like(pooled_arr)
            else:
                arr = jnp.zeros(shape, dtype=dtype)
            self.allocated[id(arr)] = (shape, dtype)
            return arr

        # Allocate new array
        arr = jnp.zeros(shape, dtype=dtype)
        self.allocated[id(arr)] = (shape, dtype)

        if self.enable_stats:
            self.stats["allocations"] += 1
            current_mem = sum(
                np.prod(k[0]) * np.dtype(k[1]).itemsize for k in self.allocated.values()
            )
            self.stats["peak_memory"] = max(self.stats["peak_memory"], current_mem)

        return arr



[docs]
    def release(self, arr: jnp.ndarray):
        """Return array to pool for reuse.

        Parameters
        ----------
        arr : jnp.ndarray
            Array to return to pool

        Notes
        -----
        When bucketing is enabled, arrays are stored in size-class buckets
        for better reuse (Task 5.4).
        """
        arr_id = id(arr)

        if arr_id not in self.allocated:
            warnings.warn("Attempting to release array not from pool")
            return

        actual_key = self.allocated.pop(arr_id)
        shape, dtype = actual_key

        # Get pool key (with bucketing if enabled)
        pool_key = self._get_pool_key(shape, dtype)

        # Add to pool if not full
        if pool_key not in self.pools:
            self.pools[pool_key] = []

        if len(self.pools[pool_key]) < self.max_pool_size:
            self.pools[pool_key].append(arr)

            if self.enable_stats:
                self.stats["releases"] += 1



[docs]
    def clear(self):
        """Clear all pools and reset statistics."""
        self.pools.clear()
        self.allocated.clear()

        if self.enable_stats:
            self.stats = {
                "allocations": 0,
                "reuses": 0,
                "releases": 0,
                "peak_memory": 0,
                "total_operations": 0,
            }



[docs]
    def get_stats(self) -> dict:
        """Get pool usage statistics.

        Returns
        -------
        stats : dict
            Pool usage statistics including reuse_rate (Task 5.5)

        Notes
        -----
        reuse_rate = reused_allocations / total_allocations
        With bucketing enabled, expect 5x higher reuse rates.
        """
        if not self.enable_stats:
            return {"enabled": False}

        total_ops = self.stats["allocations"] + self.stats["reuses"]
        reuse_rate = self.stats["reuses"] / total_ops if total_ops > 0 else 0.0

        stats_dict = {
            **self.stats,
            "reuse_rate": reuse_rate,
            "pool_sizes": {k: len(v) for k, v in self.pools.items()},
            "currently_allocated": len(self.allocated),
            "bucketing_enabled": self.enable_bucketing,
        }

        # Add reuse statistics (Task 5.5)
        if total_ops > 0:
            stats_dict["total_allocations"] = total_ops
            stats_dict["reused_allocations"] = self.stats["reuses"]
            stats_dict["new_allocations"] = self.stats["allocations"]

        return stats_dict



[docs]
    def __enter__(self):
        """Context manager entry."""
        return self



[docs]
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - clear pool."""
        self.clear()
        return False





[docs]
class TRFMemoryPool:
    """Specialized memory pool for Trust Region Reflective algorithm.

    Pre-allocates buffers for common TRF operations.

    Parameters
    ----------
    m : int
        Number of residuals
    n : int
        Number of parameters
    dtype : type
        Data type for arrays
    """


[docs]
    def __init__(self, m: int, n: int, dtype: type = jnp.float64):
        """Initialize TRF memory pool.

        Parameters
        ----------
        m : int
            Number of residuals
        n : int
            Number of parameters
        dtype : type
            Data type
        """
        self.m = m
        self.n = n
        self.dtype = dtype

        # Pre-allocate common buffers
        self.jacobian_buffer = jnp.zeros((m, n), dtype=dtype)
        self.residual_buffer = jnp.zeros(m, dtype=dtype)
        self.gradient_buffer = jnp.zeros(n, dtype=dtype)
        self.step_buffer = jnp.zeros(n, dtype=dtype)
        self.x_buffer = jnp.zeros(n, dtype=dtype)

        # Temporary buffers for trust region subproblem
        self.temp_vec_n = jnp.zeros(n, dtype=dtype)
        self.temp_vec_m = jnp.zeros(m, dtype=dtype)



[docs]
    def get_jacobian_buffer(self) -> jnp.ndarray:
        """Get Jacobian buffer (m×n)."""
        return self.jacobian_buffer



[docs]
    def get_residual_buffer(self) -> jnp.ndarray:
        """Get residual buffer (m)."""
        return self.residual_buffer



[docs]
    def get_gradient_buffer(self) -> jnp.ndarray:
        """Get gradient buffer (n)."""
        return self.gradient_buffer



[docs]
    def get_step_buffer(self) -> jnp.ndarray:
        """Get step buffer (n)."""
        return self.step_buffer



[docs]
    def get_x_buffer(self) -> jnp.ndarray:
        """Get parameter buffer (n)."""
        return self.x_buffer



[docs]
    def reset(self):
        """Reset all buffers to zero."""
        self.jacobian_buffer = jnp.zeros((self.m, self.n), dtype=self.dtype)
        self.residual_buffer = jnp.zeros(self.m, dtype=self.dtype)
        self.gradient_buffer = jnp.zeros(self.n, dtype=self.dtype)
        self.step_buffer = jnp.zeros(self.n, dtype=self.dtype)
        self.x_buffer = jnp.zeros(self.n, dtype=self.dtype)
        self.temp_vec_n = jnp.zeros(self.n, dtype=self.dtype)
        self.temp_vec_m = jnp.zeros(self.m, dtype=self.dtype)




# Global memory pool (optional, for convenience)
_global_pool: MemoryPool | None = None



[docs]
def get_global_pool(enable_stats: bool = False) -> MemoryPool:
    """Get or create global memory pool.

    Parameters
    ----------
    enable_stats : bool
        Enable statistics tracking

    Returns
    -------
    pool : MemoryPool
        Global memory pool instance
    """
    global _global_pool  # noqa: PLW0603
    if _global_pool is None:
        _global_pool = MemoryPool(enable_stats=enable_stats)
    else:
        # Update enable_stats on existing pool to handle parallel test execution
        if enable_stats:
            # Ensure stats dict exists when enabling stats
            if not hasattr(_global_pool, "stats"):
                _global_pool.stats = {
                    "allocations": 0,
                    "reuses": 0,
                    "releases": 0,
                    "peak_memory": 0,
                    "total_operations": 0,
                }
        _global_pool.enable_stats = enable_stats
    return _global_pool




[docs]
def clear_global_pool():
    """Clear the global memory pool.

    Notes
    -----
    For test isolation, this resets the global pool to None,
    forcing fresh initialization on next access.
    """
    global _global_pool  # noqa: PLW0603
    if _global_pool is not None:
        _global_pool.clear()
        _global_pool = None