import json
import logging
from typing import Any
logger = logging.getLogger(__name__)
# [docs]  (Sphinx cross-reference artifact from the rendered docs; kept as a comment)
class TrainingArguments:
    r"""
    Container for all hyperparameters and settings that govern a single
    training run.

    All fields map one-to-one to keys in the ``training`` section of a YAML
    config file and can be overridden from the CLI via ``apply_cli_overrides``.
    Keys with a documented default below may be omitted; the remaining keys
    (``optimizer``, ``optimizer_kwargs``, ``loss``, ``loss_kwargs``,
    ``decay_epochs``, ``num_tasks``, ``experiment_name``) are required and a
    :class:`KeyError` is raised when one is missing.

    Args:
        optimizer (str): Name of the libauc optimizer class, e.g. ``"PESG"``,
            ``"PDSCA"``, ``"SOAP"``.
        optimizer_kwargs (dict): Extra keyword arguments forwarded verbatim to
            the optimizer constructor (e.g. ``lr``, ``momentum``,
            ``weight_decay``).
        loss (str): Name of the loss-function class, e.g. ``"AUCMLoss"``,
            ``"CompositionalAUCLoss"``. Looked up first in
            ``libauc.losses``, then ``torch.nn``.
        loss_kwargs (dict): Extra keyword arguments forwarded verbatim to the
            loss constructor.
        SEED (int): Global random seed for NumPy, PyTorch and cuDNN
            (default: ``42``).
        batch_size (int): Mini-batch size for training (default: ``128``).
        eval_batch_size (int): Mini-batch size for evaluation
            (default: ``128``).
        sampling_rate (float): Positive-class sampling rate passed to
            :class:`~libauc.sampler.DualSampler` / ``TriSampler``
            (default: ``0.5``).
        epochs (int): Total number of training epochs (default: ``50``).
        decay_epochs (list): Epoch indices (or fractional multiples of
            ``epochs``) at which the learning-rate / regulariser is decayed.
            Floats are converted to ``int(f * epochs)`` at construction time.
        num_workers (int): Number of DataLoader worker processes
            (default: ``2``).
        output_path (str): Root directory for checkpoints and logs
            (default: ``"./output"``).
        num_tasks (int): Number of output tasks / classes. ``1`` → binary;
            ``≥ 3`` → multi-label with :class:`~libauc.sampler.TriSampler`.
        resume_from_checkpoint (bool): Whether to resume from the latest
            checkpoint found in ``output_path/experiment_name``
            (default: ``True``).
        save_checkpoint_every (int): Save a checkpoint every *N* epochs
            (default: ``5``).
        project_name (str): Weights & Biases project name
            (default: ``"libauc"``).
        experiment_name (str): Weights & Biases run name; also used as the
            checkpoint sub-directory.
        verbose (int): Verbosity level. ``0`` = silent; ``1`` = progress bar;
            ``2`` = one line per epoch (default: ``1``).

    Example::

        >>> args = TrainingArguments(
        ...     optimizer="PESG",
        ...     optimizer_kwargs={"lr": 0.1, "momentum": 0.9},
        ...     loss="AUCMLoss",
        ...     loss_kwargs={"margin": 1.0},
        ...     SEED=42,
        ...     batch_size=128,
        ...     eval_batch_size=128,
        ...     sampling_rate=0.5,
        ...     epochs=50,
        ...     decay_epochs=[],
        ...     num_workers=2,
        ...     output_path="./output",
        ...     num_tasks=1,
        ...     resume_from_checkpoint=True,
        ...     save_checkpoint_every=5,
        ...     project_name="libauc",
        ...     experiment_name="my_experiment",
        ...     verbose=1,
        ... )
    """

    def __init__(self, **kwargs):
        # ── Core training settings ───────────────────────────────────────────
        # No sensible defaults exist for these; a missing key is a config error
        # and the resulting KeyError names the offending key.
        self.optimizer = kwargs.pop("optimizer")
        self.optimizer_kwargs = kwargs.pop("optimizer_kwargs")
        self.loss = kwargs.pop("loss")
        self.loss_kwargs = kwargs.pop("loss_kwargs")
        # Defaults below mirror the class docstring so configs may omit them.
        self.SEED = kwargs.pop("SEED", 42)
        self.batch_size = kwargs.pop("batch_size", 128)
        self.eval_batch_size = kwargs.pop("eval_batch_size", 128)
        self.sampling_rate = kwargs.pop("sampling_rate", 0.5)
        self.epochs = kwargs.pop("epochs", 50)
        # Fractional entries are fractions of the total epoch count; resolve
        # them to absolute epoch indices once, at construction time.
        self.decay_epochs = [
            int(e * self.epochs) if isinstance(e, float) else e
            for e in kwargs.pop("decay_epochs")
        ]
        self.num_workers = kwargs.pop("num_workers", 2)
        self.output_path = kwargs.pop("output_path", "./output")
        self.num_tasks = kwargs.pop("num_tasks")
        # ── Checkpoint settings ──────────────────────────────────────────────
        self.resume_from_checkpoint = kwargs.pop("resume_from_checkpoint", True)
        self.save_checkpoint_every = kwargs.pop("save_checkpoint_every", 5)
        # ── Weights & Biases settings ────────────────────────────────────────
        self.project_name = kwargs.pop("project_name", "libauc")
        self.experiment_name = kwargs.pop("experiment_name")
        # ── Logging / display ────────────────────────────────────────────────
        self.verbose = kwargs.pop("verbose", 1)
        # Surface likely typos in config keys instead of silently dropping them.
        if kwargs:
            logging.getLogger(__name__).warning(
                "Ignoring unrecognised training arguments: %s", sorted(kwargs)
            )
# ---------------------------------------------------------------------------
# Default config resolver
# ---------------------------------------------------------------------------
# [docs]  (Sphinx cross-reference artifact from the rendered docs; kept as a comment)
def parse_defaultconfig(type_name: str, multilabel: bool = False, kwargs: dict = None) -> dict:
    r"""
    Resolve a loss or optimizer name to its canonical ``{optimizer, loss}``
    configuration dict by looking up the corresponding
    :mod:`~trainer.config.spaces` class.

    The mapping covers every loss/optimizer pair supported by libauc:

    +-----------------------------+------------------------------+
    | ``type_name``               | Space class                  |
    +=============================+==============================+
    | ``AUCMLoss`` / ``PESG``     | ``AUCMLossSpace``            |
    |                             | (``MultiLabelAUCMLossSpace`` |
    |                             | when ``multilabel=True``)    |
    +-----------------------------+------------------------------+
    | ``CompositionalAUCLoss`` /  | ``CompositionalAUCLossSpace``|
    | ``PDSCA``                   |                              |
    +-----------------------------+------------------------------+
    | ``APLoss`` / ``SOAP``       | ``APLossSpace``              |
    |                             | (``mAPLossSpace`` when       |
    |                             | ``multilabel=True``)         |
    +-----------------------------+------------------------------+
    | ``pAUC_CVaR_Loss`` /        | ``pAUC_CVaR_LossSpace``      |
    | ``SOPA`` / ``pAUCLoss``     | (``MultiLabel…`` variant)    |
    | mode ``SOPA``               |                              |
    +-----------------------------+------------------------------+
    | ``pAUC_DRO_Loss`` /         | ``pAUC_DRO_LossSpace``       |
    | ``SOPAs`` / ``pAUCLoss``    | (``MultiLabel…`` variant)    |
    | mode ``1w``                 |                              |
    +-----------------------------+------------------------------+
    | ``tpAUC_KL_Loss`` /         | ``tpAUC_KL_LossSpace``       |
    | ``SOTAs`` / ``pAUCLoss``    | (``MultiLabel…`` variant)    |
    | mode ``2w``                 |                              |
    +-----------------------------+------------------------------+
    | ``tpAUC_CVaR_loss`` /       | ``tpAUC_CVaR_lossSpace``     |
    | ``STACO``                   |                              |
    +-----------------------------+------------------------------+
    | ``NDCGLoss`` / ``SONG``     | ``NDCGLossSpace``            |
    +-----------------------------+------------------------------+
    | ``CrossEntropyLoss`` /      | ``SGDSpace``                 |
    | ``SGD``                     |                              |
    +-----------------------------+------------------------------+
    | ``Adam``                    | ``AdamSpace``                |
    +-----------------------------+------------------------------+
    | ``BCELoss``                 | ``BCELossSpace``             |
    +-----------------------------+------------------------------+

    Args:
        type_name (str): Name of the loss or optimizer class.
        multilabel (bool): When ``True``, selects the multi-label variant of
            the space if one exists (default: ``False``).
        kwargs (dict, optional): Additional keyword arguments for the
            loss/optimizer, used to disambiguate ``pAUCLoss`` by its ``mode``
            key. ``None`` (the default) is treated as an empty dict.

    Returns:
        dict: ``{"optimizer": <optimizer_cfg>, "loss": <loss_cfg>}`` where
        each config is a dict with at least a ``"type"`` key and a ``"space"``
        key containing the hyperparameter search space.

    Raises:
        ValueError: If *type_name* is not recognised.

    Example::

        >>> cfg = parse_defaultconfig("AUCMLoss", multilabel=False)
        >>> cfg["optimizer"]["type"]
        'PESG'
        >>> cfg["loss"]["type"]
        'AUCMLoss'
    """
    # A None sentinel avoids the shared-mutable-default pitfall; the dict is
    # only read below, but callers may now also pass None explicitly.
    if kwargs is None:
        kwargs = {}
    # ``pAUCLoss`` is an umbrella name; its ``mode`` key picks the concrete
    # partial-AUC formulation (SOPA / 1w / 2w).
    mode = kwargs.get('mode')
    if type_name in ('AUCMLoss', 'PESG'):
        if multilabel:
            from .spaces import MultiLabelAUCMLossSpace as Sp
        else:
            from .spaces import AUCMLossSpace as Sp
    elif type_name in ('CompositionalAUCLoss', 'PDSCA'):
        from .spaces import CompositionalAUCLossSpace as Sp
    elif type_name in ('APLoss', 'SOAP'):
        if multilabel:
            from .spaces import mAPLossSpace as Sp
        else:
            from .spaces import APLossSpace as Sp
    elif type_name in ('pAUC_CVaR_Loss', 'SOPA') or (
        type_name == 'pAUCLoss' and mode == 'SOPA'
    ):
        if multilabel:
            from .spaces import MultiLabelpAUC_CVaR_LossSpace as Sp
        else:
            from .spaces import pAUC_CVaR_LossSpace as Sp
    elif type_name in ('pAUC_DRO_Loss', 'SOPAs') or (
        type_name == 'pAUCLoss' and mode == '1w'
    ):
        if multilabel:
            from .spaces import MultiLabelpAUC_DRO_LossSpace as Sp
        else:
            from .spaces import pAUC_DRO_LossSpace as Sp
    elif type_name in ('tpAUC_KL_Loss', 'SOTAs') or (
        type_name == 'pAUCLoss' and mode == '2w'
    ):
        if multilabel:
            from .spaces import MultiLabeltpAUC_KL_LossSpace as Sp
        else:
            from .spaces import tpAUC_KL_LossSpace as Sp
    elif type_name in ('tpAUC_CVaR_loss', 'STACO'):
        from .spaces import tpAUC_CVaR_lossSpace as Sp
    elif type_name in ('NDCGLoss', 'SONG'):
        from .spaces import NDCGLossSpace as Sp
    elif type_name in ('CrossEntropyLoss', 'SGD'):
        from .spaces import SGDSpace as Sp
    elif type_name in ('Adam',):
        from .spaces import AdamSpace as Sp
    elif type_name in ('BCELoss',):
        from .spaces import BCELossSpace as Sp
    else:
        raise ValueError(f"Unsupported loss/optimizer type: '{type_name}'")
    return {"optimizer": Sp.optimizer, "loss": Sp.loss}