# Source code for libauc.trainer.config.args

import json
import logging
from typing import Any

logger = logging.getLogger(__name__)

class TrainingArguments:
    r"""Container for all hyperparameters and settings that govern a single
    training run.

    All fields map one-to-one to keys in the ``training`` section of a YAML
    config file and can be overridden from the CLI via ``apply_cli_overrides``.

    Args:
        optimizer (str): Name of the libauc optimizer class, e.g. ``"PESG"``,
            ``"PDSCA"``, ``"SOAP"``. Required.
        optimizer_kwargs (dict): Extra keyword arguments forwarded verbatim to
            the optimizer constructor (e.g. ``lr``, ``momentum``,
            ``weight_decay``). Required.
        loss (str): Name of the loss-function class, e.g. ``"AUCMLoss"``,
            ``"CompositionalAUCLoss"``. Looked up first in ``libauc.losses``,
            then ``torch.nn``. Required.
        loss_kwargs (dict): Extra keyword arguments forwarded verbatim to the
            loss constructor. Required.
        SEED (int): Global random seed for NumPy, PyTorch and cuDNN
            (default: ``42``).
        batch_size (int): Mini-batch size for training (default: ``128``).
        eval_batch_size (int): Mini-batch size for evaluation
            (default: ``128``).
        sampling_rate (float): Positive-class sampling rate passed to
            :class:`~libauc.sampler.DualSampler` / ``TriSampler``
            (default: ``0.5``).
        epochs (int): Total number of training epochs (default: ``50``).
        decay_epochs (list): Epoch indices (or fractional multiples of
            ``epochs``) at which the learning-rate / regulariser is decayed.
            Floats are converted to ``int(f * epochs)`` at construction time.
            Required.
        num_workers (int): Number of DataLoader worker processes
            (default: ``2``).
        output_path (str): Root directory for checkpoints and logs
            (default: ``"./output"``).
        num_tasks (int): Number of output tasks / classes. ``1`` -> binary;
            ``>= 3`` -> multi-label with :class:`~libauc.sampler.TriSampler`.
            Required.
        resume_from_checkpoint (bool): Whether to resume from the latest
            checkpoint found in ``output_path/experiment_name``
            (default: ``True``).
        save_checkpoint_every (int): Save a checkpoint every *N* epochs
            (default: ``5``).
        project_name (str): Weights & Biases project name
            (default: ``"libauc"``).
        experiment_name (str): Weights & Biases run name; also used as the
            checkpoint sub-directory. Required.
        verbose (int): Verbosity level. ``0`` = silent; ``1`` = progress bar;
            ``2`` = one line per epoch (default: ``1``).

    Example::

        >>> args = TrainingArguments(
        ...     optimizer="PESG",
        ...     optimizer_kwargs={"lr": 0.1, "momentum": 0.9},
        ...     loss="AUCMLoss",
        ...     loss_kwargs={"margin": 1.0},
        ...     decay_epochs=[],
        ...     num_tasks=1,
        ...     experiment_name="my_experiment",
        ... )
    """

    def __init__(self, **kwargs):
        # -- Core training settings ---------------------------------------
        # Keys with no documented default stay required (KeyError on absence);
        # all others now honour the defaults the docstring advertises —
        # previously every key was mandatory despite the documentation.
        self.optimizer = kwargs.pop("optimizer")
        self.optimizer_kwargs = kwargs.pop("optimizer_kwargs")
        self.loss = kwargs.pop("loss")
        self.loss_kwargs = kwargs.pop("loss_kwargs")
        self.SEED = kwargs.pop("SEED", 42)
        self.batch_size = kwargs.pop("batch_size", 128)
        self.eval_batch_size = kwargs.pop("eval_batch_size", 128)
        self.sampling_rate = kwargs.pop("sampling_rate", 0.5)
        self.epochs = kwargs.pop("epochs", 50)

        # Convert fractional decay epochs (e.g. 0.5 -> half-way point) to
        # absolute epoch indices; integers pass through unchanged.
        self.decay_epochs = [
            int(e * self.epochs) if isinstance(e, float) else e
            for e in kwargs.pop("decay_epochs")
        ]

        self.num_workers = kwargs.pop("num_workers", 2)
        self.output_path = kwargs.pop("output_path", "./output")
        self.num_tasks = kwargs.pop("num_tasks")

        # -- Checkpoint settings ------------------------------------------
        self.resume_from_checkpoint = kwargs.pop("resume_from_checkpoint", True)
        self.save_checkpoint_every = kwargs.pop("save_checkpoint_every", 5)

        # -- Weights & Biases settings ------------------------------------
        self.project_name = kwargs.pop("project_name", "libauc")
        self.experiment_name = kwargs.pop("experiment_name")

        # -- Logging / display --------------------------------------------
        self.verbose = kwargs.pop("verbose", 1)

        # Surface typos instead of silently discarding unknown keys.
        if kwargs:
            logging.getLogger(__name__).warning(
                "Ignoring unrecognized training arguments: %s", sorted(kwargs)
            )
# --------------------------------------------------------------------------- # Default config resolver # ---------------------------------------------------------------------------
def parse_defaultconfig(type_name: str, multilabel: bool = False, kwargs: dict = None):
    r"""Resolve a loss or optimizer name to its canonical
    ``{"optimizer": ..., "loss": ...}`` configuration dict by looking up the
    corresponding :mod:`~trainer.config.spaces` class.

    Supported pairs (the multi-label space variant is selected when
    ``multilabel=True`` and one exists):

    - ``AUCMLoss`` / ``PESG`` -> ``AUCMLossSpace`` (``MultiLabelAUCMLossSpace``)
    - ``CompositionalAUCLoss`` / ``PDSCA`` -> ``CompositionalAUCLossSpace``
    - ``APLoss`` / ``SOAP`` -> ``APLossSpace`` (``mAPLossSpace``)
    - ``pAUC_CVaR_Loss`` / ``SOPA`` / ``pAUCLoss`` with mode ``SOPA``
      -> ``pAUC_CVaR_LossSpace`` (``MultiLabel...`` variant)
    - ``pAUC_DRO_Loss`` / ``SOPAs`` / ``pAUCLoss`` with mode ``1w``
      -> ``pAUC_DRO_LossSpace`` (``MultiLabel...`` variant)
    - ``tpAUC_KL_Loss`` / ``SOTAs`` / ``pAUCLoss`` with mode ``2w``
      -> ``tpAUC_KL_LossSpace`` (``MultiLabel...`` variant)
    - ``tpAUC_CVaR_loss`` / ``STACO`` -> ``tpAUC_CVaR_lossSpace``
    - ``NDCGLoss`` / ``SONG`` -> ``NDCGLossSpace``
    - ``CrossEntropyLoss`` / ``SGD`` -> ``SGDSpace``
    - ``Adam`` -> ``AdamSpace``
    - ``BCELoss`` -> ``BCELossSpace``

    Args:
        type_name (str): Name of the loss or optimizer class.
        multilabel (bool): When ``True``, selects the multi-label variant of
            the space if one exists (default: ``False``).
        kwargs (dict, optional): Additional keyword arguments for the
            loss/optimizer, used to disambiguate ``pAUCLoss`` by its ``mode``
            key. ``None`` is treated as an empty dict.

    Returns:
        dict: ``{"optimizer": <optimizer_cfg>, "loss": <loss_cfg>}`` where each
        config is a dict with at least a ``"type"`` key and a ``"space"`` key
        containing the hyperparameter search space.

    Raises:
        ValueError: If *type_name* is not recognised.

    Example::

        >>> cfg = parse_defaultconfig("AUCMLoss", multilabel=False)
        >>> cfg["optimizer"]["type"]
        'PESG'
        >>> cfg["loss"]["type"]
        'AUCMLoss'
    """
    # ``None`` sentinel instead of a mutable ``{}`` default: the old signature
    # shared one dict object across every call that omitted ``kwargs``.
    if kwargs is None:
        kwargs = {}
    mode = kwargs.get('mode')

    # Imports stay lazy (inside the branches) so that only the space class
    # actually needed is loaded.
    if type_name in ('AUCMLoss', 'PESG'):
        if multilabel:
            from .spaces import MultiLabelAUCMLossSpace as Sp
        else:
            from .spaces import AUCMLossSpace as Sp
    elif type_name in ('CompositionalAUCLoss', 'PDSCA'):
        from .spaces import CompositionalAUCLossSpace as Sp
    elif type_name in ('APLoss', 'SOAP'):
        if multilabel:
            from .spaces import mAPLossSpace as Sp
        else:
            from .spaces import APLossSpace as Sp
    elif type_name in ('pAUC_CVaR_Loss', 'SOPA') or (
        type_name == 'pAUCLoss' and mode == 'SOPA'
    ):
        if multilabel:
            from .spaces import MultiLabelpAUC_CVaR_LossSpace as Sp
        else:
            from .spaces import pAUC_CVaR_LossSpace as Sp
    elif type_name in ('pAUC_DRO_Loss', 'SOPAs') or (
        type_name == 'pAUCLoss' and mode == '1w'
    ):
        if multilabel:
            from .spaces import MultiLabelpAUC_DRO_LossSpace as Sp
        else:
            from .spaces import pAUC_DRO_LossSpace as Sp
    elif type_name in ('tpAUC_KL_Loss', 'SOTAs') or (
        type_name == 'pAUCLoss' and mode == '2w'
    ):
        if multilabel:
            from .spaces import MultiLabeltpAUC_KL_LossSpace as Sp
        else:
            from .spaces import tpAUC_KL_LossSpace as Sp
    elif type_name in ('tpAUC_CVaR_loss', 'STACO'):
        from .spaces import tpAUC_CVaR_lossSpace as Sp
    elif type_name in ('NDCGLoss', 'SONG'):
        from .spaces import NDCGLossSpace as Sp
    elif type_name in ('CrossEntropyLoss', 'SGD'):
        from .spaces import SGDSpace as Sp
    elif type_name in ('Adam',):
        from .spaces import AdamSpace as Sp
    elif type_name in ('BCELoss',):
        from .spaces import BCELossSpace as Sp
    else:
        raise ValueError(f"Unsupported loss/optimizer type: '{type_name}'")

    return {"optimizer": Sp.optimizer, "loss": Sp.loss}