CyxWiz Documentation
Docs › Loss

Loss Functions API Reference

Loss function implementations for training neural networks with GPU acceleration.

Base Loss Class

class CYXWIZ_API Loss {
public:
    Loss(ReductionType reduction = ReductionType::Mean);
    virtual ~Loss() = default;

    // Compute loss
    virtual Tensor Forward(const Tensor& predictions,
                           const Tensor& targets) = 0;

    // Call operator
    Tensor operator()(const Tensor& predictions, const Tensor& targets);

protected:
    ReductionType reduction_;
};

enum class ReductionType {
    None,   // Return per-element loss
    Mean,   // Average over all elements
    Sum     // Sum all elements
};

Classification Losses

CrossEntropyLoss
CrossEntropyLoss(ReductionType reduction = ReductionType::Mean,
                 const Tensor& weight = Tensor(),  // Class weights
                 int ignore_index = -100,
                 float label_smoothing = 0.0f);

// predictions: (N, C) logits (NOT softmax)
// targets: (N,) integer class indices

Usage

CrossEntropyLoss ce_loss;

Tensor logits = Randn({32, 10});  // Batch of 32, 10 classes
Tensor targets = ...;             // Integer tensor (32,) with values 0-9

Tensor loss = ce_loss(logits, targets);

// With label smoothing
CrossEntropyLoss smooth_ce(ReductionType::Mean, Tensor(), -100, 0.1f);
BCEWithLogitsLoss
BCEWithLogitsLoss(ReductionType reduction = ReductionType::Mean,
                  const Tensor& weight = Tensor(),
                  const Tensor& pos_weight = Tensor());

// predictions: (N, *) logits
// targets: (N, *) float 0 or 1

Usage

BCEWithLogitsLoss bce_loss;

Tensor logits = Randn({32, 1});    // Binary classification
Tensor targets = ...;              // Float tensor (32, 1) with 0.0 or 1.0

Tensor loss = bce_loss(logits, targets);

// With positive class weight for imbalanced binary
Tensor pos_weight = Full({1}, 2.0f);  // Weight positive class 2x
BCEWithLogitsLoss balanced_bce(ReductionType::Mean, Tensor(), pos_weight);
FocalLoss

For handling class imbalance by down-weighting easy examples.

FocalLoss(float alpha = 0.25f,
          float gamma = 2.0f,
          ReductionType reduction = ReductionType::Mean);

// Algorithm:
// p = sigmoid(predictions)
// p_t = p * targets + (1 - p) * (1 - targets)
// focal_weight = alpha * (1 - p_t)^gamma

Usage

// Object detection with class imbalance
FocalLoss focal(0.25f, 2.0f);

Tensor predictions = ...;
Tensor targets = ...;
Tensor loss = focal(predictions, targets);

Regression Losses

MSELoss
MSELoss(ReductionType reduction
        = ReductionType::Mean);

// loss = (predictions - targets)^2

MSELoss mse_loss;
Tensor loss = mse_loss(predictions, targets);
L1Loss
L1Loss(ReductionType reduction
       = ReductionType::Mean);

// loss = |predictions - targets|

L1Loss l1_loss;
Tensor loss = l1_loss(predictions, targets);
SmoothL1Loss (Huber)
SmoothL1Loss(float beta = 1.0f,
             ReductionType reduction
             = ReductionType::Mean);

// loss = 0.5 * x^2 / beta  if |x| < beta
//      = |x| - 0.5 * beta  otherwise

// More robust to outliers than MSE
SmoothL1Loss huber_loss(1.0f);
TripletMarginLoss
TripletMarginLoss(float margin = 1.0f,
                  float p = 2.0f,
                  bool swap = false);

// loss = max(d(anchor, positive)
//          - d(anchor, negative) + margin, 0)

TripletMarginLoss triplet_loss(0.5f);

Segmentation Losses

DiceLoss
DiceLoss(float smooth = 1.0f,
         ReductionType reduction
         = ReductionType::Mean);

// dice = 2 * |P intersection T| / (|P| + |T|)
// loss = 1 - dice

// Medical image segmentation
DiceLoss dice_loss;
Tensor loss = dice_loss(predictions, targets);
CombinedLoss
CombinedLoss(
    std::vector<std::pair<
        std::unique_ptr<Loss>, float>> losses);

// loss = sum(weight_i * loss_i)

// Combine Dice and Cross Entropy
std::vector<std::pair<std::unique_ptr<Loss>, float>> losses;
losses.emplace_back(std::make_unique<DiceLoss>(), 0.5f);
losses.emplace_back(std::make_unique<CrossEntropyLoss>(), 0.5f);

CombinedLoss combined(std::move(losses));

Python Bindings

import pycyxwiz as cyx

# Classification losses
ce_loss = cyx.nn.CrossEntropyLoss()
bce_loss = cyx.nn.BCEWithLogitsLoss()
focal_loss = cyx.nn.FocalLoss(alpha=0.25, gamma=2.0)

# Regression losses
mse_loss = cyx.nn.MSELoss()
l1_loss = cyx.nn.L1Loss()
huber_loss = cyx.nn.SmoothL1Loss(beta=1.0)

# Segmentation
dice_loss = cyx.nn.DiceLoss()

# Contrastive
triplet_loss = cyx.nn.TripletMarginLoss(margin=0.5)
infonce_loss = cyx.nn.InfoNCELoss(temperature=0.07)

# Usage
loss = ce_loss(predictions, targets)
loss.backward()

Loss Selection Guide

| Task                       | Recommended Loss     | Notes                                    |
|----------------------------|----------------------|------------------------------------------|
| Multi-class Classification | CrossEntropyLoss     | Use label smoothing for regularization   |
| Binary Classification      | BCEWithLogitsLoss    | Use pos_weight for imbalance             |
| Object Detection           | FocalLoss            | Handles foreground/background imbalance  |
| Regression                 | MSE or SmoothL1      | SmoothL1 more robust to outliers         |
| Semantic Segmentation      | CrossEntropy + Dice  | Combined usually works best              |
| Metric Learning            | TripletMarginLoss    | Or InfoNCE for contrastive               |

Best Practices

  1. Label Smoothing: Use 0.1 for classification to prevent overconfidence
  2. Class Weights: Inverse frequency for imbalanced datasets
  3. Focal Loss: gamma=2.0 is good default for detection
  4. Combined Losses: Weight by relative magnitude
  5. Gradient Clipping: Consider when using unstable losses