CyxWiz
Docs › Layers

Layer API Reference

Neural network layer implementations providing GPU-accelerated building blocks for deep learning models.

Base Layer Class

class CYXWIZ_API Layer {
public:
    Layer();
    virtual ~Layer() = default;

    // Forward pass
    virtual Tensor Forward(const Tensor& input) = 0;

    // Get/Set parameters
    virtual std::vector<Tensor*> Parameters();
    virtual std::vector<Tensor*> Gradients();

    // Training mode
    void Train(bool mode = true);
    void Eval();
    bool IsTraining() const;

    // Shape inference
    virtual std::vector<int> OutputShape(const std::vector<int>& input_shape) const;

    // Parameter count
    virtual int64_t NumParameters() const;
};

Dense (Fully Connected) Layer

Dense(int units,
      ActivationType activation = ActivationType::None,
      bool use_bias = true,
      InitializerType kernel_initializer = InitializerType::GlorotUniform,
      InitializerType bias_initializer = InitializerType::Zeros);

Usage

Dense dense1(128, ActivationType::ReLU);
Dense dense2(64, ActivationType::ReLU, true, InitializerType::HeNormal);
Dense output(10, ActivationType::Softmax);

Tensor x = Randn({32, 784});  // Batch of 32, input size 784
Tensor h1 = dense1.Forward(x);    // Shape: (32, 128)
Tensor h2 = dense2.Forward(h1);   // Shape: (32, 64)
Tensor out = output.Forward(h2);  // Shape: (32, 10)

Conv2D Layer

Conv2D(int filters,
       std::pair<int, int> kernel_size,
       std::pair<int, int> stride = {1, 1},
       PaddingType padding = PaddingType::Valid,
       ActivationType activation = ActivationType::None,
       bool use_bias = true);

// Input shape: (batch, channels, height, width)
// Output shape: (batch, filters, new_height, new_width)

Usage

Conv2D conv1(32, {3, 3}, {1, 1}, PaddingType::Same, ActivationType::ReLU);
Conv2D conv2(64, {3, 3}, {1, 1}, PaddingType::Same, ActivationType::ReLU);

Tensor x = Randn({32, 3, 224, 224});  // Batch of 32 RGB images
Tensor h1 = conv1.Forward(x);          // Shape: (32, 32, 224, 224)
Tensor h2 = conv2.Forward(h1);         // Shape: (32, 64, 224, 224)

Pooling Layers

MaxPool2D
MaxPool2D(
    std::pair<int, int> pool_size,
    std::pair<int, int> stride = {0, 0},
    PaddingType padding = PaddingType::Valid
);

MaxPool2D pool({2, 2});
Tensor h = pool.Forward(x);

GlobalAveragePooling2D
GlobalAveragePooling2D gap;

// Input: (batch, channels, height, width)
// Output: (batch, channels)

Tensor x = Randn({32, 64, 7, 7});
Tensor h = gap.Forward(x);  // (32, 64)

Normalization Layers

BatchNorm
BatchNorm(int num_features,
          float epsilon = 1e-5f,
          float momentum = 0.1f);

BatchNorm bn(64);  // 64 channels
Tensor normalized = bn.Forward(x);

LayerNorm
LayerNorm(
    const std::vector<int>& normalized_shape,
    float epsilon = 1e-5f
);

// For transformers
LayerNorm ln({512});

Recurrent Layers

LSTM

LSTM(int hidden_size,
     int num_layers = 1,
     bool bidirectional = false,
     float dropout = 0.0f,
     bool batch_first = true);

// Returns (output, h_n, c_n)
auto [output, h_n, c_n] = lstm.ForwardWithState(input);

Usage

LSTM lstm(256, 2, true, 0.2f);  // Bidirectional 2-layer LSTM

Tensor x = Randn({32, 100, 128});  // (batch, seq_len, input_size)
auto [output, h_n, c_n] = lstm.ForwardWithState(x);
// output: (32, 100, 512)  // 256*2 for bidirectional
// h_n: (4, 32, 256)       // 2 layers * 2 directions
// c_n: (4, 32, 256)

Attention Layers

MultiHeadAttention

MultiHeadAttention(int embed_dim,
                   int num_heads,
                   float dropout = 0.0f,
                   bool bias = true);

// Self-attention
Tensor Forward(const Tensor& input);

// Cross-attention
Tensor Forward(const Tensor& query,
               const Tensor& key,
               const Tensor& value,
               const Tensor& attn_mask = Tensor());

Usage

MultiHeadAttention mha(512, 8, 0.1f);

Tensor x = Randn({32, 100, 512});  // (batch, seq_len, embed_dim)
Tensor output = mha.Forward(x);    // Self-attention

// Cross-attention
Tensor query = Randn({32, 50, 512});
Tensor key = Randn({32, 100, 512});
Tensor value = Randn({32, 100, 512});
Tensor cross_output = mha.Forward(query, key, value);

Regularization Layers

Dropout drop(0.3f);

model.Train();
Tensor train_out = drop.Forward(x);  // Dropout applied

model.Eval();
Tensor eval_out = drop.Forward(x);   // No dropout

Activation Types

enum class ActivationType {
    None,
    ReLU,
    LeakyReLU,
    GELU,
    Sigmoid,
    Tanh,
    Softmax,
    Swish,
    Mish
};

Python Bindings

import pycyxwiz as cyx

# Dense layer
dense = cyx.layers.Dense(128, activation='relu')

# Conv2D
conv = cyx.layers.Conv2D(32, kernel_size=(3, 3), padding='same')

# BatchNorm
bn = cyx.layers.BatchNorm(64)

# LSTM
lstm = cyx.layers.LSTM(256, num_layers=2, bidirectional=True)

# Attention
mha = cyx.layers.MultiHeadAttention(512, num_heads=8)

# Sequential model
model = cyx.Sequential([
    cyx.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    cyx.layers.MaxPool2D((2, 2)),
    cyx.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    cyx.layers.GlobalAveragePooling2D(),
    cyx.layers.Dense(10, activation='softmax')
])

# Forward pass
output = model(input_tensor)

Available Layers

| Category       | Layers                                                   |
| -------------- | -------------------------------------------------------- |
| Core           | Dense, Conv1D, Conv2D, Conv3D                            |
| Pooling        | MaxPool2D, AvgPool2D, GlobalAvgPool2D, AdaptiveAvgPool   |
| Normalization  | BatchNorm, LayerNorm, GroupNorm, InstanceNorm            |
| Recurrent      | RNN, LSTM, GRU, Embedding                                |
| Attention      | MultiHeadAttention, TransformerEncoder, TransformerDecoder |
| Regularization | Dropout, Dropout2D                                       |
| Utility        | Flatten, Reshape                                         |