Layer API Reference
Neural network layer implementations providing GPU-accelerated building blocks for deep learning models.
Base Layer Class
class CYXWIZ_API Layer {
public:
    Layer();
    virtual ~Layer() = default;

    // Forward pass
    virtual Tensor Forward(const Tensor& input) = 0;

    // Get/Set parameters
    virtual std::vector<Tensor*> Parameters();
    virtual std::vector<Tensor*> Gradients();

    // Training mode
    void Train(bool mode = true);
    void Eval();
    bool IsTraining() const;

    // Shape inference
    virtual std::vector<int> OutputShape(const std::vector<int>& input_shape) const;

    // Parameter count
    virtual int64_t NumParameters() const;
};
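Concrete layers derive from Layer and override Forward; the other virtuals have sensible defaults. The sketch below illustrates the pattern with a hypothetical parameter-free scaling layer (it assumes Tensor supports multiplication by a scalar, which is not shown in this reference):
// Minimal custom layer sketch: scales its input by a constant factor
class Scale : public Layer {
public:
    explicit Scale(float factor) : factor_(factor) {}
    Tensor Forward(const Tensor& input) override {
        return input * factor_;               // element-wise scaling, no learnable parameters
    }
    std::vector<int> OutputShape(const std::vector<int>& input_shape) const override {
        return input_shape;                   // shape is unchanged
    }
    int64_t NumParameters() const override { return 0; }
private:
    float factor_;
};
Scale scale(0.5f);
scale.Eval();                                 // training/eval mode makes no difference here
Tensor y = scale.Forward(Randn({32, 784}));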
Dense (Fully Connected) Layer
Dense(int units,
      ActivationType activation = ActivationType::None,
      bool use_bias = true,
      InitializerType kernel_initializer = InitializerType::GlorotUniform,
      InitializerType bias_initializer = InitializerType::Zeros);
Usage
Dense dense1(128, ActivationType::ReLU);
Dense dense2(64, ActivationType::ReLU, true, InitializerType::HeNormal);
Dense output(10, ActivationType::Softmax);
Tensor x = Randn({32, 784}); // Batch of 32, input size 784
Tensor h1 = dense1.Forward(x); // Shape: (32, 128)
Tensor h2 = dense2.Forward(h1); // Shape: (32, 64)
Tensor out = output.Forward(h2); // Shape: (32, 10)
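The base-class queries also apply to concrete layers. A brief sketch continuing the example above (the count assumes the usual units × inputs + bias bookkeeping, with the weights of dense1 created once the 784-dimensional input is seen):
std::vector<int> out_shape = dense1.OutputShape({32, 784}); // {32, 128}
int64_t n_params = dense1.NumParameters();                  // 784 * 128 + 128 = 100480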
Conv2D Layer
Conv2D(int filters,
       std::pair<int, int> kernel_size,
       std::pair<int, int> stride = {1, 1},
       PaddingType padding = PaddingType::Valid,
       ActivationType activation = ActivationType::None,
       bool use_bias = true);
// Input shape:  (batch, channels, height, width)
// Output shape: (batch, filters, new_height, new_width)
Usage
Conv2D conv1(32, {3, 3}, {1, 1}, PaddingType::Same, ActivationType::ReLU);
Conv2D conv2(64, {3, 3}, {1, 1}, PaddingType::Same, ActivationType::ReLU);
Tensor x = Randn({32, 3, 224, 224}); // Batch of 32 RGB images
Tensor h1 = conv1.Forward(x); // Shape: (32, 32, 224, 224)
Tensor h2 = conv2.Forward(h1); // Shape: (32, 64, 224, 224)
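With PaddingType::Valid the spatial size shrinks by the conventional formula new_dim = (dim - kernel) / stride + 1 (integer division). A short sketch, assuming that arithmetic, with a strided layer appended to the stack above:
// Valid padding, stride 2: (224 - 3) / 2 + 1 = 111
Conv2D conv3(128, {3, 3}, {2, 2}, PaddingType::Valid, ActivationType::ReLU);
Tensor h3 = conv3.Forward(h2); // Shape: (32, 128, 111, 111)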
Pooling Layers
MaxPool2D
MaxPool2D(
    std::pair<int, int> pool_size,
    std::pair<int, int> stride = {0, 0},
    PaddingType padding = PaddingType::Valid
);
MaxPool2D pool({2, 2});
Tensor h = pool.Forward(x);
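A 2x2 pool halves each spatial dimension when its stride equals the pool size; the sketch below assumes the default stride of {0, 0} means "use the pool size", which is a common convention but not stated above:
// 2x2 max pooling on a (batch, channels, H, W) feature map
Tensor features = Randn({32, 64, 112, 112});
MaxPool2D pool2({2, 2});
Tensor pooled = pool2.Forward(features); // Shape: (32, 64, 56, 56)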
GlobalAveragePooling2D
GlobalAveragePooling2D gap;
// Input: (batch, channels, height, width)
// Output: (batch, channels)
Tensor x = Randn({32, 64, 7, 7});
Tensor h = gap.Forward(x); // (32, 64)
Normalization Layers
BatchNorm
BatchNorm(int num_features,
          float epsilon = 1e-5f,
          float momentum = 0.1f);
BatchNorm bn(64); // 64 channels
Tensor normalized = bn.Forward(x);
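BatchNorm is mode-dependent in the usual way: in training mode it normalizes with batch statistics and updates running estimates using momentum, while Eval() switches to the accumulated running estimates. A short sketch of that convention:
// Train vs. eval behavior on a (batch, channels, H, W) feature map
Tensor feat = Randn({32, 64, 56, 56});
bn.Train();                          // batch statistics used, running estimates updated
Tensor train_out = bn.Forward(feat);
bn.Eval();                           // running estimates used, nothing updated
Tensor eval_out = bn.Forward(feat);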
LayerNorm
LayerNorm(
    const std::vector<int>& normalized_shape,
    float epsilon = 1e-5f
);
// For transformers
LayerNorm ln({512});
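LayerNorm normalizes over the trailing normalized_shape dimensions of each sample, so the output shape matches the input. A brief sketch using the ln instance above on transformer-style activations:
// Normalize the last (embedding) dimension of (batch, seq_len, embed_dim)
Tensor tokens = Randn({32, 100, 512});
Tensor normed = ln.Forward(tokens); // Shape unchanged: (32, 100, 512)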
Recurrent Layers
LSTM
LSTM(int hidden_size,
     int num_layers = 1,
     bool bidirectional = false,
     float dropout = 0.0f,
     bool batch_first = true);
// Returns the output plus the final hidden and cell states (h_n, c_n)
auto [output, h_n, c_n] = lstm.ForwardWithState(input);
Usage
LSTM lstm(256, 2, true, 0.2f); // Bidirectional 2-layer LSTM
Tensor x = Randn({32, 100, 128}); // (batch, seq_len, input_size)
auto [output, h_n, c_n] = lstm.ForwardWithState(x);
// output: (32, 100, 512) // 256*2 for bidirectional
// h_n: (4, 32, 256) // 2 layers * 2 directions
// c_n: (4, 32, 256)
Attention Layers
MultiHeadAttention
MultiHeadAttention(int embed_dim,
                   int num_heads,
                   float dropout = 0.0f,
                   bool bias = true);
// Self-attention
Tensor Forward(const Tensor& input);
// Cross-attention
Tensor Forward(const Tensor& query,
               const Tensor& key,
               const Tensor& value,
               const Tensor& attn_mask = Tensor());
Usage
MultiHeadAttention mha(512, 8, 0.1f);
Tensor x = Randn({32, 100, 512}); // (batch, seq_len, embed_dim)
Tensor output = mha.Forward(x); // Self-attention
// Cross-attention
Tensor query = Randn({32, 50, 512});
Tensor key = Randn({32, 100, 512});
Tensor value = Randn({32, 100, 512});
Tensor cross_output = mha.Forward(query, key, value);
Regularization Layers
Dropout
Dropout drop(0.3f);
drop.Train();
Tensor train_out = drop.Forward(x); // Dropout applied
drop.Eval();
Tensor eval_out = drop.Forward(x);  // No dropout
Activation Types
enum class ActivationType {
    None,
    ReLU,
    LeakyReLU,
    GELU,
    Sigmoid,
    Tanh,
    Softmax,
    Swish,
    Mish
};
Python Bindings
import pycyxwiz as cyx
# Dense layer
dense = cyx.layers.Dense(128, activation='relu')
# Conv2D
conv = cyx.layers.Conv2D(32, kernel_size=(3, 3), padding='same')
# BatchNorm
bn = cyx.layers.BatchNorm(64)
# LSTM
lstm = cyx.layers.LSTM(256, num_layers=2, bidirectional=True)
# Attention
mha = cyx.layers.MultiHeadAttention(512, num_heads=8)
# Sequential model
model = cyx.Sequential([
    cyx.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    cyx.layers.MaxPool2D((2, 2)),
    cyx.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    cyx.layers.GlobalAveragePooling2D(),
    cyx.layers.Dense(10, activation='softmax')
])
# Forward pass
output = model(input_tensor)
Available Layers
| Category | Layers |
|---|---|
| Core | Dense, Conv1D, Conv2D, Conv3D |
| Pooling | MaxPool2D, AvgPool2D, GlobalAveragePooling2D, AdaptiveAvgPool |
| Normalization | BatchNorm, LayerNorm, GroupNorm, InstanceNorm |
| Recurrent | RNN, LSTM, GRU, Embedding |
| Attention | MultiHeadAttention, TransformerEncoder, TransformerDecoder |
| Regularization | Dropout, Dropout2D |
| Utility | Flatten, Reshape |