bpe_framework/serialization
2025-08-27 14:02:03 -07:00

94 lines
3.4 KiB
Plaintext

// Token embedding table: Xavier-initialized, second arg `true` presumably
// marks the tensor as trainable (requires-grad) — confirm against Tensor API.
Tensor embedding_weights = Tensor::xavier({vocab_size, embed_dim}, true);
parameters_["embedding.weight"] = embedding_weights;
// Register the per-layer transformer parameters under
// "transformer.layers.<i>.<submodule>.<param>" names so save/load can
// iterate parameters_ by key.
for (size_t i = 0; i < num_layers; ++i) {
// Query projection: maps embed_dim -> hidden_dim, Xavier init.
Tensor query_weights = Tensor::xavier({embed_dim, hidden_dim}, true);
parameters_[fmt::format("transformer.layers.{}.attention.query.weight", i)] = query_weights;
// Query bias: zero-initialized, one entry per hidden unit.
Tensor query_bias = Tensor::zeros({hidden_dim}, true);
parameters_[fmt::format("transformer.layers.{}.attention.query.bias", i)] = query_bias;
// NOTE(review): key, value, and attention-output parameters are still a
// TODO here — until they are registered they will not be serialized.
// ...
}
// Final projection from hidden state back to vocabulary logits.
Tensor output_weights = Tensor::xavier({hidden_dim, vocab_size}, true);
parameters_["output.weight"] = output_weights;
Tensor output_bias = Tensor::zeros({vocab_size}, true);
parameters_["output.bias"] = output_bias;
Modify LanguageModel Initialization
You need to ensure that all parameters are stored in the parameters_ map during initialization. For example:
cpp
// In your LanguageModel constructor or initialization method.
//
// Registers every trainable tensor in parameters_ so the serialization
// code can iterate the map by name. Names follow the
// "transformer.layers.<i>.attention.<proj>.<param>" convention.
//
// @param vocab_size  number of tokens in the vocabulary
// @param embed_dim   width of the token embedding
// @param hidden_dim  width of the attention projections
// @param num_layers  number of transformer layers
LanguageModel::LanguageModel(size_t vocab_size, size_t embed_dim,
                             size_t hidden_dim, size_t num_layers) {
    // Token embedding table: Xavier init; `true` marks it trainable.
    parameters_["embedding.weight"] = Tensor::xavier({vocab_size, embed_dim}, true);

    // Per-layer attention parameters. std::to_string avoids the external
    // fmt dependency; the resulting keys are byte-identical to what
    // fmt::format("transformer.layers.{}...", i) produced.
    for (size_t i = 0; i < num_layers; ++i) {
        const std::string prefix =
            "transformer.layers." + std::to_string(i) + ".attention.";
        // The original registered only the query projection and left
        // key/value as "// ..." — those parameters were never created and
        // therefore never serialized. Q/K/V share the same shape.
        for (const char* proj : {"query", "key", "value"}) {
            parameters_[prefix + proj + ".weight"] =
                Tensor::xavier({embed_dim, hidden_dim}, true);
            parameters_[prefix + proj + ".bias"] =
                Tensor::zeros({hidden_dim}, true);
        }
        // NOTE(review): the attention output projection (and any
        // feed-forward / layer-norm weights) still need registering here;
        // its shape is likely {hidden_dim, embed_dim} — confirm against
        // the forward pass before adding it.
    }

    // Final projection from hidden state back to vocabulary logits.
    parameters_["output.weight"] = Tensor::xavier({hidden_dim, vocab_size}, true);
    parameters_["output.bias"] = Tensor::zeros({vocab_size}, true);
}
Note: the snippets above use fmt::format to build parameter names; if you prefer to avoid the fmt dependency, C++20's std::format or plain std::string concatenation with std::to_string produces identical names.
6. Add Error Handling and Validation
Consider adding these validations to your serialization code:
cpp
// In the load method, after reading each parameter: reject a stored tensor
// whose shape disagrees with the model's registered parameter of the same
// name (`it` is presumably the parameters_ iterator for `name` — confirm).
if (tensor.shape() != it->second.shape()) {
throw std::runtime_error("Shape mismatch for parameter: " + name);
}
// Detect stream-level read failures (bad cast, I/O error) after each read.
if (file.fail()) {
throw std::runtime_error("Error reading from file: " + path);
}
// Detect truncated files: EOF reached before all parameters were read.
if (file.eof()) {
throw std::runtime_error("Unexpected end of file: " + path);
}
7. Usage Example
cpp
// Create and train a model
LanguageModelTrainer trainer(tokenizer, embedding_dim, hidden_dim, num_layers);
trainer.train(corpus, epochs, batch_size, sequence_length);
// Save the model
trainer.save_model("trained_model.bin");
// Later, create a new trainer and load the model
LanguageModelTrainer new_trainer(tokenizer, embedding_dim, hidden_dim, num_layers);
new_trainer.load_model("trained_model.bin");
// Continue training or use for inference