bpe_framework/include/lm/models/language_model.hpp
2025-08-27 14:02:03 -07:00

46 lines
1.1 KiB
C++

// lm/models/language_model.hpp
#pragma once
#include "../core/tensor.hpp"
#include "../tokenizer/bpe_tokenizer.hpp"
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>
namespace lm {
/// @brief Language model declaration: configuration sizes, parameter tensors,
///        and the forward / persistence / parameter-access interface.
///
/// Only `operator()` is defined inline; every other member function is defined
/// in the implementation file, so behavioural details beyond the signatures are
/// not visible from this header.
class LanguageModel {
public:
    /// @brief Constructs a model from its size configuration.
    /// @param vocab_size    Vocabulary size.
    /// @param embedding_dim Embedding dimension.
    /// @param hidden_dim    Hidden dimension.
    /// @param num_layers    Number of layers.
    /// NOTE(review): how the parameter tensors are allocated/initialised is
    /// decided by the out-of-line definition — confirm there.
    LanguageModel(size_t vocab_size, size_t embedding_dim, size_t hidden_dim, size_t num_layers);

    /// @brief Runs the model on @p input and returns the resulting tensor.
    /// NOTE(review): expected shape/dtype of @p input and of the result are
    /// not specified in this header — confirm against the implementation.
    Tensor forward(const Tensor& input);

    /// @brief Call-operator shorthand; delegates directly to forward().
    Tensor operator()(const Tensor& input) { return forward(input); }

    /// @brief Saves the model to @p path (const: does not modify the model).
    /// NOTE(review): on-disk format and error reporting are defined elsewhere.
    void save(const std::string& path) const;

    /// @brief Loads the model from @p path.
    /// NOTE(review): presumably the counterpart of save() — verify the format.
    void load(const std::string& path);

    // Parameter access methods

    /// @brief Returns the model parameters as a vector of tensors (by value).
    std::vector<Tensor> parameters() const;

    /// @brief Returns the model parameters keyed by name (by value).
    std::unordered_map<std::string, Tensor> named_parameters() const;

    /// @brief Sets the parameter identified by @p name to @p param.
    /// NOTE(review): handling of unknown names is defined by the implementation.
    void set_parameter(const std::string& name, const Tensor& param);

    /// @brief Switches the model to training mode.
    /// NOTE(review): presumably sets is_training_ — confirm in the implementation.
    void train();

    /// @brief Switches the model to evaluation mode.
    /// NOTE(review): presumably clears is_training_ — confirm in the implementation.
    void eval();

private:
    // Size configuration. Zero-initialised in-class so that no member is ever
    // read with an indeterminate value; the constructor's own initialisers
    // take precedence. Declaration order is kept as-is because it fixes the
    // member initialisation order.
    size_t vocab_size_{0};
    size_t embedding_dim_{0};
    size_t hidden_dim_{0};
    size_t num_layers_{0};

    // Model parameters
    Tensor embedding_weight_;
    Tensor lstm_weight_ih_;
    Tensor lstm_weight_hh_;
    Tensor lstm_bias_ih_;
    Tensor lstm_bias_hh_;
    Tensor output_weight_;
    Tensor output_bias_;

    // Mode flag. The `false` default is purely defensive (avoids an
    // indeterminate bool); the constructor may establish a different mode.
    bool is_training_{false};

    // Name -> tensor map of parameters.
    // NOTE(review): how this map relates to the individual tensor members
    // above is not visible from this header.
    std::unordered_map<std::string, Tensor> parameters_;
};
} // namespace lm