// transformer_model.hpp
#pragma once

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include "lm/tokenizer/token_types.hpp"

namespace lm {

class TransformerModel {
public:
    TransformerModel(size_t vocab_size,
                     size_t d_model = 512,
                     size_t n_layers = 6,
                     size_t n_heads = 8,
                     size_t d_ff = 2048,
                     float dropout = 0.1f);
    ~TransformerModel();

    // Forward pass: returns logits over the vocabulary for the input sequence
    std::vector<float> forward(const std::vector<TokenID>& input_tokens);

    // Training methods
    void train_step(const std::vector<TokenID>& input_tokens,
                    const std::vector<TokenID>& target_tokens);
    float calculate_loss(const std::vector<float>& logits,
                         const std::vector<TokenID>& targets);

    // Generation methods
    std::vector<TokenID> generate(const std::vector<TokenID>& context,
                                  size_t max_length = 100,
                                  float temperature = 1.0f);

    // Serialization
    bool save(const std::string& filename);
    bool load(const std::string& filename);

    // Get model info
    size_t get_vocab_size() const { return vocab_size_; }
    size_t get_d_model() const { return d_model_; }

private:
    class Impl;
    std::unique_ptr<Impl> pimpl_;

    // Model parameters
    size_t vocab_size_;
    size_t d_model_;
    size_t n_layers_;
    size_t n_heads_;
    size_t d_ff_;
    float dropout_;
};

} // namespace lm
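
// Usage sketch (illustrative only, not part of this header): shows how the public
// API above is intended to be called. It assumes that lm/tokenizer/token_types.hpp
// defines a TokenID type in namespace lm, and that the include path and "model.bin"
// filename below are placeholders; adjust both for the actual project layout.
//
//   #include <vector>
//   #include "transformer_model.hpp"   // assumed include path
//
//   int main() {
//       lm::TransformerModel model(/*vocab_size=*/32000);
//
//       std::vector<lm::TokenID> context = {1, 2, 3};   // token IDs from a tokenizer
//       std::vector<lm::TokenID> targets = {2, 3, 4};   // next-token targets
//
//       model.train_step(context, targets);             // one optimization step
//       auto generated = model.generate(context,
//                                       /*max_length=*/50,
//                                       /*temperature=*/0.8f);  // sampled continuation
//
//       model.save("model.bin");                        // serialize weights
//       return 0;
//   }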