bpe_framework/CMakeLists.txt
2025-09-13 12:45:42 -07:00

262 lines
7.2 KiB
CMake
Executable File

# Minimum supported CMake version and project declaration.
cmake_minimum_required(VERSION 3.16)
project(bpe_framework)

# When the target processor is reported as "x86_64" or "AMD64", define
# __x86_64__ for every target created in this directory.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
  add_compile_definitions(__x86_64__)
endif()

# Build all C++ sources as C++17 and do not fall back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
# ENABLE_PROFILING: when ON, compile and link with -pg so the resulting
# binaries emit gprof profiling data. Declared before any use.
option(ENABLE_PROFILING "Enable profiling with gprof" OFF)

if(ENABLE_PROFILING)
  # -pg has to reach both the compile step and the link step.
  # Applies to every target created after this point in this directory.
  add_compile_options(-pg)
  add_link_options(-pg)
  message(STATUS "Profiling enabled: gprof flags added")
endif()

# Per-configuration optimisation/debug flags.
# Generator expressions are used instead of testing CMAKE_BUILD_TYPE so that
# the flags also apply under multi-config generators (where CMAKE_BUILD_TYPE
# is empty) and so that the configuration name is matched case-insensitively.
add_compile_options(
  "$<$<CONFIG:Release>:-O3>"
  "$<$<CONFIG:Debug>:-g>"
  "$<$<CONFIG:Debug>:-O0>"
  "$<$<CONFIG:RelWithDebInfo>:-O2>"
  "$<$<CONFIG:RelWithDebInfo>:-g>"
  "$<$<CONFIG:MinSizeRel>:-Os>"
)

# Assertions are disabled in the two size/speed-optimised configurations.
add_compile_definitions(
  "$<$<CONFIG:Release>:NDEBUG>"
  "$<$<CONFIG:MinSizeRel>:NDEBUG>"
)
# Header search paths shared by every target defined in this directory.
# Relative entries are resolved against the directory of this CMakeLists.txt.
include_directories(
  include
  include/lm
  include/lm/models
  include/lm/training
  include/lm/optimizers
  include/lm/core
  include/lm/tokenizer
  include/lm/generation
  include/lm/runtime
)

# External dependencies; configuration stops if either one cannot be found.
find_package(Eigen3 REQUIRED)
find_package(ICU COMPONENTS uc i18n REQUIRED)
# Cereal serialization library (header-only).
# It is fetched manually into third_party/ to avoid Boost dependency issues.
# The download runs only when cereal.hpp is not already present, and every
# step is checked so a network or extraction failure stops configuration
# with a clear message instead of surfacing later as a missing header.
set(CEREAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/third_party/cereal/include)

if(NOT EXISTS "${CEREAL_INCLUDE_DIR}/cereal/cereal.hpp")
  message(STATUS "Downloading Cereal library...")

  set(cereal_third_party_dir "${CMAKE_SOURCE_DIR}/third_party")
  set(cereal_archive "${cereal_third_party_dir}/cereal_v1.3.2.tar.gz")
  set(cereal_unpacked_dir "${cereal_third_party_dir}/cereal-1.3.2")

  file(MAKE_DIRECTORY "${cereal_third_party_dir}/cereal")

  # Download the specific version of Cereal and capture the transfer status.
  file(DOWNLOAD
    https://github.com/USCiLab/cereal/archive/refs/tags/v1.3.2.tar.gz
    "${cereal_archive}"
    SHOW_PROGRESS
    STATUS cereal_download_status
  )
  # STATUS is a two-element list: numeric code (0 == success) and a message.
  list(GET cereal_download_status 0 cereal_download_code)
  if(NOT cereal_download_code EQUAL 0)
    list(GET cereal_download_status 1 cereal_download_message)
    file(REMOVE "${cereal_archive}")
    message(FATAL_ERROR
      "Failed to download Cereal v1.3.2: ${cereal_download_message}")
  endif()

  # Extract with CMake's built-in tar so no external `tar` program is needed.
  execute_process(
    COMMAND "${CMAKE_COMMAND}" -E tar xzf "${cereal_archive}"
    WORKING_DIRECTORY "${cereal_third_party_dir}"
    RESULT_VARIABLE cereal_extract_result
  )
  if(NOT cereal_extract_result EQUAL 0)
    message(FATAL_ERROR
      "Failed to extract ${cereal_archive} (result: ${cereal_extract_result})")
  endif()

  # Move the include directory into place. A partial destination left over
  # from an earlier failed attempt is removed first, because renaming onto
  # an existing directory can fail.
  file(REMOVE_RECURSE "${CEREAL_INCLUDE_DIR}")
  file(RENAME
    "${cereal_unpacked_dir}/include"
    "${CEREAL_INCLUDE_DIR}"
  )

  # Clean up the unpacked tree and the archive.
  file(REMOVE_RECURSE "${cereal_unpacked_dir}")
  file(REMOVE "${cereal_archive}")

  if(NOT EXISTS "${CEREAL_INCLUDE_DIR}/cereal/cereal.hpp")
    message(FATAL_ERROR
      "Cereal was downloaded but cereal.hpp is missing under ${CEREAL_INCLUDE_DIR}")
  endif()
endif()

# Make the manually downloaded Cereal headers visible to every target.
include_directories(${CEREAL_INCLUDE_DIR})
message(STATUS "Using Cereal from: ${CEREAL_INCLUDE_DIR}")
# lm_core: the header-only core components (Tensor), modelled as an INTERFACE
# library so that anything linking it inherits the include path and Eigen.
add_library(lm_core INTERFACE)
target_link_libraries(lm_core INTERFACE Eigen3::Eigen)
target_include_directories(lm_core INTERFACE "${CMAKE_SOURCE_DIR}/include")
# lm_tokenizer: static library with the BPE tokenizer and Unicode helpers.
add_library(lm_tokenizer STATIC
  src/tokenizer/bpe_tokenizer.cpp
  src/tokenizer/unicode_utils.cpp
)
# Eigen is not listed here: lm_core already links Eigen3::Eigen as an
# INTERFACE dependency, so it propagates to lm_tokenizer and its consumers.
# ICU is PUBLIC so that consumers also link the ICU components.
target_link_libraries(lm_tokenizer
  PUBLIC
    lm_core
    ICU::uc
    ICU::i18n
)
# lm_optimizers: static library built from src/optimizers/adam.cpp.
add_library(lm_optimizers STATIC)
target_sources(lm_optimizers PRIVATE src/optimizers/adam.cpp)
target_link_libraries(lm_optimizers PUBLIC lm_core)
# lm_models: static library with the transformer and conversation model
# implementations. Depends on the core headers, the optimizers and the
# tokenizer, all of which are propagated to consumers (PUBLIC).
add_library(lm_models STATIC
  src/models/transformer_model.cpp
  src/models/conversation_model.cpp
)
target_link_libraries(lm_models PUBLIC lm_core lm_optimizers lm_tokenizer)
# tensor_pool.hpp is attached to lm_core together with tensor.hpp further
# down in this file, so it is not registered a second time here.
# lm_generation: static library holding the samplers.
add_library(lm_generation STATIC)
target_sources(lm_generation PRIVATE src/generation/sampler.cpp)
target_link_libraries(lm_generation PUBLIC lm_core)
# lm_context: static library for context management; needs the tokenizer.
add_library(lm_context STATIC)
target_sources(lm_context PRIVATE src/context_manager.cpp)
target_link_libraries(lm_context
  PUBLIC
    lm_core
    lm_tokenizer
)
# lm_conversation: static library for conversation management, layered on
# top of lm_context.
add_library(lm_conversation STATIC)
target_sources(lm_conversation PRIVATE src/conversation_manager.cpp)
target_link_libraries(lm_conversation
  PUBLIC
    lm_core
    lm_context
)
# lm_runtime: static library with the runtime init, shutdown and state
# utility sources.
add_library(lm_runtime STATIC)
target_sources(lm_runtime
  PRIVATE
    src/runtime/init.cpp
    src/runtime/shutdown.cpp
    src/runtime/state_utils.cpp
)
target_link_libraries(lm_runtime PUBLIC lm_core)
# Register the Tensor and TensorPool headers as INTERFACE sources of lm_core,
# which makes them part of the source list of every target that links it.
target_sources(lm_core
  INTERFACE
    "${CMAKE_SOURCE_DIR}/include/lm/core/tensor.hpp"
    "${CMAKE_SOURCE_DIR}/include/lm/core/tensor_pool.hpp"
)
# lm_alpha: static library with the alpha components (config I/O and REPL).
add_library(lm_alpha STATIC)
target_sources(lm_alpha
  PRIVATE
    src/alpha/config_io.cpp
    src/alpha/repl.cpp
)
target_link_libraries(lm_alpha
  PUBLIC
    lm_core
    lm_runtime
    lm_conversation
    lm_models
)
# Test and demo executables.
#
# Every target_link_libraries() call uses the explicit PRIVATE keyword:
# nothing links against an executable, so its dependencies never need to
# propagate, and the keyword-less signature has legacy semantics.
#
# Several executables link lm_training, which is declared further down in
# this file. That is fine: names passed to target_link_libraries() are
# resolved to targets at generate time, not when the command runs.

add_executable(performance_test src/performance_test.cpp)
target_link_libraries(performance_test
  PRIVATE
    lm_training
    lm_models
    lm_optimizers
    lm_tokenizer
    lm_core
)

add_executable(test_generation src/test_generation.cpp)
target_link_libraries(test_generation
  PRIVATE
    lm_training
    lm_models
    lm_optimizers
    lm_tokenizer
    lm_generation
    lm_core
)

add_executable(serialization_demo src/serialization_demo.cpp)
target_link_libraries(serialization_demo
  PRIVATE
    lm_training
    lm_models
    lm_optimizers
    lm_tokenizer
    lm_conversation
    lm_context
    lm_core
)

add_executable(test_bpe src/test_bpe.cpp)
target_link_libraries(test_bpe
  PRIVATE
    lm_tokenizer
    lm_core
)

add_executable(test_unicode_bpe src/test_unicode_bpe.cpp)
target_link_libraries(test_unicode_bpe
  PRIVATE
    lm_tokenizer
    lm_core
)

add_executable(sampler_test src/sampler_test.cpp)
target_link_libraries(sampler_test
  PRIVATE
    lm_training
    lm_models
    lm_optimizers
    lm_tokenizer
    lm_generation
    lm_core
)

add_executable(test_conversation src/test_conversation.cpp)
target_link_libraries(test_conversation
  PRIVATE
    lm_conversation
    lm_context
    lm_core
)

add_executable(test_logger src/test_logger.cpp)
target_link_libraries(test_logger
  PRIVATE
    lm_tokenizer
    lm_models
    lm_core
)

add_executable(test_transformer src/test_transformer.cpp)
target_link_libraries(test_transformer
  PRIVATE
    lm_models
    lm_tokenizer
    lm_core
)

add_executable(starter_convo src/starter_convo.cpp)
target_link_libraries(starter_convo
  PRIVATE
    lm_alpha
    lm_conversation
    lm_context
    lm_models
    lm_tokenizer
    lm_core
)
# lm_training: static library with the trainer, data loader and loss sources.
# Its model, optimizer and tokenizer dependencies are PUBLIC, so they are
# propagated to anything that links lm_training.
add_library(lm_training STATIC)
target_sources(lm_training
  PRIVATE
    src/training/trainer.cpp
    src/training/data_loader.cpp
    src/training/losses.cpp
)
target_link_libraries(lm_training
  PUBLIC
    lm_models
    lm_optimizers
    lm_tokenizer
)
# test_tensor_pool: executable built from src/test_tensor_pool.cpp.
# Only the header-only core is needed; PRIVATE because nothing links against
# an executable, so the dependency does not need to propagate.
add_executable(test_tensor_pool src/test_tensor_pool.cpp)
target_link_libraries(test_tensor_pool PRIVATE lm_core)
# CTest integration is currently switched off; call enable_testing() here
# to turn it on.

# Configuration summary, printed once at the end of the configure step.
foreach(summary_line IN ITEMS
    "Project configured successfully"
    "Eigen3 found: ${Eigen3_FOUND}"
    "ICU found: ${ICU_FOUND}"
    "Cereal include: ${CEREAL_INCLUDE_DIR}"
    "Build type: ${CMAKE_BUILD_TYPE}"
    "Profiling enabled: ${ENABLE_PROFILING}"
)
  message(STATUS "${summary_line}")
endforeach()