#!/bin/bash
#
# SCRIPT MI50 Compilation Script for llama.cpp
# Optimized build for AMD MI50 (gfx906) with ROCm/HIP support
#
# This script compiles llama.cpp with maximum optimizations for the MI50 GPU
# including server support, flash attention, and all performance features
#
# Usage:
#   Run from the llama.cpp root directory (the one containing CMakeLists.txt).
#   Takes no arguments. Honors ROCM_PATH if already set (default: /opt/rocm).
#
# Side effects:
#   * Deletes and recreates ./build.
#   * Writes the full build output to ./build/compilation_log.txt.
#
# Exit status: 0 on success, 1 on any failed check or build step.
#

# Exit on any unhandled error.
# `pipefail` is intentionally left off: the `... | grep -q` and `... | head`
# pipelines below may close the pipe early, and with pipefail a resulting
# SIGPIPE in the producer would be reported as a pipeline failure.
set -e

# Colors for output (interpreted by `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

echo -e "${BLUE}======================================${NC}"
echo -e "${BLUE} SCRIPT MI50 llama.cpp Builder ${NC}"
echo -e "${BLUE}======================================${NC}"

# Check if we're in the right directory. This also guards the `rm -rf build`
# further down against being run from an unrelated location.
if [[ ! -f "CMakeLists.txt" ]]; then
    echo -e "${RED}Error: Not in llama.cpp root directory${NC}"
    echo "Please run this script from the llama.cpp root directory"
    exit 1
fi

# Verify ROCm installation
echo -e "${YELLOW}Checking ROCm installation...${NC}"
if ! command -v rocm_agent_enumerator > /dev/null 2>&1; then
    echo -e "${RED}Error: ROCm not found. Please install ROCm first.${NC}"
    exit 1
fi

# Check for gfx906 support. A missing gfx906 agent is not fatal by itself:
# the user may confirm and continue.
GPUS=$(rocm_agent_enumerator)
if [[ "$GPUS" != *gfx906* ]]; then
    echo -e "${RED}Warning: gfx906 (MI50) not detected in system${NC}"
    echo "Available GPUs: $GPUS"
    read -p "Continue anyway? (y/N): " -n 1 -r
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        exit 1
    fi
fi

echo -e "${GREEN}✓ ROCm installation verified${NC}"
echo -e "${GREEN}✓ Available GPUs: $GPUS${NC}"

# Set ROCm environment variables for optimal gfx906 compilation
echo -e "${YELLOW}Setting ROCm environment variables for gfx906...${NC}"
export ROCM_PATH=${ROCM_PATH:-/opt/rocm}
export HCC_AMDGPU_TARGET=gfx906
export HSA_OVERRIDE_GFX_VERSION=9.0.6
export AMDGPU_TARGETS=gfx906
export GPU_TARGETS=gfx906

# Clean previous build
echo -e "${YELLOW}Cleaning previous build...${NC}"
rm -rf build
mkdir -p build

# Configure with maximum optimizations
echo -e "${YELLOW}Configuring CMake with MI50 optimizations...${NC}"
cd build

# NOTE(review): several of the GGML_HIP_* options below (for example
# GGML_HIP_GFX906_OPTIMIZED, GGML_HIP_EXPORT_METRICS, GGML_HIP_MMQ_MFMA) are
# presumably defined by this tree's CMake files -- confirm they exist, since
# CMake only warns about unknown -D variables.
cmake_args=(
    # Toolchain and target architecture
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_C_COMPILER=gcc
    -DCMAKE_CXX_COMPILER=g++
    -DCMAKE_HIP_COMPILER_FORCED=1
    -DCMAKE_HIP_ARCHITECTURES=gfx906

    # Host and device compiler flags
    -DCMAKE_C_FLAGS="-O3 -march=native -mtune=native -DNDEBUG -ffast-math -fno-finite-math-only -ffp-contract=fast"
    -DCMAKE_CXX_FLAGS="-O3 -march=native -mtune=native -DNDEBUG -DGGML_HIP_GFX906_OPTIMIZED -ffast-math -fno-finite-math-only -ffp-contract=fast"
    -DCMAKE_HIP_FLAGS=" --offload-arch=gfx906 -DGGML_HIP_GFX906_OPTIMIZED -Wno-ignored-attributes -Wno-cuda-compat -Wno-unused-result -mllvm -amdgpu-simplify-libcall -mllvm -amdgpu-internalize-symbols -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false -ffast-math -ffp-contract=fast"

    # ggml backend options
    -DGGML_HIP=ON
    -DGGML_HIP_MMQ_MFMA=ON
    -DGGML_HIP_GRAPHS=ON
    -DGGML_HIP_NO_VMM=ON
    -DGGML_HIP_EXPORT_METRICS=ON
    -DGGML_HIP_GFX906_OPTIMIZED=ON
    -DGGML_NATIVE=ON
    -DGGML_CUDA_FA=ON
    -DGGML_CUDA_FA_ALL_QUANTS=ON
    -DGGML_CUDA_FORCE_MMQ=OFF
    -DGGML_CUDA_FORCE_CUBLAS=OFF
    -DGGML_CUDA_NO_PEER_COPY=ON

    # llama.cpp targets to build
    -DLLAMA_BUILD_SERVER=ON
    -DLLAMA_BUILD_EXAMPLES=ON
    -DLLAMA_BUILD_TOOLS=ON
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_CURL=ON
    -DLLAMA_STATIC=OFF
)

# Run cmake as the `if` condition: under `set -e` a separate `$?` check after
# the command would never be reached on failure, so the error message below
# would never be printed.
if ! cmake .. "${cmake_args[@]}"; then
    echo -e "${RED}✗ CMake configuration failed${NC}"
    exit 1
fi

echo -e "${GREEN}✓ CMake configuration successful${NC}"

# Compile with all CPU cores and dump detailed logs
NPROC=$(nproc)
LOG_FILE="compilation_log.txt" # relative to ./build, the current directory
echo -e "${YELLOW}Compiling with $NPROC cores...${NC}"
echo -e "${YELLOW}This may take several minutes...${NC}"
echo -e "${YELLOW}Detailed compilation log will be saved to: $LOG_FILE${NC}"

# Clear previous log
: > "$LOG_FILE"

# Run make with detailed output and save to log file.
# The pipeline's own status is tee's, so make's status is read from
# PIPESTATUS immediately afterwards.
make -j"$NPROC" 2>&1 | tee "$LOG_FILE"

if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
    echo -e "${RED}✗ Compilation failed${NC}"
    echo -e "${RED}Check $LOG_FILE for detailed error information${NC}"
    exit 1
fi

echo -e "${GREEN}✓ Compilation successful!${NC}"

# Verify the build
echo -e "${YELLOW}Verifying build...${NC}"

# Check if main executables were built (paths are relative to ./build).
# The list also contains the HIP backend shared library.
EXECUTABLES=(
    "bin/llama-cli"
    "bin/llama-server"
    "bin/llama-bench"
    "bin/libggml-hip.so"
)

ALL_GOOD=true
for artifact in "${EXECUTABLES[@]}"; do
    if [[ -f "$artifact" ]]; then
        echo -e "${GREEN}✓ $artifact built successfully${NC}"

        # Check HIP linking for executables (not libraries)
        if [[ "$artifact" == bin/llama-* && "$artifact" != *.so ]]; then
            # -F: match the library name literally instead of as a regex.
            if ldd "$artifact" | grep -qF "libggml-hip.so"; then
                echo -e "${GREEN} ✓ HIP backend linked${NC}"
            else
                echo -e "${RED} ✗ HIP backend not linked${NC}"
                ALL_GOOD=false
            fi
        fi
    else
        echo -e "${RED}✗ $artifact not found${NC}"
        ALL_GOOD=false
    fi
done

if [[ "$ALL_GOOD" = false ]]; then
    echo -e "${RED}✗ Build verification failed${NC}"
    exit 1
fi

# Display ROCm libraries linked (first five matches only)
echo -e "${YELLOW}ROCm libraries linked:${NC}"
ldd bin/llama-cli | grep -E "(hip|roc)" | head -5

# Quick functionality test.
# Best effort only: a failure is reported but does not abort the script.
echo -e "${YELLOW}Testing HIP backend availability...${NC}"
if ./bin/llama-cli --help 2>/dev/null | grep -q "backend"; then
    echo -e "${GREEN}✓ llama-cli responding correctly${NC}"
else
    echo -e "${RED}✗ llama-cli test failed${NC}"
fi

# Success message
echo
echo -e "${GREEN}======================================${NC}"
echo -e "${GREEN} ✓ BUILD COMPLETED SUCCESSFULLY ${NC}"
echo -e "${GREEN}======================================${NC}"
echo
echo -e "${BLUE}Built executables:${NC}"
echo " • CLI: ./build/bin/llama-cli"
echo " • Server: ./build/bin/llama-server"
echo " • Bench: ./build/bin/llama-bench"
echo
echo -e "${BLUE}Optimizations enabled:${NC}"
echo " • Target GPU: AMD MI50 (gfx906)"
echo " • HIP/ROCm backend with MFMA support"
echo " • Flash Attention kernels"
echo " • All quantization formats"
echo " • Performance metrics export"
echo " • Native CPU optimizations"
echo " • Optimization 5: GFX906 compiler flags (-ffast-math, early-inline, function-calls=false)"
echo
echo -e "${BLUE}Ready to run:${NC}"
echo " ./SCRIPT_launch_server_MI50.sh <model.gguf>"
echo
echo -e "${YELLOW}Note: Make sure to set proper ROCm environment variables before running!${NC}"
echo
echo -e "${BLUE}For debugging with maximum HIP logging:${NC}"
echo " export AMD_LOG_LEVEL=8"
echo " export AMD_LOG_MASK=0xFFFFFFFF"
echo " export AMD_SERIALIZE_KERNEL=3"
echo " ./SCRIPT_launch_server_MI50.sh <model.gguf> 2>&1 | tee hip_debug.log"