GPU to XPU - yiliu30/yi GitHub Wiki
sycl
sudo gpasswd -a ${USER} render
newgrp render
# @yu recommends this instead:
# sudo usermod -aG render,video $USER
- Dump utilization metrics for devices 0-3
sudo xpu-smi dump -d 0,1,2,3 -m 18
Reference (llama.cpp SYCL backend guide): https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md
Verify installation and environment
source /opt/intel/oneapi/setvars.sh
sycl-ls
xpu-smi ps -h
xpu-smi ps
Install XPU driver
sudo apt-get update
sudo apt-get install -y software-properties-common
sudo add-apt-repository -y ppa:kobuk-team/intel-graphics
sudo apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc
sudo apt-get install -y intel-media-va-driver-non-free libmfx-gen1 libvpl2 libvpl-tools libva-glx2 va-driver-all vainfo
Check XPU driver:
clinfo | grep "Device Name"
Add the current user to the render and video groups
sudo usermod -aG render,video $USER
QBits
# export ICPX_COMPILER_HOME=/opt/intel/oneapi/compiler/2025.2/bin/icpx
# cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=/opt/intel/oneapi/compiler/2025.2/bin/icpx
# cmake --build build -j 8
# cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=$ICPX_COMPILER_HOME -DBTLA_UT_DEBUG=ON -DBTLA_UT_BENCHMARK=ON -DBTLA_SYCL=ON
# cmake -B build -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$ICPX_COMPILER_HOME -DBTLA_UT_DEBUG=ON
# cmake -B build -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$ICPX_COMPILER_HOME -DBTLA_UT_DEBUG=ON
# TLA
export ICPX_COMPILER_HOME=/opt/intel/oneapi/compiler/2025.2/bin/icpx
cmake -B build -DCMAKE_CXX_COMPILER=$ICPX_COMPILER_HOME -DCUTLASS_ENABLE_SYCL=ON -DDPCPP_SYCL_TARGET=intel_gpu_bmg_g21 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0 -fdiagnostics-color=always"
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
export IGC_VISAOptions="-perfmodel"
export IGC_VectorAliasBBThreshold=10000
export IGC_ExtraOCLOptions="-cl-intel-256-GRF-per-thread"
cmake --build build -j 8
# Bfloat16 GEMM
# ./examples/00_bmg_gemm/00_bmg_gemm_padded --m=4096 --n=4096 --k=4096 --l=16 --iterations=100
# Int4 WOQ GEMM:
# ./examples/02_bmg_gemm_mixed_dtype/02_bmg_gemm_f16_u4_f16 --m=4096 --n=4096 --k=4096 --l=16 --iterations=10
# Benchmark INT4/INT8
# ./benchmarks/gemm/cutlass_benchmarks_gemm_sycl --config_file=../benchmarks/device/bmg/input_files/input_sglang_gemm_mixed_dtype.in
ZE_AFFINITY_MASK=3 bash run_xpu.py
ONEAPI_DEVICE_SELECTOR=level_zero:0
sudo xpu-smi dump -d 0,1,2,3,4,5,6,7 -m 0,5,18,9
# Remember to run this with sudo!