# LM-EVAL
Tip: add `--log_samples --verbosity DEBUG` to any `lm_eval` command below to dump per-sample outputs and verbose logs.

## vLLM server

```bash
export VLLM_LOGGING_LEVEL=DEBUG
timestamp=$(date +%Y%m%d-%H%M%S)
log_file=server.$timestamp.log
# Pick one model path; the last uncommented assignment wins.
# model_path=/home/yiliu7/models/deepseek-ai/DeepSeek-R1
# model_path=/home/yliu7/workspace/inc/3rd-party/llm-compressor/examples/quantization_non_uniform/Llama-3.2-1B-Instruct-NVFP4-FP8-Dynamic
model_path="/data5/yliu7/HF_HOME/qwen_moe_skip_lm_head"
tp_size=2
ep_size=2  # not referenced below; expert parallelism is toggled by --enable-expert-parallel
VLLM_USE_STANDALONE_COMPILE=1 VLLM_WORKER_MULTIPROC_METHOD=spawn vllm serve $model_path \
--max-model-len 8192 \
--tensor-parallel-size $tp_size \
--max-num-seqs 64 \
--gpu-memory-utilization 0.6 \
--dtype bfloat16 \
--port 8688 \
--enable-expert-parallel \
--trust-remote-code 2>&1 | tee "$log_file"
```
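Once the server is up, a quick probe of the OpenAI-compatible endpoint confirms it is serving the expected model (a minimal check, assuming port 8688 from the launch command above):

```bash
# List the models the server exposes; the output should include $model_path.
curl -s http://127.0.0.1:8688/v1/models | python -m json.tool
```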
## Run lm-eval against the server

```bash
export no_proxy="localhost,127.0.0.1,::1"  # no spaces between entries
task_name=gsm8k
batch_size=256
# LIMIT=32  # uncomment and pass --limit $LIMIT for a quick smoke run
timestamp=$(date +%Y%m%d_%H%M%S)
EVAL_LOG_NAME="eval_${task_name}_${timestamp}"
max_length=8192
max_gen_toks=2048
mkdir -p benchmark_logs
# model_path=/home/yliu7/workspace/inc/3rd-party/llm-compressor/examples/quantization_non_uniform/Llama-3.2-1B-Instruct-NVFP4-FP8-Dynamic
model_path="/data5/yliu7/HF_HOME/qwen_moe_skip_lm_head"  # must match the model being served
HF_ALLOW_CODE_EVAL=1 \
lm_eval --model local-completions \
--tasks $task_name \
--model_args model=${model_path},base_url=http://127.0.0.1:8688/v1/completions,max_concurrent=1,max_length=${max_length},max_gen_toks=${max_gen_toks} \
--batch_size ${batch_size} \
--gen_kwargs="max_length=${max_length},max_gen_toks=${max_gen_toks}" \
--confirm_run_unsafe_code \
--log_samples \
--output_path "benchmark_logs/$EVAL_LOG_NAME" \
2>&1 | tee "benchmark_logs/${EVAL_LOG_NAME}.log"
```
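The run writes a results JSON under `benchmark_logs/$EVAL_LOG_NAME`. A small sketch for inspecting the scores afterwards; the glob assumes lm-eval's usual `results_<timestamp>.json` naming under a per-model subdirectory, so adjust if your layout differs:

```bash
# Print the metrics block from the newest results file.
results_file=$(ls -t benchmark_logs/"$EVAL_LOG_NAME"/*/results_*.json 2>/dev/null | head -n 1)
python -c "
import json, sys
data = json.load(open(sys.argv[1]))
print(json.dumps(data['results'], indent=2))
" "$results_file"
```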
## HF backend

```bash
model_path="/dataset/auto-round/qwen_moe/"
taskname=gsm8k
timestamp=$(date +%Y%m%d_%H%M%S)
output_log_file_name="${taskname}_${timestamp}"
HF_ALLOW_CODE_EVAL=1 lm_eval \
--model hf \
--tasks $taskname \
--model_args "pretrained=$model_path,max_length=8192", \
--batch_size auto \
--limit 32 \
--confirm_run_unsafe_code \
--gen_kwargs="max_length=8192,max_gen_toks=2048" \
--trust_remote_code \
--output_path "$output_log_file_name" 2>&1 | tee "${output_log_file_name}.out"
```
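For a long HF-backend run it can save time to confirm the checkpoint loads first. A minimal sketch, assuming a standard transformers layout at `model_path`:

```bash
# Load only the config as a cheap sanity check before the full eval.
python - <<'EOF'
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("/dataset/auto-round/qwen_moe/", trust_remote_code=True)
print(cfg.model_type)
EOF
```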
## Install

```bash
pip install lm-eval==0.4.8
```

With recent vLLM, 0.4.8 fails at generation time:

```
TypeError: LLM.generate() got an unexpected keyword argument 'prompt_token_ids'
```

Upgrading past 0.4.9 resolves it; quote the spec so the shell does not treat `>` as a redirect:

```bash
pip install "lm-eval>0.4.9"
```
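To check which version is actually installed:

```bash
pip show lm_eval | grep -i version
```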
## lm-eval MMMU

From @mengni, the run configuration reported by lm-eval:
```
vllm (pretrained=/mengni/scout_mxfp4/Llama-4-Scout-w4g32,tensor_parallel_size=2,max_model_len=8192,max_num_seqs=1024,max_gen_toks=2048,kv_cache_dtype=auto,enable_expert_parallel=True,gpu_memory_utilization=0.7), gen_kwargs: (max_gen_toks=2048), limit: None, num_fewshot: None, batch_size: 1
```
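A hedged reconstruction of the command behind that header; the task name (`mmmu_val`) is an assumption, since the log line does not record it:

```bash
lm_eval --model vllm \
  --model_args "pretrained=/mengni/scout_mxfp4/Llama-4-Scout-w4g32,tensor_parallel_size=2,max_model_len=8192,max_num_seqs=1024,max_gen_toks=2048,kv_cache_dtype=auto,enable_expert_parallel=True,gpu_memory_utilization=0.7" \
  --tasks mmmu_val \
  --gen_kwargs max_gen_toks=2048 \
  --batch_size 1
```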