qwen3.6-27B-FP8部署
说明:以下命令以根目录 / 作为工作目录,目录路径请按需调整。
1、环境安装
cd /
# 方式一:使用 conda 创建并激活环境
conda create -n vllm-env python=3.10
conda activate vllm-env
# 方式二:使用 venv 创建并激活环境(与方式一二选一;启动脚本中的 VENV_PATH="/vllm-env" 对应此方式)
python3 -m venv vllm-env
source vllm-env/bin/activate
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install vllm
2、验证(在已激活环境的 Python 解释器中执行以下两行,能输出版本号即安装成功)
import vllm
print(vllm.__version__)
3、魔搭社区(ModelScope)下载模型
pip install modelscope
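mkdir Qwen3.6-27B-FP8
modelscope download --model Qwen/Qwen3.6-27B-FP8
注意:上面的下载命令未指定 --local_dir,模型默认会保存到 ModelScope 的缓存目录,而不是刚创建的 Qwen3.6-27B-FP8 目录;如需下载到该目录,请追加 --local_dir ./Qwen3.6-27B-FP8。下载完成后,请将启动脚本中的 MODEL_PATH 改为模型的实际存放路径。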
4、模型启动脚本:
#!/bin/bash
# Qwen3.6-27B-FP8 dual-GPU deployment script.
# Purpose: stop any service already bound to the configured port and launch
# vLLM with tensor parallelism across two GPUs. Virtualenv activation is
# currently disabled (the activate_venv function below is commented out).

# --- Configuration ---
# GPU indices exposed to CUDA for the processes started by this script.
export CUDA_VISIBLE_DEVICES="0,1"

# Root of the Python virtualenv; only referenced by the commented-out activate_venv.
VENV_PATH="/vllm-env"
# Absolute path of the locally downloaded model directory.
MODEL_PATH="/root/ai-models/Qwen/Qwen3___6-27B-FP8"
# Address and port the HTTP server binds to.
HOST="0.0.0.0"
PORT="8000"
# Tensor-parallel degree; 2 for a dual-GPU setup.
TP_SIZE="2"
# Context length in tokens (262144 = 256K); lower it (e.g. 32768/65536/131072)
# if GPU memory is insufficient.
MAX_MODEL_LEN="262144"
# Value passed to vLLM's --gpu-memory-utilization.
GPU_MEM_UTIL="0.9"
# File that receives the server's stdout and stderr.
LOG_FILE="vllm_server.log"
# --- 函数定义 ---
# 1. 激活虚拟环境
#activate_venv() {
# if [ -f "${VENV_PATH}/bin/activate" ]; then
# echo ">>> 正在激活虚拟环境: ${VENV_PATH}"
# source "${VENV_PATH}/bin/activate"
#else
# echo "错误: 虚拟环境路径不存在: ${VENV_PATH}/bin/activate"
# exit 1
#fi
#}
#######################################
# 2. Stop whatever is currently LISTENING on ${PORT}.
# Only listening sockets are matched, so client processes that merely hold a
# connection involving that port number are never touched. Processes are asked
# to exit with SIGTERM first, so a server can shut down its worker processes;
# SIGKILL is used only for processes still alive after ~10 seconds.
# Globals:   PORT (read)
# Outputs:   progress messages on stdout
# Returns:   0
#######################################
stop_existing_service() {
  local -a pids=() survivors=()
  local pid attempt

  echo ">>> 检查端口 ${PORT} 是否被占用..."
  # -t prints bare PIDs; -sTCP:LISTEN restricts the match to listeners.
  # lsof errors (including "nothing found") are deliberately silenced.
  mapfile -t pids < <(lsof -t -iTCP:"${PORT}" -sTCP:LISTEN 2>/dev/null)

  if (( ${#pids[@]} == 0 )); then
    echo ">>> 端口 ${PORT} 空闲,无需停止旧进程。"
    return 0
  fi

  echo ">>> 发现占用进程 PID: ${pids[*]},正在终止..."
  kill "${pids[@]}" 2>/dev/null

  # Poll for up to ~10 seconds until every signalled process is gone.
  for (( attempt = 0; attempt < 10; attempt++ )); do
    survivors=()
    for pid in "${pids[@]}"; do
      if kill -0 "${pid}" 2>/dev/null; then
        survivors+=("${pid}")
      fi
    done
    if (( ${#survivors[@]} == 0 )); then
      break
    fi
    sleep 1
  done

  # Escalate only for processes that ignored SIGTERM.
  if (( ${#survivors[@]} > 0 )); then
    echo ">>> 进程 ${survivors[*]} 未响应 SIGTERM,强制终止..."
    kill -9 "${survivors[@]}" 2>/dev/null
    sleep 2
  fi

  echo ">>> 旧进程已终止。"
}
#######################################
# 3. Verify prerequisites before launching the server.
# Exits the script with status 1 when the vllm command is not available or the
# model directory does not exist. Emits a warning (without exiting) when fewer
# GPUs are detected than the configured tensor-parallel degree.
# Globals:   MODEL_PATH (read), TP_SIZE (read, defaults to 2 when unset),
#            GPU_COUNT (written)
# Outputs:   error/warning messages on stderr
# Returns:   0 when it returns at all; fatal problems call exit 1
#######################################
check_environment() {
  local required_gpus="${TP_SIZE:-2}"

  if ! command -v vllm > /dev/null 2>&1; then
    echo "错误: vLLM 未安装。请确保在虚拟环境中已执行: pip install vllm" >&2
    exit 1
  fi

  if [[ ! -d "${MODEL_PATH}" ]]; then
    echo "错误: 模型路径不存在: ${MODEL_PATH}" >&2
    exit 1
  fi

  # One output line per GPU; a missing or failing nvidia-smi yields a count of 0.
  # NOTE(review): nvidia-smi is expected to list every GPU in the machine
  # regardless of CUDA_VISIBLE_DEVICES, so this is only a coarse sanity check.
  GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | wc -l)

  # Compare against the configured tensor-parallel size rather than a literal 2
  # so the check stays correct when TP_SIZE is changed.
  if (( GPU_COUNT < required_gpus )); then
    echo "警告: 检测到少于 ${required_gpus} 张 GPU,但配置为 TP_SIZE=${required_gpus}。请确认硬件连接。" >&2
  fi
}
#######################################
# 4. Launch `vllm serve` in the background and wait for it to become healthy.
# The server is started under nohup with stdout/stderr redirected to LOG_FILE.
# The /health endpoint is then polled; polling stops early if the server
# process has already exited, instead of waiting for the full timeout.
# Globals:   MODEL_PATH, HOST, PORT, TP_SIZE, MAX_MODEL_LEN, GPU_MEM_UTIL,
#            LOG_FILE (read); NEW_PID (written: PID of the background job)
#            HEALTH_CHECK_RETRIES  (optional, default 30 polls)
#            HEALTH_CHECK_INTERVAL (optional, default 2 seconds between polls)
# Outputs:   progress on stdout, failure diagnostics on stderr
# Returns:   0 once /health answers successfully; 1 on early exit or timeout
#######################################
start_service() {
  local retries="${HEALTH_CHECK_RETRIES:-30}"
  local interval="${HEALTH_CHECK_INTERVAL:-2}"
  local attempt
  # Build the command as an array so paths containing spaces stay one argument.
  local -a serve_cmd=(
    vllm serve "${MODEL_PATH}"
    --host "${HOST}"
    --port "${PORT}"
    --tensor-parallel-size "${TP_SIZE}"
    --max-model-len "${MAX_MODEL_LEN}"
    --gpu-memory-utilization "${GPU_MEM_UTIL}"
    --dtype auto
    --served-model-name qwen3-local
  )

  echo ">>> 正在启动 Qwen3.6-27B-FP8 (TP=${TP_SIZE}, Context=${MAX_MODEL_LEN})..."
  echo ">>> 日志将输出到 ${LOG_FILE}"

  nohup "${serve_cmd[@]}" > "${LOG_FILE}" 2>&1 &
  NEW_PID=$!
  echo ">>> 服务已在后台启动,PID: ${NEW_PID}"
  echo ">>> 等待服务初始化..."

  # The default of 30 polls x 2 s gives about a minute; raise
  # HEALTH_CHECK_RETRIES if the model takes longer than that to load.
  for (( attempt = 1; attempt <= retries; attempt++ )); do
    # -f makes HTTP error responses count as "not healthy yet";
    # --max-time keeps a hung connection from stalling the loop.
    if curl -sf --max-time 5 "http://localhost:${PORT}/health" > /dev/null 2>&1; then
      echo ">>> 服务启动成功!访问地址: http://${HOST}:${PORT}/v1"
      return 0
    fi
    # Fail fast when the server process is gone (e.g. crashed during startup).
    if ! kill -0 "${NEW_PID}" 2>/dev/null; then
      echo ">>> 错误: 服务进程 (PID: ${NEW_PID}) 已退出,请查看 ${LOG_FILE} 排查错误。" >&2
      return 1
    fi
    sleep "${interval}"
  done

  echo ">>> 警告: 服务启动超时,请查看 ${LOG_FILE} 排查错误。" >&2
  return 1
}
# --- Main flow ---
# Order: validate prerequisites, free the port, then start the server.
# Virtualenv activation is intentionally left disabled (see #activate_venv).
echo "========================================"
echo " Qwen3.6-27B-FP8 双卡部署助手"
echo "========================================"
#activate_venv
check_environment
stop_existing_service
# start_service returns non-zero when the server did not become healthy;
# propagate that instead of reporting a successful deployment.
if ! start_service; then
  echo "========================================" >&2
  echo " 部署失败。请使用 'tail -f ${LOG_FILE}' 查看日志排查问题" >&2
  echo "========================================" >&2
  exit 1
fi
echo "========================================"
echo " 部署完成。使用 'tail -f ${LOG_FILE}' 查看实时日志"
echo "========================================"
