使用专用的 SGLang 推理框架（通过 Docker 启动推理服务）:
# Start an SGLang server inside a Docker container.
#
# Container options (everything before the image name):
#   - Named "qwen3-30b-sglang", restart policy "always", host networking.
#   - Passes through /dev/dri, /dev/mxcd and /dev/mem and adds group 44.
#     NOTE(review): presumably required by the accelerator runtime shipped
#     in the "maca" image, and 44 is presumably the host's video group --
#     confirm on the target machine.
#   - Disables the seccomp and AppArmor profiles, sets a 100gb /dev/shm and
#     an unlimited memlock ulimit.
#   - Bind-mounts the model directory and the host's /etc/localtime at the
#     same paths inside the container.
#
# Server options (everything after "sglang.launch_server"):
#   - Loads the model from /home/models/modelscope/qwen35_4b.
#   - Listens on 0.0.0.0:8000 with tensor-parallel size 1 and a context
#     length of 32768.
#
# NOTE(review): the container name says "qwen3-30b" but the model path is
# "qwen35_4b" -- confirm which one is intended.
# NOTE(review): "your-api-key-here" is a placeholder; replace it with a real
# key before running, since the server binds to all interfaces.
docker run -it \
  --restart always \
  --device=/dev/dri \
  --device=/dev/mxcd \
  --group-add 44 \
  --name qwen3-30b-sglang \
  --device=/dev/mem \
  --network=host \
  --security-opt seccomp=unconfined \
  --security-opt apparmor=unconfined \
  --shm-size '100gb' \
  --ulimit memlock=-1 \
  -v /home/models/modelscope/:/home/models/modelscope/ \
  -v /etc/localtime:/etc/localtime \
  sglang:0.5.9-maca.ai3.5.3.208-torch2.8-py310-ubuntu22.04-amd64 \
  /opt/conda/bin/python -m sglang.launch_server \
    --model-path /home/models/modelscope/qwen35_4b \
    --port 8000 \
    --api-key "your-api-key-here" \
    --host 0.0.0.0 \
    --tensor-parallel-size 1 \
    --context-length 32768 \
    --trust-remote-code
