# Serve the Qwen2.5-VL-32B-Instruct vision-language model on port 8000,
# tensor-parallel across 4 GPUs (-tp 4).
#   --max-model-len=20480          caps the context window at 20480 tokens
#   --gpu-memory-utilization 0.85  reserves 15% GPU memory headroom
#   --allowed-local-media-path /root  lets requests reference local media files under /root
#   --mm_processor_kwargs           bounds per-image pixel count (min 3136, max 589824)
#                                   to limit vision-token usage per image
# NOTE(review): 'vllm serve' runs in the foreground — start it in a separate
# terminal (or background it) before running the client script below.
vllm serve /model/ModelScope/Qwen/Qwen2.5-VL-32B-Instruct --port 8000 -tp 4 --max-model-len=20480 --gpu-memory-utilization 0.85 --allowed-local-media-path /root --mm_processor_kwargs '{"max_pixels": 589824,"min_pixels": 3136}'
# Client script exercising the image endpoint — presumably targets
# http://localhost:8000; verify inside image.py.
python /root/ucloud/image.py
# --- QwQ-32B ---
# Serve the QwQ-32B model on port 8000, tensor-parallel across 4 GPUs.
# Runs in the foreground; leave it running in its own terminal.
vllm serve /model/ModelScope/Qwen/QwQ-32B --port 8000 -tp 4
# Wait for the model server to finish starting, then launch the interactive client:
# Interactive chat client against the locally running vLLM server
# (defaults to http://localhost:8000 — matches the serve commands above).
vllm chat
# Start the Open WebUI frontend on port 8080 from its dedicated conda env.
# NOTE(review): 'conda activate' only works if conda's shell hook has been
# initialized in this shell (via 'conda init' or sourcing conda.sh).
conda activate openwebui
open-webui serve --port 8080