# Install uv, create a virtual environment, install vLLM into it, and then
# start vLLM's OpenAI API server entrypoint for the model stored at
# /workspace/AutoGLM-Phone-9B.
#
# Each step depends on the previous one, so stop at the first failure
# instead of carrying on with a half-configured environment.
set -e

# Download and run the uv installer.
# curl flags: -L follow redirects, -s silent, -S still print errors,
# -f fail on HTTP errors instead of piping an error page into sh.
curl -LsSf https://astral.sh/uv/install.sh | sh

# Load the environment file written under ~/.local/bin so that `uv` is
# available in this shell. `.` is the portable spelling of `source`.
. "$HOME/.local/bin/env"

# Create a virtual environment (activated from ./.venv below) and enter it.
uv venv
. .venv/bin/activate

# Install vLLM, using the Tencent Cloud PyPI mirror as the package index.
uv pip install vllm -i https://mirrors.cloud.tencent.com/pypi/simple

# Launch the server. The trailing backslashes are required: without them
# every `--option` line would be executed as a separate command.
python -m vllm.entrypoints.openai.api_server \
  --model /workspace/AutoGLM-Phone-9B \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.9