评估与部署
TIP
训练完成后，评估模型效果并部署到生产环境。
模型评估
python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
# The tokenizer and the base weights are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B")
base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-7B")

# Wrap the base model with the PEFT adapter stored at ./qwen-lora-final.
model = PeftModel.from_pretrained(
    base_model,
    "./qwen-lora-final",
)
def test(prompt):
    """Generate a completion for ``prompt`` with the adapter-wrapped model.

    Args:
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        # temperature only takes effect when sampling is enabled; without
        # do_sample=True decoding is greedy and the 0.7 is ignored.
        do_sample=True,
        temperature=0.7,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


合并权重
python
# Merge the adapter into the base model, then write the merged model and the
# tokenizer into the same output directory.
merged = model.merge_and_unload()
for artifact in (merged, tokenizer):
    artifact.save_pretrained("./qwen-merged")


部署
python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
# Application object that the route decorator below registers against.
app = FastAPI()
# Loaded once at import time. load_model() and load_tokenizer() are not
# defined in this snippet and must be provided by the surrounding module.
model = load_model()
tokenizer = load_tokenizer()
class Query(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Text handed to the tokenizer.
    prompt: str
    # Forwarded to generation as ``max_new_tokens``.
    max_tokens: int = 512
    # Sampling temperature requested by the client.
    temperature: float = 0.7
@app.post("/generate")
async def generate(query: Query):
inputs = tokenizer(query.prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=query.max_tokens)
return {"response": tokenizer.decode(outputs[0], skip_special_tokens=True)}vLLM 部署(推荐)
bash
# Launch the vllm.entrypoints.openai.api_server module, serving the merged
# checkpoint in ./qwen-merged on port 8000.
python -m vllm.entrypoints.openai.api_server \
--model ./qwen-merged --port 8000