Skip to content

RAG 检索优化

TIP

检索效果直接影响 RAG 的回答质量。优化检索策略可以显著提升准确率。

混合检索

python
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# Keyword (BM25) retriever built directly from the raw text chunks.
bm25_retriever = BM25Retriever.from_texts(chunks)
# Vector retriever. The number of results must be passed through
# search_kwargs; a bare k=3 keyword is not a search parameter of the
# retriever, so the intended limit would not be applied.
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
# Hybrid retriever: combines both retrievers, weighting BM25 at 0.3 and the
# vector retriever at 0.7 (weights follow the order of `retrievers`).
ensemble = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.3, 0.7],
)

查询重写

python
def rewrite_query(question):
    """Ask the LLM to rephrase a question into a more retrieval-friendly query.

    Args:
        question: The original user question; interpolated into the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the rewrite prompt.
    """
    # The prompt is written in Chinese and is sent to the model verbatim.
    rewrite_prompt = f"将以下问题改写得更适合检索:\n原始: {question}\n优化: "
    rewritten = llm.invoke(rewrite_prompt)
    return rewritten

上下文压缩（对检索结果进行二次精炼）

python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Build the document compressor from the LLM.
compressor = LLMChainExtractor.from_llm(llm)
# Pair the base retriever with the compressor.
# NOTE(review): LLMChainExtractor presumably trims each retrieved document
# down to its query-relevant text rather than reordering results -- confirm
# this is the intended post-processing step.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

多轮检索

python
def multi_step_retrieve(query, depth=2):
    """Retrieve documents over several rounds, expanding the query each round.

    Every round queries the module-level ``retriever``. The query for the
    next round is the original ``query`` followed by the first 100 characters
    of the top hit from the current round.

    Args:
        query: The original search query.
        depth: Maximum number of retrieval rounds.

    Returns:
        A list of the documents collected over all rounds, de-duplicated by
        ``page_content`` and ordered by first appearance. Empty if the first
        round returns nothing or ``depth`` is not positive.
    """
    all_docs = []
    current = query
    for _ in range(depth):
        docs = retriever.get_relevant_documents(current)
        if not docs:
            # No hit to expand the query with; stopping here also avoids an
            # IndexError on docs[0] below.
            break
        all_docs.extend(docs)
        current = f"{query} {docs[0].page_content[:100]}"
    # A dict keeps first-seen key order; when contents repeat, the later
    # document object replaces the earlier one under the same key.
    return list({d.page_content: d for d in all_docs}.values())