RAG 检索优化
TIP
检索效果直接影响 RAG 的回答质量。优化检索策略可以显著提升准确率。
混合检索
python
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# Keyword (BM25) retrieval built directly from `chunks`
bm25_retriever = BM25Retriever.from_texts(chunks)
# Vector retrieval: the result count must go through search_kwargs, which is
# where the vector-store retriever takes its search parameters; a bare k=3
# keyword is not one of the retriever's settings.
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
# Hybrid: combine both retrievers, weighting BM25 at 0.3 and vector search at 0.7
ensemble = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.3, 0.7],
)

查询重写
python
def rewrite_query(question):
    """Ask the LLM to rephrase a question into a more retrieval-friendly query.

    Args:
        question: The original question; it is interpolated into the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the rewrite prompt.
    """
    # The prompt ends with an open "优化: " slot for the model to complete.
    return llm.invoke(f"将以下问题改写得更适合检索:\n原始: {question}\n优化: ")

重排序
python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
# Build the document compressor from the module-level `llm`
compressor = LLMChainExtractor.from_llm(llm)
# Pair the existing `retriever` with that compressor
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

多轮检索
python
def multi_step_retrieve(query, depth=2):
    """Retrieve documents over several rounds, expanding the query each round.

    Each round queries ``retriever``; the next round's query is the original
    ``query`` followed by the first 100 characters of that round's first hit.

    Args:
        query: The original search query.
        depth: Maximum number of retrieval rounds (default 2).

    Returns:
        A list of the retrieved documents in which documents sharing the same
        ``page_content`` are collapsed into a single entry.
    """
    all_docs = []
    current = query
    for _ in range(depth):
        docs = retriever.get_relevant_documents(current)
        if not docs:
            # No hits means there is no first document to expand the query
            # with; indexing docs[0] here would raise IndexError.
            break
        all_docs.extend(docs)
        current = f"{query} {docs[0].page_content[:100]}"
    # Keying by page_content removes duplicates; keys keep first-seen order,
    # and a later duplicate replaces the stored document for that key.
    return list({d.page_content: d for d in all_docs}.values())