RAG
This commit is contained in:
kennethcheng 2026-04-15 09:33:16 +08:00
parent 05ad8e647c
commit f1cdf6dd1f
4 changed files with 109 additions and 3 deletions

View File

@ -1,14 +1,15 @@
# LangChain Learning # LangChain Learning
[![](https://img.shields.io/badge/version-0.0.6-blue.svg)](https://github.com/your-repo/langchain-learning) [![](https://img.shields.io/badge/version-0.0.7-blue.svg)](https://github.com/your-repo/langchain-learning)
[![](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/) [![](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/)
[![](https://img.shields.io/badge/LangChain-v1.2-orange.svg)](https://www.langchain.com/) [![](https://img.shields.io/badge/LangChain-v1.2-orange.svg)](https://www.langchain.com/)
> LangChain 框架学习项目,集成 SiliconFlow & Ollama API > LangChain 框架学习项目,集成 SiliconFlow & Ollama API
## 功能特性 ## 功能特性
- **多 LLM 集成**:支持 OpenAI API、SiliconFlow、Ollama 及 LangChain 抽象层 - **多 LLM 集成**:支持 OpenAI API、SiliconFlow、Ollama 及 LangChain 抽象层
- **RAG 检索增强生成**:基于向量库(FAISS)的文档检索与问答
- **流式响应**:实时流式输出,带来更好的使用体验 - **流式响应**:实时流式输出,带来更好的使用体验
- **Prompt 工程**:多种 Prompt 模板构建方式 - **Prompt 工程**:多种 Prompt 模板构建方式
- **输出解析**:支持 JSON 等格式解析 - **输出解析**:支持 JSON 等格式解析
@ -65,6 +66,12 @@ OLLAMA_API_KEY=ollama
|------|------|------| |------|------|------|
| JSON 解析器 | `python parser/json_parser_demo.py` | 使用 JsonOutputParser 解析 LLM 输出 | | JSON 解析器 | `python parser/json_parser_demo.py` | 使用 JsonOutputParser 解析 LLM 输出 |
**RAG 示例**
| 示例 | 命令 | 说明 |
|------|------|------|
| 基础 RAG | `python rag/rag_demo.py` | 基于 FAISS 向量库的检索问答系统 |
**Token 用量示例** **Token 用量示例**
| 示例 | 命令 | 说明 | | 示例 | 命令 | 说明 |
@ -104,6 +111,8 @@ langchain-learning/
│ └── prompt_from_file.json # Prompt JSON 模板文件 │ └── prompt_from_file.json # Prompt JSON 模板文件
├── parser/ ├── parser/
│ └── json_parser_demo.py # JSON 输出解析示例 │ └── json_parser_demo.py # JSON 输出解析示例
├── rag/
│ └── rag_demo.py # RAG 检索增强生成示例
├── token/ ├── token/
│ └── token_demo.py # Token 用量追踪示例 │ └── token_demo.py # Token 用量追踪示例
├── memory/ ├── memory/
@ -137,6 +146,7 @@ langchain-learning/
|------|------| |------|------|
| 框架 | LangChain | | 框架 | LangChain |
| LLM 提供商 | SiliconFlow, Ollama | | LLM 提供商 | SiliconFlow, Ollama |
| 向量库 | FAISS |
| 终端美化 | Rich | | 终端美化 | Rich |
| 语言 | Python 3.11+ | | 语言 | Python 3.11+ |

View File

@ -5,6 +5,7 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"faiss-cpu>=1.13.2",
"langchain==0.3.27", "langchain==0.3.27",
"langchain-community==0.3.31", "langchain-community==0.3.31",
"langchain-siliconflow==0.1.3", "langchain-siliconflow==0.1.3",

68
rag/rag_demo.py Normal file
View File

@ -0,0 +1,68 @@
import logging
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import os
import dotenv
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
# Emit timestamped INFO-level log records for every step of the demo.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Load variables from a local .env file into the process environment.
dotenv.load_dotenv()


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty. Assigning ``None``
            to ``os.environ`` would otherwise fail with an opaque
            ``TypeError`` that does not say which setting is missing.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "define it in the environment or in a .env file."
        )
    return value


# Set environment variables: re-export the SiliconFlow credentials under the
# OPENAI_* names so the langchain_openai clients created below (which are
# given no explicit key or base URL) can pick them up.
os.environ['OPENAI_API_KEY'] = _require_env("SILICONFLOW_API_KEY")
os.environ['OPENAI_BASE_URL'] = _require_env("SILICONFLOW_BASE_URL")
# Original author's note: the default 'model_name' is
# 'deepseek-ai/DeepSeek-V3.1'; a different model is selected explicitly here.
llm = ChatOpenAI(model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B")
# 1. Prepare documents for one domain: software-testing knowledge.
docs = [
    "等价类划分是一种黑盒测试方法,将输入数据划分为有效等价类和无效等价类。",
    "边界值分析通常作为等价类划分的补充,重点测试输入输出的边界条件。",
    "集成测试用于验证模块间接口的正确性,常见策略包括自顶向下和自底向上。",
    "回归测试是在软件变更后执行的测试,确保原有功能不受新修改影响。",
    "性能测试包括负载测试、压力测试和耐久性测试,用于评估系统的响应能力。",
    "测试用例应包含测试ID、模块、前置条件、步骤、预期结果和优先级信息。",
]
splitter = CharacterTextSplitter()

# 2. Split the documents into chunks (optional step).
# Every chunk from every document is collected into one flat list, and the
# number of chunks produced per source document is logged along the way.
texts = []
for doc in docs:
    chunks = splitter.split_text(doc)
    logging.info("文档切分:原文=%s -> %d 个分片", doc, len(chunks))
    texts += chunks
logging.info(texts)
# 3. Embed the chunks and build the vector store.
embeddings = OpenAIEmbeddings(
    model="netease-youdao/bce-embedding-base_v1",
)
# First call to the embedding model; log line observed at run time:
# HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
vectorstore = FAISS.from_texts(texts, embeddings)
logging.info("构建向量数据库完成")
logging.info(vectorstore)

# 4. Build the RAG chain. The "k" search argument is the top-K setting,
# i.e. how many chunks are requested per query.
retriever = vectorstore.as_retriever(
    search_type='similarity',
    search_kwargs={"k": 2},
)
# Log line observed at run time:
# HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
)
query = "什么是等价类划分?"

# Explore the retrieval step on its own, before running the full chain.
# `invoke` is the supported Runnable entry point for retrievers; the older
# `get_relevant_documents` is deprecated and emits a deprecation warning.
retrieved_docs = retriever.invoke(query)
logging.info("---------")
for retrieved_doc in retrieved_docs:
    logging.info(retrieved_doc)
logging.info("---------")

# 5. Query the data (the chain itself looks up the vector store and then
# calls the model). Log line observed at run time:
# HTTP Request: POST https://api.siliconflow.cn/v1/chat/completions "HTTP/1.1 200 OK"
response = chain.invoke(query)
logging.info(response)

27
uv.lock
View File

@ -290,6 +290,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
] ]
[[package]]
name = "faiss-cpu"
version = "1.13.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "packaging" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/c9/671f66f6b31ec48e5825d36435f0cb91189fa8bb6b50724029dbff4ca83c/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_arm64.whl", hash = "sha256:a9064eb34f8f64438dd5b95c8f03a780b1a3f0b99c46eeacb1f0b5d15fc02dc1", size = 3452776, upload-time = "2025-12-24T10:27:01.419Z" },
{ url = "https://files.pythonhosted.org/packages/5a/4a/97150aa1582fb9c2bca95bd8fc37f27d3b470acec6f0a6833844b21e4b40/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_x86_64.whl", hash = "sha256:c8d097884521e1ecaea6467aeebbf1aa56ee4a36350b48b2ca6b39366565c317", size = 7896434, upload-time = "2025-12-24T10:27:03.592Z" },
{ url = "https://files.pythonhosted.org/packages/0b/d0/0940575f059591ca31b63a881058adb16a387020af1709dcb7669460115c/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ee330a284042c2480f2e90450a10378fd95655d62220159b1408f59ee83ebf1", size = 11485825, upload-time = "2025-12-24T10:27:05.681Z" },
{ url = "https://files.pythonhosted.org/packages/e7/e1/a5acac02aa593809f0123539afe7b4aff61d1db149e7093239888c9053e1/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab88ee287c25a119213153d033f7dd64c3ccec466ace267395872f554b648cd7", size = 23845772, upload-time = "2025-12-24T10:27:08.194Z" },
{ url = "https://files.pythonhosted.org/packages/9c/7b/49dcaf354834ec457e85ca769d50bc9b5f3003fab7c94a9dcf08cf742793/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85511129b34f890d19c98b82a0cd5ffb27d89d1cec2ee41d2621ee9f9ef8cf3f", size = 13477567, upload-time = "2025-12-24T10:27:10.822Z" },
{ url = "https://files.pythonhosted.org/packages/f7/6b/12bb4037921c38bb2c0b4cfc213ca7e04bbbebbfea89b0b5746248ce446e/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b32eb4065bac352b52a9f5ae07223567fab0a976c7d05017c01c45a1c24264f", size = 25102239, upload-time = "2025-12-24T10:27:13.476Z" },
{ url = "https://files.pythonhosted.org/packages/14/6d/40439a05e4e60a0e889aa68b08ec70f5c8e32901f75f2be25c593a2e050e/faiss_cpu-1.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7c5944d7807d58fe7244b6aba06be710ee7ed99343365ed92699349efe979f51", size = 18879906, upload-time = "2025-12-24T10:27:19.041Z" },
{ url = "https://files.pythonhosted.org/packages/9f/f9/b97eadbdd9e00f945d1566c7101382344f504596bfb19219465b0fc61e6e/faiss_cpu-1.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:19508a1badfb36e456c1c8664eeb948349f604db5c7545f277a0126b4a84b080", size = 8548280, upload-time = "2025-12-24T10:27:22.114Z" },
{ url = "https://files.pythonhosted.org/packages/87/ff/35ed875423200c17bdd594ce921abfc1812ddd21e09355290b9a94e170ab/faiss_cpu-1.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:b82c01d30430dd7b1fa442001b9099735d1a82f6bb72033acdc9206d5ac66a64", size = 18890300, upload-time = "2025-12-24T10:27:24.194Z" },
{ url = "https://files.pythonhosted.org/packages/c5/3a/bbdf5deaf6feb34b46b469c0a0acd40216c3d3c6ecf5aeb71d56b8a650e3/faiss_cpu-1.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2c4f696ae76e7c97cbc12311db83aaf1e7f4f7be06a3ffea7e5b0e8ec1fd805b", size = 8553157, upload-time = "2025-12-24T10:27:26.38Z" },
{ url = "https://files.pythonhosted.org/packages/60/4b/903d85bf3a8264d49964ec799e45c7ffc91098606b8bc9ef2c904c1a56cb/faiss_cpu-1.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:cb4b5ee184816a4b099162ac93c0d7f0033d81a88e7c1291d0a9cc41ec348984", size = 18891330, upload-time = "2025-12-24T10:27:28.806Z" },
{ url = "https://files.pythonhosted.org/packages/b2/52/5d10642da628f63544aab27e48416be4a7ea25c6b81d8bd65016d8538b00/faiss_cpu-1.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:1243967eeb2298791ff7f3683a4abd2100d7e6ec7542ca05c3b75d47a7f621e5", size = 8553088, upload-time = "2025-12-24T10:27:31.325Z" },
{ url = "https://files.pythonhosted.org/packages/b0/b1/daaab8046f56c60079648bd83774f61b283b59a9930a2f60790ee4cdedfe/faiss_cpu-1.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:c8b645e7d56591aa35dc75415bb53a62e4a494dba010e16f4b67daeffd830bd7", size = 18892621, upload-time = "2025-12-24T10:27:33.923Z" },
{ url = "https://files.pythonhosted.org/packages/06/6f/5eaf3e249c636e616ebb52e369a4a2f1d32b1caf9a611b4f917b3dd21423/faiss_cpu-1.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:8113a2a80b59fe5653cf66f5c0f18be0a691825601a52a614c30beb1fca9bc7c", size = 8556374, upload-time = "2025-12-24T10:27:36.653Z" },
]
[[package]] [[package]]
name = "frozenlist" name = "frozenlist"
version = "1.8.0" version = "1.8.0"
@ -673,6 +698,7 @@ name = "langchain-learning"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "faiss-cpu" },
{ name = "langchain" }, { name = "langchain" },
{ name = "langchain-community" }, { name = "langchain-community" },
{ name = "langchain-siliconflow" }, { name = "langchain-siliconflow" },
@ -682,6 +708,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "faiss-cpu", specifier = ">=1.13.2" },
{ name = "langchain", specifier = "==0.3.27" }, { name = "langchain", specifier = "==0.3.27" },
{ name = "langchain-community", specifier = "==0.3.31" }, { name = "langchain-community", specifier = "==0.3.31" },
{ name = "langchain-siliconflow", specifier = "==0.1.3" }, { name = "langchain-siliconflow", specifier = "==0.1.3" },