v0.0.9
RAG
This commit is contained in:
parent
05ad8e647c
commit
f1cdf6dd1f
14
README.md
14
README.md
@ -1,14 +1,15 @@
|
|||||||
# LangChain Learning
|
# LangChain Learning
|
||||||
|
|
||||||
[](https://github.com/your-repo/langchain-learning)
|
[](https://github.com/your-repo/langchain-learning)
|
||||||
[](https://www.python.org/)
|
[](https://www.python.org/)
|
||||||
[](https://www.langchain.com/)
|
[](https://www.langchain.com/)
|
||||||
|
|
||||||
> LangChain 框架学习项目,集成 SiliconFlow & Ollama API
|
> LangChain 框架学习项目,集成 SiliconFlow & Ollama API
|
||||||
|
|
||||||
## 功能特性
|
## 功能特性
|
||||||
|
|
||||||
- **多 LLM 集成**:支持 OpenAI API、SiliconFlow、Ollama 及 LangChain 抽象层
|
- **多 LLM 集成**:支持 OpenAI API、SiliconFlow、Ollama 及 LangChain 抽象层
|
||||||
|
- **RAG 检索增强生成**:基于向量库(FAISS)的文档检索与问答
|
||||||
- **流式响应**:实时流式输出,带来更好的使用体验
|
- **流式响应**:实时流式输出,带来更好的使用体验
|
||||||
- **Prompt 工程**:多种 Prompt 模板构建方式
|
- **Prompt 工程**:多种 Prompt 模板构建方式
|
||||||
- **输出解析**:支持 JSON 等格式解析
|
- **输出解析**:支持 JSON 等格式解析
|
||||||
@ -65,6 +66,12 @@ OLLAMA_API_KEY=ollama
|
|||||||
|------|------|------|
|
|------|------|------|
|
||||||
| JSON 解析器 | `python parser/json_parser_demo.py` | 使用 JsonOutputParser 解析 LLM 输出 |
|
| JSON 解析器 | `python parser/json_parser_demo.py` | 使用 JsonOutputParser 解析 LLM 输出 |
|
||||||
|
|
||||||
|
**RAG 示例**
|
||||||
|
|
||||||
|
| 示例 | 命令 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 基础 RAG | `python rag/rag_demo.py` | 基于 FAISS 向量库的检索问答系统 |
|
||||||
|
|
||||||
**Token 用量示例**
|
**Token 用量示例**
|
||||||
|
|
||||||
| 示例 | 命令 | 说明 |
|
| 示例 | 命令 | 说明 |
|
||||||
@ -104,6 +111,8 @@ langchain-learning/
|
|||||||
│ └── prompt_from_file.json # Prompt JSON 模板文件
|
│ └── prompt_from_file.json # Prompt JSON 模板文件
|
||||||
├── parser/
|
├── parser/
|
||||||
│ └── json_parser_demo.py # JSON 输出解析示例
|
│ └── json_parser_demo.py # JSON 输出解析示例
|
||||||
|
├── rag/
|
||||||
|
│ └── rag_demo.py # RAG 检索增强生成示例
|
||||||
├── token/
|
├── token/
|
||||||
│ └── token_demo.py # Token 用量追踪示例
|
│ └── token_demo.py # Token 用量追踪示例
|
||||||
├── memory/
|
├── memory/
|
||||||
@ -137,6 +146,7 @@ langchain-learning/
|
|||||||
|------|------|
|
|------|------|
|
||||||
| 框架 | LangChain |
|
| 框架 | LangChain |
|
||||||
| LLM 提供商 | SiliconFlow, Ollama |
|
| LLM 提供商 | SiliconFlow, Ollama |
|
||||||
|
| 向量库 | FAISS |
|
||||||
| 终端美化 | Rich |
|
| 终端美化 | Rich |
|
||||||
| 语言 | Python 3.11+ |
|
| 语言 | Python 3.11+ |
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ description = "Add your description here"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"faiss-cpu>=1.13.2",
|
||||||
"langchain==0.3.27",
|
"langchain==0.3.27",
|
||||||
"langchain-community==0.3.31",
|
"langchain-community==0.3.31",
|
||||||
"langchain-siliconflow==0.1.3",
|
"langchain-siliconflow==0.1.3",
|
||||||
|
|||||||
68
rag/rag_demo.py
Normal file
68
rag/rag_demo.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from langchain.chains.retrieval_qa.base import RetrievalQA
|
||||||
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
||||||
|
import os
|
||||||
|
import dotenv
|
||||||
|
from langchain_text_splitters import CharacterTextSplitter
|
||||||
|
from langchain_community.vectorstores import FAISS
|
||||||
|
|
||||||
|
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
dotenv.load_dotenv()


def _require_env(name):
    """Return the value of environment variable *name*, failing fast if unset.

    Assigning ``None`` into ``os.environ`` raises an opaque
    ``TypeError: str expected, not NoneType``; checking here produces an
    error message that names the missing variable instead.

    Raises:
        RuntimeError: if the variable is missing or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Set environment variables: copy the SiliconFlow credentials into the
# OPENAI_* variables so the OpenAI-compatible clients below use them.
os.environ['OPENAI_API_KEY'] = _require_env("SILICONFLOW_API_KEY")
os.environ['OPENAI_BASE_URL'] = _require_env("SILICONFLOW_BASE_URL")

# Default 'model_name': 'deepseek-ai/DeepSeek-V3.1'
llm = ChatOpenAI(model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B")

# 1. Prepare documents for one domain: software-testing knowledge.
docs = [
    "等价类划分是一种黑盒测试方法,将输入数据划分为有效等价类和无效等价类。",
    "边界值分析通常作为等价类划分的补充,重点测试输入输出的边界条件。",
    "集成测试用于验证模块间接口的正确性,常见策略包括自顶向下和自底向上。",
    "回归测试是在软件变更后执行的测试,确保原有功能不受新修改影响。",
    "性能测试包括负载测试、压力测试和耐久性测试,用于评估系统的响应能力。",
    "测试用例应包含测试ID、模块、前置条件、步骤、预期结果和优先级信息。"
]

splitter = CharacterTextSplitter()

# 2. Split the documents into chunks (optional).
texts = []
for doc in docs:
    chunks = splitter.split_text(doc)
    texts.extend(chunks)
    logging.info("文档切分:原文=%s -> %d 个分片", doc, len(chunks))
logging.info(texts)

# 3. Embed the chunks and build the vector store.
embeddings = OpenAIEmbeddings(model="netease-youdao/bce-embedding-base_v1")
# First call to the embedding model, as observed in the log:
# HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
vectorstore = FAISS.from_texts(texts, embeddings)
logging.info("构建向量数据库完成")
logging.info(vectorstore)

# 4. Build the RAG chain. The "k" search argument is the top-K result count.
retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={"k": 2})
# Observed in the log:
# HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

query = "什么是等价类划分?"

# Explore the retrieval step on its own.
# `invoke` is the Runnable entry point that supersedes the deprecated
# `get_relevant_documents`.
retrieved_docs = retriever.invoke(query)
logging.info("---------")
for retrieved_doc in retrieved_docs:
    logging.info(retrieved_doc)
logging.info("---------")

# 5. Query the data (the chain looks up the vector store on its own).
# Observed in the log:
# HTTP Request: POST https://api.siliconflow.cn/v1/chat/completions "HTTP/1.1 200 OK"
response = chain.invoke(query)
logging.info(response)
|
||||||
|
|
||||||
27
uv.lock
27
uv.lock
@ -290,6 +290,31 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
|
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "faiss-cpu"
|
||||||
|
version = "1.13.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "packaging" },
|
||||||
|
]
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/07/c9/671f66f6b31ec48e5825d36435f0cb91189fa8bb6b50724029dbff4ca83c/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_arm64.whl", hash = "sha256:a9064eb34f8f64438dd5b95c8f03a780b1a3f0b99c46eeacb1f0b5d15fc02dc1", size = 3452776, upload-time = "2025-12-24T10:27:01.419Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5a/4a/97150aa1582fb9c2bca95bd8fc37f27d3b470acec6f0a6833844b21e4b40/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_x86_64.whl", hash = "sha256:c8d097884521e1ecaea6467aeebbf1aa56ee4a36350b48b2ca6b39366565c317", size = 7896434, upload-time = "2025-12-24T10:27:03.592Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0b/d0/0940575f059591ca31b63a881058adb16a387020af1709dcb7669460115c/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ee330a284042c2480f2e90450a10378fd95655d62220159b1408f59ee83ebf1", size = 11485825, upload-time = "2025-12-24T10:27:05.681Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e7/e1/a5acac02aa593809f0123539afe7b4aff61d1db149e7093239888c9053e1/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab88ee287c25a119213153d033f7dd64c3ccec466ace267395872f554b648cd7", size = 23845772, upload-time = "2025-12-24T10:27:08.194Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9c/7b/49dcaf354834ec457e85ca769d50bc9b5f3003fab7c94a9dcf08cf742793/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85511129b34f890d19c98b82a0cd5ffb27d89d1cec2ee41d2621ee9f9ef8cf3f", size = 13477567, upload-time = "2025-12-24T10:27:10.822Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f7/6b/12bb4037921c38bb2c0b4cfc213ca7e04bbbebbfea89b0b5746248ce446e/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b32eb4065bac352b52a9f5ae07223567fab0a976c7d05017c01c45a1c24264f", size = 25102239, upload-time = "2025-12-24T10:27:13.476Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/6d/40439a05e4e60a0e889aa68b08ec70f5c8e32901f75f2be25c593a2e050e/faiss_cpu-1.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7c5944d7807d58fe7244b6aba06be710ee7ed99343365ed92699349efe979f51", size = 18879906, upload-time = "2025-12-24T10:27:19.041Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9f/f9/b97eadbdd9e00f945d1566c7101382344f504596bfb19219465b0fc61e6e/faiss_cpu-1.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:19508a1badfb36e456c1c8664eeb948349f604db5c7545f277a0126b4a84b080", size = 8548280, upload-time = "2025-12-24T10:27:22.114Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/87/ff/35ed875423200c17bdd594ce921abfc1812ddd21e09355290b9a94e170ab/faiss_cpu-1.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:b82c01d30430dd7b1fa442001b9099735d1a82f6bb72033acdc9206d5ac66a64", size = 18890300, upload-time = "2025-12-24T10:27:24.194Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c5/3a/bbdf5deaf6feb34b46b469c0a0acd40216c3d3c6ecf5aeb71d56b8a650e3/faiss_cpu-1.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2c4f696ae76e7c97cbc12311db83aaf1e7f4f7be06a3ffea7e5b0e8ec1fd805b", size = 8553157, upload-time = "2025-12-24T10:27:26.38Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/60/4b/903d85bf3a8264d49964ec799e45c7ffc91098606b8bc9ef2c904c1a56cb/faiss_cpu-1.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:cb4b5ee184816a4b099162ac93c0d7f0033d81a88e7c1291d0a9cc41ec348984", size = 18891330, upload-time = "2025-12-24T10:27:28.806Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b2/52/5d10642da628f63544aab27e48416be4a7ea25c6b81d8bd65016d8538b00/faiss_cpu-1.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:1243967eeb2298791ff7f3683a4abd2100d7e6ec7542ca05c3b75d47a7f621e5", size = 8553088, upload-time = "2025-12-24T10:27:31.325Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b0/b1/daaab8046f56c60079648bd83774f61b283b59a9930a2f60790ee4cdedfe/faiss_cpu-1.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:c8b645e7d56591aa35dc75415bb53a62e4a494dba010e16f4b67daeffd830bd7", size = 18892621, upload-time = "2025-12-24T10:27:33.923Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/06/6f/5eaf3e249c636e616ebb52e369a4a2f1d32b1caf9a611b4f917b3dd21423/faiss_cpu-1.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:8113a2a80b59fe5653cf66f5c0f18be0a691825601a52a614c30beb1fca9bc7c", size = 8556374, upload-time = "2025-12-24T10:27:36.653Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "frozenlist"
|
name = "frozenlist"
|
||||||
version = "1.8.0"
|
version = "1.8.0"
|
||||||
@ -673,6 +698,7 @@ name = "langchain-learning"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { virtual = "." }
|
source = { virtual = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "faiss-cpu" },
|
||||||
{ name = "langchain" },
|
{ name = "langchain" },
|
||||||
{ name = "langchain-community" },
|
{ name = "langchain-community" },
|
||||||
{ name = "langchain-siliconflow" },
|
{ name = "langchain-siliconflow" },
|
||||||
@ -682,6 +708,7 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "faiss-cpu", specifier = ">=1.13.2" },
|
||||||
{ name = "langchain", specifier = "==0.3.27" },
|
{ name = "langchain", specifier = "==0.3.27" },
|
||||||
{ name = "langchain-community", specifier = "==0.3.31" },
|
{ name = "langchain-community", specifier = "==0.3.31" },
|
||||||
{ name = "langchain-siliconflow", specifier = "==0.1.3" },
|
{ name = "langchain-siliconflow", specifier = "==0.1.3" },
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user