diff --git a/README.md b/README.md index 00fb49a..868bfd6 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,15 @@ # LangChain Learning -[![](https://img.shields.io/badge/version-0.0.6-blue.svg)](https://github.com/your-repo/langchain-learning) +[![](https://img.shields.io/badge/version-0.0.7-blue.svg)](https://github.com/your-repo/langchain-learning) [![](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/) [![](https://img.shields.io/badge/LangChain-v1.2-orange.svg)](https://www.langchain.com/) -> LangChain 框架学习项目,集成 SiliconFlow & Ollama API +> LangChain 框架学习项目,集成 SiliconFlow & Ollama API ## 功能特性 - **多 LLM 集成**:支持 OpenAI API、SiliconFlow、Ollama 及 LangChain 抽象层 +- **RAG 检索增强生成**:基于向量库(FAISS)的文档检索与问答 - **流式响应**:实时流式输出,带来更好的使用体验 - **Prompt 工程**:多种 Prompt 模板构建方式 - **输出解析**:支持 JSON 等格式解析 @@ -65,6 +66,12 @@ OLLAMA_API_KEY=ollama |------|------|------| | JSON 解析器 | `python parser/json_parser_demo.py` | 使用 JsonOutputParser 解析 LLM 输出 | +**RAG 示例** + +| 示例 | 命令 | 说明 | +|------|------|------| +| 基础 RAG | `python rag/rag_demo.py` | 基于 FAISS 向量库的检索问答系统 | + **Token 用量示例** | 示例 | 命令 | 说明 | @@ -104,6 +111,8 @@ langchain-learning/ │ └── prompt_from_file.json # Prompt JSON 模板文件 ├── parser/ │ └── json_parser_demo.py # JSON 输出解析示例 +├── rag/ +│ └── rag_demo.py # RAG 检索增强生成示例 ├── token/ │ └── token_demo.py # Token 用量追踪示例 ├── memory/ @@ -137,9 +146,10 @@ langchain-learning/ |------|------| | 框架 | LangChain | | LLM 提供商 | SiliconFlow, Ollama | +| 向量库 | FAISS | | 终端美化 | Rich | | 语言 | Python 3.11+ | ## 许可证 -MIT License \ No newline at end of file +MIT License diff --git a/pyproject.toml b/pyproject.toml index 703752e..12fa2d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.11" dependencies = [ + "faiss-cpu>=1.13.2", "langchain==0.3.27", "langchain-community==0.3.31", "langchain-siliconflow==0.1.3", diff --git a/rag/rag_demo.py b/rag/rag_demo.py new file mode 
100644 index 0000000..2a41841 --- /dev/null +++ b/rag/rag_demo.py @@ -0,0 +1,82 @@
+"""Minimal RAG demo: FAISS retrieval over a small in-memory corpus, answered by an LLM."""
+
+import logging
+import os
+
+import dotenv
+from langchain.chains.retrieval_qa.base import RetrievalQA
+from langchain_community.vectorstores import FAISS
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_text_splitters import CharacterTextSplitter
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+dotenv.load_dotenv()
+
+
+def _require_env(name: str) -> str:
+    """Return environment variable *name*; raise RuntimeError if it is unset or empty."""
+    value = os.getenv(name)
+    if not value:
+        # os.environ only accepts str values, so assigning a missing (None)
+        # setting would otherwise fail with an opaque TypeError.
+        raise RuntimeError(f"Missing required environment variable: {name}")
+    return value
+
+
+## Point the OpenAI-compatible clients at the SiliconFlow credentials and endpoint.
+os.environ["OPENAI_API_KEY"] = _require_env("SILICONFLOW_API_KEY")
+os.environ["OPENAI_BASE_URL"] = _require_env("SILICONFLOW_BASE_URL")
+
+# Default 'model_name': 'deepseek-ai/DeepSeek-V3.1',
+llm = ChatOpenAI(model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B")
+
+## 1. Prepare documents for one domain: software-testing knowledge.
+docs = [
+    "等价类划分是一种黑盒测试方法,将输入数据划分为有效等价类和无效等价类。",
+    "边界值分析通常作为等价类划分的补充,重点测试输入输出的边界条件。",
+    "集成测试用于验证模块间接口的正确性,常见策略包括自顶向下和自底向上。",
+    "回归测试是在软件变更后执行的测试,确保原有功能不受新修改影响。",
+    "性能测试包括负载测试、压力测试和耐久性测试,用于评估系统的响应能力。",
+    "测试用例应包含测试ID、模块、前置条件、步骤、预期结果和优先级信息。"
+]
+
+splitter = CharacterTextSplitter()
+
+## 2. Split the documents (optional).
+texts = []
+for doc in docs:
+    chunks = splitter.split_text(doc)
+    texts.extend(chunks)
+    logging.info("文档切分:原文=%s -> %d 个分片", doc, len(chunks))
+logging.info(texts)
+
+## 3. Embed the chunks and build the vector store.
+embeddings = OpenAIEmbeddings(model="netease-youdao/bce-embedding-base_v1")
+## First embedding-model call: HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
+vectorstore = FAISS.from_texts(texts, embeddings)
+logging.info("构建向量数据库完成")
+logging.info(vectorstore)
+
+## 4. Build the RAG chain; "k" is the top-K number of chunks to retrieve.
+retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})
+## HTTP Request: POST https://api.siliconflow.cn/v1/embeddings "HTTP/1.1 200 OK"
+chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+
+query = "什么是等价类划分?"
+
+## Explore the retrieval step on its own.
+## invoke() is the Runnable entry point; get_relevant_documents() is deprecated.
+retrieved_docs = retriever.invoke(query)
+logging.info("---------")
+for retrieved_doc in retrieved_docs:
+    logging.info(retrieved_doc)
+logging.info("---------")
+
+## 5. Query the data (the model-driven chain queries the vector store itself).
+## HTTP Request: POST https://api.siliconflow.cn/v1/chat/completions "HTTP/1.1 200 OK"
+response = chain.invoke(query)
+logging.info(response)
+
diff --git a/uv.lock b/uv.lock index d334051..9a9defc 100644 --- a/uv.lock +++ b/uv.lock @@ -290,6 +290,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] +[[package]] +name = "faiss-cpu" +version = "1.13.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c9/671f66f6b31ec48e5825d36435f0cb91189fa8bb6b50724029dbff4ca83c/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_arm64.whl", hash = "sha256:a9064eb34f8f64438dd5b95c8f03a780b1a3f0b99c46eeacb1f0b5d15fc02dc1", size = 3452776, upload-time = "2025-12-24T10:27:01.419Z" }, + { url = "https://files.pythonhosted.org/packages/5a/4a/97150aa1582fb9c2bca95bd8fc37f27d3b470acec6f0a6833844b21e4b40/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_x86_64.whl", hash = "sha256:c8d097884521e1ecaea6467aeebbf1aa56ee4a36350b48b2ca6b39366565c317", size = 7896434, upload-time = "2025-12-24T10:27:03.592Z" }, + { url = "https://files.pythonhosted.org/packages/0b/d0/0940575f059591ca31b63a881058adb16a387020af1709dcb7669460115c/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ee330a284042c2480f2e90450a10378fd95655d62220159b1408f59ee83ebf1", size = 11485825, upload-time = "2025-12-24T10:27:05.681Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/e1/a5acac02aa593809f0123539afe7b4aff61d1db149e7093239888c9053e1/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab88ee287c25a119213153d033f7dd64c3ccec466ace267395872f554b648cd7", size = 23845772, upload-time = "2025-12-24T10:27:08.194Z" }, + { url = "https://files.pythonhosted.org/packages/9c/7b/49dcaf354834ec457e85ca769d50bc9b5f3003fab7c94a9dcf08cf742793/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85511129b34f890d19c98b82a0cd5ffb27d89d1cec2ee41d2621ee9f9ef8cf3f", size = 13477567, upload-time = "2025-12-24T10:27:10.822Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6b/12bb4037921c38bb2c0b4cfc213ca7e04bbbebbfea89b0b5746248ce446e/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b32eb4065bac352b52a9f5ae07223567fab0a976c7d05017c01c45a1c24264f", size = 25102239, upload-time = "2025-12-24T10:27:13.476Z" }, + { url = "https://files.pythonhosted.org/packages/14/6d/40439a05e4e60a0e889aa68b08ec70f5c8e32901f75f2be25c593a2e050e/faiss_cpu-1.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7c5944d7807d58fe7244b6aba06be710ee7ed99343365ed92699349efe979f51", size = 18879906, upload-time = "2025-12-24T10:27:19.041Z" }, + { url = "https://files.pythonhosted.org/packages/9f/f9/b97eadbdd9e00f945d1566c7101382344f504596bfb19219465b0fc61e6e/faiss_cpu-1.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:19508a1badfb36e456c1c8664eeb948349f604db5c7545f277a0126b4a84b080", size = 8548280, upload-time = "2025-12-24T10:27:22.114Z" }, + { url = "https://files.pythonhosted.org/packages/87/ff/35ed875423200c17bdd594ce921abfc1812ddd21e09355290b9a94e170ab/faiss_cpu-1.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:b82c01d30430dd7b1fa442001b9099735d1a82f6bb72033acdc9206d5ac66a64", size = 18890300, upload-time = "2025-12-24T10:27:24.194Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/3a/bbdf5deaf6feb34b46b469c0a0acd40216c3d3c6ecf5aeb71d56b8a650e3/faiss_cpu-1.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2c4f696ae76e7c97cbc12311db83aaf1e7f4f7be06a3ffea7e5b0e8ec1fd805b", size = 8553157, upload-time = "2025-12-24T10:27:26.38Z" }, + { url = "https://files.pythonhosted.org/packages/60/4b/903d85bf3a8264d49964ec799e45c7ffc91098606b8bc9ef2c904c1a56cb/faiss_cpu-1.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:cb4b5ee184816a4b099162ac93c0d7f0033d81a88e7c1291d0a9cc41ec348984", size = 18891330, upload-time = "2025-12-24T10:27:28.806Z" }, + { url = "https://files.pythonhosted.org/packages/b2/52/5d10642da628f63544aab27e48416be4a7ea25c6b81d8bd65016d8538b00/faiss_cpu-1.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:1243967eeb2298791ff7f3683a4abd2100d7e6ec7542ca05c3b75d47a7f621e5", size = 8553088, upload-time = "2025-12-24T10:27:31.325Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b1/daaab8046f56c60079648bd83774f61b283b59a9930a2f60790ee4cdedfe/faiss_cpu-1.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:c8b645e7d56591aa35dc75415bb53a62e4a494dba010e16f4b67daeffd830bd7", size = 18892621, upload-time = "2025-12-24T10:27:33.923Z" }, + { url = "https://files.pythonhosted.org/packages/06/6f/5eaf3e249c636e616ebb52e369a4a2f1d32b1caf9a611b4f917b3dd21423/faiss_cpu-1.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:8113a2a80b59fe5653cf66f5c0f18be0a691825601a52a614c30beb1fca9bc7c", size = 8556374, upload-time = "2025-12-24T10:27:36.653Z" }, +] + [[package]] name = "frozenlist" version = "1.8.0" @@ -673,6 +698,7 @@ name = "langchain-learning" version = "0.1.0" source = { virtual = "." 
} dependencies = [ + { name = "faiss-cpu" }, { name = "langchain" }, { name = "langchain-community" }, { name = "langchain-siliconflow" }, @@ -682,6 +708,7 @@ dependencies = [ [package.metadata] requires-dist = [ + { name = "faiss-cpu", specifier = ">=1.13.2" }, { name = "langchain", specifier = "==0.3.27" }, { name = "langchain-community", specifier = "==0.3.31" }, { name = "langchain-siliconflow", specifier = "==0.1.3" },