1
【ChatGLM3】ERNIE-Bot API 4.0网页内容摘要实现ChatGLM3在线搜索
大模型开发/技术交流
- API
2023.11.17 · 5129 看过
【ChatGLM3】ERNIE-Bot API 4.0网页内容摘要实现ChatGLM3在线搜索
1.解决方案概述
随着互联网的快速发展,大量的网页和文章被发布和更新,如何从这些海量的数据中快速找到用户感兴趣的信息成为了搜索引擎的重要任务。传统的搜索引擎主要通过关键词匹配和排名来展示搜索结果,但这种方法可能无法准确捕捉到用户的真实需求,而且随着语义理解的不断发展,基于关键词的搜索已经不能满足用户的需求。因此,我们提出了基于大型语言模型的人工智能助手——ERNIE-Bot,它能够对用户的问题和要求进行理解和回答,从而帮助他们找到所需的信息。
在本解决方案中,我们将探讨如何使用智谱清言和ERNIE-Bot API 4.0来实现网页内容摘要,并将其与ChatGLM3在线搜索相结合,为用户提供更好更准确的搜索结果。
2. 实施步骤
2.1 环境准备
确保已安装Python 3.10,此外,还需要安装以下库:
huggingface_hub、ipykernel、ipython、jupyter_client、pillow、sentencepiece、streamlit、tokenizers、torch、transformers==4.30.2、pyyaml、requests、protobuf、cpm_kernels、torch>=2.0、gradio==3.39、mdtex2html、accelerate、sse-starlette、streamlit>=1.24.0、fastapi==0.95.1、typing_extensions==4.4.0、uvicorn、loguru、openai>=1.0.0
使用git clone命令 clone 模型仓库:
git clone https://huggingface.co/THUDM/ChatGLM3-6B ./ChatGLM3-6B
clone模型代码:
git clone https://github.com/THUDM/ChatGLM3
2.2 ERNIE-Bot API 4.0的使用
首先,需要在百度千帆平台申请ERNIE-Bot的API密钥(API Key 和 Secret Key)。然后,可以使用以下代码来调用ERNIE-Bot生成网页摘要:
import json

import requests
from bs4 import BeautifulSoup

# Baidu Qianfan (ERNIE-Bot) credentials -- replace with real values.
API_KEY = "API_KEY"
SECRET_KEY = "SECRET_KEY"


def generate_summary(url):
    """Summarize the page at `url` with the ERNIE-Bot 4.0 chat API.

    Returns the raw JSON text of the API response.
    """
    content = parser_html(url)
    api_url = (
        "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/"
        "completions_pro?access_token=" + get_access_token()
    )
    # BUG FIX: serialize with json.dumps -- the original hand-concatenated
    # JSON string (with a stray `''`) was malformed and broke whenever the
    # page text contained quotes or newlines.
    payload = json.dumps(
        {"messages": [{
            "role": "user",
            "content": "你现在是营销专家,帮我对以下文章内容进行摘要,文章内容如下" + content,
        }]},
        ensure_ascii=False,
    )
    headers = {'Content-Type': 'application/json'}
    response = requests.request("POST", api_url, headers=headers,
                                data=payload.encode("utf-8"))
    return response.text


def get_access_token():
    """使用 AK,SK 生成鉴权签名 (Access Token).

    :return: access_token string (the string "None" if the request failed).
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials",
              "client_id": API_KEY, "client_secret": SECRET_KEY}
    return str(requests.post(url, params=params).json().get("access_token"))


def parser_html(url):
    """Fetch `url` and return the concatenated text of all `.c-container` nodes.

    The CSS selector targets Baidu-style result pages; adjust it for other
    layouts.  Returns '' on a non-200 response or when nothing matches.
    """
    response = requests.get(url)
    all_content = ''
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # 这里的选择器可能需要根据页面结构调整
        for result in soup.select('.c-container'):
            all_content += result.get_text().strip()
    return all_content
2.3 网页内容摘要的生成
使用上文定义的generate_summary函数即可生成网页内容摘要。例如:
# `generate_summary` is a module-level function, not a client method --
# the original example called a nonexistent `client` object.
response = generate_summary(url="https://www.example.com/some_page")
2.4 ChatGLM3在线搜索的集成
将生成的摘要与ChatGLM3进行集成,可以使用ChatGLM3的search方法,例如:
def baidu_search(query):
    """Search Baidu for `query`; return "title\\nlink\\nsummary" entries.

    Each result link is summarized via generate_summary().  Returns the
    message "没有找到结果" when the page yields no parsable results.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/58.0.3029.110 Safari/537.3'}
    # Let requests URL-encode the query instead of raw string concatenation.
    response = requests.get('https://www.baidu.com/s',
                            params={'wd': query}, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    content = ""
    for result in (soup.find_all('h3', class_='t')
                   + soup.find_all('div', class_='result')):
        anchor = result.find('a')
        if anchor is None:  # some result cards carry no link
            continue
        title = anchor.text
        link = anchor['href']
        # BUG FIX: summarize each result's own page (`link`); the original
        # passed the search-results URL, summarizing the same page for
        # every hit.
        summary = generate_summary(link)
        content += title + "\n" + link + "\n" + summary + "\n\n\n"
    if content == "":
        return "没有找到结果"
    return content


@register_tool
def search_baidu(
        user_input: Annotated[str, 'the content of the user input', True],
) -> str:
    """Search the 'user input' on Baidu"""
    return baidu_search(user_input)
3.整体代码
以下是实现上述功能的Python代码:
"""Tool registry and web-search tools for ChatGLM3.

Combines a Baidu web search with ERNIE-Bot 4.0 page summarization and
exposes both through the ChatGLM3 tool-registration protocol.
"""
import inspect
import json
import traceback
import urllib
from copy import deepcopy
from pprint import pformat
from types import GenericAlias
from typing import get_origin, Annotated

import requests
from bs4 import BeautifulSoup
# NOTE(review): selenium is imported but never used in this script;
# kept in case other project code relies on this module's namespace.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

# Registered tool callables and their generated schemas, keyed by tool name.
_TOOL_HOOKS = {}
_TOOL_DESCRIPTIONS = {}

# Baidu Qianfan (ERNIE-Bot) credentials -- replace with real values.
API_KEY = "API_KEY"
SECRET_KEY = "SECRET_KEY"


def generate_summary(url):
    """Summarize the page at `url` with the ERNIE-Bot 4.0 chat API.

    Returns the raw JSON text of the API response.
    """
    content = parser_html(url)
    api_url = (
        "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/"
        "completions_pro?access_token=" + get_access_token()
    )
    # BUG FIX: serialize with json.dumps -- the original hand-concatenated
    # JSON string (with a stray `''`) was malformed and broke whenever the
    # page text contained quotes or newlines.
    payload = json.dumps(
        {"messages": [{
            "role": "user",
            "content": "你现在是营销专家,帮我对以下文章内容进行摘要,文章内容如下" + content,
        }]},
        ensure_ascii=False,
    )
    headers = {'Content-Type': 'application/json'}
    response = requests.request("POST", api_url, headers=headers,
                                data=payload.encode("utf-8"))
    return response.text


def get_access_token():
    """使用 AK,SK 生成鉴权签名 (Access Token).

    :return: access_token string (the string "None" if the request failed).
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials",
              "client_id": API_KEY, "client_secret": SECRET_KEY}
    return str(requests.post(url, params=params).json().get("access_token"))


def parser_html(url):
    """Fetch `url` and return the concatenated text of all `.c-container` nodes.

    The CSS selector targets Baidu-style result pages; adjust it for other
    layouts.  Returns '' on a non-200 response or when nothing matches.
    """
    response = requests.get(url)
    all_content = ''
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # 这里的选择器可能需要根据页面结构调整
        for result in soup.select('.c-container'):
            all_content += result.get_text().strip()
    return all_content


def baidu_search(query):
    """Search Baidu for `query`; return "title\\nlink\\nsummary" entries.

    Each result link is summarized via generate_summary().  Returns the
    message "没有找到结果" when the page yields no parsable results.
    NOTE: the original file defined this function twice (one variant read
    the `.c-abstract` snippet, one called generate_summary); the later,
    summary-generating variant won at runtime and is the one kept here.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/58.0.3029.110 Safari/537.3'}
    # Let requests URL-encode the query instead of raw string concatenation.
    response = requests.get('https://www.baidu.com/s',
                            params={'wd': query}, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    content = ""
    for result in (soup.find_all('h3', class_='t')
                   + soup.find_all('div', class_='result')):
        anchor = result.find('a')
        if anchor is None:  # some result cards carry no link
            continue
        title = anchor.text
        link = anchor['href']
        # BUG FIX: summarize each result's own page (`link`); the original
        # passed the search-results URL, summarizing the same page for
        # every hit.
        summary = generate_summary(link)
        content += title + "\n" + link + "\n" + summary + "\n\n\n"
    if content == "":
        return "没有找到结果"
    return content


def register_tool(func: callable):
    """Register `func` as a ChatGLM3 tool and build its schema.

    Every parameter must be annotated Annotated[type, description, required].
    Raises TypeError for a missing or malformed annotation.  Returns `func`
    unchanged so this works as a decorator.
    """
    tool_name = func.__name__
    tool_description = inspect.getdoc(func).strip()
    tool_params = []
    for name, param in inspect.signature(func).parameters.items():
        annotation = param.annotation
        if annotation is inspect.Parameter.empty:
            raise TypeError(f"Parameter `{name}` missing type annotation")
        if get_origin(annotation) != Annotated:
            raise TypeError(f"Annotation type for `{name}` must be typing.Annotated")
        typ, (description, required) = annotation.__origin__, annotation.__metadata__
        typ: str = str(typ) if isinstance(typ, GenericAlias) else typ.__name__
        if not isinstance(description, str):
            raise TypeError(f"Description for `{name}` must be a string")
        if not isinstance(required, bool):
            raise TypeError(f"Required for `{name}` must be a bool")
        tool_params.append({
            "name": name,
            "description": description,
            "type": typ,
            "required": required,
        })
    tool_def = {
        "name": tool_name,
        "description": tool_description,
        "params": tool_params,
    }
    print("[registered tool] " + pformat(tool_def))
    _TOOL_HOOKS[tool_name] = func
    _TOOL_DESCRIPTIONS[tool_name] = tool_def
    return func


def dispatch_tool(tool_name: str, tool_params: dict) -> str:
    """Run the registered tool `tool_name` with `tool_params`.

    Returns the tool's result as a string; if the tool raises, returns the
    formatted traceback instead of propagating (keeps the chat loop alive).
    """
    if tool_name not in _TOOL_HOOKS:
        return f"Tool `{tool_name}` not found. Please use a provided tool."
    tool_call = _TOOL_HOOKS[tool_name]
    try:
        ret = tool_call(**tool_params)
    except Exception:  # was a bare `except:`; don't mask KeyboardInterrupt
        ret = traceback.format_exc()
    return str(ret)


def get_tools() -> dict:
    """Return a deep copy of all registered tool schemas."""
    return deepcopy(_TOOL_DESCRIPTIONS)


# Tool Definitions

@register_tool
def random_number_generator(
        seed: Annotated[int, 'The random seed used by the generator', True],
        range: Annotated[tuple[int, int], 'The range of the generated numbers', True],
) -> int:
    """Generates a random number x, s.t. range[0] <= x < range[1]"""
    # `range` shadows the builtin, but the name is part of the tool schema
    # the model sees, so it must stay.
    if not isinstance(seed, int):
        raise TypeError("Seed must be an integer")
    if not isinstance(range, tuple):
        raise TypeError("Range must be a tuple")
    if not isinstance(range[0], int) or not isinstance(range[1], int):
        raise TypeError("Range must be a tuple of integers")
    import random
    # BUG FIX: randint() is inclusive of both ends; randrange() matches the
    # documented half-open contract range[0] <= x < range[1].
    return random.Random(seed).randrange(*range)


@register_tool
def get_weather(
        city_name: Annotated[str, 'The name of the city to be queried', True],
) -> str:
    """Get the current weather for `city_name`"""
    if not isinstance(city_name, str):
        raise TypeError("City name must be a string")
    key_selection = {
        "current_condition": ["temp_C", "FeelsLikeC", "humidity",
                              "weatherDesc", "observation_time"],
    }
    try:
        resp = requests.get(f"https://wttr.in/{city_name}?format=j1")
        resp.raise_for_status()
        resp = resp.json()
        ret = {k: {_v: resp[k][0][_v] for _v in v}
               for k, v in key_selection.items()}
    except Exception:  # was a bare `except:`; report instead of crashing
        ret = ("Error encountered while fetching weather data!\n"
               + traceback.format_exc())
    return str(ret)


@register_tool
def search_baidu(
        user_input: Annotated[str, 'the content of the user input', True],
) -> str:
    """Search the 'user input' on Baidu"""
    # The original script registered this tool twice; the duplicate
    # (identical) definition has been removed.
    return baidu_search(user_input)


if __name__ == "__main__":
    print(dispatch_tool("get_weather", {"city_name": "beijing"}))
    print(get_tools())
使用ChatGLM3进行搜索
streamlit run --server.port 8888 main.py
以上就是使用智谱清言和ERNIE-Bot API 4.0实现网页内容摘要和ChatGLM3在线搜索的解决方案报告。
Enjoy~
评论