人人做网站保定网站搜索引擎优化
- 作者: 五速梦信息网
- 时间: 2026年03月21日 09:52
当前位置: 首页 > news >正文
"""Xihua chatbot: a BERT-based human-vs-ChatGPT answer classifier with a Tkinter GUI.

When the user marks a displayed answer as inaccurate, the question is looked up
on Baidu Baike and the encyclopedia summary is shown in the chat window.

NOTE(review): this module was reconstructed from a scraped article in which the
scraper stripped assignment operators, string quotes, ``+`` operators, dunder
underscores and fullwidth commas inside string literals; those have been
restored.  Model paths (``F:/models/bert-base-chinese``) are as published in
the original article — adjust for your environment.
"""
import os
import json
import jsonlines
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import logging
from difflib import SequenceMatcher
from datetime import datetime
import requests
from bs4 import BeautifulSoup

# Project root: the directory containing this file.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))

# Log directory (created eagerly so logging setup below cannot fail on it).
LOGS_DIR = os.path.join(PROJECT_ROOT, 'logs')
os.makedirs(LOGS_DIR, exist_ok=True)


def setup_logging():
    """Send INFO-level logs to both a timestamped file and the console."""
    # Fixed scraper typo: the original used LOGSDIR here but defined LOGS_DIR.
    log_file = os.path.join(
        LOGS_DIR, datetime.now().strftime('%Y-%m-%d_%H-%M-%S_羲和.txt'))
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file, encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )


setup_logging()


class XihuaDataset(Dataset):
    """Dataset of questions paired with a human answer and a ChatGPT answer.

    Each item yields BERT-tokenized tensors for the question and both answers,
    plus the raw answer strings.
    """

    def __init__(self, file_path, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = self.load_data(file_path)

    def load_data(self, file_path):
        """Load records from a .jsonl (one object per line) or .json (array) file."""
        data = []
        if file_path.endswith('.jsonl'):
            with jsonlines.open(file_path) as reader:
                for i, item in enumerate(reader):
                    try:
                        data.append(item)
                    except jsonlines.InvalidLineError as e:
                        logging.warning(f"跳过无效行 {i + 1}: {e}")
        elif file_path.endswith('.json'):
            with open(file_path, 'r', encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError as e:
                    logging.warning(f"跳过无效文件 {file_path}: {e}")
        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        question = item['question']
        human_answer = item['human_answers'][0]
        chatgpt_answer = item['chatgpt_answers'][0]
        try:
            inputs = self.tokenizer(
                question, return_tensors='pt', padding='max_length',
                truncation=True, max_length=self.max_length)
            human_inputs = self.tokenizer(
                human_answer, return_tensors='pt', padding='max_length',
                truncation=True, max_length=self.max_length)
            chatgpt_inputs = self.tokenizer(
                chatgpt_answer, return_tensors='pt', padding='max_length',
                truncation=True, max_length=self.max_length)
        except Exception as e:
            # On tokenization failure, skip forward to the next item (wraps around).
            logging.warning(f"跳过无效项 {idx}: {e}")
            return self.__getitem__((idx + 1) % len(self.data))
        return {
            'input_ids': inputs['input_ids'].squeeze(),
            'attention_mask': inputs['attention_mask'].squeeze(),
            'human_input_ids': human_inputs['input_ids'].squeeze(),
            'human_attention_mask': human_inputs['attention_mask'].squeeze(),
            'chatgpt_input_ids': chatgpt_inputs['input_ids'].squeeze(),
            'chatgpt_attention_mask': chatgpt_inputs['attention_mask'].squeeze(),
            'human_answer': human_answer,
            'chatgpt_answer': chatgpt_answer,
        }


def get_data_loader(file_path, tokenizer, batch_size=8, max_length=128):
    """Build a shuffled DataLoader over a XihuaDataset."""
    dataset = XihuaDataset(file_path, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


class XihuaModel(torch.nn.Module):
    """BERT encoder plus a linear head producing one human-vs-ChatGPT logit."""

    def __init__(self, pretrained_model_name='F:/models/bert-base-chinese'):
        super(XihuaModel, self).__init__()
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.classifier = torch.nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        logits = self.classifier(pooled_output)
        return logits


def train(model, data_loader, optimizer, criterion, device, progress_var=None):
    """Run one training epoch.

    Human answers are trained toward label 1, ChatGPT answers toward label 0.
    ``progress_var`` (a tkinter DoubleVar), if given, is updated to a 0-100
    percentage after each batch.  Returns the mean loss over all batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = len(data_loader)
    for batch_idx, batch in enumerate(data_loader):
        try:
            # Question tensors are moved as in the original article, though only
            # the answer tensors participate in the loss below.
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            human_input_ids = batch['human_input_ids'].to(device)
            human_attention_mask = batch['human_attention_mask'].to(device)
            chatgpt_input_ids = batch['chatgpt_input_ids'].to(device)
            chatgpt_attention_mask = batch['chatgpt_attention_mask'].to(device)

            optimizer.zero_grad()
            human_logits = model(human_input_ids, human_attention_mask)
            chatgpt_logits = model(chatgpt_input_ids, chatgpt_attention_mask)

            human_labels = torch.ones(human_logits.size(0), 1).to(device)
            chatgpt_labels = torch.zeros(chatgpt_logits.size(0), 1).to(device)

            loss = (criterion(human_logits, human_labels)
                    + criterion(chatgpt_logits, chatgpt_labels))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            if progress_var:
                progress_var.set((batch_idx + 1) / num_batches * 100)
        except Exception as e:
            logging.warning(f"跳过无效批次: {e}")
    return total_loss / len(data_loader)


def main_train(retrain=False):
    """Train the model on data/train_data.jsonl and save the weights.

    With ``retrain=True``, existing weights in models/xihua_model.pth are
    loaded first so training continues from them.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f"使用设备: {device}")

    tokenizer = BertTokenizer.from_pretrained('F:/models/bert-base-chinese')
    model = XihuaModel(pretrained_model_name='F:/models/bert-base-chinese').to(device)

    if retrain:
        model_path = os.path.join(PROJECT_ROOT, 'models/xihua_model.pth')
        if os.path.exists(model_path):
            model.load_state_dict(torch.load(model_path, map_location=device))
            logging.info("加载现有模型")
        else:
            logging.info("没有找到现有模型，将使用预训练模型")

    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    criterion = torch.nn.BCEWithLogitsLoss()
    train_data_loader = get_data_loader(
        os.path.join(PROJECT_ROOT, 'data/train_data.jsonl'),
        tokenizer, batch_size=8, max_length=128)

    num_epochs = 30
    for epoch in range(num_epochs):
        train_loss = train(model, train_data_loader, optimizer, criterion, device)
        logging.info(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.10f}")

    torch.save(model.state_dict(),
               os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'))
    logging.info("模型训练完成并保存")


# Shared browser-like User-Agent so Baidu serves normal HTML.
_UA_HEADERS = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/58.0.3029.110 Safari/537.3')
}


def search_baidu(query):
    """Return the first result abstract from a Baidu web search, or a fallback."""
    url = f"https://www.baidu.com/s?wd={query}"
    response = requests.get(url, headers=_UA_HEADERS)
    soup = BeautifulSoup(response.text, 'html.parser')
    results = soup.find_all('div', class_='c-abstract')
    if results:
        return results[0].get_text().strip()
    return "没有找到相关信息"


def search_baidu_baike(query):
    """Return the meta-description summary of a Baidu Baike entry, or a fallback."""
    url = f"https://baike.baidu.com/item/{query}"
    response = requests.get(url, headers=_UA_HEADERS)
    soup = BeautifulSoup(response.text, 'html.parser')
    meta_description = soup.find('meta', attrs={'name': 'description'})
    if meta_description:
        return meta_description['content']
    return "没有找到相关信息"


class XihuaChatbotGUI:
    """Tkinter front end: ask questions, rate answers, train/evaluate the model."""

    def __init__(self, root):
        self.root = root
        self.root.title("羲和聊天机器人")

        self.tokenizer = BertTokenizer.from_pretrained('F:/models/bert-base-chinese')
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = XihuaModel(
            pretrained_model_name='F:/models/bert-base-chinese').to(self.device)
        self.load_model()
        self.model.eval()

        # Training data is kept in memory for fuzzy answer lookup.
        self.data = self.load_data(os.path.join(PROJECT_ROOT, 'data/train_data.jsonl'))

        # Conversation history: dicts with question/answer_type/specific_answer/accuracy.
        self.history = []

        self.create_widgets()

    def create_widgets(self):
        """Lay out all widgets: input row, chat area, control buttons, log pane."""
        style = ttk.Style()
        style.theme_use('clam')

        # Top frame: question entry and submit button.
        top_frame = ttk.Frame(self.root)
        top_frame.pack(pady=10)

        self.question_label = ttk.Label(top_frame, text="问题:", font=("Arial", 12))
        self.question_label.grid(row=0, column=0, padx=10)

        self.question_entry = ttk.Entry(top_frame, width=50, font=("Arial", 12))
        self.question_entry.grid(row=0, column=1, padx=10)

        self.answer_button = ttk.Button(
            top_frame, text="获取回答", command=self.get_answer, style='TButton')
        self.answer_button.grid(row=0, column=2, padx=10)

        # Middle frame: chat transcript.
        middle_frame = ttk.Frame(self.root)
        middle_frame.pack(pady=10)

        self.chat_text = tk.Text(
            middle_frame, height=20, width=100, font=("Arial", 12), wrap='word')
        self.chat_text.grid(row=0, column=0, padx=10, pady=10)
        self.chat_text.tag_configure('user', justify='right', foreground='blue')
        self.chat_text.tag_configure('xihua', justify='left', foreground='green')

        # Bottom frame: rating, training, progress, log and history controls.
        bottom_frame = ttk.Frame(self.root)
        bottom_frame.pack(pady=10)

        self.correct_button = ttk.Button(
            bottom_frame, text="准确", command=self.mark_correct, style='TButton')
        self.correct_button.grid(row=0, column=0, padx=10)

        self.incorrect_button = ttk.Button(
            bottom_frame, text="不准确", command=self.mark_incorrect, style='TButton')
        self.incorrect_button.grid(row=0, column=1, padx=10)

        self.train_button = ttk.Button(
            bottom_frame, text="训练模型", command=self.train_model, style='TButton')
        self.train_button.grid(row=0, column=2, padx=10)

        self.retrain_button = ttk.Button(
            bottom_frame, text="重新训练模型",
            command=lambda: self.train_model(retrain=True), style='TButton')
        self.retrain_button.grid(row=0, column=3, padx=10)

        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(
            bottom_frame, variable=self.progress_var, maximum=100,
            length=200, mode='determinate')
        self.progress_bar.grid(row=1, column=0, columnspan=4, pady=10)

        self.log_text = tk.Text(bottom_frame, height=10, width=70, font=("Arial", 12))
        self.log_text.grid(row=2, column=0, columnspan=4, pady=10)

        self.evaluate_button = ttk.Button(
            bottom_frame, text="评估模型", command=self.evaluate_model, style='TButton')
        self.evaluate_button.grid(row=3, column=0, padx=10, pady=10)

        self.history_button = ttk.Button(
            bottom_frame, text="查看历史记录", command=self.view_history, style='TButton')
        self.history_button.grid(row=3, column=1, padx=10, pady=10)

        self.save_history_button = ttk.Button(
            bottom_frame, text="保存历史记录", command=self.save_history, style='TButton')
        self.save_history_button.grid(row=3, column=2, padx=10, pady=10)

    def get_answer(self):
        """Classify the question and display the best-matching stored answer."""
        question = self.question_entry.get()
        if not question:
            messagebox.showwarning("输入错误", "请输入问题")
            return

        inputs = self.tokenizer(question, return_tensors='pt', padding='max_length',
                                truncation=True, max_length=128)
        with torch.no_grad():
            input_ids = inputs['input_ids'].to(self.device)
            attention_mask = inputs['attention_mask'].to(self.device)
            logits = self.model(input_ids, attention_mask)

        # Positive logit => "human-like" answer source; otherwise ChatGPT-style.
        if logits.item() > 0:
            answer_type = "羲和回答"
        else:
            answer_type = "零回答"

        specific_answer = self.get_specific_answer(question, answer_type)

        self.chat_text.insert(tk.END, f"用户: {question}\n", 'user')
        self.chat_text.insert(tk.END, f"羲和: {specific_answer}\n", 'xihua')

        # Record in history; accuracy stays None until the user rates it.
        self.history.append({
            'question': question,
            'answer_type': answer_type,
            'specific_answer': specific_answer,
            'accuracy': None,
        })

    def get_specific_answer(self, question, answer_type):
        """Fuzzy-match the question against the dataset and return its answer."""
        best_match = None
        best_ratio = 0.0
        for item in self.data:
            ratio = SequenceMatcher(None, question, item['question']).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                best_match = item

        if best_match:
            if answer_type == "羲和回答":
                return best_match['human_answers'][0]
            else:
                return best_match['chatgpt_answers'][0]
        return "这个我也不清楚，你问问零吧"

    def load_data(self, file_path):
        """Load Q/A records from a .jsonl or .json file (same as XihuaDataset)."""
        data = []
        if file_path.endswith('.jsonl'):
            with jsonlines.open(file_path) as reader:
                for i, item in enumerate(reader):
                    try:
                        data.append(item)
                    except jsonlines.InvalidLineError as e:
                        logging.warning(f"跳过无效行 {i + 1}: {e}")
        elif file_path.endswith('.json'):
            with open(file_path, 'r', encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError as e:
                    logging.warning(f"跳过无效文件 {file_path}: {e}")
        return data

    def load_model(self):
        """Load saved weights if present; otherwise keep the pretrained model."""
        model_path = os.path.join(PROJECT_ROOT, 'models/xihua_model.pth')
        if os.path.exists(model_path):
            self.model.load_state_dict(torch.load(model_path, map_location=self.device))
            logging.info("加载现有模型")
        else:
            logging.info("没有找到现有模型，将使用预训练模型")

    def train_model(self, retrain=False):
        """Train on a user-chosen data file, updating the progress bar and log."""
        file_path = filedialog.askopenfilename(
            filetypes=[("JSONL files", "*.jsonl"), ("JSON files", "*.json")])
        if not file_path:
            messagebox.showwarning("文件选择错误", "请选择一个有效的数据文件")
            return

        try:
            dataset = XihuaDataset(file_path, self.tokenizer)
            data_loader = DataLoader(dataset, batch_size=8, shuffle=True)

            # Continue from saved weights when retraining.
            if retrain:
                self.model.load_state_dict(torch.load(
                    os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'),
                    map_location=self.device))
                self.model.to(self.device)

            self.model.train()
            optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-5)
            criterion = torch.nn.BCEWithLogitsLoss()
            num_epochs = 30
            for epoch in range(num_epochs):
                train_loss = train(self.model, data_loader, optimizer, criterion,
                                   self.device, self.progress_var)
                logging.info(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.10f}")
                self.log_text.insert(
                    tk.END, f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.10f}\n")
                self.log_text.see(tk.END)

            torch.save(self.model.state_dict(),
                       os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'))
            logging.info("模型训练完成并保存")
            self.log_text.insert(tk.END, "模型训练完成并保存\n")
            self.log_text.see(tk.END)
            messagebox.showinfo("训练完成", "模型训练完成并保存")
        except Exception as e:
            logging.error(f"模型训练失败: {e}")
            self.log_text.insert(tk.END, f"模型训练失败: {e}\n")
            self.log_text.see(tk.END)
            messagebox.showerror("训练失败", f"模型训练失败: {e}")

    def evaluate_model(self):
        # Placeholder: model evaluation is not implemented in the article.
        messagebox.showinfo("评估结果", "模型评估功能暂未实现")

    def mark_correct(self):
        """Mark the latest answer as accurate."""
        if self.history:
            self.history[-1]['accuracy'] = True
            messagebox.showinfo("评价成功", "您认为这次回答是准确的")

    def mark_incorrect(self):
        """Mark the latest answer inaccurate and show a Baidu Baike lookup instead."""
        if self.history:
            self.history[-1]['accuracy'] = False
            question = self.history[-1]['question']
            baike_answer = self.search_baidu_baike(question)
            self.chat_text.insert(tk.END, f"百度百科结果: {baike_answer}\n", 'xihua')
            messagebox.showinfo("评价成功", "您认为这次回答是不准确的")

    def search_baidu_baike(self, query):
        # Thin wrapper over the module-level function (kept for testability).
        return search_baidu_baike(query)

    def view_history(self):
        """Show all past Q/A pairs and their ratings in a new window."""
        history_window = tk.Toplevel(self.root)
        history_window.title("历史记录")
        history_text = tk.Text(history_window, height=20, width=80, font=("Arial", 12))
        history_text.pack(padx=10, pady=10)

        for entry in self.history:
            history_text.insert(tk.END, f"问题: {entry['question']}\n")
            history_text.insert(tk.END, f"回答类型: {entry['answer_type']}\n")
            history_text.insert(tk.END, f"具体回答: {entry['specific_answer']}\n")
            if entry['accuracy'] is None:
                history_text.insert(tk.END, "评价: 未评价\n")
            elif entry['accuracy']:
                history_text.insert(tk.END, "评价: 准确\n")
            else:
                history_text.insert(tk.END, "评价: 不准确\n")
            history_text.insert(tk.END, "-" * 50 + "\n")

    def save_history(self):
        """Dump the history list to a user-chosen JSON file (UTF-8, pretty-printed)."""
        file_path = filedialog.asksaveasfilename(
            defaultextension=".json", filetypes=[("JSON files", "*.json")])
        if not file_path:
            return
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(self.history, f, ensure_ascii=False, indent=4)
        messagebox.showinfo("保存成功", "历史记录已保存到文件")


if __name__ == "__main__":
    # Launch the GUI.
    root = tk.Tk()
    app = XihuaChatbotGUI(root)
    root.mainloop()
相关文章
-
人人设计网官方网站wordpress用oss图床
人人设计网官方网站wordpress用oss图床
- 技术栈
- 2026年03月21日
-
人人建站大安网站建设
人人建站大安网站建设
- 技术栈
- 2026年03月21日
-
人人建站asp网站开发教程
人人建站asp网站开发教程
- 技术栈
- 2026年03月21日
-
仁怀那里可以做网站wordpress正版插件
仁怀那里可以做网站wordpress正版插件
- 技术栈
- 2026年03月21日
-
认证网站所有权要添加代码公司的网站建设费怎么入账
认证网站所有权要添加代码公司的网站建设费怎么入账
- 技术栈
- 2026年03月21日
-
荣成做网站微信做购物网站怎么抽佣
荣成做网站微信做购物网站怎么抽佣
- 技术栈
- 2026年03月21日






