东莞网站建设方案企业湖南网站设计公司

当前位置: 首页 > news >正文

东莞网站建设方案企业,湖南网站设计公司,旅游网站网页设计,三联网站建设价格 请参阅：java : pdfbox 读取 PDF文件内书签 或者 python：从PDF中提取目录 请注意：书的目录.txt 编码：UTF-8，推荐用 Notepad 转换编码。 xml 是 python 标准库，在 D:\Python39\Lib\xml\etree pip install … 请参阅：java : pdfbox 读取 PDF文件内书签，或者 python：从PDF中提取目录。 请注意：书的目录.txt 编码：UTF-8，推荐用 Notepad 转换编码。 xml 是 python 标准库，在 D:\Python39\Lib\xml\etree； pip install xmltodict ； python 用 xml.etree.ElementTree，用 xmltodict 转换为json数据。 编写 txt_xml_etree_json.py 如下：

# -*- coding: utf-8 -*-  读目录.txt文件，用 xmltodict 转换为json数据

import os import sys import codecs import json import xml.etree.ElementTree as et import xmltodictif len(sys.argv) 2:f1 sys.argv[1] else:print(usage: python txt_xml_etree_json.py file1.txt)sys.exit(1)if not os.path.exists(f1):print(fERROR: {f1} not found.)sys.exit(1)fn,ext os.path.splitext(f1) if ext.lower() ! .txt:print(ext is not .txt)sys.exit(2)fp codecs.open(f1, moder, encodingutf-8)

读取第一行书名

title fp.readline()

创建主题节点

root et.Element(node) root.set(id, 1) root.set(text, title.strip())# 定义状态 state et.SubElement(root, state) state.set(opened, true) state.set(disabled, true)# 用缩排表现层级关系假设最多5个层级 indent1 *2 indent2 *4 indent3 *6 indent4 *8n 2 for line in fp:txt line.strip()if len(txt) 0:continuetxt txt[0:-3] # 去掉行尾的页数if len(txt) 0 and line[0] ! :# 创建主题的子节点1级节点node1 et.SubElement(root, children)node1.set(id, str(n))node1.set(text, txt)p_node node1 # 寄存父节点elif line.startswith(indent1) and line[2] ! :# 创建node1的子节点2级节点try: type(node1)except NameError: node2 et.SubElement(root, children)else: node2 et.SubElement(node1, children)node2.set(id, str(n))node2.set(text, txt)p_node node2elif line.startswith(indent2) and line[4] ! :# 创建node2的子节点3级节点try: type(node2)except NameError: node3 et.SubElement(node1, children)else: node3 et.SubElement(node2, children)node3.set(id, str(n))node3.set(text, txt)p_node node3elif line.startswith(indent3) and line[6] ! :# 创建node3的子节点4级节点try: type(node3)except NameError: node4 et.SubElement(node2, children)else: node4 et.SubElement(node3, children)node4.set(id, str(n))node4.set(text, txt)p_node node4elif line.startswith(indent4) and line[8] ! :# 创建node4的子节点5级节点try: type(node4)except NameError: node5 et.SubElement(p_node, children)else: node5 et.SubElement(node4, children)node5.set(id, str(n))node5.set(text, txt)else:print(txt)n 1 fp.close() print(fline number: {n})# 转换成 str方便导出 root_bytes et.tostring(root, encodingutf-8) xml_str root_bytes.decode() try:json_dict xmltodict.parse(xml_str, encodingutf-8)json_str json.dumps(json_dict[node], indent2) except:print(xmltodict.parse error!)

去掉

json_str [ json_str.replace(\,) ] #print(json_str)# 导出.json文件 f2 fn .json with codecs.open(f2, w, encodingutf8) as fp:fp.write(json_str) python 用 xml.etree.ElementTree，用 xmltodict 转换为json数据，jinja2 生成jstree模板所需的文件。 编写 txt_xml_etree_htm.py 如下：

# -*- coding: utf-8 -*-  读目录.txt文件，用 xmltodict 转换为json数据，生成jstree所需的文件

import os import sys import codecs import json import xml.etree.ElementTree as et import xmltodict from jinja2 import Environment,FileSystemLoaderif len(sys.argv) 2:f1 sys.argv[1] else:print(usage: python txt_xml_etree_htm.py file1.txt)sys.exit(1)if not os.path.exists(f1):print(fERROR: {f1} not found.)sys.exit(1)fn,ext os.path.splitext(f1) if ext.lower() ! .txt:print(ext is not .txt)sys.exit(2)fp codecs.open(f1, moder, encodingutf-8)

读取第一行书名

title fp.readline()

创建主题节点

root et.Element(node) root.set(id, 1) root.set(text, title.strip())# 定义状态 state et.SubElement(root, state) state.set(opened, true) state.set(disabled, true)# 用缩排表现层级关系假设最多5个层级 indent1 *2 indent2 *4 indent3 *6 indent4 *8n 2 for line in fp:txt line.strip()if len(txt) 0:continuetxt txt[0:-3] # 去掉行尾的页数if len(txt) 0 and line[0] ! :# 创建主题的子节点1级节点node1 et.SubElement(root, children)node1.set(id, str(n))node1.set(text, txt)p_node node1 # 寄存父节点elif line.startswith(indent1) and line[2] ! :# 创建node1的子节点2级节点try: type(node1)except NameError: node2 et.SubElement(root, children)else: node2 et.SubElement(node1, children)node2.set(id, str(n))node2.set(text, txt)p_node node2elif line.startswith(indent2) and line[4] ! :# 创建node2的子节点3级节点try: type(node2)except NameError: node3 et.SubElement(node1, children)else: node3 et.SubElement(node2, children)node3.set(id, str(n))node3.set(text, txt)p_node node3elif line.startswith(indent3) and line[6] ! :# 创建node3的子节点4级节点try: type(node3)except NameError: node4 et.SubElement(node2, children)else: node4 et.SubElement(node3, children)node4.set(id, str(n))node4.set(text, txt)p_node node4elif line.startswith(indent4) and line[8] ! :# 创建node4的子节点5级节点try: type(node4)except NameError: node5 et.SubElement(p_node, children)else: node5 et.SubElement(node4, children)node5.set(id, str(n))node5.set(text, txt)else:print(txt)n 1 fp.close() print(fline number: {n})# 转换成 str方便导出 root_bytes et.tostring(root, encodingutf-8) xml_str root_bytes.decode() try:json_dict xmltodict.parse(xml_str, encodingutf-8)json_str json.dumps(json_dict[node], indent2) except:print(xmltodict.parse error!)

去掉

json_str [ json_str.replace(\,) ] #print(json_str)# 使用 jinja2 对html模板文件进行数据替换 env Environment(loaderFileSystemLoader(d:/python/)) tpl env.get_template(jstree_template.htm)

导出.html文件

f2 fn .htm with codecs.open(f2, w, encodingutf8) as fp:content tpl.render(titletitle.strip(), mydirjson_str)fp.write(content) https://gitee.com/ 搜索 jstree 下载：https://gitee.com/mirrors/jstree?_from=gitee_search git clone https://gitee.com/mirrors/jstree.git 编写 jstree 模板文件 jstree_template.htm：!DOCTYPE html html langen headmeta charsetUTF-8meta http-equivX-UA-Compatible contentIEEdgemeta nameviewport contentwidthdevice-width, initial-scale1title{{title}}/titlescript src../js/jquery-3.2.1.min.js/scriptlink relstylesheet href../js/jstree/dist/themes/default/style.css /script src../js/jstree/dist/jstree.min.js/script /head body!– 搜索框 –div classsearch_inputinput typetext idsearch_a /img src../js/jstree/dist/search.png //divdiv idtreeview1 classtreeview/div script typetext/javascriptvar mydir {{mydir}};$(#treeview1).jstree({core : {multiple : false,data : mydir,dblclick_toggle: true},plugins : [search]});//输入框输入时自动搜索var tout false;$(#search_a).keyup(function(){if (tout) clearTimeout(tout); tout setTimeout(function(){$(#treeview1).jstree(true).search($(#search_a).val()); }, 250);});
/script /body /html 运行 python txt_xml_etree_htm.py your_pdf_dir.txt ，生成 your_pdf_dir.htm 。