Spire.Doc for Python 12.2.1 已发布。本次更新新增支持通过固定布局获取页的内容。详情请阅读以下内容。
新功能:
- 支持通过固定布局获取页的内容。
def writealltext(fpath: str, content: str) -> None:
    """Write ``content`` to the file at ``fpath`` as UTF-8 text.

    The file is opened in ``'w'`` mode, so an existing file at ``fpath``
    is overwritten.

    Args:
        fpath: Path of the file to write.
        content: Text to store in the file.
    """
    with open(fpath, 'w', encoding="utf-8") as fp:
        fp.write(content)
# NOTE(review): Document, FileFormat, FixedLayoutDocument, FixedLayoutLine,
# LayoutElementType and Section are not imported in the visible part of this
# file; presumably they come from the spire.doc package -- confirm the import.

# Input document and output text file paths
inputfile = "./data/sample.docx"
outputfile = "output.txt"

# Create a new Document instance
doc = Document()
try:
    # Load the document from the input file
    doc.LoadFromFile(inputfile, FileFormat.Docx)

    # Build a FixedLayoutDocument from the loaded document
    layoutdoc = FixedLayoutDocument(doc)

    # Collect the output pieces and join them once at the end; plain
    # assignment to `result` would discard everything but the last piece.
    parts = []

    # First line of the first column on the first page
    line = layoutdoc.Pages[0].Columns[0].Lines[0]
    parts.append("行: ")
    parts.append(line.Text)
    parts.append("\n")

    # Original paragraph associated with that line
    para = line.Paragraph
    parts.append("段落文本: ")
    parts.append(para.Text)
    parts.append("\n")

    # All text shown on the first page as plain text
    # (including headers and footers)
    pagetext = layoutdoc.Pages[0].Text
    parts.append(pagetext)
    parts.append("\n")

    # Walk every page and report how many lines appear on it
    pages = layoutdoc.Pages
    for i in range(pages.Count):
        page = pages[i]
        lines = page.GetChildEntities(LayoutElementType.Line, True)
        parts.append("第 ")
        parts.append(str(page.PageIndex))
        parts.append(" 页有 ")
        parts.append(str(lines.Count))
        parts.append(" 行。")
        parts.append("\n")

    # Reverse lookup: layout entities that belong to the first paragraph
    parts.append("\n")
    parts.append("第一个段落的行:")
    parts.append("\n")
    tempchild = doc.FirstChild
    section = Section(tempchild)
    para = section.Body.Paragraphs[0]
    paragraphlines = layoutdoc.GetLayoutEntitiesOfNode(para)
    for i in range(paragraphlines.Count):
        templine = paragraphlines[i]
        paragraphline = FixedLayoutLine(templine)
        parts.append((paragraphline.Text).strip())
        parts.append("\n")
        parts.append(paragraphline.Rectangle.ToString())
        parts.append("\n")
        # Blank line after each entry (assumed to belong inside the loop)
        parts.append("\n")

    result = "".join(parts)

    # Write the extracted text to the output file
    writealltext(outputfile, result)
finally:
    # Release document resources even if extraction fails
    doc.Dispose()
获取 Spire.Doc for Python 12.2.1 请点击:
https://www.e-iceblue.cn/downloads/spire-doc-python.html