python富文本转word

富文本转word

#coding=utf-8
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn
from lxml import etree
# from docx.enum.text import WD_ALIGN_PARAGRAPH

# Sample input records. Each record carries a rich-text (HTML) title and a
# rich-text body; the first title is wrapped in <h1>, the second is plain.
data = [
    {
        'document_title': u'<h1>写一个题目阿斯蒂芬按时</h1>',
        'document_content': (
            u"<p>你好</p>\n"
            u"\n"
            u"<p>你好中国</p>\n"
            u"\n"
            u"<p><strong>hello </strong>world</p>\n"
        ),
    },
    {
        'document_title': u'写一个题目阿斯蒂芬按时',
        'document_content': (
            u"<p>你好</p>\n"
            u"\n"
            u"<p>你好中国</p>\n"
            u"\n"
            u"<p><strong>hello </strong>world</p>\n"
        ),
    },
]
# <!-- more -->

# Strip the markup from a rich-text (HTML) fragment.
def clean_richtext(richtext):
    """Return the plain text of an HTML fragment with all tags removed.

    Args:
        richtext: HTML markup as a string. May be empty or None.

    Returns:
        The concatenated text content of the parsed fragment, or an empty
        string when there is nothing to parse.
    """
    # Empty input gives lxml nothing to build a tree from, so answer directly
    # instead of letting the parse (or the later .xpath call) fail.
    if not richtext:
        return u''
    response = etree.HTML(text=richtext)
    # NOTE(review): some lxml versions appear to return None rather than
    # raising when no root element can be built -- guard both ways.
    if response is None:
        return u''
    # string(.) flattens the whole tree into its text nodes, in document order.
    return response.xpath('string(.)')

def _style_run(run, font_name, size_pt, color=None):
    """Apply size, optional colour, and a Latin + East Asian font to a run."""
    run.font.size = Pt(size_pt)
    if color is not None:
        run.font.color.rgb = color
    run.font.name = font_name
    # Assigning font.name creates the rFonts element; the East Asian slot is
    # set on it explicitly so CJK text is rendered in the same font.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)


def render_data(data, output_path='test.docx', font_name=u'宋体'):
    """Render rich-text records into a Word document and save it.

    Each record contributes one heading (from ``document_title``) followed by
    one paragraph (from ``document_content``). Markup is stripped with
    ``clean_richtext`` so only plain text is written.

    Args:
        data: Iterable of dicts with ``document_title`` and
            ``document_content`` keys holding HTML strings.
        output_path: Path the .docx file is written to. Defaults to
            ``'test.docx'``, the value that was previously hard-coded.
        font_name: Font applied to every run, for Latin and East Asian text.

    Raises:
        KeyError: If a record lacks one of the two expected keys.
    """
    document = Document()

    for item in data:
        title_text = clean_richtext(item['document_title'])
        content_text = clean_richtext(item['document_content'])

        # Create both containers first so the heading precedes its paragraph.
        title = document.add_heading()
        paragraph = document.add_paragraph()

        # Heading: 22pt, forced to black.
        _style_run(title.add_run(title_text), font_name, 22,
                   color=RGBColor(0, 0, 0))
        # Body: 10.5pt, colour left untouched.
        _style_run(paragraph.add_run(content_text), font_name, 10.5)

    # Saved once, after all records have been appended.
    document.save(output_path)

# Script entry point: render the sample records in `data` into test.docx.
render_data(data)