
将一些网上的电子书去除水印的代码模板
使用的电子书是:
http://lamda.nju.edu.cn/weixs/book/CNN_book.html
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_
# Strip a text watermark from every page of a PDF and save the result.
#
# The script walks each page's content stream, looks for "TJ" (show text
# array) operations whose first array element equals ``wm_text`` and replaces
# the operand with ``replace_with``. Input: CNN_book_small.pdf, output:
# output.pdf (both in the current working directory).

# Raw bytes of the watermark string as it appears inside the content stream.
# NOTE(review): the published listing showed b'x00*x00Qx00T'; the backslashes
# of the \x00 escapes appear to have been lost in publishing and are restored
# here. Adapt this value to the watermark of your own PDF.
wm_text = b'\x00*\x00Q\x00T'
replace_with = ''

# The input file stays open until the output has been written, because the
# reader is handed the stream itself and the pages are only serialized when
# ``output.write`` runs.
with open('CNN_book_small.pdf', "rb") as input_stream:
    # Load PDF into pyPDF
    source = PdfFileReader(input_stream)
    output = PdfFileWriter()

    # Loop-invariant: the operator token we are looking for.
    tj_operator = b_("TJ")

    # For each page
    for page_number in range(source.getNumPages()):
        # Get the current page and its contents
        page = source.getPage(page_number)

        # A page without a "/Contents" entry has nothing to scan; copy it
        # through unchanged instead of failing on the lookup.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            # Loop over all pdf elements
            for operands, operator in content.operations:
                # You might adapt this part depending on your PDF file
                if operator != tj_operator:
                    continue
                try:
                    text = operands[0][0]
                except (IndexError, KeyError, TypeError):
                    # Operand list is empty or its first entry is not an
                    # indexable, non-empty array: cannot be the watermark.
                    continue
                if text == wm_text:
                    operands[0] = TextStringObject(replace_with)

            # Set the modified content as content object on the page
            page[NameObject('/Contents')] = content

        # Add the page to the output
        output.addPage(page)

    # Write the stream
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)




近期评论