去除pdf水印

用于给一些网上的电子书去除水印的代码模板

使用的电子书是:
http://lamda.nju.edu.cn/weixs/book/CNN_book.html

from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_

# Byte string identifying the watermark; it is compared against the first
# element of the operand array of every TJ (show text) operation.
# NOTE(review): this literal contains the plain characters "x00" rather than
# NUL escapes ("\x00") -- it looks like backslashes may have been lost.
# Confirm against the bytes actually present in the target PDF.
wm_text = b'x00*x00Qx00T'
# Text that replaces the operand of a matching TJ operation.
replace_with = ''

# Keep the input file open until the output has been written: the reader
# holds on to the stream and page data is still read from it when the
# writer serializes the document.
with open('CNN_book_small.pdf', "rb") as source_file:
    # Load PDF into pyPDF
    source = PdfFileReader(source_file)
    output = PdfFileWriter()

    # For each page
    for page_number in range(source.getNumPages()):
        # Get the current page and its contents
        page = source.getPage(page_number)

        # A page without a content stream has nothing to strip; it is
        # copied to the output unchanged instead of raising KeyError.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            # Loop over all pdf elements
            for operands, operator in content.operations:
                # You might adapt this part depending on your PDF file
                if operator != b_("TJ"):
                    continue
                try:
                    text = operands[0][0]
                except (IndexError, KeyError, TypeError):
                    # Operand is empty or not indexable: not a candidate.
                    continue
                if text == wm_text:
                    operands[0] = TextStringObject(replace_with)

            # Set the modified content as content object on the page
            page[NameObject('/Contents')] = content

        # Add the page to the output
        output.addPage(page)

    # Write the stream; the context manager flushes and closes the file.
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)