Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,25 @@ Read PDF and output form field statistics.

- ``pdf_path`` : input PDF file

``dump_data``
~~~~~~~~~~~~~~~~~~~~

Read PDF and output metadata, bookmarks, page metrics.

- ``pdf_path`` : input PDF file
- ``out_file`` (default=None) : text file containing dumped data


``update_info``
~~~~~~~~~~~~~~~~~~~~

Read PDF ``pdf_path``, update metadata according to ``info_data`` and
write the final PDF to ``out_file``.

- ``pdf_path`` : input PDF file
- ``info_data`` : text file containing new info data
- ``out_file`` : output PDF file


Example
-------
Expand Down
20 changes: 20 additions & 0 deletions pypdftk.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,23 @@ def uncompress(pdf_path, out_file=None, flatten=True):
'''

return pdftk_cmd_util(pdf_path, "uncompress", out_file, flatten)

def dump_data(pdf_path, out_file=None):
    '''
    Reads a single input PDF file and returns its metadata, bookmarks
    (a/k/a outlines), page metrics (media, rotation and labels) and
    other data, as produced by the ``dump_data`` operation.

    :param pdf_path: input PDF file
    :param out_file: (default=None) : the dumped data is also written to
        this text file if provided
    :return: dumped data as a single string, one entry per line
    '''
    # Join exactly once: the same text is returned and written, and the
    # command output is only iterated a single time (safe even if
    # run_command hands back a one-shot iterator).
    dumped_data = "\n".join(run_command([PDFTK_PATH, pdf_path, 'dump_data']))

    if out_file:
        with open(out_file, 'w') as f:
            # writelines() does not add line separators, so writing the raw
            # lines would glue every entry together; write the joined text.
            f.write(dumped_data)

    return dumped_data

def update_info(pdf_path, info_data, out_file):
    '''
    Read PDF ``pdf_path``, update its metadata according to ``info_data``
    and write the resulting PDF to ``out_file``.

    :param pdf_path: input PDF file
    :param info_data: text file containing new info data
    :param out_file: output PDF file
    '''
    command = [
        PDFTK_PATH,
        pdf_path,
        'update_info',
        info_data,
        'output',
        out_file,
    ]
    run_command(command)
58 changes: 58 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,64 @@ def test_replace_page_at_end(self):
pypdftk.replace_page(TEST_PDF_PATH, last_page, pdf_to_insert)
self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH))

def test_dump_data(self):
    # Expected dump for test_files/form.pdf; compared verbatim, so the
    # literal below must stay flush-left with no trailing newline.
    expected_dump = """InfoBegin
InfoKey: Keywords
InfoValue: PDF Form
InfoBegin
InfoKey: Creator
InfoValue: Writer
InfoBegin
InfoKey: CreationDate
InfoValue: D:20130629204853+02'00'
InfoBegin
InfoKey: Producer
InfoValue: OpenOffice.org 3.4
InfoBegin
InfoKey: Title
InfoValue: PDF Form Example
PdfID0: 5e0a553555622a0516e9877ca55217a6
PdfID1: 5e0a553555622a0516e9877ca55217a6
NumberOfPages: 1
PageMediaBegin
PageMediaNumber: 1
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842"""
    actual_dump = pypdftk.dump_data('test_files/form.pdf')
    self.assertEqual(actual_dump, expected_dump)

def test_update_info(self):
    # Info data to apply; after the update, dump_data on the new PDF is
    # expected to return exactly this text.
    new_info = """InfoBegin
InfoKey: Keywords
InfoValue: My fancy form
InfoBegin
InfoKey: Creator
InfoValue: Ghostwriter
InfoBegin
InfoKey: CreationDate
InfoValue: D:20210101204853+02'00'
InfoBegin
InfoKey: Producer
InfoValue: PDFTK
InfoBegin
InfoKey: Title
InfoValue: Form with updated metadata
PdfID0: 5e0a553555622a0516e9877ca55217a6
PdfID1: 5e0a553555622a0516e9877ca55217a6
NumberOfPages: 1
PageMediaBegin
PageMediaNumber: 1
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842"""

    info_path = "test_files/form_info_data.txt"
    updated_pdf = 'test_files/form_updated.pdf'

    # The info file is closed before it is handed to update_info.
    with open(info_path, 'w') as info_file:
        info_file.write(new_info)

    pypdftk.update_info('test_files/form.pdf', info_path, updated_pdf)
    self.assertEqual(pypdftk.dump_data(updated_pdf), new_info)

@unittest.skip('Not implemented yet')
def test_stamp(self):
pass
Expand Down