How to Extract Fonts from a PDF - pymupdf/PyMuPDF GitHub Wiki
This script extracts every font referenced by any page of a PDF and writes each extractable font to its own file in the current directory.
from __future__ import print_function
import fitz
# Extract every embedded font referenced by any page of "some.pdf".
# Each extractable font is written once to "<basename>-<xref>.<ext>" in the
# current working directory, followed by a short summary on stdout.

# Open the PDF
doc = fitz.open("some.pdf")
try:
    # Font xrefs already handled. The same font is usually referenced by many
    # pages, so a set keeps the "seen before?" test cheap.
    xref_visited = set()
    num = 0  # count the extracted fonts

    for page in doc:
        # NOTE(review): recent PyMuPDF releases appear to name these methods
        # get_fonts() / extract_font() - confirm against the installed version.
        for f in page.getFontList():  # list of fonts of page
            xref = f[0]  # xref of font
            if xref in xref_visited:
                continue  # skip if already processed
            xref_visited.add(xref)  # do not process a second time

            # extract font buffer
            basename, ext, _, buffer = doc.extractFont(xref)
            if ext == "n/a":
                continue  # font is not extractable

            num += 1
            foutname = "%s-%i.%s" % (basename, xref, ext)  # build the filename
            # "with" guarantees the file is closed even if the write fails
            with open(foutname, "wb") as fout:
                fout.write(buffer)
            print("extracted", foutname)

    footer = "extracted %i font files from %s." % (num, doc.name)
    footer_line = "-" * len(footer)  # rule exactly as wide as the footer text

    # output some protocol
    print(footer_line)
    print(footer)
    print(footer_line)
finally:
    # Close the PDF, also when extraction was interrupted by an error
    doc.close()