Extract sound annotations from a PDF

420 views Asked by At

I have a script that lists the annotations of a PDF file (taken from "Parse annotations from a pdf"):

import popplerqt5
import argparse


def extract(fn):
    """Collect the non-empty annotation contents of the PDF at *fn*.

    The document is loaded through Poppler and every page is visited in
    order. Each annotation whose ``contents()`` is non-empty is printed
    together with its 1-based page number and appended to the result.
    A summary line with the number of collected annotations is printed
    at the end.

    Returns the list of collected contents, in page order.
    """
    document = popplerqt5.Poppler.Document.load(fn)
    found = []
    for page_no in range(1, document.numPages() + 1):
        # Poppler pages are addressed by 0-based index; page_no is 1-based
        # because that is what gets printed.
        for note in document.page(page_no - 1).annotations():
            text = note.contents()
            if not text:
                continue
            found.append(text)
            print(f'page={page_no} {text}')

    print(f'{len(found)} annotation(s) found')
    return found


if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the PDF path.
    cli = argparse.ArgumentParser()
    cli.add_argument('fn')
    extract(cli.parse_args().fn)

But it only works for text annotations. There are a lot of Python libraries, such as Poppler, PyPDF2 and PyMuPDF; I have searched their documentation and source code extensively and, as far as I can tell, none of them is able to extract the binary data of sound annotations. Do you know of any library that can do this? I need to extract the binaries of these sound annotations and convert them to MP3s.

1

There is 1 answer

0
Shayan On BEST ANSWER

The next version of PyMuPDF will support extracting audio annotations. Use the script below to extract audio annotations from a PDF using PyMuPDF. It is easy to use: just call the script and pass a PDF file as the first argument: python script.py myfile.pdf

Note: only works on Windows.

import fitz, sys, os, subprocess
# PDF sound encodings that ffmpeg reads as companded formats; these format
# names take no bit-depth or byte-order suffix.
COMPANDED_FORMATS = {"muLaw": "mulaw", "ALaw": "alaw"}


def ffmpeg_format(sound):
    """Return the ffmpeg raw-audio format name (``-f``) for a sound dict.

    *sound* is the dictionary describing a sound annotation. The optional
    ``"encoding"`` and ``"bps"`` keys are read; a missing encoding is treated
    as unsigned and a missing ``"bps"`` as 8 bits per sample.
    """
    encoding = sound.get("encoding")
    if encoding in COMPANDED_FORMATS:
        return COMPANDED_FORMATS[encoding]
    sign = "s" if encoding == "Signed" else "u"
    bps = sound.get("bps", 8)
    # Single-byte samples have no byte order: ffmpeg accepts "u8"/"s8" but
    # not "u8be"/"s8be". Wider samples are passed as big-endian.
    byte_order = "be" if int(bps) > 8 else ""
    return f"{sign}{bps}{byte_order}"


def build_ffmpeg_command(sound, raw_path, mp3_path):
    """Return the ffmpeg argument list converting *raw_path* to *mp3_path*.

    ``sound["rate"]`` is required; ``"channels"`` defaults to 1.
    """
    return [
        "ffmpeg", "-hide_banner",
        "-f", ffmpeg_format(sound),
        "-ar", str(sound["rate"]),
        "-ac", str(sound.get("channels", 1)),
        "-i", str(raw_path),
        str(mp3_path),
    ]


def extract_sounds(ifile):
    """Dump every sound annotation of the PDF *ifile* as ``.raw`` and ``.mp3``.

    Output goes into a folder named after the PDF (without extension), created
    next to the PDF, or in the current directory when *ifile* has no directory
    part. Files are named
    ``<name>.page.<page.number>.annot.<n>.raw`` / ``.mp3`` where ``n`` counts
    the successfully read sound annotations of that page, starting at 1.
    """
    doc = fitz.open(ifile)
    ofolder = os.path.dirname(ifile) or os.getcwd()
    stem = os.path.splitext(os.path.basename(ifile))[0]
    out_dir = os.path.join(ofolder, stem)
    # exist_ok so that re-running the script on the same PDF does not crash.
    os.makedirs(out_dir, exist_ok=True)
    prefix = os.path.join(out_dir, stem)

    for page in doc:
        print(page)
        annot_number = 1
        for annot in page.annots(types=[fitz.PDF_ANNOT_SOUND]):
            try:
                # Prefer the current method name, fall back to the legacy one.
                get_sound = getattr(annot, "get_sound", None) or annot.soundGet
                sound = get_sound()
            except Exception as e:
                # Best effort: report the unreadable annotation and move on.
                print(e)
                continue
            for k, v in sound.items():
                print(k, "=", v if k != "stream" else len(v))

            base = prefix + ".page." + str(page.number) + ".annot." + str(annot_number)
            annot_number += 1
            raw_path = base + ".raw"
            with open(raw_path, "wb") as fout:
                fout.write(sound["stream"])

            # Pass the argument list directly (no shell): with shell=True a
            # list is only interpreted as intended on Windows.
            try:
                subprocess.call(build_ffmpeg_command(sound, raw_path, base + ".mp3"))
            except OSError as e:
                # ffmpeg missing or not executable; the .raw file is kept.
                print("could not run ffmpeg:", e)


def main():
    """Script entry point: expects the PDF file name as the only argument."""
    if len(sys.argv) != 2:
        sys.exit("need filename as parameter")
    extract_sounds(sys.argv[1])


if __name__ == "__main__":
    main()