Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

pdf to jpg

# Extract jpg's from pdf's. Quick and dirty.
import sys

pdf = file(sys.argv[1], "rb").read()

startmark = "xffxd8"
startfix = 0
endmark = "xffxd9"
endfix = 2
i = 0

njpg = 0
while True:
    istream = pdf.find("stream", i)
    if istream < 0:
        break
    istart = pdf.find(startmark, istream, istream+20)
    if istart < 0:
        i = istream+20
        continue
    iend = pdf.find("endstream", istart)
    if iend < 0:
        raise Exception("Didn't find end of stream!")
    iend = pdf.find(endmark, iend-20)
    if iend < 0:
        raise Exception("Didn't find end of JPG!")
    

    istart += startfix
    iend += endfix
    print "JPG %d from %d to %d" % (njpg, istart, iend)
    jpg = pdf[istart:iend]
    jpgfile = file("jpg%d.jpg" % njpg, "wb")
    jpgfile.write(jpg)
    jpgfile.close()
    

    njpg += 1
    i = iend
Source by nedbatchelder.com #
 
PREVIOUS NEXT
Tagged: #pdf #jpg
ADD COMMENT
Topic
Name
7+3 =