You are here: start » pdf2sne

PDF2SNE

This simple script converts a PDF (typically course slides) into a SuperNote (SNE) notebook.

Note: I have not yet decoded every part of the SNE file format, so some fields are written with hardcoded values; the resulting notebooks work (at least for me).

Note: requires ImageMagick (the convert and identify commands) and the zip command-line tool.

Changelog:

  • version 0.1 (6 December 2012)
    • First version
  • version 0.2 (10 December 2012)
    • Support for multiple page layouts (width scaled to 736px)
#!/usr/bin/python
# PDF2SNE simple converter (PDF to SuperNote)
# Amos Brocco <amos.brocco@supsi.ch>
# Copyright 2012, Amos Brocco
# version 0.2 (10 Dec 2012)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
import struct
import subprocess
import glob
import sys
import os
import datetime
import calendar
import random
import string
 
# Number of random characters in the scratch directory name.
N = 15
# Relative path (with trailing slash) of the scratch directory, named with
# N random upper-case letters and digits chosen when the module is loaded.
TEMP_DIR = "%s/" % "".join(
	random.choice(string.ascii_uppercase + string.digits) for _ in range(N)
)

# Width, in pixels, that every page is scaled to. Kept as a float so the
# scaling arithmetic that uses it is carried out in floating point.
MAX_PAGE_WIDTH = 736.0
 
def info(msg):
	"""Print a progress or status message to standard output.

	msg -- the text to display.
	"""
	# The parenthesised single-argument form produces the same output
	# whether ``print`` is a statement (Python 2) or a function (Python 3).
	print(msg)
 
def getImageGeometry(fimage):
	"""Return the pixel size of an image as a (width, height) tuple of ints.

	The size is obtained by running ImageMagick's ``identify`` on *fimage*.

	fimage -- path of the image file to inspect.

	Raises ValueError if ``identify`` does not report a usable size.
	"""
	# An argument list (no shell) keeps file names containing quotes, spaces
	# or shell metacharacters from being misinterpreted. "-format" makes
	# identify print only the two numbers needed, one line per frame, rather
	# than a description line whose field positions depend on the file name.
	cp = subprocess.Popen(["identify", "-format", "%w %h\n", fimage], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
	# communicate() drains both pipes and waits for the process to exit;
	# stderr is captured separately so diagnostics never reach the parser.
	out, err = cp.communicate()
	lines = out.decode("ascii", "replace").splitlines()
	# Multi-frame images yield several lines; the first frame is used.
	fields = lines[0].split() if lines else []
	if len(fields) != 2 or not (fields[0].isdigit() and fields[1].isdigit()):
		raise ValueError("Cannot determine geometry of '%s': %s" % (fimage, err.decode("ascii", "replace").strip()))
	return (int(fields[0]), int(fields[1]))
 
 
def getResizedImageGeometry(geometry, max_width=None):
	"""Scale a page geometry to the target width, keeping the aspect ratio.

	geometry -- (width, height) of the source image, in pixels.
	max_width -- width to scale to; when omitted, the module-level
	MAX_PAGE_WIDTH is used.

	Returns the scaled (width, height) as ints; fractional pixels are
	truncated. The original and resized sizes are also printed.
	Raises ValueError if the source width is not positive.
	"""
	if max_width is None:
		max_width = MAX_PAGE_WIDTH
	w, h = geometry
	if w <= 0:
		raise ValueError("Invalid image width: %r" % (w,))
	new_w = int(max_width)
	# float() keeps this a true division on Python 2 even when the caller
	# passes an integer width.
	new_h = int(h * float(max_width) / w)
	print("Original geometry %dx%d, resized %dx%d" % (w, h, new_w, new_h))
	return (new_w, new_h)
 
def makeIndex(pcount):
	"""Write the notebook page index file ``idx.fn`` in the current directory.

	pcount -- number of pages in the notebook.

	The file starts with three signed bytes (0, 0, pcount) followed by one
	39-byte big-endian record per page, numbered from 1: a byte 3, the page
	number, the current time in milliseconds written twice (presumably
	creation and modification times -- unconfirmed), and three constant
	fields whose meaning has not been decoded.

	Raises struct.error if pcount does not fit in a signed byte (more than
	127 pages); in that case no file is written.
	"""
	import time

	# Milliseconds since the Unix epoch at whole-second resolution.
	# time.time() is independent of the local timezone, unlike a naive
	# local datetime, whose utctimetuple() performs no UTC conversion.
	ms = int(time.time()) * 1000
	record = struct.Struct('>bqqqiqh')
	chunks = [struct.pack('>bbb', 0, 0, pcount)]
	for p in range(1, pcount + 1):
		# FIXME: the last two values are bogus data (copy pasted)
		chunks.append(record.pack(3, p, ms, ms, 35, 592511259, 27136))
	# Everything is packed before the file is opened, so a packing error
	# cannot leave a truncated index behind.
	with open("idx.fn", 'wb') as idx:
		idx.write(b"".join(chunks))
 
def makeTemplatePage(pindex, geometry):
	"""Write the page descriptor file ``<pindex>_05`` in the current directory.

	pindex -- page number, used to build the file name.
	geometry -- (width, height) of the page image; each value must fit in
	a signed 16-bit integer.

	The file is 32 bytes of big-endian fields. Apart from the margins and
	the image size, the values are copy-pasted placeholders whose meaning
	has not been decoded.

	Raises struct.error if a value is out of range; in that case no file
	is created.
	"""
	width, height = geometry
	fields = (
		('h', 0),       # FIXME: Bogus data (copy pasted)
		('h', 260),     # FIXME: Bogus data (copy pasted)
		('h', 37),      # Left margin
		('h', 4),       # Top margin
		('h', width),
		('h', height),
		('i', 0),       # FIXME: Bogus data (copy pasted)
		('i', 1),       # FIXME: Bogus data (copy pasted)
		('h', 2),       # FIXME: Bogus data (copy pasted)
		('h', 0),       # FIXME: Bogus data (copy pasted)
		('i', 0),       # FIXME: Bogus data (copy pasted)
		('i', 0),       # FIXME: Bogus data (copy pasted)
	)
	fmt = '>' + ''.join([code for code, _ in fields])
	# Packing happens before the file is opened, so an out-of-range
	# geometry cannot leave a truncated page file behind.
	payload = struct.pack(fmt, *[value for _, value in fields])
	with open("%d_05" % pindex, 'wb') as page:
		page.write(payload)
 
 
def makeTemplateMf():
	"""Create the ``mf.fn`` file in the current directory.

	The file holds ten bytes: an unsigned byte 0, the big-endian 32-bit
	integers 35 and -1, and a final unsigned byte 0.
	"""
	# FIXME: all four values are bogus data (copy pasted)
	payload = struct.pack('>BiiB', 0, 35, -1, 0)
	with open("mf.fn", 'wb') as out:
		out.write(payload)
 
def prepare(pdffile):
	"""Convert a PDF into an SNE notebook written next to the PDF.

	pdffile -- path of the PDF to convert; the notebook is saved under the
	same path with the extension replaced by ``.sne``.

	Steps: render every page to a PNG with ImageMagick ``convert`` inside
	a scratch directory, create a thumbnail, an image folder and a page
	descriptor for each page, write the index and ``mf.fn`` files, zip the
	scratch directory and copy the archive to the ``.sne`` path.

	Exits with status 1 if the scratch directory already exists, if no
	page could be extracted, or if the archive could not be created. The
	scratch directory is removed and the working directory restored even
	when a step fails.
	"""
	pdfnoext = os.path.splitext(pdffile)[0]
	if os.path.exists(TEMP_DIR):
		info("Temp directory '%s' already exists! Please remove it before using this script" % TEMP_DIR)
		sys.exit(1)
	# Resolve every path before changing directory so that PDF paths with
	# a directory component, or absolute paths, keep pointing to the
	# intended place.
	startdir = os.getcwd()
	tempdir = os.path.abspath(TEMP_DIR)
	snefile = os.path.abspath("%s.sne" % pdfnoext)
	zipname = "%s.zip" % os.path.basename(pdfnoext)
	info("Extracting slides from PDF...")
	os.mkdir(tempdir)
	try:
		subprocess.call(["convert", "-density", "96", "-quality", "85", pdffile, os.path.join(tempdir, "image-%04d.png")])
		os.chdir(tempdir)
		pngslides = sorted(glob.glob("image-*.png"))
		pcount = len(pngslides)
		if pcount == 0:
			# Nothing was rendered: do not build an empty notebook.
			info("No pages could be extracted from '%s'" % pdffile)
			sys.exit(1)
		info("Found %d pages to embed" % pcount)
		for index, p in enumerate(pngslides, 1):
			# Thumbnail: fit into 161x218 and pad with black to that exact size.
			subprocess.call(["convert", "-thumbnail", "161x218", "-gravity", "center", "-background", "black", "-extent", "161x218", p, "thumb.png"])
			os.rename("thumb.png", "%d_01" % index)
			os.mkdir(str(index))
			# The geometry must be read before the image is moved away.
			geometry = getResizedImageGeometry(getImageGeometry(p))
			os.rename(p, os.path.join(str(index), "1"))
			makeTemplatePage(index, geometry)
		info("Creating index...")
		makeIndex(pcount)
		makeTemplateMf()
		info("Creating archive...")
		if subprocess.call(["zip", "-r", zipname, ".", "-i", "*"]) != 0:
			info("Failed to create archive '%s'" % zipname)
			sys.exit(1)
		if subprocess.call(["cp", zipname, snefile]) != 0:
			info("Failed to write '%s'" % snefile)
			sys.exit(1)
	finally:
		info("Cleaning up...")
		os.chdir(startdir)
		subprocess.call(["rm", "-rf", tempdir])
	info("%s.sne created" % (pdfnoext))
 
# Script entry: exactly one command-line argument (the PDF path) is expected;
# anything else prints the usage line.
if len(sys.argv) != 2:
	info("Usage: %s pdffile" % sys.argv[0])
else:
	prepare(sys.argv[1])
pdf2sne/start.txt · Last modified: 2012/12/10 12:15 by attila
Kleine Websites, die ein Wiki als CMS verwenden.de