#!/usr/bin/env python
"""
Simple upload handler (CGI) that converts documents with pandoc.

On a POST request the script takes either an uploaded file (form field
``file``) or pasted text (form field ``text``), runs pandoc to convert it
from the ``from`` format to the ``to`` format, and writes the result to the
browser.  When the ``_submit`` field equals ``download`` the result is sent
as an attachment.

Uploaded files are written to a named temporary file first because formats
like docx are "archives" and have to be handed to pandoc as a real filename
(they cannot be piped in).  The temporary file is removed once pandoc has
finished.

Non-POST requests produce no output.
NOTE(review): the HTML form that used to be sketched here in commented-out
code is presumably served from a static page - confirm.
"""

import cgitb; cgitb.enable()
import cgi
import os
import re
import shlex
import subprocess
import sys
from tempfile import NamedTemporaryFile as NTF

from settings import PANDOC

method = os.environ.get("REQUEST_METHOD")

# Response metadata for each known output format.
_types = {
    'markdown': {
        'mime': "text/plain;charset=utf-8",
        'ext': 'markdown'
    },
    # icml is deliberately served as plain text rather than application/xml
    # so that the browser displays it instead of trying to render it.
    'icml': {
        'mime': "text/plain;charset=utf-8",
        'ext': 'icml'
    },
    'docx': {
        'mime': "application/vnd.openxmlformats-officedocument.wordprocessingml.document;charset=utf-8",
        'ext': 'docx'
    },
    'html': {
        'mime': "text/html;charset=utf-8",
        'ext': 'html'
    }
}

# Used for any output format that is not listed in _types.
DEFAULT_TYPE = {
    'mime': "text/plain;charset=utf-8",
    'ext': 'txt'
}

# Format names come straight from the request, so only let through plain
# names, optionally with +ext/-ext modifiers.  This keeps a client from
# smuggling extra command-line options into the pandoc invocation.
_FORMAT_RE = re.compile(r"^[A-Za-z0-9_+-]+$")


def guess_format_from_filename(fn):
    """Return "docx" for names ending in .docx (any case), else "txt".

    A missing or empty filename is treated like any other non-docx name.
    """
    if fn and fn.lower().endswith(".docx"):
        return "docx"
    return "txt"


def _write(data):
    """Write text or bytes to the response as raw bytes.

    Works on Python 2 (where sys.stdout accepts byte strings directly) and
    on Python 3 (where the binary layer is sys.stdout.buffer).
    """
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    stream.write(data)


def _send_headers(mime, download_filename=None):
    """Emit the CGI header block, ending with the mandatory blank line."""
    _write("Content-type: {0}\n".format(mime))
    if download_filename:
        _write("Content-Disposition: attachment;filename=\"{0}\"\n".format(
            download_filename))
    _write("\n")


def print_template(txt):
    """Send *txt* to the browser as a complete text/html response."""
    _send_headers("text/html;charset=utf-8")
    _write(txt)
    _write("\n")


def _escape_html(text):
    """Escape the characters that are significant in HTML text content."""
    return (text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;"))


def _is_valid_format(name):
    """Return True when *name* is a string that looks like a format name."""
    return isinstance(name, str) and _FORMAT_RE.match(name) is not None


def _save_upload(item, suffix):
    """Copy an uploaded form item to a named temporary file.

    Returns the path of the file.  The caller is responsible for removing
    it (the file is created with delete=False so that pandoc can open it by
    name after it has been closed here).
    """
    tmp = NTF(suffix=suffix, delete=False)
    try:
        while True:
            chunk = item.file.read(65536)
            if not chunk:
                break
            if not isinstance(chunk, bytes):
                chunk = chunk.encode("utf-8")
            tmp.write(chunk)
    finally:
        tmp.close()
    return tmp.name


def _run_pandoc(from_format, to_format, input_path=None, text=None):
    """Run pandoc and return ``(ok, output, error_message)``.

    The input is the file at *input_path* when given, otherwise *text* is
    fed to pandoc on stdin.  *ok* is False when pandoc could not be started
    or exited with a non-zero status; *error_message* then carries the
    reason (pandoc's stderr, or the OS error).
    """
    # PANDOC is split the same way the original command string was, so a
    # setting that contains extra arguments keeps working.  Each format is
    # passed as a single "--opt=value" argv item and never re-parsed.
    cmd = shlex.split(PANDOC) + ["--from=" + from_format, "--to=" + to_format]
    stdin_data = None
    if input_path is not None:
        cmd.append(input_path)
    else:
        stdin_data = text if text is not None else b""
        if not isinstance(stdin_data, bytes):
            stdin_data = stdin_data.encode("utf-8")

    try:
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as exc:
        return False, b"", u"could not start pandoc ({0})".format(exc.strerror)

    out, err = proc.communicate(stdin_data)
    # Popen never raises CalledProcessError; the exit status has to be
    # checked explicitly.
    if proc.returncode != 0:
        return False, out, err.decode("utf-8", "replace")
    return True, out, u""


def handle_post():
    """Handle one POST request: convert the submitted input and respond."""
    fs = cgi.FieldStorage()
    from_format = fs.getvalue("from")
    to_format = fs.getvalue("to", "markdown")
    download = fs.getvalue("_submit", "submit") == "download"

    try:
        upload = fs["file"]
    except KeyError:
        upload = None
    # Browsers submit an empty file field too; only a non-empty filename
    # means a file was really chosen.  Otherwise fall back to the text box.
    has_upload = (upload is not None
                  and bool(getattr(upload, "filename", None))
                  and getattr(upload, "file", None) is not None)

    if has_upload and from_format is None:
        from_format = guess_format_from_filename(upload.filename)

    if not from_format or not to_format:
        print_template(u"Format unspecified")
        return
    if not (_is_valid_format(from_format) and _is_valid_format(to_format)):
        print_template(u"Invalid format name")
        return

    to_type = _types.get(to_format, DEFAULT_TYPE)
    download_filename = None
    if download:
        download_filename = "hype_conversion." + to_type['ext']

    # Convert first and send headers afterwards, so that a failure can still
    # be reported with its own (text/html) header block.
    tmp_path = None
    try:
        if has_upload:
            tmp_path = _save_upload(upload, "." + from_format)
            ok, out, err = _run_pandoc(from_format, to_format,
                                       input_path=tmp_path)
        else:
            ok, out, err = _run_pandoc(from_format, to_format,
                                       text=fs.getvalue("text"))
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not turn a
                # finished conversion into an error response.
                pass

    if not ok:
        print_template(u"An error occurred, pandoc said: {0}".format(
            _escape_html(err)))
        return

    _send_headers(to_type['mime'], download_filename)
    _write(out)


if __name__ == "__main__" and method == "POST":
    handle_post()
    sys.exit(0)