Rewriting the whole mechanism from scratch - without JSON

10 years ago · 4fbfd6b88a
parent 63e8313370
commit 4fbfd6b88a
5 changed files with 122 additions and 13 deletions
--- a/allworks_mmdc.json
+++ b/allworks_mmdc.json
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@ -64,11 +64,59 @@ def write_html_file(html_tree, filename):
    edited.write(html)
    edited.close()
 # mw article modules
 def parse_work_page(title, content):
     """Parse the ``{{Graduation work ...}}`` template of a wiki page into a dict.

     Parameters:
         title:   page title; stored in the result under the key 'Title'.
         content: raw wiki text of the page.

     Returns:
         None when ``content`` does not start with a parseable
         ``{{Graduation work`` template. Otherwise a tuple
         ``(work_dict, extra)``:
           * work_dict maps every ``|key=value`` field of the template to its
             value with the newlines removed, plus 'Title', plus
             'Thumbnail_url' when a Thumbnail field is present.
           * extra is ``('Extra', text)`` where text is whatever follows the
             closing ``}}`` of the template, or '' when nothing follows it.
     """
     # Raw strings: the previous pattern contained the unknown escape "\G",
     # which newer versions of the re module reject.
     if not re.match(r'\{\{Graduation work', content):
         return None
     found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL)
     if found is None:
         # template opened but never closed: treat it like a non-matching page
         return None
     template, extra = found.groups()
     # template's key/value pairs
     keyval = re.findall(r'\|(.*?)=(.*?\n)', template, re.DOTALL)
     if extra:
         extra = ('Extra', extra)
         keyval.append(extra)
     # The fields are processed whether or not the page has trailing content;
     # previously a page without it lost all of its parsed fields.
     work_dict = {'Title': title}
     for key, val in keyval:
         # NOTE(review): this also removes the newlines inside the 'Extra'
         # wiki text stored in the dict; the untouched text is only available
         # through the returned ``extra`` tuple.
         val = val.replace('\n', '')
         if 'Creator' in key:
             val = val.replace(', ', '')
         elif 'Thumbnail' in key:
             thumburl = api_thumb_url(val)
             work_dict['Thumbnail_url'] = thumburl
             print('THUMB: %s' % (thumburl,))
         work_dict[key] = val
     return work_dict, extra
 # Alternative to parse_work_page - W/out dictionary        
 def parse_work(title, content):
     """Extract the trailing text and the Description field of a work page.

     Parameters:
         title:   page title (currently unused; kept for a signature parallel
                  to parse_work_page).
         content: raw wiki text of the page.

     Returns:
         A tuple ``(extra, description)``. ``extra`` is the text following the
         closing ``}}`` of the ``{{Graduation work`` template and
         ``description`` is the value of the last field whose key contains
         'Description' (trailing newline included). Either one is '' when it
         cannot be found, so callers can always unpack the result.
     """
     # Defaults make the return well defined for pages without a template or
     # without a Description field (both used to raise UnboundLocalError).
     extra = ''
     description = ''
     # Raw strings: the previous pattern contained the unknown escape "\G".
     if re.match(r'\{\{Graduation work', content):
         found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL)
         if found is not None:
             template, extra = found.groups()
             # template's key/value pairs
             keyval = re.findall(r'\|(.*?)=(.*?\n)', template, re.DOTALL)
             for key, val in keyval:
                 if 'Description' in key:
                     description = val
             print(keyval)
     return extra, description
 # Conversion Modules
 def pandoc2html(mw_content):
    if mw_content:
-        mw_content = mw_content.encode('utf-8')
+        mw_content = mw_content#.encode('utf-8')
        # convert from mw to html
        args_echo =shlex.split( ('echo "{}"'.format(mw_content)) )
        args_pandoc = shlex.split( 'pandoc -f mediawiki -t html5' )
@ -90,7 +138,11 @@ def pandoc(filename, title, creator, date, website, thumbnail, bio, description,
 #    return html
 # pandoc reads its input either from stdin or from an input file
-# pandoc DOES NOT convert variables
+# pandoc DOES NOT convert variables; it has to receive the input from stdin.
 # to create the html, convert bio, description and extra one at a time,
 # then insert them into the HTML template by:
 ## generating the html in python and inserting sub-elements?
 ## ??
                               #     pandoc = 'pandoc -s -f mediawiki -t html5 \
 # --template template_article.html \
--- a/mmdc_pages.py
+++ b/mmdc_pages.py
@ -183,16 +183,21 @@ for key in json_allworks.keys():
    print work_file, website
    # if key in work.keys() else ''
-    pandoc( filename=work_file, \
+
-            template='web/work-template.html', \
+    if extra:
-            title=title, \
+        extra_html = pandoc2html(extra)
-            creator=creator, \
+        print (extra_html)
-            date=date, \
+
-            website=website, \
+    # pandoc( filename=work_file, \
-            thumbnail=thumbnail, \
+    #         template='web/work-template.html', \
-            bio=bio, \
+    #         title=title, \
-            description=description, \
+    #         creator=creator, \
-            extra=extra ) # convert to HTML
+    #         date=date, \
    #         website=website, \
    #         thumbnail=thumbnail, \
    #         bio=bio, \
    #         description=description, \
    #         extra=extra ) # convert to HTML
    # print '= = = = = = ='
    # print html_content 
    # print '= = = = = = ='
--- a/prototype_page.py
+++ b/prototype_page.py
@ -0,0 +1,31 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 ###########
 # Testing downloading and converting mw page content to html
 ###########
 from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work_page, parse_work
 # Load the page template; the context manager closes the file handle, which
 # was previously left open.
 with open("web/page-template.html", "r") as template_file:
     template = template_file.read()
 # Fill every placeholder of the template; only the title has a value so far.
 template = template.format(title="This is My Title", creator='', date='', website='', thumbnail='', bio='', description='', extra='')
 # download
 pageid = '15965'
 article = api_page(pageid, 'content')
 # parsing article
 extra, description = parse_work('Qq', article)
 # Open question: placing mw content inside a dict makes it non-convertible.
 # NOTE(review): possibly because parse_work_page removes every newline from
 # the values it stores - confirm.
 html_extra = pandoc2html(extra.encode('utf-8'))
 html_description = pandoc2html(description.encode('utf-8'))
 # Single-argument prints in parentheses behave the same on Python 2 and 3.
 print('----------- html -----')
 print(html_description)
 print(template)
--- a/web/page-template.html
+++ b/web/page-template.html
@ -0,0 +1,22 @@
 <!DOCTYPE html>
 <html>
  <head>
    <!-- Page template for a single work. The placeholders in curly braces are
         filled in with Python's str.format (see prototype_page.py), so any
         literal curly brace added to this file has to be doubled. -->
    <meta charset="UTF-8">
    <title>{title}</title> <!-- the work title is inserted here --> 
    <link rel="stylesheet" href="css/style.css">
  </head>
  <body>
    <div id="header">
      <div id="title"><h1>{title}</h1></div>
      <div id="creator"><h2>{creator}</h2></div>
      <div id="date">{date}</div>
      <div id="website"><a href="{website}">{website}</a></div>
      <div id="thumbnail"><img src="{thumbnail}" class="" alt="" /></div>
      <div id="bio">{bio}</div>
    </div>    
    <div id="body">
      <div id="description">{description}</div>
      <div id="extra">{extra}</div>
    </div>
  </body>
 </html>