operational

10 years ago · b629c6e33b
parent 0f4e675604
commit b629c6e33b
3 changed files with 21 additions and 78 deletions
--- a/README.md
+++ b/README.md
@ -12,18 +12,7 @@ Or index all the gaduation works:
 `python mmdc_wiki2web.py --category Graduation_work` 


-## Thumbnails
-thumbnails in work pages are hidden. this can be changed.
-
-style_projectpage.css:
-
-`#thumnail {
-display: none;
-}`
-
-
 ## To Do
-
 * remove thumbnail from page_imgs


--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@ -48,13 +48,16 @@ def mw_page_cats(site, page):
    return cats


-def mw_page_imgsurl(site, page):
+def mw_page_imgsurl(site, page, thumb):
    #all the imgs in a page
+    #except the thumbnail: any img whose url equals thumb is left out
    #returns list of tuples (img.name, img.fullurl)
    imgs = page.images()
-    imgs = list(imgs)
-    urls = { img.name: (img.imageinfo)['url'] for img in imgs}
-    return urls
+    imgs = list(imgs)    
+    imgs_dict = { img.name:(img.imageinfo)['url'] for img in imgs if  (img.imageinfo)['url'] != thumb } # exclude thumb 
+    imgs_dict = { key.capitalize():value for key, value in imgs_dict.items()}
+    # capitalize image names, so they can be looked up later
+    return imgs_dict


 def mw_img_url(site, img): #find full of an img 
@ -77,14 +80,12 @@ def write_html_file(html_tree, filename):
    edited.close()

 def parse_work(site, title, content):
-#    print title, content
    workdict = {'Title':title, 'Creator':u'', 'Date':u'', 'Website':u'', 'Thumbnail':u'', 'Bio':u'', 'Description':u'', 'Extra':u''}    
+
    if re.match(u'\{\{\Graduation work', content):
        template, extra = (re.findall(u'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0]
        if extra:
-            workdict['Extra'] = extra
-        # template's key/value pair
-        # Note:Extra value is NOT CAPTURED by this regex
+            workdict['Extra'] = extra 
        keyval = re.findall(u'\|(.*?)\=(.*?\n)', template, re.DOTALL) 
        for pair in keyval:
            key = pair[0]
@ -96,7 +97,6 @@ def parse_work(site, title, content):
            elif 'Website' in key:
                val = urllib.unquote( val)                
            workdict[key]=val
-#    pprint.pprint(workdict)
    return workdict

 def pandoc2html(mw_content):
--- a/mmdc_x.py
+++ b/mmdc_x.py
@ -4,7 +4,6 @@
 import xml.etree.ElementTree as ET
 import html5lib, urllib, pprint
 from mmdc_modules import pandoc2html, parse_work, write_html_file, mw_cats, mw_page_imgsurl, mw_img_url, mw_page_text, mwsite, mw_page_cats, mw_page, remove_cats, find_authors, replace_video, replace_img_a_tag, index_addwork
-# unsued from bs_modules: replace_gallery, replace_video, index_addwork,
 from argparse import ArgumentParser
 from random import shuffle as shuffle

@ -17,9 +16,7 @@ p.add_argument("--host", default="pzwiki.wdka.nl")
 p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /")
 p.add_argument("--category", "-c", nargs="*", default=[["2015", "Graduation_work"]], action="append", help="category to query, use -c foo -c bar to intersect multiple categories")
 p.add_argument("--preview", help='Preview page. Will override category querying. Use: --page "Name Of Wiki Page"')
-
 args = p.parse_args()
-
 print 'args', args

 ######
@ -32,7 +29,8 @@ def create_page(memberpages, mode):
        print member
        page = mw_page(site, member)
        page_text = mw_page_text(site, page)
-        articledict = parse_work(site, member, page_text) # create dictionary w/ page co
+        articledict = parse_work(site, member, page_text) # create dictionary
+        # keys: Title, Creator, Date, Website, Thumbnail, Bio, Description, Extra
        if len(articledict['Creator'])>0 and len(articledict['Title'])>0  and len(articledict['Thumbnail'])>0:
            for key in articledict.keys():# convert Extra, Description, Bio to HTML
                if key in ['Extra', 'Description', 'Bio']:
@ -43,12 +41,8 @@ def create_page(memberpages, mode):
                     articledict[key] = remove_cats(articledict['Content'])
                     articledict[key] = replace_video(articledict['Content'])

-                     
-#        page_imgs = mw_page_imgsurl(site, page)
-        # page_imgs = { key.capitalize():value for key, value in page_imgs.items()} # capatalize keys, so can be called later
-        # #articledict = {'Title': member, 'Content': page_text, 'Categories':page_cats, 'Images': page_imgs}
-
-
+            articledict['Imgs'] = mw_page_imgsurl(site, page, articledict['Thumbnail'] )
+        
        page_tree = html5lib.parse(page_template, namespaceHTMLElements=False)
        page_title = page_tree.find('.//title')
        page_title.text=articledict['Title']#.decode('utf-8')
@ -69,19 +63,17 @@ def create_page(memberpages, mode):
        page_extra.extend(page_extra_el)
        page_website = page_tree.find('.//p[@class="hightlightSidebar"]/a')
        page_website.set('href', articledict['Website'])
-        page_website.text=articledict['Website']
+        page_website.text=articledict['Website']        
        page_thumb = page_tree.find('.//img[@id="thumbnail"]')
        page_thumb.set('src', articledict['Thumbnail'])

        # give work page's imgs full url
-        imgs = page_tree.findall('.//img')
-        # for img in  imgs:
-        #    img_class = img.get('class')
-        #    if  img_class != 'template': 
-        #         src =unicode(img.get('src'))
-        #         for pair in workpage_imgs: 
-        #             if src.replace("_", " ") in pair[0]:#if img in html matchs img in workpage_imgs
-        #                 img.set('src', pair[1])                    
+        imgs = page_tree.findall('.//img')        
+        for img in imgs: #replace src: full url
+            src = (('File:'+img.get('src')).capitalize()).decode('utf-8')
+            if src in articledict['Imgs'].keys():
+                url = articledict['Imgs'][src]
+                img.set('src', url)

        # save work page
        creator = articledict['Creator'].encode('ascii', 'ignore')
@ -98,7 +90,6 @@ def create_index(indexdict):
    index_tree = html5lib.parse(index_template, namespaceHTMLElements=False)
    index_container = index_tree.find(".//div[@class='isotope']") #maybe id is imp    
    for key in indexdict.keys():    
-        print 'key', key
        index_addwork( parent=index_container,
                       workid=key,
                       href=indexdict[key]['Path'],
@ -108,47 +99,9 @@ def create_index(indexdict):
                       thumbnail=indexdict[key]['Thumbnail']
        )
        print '----', indexdict[key]['Title'],indexdict[key]['Path']
-     #   print ET.tostring(tree)
-        
-#print index_tree, type(index_tree)        
    write_html_file(index_tree, 'web/index.html')


-    #     authors = indexdict[article]['Authors']
-    #     path = indexdict[article]['Path']
-    #     issue = indexdict[article]['Category Issue']
-    #     section = indexdict[article]['Category Section']
-    #     topics =  indexdict[article]['Category Topics']
-    #     images = indexdict[article]['Images']
-    #     index_section = index_tree.find('.//ul[@id="section_{}"]'.format(section.encode('utf-8')))
-    #     index_item = ET.SubElement(index_section, 'li',
-    #                                attrib={'class': " ".join(topics)+" "+section,
-    #                                        'data-name': article,
-    #                                        'data-section':section,
-    #                                        'data-categories': " ".join(topics)+" "+section
-    #                                    })
-    #     article_link = ET.SubElement(index_item, 'a', attrib={'href':urllib.quote(path)})
-    #     article_link.text = article
-    #     article_author = ET.SubElement(index_item, 'p', attrib={'class':'authorTitle'})
-    #     article_author.text = authors
-        
-    #     for imgurl in images.values():
-    #         print 'imgurl', imgurl
-    #         index_img_item = ET.SubElement(index_imgs_section, 'li',
-    #                                    attrib={'class': " ".join(topics)+" "+section,
-    #                                            'data-name': article,
-    #                                            'data-section':section,
-    #                                            'data-categories': " ".join(topics)+" "+section,
-    #                                            'style':'position: absolute; left: 0px; top: 0px;'
-    #                                        })
-    #         article_img_link = ET.SubElement(index_img_item, 'a', attrib={'href':urllib.quote(path)})
-    #         article_img_img = ET.SubElement(article_img_link, 'img', attrib={'src':imgurl})            
-    # title=index_tree.find('.//title')
-    # title.text = 'Beyond Social: ' + issue_current
-    # index_filename = 'index.html'
-    # write_html_file(index_tree, index_filename)
-
-
 #####
 # ACTION
 #####    
@ -163,6 +116,7 @@ if args.preview is not None:
 else:
    print "** New Index Mode **"
    memberpages=mw_cats(site, args)
+    #memberpages=[u'Unintended Images']
    shuffle(memberpages)
    print 'memberpages:', memberpages
    indexdict = create_page(memberpages, 'index')