# Flask application to serve the web pages
# mwclient to interact with the MediaWiki API
# BS to read the html table from the wiki
# os and dotenv to store the mediawiki credentials in a safe place
import os
import re
import textwrap
from pathlib import Path

from flask import Flask, request, redirect, url_for, jsonify, render_template
import mwclient
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# load the mediawiki credentials from the shared folder
dotenv_path = Path("/var/www/.mw-credentials")
load_dotenv(dotenv_path=dotenv_path)

# load the configuration env
load_dotenv()

DEFAULT_PAGE = os.environ.get("DEFAULT_PAGE", '')

# Location of the wiki that stores the padliographies
WIKI_HOST = 'pzwiki.wdka.nl'
WIKI_PATH = '/mw-mediadesign/'

# Matches the opening line of the wikitext table that holds the pads
PAD_TABLE_OPENING = re.compile(r'\{\|[^\n]*padliography')

# Wikitext of a freshly initialized page. It is dedented once, here, so that
# the user-provided description can never influence the dedent margin.
PAGE_TEMPLATE = textwrap.dedent('''
    {description}

    == Padliography ==

    {{| class = "wikitable sortable padliography"
    |-
    !link !! title !! overview !! categories !! date
    |-
    |}}

    [[Category:Padliography]]
''')


# prefix to add /soupboat/padliography to all the routes
class PrefixMiddleware(object):
    '''WSGI middleware that serves the wrapped app under a URL prefix.

    Requests whose path starts with the prefix are forwarded to the app with
    the prefix moved from PATH_INFO to SCRIPT_NAME; any other request gets a
    plain-text 404 response.
    '''

    def __init__(self, app, prefix=""):
        self.app = app
        self.prefix = prefix

    def __call__(self, environ, start_response):
        path = environ.get("PATH_INFO", "")
        if path.startswith(self.prefix):
            environ["PATH_INFO"] = path[len(self.prefix):]
            environ["SCRIPT_NAME"] = self.prefix
            return self.app(environ, start_response)

        # A WSGI status must be "<code> <reason phrase>", not a bare code
        start_response("404 Not Found", [("Content-Type", "text/plain")])
        return ["This url does not belong to the app.".encode()]


# create flask application
app = Flask(__name__)

# Get the URL prefix for the soupboat
# register the middleware to use our base_url as prefix on all the requests
base_url = os.environ.get('BASE_URL', '')
app.wsgi_app = PrefixMiddleware(app.wsgi_app, prefix=base_url)


def _connect():
    '''Connect to the wiki and log in as the bot user (MW_BOT / MW_KEY).'''
    site = mwclient.Site(WIKI_HOST, path=WIKI_PATH)
    # Authentication is necessary to edit the contents of a page
    site.login(
        username=os.environ.get('MW_BOT'),
        password=os.environ.get('MW_KEY')
    )
    return site


def _close_pad_table_with(text, closing):
    '''Replace the "|}" that closes the table of pads with `closing`.

    Only one table terminator is rewritten: the first one after the opening
    of the padliography table, or the last one in the page when no such
    opening is found. The text is returned unchanged if it has no "|}".
    '''
    opening = PAD_TABLE_OPENING.search(text)
    if opening:
        end = text.find('|}', opening.end())
    else:
        end = text.rfind('|}')
    if end == -1:
        return text
    return text[:end] + closing + text[end + len('|}'):]


def add_pad(padliography, link, title, overview, categories, date):
    '''Add a new pad to the wiki page

    The pad is appended as a new row at the end of the table of pads of the
    page Padliography/<padliography>, and the edit is saved with the bot user.
    '''

    # 1. Connect to the wiki and authenticate
    site = _connect()

    # 2. Select the page and get the contents
    # --> prefix the page title with Padliography/
    #     so we dont erase eventual pages with the same title
    padliography = f'Padliography/{padliography}'
    page = site.pages[padliography]
    text = page.text()

    # 3. Append the pad as new row in the table of pads
    # NOTE(review): the values are interpolated verbatim, so a value that
    # contains "||", "|}" or a newline can break the table -- consider
    # escaping them once the format sent by the client is settled.
    new_row = f'|-\n| {link} || {title} || {overview} || {categories} || {date} \n|-\n' + '|}'
    # Only the table of pads is touched: other tables in the page keep
    # their own "|}" terminator.
    text = _close_pad_table_with(text, new_row)

    # 4. Apply the edit
    page.edit(text, f'New pad in the {padliography}: {title}')


def get_pads(padliography):
    '''Retrieve pads from the wiki

    Returns a list of dicts, one per row of the table of pads, keyed by the
    lowercased table headers. The list is empty when the page has no table
    with the padliography class.
    '''

    # 1. Connect to the wiki and authenticate
    site = _connect()

    # 2. Use the MediaWiki API to get the wikitext contents in HTML
    # Pages in the padliography comes with the Padliography/ prefix
    padliography = f'Padliography/{padliography}'
    html = site.api('parse', prop='text', page=padliography)

    # 3. Parse the HTML with BeautifulSoup to extract data from the table of pads
    soup = BeautifulSoup(html['parse']['text']['*'], features="html.parser")
    table = soup.find("table", attrs={"class": "padliography"})
    if table is None:
        return []

    # 4. Find the headers of the table
    headers = [header.text.lower().strip() for header in table.find_all('th')]

    # 5. Create a list of pad, using each header as property of the object pad
    # zip() ignores cells that have no matching header instead of failing
    pads = []
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        pad = {header: cell.text.rstrip('\n') for header, cell in zip(headers, cells)}
        # 6. Skip empty pads (the header row has no td cells)
        if pad:
            pads.append(pad)

    return pads


def init_page(padliography, description):
    '''Initialize a new instance of the padliography at the given page

    Writes the description and an empty table of pads to the page
    Padliography/<padliography>. An existing page is left untouched, so the
    pads it already lists are not lost.

    Returns True if the page has been created, False if it already existed.
    '''

    # 1. Connect to the wiki and authenticate
    site = _connect()

    # 2. Select the page
    # page in the padliography comes with the Padliography/ prefix
    padliography = f'Padliography/{padliography}'
    page = site.pages[padliography]

    # 3. Never overwrite a page that is already there
    if page.exists:
        return False

    # 4. Insert the table template and a user-provided description
    text = PAGE_TEMPLATE.format(description=description or '')

    # 5. Apply the edit
    page.edit(text, f'New padliographish page created: Pads/{padliography}')
    return True


# Routes

@app.route('/')
def home():
    '''Serve the homepage layout'''
    return render_template('home.html', page=DEFAULT_PAGE, base_url=base_url)


@app.route('/<padliography>/')
def page(padliography):
    '''Serve a specific padliography'''
    return render_template('home.html', page=padliography, base_url=base_url)


@app.route('/api/<padliography>/', methods=['GET', 'POST'])
def api(padliography):
    '''Manage the interaction with the MediaWiki API

    GET returns the list of pads as JSON. POST adds the pad described by the
    JSON body (link and title are required) and then returns the updated list.
    '''

    if request.method == 'POST':
        # Add a new pad
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}

        link = payload.get('link')
        title = payload.get('title')
        if not link or not title:
            # Refuse incomplete pads instead of writing "None" in the wiki
            error = jsonify({'error': 'link and title are required'})
            error.status_code = 400
            error.headers.add('Access-Control-Allow-Origin', '*')
            return error

        overview = payload.get('overview', '')
        categories = payload.get('categories', '')
        date = payload.get('date') or ''

        add_pad(padliography, link, title, overview, categories, date)

    # Return the pad list
    response = jsonify({
        'pads': get_pads(padliography)
    })
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response


@app.route('/api/<padliography>/init', methods=['GET', 'POST'])
def init(padliography):
    '''Initialize the page of a padliography

    POST creates the page with the description found in the JSON body and
    redirects to the homepage. GET only answers 'ok'.
    '''
    if request.method == 'POST':
        payload = request.get_json(silent=True)
        description = payload.get('description', '') if isinstance(payload, dict) else ''
        init_page(padliography, description)
        return redirect(url_for('home'))
    return 'ok'


def _env_flag(name, default=False):
    '''Read a boolean from the environment: 1, true, yes and on mean True.'''
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in {'1', 'true', 'yes', 'on'}


# Get the port and mount the app
if __name__ == '__main__':
    # An empty port lets Flask pick its default one
    port = os.environ.get('FLASK_RUN_PORT', '')
    # DEBUG is parsed as a flag: a plain string such as "False" is truthy
    debug = _env_flag('DEBUG')
    app.run(port=port or None, debug=debug)