# Flask application to serve the web pages
from flask import Flask, request, redirect, url_for, jsonify, render_template

# Mediawiki client to interact with the Wiki
import mwclient

# BS to read the html table from the wiki
from bs4 import BeautifulSoup

# os and dotenv to store the mediawiki credentials in a safe place
import os
from dotenv import load_dotenv
from pathlib import Path

# load the mediawiki credentials from the shared folder
dotenv_path = Path("/var/www/.mw-credentials")
load_dotenv(dotenv_path=dotenv_path)

# load the configuration env
load_dotenv()


class PrefixMiddleware(object):
    '''WSGI middleware that serves the wrapped app under a URL prefix.

    Requests whose path starts with ``prefix`` have the prefix moved from
    PATH_INFO to SCRIPT_NAME, which keeps the @app.route() decorators clean.
    Any other request is answered with a plain-text 404.
    '''

    def __init__(self, app, prefix=""):
        self.app = app
        self.prefix = prefix

    def __call__(self, environ, start_response):
        # PATH_INFO may be omitted from the environ when it is empty
        path = environ.get("PATH_INFO", "")
        if path.startswith(self.prefix):
            environ["PATH_INFO"] = path[len(self.prefix):]
            environ["SCRIPT_NAME"] = self.prefix
            return self.app(environ, start_response)

        # The WSGI status must be "<code> <reason phrase>", not a bare code
        start_response("404 Not Found", [("Content-Type", "text/plain")])
        return ["This url does not belong to the app.".encode()]


# create flask application
app = Flask(__name__)

# Get the URL prefix for the soupboat
# and register the middleware to use our base_url as prefix on all the requests
base_url = os.environ.get('BASE_URL', '')
app.wsgi_app = PrefixMiddleware(app.wsgi_app, prefix=base_url)

# Page of the wiki with the pads
padliography = os.environ.get('PAGE', '')


def _connect():
    '''Return a wiki connection logged in as the bot user.

    The credentials are read from the MW_BOT and MW_KEY environment variables.
    '''
    site = mwclient.Site('pzwiki.wdka.nl', path='/mw-mediadesign/')
    site.login(
        username=os.environ.get('MW_BOT'),
        password=os.environ.get('MW_KEY'),
    )
    return site


def add_pad(link, title, overview, categories, date):
    '''Add a new pad to the wiki page.

    The pad is written as a new table row placed right before the table
    closing marker ``|}`` of the page named by ``padliography``.
    '''
    # 1. Connect and authenticate as the bot user
    #    (this is necessary to edit the contents of the page)
    site = _connect()

    # 2. Select the page and get the contents
    page = site.pages[padliography]
    text = page.text()

    # 3. Append the pad as new row in the table of pads
    # NOTE(review): the values are interpolated into the wikitext unescaped,
    # and every '|}' found in the page is replaced, so a page holding more
    # than one table gets the row in each of them.
    new_row = f'|-\n| {link} || {title} || {overview} || {categories} || {date} \n|-\n' + '|}'
    text = text.replace('|}', new_row)

    # 4. Apply the edit
    page.edit(text, f'New pad in the {padliography}: {title}')


def get_pads():
    '''Retrieve pads from the wiki.

    Returns a list of dicts, one per non-empty table row, keyed by the
    lower-cased table headers. The list is empty when the rendered page has
    no table with the "padliography" class.
    '''
    # 1. Connect and log in as the bot user
    site = _connect()

    # 2. Use the MediaWiki API to get the wikitext contents in HTML
    html = site.api('parse', prop='text', page=padliography)

    # 3. Parse the HTML with BeautifulSoup to extract the table of pads
    soup = BeautifulSoup(html['parse']['text']['*'], features="html.parser")
    table = soup.find("table", attrs={"class": "padliography"})
    if table is None:
        return []

    # 4. Find the headers of the table
    headers = [header.text.lower().strip() for header in table.find_all('th')]

    # 5. Create a list of pads, using each header as property of the pad.
    #    zip() drops cells that have no matching header instead of raising.
    pads = []
    for row in table.find_all('tr'):
        pad = {
            header: cell.text.rstrip('\n')
            for header, cell in zip(headers, row.find_all('td'))
        }
        # 6. Skip empty pads (rows without cells, such as the header row)
        if pad:
            pads.append(pad)
    return pads


@app.route('/')
def home():
    '''Serve the homepage layout'''
    return render_template('home.html')


@app.route('/api', methods=['GET', 'POST'])
def api():
    '''Manage the interaction with the MediaWiki API.

    A POST adds the pad described by the JSON body to the wiki. Both GET and
    POST answer with the page name and the current list of pads as JSON.
    '''
    # Add a new pad
    if request.method == 'POST':
        payload = request.json
        link = payload.get('link', None)
        title = payload.get('title', None)
        overview = payload.get('overview', '')
        categories = payload.get('categories', '')
        date = payload.get('date', None)
        add_pad(link, title, overview, categories, date)
        # A redirect to the homepage used to be built here but was never
        # returned, so the request always continued to the pad list below.
        # That effective behaviour is kept.

    # Return the pad list
    response = jsonify({
        'page': padliography,
        'pads': get_pads()
    })
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response


port = os.environ.get('FLASK_RUN_PORT', '')

# Start the development server only when the file is executed directly,
# so that importing the module does not block on app.run()
if __name__ == '__main__':
    app.run(port=port)