You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

65 lines
2.0 KiB
Python

#!/usr/bin/env python3
import os, requests, urllib, subprocess
from bs4 import BeautifulSoup
def pandoc(src, fro="markdown", to="html5"):
    """Convert text between markup formats by piping it through ``pandoc``.

    Runs the external ``pandoc`` executable with ``--section-divs``, feeding
    *src* on stdin and returning whatever pandoc writes to stdout.

    Args:
        src: Source document as a ``str``; sent to pandoc UTF-8 encoded.
        fro: Pandoc input format name (passed to ``--from``).
        to: Pandoc output format name (passed to ``--to``).

    Returns:
        The converted document, decoded from UTF-8.

    Raises:
        FileNotFoundError: If the ``pandoc`` executable cannot be found.
        subprocess.CalledProcessError: If pandoc exits with a non-zero
            status; its diagnostics are available on the exception's
            ``stderr`` attribute.
    """
    # check=True surfaces conversion failures instead of silently returning
    # an empty string while the captured stderr is thrown away.
    result = subprocess.run(
        ["pandoc", "--from", fro, "--to", to, "--section-divs"],
        input=src.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    return result.stdout.decode("utf-8")
def read_jargon_dictionary(url):
    """Download a markdown glossary and return it as a lookup dictionary.

    The document at *url* is fetched, decoded as UTF-8 and converted to HTML
    with ``pandoc()``. Every ``<section class="level1">`` in the result is
    treated as one entry: the text of its ``<h1>`` is the term, and the text
    of its first ``<p>`` (or, if there is none, its first ``<ul>``) is the
    definition.

    Args:
        url: Address of the markdown text to download.

    Returns:
        A dict keyed by the upper-cased term; each value is a dict
        ``{'name': <term as written>, 'text': <definition text>}``.
        Sections lacking a heading or a definition are left out.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        markdown_text = response.read().decode("utf-8")

    html = pandoc(markdown_text)  # convert from markdown to html
    soup = BeautifulSoup(html, features="html5lib")

    defs = {}
    for section in soup.find_all("section", {"class": "level1"}):
        heading = section.find("h1")
        definition = section.find("p")
        if definition is None:
            definition = section.find("ul")
        if heading is None or definition is None:
            # Skip malformed sections instead of failing on ``None.text``.
            continue
        word = heading.text  # e.g. "TCP"
        defs[word.upper()] = {"name": word, "text": definition.text}
    return defs
if __name__ == "__main__":
    # Executed only when this file is run directly (e.g. `python read.py`);
    # importing the module skips this demo.
    from pprint import pprint

    pad_export_url = "https://pad.xpub.nl/p/jargon-file.test/export/txt"
    jargon = read_jargon_dictionary(pad_export_url)
    print("OUR JARGON DICTIONARY")
    pprint(jargon)
# print(defs["UDP"])
# cmd = '#TCP'
# query = cmd.replace('#','')
# for header in soup('h1'):
# if query in header.string:
# print(header.string)
# print('---')
# cmd = '#UDP'
# query = cmd.replace('#','')
# for header in soup('h1'):
# if query in header.string:
# print(header.string)
# print('---')