From 2a23d871de0207c81120b9a66d2cfaa6c0dc33eb Mon Sep 17 00:00:00 2001
From: Michael Murtaugh <mm@automatist.org>
Date: Tue, 14 Apr 2020 13:11:45 +0200
Subject: [PATCH] reorganized readfrompad with functions

---
 readfrompad.py | 69 +++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 37 deletions(-)

diff --git a/readfrompad.py b/readfrompad.py
index e48abaa..5e59807 100644
--- a/readfrompad.py
+++ b/readfrompad.py
@@ -2,11 +2,38 @@ from urllib.request import urlopen
 import markdown, html5lib
 
 
-sample_text = """
+def parse_pad (pad_text):
+    """Render pad_text as markdown and group its <p> elements under the text of the preceding <h1>."""
+    fragment = html5lib.parseFragment(markdown.markdown(pad_text), namespaceHTMLElements=False)
+    # "database" of paragraph elements, keyed by the stripped text of each H1
+    grouped = {}
+    heading = None
+    for node in fragment:
+        if node.tag == "h1" and node.text is not None:
+            # every following paragraph is filed under this heading
+            heading = node.text.strip()
+            continue
+        # paragraphs seen before any heading (or under an empty one) are dropped
+        if node.tag == "p" and heading:
+            grouped.setdefault(heading, []).append(node)
+    # maps heading text -> list of paragraph elements, in document order
+    return grouped
+
+def curl (url):
+    """Fetch url and return the response body decoded as UTF-8 text."""
+    with urlopen(url) as f:  # use the url argument (not the global pad_text_url) and always close the response
+        return f.read().decode('utf-8')
+
+if __name__== "__main__":
+    # THIS CODE ONLY HAPPENS WHEN YOU RUN THE SCRIPT DIRECTLY
+    pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
+    pad_text_url = pad_url + "/export/txt"
+    pad_text = curl(pad_text_url)
+
+    sample_text = """
 # https://hub.xpub.nl/bootleglibrary/book/374
 This is an annotation of Mrs. Gersande's Binding Index
 
-
 # https://hub.xpub.nl/bootleglibrary/book/348
 Telegraph Telephone Teletype
 
@@ -16,39 +43,7 @@ More TEXT HERERERE!!!!!!!!!!!!!!
 Stuff now
 """
 
-pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
-pad_text_url = pad_url + "/export/txt"
-f = urlopen(pad_text_url)
-
-pad_text = f.read().decode('utf-8')
-pad_text = sample_text
-print (pad_text)
-print ()
-
-# print (pad_text)
-# Turn pad text into html text
-html = markdown.markdown(pad_text)
-print (html)
-print ()
-
-# Turn html text in an elementtree
-t = html5lib.parseFragment(html, namespaceHTMLElements=False)
-print (t)
-
-# create a "database" of paragraphs associated with each URL given in an H1
-paragraphs_by_header = {}
-current_header = None
-
-for elt in t:
-    if elt.tag == "h1" and elt.text is not None:
-        print (("HEADER"), elt.text)
-        current_header = elt.text.strip()
-    elif elt.tag == "p":
-        if current_header:
-            if current_header not in paragraphs_by_header:
-                paragraphs_by_header[current_header] = []
-            paragraphs_by_header[current_header].append(elt)
-
-from pprint import pprint
-pprint(paragraphs_by_header)
+    paragraphs_by_header = parse_pad(pad_text)
+    from pprint import pprint
+    pprint(paragraphs_by_header)