Upgrade of database

master
camilo 2 years ago
parent 64a871f40f
commit 8b0f830494

@ -19,23 +19,33 @@
"metadata": {},
"source": [
"```\n",
"glossary = {\n",
" 'title': f'{title}',\n",
" 'properties': {\n",
"[\n",
"{\n",
" \"Word#\": \"3\",\n",
" \"title\": \"Property\",\n",
" \"properties\": [\n",
" \"word\",\n",
" \"proposition\",\n",
" \"logic\"\n",
" ],\n",
" \"voices\": [\n",
" {\n",
" \"voice\": \"⤷ An attribute, characteristic, or quality\",\n",
" \"link\": \"link\"\n",
" },\n",
" {\n",
" \"voice\": \"⤷ From etymology the word comes from propert\",\n",
" \"link\":\"link\"\n",
" }\n",
" 'words':{\n",
" 'reflection': {\n",
" 'voices': ['','',''],\n",
" 'properties': ['','',''],\n",
" }\n",
" 'version': '0.1',\n",
" ]\n",
"}\n",
"]\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -45,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -85,11 +95,245 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 13,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://issue.xpub.nl/13/ATATA/\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://files.cargocollective.com/c1032387/publicacion-independiente-PRINT.pdf\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"http://postgrowth.art/\">⤴</a>]\n",
"[]\n",
"[<a href=\"https://tdingsun.github.io/reading-machines/\">⤴</a>]\n",
"[<a href=\"https://en.wikipedia.org/wiki/Teleology\">⤴</a>]\n",
"[]\n",
"[]\n",
"[<a href=\"https://soulellis.com/writing/urgentcraft2/\">⤴</a>]\n",
"[<a href=\"https://soulellis.com/work/urgentcraft/index.html\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://docs.google.com/spreadsheets/d/1UkgIsDpFMuA0_hvqf5f4ytPTKX4wfPp8ByRs3Uymvag/edit#gid=0\">⤴</a>]\n",
"[<a href=\"https://networkcultures.org/digitalpublishing/\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://odotoo.com/risograph/\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://constantvzw.org/wefts/webpublications.en.html\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://files.cargocollective.com/c1032387/publicacion-independiente-PRINT.pdf#page=12\">⤴</a>]\n",
"[<a href=\"https://files.cargocollective.com/c1032387/publicacion-independiente-PRINT.pdf#page=12\">⤴</a>]\n",
"[<a href=\"https://files.cargocollective.com/c1032387/publicacion-independiente-PRINT.pdf#page=12\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://daringfireball.net/projects/markdown/syntax#overview\">⤴</a>]\n",
"[]\n",
"[<a href=\"https://hub.xpub.nl/soupboat/pad/p/camilo_glossary\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://ia800509.us.archive.org/15/items/p-dpa_booklet/p-dpa_booklet.pdf\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://thenewbridgeproject.com/product/why-publish-noise-miekal-and/\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[<a href=\"https://www.livingbooksaboutlife.org/\">⤴</a>]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n",
"[]\n"
]
}
],
"source": [
"glossary_bag = [] \n",
"word_no = 1\n",
@ -98,7 +342,7 @@
"for word in words:\n",
" \n",
" title = word.find('h1').text\n",
" \n",
"\n",
" voices = word.find_all('p')\n",
" \n",
" links = word.find_all('a')\n",
@ -107,10 +351,19 @@
" \n",
" li_voices = []\n",
" \n",
" li_links = []\n",
" \n",
" for voice in voices:\n",
" li_voices.append(voice.text)\n",
" links = voice.find_all('a')\n",
" print(links)\n",
" sentence = {}\n",
" sentence[\"voice\"]= voice.text.replace(\"⤴\",\"\")\n",
" if len(links) > 0:\n",
" sentence[\"link\"]= []\n",
" \n",
" for link in links:\n",
" url = link.get('href')\n",
" sentence[\"link\"].append(url)\n",
" \n",
" li_voices.append(sentence)\n",
" \n",
" for link in links:\n",
" url = link.get('href')\n",
@ -121,7 +374,6 @@
" 'title': title, \n",
" 'properties': properties,\n",
" 'voices': li_voices,\n",
" 'links': li_links\n",
" }\n",
" \n",
" glossary_bag.append(word)\n",
@ -140,11 +392,11 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
" with open('glossary.json', 'w+', encoding='utf-8') as f:\n",
"with open('glossary.json', 'w+', encoding='utf-8') as f:\n",
" json.dump(glossary_bag, f, indent=5, ensure_ascii=False)"
]
},

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save