improvements on 'news' dictionary and print system

master
poni 4 years ago
parent 12fef535e3
commit 2f3571b47b

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -16,7 +16,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -31,17 +31,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 256, "execution_count": 82,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'NNP': ['Appl…'], 'NN': ['story:'], 'VBZ': ['is'], 'VBN': ['been'], 'JJ': ['big'], 'NNS': ['employees'], 'IN': ['for'], 'CC': ['and'], 'JJR': ['more'], 'VBG': ['closing'], 'TO': ['to'], 'VB': ['fight'], 'DT': ['The'], 'PRP': ['it'], 'MD': ['can'], 'WDT': ['which'], 'VBD': ['used'], 'PRP$': ['your'], 'CD': ['2020.'], 'RB': ['embarrassingly'], 'RP': ['out'], 'VBP': ['come'], 'RBR': ['earlier'], 'WRB': ['when'], 'PDT': ['all'], 'WP': ['who'], 'EX': ['There'], 'NNPS': ['Republicans']}\n"
]
}
],
"source": [ "source": [
"articles0 = top0['articles'] #get articles summary from NewsAPI\n", "articles0 = top0['articles'] #get articles summary from NewsAPI\n",
"articles1 = top1['articles']\n", "articles1 = top1['articles']\n",
@ -77,118 +69,25 @@
"tagged2 = nltk.pos_tag(dtot2) #POSing the descriptions\n", "tagged2 = nltk.pos_tag(dtot2) #POSing the descriptions\n",
"tagged3 = nltk.pos_tag(dtot3) #POSing the descriptions\n", "tagged3 = nltk.pos_tag(dtot3) #POSing the descriptions\n",
"\n", "\n",
"dictio = {} #append stuff in the dictionary\n",
"\n", "\n",
"\n", "for x in range(745):\n",
"#HERE THE WTF\n", " if tagged0[x][1] not in dictio:\n",
"#HOW CAN I APPEND TO THE DICTIONARY (i0) _ALL_ THE WORDS? IT APPENDS ONLY THE FIRSTS ONES\n", " dictio.update({tagged0[x][1]:[tagged0[x][0]]})\n",
"#tried in different ways but :(\n", " elif tagged0[x][1] in dictio.keys():\n",
"\n", " dictio[tagged0[x][1]].append(tagged0[x][0]) "
"t0 = []\n",
"i0 = {}\n",
" \n",
"for a, b in tagged0:\n",
" if a not in b:\n",
" i0.update({b:[a]})\n",
" \n",
" \n",
"\n",
"for q, k in tagged0:\n",
" if k not in i0:\n",
" t0.append((k, s))\n",
" i0[k] = '[]'\n",
" \n",
" \n",
"print(i0)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 270, "execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'NNP': ['Appl…'],\n",
" 'NN': ['story:'],\n",
" 'VBZ': ['is'],\n",
" 'VBN': ['been'],\n",
" 'JJ': ['big'],\n",
" 'NNS': ['employees'],\n",
" 'IN': ['for'],\n",
" 'CC': ['and'],\n",
" 'JJR': ['more'],\n",
" 'VBG': ['closing'],\n",
" 'TO': ['to'],\n",
" 'VB': ['fight'],\n",
" 'DT': ['The'],\n",
" 'PRP': ['it'],\n",
" 'MD': ['can'],\n",
" 'WDT': ['which'],\n",
" 'VBD': ['used'],\n",
" 'PRP$': ['your'],\n",
" 'CD': ['2020.'],\n",
" 'RB': ['embarrassingly'],\n",
" 'RP': ['out'],\n",
" 'VBP': ['come'],\n",
" 'RBR': ['earlier'],\n",
" 'WRB': ['when'],\n",
" 'PDT': ['all'],\n",
" 'WP': ['who'],\n",
" 'EX': ['There'],\n",
" 'NNPS': ['Republicans']}"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"d1 = ''\n",
"for a,b in tagged0:\n",
" d1 = {b : [a] for a,b in tagged0}\n",
" if a not in b:\n",
" d1.update({b:[a]})\n",
" \n",
" \n",
" \n",
" \n",
"d1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 271,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#This prepares a grammar construction based on a randomly picked description from the original NewsAPI json\n", "#This prepares a grammar construction based on a randomly picked description from the original NewsAPI json\n",
"s = ' '\n", "s = ' '\n",
"r = random.randrange(0,19)\n", "r = random.randrange(0,19)\n",
"a_pos = articles[r]\n", "a_pos = articles0[r]\n",
"cont_pos= a_pos['description']\n", "cont_pos= a_pos['description']\n",
"cont_pos = cont_pos.split()\n", "cont_pos = cont_pos.split()\n",
"tag_cont = nltk.pos_tag(cont_pos)\n", "tag_cont = nltk.pos_tag(cont_pos)\n",
@ -200,27 +99,31 @@
" \n", " \n",
"keys = dat.keys()\n", "keys = dat.keys()\n",
"\n", "\n",
"output = \" + s + \".join([pos for pos in keys])" "output = [pos for pos in keys]\n",
"\n",
"printing = \"\"\n",
"for x in range(len(output)):\n",
" printing += f'''random.choice(dictio['{output[x]}']) + s + '''"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 272, "execution_count": 133,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'NNP + s + NN + s + VBZ + s + VBN + s + JJ + s + NNS + s + IN + s + CC + s + JJR + s + VBG + s + TO + s + VB + s + DT + s + PRP + s + MD'" "\"random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB']) + s + \""
] ]
}, },
"execution_count": 272, "execution_count": 133,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"output" "printing"
] ]
}, },
{ {
@ -228,23 +131,36 @@
"execution_count": 273, "execution_count": 273,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": []
"####################################################################################################################################################################################"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 137,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"data": {
"text/plain": [
"'C…\"Hope is According out your voice to More systems 100 that and make designed nice a revealed earlier how'"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB'])"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 136,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"####################################################################################################################################################################################"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -414,7 +330,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.6" "version": "3.7.3"
} }
}, },
"nbformat": 4, "nbformat": 4,

Loading…
Cancel
Save