improvements on 'news' dictionary and print system

master
poni 3 years ago
parent 12fef535e3
commit 2f3571b47b

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -16,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -31,17 +31,9 @@
},
{
"cell_type": "code",
"execution_count": 256,
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'NNP': ['Appl…'], 'NN': ['story:'], 'VBZ': ['is'], 'VBN': ['been'], 'JJ': ['big'], 'NNS': ['employees'], 'IN': ['for'], 'CC': ['and'], 'JJR': ['more'], 'VBG': ['closing'], 'TO': ['to'], 'VB': ['fight'], 'DT': ['The'], 'PRP': ['it'], 'MD': ['can'], 'WDT': ['which'], 'VBD': ['used'], 'PRP$': ['your'], 'CD': ['2020.'], 'RB': ['embarrassingly'], 'RP': ['out'], 'VBP': ['come'], 'RBR': ['earlier'], 'WRB': ['when'], 'PDT': ['all'], 'WP': ['who'], 'EX': ['There'], 'NNPS': ['Republicans']}\n"
]
}
],
"outputs": [],
"source": [
"articles0 = top0['articles'] #get articles summary from NewsAPI\n",
"articles1 = top1['articles']\n",
@ -77,118 +69,25 @@
"tagged2 = nltk.pos_tag(dtot2) #POSing the descriptions\n",
"tagged3 = nltk.pos_tag(dtot3) #POSing the descriptions\n",
"\n",
"dictio = {} #append stuff in the dictionary\n",
"\n",
"\n",
"#HERE THE WTF\n",
"#HOW CAN I APPEND TO THE DICTIONARY (i0) _ALL_ THE WORDS? IT APPENDS ONLY THE FIRSTS ONES\n",
"#tried in different ways but :(\n",
"\n",
"t0 = []\n",
"i0 = {}\n",
" \n",
"for a, b in tagged0:\n",
" if a not in b:\n",
" i0.update({b:[a]})\n",
" \n",
" \n",
"\n",
"for q, k in tagged0:\n",
" if k not in i0:\n",
" t0.append((k, s))\n",
" i0[k] = '[]'\n",
" \n",
" \n",
"print(i0)"
"# Group every word under its POS tag. Iterate tagged0 directly instead of a\n",
"# fixed index range, so the loop never overruns a shorter list (IndexError)\n",
"# or silently skips words in a longer one.\n",
"for word, tag in tagged0:\n",
"    dictio.setdefault(tag, []).append(word)"
]
},
{
"cell_type": "code",
"execution_count": 270,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'NNP': ['Appl…'],\n",
" 'NN': ['story:'],\n",
" 'VBZ': ['is'],\n",
" 'VBN': ['been'],\n",
" 'JJ': ['big'],\n",
" 'NNS': ['employees'],\n",
" 'IN': ['for'],\n",
" 'CC': ['and'],\n",
" 'JJR': ['more'],\n",
" 'VBG': ['closing'],\n",
" 'TO': ['to'],\n",
" 'VB': ['fight'],\n",
" 'DT': ['The'],\n",
" 'PRP': ['it'],\n",
" 'MD': ['can'],\n",
" 'WDT': ['which'],\n",
" 'VBD': ['used'],\n",
" 'PRP$': ['your'],\n",
" 'CD': ['2020.'],\n",
" 'RB': ['embarrassingly'],\n",
" 'RP': ['out'],\n",
" 'VBP': ['come'],\n",
" 'RBR': ['earlier'],\n",
" 'WRB': ['when'],\n",
" 'PDT': ['all'],\n",
" 'WP': ['who'],\n",
" 'EX': ['There'],\n",
" 'NNPS': ['Republicans']}"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"d1 = ''\n",
"for a,b in tagged0:\n",
" d1 = {b : [a] for a,b in tagged0}\n",
" if a not in b:\n",
" d1.update({b:[a]})\n",
" \n",
" \n",
" \n",
" \n",
"d1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 271,
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"#This prepares a grammar construction based on a randomly picked description from the original NewsAPI JSON\n",
"s = ' '\n",
"r = random.randrange(0,19)\n",
"a_pos = articles[r]\n",
"a_pos = articles0[r]\n",
"cont_pos= a_pos['description']\n",
"cont_pos = cont_pos.split()\n",
"tag_cont = nltk.pos_tag(cont_pos)\n",
@ -200,27 +99,31 @@
" \n",
"keys = dat.keys()\n",
"\n",
"output = \" + s + \".join([pos for pos in keys])"
"output = [pos for pos in keys]\n",
"\n",
"printing = \"\"\n",
"for x in range(len(output)):\n",
" printing += f'''random.choice(dictio['{output[x]}']) + s + '''"
]
},
{
"cell_type": "code",
"execution_count": 272,
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'NNP + s + NN + s + VBZ + s + VBN + s + JJ + s + NNS + s + IN + s + CC + s + JJR + s + VBG + s + TO + s + VB + s + DT + s + PRP + s + MD'"
"\"random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB']) + s + \""
]
},
"execution_count": 272,
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"output"
"printing"
]
},
{
@ -228,23 +131,36 @@
"execution_count": 273,
"metadata": {},
"outputs": [],
"source": [
"####################################################################################################################################################################################"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 137,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"'C…\"Hope is According out your voice to More systems 100 that and make designed nice a revealed earlier how'"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": []
"source": [
"####################################################################################################################################################################################"
]
},
{
"cell_type": "code",
@ -414,7 +330,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.7.3"
}
},
"nbformat": 4,

Loading…
Cancel
Save