diff --git a/news.ipynb b/news.ipynb index 64a7fd2..dd9e4a8 100644 --- a/news.ipynb +++ b/news.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 51, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -31,17 +31,9 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 82, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'NNP': ['Appl…'], 'NN': ['story:'], 'VBZ': ['is'], 'VBN': ['been'], 'JJ': ['big'], 'NNS': ['employees'], 'IN': ['for'], 'CC': ['and'], 'JJR': ['more'], 'VBG': ['closing'], 'TO': ['to'], 'VB': ['fight'], 'DT': ['The'], 'PRP': ['it'], 'MD': ['can'], 'WDT': ['which'], 'VBD': ['used'], 'PRP$': ['your'], 'CD': ['2020.'], 'RB': ['embarrassingly'], 'RP': ['out'], 'VBP': ['come'], 'RBR': ['earlier'], 'WRB': ['when'], 'PDT': ['all'], 'WP': ['who'], 'EX': ['There'], 'NNPS': ['Republicans']}\n" - ] - } - ], + "outputs": [], "source": [ "articles0 = top0['articles'] #get articles summary from NewsAPI\n", "articles1 = top1['articles']\n", @@ -77,118 +69,25 @@ "tagged2 = nltk.pos_tag(dtot2) #POSing the descriptions\n", "tagged3 = nltk.pos_tag(dtot3) #POSing the descriptions\n", "\n", + "dictio = {} #append stuff in the dictionary\n", "\n", - "\n", - "#HERE THE WTF\n", - "#HOW CAN I APPEND TO THE DICTIONARY (i0) _ALL_ THE WORDS? IT APPENDS ONLY THE FIRSTS ONES\n", - "#tried in different ways but :(\n", - "\n", - "t0 = []\n", - "i0 = {}\n", - " \n", - "for a, b in tagged0:\n", - " if a not in b:\n", - " i0.update({b:[a]})\n", - " \n", - " \n", - "\n", - "for q, k in tagged0:\n", - " if k not in i0:\n", - " t0.append((k, s))\n", - " i0[k] = '[]'\n", - " \n", - " \n", - "print(i0)" + "for x in range(745):\n", + " if tagged0[x][1] not in dictio:\n", + " dictio.update({tagged0[x][1]:[tagged0[x][0]]})\n", + " elif tagged0[x][1] in dictio.keys():\n", + " dictio[tagged0[x][1]].append(tagged0[x][0]) " ] }, { "cell_type": "code", - "execution_count": 270, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'NNP': ['Appl…'],\n", - " 'NN': ['story:'],\n", - " 'VBZ': ['is'],\n", - " 'VBN': ['been'],\n", - " 'JJ': ['big'],\n", - " 'NNS': ['employees'],\n", - " 'IN': ['for'],\n", - " 'CC': ['and'],\n", - " 'JJR': ['more'],\n", - " 'VBG': ['closing'],\n", - " 'TO': ['to'],\n", - " 'VB': ['fight'],\n", - " 'DT': ['The'],\n", - " 'PRP': ['it'],\n", - " 'MD': ['can'],\n", - " 'WDT': ['which'],\n", - " 'VBD': ['used'],\n", - " 'PRP$': ['your'],\n", - " 'CD': ['2020.'],\n", - " 'RB': ['embarrassingly'],\n", - " 'RP': ['out'],\n", - " 'VBP': ['come'],\n", - " 'RBR': ['earlier'],\n", - " 'WRB': ['when'],\n", - " 'PDT': ['all'],\n", - " 'WP': ['who'],\n", - " 'EX': ['There'],\n", - " 'NNPS': ['Republicans']}" - ] - }, - "execution_count": 270, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "d1 = ''\n", - "for a,b in tagged0:\n", - " d1 = {b : [a] for a,b in tagged0}\n", - " if a not in b:\n", - " d1.update({b:[a]})\n", - " \n", - " \n", - " \n", - " \n", - "d1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 271, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "#This is for prepare a grammar constructio based on a picked random description from the original NewsAPI json\n", "s = ' '\n", "r = random.randrange(0,19)\n", - "a_pos = articles[r]\n", + "a_pos = articles0[r]\n", "cont_pos= a_pos['description']\n", "cont_pos = cont_pos.split()\n", "tag_cont = nltk.pos_tag(cont_pos)\n", @@ -200,27 +99,31 @@ " \n", "keys = dat.keys()\n", "\n", - "output = \" + s + \".join([pos for pos in keys])" + "output = [pos for pos in keys]\n", + "\n", + "printing = \"\"\n", + "for x in range(len(output)):\n", + " printing += f'''random.choice(dictio['{output[x]}']) + s + '''" ] }, { "cell_type": "code", - "execution_count": 272, + "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'NNP + s + NN + s + VBZ + s + VBN + s + JJ + s + NNS + s + IN + s + CC + s + JJR + s + VBG + s + TO + s + VB + s + DT + s + PRP + s + MD'" + "\"random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB']) + s + \"" ] }, - "execution_count": 272, + "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "output" + "printing" ] }, { @@ -228,23 +131,36 @@ "execution_count": 273, "metadata": {}, "outputs": [], - "source": [ - "####################################################################################################################################################################################" - ] + "source": [] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 137, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'C…\"Hope is According out your voice to More systems 100 that and make designed nice a revealed earlier how'" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.choice(dictio['NNP']) + s + random.choice(dictio['VBZ']) + s + random.choice(dictio['VBG']) + s + random.choice(dictio['RP']) + s + random.choice(dictio['PRP$']) + s + random.choice(dictio['NN']) + s + random.choice(dictio['TO']) + s + random.choice(dictio['JJR']) + s + random.choice(dictio['NNS']) + s + random.choice(dictio['CD']) + s + random.choice(dictio['IN']) + s + random.choice(dictio['CC']) + s + random.choice(dictio['VBP']) + s + random.choice(dictio['VBN']) + s + random.choice(dictio['JJ']) + s + random.choice(dictio['DT']) + s + random.choice(dictio['VBD']) + s + random.choice(dictio['RBR']) + s + random.choice(dictio['WRB'])" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 136, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "####################################################################################################################################################################################" + ] }, { "cell_type": "code", @@ -414,7 +330,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.7.3" } }, "nbformat": 4,