SI13_federico_patches/news_ex.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import nltk\n",
    "from newsapi import NewsApiClient\n",
    "s = ' '"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "newsapi = NewsApiClient(api_key='0c00356f65df431ab394d179292075bd')\n",
    "top0 = newsapi.get_everything(q='future', language='en')\n",
    "top1 = newsapi.get_everything(q='futuro', language='it')\n",
    "top2 = newsapi.get_everything(q='futuro', language='es')\n",
    "top3 = newsapi.get_everything(q='future', language='fr')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "articles = top0['articles']\n",
    "\n",
    "a_pos = articles[14]\n",
    "cont_pos= a_pos['description']\n",
    "cont_pos = cont_pos.split()\n",
    "tag_cont = nltk.pos_tag(cont_pos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "dtot = ''\n",
    "\n",
    "for x in range(20):\n",
    "    a = articles[x]\n",
    "    d = a['description']\n",
    "    dtot += d\n",
    "    \n",
    "dtot = dtot.split()\n",
    "\n",
    "tagged = nltk.pos_tag(dtot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "dat = {}\n",
    "\n",
    "for word, tag in tag_cont:\n",
    "    dat[tag] = word\n",
    "    \n",
    "keys = dat.keys()\n",
    "\n",
    "output = \" + s + \".join([pos for pos in keys])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = {}\n",
    "\n",
    "def my_dict(gr,data,grlista):\n",
    "\n",
    "    data = {}\n",
    "\n",
    "    for word, tag in tagged:\n",
    "        dataset[tag] = word\n",
    "        if tag == gr:\n",
    "            data[tag] = word\n",
    "            grlista.append(word)\n",
    "            \n",
    "            \n",
    "CClista = []\n",
    "my_dict('CC','dataCC',CClista)\n",
    "\n",
    "CDlista = []\n",
    "my_dict('CD','dataCD',CDlista)\n",
    "\n",
    "\n",
    "DTlista = []\n",
    "my_dict('DT','dataDT',DTlista)\n",
    "\n",
    "\n",
    "EXlista = []\n",
    "my_dict('EX','dataEX',EXlista)\n",
    "\n",
    "\n",
    "FWlista = []\n",
    "my_dict('FW','dataFW',FWlista)\n",
    "\n",
    "INlista = []\n",
    "my_dict('IN','dataIN',INlista)\n",
    "\n",
    "\n",
    "JJlista = []\n",
    "my_dict('JJ','dataJJ',JJlista)\n",
    "\n",
    "JJRlista = []\n",
    "my_dict('JJR','dataJJR',JJRlista)\n",
    "\n",
    "\n",
    "\n",
    "JJSlista = []\n",
    "my_dict('JJS','dataJJS',JJSlista)\n",
    "\n",
    "LSlista = []\n",
    "my_dict('LS','dataLS',LSlista)\n",
    "\n",
    "\n",
    "MDlista = []\n",
    "my_dict('MD','dataMD',MDlista)\n",
    "\n",
    "\n",
    "NNlista = []\n",
    "my_dict('NN','dataNN',NNlista)\n",
    "\n",
    "\n",
    "NNSlista = []\n",
    "my_dict('NNS','dataNNS',NNSlista)\n",
    "\n",
    "\n",
    "NNPlista = []\n",
    "my_dict('NNP','dataNNP',NNPlista)\n",
    "\n",
    "\n",
    "PDTlista = []\n",
    "my_dict('PDT','dataPDT',PDTlista)\n",
    "\n",
    "POSlista = []\n",
    "my_dict('POS','dataPOS',POSlista)\n",
    "\n",
    "PRPlista = []\n",
    "my_dict('PRP','dataPRP',PRPlista)\n",
    "\n",
    "\n",
    "RBlista = []\n",
    "my_dict('RB','dataRB',RBlista)\n",
    "RB = random.choice(RBlista)\n",
    "\n",
    "\n",
    "RBRlista = []\n",
    "my_dict('RBR','dataRBR',RBRlista)\n",
    "\n",
    "\n",
    "RBSlista = []\n",
    "my_dict('RBS','dataRBS',RBSlista)\n",
    "\n",
    "\n",
    "RPlista = []\n",
    "my_dict('RP','dataRP',RPlista)\n",
    "\n",
    "\n",
    "SYMlista = []\n",
    "my_dict('SYM','dataSYM',SYMlista)\n",
    "\n",
    "\n",
    "TOlista = []\n",
    "my_dict('TO','dataTO',TOlista)\n",
    "\n",
    "\n",
    "UHlista = []\n",
    "my_dict('UH','dataUH',UHlista)\n",
    "\n",
    "\n",
    "VBlista = []\n",
    "my_dict('VB','dataVB',VBlista)\n",
    "\n",
    "\n",
    "VBDlista = []\n",
    "my_dict('VBD','dataVBD',VBDlista)\n",
    "\n",
    "VBGlista = []\n",
    "my_dict('VBG','dataVBG',VBGlista)\n",
    "\n",
    "VBNlista = []\n",
    "my_dict('VBN','dataVBN',VBNlista)\n",
    "\n",
    "\n",
    "VBPlista = []\n",
    "my_dict('VBP','dataVBP',VBPlista)\n",
    "\n",
    "\n",
    "VBZlista = []\n",
    "my_dict('VBZ','dataVBZ',VBZlista)\n",
    "\n",
    "\n",
    "WDTlista = []\n",
    "my_dict('WDT','dataWDT',WDTlista)\n",
    "\n",
    "\n",
    "WPlista = []\n",
    "my_dict('WP','dataWP',WPlista)\n",
    "\n",
    "\n",
    "WRBlista = []\n",
    "my_dict('WRB','dataWRB',WRBlista)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "Cannot choose from an empty sequence",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-8-4ed1abd4d748>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[0mWDT\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWDTlista\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[0mWP\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWPlista\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mWRB\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWRBlista\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/random.py\u001b[0m in \u001b[0;36mchoice\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0mi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_randbelow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    289\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Cannot choose from an empty sequence'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    291\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mseq\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mIndexError\u001b[0m: Cannot choose from an empty sequence"
     ]
    }
   ],
   "source": [
    "CC = random.choice(CClista)\n",
    "CD = random.choice(CDlista)\n",
    "DT = random.choice(DTlista)\n",
    "EX = random.choice(EXlista)\n",
    "#FW = random.choice(FWlista)\n",
    "IN = random.choice(INlista)\n",
    "JJ = random.choice(JJlista)\n",
    "JJR = random.choice(JJRlista)\n",
    "JJS = random.choice(JJSlista)\n",
    "#LS = random.choice(LSlista)\n",
    "MD = random.choice(MDlista)\n",
    "NN = random.choice(NNlista)\n",
    "NNS = random.choice(NNSlista)\n",
    "NNP = random.choice(NNPlista)\n",
    "PDT = random.choice(PDTlista)\n",
    "#POS = random.choice(POSlista)\n",
    "PRP = random.choice(PRPlista)\n",
    "RB = random.choice(RBlista)\n",
    "#RBR = random.choice(RBRlista)\n",
    "#RBS = random.choice(RBSlista)\n",
    "RP = random.choice(RPlista)\n",
    "#SYM = random.choice(SYMlista)\n",
    "TO = random.choice(TOlista)\n",
    "#UH = random.choice(UHlista)\n",
    "VB = random.choice(VBlista)\n",
    "VBD = random.choice(VBDlista)\n",
    "VBG = random.choice(VBGlista)\n",
    "VBN = random.choice(VBNlista)\n",
    "VBP = random.choice(VBPlista)\n",
    "VBZ = random.choice(VBZlista)\n",
    "WDT = random.choice(WDTlista)\n",
    "WP = random.choice(WPlista)\n",
    "WRB = random.choice(WRBlista)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DT + s + NNS + s + VBN + s + IN + s + VBG + s + NN + s + WDT + s + VBZ + s + JJ + s + EX + s + VBP + s + CD + s + PRP + s + NNP + s + CC\n"
     ]
    }
   ],
   "source": [
    "print(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'the microswimmers confirmed with tearing software that represents Mashable There it’s 2018. them Mashable or'"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DT + s + NNS + s + VBN + s + IN + s + VBG + s + NN + s + WDT + s + VBZ + s + JJ + s + EX + s + VBP + s + CD + s + PRP + s + NNP + s + CC\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = ['The cable who is of technologies are going to be new Read also and','''no world's who tells from rules are figuring to use near Broomstick, ahead or''',\n",
    "'the mechanism who seeks as owners are figuring to control new Science, again and',\n",
    "'the presidency, who has in works carry flying to build hard Harry ever and',\n",
    "'a repository. who PlantsPhysicists about microswimmers deliver... going to act it. Harry likely But',\n",
    "'the lecture who PlantsPhysicists of issues deliver... flailing to be different Fluora, up and',\n",
    "'a cable who has in poets think averting to be electric CEO half-jokingly and',\n",
    "'the male who fits of submissions are helping to use fellow Texas half-jokingly and',\n",
    "'the toy who is of he’d it’s helping to get electric Black much and',\n",
    "'a unease who represents orgasm. sensors think averting to act free PS5 also and',\n",
    "'the more...Jennifer who fits of reveals believe offering to streaming. major Fluora, actually and',\n",
    "'the sanitizer who represents in reorganizations are figuring to perform adjustable Tech, likely or']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['The cable who is of technologies are going to be new Read also and',\n",
       " \"no world's who tells from rules are figuring to use near Broomstick, ahead or\",\n",
       " 'the mechanism who seeks as owners are figuring to control new Science, again and',\n",
       " 'the presidency, who has in works carry flying to build hard Harry ever and',\n",
       " 'a repository. who PlantsPhysicists about microswimmers deliver... going to act it. Harry likely But',\n",
       " 'the lecture who PlantsPhysicists of issues deliver... flailing to be different Fluora, up and',\n",
       " 'a cable who has in poets think averting to be electric CEO half-jokingly and',\n",
       " 'the male who fits of submissions are helping to use fellow Texas half-jokingly and',\n",
       " 'the toy who is of he’d it’s helping to get electric Black much and',\n",
       " 'a unease who represents orgasm. sensors think averting to act free PS5 also and',\n",
       " 'the more...Jennifer who fits of reveals believe offering to streaming. major Fluora, actually and',\n",
       " 'the sanitizer who represents in reorganizations are figuring to perform adjustable Tech, likely or']"
      ]
     },
     "execution_count": 294,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 447,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<head>harset=utf-8</head>\n"
     ]
    }
   ],
   "source": [
    "export = 'news.txt'\n",
    "with open(export, 'w') as export:\n",
    "    print('<head>harset=utf-8</head>')\n",
    "    print('<h1>News from the future</h1>', file=export)\n",
    "    print('<br><br><br><br><br><br><br><br><br><br><br><br>', file = export)\n",
    "    for x in range(len(res)):\n",
    "        print(f'''{res[x].lower().capitalize()}.\n",
    "        ''',file=export)\n",
    "        print('<br><br><br><br><br><br><br><br><br><br><br><br><br><br>', file = export)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 448,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fontconfig warning: ignoring UTF-8: not a valid region tag\n"
     ]
    }
   ],
   "source": [
    "!pandoc news.txt | weasyprint -s css.css  - newsfromthefuture.pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 411,
   "metadata": {},
   "outputs": [],
   "source": [
    "a_pos = open('language.txt').read()\n",
    "cont_pos = a_pos.split()\n",
    "tag_cont = nltk.pos_tag(cont_pos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {},
   "outputs": [],
   "source": [
    "dat = {}\n",
    "\n",
    "for word, tag in tag_cont:\n",
    "    dat[tag] = word\n",
    "    \n",
    "keys = dat.keys()\n",
    "\n",
    "output = \" + s + \".join([pos for pos in keys])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Led and broom believe likely American of can test has called sensors the which There smallest 300 selected They who They to'"
      ]
     },
     "execution_count": 330,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}