') #insert an image # https://upload.wikimedia.org/wikipedia/commons/1/15/Joffe_signing_the_Treaty_of_Tartu.jpg FaceApp_img_url = base64.b64encode(open('img/instagram_logo.png', 'rb').read()).decode('utf-8') FaceApp_image = '

Instagram

'.format(FaceApp_img_url) print(FaceApp_image) #info box print('

') infotext = [('Name of Service', 'Instagram'), ('Country of Origin', 'United States'), ('Initial release', 'October, 2010'), ('Type', 'Social Media'), ('Word Counts', '2,359'), ('Original Source', 'link'), ('Description', 'FInstagram is an American photo and video-sharing social networking service owned by Facebook, Inc. The app allows users to upload media, which can be edited with filters and organized with tags and location information. Posts can be shared publicly or with pre-approved followers. Users can browse other users' content by tags and locations, and view trending content. Users can like photos and follow other users to add their content to a feed.')] for title, info in infotext: print('

{0}

{1}

'.format(title, info)) print('

') print('''

stopwords

adjective

verb

noun

proper noun

adverb

possesive pronoun

present participle

adjective superlative

adverb comparative + superative

''') #ToS text print('

') tokenized_all = [] for paragraph in faceapp_text_list: tokenized = word_tokenize(paragraph) tokenized_all += tokenized # add to the tokenized_all tagged = pos_tag(tokenized) print('

') for word, pos in tagged: print('{2}'.format(pos.replace('PRP$', 'PRPS').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks'), word.replace('’', 'apostrophe').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks').lower(), word)) print('

') print('

') #tos top words list print('

Frequent words

') tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized_all if words.lower() not in tos_custom_stopwords) frequency_word = FreqDist(tokens_without_stopwords) top_words = tokens_without_stopwords.most_common(30) for chosen_words, frequency in top_words: print('

{} ({})

'.format(chosen_words, frequency)) print('