') img_url = base64.b64encode(open('img/america.jpg', 'rb').read()).decode('utf-8') t_image = '

Treaty of Paris

'.format(img_url) print(t_image) #t_info box print('

') t_infotext = [('Name of Treaty', 'Kiram-Bates Treaty'), ('Country of Origin', 'United States'), ('Signed', 'August, 1899'), ('Location', 'Jolo, Sultanate of Sulu'), ('Word Counts', '719'), ('Type', 'unilateral treaty'), ('Original Source', 'link'), ('Description', 'The Kiram-Bates Treaty was a treaty signed by the U.S.A. and the Sultanate of Sulu during the Philippine–American War. The treaty functioned to prevent the entry of the Sulu Sultanate into the Philippine-American War.')] for t_title, t_info in t_infotext: print('

{0}

{1}

'.format(t_title, t_info)) print('

') print('''

stopwords

adjective

verb

noun

proper noun

adverb

possesive pronoun

present participle

adjective superlative

adverb comparative + superative

''') #Treaty text print('

') t_tokenized_all = [] for t_paragraph in russia_text_list: t_tokenized = word_tokenize(t_paragraph) t_tokenized_all += t_tokenized # add to the tokenized_all t_tagged = pos_tag(t_tokenized) print('

') for t_word, t_pos in t_tagged: print('{2}'.format(t_pos.replace('PRP$', 'PRPS').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks'), t_word.replace('’', 'apostrophe').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks').lower(), t_word)) print('

') print('

') #treaty colonial top words list print('

Frequent words

') t_tokens_without_stopwords = nltk.FreqDist(words.lower() for words in t_tokenized_all if words.lower() not in t_custom_stopwords) t_frequency_word = FreqDist(t_tokens_without_stopwords) t_top_words = t_tokens_without_stopwords.most_common(20) for t_chosen_words, t_frequency in t_top_words: print('

{} ({})

'.format(t_chosen_words, t_frequency)) print('