') img_url = base64.b64encode(open('img/tartu.jpeg', 'rb').read()).decode('utf-8') t_image = '

Peace Treaty of Tartu

'.format(img_url) print(t_image) #t_info box print('

') t_infotext = [('Name of Treaty', 'Peace Treaty of Tartu'), ('Country of Origin', 'Russia'), ('Signed', 'February 2, 1920'), ('Location', 'Tartu, Estonia'), ('Word Counts', '2,104'), ('Type', 'bilateral peace treaty'), ('Original Source', 'link'), ('Description', 'The Tartu Peace Treaty or Treaty of Tartu is a peace treaty between Estonia and Russian Soviet Federative Socialist Republic signed on 2 February 1920, ending the Estonian War of Independence.')] for t_title, t_info in t_infotext: print('

{0}

{1}

'.format(t_title, t_info)) print('

') print('''

verb

noun

adjective

adverb

possesive pronoun

present participle

adjective superlative

adverb comparative + superative

proper noun

stopwords

''') #Treaty text print('

') t_tokenized_all = [] for t_paragraph in russia_text_list: t_tokenized = word_tokenize(t_paragraph) t_tokenized_all += t_tokenized # add to the tokenized_all t_tagged = pos_tag(t_tokenized) print('

') for t_word, t_pos in t_tagged: print('{2}'.format(t_pos.replace('PRP$', 'PRPS').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks'), t_word.replace('’', 'apostrophe').replace('.', 'dot').replace(',', 'comma').replace('(', 'marks').replace(')', 'marks').replace(':', 'marks').replace(';', 'marks').lower(), t_word)) print('

') print('

') #treaty colonial top words list print('

Frequent words

') t_tokens_without_stopwords = nltk.FreqDist(words.lower() for words in t_tokenized_all if words.lower() not in t_custom_stopwords) t_frequency_word = FreqDist(t_tokens_without_stopwords) t_top_words = t_tokens_without_stopwords.most_common(20) for t_chosen_words, t_frequency in t_top_words: print('

{} ({})

'.format(t_chosen_words, t_frequency)) print('