# from __future__ import division
from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import nltk
import codecs
import base64

# Emit a standalone HTML page rendering the Hong Kong Letters Patent 1843:
# a header image, an info box, a colour legend, the treaty text with every
# token wrapped in a <span> classed by its part-of-speech tag, and a
# frequent-words list.
#
# NOTE(review): the HTML inside the original string literals was stripped in
# this copy of the file. The markup below is reconstructed from the surviving
# class-name hints and captions — confirm against the rendered page / CSS.

nltk.download('stopwords')

# Treaty text; paragraphs are separated by blank lines.
# (Renamed from `russia_file`/`russia_text` — the file is the Hong Kong treaty.)
with open('treaty_file/hongkong.txt', 'r') as treaty_file:
    treaty_text = treaty_file.read()
treaty_paragraphs = treaty_text.split("\n\n")

# Stopwords: NLTK's English defaults unioned with the project-specific list.
t_default_stopwords = set(stopwords.words('english'))
with codecs.open('t_stopwords.txt', 'r') as stop_file:  # FIX: close the file
    t_custom_stopwords = set(stop_file.read().splitlines())
t_all_stopwords = t_default_stopwords | t_custom_stopwords

# --- page head --------------------------------------------------------------
print('''<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><link rel="stylesheet" href="style.css"></head>
<body>''')

# t_wrapper (second wrapper)
print('<div class="t_wrapper">')

# Header image, inlined as base64 so the page is self-contained.
with open('img/hk.jpg', 'rb') as img_file:  # FIX: close the image file too
    img_url = base64.b64encode(img_file.read()).decode('utf-8')
t_image = '''<div class="t_image">
<img src="data:image/jpeg;base64,{0}" alt="Hong Kong Letters Patent 1843">
<p>Hong Kong Letters Patent 1843</p>
</div>'''.format(img_url)
print(t_image)

# t_info box
print('<div class="t_infobox">')
t_infotext = [
    ('Name of Treaty', 'Hong Kong Letters Patent 1843'),
    ('Country of Origin', 'United Kingdom'),
    ('Signed', ' April, 1843'),
    ('Location', 'Westminster, U.K.'),
    ('Word Counts', '1,543'),
    ('Type', 'unilateral treaty'),
    ('Original Source', 'link'),
    # FIX: typos — "Vcitoria" -> "Victoria", "to established" -> "to establish"
    ('Description', 'It was issued by Queen Victoria of the United Kingdom of '
                    'Great Britain and Ireland in 1843 to establish the '
                    'British Colony of Hong Kong in and over Hong Kong '
                    'Island.'),
]
for t_title, t_info in t_infotext:
    print('<div class="t_inforow">'
          '<div class="t_title">{0}</div>'
          '<div class="t_info">{1}</div>'
          '</div>'.format(t_title, t_info))
print('</div>')

# Colour legend for the POS highlighting.
# FIX: typos — "possesive" -> "possessive", "superative" -> "superlative"
print('''<ul class="t_legend">
  <li class="stop">stopwords</li>
  <li class="JJ">adjective</li>
  <li class="VB">verb</li>
  <li class="NN">noun</li>
  <li class="NNP">proper noun</li>
  <li class="RB">adverb</li>
  <li class="PRPS">possessive pronoun</li>
  <li class="VBG">present participle</li>
  <li class="JJS">adjective superlative</li>
  <li class="RBR">adverb comparative + superlative</li>
</ul>''')

# --- treaty text, token by token ---------------------------------------------
print('<div class="t_text">')
t_tokenized_all = []
for t_paragraph in treaty_paragraphs:
    t_tokenized = word_tokenize(t_paragraph)
    t_tokenized_all += t_tokenized  # accumulate for the frequency list below
    t_tagged = pos_tag(t_tokenized)
    print('<p>')
    for t_word, t_pos in t_tagged:
        # CSS class names must not contain '$' or punctuation, so sanitize
        # both the POS tag and the lower-cased word before using them as
        # classes; the visible text (third slot) stays untouched.
        pos_class = (t_pos.replace('PRP$', 'PRPS')
                          .replace('.', 'dot').replace(',', 'comma')
                          .replace('(', 'marks').replace(')', 'marks')
                          .replace(':', 'marks').replace(';', 'marks'))
        word_class = (t_word.replace('’', 'apostrophe')
                            .replace('.', 'dot').replace(',', 'comma')
                            .replace('(', 'marks').replace(')', 'marks')
                            .replace(':', 'marks').replace(';', 'marks')
                            .lower())
        print('<span class="{0} {1}">{2}</span>'.format(
            pos_class, word_class, t_word))
    print('</p>')
print('</div>')

# --- treaty colonial top words list ------------------------------------------
print('<div class="t_frequent">')
print('<h2>Frequent words</h2>')
# FIX: filter against the FULL stopword set (default | custom); the union was
# computed above but the original filtered on the custom list only.
# (Also dropped the unused `t_frequency_word = FreqDist(...)` re-wrap.)
t_tokens_without_stopwords = nltk.FreqDist(
    word.lower() for word in t_tokenized_all
    if word.lower() not in t_all_stopwords)
t_top_words = t_tokens_without_stopwords.most_common(20)
for t_chosen_word, t_frequency in t_top_words:
    print('<div class="t_word">{} ({})</div>'.format(
        t_chosen_word, t_frequency))
print('</div>')

# close the wrapper and the page
print('</div>')
print('</body>')
print('</html>')