more updates

5 years ago · 2f6003b4ef
parent 2b81f12b04
commit 2f6003b4ef
2 changed files with 7 additions and 1 deletions
--- a/nltk-book/NLTK_V3.py
+++ b/nltk-book/NLTK_V3.py
@@ -1,9 +1,11 @@
 import sys
 import codecs
 import nltk
+import json
 from nltk.corpus import stopwords
 from nltk import sent_tokenize, word_tokenize, pos_tag

+
 #read stop words from a file (one stopword per line, UTF-8)
 stopwords_file = './stopwords.txt'
 custom_stopwords = set(codecs.open('stopwords.txt', 'r', 'utf-8').read().splitlines())
@@ -24,4 +26,7 @@ tokens = [word.lower() for word in tokens]

 # pos_tag = [word_tokenize(sent) for sent in sent_tokenize(raw)]
 pos_tag = [pos_tag(word_tokenize(sent))for sent in sent_tokenize(raw)]
-print(pos_tag)
+print(pos_tag)
+
+with open ('colonial-glossary.json', 'w') as json_file:
+	json.dump(pos_tag, json_file)
--- a/nltk-book/colonial-glossary.json
+++ b/nltk-book/colonial-glossary.json