import json
import argparse

from nltk.tokenize import sent_tokenize
from rake_nltk import Rake

r = Rake()

ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# Load the existing index if present; start with an empty one otherwise.
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# Build the index of sentences organized by keyword phrases.
alltext = ""
for n in args.text:
    with open(n) as src:
        text = src.read().replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # Extract RAKE keyword phrases from the sentence and file the
        # sentence under every phrase it produced.
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text

# Persist the updated index.
with open('src/index.json', 'w') as outfile:
    json.dump(index, outfile)
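
# Example invocation (a sketch; the script name and the input filenames
# "notes1.txt" / "notes2.txt" are hypothetical):
#
#   python build_index.py notes1.txt notes2.txt
#
# Afterwards, each keyword phrase in src/index.json maps to a list of
# entries of the form:
#   {"filename": "notes1.txt", "sentence": "...", "key": "keyword phrase"}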