import json
import argparse

from nltk.tokenize import sent_tokenize
from rake_nltk import Rake

r = Rake()

ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# Load the existing index if present; start with an empty one otherwise.
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# Build the index of sentences organized by keyword phrases.
alltext = ""
for n in args.text:
    with open(n) as src:
        text = src.read().replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # Extract RAKE keyword phrases from the sentence and file the
        # sentence under every phrase it produced.
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text

# Persist the updated index.
with open('src/index.json', 'w') as outfile:
    json.dump(index, outfile)
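
# Example invocation (a sketch; the script name and the input filenames
# "notes1.txt" / "notes2.txt" are hypothetical):
#
#   python build_index.py notes1.txt notes2.txt
#
# Afterwards, each keyword phrase in src/index.json maps to a list of
# entries of the form:
#   {"filename": "notes1.txt", "sentence": "...", "key": "keyword phrase"}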