import json
import argparse
import sys
from nltk.tokenize import sent_tokenize, word_tokenize
from rake_nltk import Rake
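
# Note: sent_tokenize and Rake rely on NLTK data ("punkt" for sentence
# tokenization, "stopwords" for RAKE). If that data is not already present,
# a one-off download is needed (an assumption about the local setup, not
# part of the original script):
#
#   import nltk; nltk.download("punkt"); nltk.download("stopwords")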

# RAKE keyword extractor; uses NLTK's English stopwords by default
r = Rake()

# One or more text files to index, given on the command line
ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# Start from the existing index if one can be loaded, otherwise from scratch
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# build the index of sentences organized by keywords
alltext = ""

for n in args.text:
    text = open(n).read()
    text = text.replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # Rank phrases within this sentence and file the sentence under each one
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text  # full concatenated text (not used further below)

#print(index)

# Write the updated index out as JSON
with open('index.json', 'w') as outfile:
    json.dump(index, outfile)
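
# Usage sketch (the script filename and input filenames below are hypothetical,
# not given in the source):
#
#   python build_index.py chapter1.txt chapter2.txt
#
# This writes index.json, mapping each RAKE-ranked phrase to the sentences it
# was extracted from, e.g.:
#
#   {"some ranked phrase": [{"filename": "chapter1.txt",
#                            "sentence": "A sentence containing the phrase.",
#                            "key": "some ranked phrase"}]}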