import html

import nltk
from nltk.tokenize import word_tokenize

# Open the two source texts as text1 and text2.
# The encoding is explicit so decoding does not depend on the platform default.
with open('./rosas.txt', 'r', encoding='utf-8') as result1:
    text1 = result1.read()

with open('./unthought.txt', 'r', encoding='utf-8') as result2:
    text2 = result2.read()


# HTML TOKENIZER (word + span)
def to_html(text, text_class):
    """Split ``text`` into word tokens and wrap each one in a ``<span>`` tag.

    Args:
        text: the text to tokenize (passed to ``word_tokenize``).
        text_class: value used for the ``class`` attribute of every span.

    Returns:
        A list of HTML strings, one per token, in token order.

    e.g. ``to_html('Lorem ipsum dolor', 'test')`` returns
        [
            '<span class="test">Lorem</span>',
            '<span class="test">ipsum</span>',
            '<span class="test">dolor</span>'
        ]
    """
    # NOTE(review): word_tokenize relies on NLTK tokenizer data being
    # installed on the machine running the script -- confirm before deploying.
    # Both the class name and the token are escaped so that characters such
    # as '<', '&' or '"' coming from the text files cannot break the markup.
    css_class = html.escape(text_class, quote=True)
    return [
        '<span class="' + css_class + '">'
        + html.escape(word, quote=False)
        + '</span>'
        for word in word_tokenize(text)
    ]


# WEAVER
def weave(text1, text2, pattern, repetition, start1=0, start2=0, separator=''):
    """Weave two token sequences following a pattern of ``A`` and ``B``.

    The pattern is a string such as ``'ABABAAAABBBB'``: every ``A`` takes the
    next item from ``text1`` and every ``B`` takes the next item from
    ``text2``. Any other character in the pattern is ignored.

    Args:
        text1: indexable sequence of strings consumed by ``A``.
        text2: indexable sequence of strings consumed by ``B``.
        pattern: string of ``A`` / ``B`` characters.
        repetition: how many times the pattern is repeated.
        start1: index of the first item taken from ``text1``.
        start2: index of the first item taken from ``text2``.
        separator: string inserted between woven items (default: nothing).

    Returns:
        The woven items joined into a single string. Weaving stops early,
        without raising, as soon as the sequence the pattern asks for has no
        item left at its cursor.
    """
    sources = {'A': text1, 'B': text2}
    cursors = {'A': start1, 'B': start2}
    woven = []

    for choice in pattern * repetition:
        if choice not in sources:
            continue

        source = sources[choice]
        cursor = cursors[choice]

        # Running past the end used to raise IndexError; stop weaving instead.
        if cursor >= len(source):
            break

        woven.append(source[cursor])
        cursors[choice] = cursor + 1

    # A single join avoids rebuilding the string on every iteration.
    return separator.join(woven)


# DEMO
text_a = to_html(text1, 'text1')
text_b = to_html(text2, 'text2')

# A newline between spans keeps the words from running together when the
# page is rendered (whitespace between inline elements shows as a space).
text_embroidery = weave(
    text_a, text_b, 'AAAABBBBABABAAABBB', 16, separator='\n'
)

# BASIC HTML5 boilerplate
html_boilerplate = '''<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Document</title>
</head>
<body>
{{{contents}}}
</body>
</html>
'''

# The {{{contents}}} line is replaced with our text_embroidery.
html_out = html_boilerplate.replace('{{{contents}}}', text_embroidery)

# Write the result to the result.html file (UTF-8, matching the meta charset).
with open('result.html', 'w', encoding='utf-8') as index:
    index.write(html_out)