You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

89 lines
2.4 KiB
Python

import html

import nltk
from nltk.tokenize import word_tokenize
# open two text files as text1 and text2
# Load both source texts fully into memory as strings.
# NOTE(review): no encoding is passed, so the platform default is used;
# confirm the encoding of the .txt files and pass encoding= explicitly.
with open('./rosas.txt', 'r') as result1:
    text1 = result1.read()
with open('./unthought.txt', 'r') as result2:
    text2 = result2.read()
# HTML TOKENIZER (word + span)
# returns a list of tags built from a text
# each item in the list is a token wrapped in an HTML <span> tag,
# with the class defined by the text_class argument
#
# e.g.: to_html('Lorem ipsum dolor', 'test')
# returns
# [
# '<span class="test">Lorem</span>',
# '<span class="test">ipsum</span>',
# '<span class="test">dolor</span>'
# ]
def to_html(text, text_class):
    """Tokenize *text* and wrap every token in an HTML ``<span>`` tag.

    Args:
        text: The plain text to split into tokens with ``word_tokenize``.
        text_class: Value of the ``class`` attribute given to every span.

    Returns:
        A list of strings, one ``<span class="...">token</span>`` per token,
        in the order the tokenizer produced them.
    """
    # Escape markup-significant characters so that a token such as "<" or
    # "&" (or a double quote in the class name) cannot break the HTML.
    safe_class = html.escape(text_class, quote=True)
    return [
        '<span class="{}">{}</span>'.format(
            safe_class, html.escape(token, quote=False)
        )
        for token in word_tokenize(text)
    ]
# WEAVER
# weaves two texts following a pattern structured as a string of A and B
# e.g.: ABABAAAABBBB
# the repetition argument specifies how many times the pattern is repeated
# the start1 and start2 arguments specify the starting index in each text's list
# it returns a string
def weave(text1, text2, pattern, repetition, start1=0, start2=0):
    """Interleave items of two texts following an A/B pattern.

    The pattern is repeated ``repetition`` times and read left to right:
    every ``'A'`` takes the next item of ``text1``, every ``'B'`` the next
    item of ``text2``. Any other character in the pattern is ignored.

    Args:
        text1: Indexable sequence of strings consumed by ``'A'``.
        text2: Indexable sequence of strings consumed by ``'B'``.
        pattern: String made of ``'A'`` and ``'B'`` characters.
        repetition: How many times the pattern is repeated.
        start1: Index of the first item taken from ``text1``.
        start2: Index of the first item taken from ``text2``.

    Returns:
        The selected items concatenated into a single string, with no
        separator added between them.

    Raises:
        IndexError: If the repeated pattern asks for more items than a
            text holds from its starting index.
    """
    repeated_pattern = pattern * repetition
    texts = {'A': text1, 'B': text2}
    cursors = {'A': start1, 'B': start2}

    # Fail up front with a message naming the short text; plain indexing
    # would raise the same IndexError, but without saying which one ran out.
    for label in ('A', 'B'):
        needed = repeated_pattern.count(label)
        available = len(texts[label])
        if needed and cursors[label] + needed > available:
            raise IndexError(
                "text {} has {} item(s); the pattern needs {} starting at "
                "index {}".format(label, available, needed, cursors[label])
            )

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for choice in repeated_pattern:
        if choice not in texts:
            continue
        pieces.append(texts[choice][cursors[choice]])
        cursors[choice] += 1
    return ''.join(pieces)
# DEMO
# Turn both texts into lists of <span> tags, then weave them together.
text_a = to_html(text1, 'text1')
text_b = to_html(text2, 'text2')
text_embroidery = weave(text_a, text_b, 'AAAABBBBABABAAABBB', 16)
# BASIC HTML5 boilerplate
html_boilerplate = '''
<!DOCTYPE html >
<html lang="en" >
<head >
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title > Document </title >
<link rel="stylesheet" href="style.css" />
</head >
<body >
<div class= "text">
{{{contents}}}
</div>
</body>
</html>
'''
# the {{{contents}}} line will be replaced with our text_embroidery
html_out = html_boilerplate.replace('{{{contents}}}', text_embroidery)
# Write the results in the result.html file.
# The page declares charset UTF-8, so the file is written as UTF-8
# rather than in whatever the platform default encoding happens to be.
with open('result.html', 'w', encoding='utf-8') as index:
    index.write(html_out)