You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.4 KiB
Python
89 lines
2.4 KiB
Python
3 years ago
|
import html

import nltk
from nltk.tokenize import word_tokenize
|
||
|
|
||
|
# open two text files as text1 and text2
|
||
|
# Decode both inputs explicitly as UTF-8: without an `encoding` argument
# open() falls back to the platform's locale encoding, which is not UTF-8
# on every system and would make the result machine-dependent.
with open('./rosas.txt', 'r', encoding='utf-8') as result1:
    text1 = result1.read()

with open('./unthought.txt', 'r', encoding='utf-8') as result2:
    text2 = result2.read()
|
||
|
|
||
|
|
||
|
# HTML TOKENIZER (word + span)
|
||
|
# returns a list of tags built from a text
|
||
|
# each item in the list is transformed into a html <span> tag,
|
||
|
# with the class defined by the text_class argument
|
||
|
#
|
||
|
# e.g.: to_html('Lorem ipsum dolor', 'test')
|
||
|
# return
|
||
|
# [
#   '<span class="test">Lorem</span>',
#   '<span class="test">ipsum</span>',
#   '<span class="test">dolor</span>'
# ]
|
||
|
|
||
|
def to_html(text, text_class):
    """Tokenize *text* and wrap every token in an HTML ``<span>`` tag.

    Args:
        text: The raw text; it is split into tokens with ``word_tokenize``.
        text_class: Value of the ``class`` attribute given to every span.

    Returns:
        A list with one ``'<span class="...">token</span>'`` string per
        token, in the order the tokenizer produced them.

    Example:
        ``to_html('Lorem ipsum', 'test')`` returns
        ``['<span class="test">Lorem</span>',
        '<span class="test">ipsum</span>']``.
    """
    # Escape both pieces so that characters such as '&' or '<' in the source
    # text (or a stray quote in the class name) cannot break the markup.
    # Quotes inside the token are left alone: they are harmless in element
    # content and escaping them would only bloat the output.
    opening_tag = '<span class="' + html.escape(text_class, quote=True) + '">'
    return [
        opening_tag + html.escape(word, quote=False) + '</span>'
        for word in word_tokenize(text)
    ]
|
||
|
|
||
|
|
||
|
# WEAVER
|
||
|
# weave two texts following a pattern structured as a string of A and B
|
||
|
# e.g.: ABABAAAABBBB
|
||
|
# the repetition argument specifies how many times the pattern is repeated
|
||
|
# the start1 and start2 arguments specify the starting point in the texts' array
|
||
|
# it returns a string
|
||
|
|
||
|
def weave(text1, text2, pattern, repetition, start1=0, start2=0):
    """Interleave items of two sequences following an A/B pattern.

    The pattern is repeated ``repetition`` times and read one character at a
    time: an ``'A'`` appends the next item of ``text1``, a ``'B'`` appends the
    next item of ``text2``. Any other character is ignored. Each text has its
    own cursor, so items are consumed in order and never reused.

    Args:
        text1: Sequence of strings selected by ``'A'``.
        text2: Sequence of strings selected by ``'B'``.
        pattern: String made of ``'A'`` and ``'B'`` characters.
        repetition: How many times ``pattern`` is repeated.
        start1: Index of the first item taken from ``text1``.
        start2: Index of the first item taken from ``text2``.

    Returns:
        The selected items concatenated into a single string. If the pattern
        asks for an item past the end of a text, weaving stops there and the
        part woven so far is returned instead of raising ``IndexError``.
    """
    sources = {'A': text1, 'B': text2}
    cursors = {'A': start1, 'B': start2}
    pieces = []
    for choice in pattern * repetition:
        if choice not in sources:
            # Characters other than 'A' / 'B' select nothing.
            continue
        try:
            piece = sources[choice][cursors[choice]]
        except IndexError:
            # The requested text is exhausted: keep what has been woven so
            # far rather than crashing on a pattern longer than the input.
            break
        pieces.append(piece)
        cursors[choice] += 1
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(pieces)
|
||
|
|
||
|
# DEMO
|
||
|
|
||
|
|
||
|
# Turn both texts into lists of <span> strings, one per token, tagged with
# a distinct class so the two sources can be styled separately.
text_a = to_html(text1, 'text1')
text_b = to_html(text2, 'text2')
# The pattern holds 9 'A' and 9 'B'; repeated 16 times it asks for 144
# tokens from each text.
text_embroidery = weave(text_a, text_b, 'AAAABBBBABABAAABBB', 16)

# BASIC HTML5 boilerplate
html_boilerplate = '''
<!DOCTYPE html >
<html lang="en" >
<head >
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title > Document </title >
<link rel="stylesheet" href="style.css" />
</head >
<body >
<div class= "text">

{{{contents}}}

</div>
</body>
</html>
'''


# the {{{contents}}} line will be replaced with our text_embroidery
html_out = html_boilerplate.replace('{{{contents}}}', text_embroidery)

# Write the result to result.html. The page declares charset="UTF-8", so the
# file is encoded as UTF-8 explicitly instead of relying on the platform
# default encoding.
with open('result.html', 'w', encoding='utf-8') as index:
    index.write(html_out)
|