text_weaving/weave.py

import html

import nltk
from nltk.tokenize import word_tokenize

# Load the two source texts that will be woven together.
with open('./rosas.txt', 'r') as result1, \
        open('./unthought.txt', 'r') as result2:
    text1 = result1.read()
    text2 = result2.read()


# HTML TOKENIZER (word + span)
# Returns a list of HTML tags built from a text.
# Each token of the text is wrapped in an HTML <span> tag
# whose class is given by the text_class argument.
#
# e.g. to_html('Lorem ipsum dolor', 'test')
# returns
# [
#   '<span class="test">Lorem</span>',
#   '<span class="test">ipsum</span>',
#   '<span class="test">dolor</span>'
# ]

def to_html(text, text_class):
    """Tokenize *text* and wrap every token in an HTML ``<span>`` tag.

    Args:
        text: Plain text to split into tokens with ``word_tokenize``.
        text_class: Value placed in the ``class`` attribute of every span.

    Returns:
        A list of strings, one ``<span class="...">token</span>`` per token,
        in the order the tokens appear in *text*.
    """
    # The class name lands inside a double-quoted attribute, so quotes must be
    # escaped as well (html.escape does that by default).
    opening_tag = '<span class="' + html.escape(text_class) + '">'
    # Tokens are plain text, not markup: escape &, < and > so that characters
    # from the source text cannot break the generated HTML.
    return [
        opening_tag + html.escape(word, quote=False) + '</span>'
        for word in word_tokenize(text)
    ]


# WEAVER
# Weaves two texts following a pattern written as a string of A's and B's,
# e.g. ABABAAAABBBB (A takes the next item of text1, B the next item of text2).
# The repetition argument specifies how many times the pattern is repeated.
# The start1 and start2 arguments specify the starting index in each text's list.
# It returns a string.

def weave(text1, text2, pattern, repetition, start1=0, start2=0):
    """Interleave items of two sequences according to an A/B pattern.

    The pattern is repeated *repetition* times and read character by
    character: ``'A'`` appends the next item of *text1*, ``'B'`` appends the
    next item of *text2*. Any other character is ignored.

    Args:
        text1: Sequence of strings consumed by ``'A'``.
        text2: Sequence of strings consumed by ``'B'``.
        pattern: String made of ``'A'`` and ``'B'`` characters.
        repetition: Number of times the pattern is repeated.
        start1: Index of the first item taken from *text1*.
        start2: Index of the first item taken from *text2*.

    Returns:
        The selected items concatenated into a single string. If the pattern
        asks for an item past the end of a sequence, weaving stops there and
        the result built so far is returned.
    """
    sources = {'A': text1, 'B': text2}
    cursors = {'A': start1, 'B': start2}
    pieces = []
    for choice in pattern * repetition:
        if choice not in sources:
            continue
        source = sources[choice]
        cursor = cursors[choice]
        # Stop instead of raising IndexError when a text has no items left.
        if cursor >= len(source):
            break
        pieces.append(source[cursor])
        cursors[choice] = cursor + 1
    # Join once at the end rather than growing a string inside the loop.
    return ''.join(pieces)

# DEMO


# Turn each source text into a list of <span> tags carrying its own class,
# then interleave the two lists according to the pattern.
text_a, text_b = to_html(text1, 'text1'), to_html(text2, 'text2')
text_embroidery = weave(
    text_a,
    text_b,
    pattern='AAAABBBBABABAAABBB',
    repetition=16,
)

# Basic HTML5 boilerplate used as the output page template.
# It links style.css and contains a {{{contents}}} marker inside
# <div class="text">; that marker is swapped for the woven text with
# str.replace further down in this script.
html_boilerplate = '''
<!DOCTYPE html >
<html lang="en" >
<head >
		<meta charset="UTF-8" />
		<meta http-equiv="X-UA-Compatible" content="IE=edge" />
		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
		<title > Document </title >
		<link rel="stylesheet" href="style.css" />
	</head >
	<body >
		<div class= "text">

            {{{contents}}}    
        
        </div>
	</body>
</html>
'''


# Substitute the {{{contents}}} placeholder with the woven text.
html_out = html_boilerplate.replace('{{{contents}}}', text_embroidery)

# Write the page to result.html. The template declares charset="UTF-8", so
# the file is encoded explicitly as UTF-8 instead of relying on the platform
# default encoding, which may differ (and would garble non-ASCII characters).
with open('result.html', 'w', encoding='utf-8') as index:
    index.write(html_out)
init 3 years ago			`import nltk`
			`from nltk.tokenize import word_tokenize`

			`# open two text files as text1 and text2`
			`with open('./rosas.txt', 'r') as result1:`
			`text1 = result1.read()`

			`with open('./unthought.txt', 'r') as result2:`
			`text2 = result2.read()`


			`# HTML TOKENIZER (word + span)`
			`# return a list of tag from a text`
			`# each item in the list is transformed into a html <span> tag,`
			`# with the class defined by the text_class argument`
			`#`
			`# es: to_html('Lorem ipsum dolor', 'test')`
			`# return`
			`# [`
			`# "<span class='test'>Lorem</span>",`
			`# "<span class='test'>ipsum</span>",`
			`# "<span class='test'>dolor</span>"`
			`# ]`

			`def to_html(text, text_class):`
			`text_html = []`
			`text_list = word_tokenize(text)`
			`for word in text_list:`
			`text_html += ['<span class="' + text_class + '">' + word + '</span>']`
			`return text_html`


			`# WEAVER`
			`# weave two texts following a pattern structured as a string of A and B`
			`# es: ABABAAAABBBB`
			`# the repetition argument specifies how many times the pattern is repeated`
			`# the start1 and start2 arguments specify the starting point in the texts' array`
			`# it returns a string`

			`def weave(text1, text2, pattern, repetition, start1=0, start2=0):`
			`embroidery = ''`
			`text1_cursor = start1`
			`text2_cursor = start2`
			`repeated_pattern = pattern * repetition`
			`for choice in repeated_pattern:`
			`if choice == 'A':`
			`embroidery += text1[text1_cursor]`
			`text1_cursor += 1`
			`if choice == 'B':`
			`embroidery += text2[text2_cursor]`
			`text2_cursor += 1`
			`return embroidery`

			`# DEMO`


			`text_a = to_html(text1, 'text1')`
			`text_b = to_html(text2, 'text2')`
			`text_embroidery = weave(text_a, text_b, 'AAAABBBBABABAAABBB', 16)`

			`# BASIC HTML5 boilerplate`
			`html_boilerplate = '''`
			`<!DOCTYPE html >`
			`<html lang="en" >`
			`<head >`
			`<meta charset="UTF-8" />`
			`<meta http-equiv="X-UA-Compatible" content="IE=edge" />`
			`<meta name="viewport" content="width=device-width, initial-scale=1.0" />`
			`<title > Document </title >`
			`<link rel="stylesheet" href="style.css" />`
			`</head >`
			`<body >`
			`<div class= "text">`

			`{{{contents}}}`

			`</div>`
			`</body>`
			`</html>`
			`'''`


			`# the {{{contents}}} line will be replaced with our text_embroidery`
			`html_out = html_boilerplate.replace('{{{contents}}}', text_embroidery)`

			`# Write the results in the index.html file`
			`with open('result.html', 'w') as index:`
			`index.write(html_out)`