migrated all files

master
bootje 5 years ago
parent 35c2ed34f3
commit 683c9af01a

BIN
website/.DS_Store vendored

Binary file not shown.

@ -1,6 +0,0 @@
Platform: FaceApp https://www.faceapp.com
Initial release: December 31, 2016
Type: Image editing
Description: FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence.[1][2][3] The app can transform a face to make it smile, look younger, look older, or change gender.
Original Terms of Service: https://www.faceapp.com/privacy-en.html

@ -1,11 +0,0 @@
[('platform', 'FaceApp'), ('Type', 'Image editing'), ('Initial release', 'December 31, 2016'), ('Type', 'Image editing')]
Platform, FaceApp https://www.faceapp.com
Initial release: December 31, 2016
Type:Image editing
Description: FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence.[1][2][3] The app can transform a face to make it smile, look younger, look older, or change gender.
Original Terms of Service: https://www.faceapp.com/privacy-en.html

@ -1 +0,0 @@
If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . 
If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account . If you permit others to use your account credentials , you are responsible for the activities of such users that occur in connection with your account .

@ -1,24 +0,0 @@
# # for new_file in tokens_without_stopwords:
# appendFile = open('tokenized_words.txt', 'a')
# appendFile.write(" " + new_file)
# appendFile.close()
# #shows only stopwords
# processed_word_list = []
# for word in tokenized:
# # print(word)
# if word not in all_stopwords:
# processed_word_list.append('*')
# else:
# processed_word_list.append(word)
# print(processed_word_list)
# # # result putting in a graph
# top_words_plot = frequency_word.plot(10)
# print(top_words_plot)

File diff suppressed because one or more lines are too long

@ -10,24 +10,20 @@ from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist from nltk.probability import FreqDist
from nltk.corpus import stopwords from nltk.corpus import stopwords
import base64 import base64
nltk.download('stopwords')
nltk.download('stopwords')
# infofile = open('faceapp_infos.txt','r')
# infotext = infofile.read()
#open the txt file, read, and tokenize # faceapp_file = open('faceapp.txt','r')
file = open('faceapp.txt','r') with open('faceapp.txt', 'r') as faceapp_file:
text = file.read() faceapp_text = faceapp_file.read()
text_list = text.split("\n\n") faceapp_text_list = faceapp_text.split("\n\n")
#not sure if this works.. with open('russia-estonia.txt', 'r') as russia_file:
x = 1 russia_text = russia_file.read()
russia_text_list = russia_text.split("\n\n")
t_file = open('russia-estonia.txt', 'r')
t_text = t_file.read()
#stopwords #stopwords
default_stopwords = set(stopwords.words('english')) default_stopwords = set(stopwords.words('english'))
@ -87,42 +83,47 @@ print('''<!DOCTYPE html>
.tos_wrapper { .tos_wrapper {
width: 50%; # background-color: yellow;
width: 48%;
float: left; float: left;
margin-right: 20px;
} }
.t_wrapper { .t_wrapper {
width: 50%; # background-color: green;
width: 48%;
float: right; float: right;
} }
.NNP { # .NNP {
background-color: pink; # background-color: pink;
} # }
.VBP { # .VBP {
} # }
.VBP:hover { # .VBP:hover {
background-color: gold; # background-color: gold;
} # }
.NN { # .NN {
background-color: LightSkyBlue; # background-color: LightSkyBlue;
} # }
.NNS { # .NNS {
background-color: Aquamarine; # background-color: Aquamarine;
} # }
.t_img { .t_img {
# background-color: Aquamarine;
font-family: SourceCodePro; font-family: SourceCodePro;
font-size: 10pt; font-size: 10pt;
float: left; float: left;
} }
.info { .info {
# background-color: LightSkyBlue;
font-family: SourceCodePro; font-family: SourceCodePro;
font-size: 10pt; font-size: 10pt;
width: 60%; width: 60%;
@ -133,6 +134,7 @@ print('''<!DOCTYPE html>
} }
.t_info { .t_info {
# background-color: LightSkyBlue;
font-family: SourceCodePro; font-family: SourceCodePro;
font-size: 10pt; font-size: 10pt;
width: 90%; width: 90%;
@ -144,22 +146,25 @@ print('''<!DOCTYPE html>
.paragraph { .paragraph {
# background-color: gold;
font-family: SourceCodePro; font-family: SourceCodePro;
font-weight: regular; font-weight: regular;
letter-spacing: -0.5px; letter-spacing: -0.5px;
width: 70%; width: 80%;
float: right; float: right;
} }
.t_paragraph { .t_paragraph {
# background-color: gold;
font-family: SourceCodePro; font-family: SourceCodePro;
font-weight: regular; font-weight: regular;
letter-spacing: -0.5px; letter-spacing: -0.5px;
width: 70%; width: 80%;
float: right; float: right;
} }
.top_words { .top_words {
# background-color: purple;
font-family: Belgika; font-family: Belgika;
font-weight: 8th; font-weight: 8th;
font-size: 9pt; font-size: 9pt;
@ -168,6 +173,7 @@ print('''<!DOCTYPE html>
} }
.t_top_words { .t_top_words {
# background-color: purple;
font-family: Belgika; font-family: Belgika;
font-weight: 8th; font-weight: 8th;
font-size: 9pt; font-size: 9pt;
@ -175,6 +181,16 @@ print('''<!DOCTYPE html>
float: left; float: left;
} }
.chosen_words {
# background-color: pink;
line-height: 0.1;
}
.t_chosen_words {
# background-color: pink;
line-height: 0.1;
}
</style> </style>
</head> </head>
<body> <body>
@ -191,7 +207,7 @@ $('span').click(
function(){ function(){
var selectedclass = $(this).attr('class'); var selectedclass = $(this).attr('class');
$('span').css('background-color', 'white').css('color','black'); // reset $('span').css('background-color', 'white').css('color','black'); // reset
$('span.' + selectedclass).css('background-color', 'red'); //highlighting the select $('span.' + selectedclass).css('background-color', 'lightgreen'); //highlighting the select
}); });
@ -231,8 +247,9 @@ print('</div>')
print('<div class ="paragraph">') print('<div class ="paragraph">')
# for paragraph in text_list: # for paragraph in faceapp_text_list:
tokenized = word_tokenize(text) # #faceapp_text
tokenized = word_tokenize(faceapp_text)
tagged = pos_tag(tokenized) tagged = pos_tag(tokenized)
print('<p>') print('<p>')
for word, pos in tagged: for word, pos in tagged:
@ -243,14 +260,15 @@ print('</div>')
#colonial words list #colonial words list
print('<div class="top_words"> <span style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</span>') print('<div class="top_words"> <div style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</div>')
tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized if words.lower() not in all_stopwords) tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized if words.lower() not in all_stopwords)
frequency_word = FreqDist(tokens_without_stopwords) frequency_word = FreqDist(tokens_without_stopwords)
top_words = tokens_without_stopwords.most_common(20) top_words = tokens_without_stopwords.most_common(20)
for chosen_words, frequency in top_words: for chosen_words, frequency in top_words:
print('<br><span class="chosen_words" >{}({}) </span>'.format(chosen_words, frequency)) print('<br><div class="chosen_words" >{}({}) </div>'.format(chosen_words, frequency))
print('</div>') print('</div>')
@ -287,7 +305,7 @@ print('</div>')
#ToS text #ToS text
print('<div class="t_paragraph">') print('<div class="t_paragraph">')
t_tokenized = word_tokenize(t_text) t_tokenized = word_tokenize(russia_text)
t_tagged = pos_tag(t_tokenized) t_tagged = pos_tag(t_tokenized)
for t_word, t_pos in t_tagged: for t_word, t_pos in t_tagged:
@ -297,14 +315,14 @@ print('</div>')
#treaty colonial words list #treaty colonial words list
print('<div class="t_top_words" > <span style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</span>') print('<div class="t_top_words" > <div style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</div>')
t_tokens_without_stopwords = nltk.FreqDist(words.lower() for words in t_tokenized if words.lower() not in all_stopwords) t_tokens_without_stopwords = nltk.FreqDist(words.lower() for words in t_tokenized if words.lower() not in all_stopwords)
t_frequency_word = FreqDist(t_tokens_without_stopwords) t_frequency_word = FreqDist(t_tokens_without_stopwords)
t_top_words = t_tokens_without_stopwords.most_common(20) t_top_words = t_tokens_without_stopwords.most_common(20)
for t_chosen_words, t_frequency in t_top_words: for t_chosen_words, t_frequency in t_top_words:
print('<br><span class="t_chosen_words" >{}({}) </span>'.format(t_chosen_words, t_frequency)) print('<br><div class="t_chosen_words" >{}({}) </div>'.format(t_chosen_words, t_frequency))
print('</div>') print('</div>')

File diff suppressed because it is too large Load Diff

@ -0,0 +1,334 @@
from __future__ import division
import glob
from nltk import *
import re
import nltk
import codecs
from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import base64
nltk.download('stopwords')
# Load the two source texts and build the combined stop-word set.
# faceapp_file = open('faceapp.txt','r')
with open('faceapp.txt', 'r') as faceapp_file:
    faceapp_text = faceapp_file.read()
faceapp_text_list = faceapp_text.split("\n\n")  # paragraphs are blank-line separated

with open('russia-estonia.txt', 'r') as russia_file:
    russia_text = russia_file.read()
russia_text_list = russia_text.split("\n\n")

# stopwords: NLTK's English list plus a project list, one word per line.
default_stopwords = set(stopwords.words('english'))
# Fix: close the stopwords file instead of leaking the handle.
with codecs.open('stopwords.txt', 'r') as stopwords_file:
    custom_stopwords = set(stopwords_file.read().splitlines())
all_stopwords = default_stopwords | custom_stopwords
# Emit the static HTML head: web fonts, the two-column layout CSS, and the
# jQuery handler that highlights every <span> sharing the clicked POS class.
# Fix: the stylesheet used Python-style '#' comments, which are invalid CSS
# and corrupted the commented-out rules; converted to standard /* */ comments.
# NOTE(review): the three Belgika @font-face rules all declare the same family
# name (last one wins) and `font-weight: 8th` is not valid CSS -- confirm the
# intended weights. The checkbox's onclick calls myFunction(), which is never
# defined on this page -- confirm whether it should be removed or implemented.
print('''<!DOCTYPE html>
<html>
<head>
<script src="https://code.jquery.com/jquery-3.5.0.min.js"></script>
<meta charset="utf-8">
<title></title>
<style>
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.svg#filename") format("svg");
}
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.svg#filename") format("svg");
}
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.svg#filename") format("svg");
}
@font-face {
font-family: "SourceCodePro";
src: url("http://bohyewoo.com/webfonts/Source_Code_Pro/SourceCodePro-Regular.ttf");
}
body {
background-color: ghostwhite;
/* font-family: Belgika; */
/* font-weight: 8th; */
/* letter-spacing: -0.3px; */
font-size: 14px;
line-height: 1.2;
}
.tos_wrapper {
/* background-color: yellow; */
width: 48%;
float: left;
margin-right: 20px;
}
.t_wrapper {
/* background-color: green; */
width: 48%;
float: right;
}
/*
.NNP {
background-color: pink;
}
.VBP {
}
.VBP:hover {
background-color: gold;
}
.NN {
background-color: LightSkyBlue;
}
.NNS {
background-color: Aquamarine;
}
*/
.t_img {
/* background-color: Aquamarine; */
font-family: SourceCodePro;
font-size: 10pt;
float: left;
}
.info {
/* background-color: LightSkyBlue; */
font-family: SourceCodePro;
font-size: 10pt;
width: 60%;
float: left;
border: 1px solid black;
padding:10px;
margin-bottom: 50px;
}
.t_info {
/* background-color: LightSkyBlue; */
font-family: SourceCodePro;
font-size: 10pt;
width: 90%;
float: left;
border: 1px solid black;
padding:10px;
margin-bottom: 50px;
}
.paragraph {
/* background-color: gold; */
font-family: SourceCodePro;
font-weight: regular;
letter-spacing: -0.5px;
width: 80%;
float: right;
}
.t_paragraph {
/* background-color: gold; */
font-family: SourceCodePro;
font-weight: regular;
letter-spacing: -0.5px;
width: 80%;
float: right;
}
.top_words {
/* background-color: purple; */
font-family: Belgika;
font-weight: 8th;
font-size: 9pt;
width: 15%;
float: left;
}
.t_top_words {
/* background-color: purple; */
font-family: Belgika;
font-weight: 8th;
font-size: 9pt;
width: 15%;
float: left;
}
.chosen_words {
/* background-color: pink; */
line-height: 0.1;
}
.t_chosen_words {
/* background-color: pink; */
line-height: 0.1;
}
</style>
</head>
<body>
<input type="checkbox" id="myCheck" onclick="myFunction()"> Noun
<p id="text" style="display:none">Checkbox is CHECKED!</p>
<script>
$(document).ready(function(){
$('span').click(
function(){
var selectedclass = $(this).attr('class');
$('span').css('background-color', 'white').css('color','black'); // reset
$('span.' + selectedclass).css('background-color', 'lightgreen'); //highlighting the select
});
})
</script>
''')
# ---- left column: FaceApp terms of service ----
print('<div class ="tos_wrapper">')

# Insert the FaceApp logo, inlined as a base64 data URI.
# Fix: close the image file, close the <h1>, and use a valid MIME type
# ('data:image/png') so browsers actually render the inline image.
with open('img/faceapp_logo.png', 'rb') as logo_file:
    FaceApp_img_url = base64.b64encode(logo_file.read()).decode('utf-8')
FaceApp_image = '<div class="t_img"><h1>FaceApp</h1><img style="width:90%" src="data:image/png;base64,{}"></div>'.format(FaceApp_img_url)
print(FaceApp_image)

# Info box: key/value facts about the service.
# Fix: removed the duplicated ('Type', 'Image editing') entry.
print('<div class ="info">')
infotext = [('Service', 'FaceApp'), ('Type', 'Image editing'), ('Initial release', 'December 31, 2016'), ('source', '<a href="https://www.faceapp.com/terms-en.html">link</a>'), ('Description', 'FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence. The app can transform a face to make it smile, look younger, look older, or change gender.')]
for title, info in infotext:
    print('<span class="info_{0}"><div class="info_title" style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >{0}</div><div class="info_content">{1}</div></span><br>'.format(title, info))
print('</div>')

# ToS text: every word wrapped in a span classed by its POS tag, so the
# jQuery handler in the page head can highlight matching classes.
print('<div class ="paragraph">')
tokenized = word_tokenize(faceapp_text)
tagged = pos_tag(tokenized)
print('<p>')
for word, pos in tagged:
    print('<span class="{}">{}</span>'.format(pos, word))
print('</p>')
print('</div>')

# "Colonial words" list: the 20 most frequent non-stopword tokens.
# (Removed the unused `frequency_word = FreqDist(...)` re-wrap.)
print('<div class="top_words"> <div style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</div>')
tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized if words.lower() not in all_stopwords)
top_words = tokens_without_stopwords.most_common(20)
for chosen_words, frequency in top_words:
    print('<br><div class="chosen_words" >{}({}) </div>'.format(chosen_words, frequency))
print('</div>')
# ---- right column: Peace Treaty of Tartu ----
print('</div><div class="t_wrapper">')

# Insert the treaty photograph, inlined as a base64 data URI.
# https://upload.wikimedia.org/wikipedia/commons/1/15/Joffe_signing_the_Treaty_of_Tartu.jpg
# Fix: close the image file, fix '<h1>...<h1>' to a closing tag, and use a
# valid MIME type ('data:image/jpeg') so the inline image renders.
with open('img/tartu.jpeg', 'rb') as treaty_file:
    img_url = base64.b64encode(treaty_file.read()).decode('utf-8')
t_image = '<div class="t_img"><h1>Peace Treaty of Tartu</h1><img style="width:90%" src="data:image/jpeg;base64,{}"></div>'.format(img_url)
print(t_image)

# Info box: key/value facts about the treaty.
print('<div class ="t_info">')
t_infotext = [('Name of Treaty', 'Peace Treaty of Tartu'), ('Date', 'February 2, 1920'), ('Location', 'Tartu, Estonia'), ('Signed', 'February 2, 1920'), ('Type', 'bilateral peace treaty'), ('source', '<a href="https://en.wikipedia.org/wiki/Treaty_of_Tartu_(RussianEstonian)">link</a>'), ('Description', 'The Tartu Peace Treaty or Treaty of Tartu is a peace treaty between Estonia and Russian Soviet Federative Socialist Republic signed on 2 February 1920, ending the Estonian War of Independence.')]
for t_title, t_info in t_infotext:
    print('<span class="t_info-{0}"><div class="info_t_title" style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >{0}</div><div class="t_info_content">{1}</div></span><br>'.format(t_title, t_info))
print('</div>')

# Treaty text, POS-tagged word by word.
# Fix: the stray print('<p>') after the loop left an unclosed paragraph;
# open <p> before the words and close it afterwards (mirrors the left column).
print('<div class="t_paragraph">')
t_tokenized = word_tokenize(russia_text)
t_tagged = pos_tag(t_tokenized)
print('<p>')
for t_word, t_pos in t_tagged:
    print('<span class="{}">{}</span>'.format(t_pos, t_word))
print('</p>')
print('</div>')

# Treaty "colonial words" list: 20 most frequent non-stopword tokens.
# (Removed the unused `t_frequency_word = FreqDist(...)` re-wrap.)
print('<div class="t_top_words" > <div style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</div>')
t_tokens_without_stopwords = nltk.FreqDist(words.lower() for words in t_tokenized if words.lower() not in all_stopwords)
t_top_words = t_tokens_without_stopwords.most_common(20)
for t_chosen_words, t_frequency in t_top_words:
    print('<br><div class="t_chosen_words" >{}({}) </div>'.format(t_chosen_words, t_frequency))
print('</div>')
print('</div>')
print('''</body></html>''')

@ -1,178 +0,0 @@
from __future__ import division
import glob
from nltk import *
import re
import nltk
import codecs
from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist
from nltk.corpus import stopwords
# Download the NLTK stopword corpus (no-op if already cached locally).
nltk.download('stopwords')

# Open the txt file and read it.
# Fix: use a context manager and stop shadowing the `file` builtin;
# the original handle was never closed.
with open('faceapp.txt', 'r') as faceapp_file:
    text = faceapp_file.read()

# stopwords: NLTK's English list plus a project list, one word per line.
default_stopwords = set(stopwords.words('english'))
# Fix: close the stopwords file instead of leaking the handle.
with codecs.open('stopwords.txt', 'r') as stopwords_file:
    custom_stopwords = set(stopwords_file.read().splitlines())
all_stopwords = default_stopwords | custom_stopwords
# Emit the static HTML head: Belgika web fonts, base typography, and a
# background colour per POS tag (NNP/VBP/NN/NNS) that colours the tagged
# spans printed below. The literal's bytes are the page output, so it is
# left untouched.
# NOTE(review): all three Belgika @font-face rules declare the same family
# name, so the last declaration wins -- confirm the intended font weights.
print('''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title></title>
<style>
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.svg#filename") format("svg");
}
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.svg#filename") format("svg");
}
@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.svg#filename") format("svg");
}
body {
font-family: helvetica;
font-weight: regular;
letter-spacing: 0.5px;
font-size: 20px;
line-height: 1.2;
}
.NNP {
background-color: pink;
}
.VBP {
background-color: gold;
}
.NN {
background-color: LightSkyBlue;
}
.NNS {
background-color: Aquamarine;
}
.paragraph {
width: 70%;
float: right;
}
.top_words {
font-size: 9pt;
width: 25%;
float: left;
}
</style>
</head>
<body>''')
# my stopwords are common words I don't want to count, like "a", "an", "the".
# Render the ToS sentence by sentence, each word wrapped in a span classed
# by its POS tag so the CSS in the head colours it.
print('<div class ="paragraph">')
for sentence in sent_tokenize(text):
    print('<span>')
    tokenized = word_tokenize(sentence)
    tagged = pos_tag(tokenized)
    # for HTML
    for word, pos in tagged:
        print('<span class="{}">{}</span>'.format(pos, word))
    print('</span>')
print('</div>')

# filtering stopwords
# Fix: count frequencies over the WHOLE text. The original read `tokenized`
# after the loop, which only holds the final sentence, so the top-words list
# ignored everything else. Also removed the stray debug
# print(tokens_without_stopwords), which dumped a FreqDist repr into the HTML,
# and the unused `frequency_word` re-wrap.
tokens_without_stopwords = nltk.FreqDist(
    word.lower() for word in word_tokenize(text) if word.lower() not in all_stopwords)

print('<div class="top_words"> colonial words:')
top_words = tokens_without_stopwords.most_common(10)
for chosen_words, frequency in top_words:
    print('<br><span class="chosen_words">{}({}) </span>'.format(chosen_words, frequency))
print('''</div></body></html>''')
# for new_file in tokens_without_stopwords:
# appendFile = open('tokenized_words.txt', 'a')
# appendFile.write(" " + new_file)
# appendFile.close()
# #shows only stopwords
# processed_word_list = []
# for word in tokenized:
# # print(word)
# if word not in all_stopwords:
# processed_word_list.append('*')
# else:
# processed_word_list.append(word)
# print(processed_word_list)
# # # result putting in a graph
# top_words_plot = frequency_word.plot(10)
# print(top_words_plot)

@ -1 +0,0 @@
1 eligibility must least 13 years age access use services 18 years age ( legal majority live ) may access use services supervision parent guardian agrees bound agreement parent legal guardian user age 18 ( majority ) agree fully responsible acts omissions connection services accessing using services behalf another person entity represent authorized accept agreement agrees responsible us violates 2 user accounts account security choose login services via third-party platform social media network need use credentials ( e.g. username password ) online must maintain security third party account promptly notify us discover suspect someone accessed without permission permit others use account credentials responsible activities users occur connection 1 eligibility must least 13 years age access use services 18 years age ( legal majority live ) may access use services supervision parent guardian agrees bound agreement parent legal guardian user age 18 ( majority ) agree fully responsible acts omissions connection services accessing using services behalf another person entity represent authorized accept agreement agrees responsible us violates 2 user accounts account security choose login services via third-party platform social media network need use credentials ( e.g. username password ) online must maintain security third party account promptly notify us discover suspect someone accessed without permission permit others use account credentials responsible activities users occur connection 1
Loading…
Cancel
Save