worked on tag_comparison.py
parent
cba64a7b99
commit
778221bf41
Binary file not shown.
@ -0,0 +1,14 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
About:
|
||||
|
||||
Source:
|
||||
https://afrolegends.com/2016/12/14/colonial-treaties-in-africa-british-protection-treaty-with-the-itsekiri-of-nigeria-1884/
|
||||
|
||||
</body>
|
||||
</html>
|
@ -1,30 +0,0 @@
|
||||
import nltk

# Characters removed from every word so that "book" and "book!" are counted
# as the same word. The closing curly double quote is included alongside the
# opening one so quoted words are cleaned on both sides.
_PUNCTUATION = '.,:"!“”‘*()'
_STRIP_TABLE = str.maketrans('', '', _PUNCTUATION)

# Read the raw text once; the context manager closes the handle afterwards.
with open('faceapp.txt', 'r') as file:
    raw = file.read()

tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)

# Stopwords are common words that should not be counted ("a", "an", "the"),
# stored one per line in stopwords.txt.
with open('stopwords.txt') as stopword_file:
    stopwords = set(line.strip() for line in stopword_file)

# Maps each cleaned, lower-cased word to its number of occurrences.
wordcount = {}

for word in raw.lower().split():
    # One translate() pass replaces the chain of single-character replace()
    # calls and removes all punctuation characters at once.
    word = word.translate(_STRIP_TABLE)
    # Skip tokens that were pure punctuation, and skip stopwords.
    if not word or word in stopwords:
        continue
    wordcount[word] = wordcount.get(word, 0) + 1

# Print every occurrence of "a" together with its surrounding context.
faceapp.concordance('a')
|
||||
|
@ -0,0 +1,6 @@
|
||||
Platform: FaceApp https://www.faceapp.com
|
||||
Initial release: December 31, 2016
|
||||
Type: Image editing
|
||||
Description: FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence. The app can transform a face to make it smile, look younger, look older, or change gender.
|
||||
Original Terms of Service: https://www.faceapp.com/privacy-en.html
|
||||
|
@ -0,0 +1,11 @@
|
||||
[('platform', 'FaceApp'), ('Type', 'Image editing'), ('Initial release', 'December 31, 2016'), ('Type', 'Image editing')]
|
||||
|
||||
|
||||
|
||||
|
||||
Platform, FaceApp https://www.faceapp.com
|
||||
Initial release: December 31, 2016
|
||||
Type:Image editing
|
||||
Description: FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence.[1][2][3] The app can transform a face to make it smile, look younger, look older, or change gender.
|
||||
Original Terms of Service: https://www.faceapp.com/privacy-en.html
|
||||
|
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
Binary file not shown.
After Width: | Height: | Size: 75 KiB |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,24 @@
|
||||
|
||||
|
||||
# # for new_file in tokens_without_stopwords:
|
||||
# appendFile = open('tokenized_words.txt', 'a')
|
||||
# appendFile.write(" " + new_file)
|
||||
# appendFile.close()
|
||||
|
||||
|
||||
# #shows only stopwords
|
||||
# processed_word_list = []
|
||||
|
||||
# for word in tokenized:
|
||||
# # print(word)
|
||||
# if word not in all_stopwords:
|
||||
# processed_word_list.append('*')
|
||||
# else:
|
||||
# processed_word_list.append(word)
|
||||
# print(processed_word_list)
|
||||
|
||||
|
||||
|
||||
# # # result putting in a graph
|
||||
# top_words_plot = frequency_word.plot(10)
|
||||
# print(top_words_plot)
|
@ -0,0 +1,11 @@
|
||||
from bs4 import BeautifulSoup
from urllib.parse import quote as urlquote, unquote as urlunquote

# NOTE(review): this path is kept exactly as it was, but it names the script
# itself rather than a generated HTML page — confirm which file is meant to be
# inspected.
SOURCE_PATH = 'tag_comparison.py'

# Open read-only: opening the same file in 'w' mode would truncate it before
# anything could be read.
with open(SOURCE_PATH, 'r') as file:
    read = file.read()

print(read)

# Parse the markup that was just read and look up the first <span> carrying
# the "NN" class. find() takes the tag name first and the class second.
html = BeautifulSoup(read, 'html.parser')
line = html.find('span', class_='NN')
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -0,0 +1,193 @@
|
||||
from __future__ import division
|
||||
import glob
|
||||
from nltk import *
|
||||
import re
|
||||
|
||||
|
||||
import nltk
|
||||
import codecs
|
||||
from nltk import sent_tokenize, word_tokenize, pos_tag
|
||||
from nltk.probability import FreqDist
|
||||
from nltk.corpus import stopwords
|
||||
from PIL import Image
|
||||
import base64
|
||||
|
||||
nltk.download('stopwords')
|
||||
|
||||
# Read the text that the rest of the script tokenizes, tags and counts.
# The context manager closes the handle as soon as the content is in memory.
with open('faceapp.txt', 'r') as file:
    text = file.read()

# Stopwords: NLTK's English list combined with the project's own list, which
# is stored one word per line in stopwords.txt.
default_stopwords = set(stopwords.words('english'))
with codecs.open('stopwords.txt', 'r') as stopword_file:
    custom_stopwords = set(stopword_file.read().splitlines())
all_stopwords = default_stopwords | custom_stopwords
|
||||
|
||||
|
||||
|
||||
# Emit the document head: charset, web-font declarations and the stylesheet
# used by the blocks printed further down. The class names NNP, VBP, NN and
# NNS are the part-of-speech tags written as span classes for each token.
#
# Disabled declarations inside <style> use CSS comments (/* ... */); "#" is not
# a comment marker in CSS. "font-weight: regular" is not a valid value and has
# been replaced with "normal", which is what the browser fell back to anyway.
# NOTE(review): "font-weight: 8th" in .top_words is not valid CSS either, and
# the three "Belgika" @font-face rules declare no weight, so they cannot be
# selected individually — left untouched pending a decision on the intended
# weights.
print('''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title></title>
<style>

@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-40th-webfont.svg#filename") format("svg");
}

@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-16th-webfont.svg#filename") format("svg");
}

@font-face {
font-family: "Belgika";
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.eot");
src: url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.woff") format("woff"),
url("http://bohyewoo.com/webfonts/belgika/belgika-8th-webfont.svg#filename") format("svg");
}

@font-face {
font-family: "SourceCodePro";
src: url("http://bohyewoo.com/webfonts/Source_Code_Pro/SourceCodePro-Regular.ttf");
}



body {
background-color: ghostwhite;
/* font-family: Belgika; */
/* font-weight: 8th; */
/* letter-spacing: -0.3px; */
font-size: 14px;
line-height: 1.2;
padding: 20px;

}


.tos_wrapper {
width: 100%;
float: left;
}


.NNP {
background-color: pink;
}

.VBP {
}

.VBP:hover {
background-color: gold;
}

.NN {
background-color: LightSkyBlue;
}

.NNS {
background-color: Aquamarine;
}

.t_img {
font-family: SourceCodePro;
font-size: 30pt;
float: left;
width: 20%;
clear: both;
}


.info {
font-family: SourceCodePro;
font-weight: normal;
font-size: 10pt;
width: 25%;
float: left;
border: 1px solid black;
padding:10px;
margin-bottom: 50px;
}

.paragraph {
font-family: SourceCodePro;
font-weight: normal;
letter-spacing: -0.5px;
width: 70%;
float: right;
}


.top_words {
font-family: Belgika;
font-weight: 8th;
font-size: 9pt;
width: 25%;
float: left;
}


</style>
</head>
<body>''')
|
||||
|
||||
|
||||
# Open the wrapper that holds the logo, the info box, the tagged text and the
# word list; it is closed by the final print of the script.
print('<div class ="tos_wrapper">')

#insert an image
# https://upload.wikimedia.org/wikipedia/commons/1/15/Joffe_signing_the_Treaty_of_Tartu.jpg
# The logo is embedded as a base64 data URI so the generated page does not
# depend on a separate image file. The file is closed once it has been read.
with open('img/faceapp_logo.png', 'rb') as logo_file:
    FaceApp_img_url = base64.b64encode(logo_file.read()).decode('utf-8')

# A data URI starts with the media type of the payload ("image/png"), not with
# the path of the file it was read from.
FaceApp_image = '<div class="t_img">FaceApp<img style="width:90%" src="data:image/png;base64,{}"></div>'.format(FaceApp_img_url)
print(FaceApp_image)
|
||||
|
||||
|
||||
#info box
def _info_row(title, info):
    """Return the HTML for one row of the info box.

    title -- label shown underlined above the value; also used to build the
             row's CSS class.
    info  -- value shown below the label; inserted as-is because some values
             are HTML fragments (for example the source link).
    """
    # A class attribute is split on whitespace, so a title such as
    # "Initial release" would otherwise yield two unrelated classes.
    css_suffix = '-'.join(title.split())
    return '<span class="info-{0}"><div class="info-title" style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >{1}</div><div class="info-content">{2}</div></span><br>'.format(css_suffix, title, info)


print('<div class ="info">')

# (title, value) pairs in display order; each title appears exactly once.
infotext = [
    ('Service', 'FaceApp'),
    ('Type', 'Image editing'),
    ('Initial release', 'December 31, 2016'),
    ('source', '<a href="https://www.faceapp.com/terms-en.html">link</a>'),
    ('Description', 'FaceApp is a mobile application for iOS and Android developed by Russian company Wireless Lab. The app generates highly realistic transformations of human faces in photographs by using neural networks based on artificial intelligence. The app can transform a face to make it smile, look younger, look older, or change gender.'),
]

for title, info in infotext:
    print(_info_row(title, info))

print('</div>')
|
||||
|
||||
|
||||
|
||||
#ToS text
# Imported here so this section carries its own dependency.
from html import escape

print('<div class ="paragraph">')

# Split the text into tokens and attach a part-of-speech tag to each one.
tokenized = word_tokenize(text)
tagged = pos_tag(tokenized)

# Each token becomes a <span> whose class is its tag, which is what the
# stylesheet rules (.NN, .NNS, .NNP, .VBP) select on. Both values are escaped
# because tokens and tags can contain characters such as <, & or quotes that
# would otherwise break the surrounding markup.
for word, pos in tagged:
    print('<span class="{}">{}</span>'.format(escape(pos), escape(word)))

print('</div>')
|
||||
|
||||
|
||||
|
||||
# Word list: the 100 most frequent lower-cased tokens that are not stopwords,
# one per line with the count in parentheses.
print('<div class="top_words"><span style="-webkit-text-decoration-line: underline; text-decoration-line: underline;" >colonial words:</span>')

# Frequency distribution over the tokens that survive the stopword filter.
tokens_without_stopwords = nltk.FreqDist(
    words.lower()
    for words in tokenized
    if words.lower() not in all_stopwords
)
# Copy of the distribution kept under its original name.
frequency_word = FreqDist(tokens_without_stopwords)
top_words = tokens_without_stopwords.most_common(100)

row_template = '<br><span class="chosen_words">{}({}) </span>'
for chosen_words, frequency in top_words:
    print(row_template.format(chosen_words, frequency))

print('</div>')

# Writing the closing markup to output.html is left disabled:
# new_html = open('output.html', 'wb') # open the output file
# new_html.write('''</div></body></html>''')
# new_html.close() # close the output file

# Close the tos_wrapper div, then the body and the document.
print('</div></body></html>')
|
||||
|
||||
|
Loading…
Reference in New Issue