You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

12 KiB

In [179]:
# File names of the images referenced in this notebook.
# The first file carries no numeric suffix; the others are numbered 2 to 14.
image = 'think-classify.jpg'
(image2, image3, image4, image5, image6, image7, image8,
 image9, image10, image11, image12, image13, image14) = (
    f'think-classify{number}.jpg' for number in range(2, 15)
)
In [198]:
import json
from urllib.request import urlopen

from nltk.corpus import stopwords

# English stopwords; bridge() skips these when looking for a shared word.
sw = stopwords.words("english")

# Filled by bridge(): one dict per text segment that has to be rendered.
resultSentences = []

# Request the labels attached to one image (image7 is defined in the cell
# that lists the image file names).
url = f"https://hub.xpub.nl/soupboat/generic-labels/get-labels/?image={image7}"

# The context manager closes the HTTP response even if decoding fails.
with urlopen(url) as response:
    data_json = json.loads(response.read())

labels = data_json['labels']

# Each label text becomes a list of whitespace-separated words;
# punctuation stays attached to the words at this stage.
labels_corpus = [label['text'].split() for label in labels]

print(labels_corpus)
[['to', 'meet,', 'but', 'also', 'to', 'collect.', 'are', 'you', 'gathering', 'people', 'or', 'a', 'bulk', 'of', 'leaves', 'in', 'your', 'garden', 'when', 'winter', 'is', 'coming?'], ['to', 'gather', 'friends', 'inside', 'your', 'place,', 'to', 'gather', 'information', 'for', 'the', 'police,', 'to', 'gather', 'food', 'for', 'the', 'homeless,', 'to', 'gather', 'objects', 'inside', 'your', 'bag,', 'to', 'gather', 'your', 'drunk', 'friend,', 'to', 'gather', 'a', 'molotof', 'cocktail', 'for', 'a', 'revolutionary', 'party'], ['it', 'is', 'the', 'opposite', 'of', 'dividing', 'because', 'the', 'basic', 'idea', 'is', 'that', 'you', 'have', 'a', 'lot', 'of', 'objects', 'far', 'away', 'from', 'each', 'other,', 'in', 'different', 'rooms,', 'in', 'different', 'building,', 'in', 'different', 'cities', 'and', 'you', 'want', 'to', 'have', 'all', 'of', 'them', 'on', 'your', 'bed', 'so', 'you', 'just', 'go', 'and', 'pick', 'them', 'up', 'from', 'where', 'they', 'are', 'you', 'put', 'them', 'in', 'a', 'bag', 'and', 'go', 'back', 'home'], ['my', 'favorite', 'activity', 'since', '200000000', 'years,', 'to', 'gather', 'food,', 'to', 'find', 'something', 'and', 'to', 'collect', 'it.', 'since', 'im', 'a', 'racoon', 'i', 'like', 'to', 'gather', 'things', 'around', 'from', 'the', 'street.', 'my', 'flatmate', 'is', 'desperate', 'about', 'it.', 'but', 'i', 'always', 'find', 'nice', 'things:', 'a', 'table,', 'a', 'confy', 'armchair,', 'some', 'baskets,', 'some', 'vases.']]
In [ ]:
 
In [199]:
punctuation = ['.', ',', ';', '(', ')', ':']

# .split() leaves punctuation glued to the neighbouring word ('meet,' or
# '(word)'), so words are cleaned before they are compared with each other.
def clean_word(word):
    """Return word without the punctuation characters wrapped around it.

    Every leading and trailing character listed in `punctuation` is removed;
    characters inside the word are left untouched. An empty string is
    returned unchanged (as an empty string).
    """
    return word.strip(''.join(punctuation))
In [200]:
# Finds the first word that text_a (searched from start_a onwards) shares with
# text_b, and records in resultSentences which part of each text to render.
def bridge(text_a, text_b, start_a, isLast):
    """Record the part of text_a that leads up to its first word shared with text_b.

    Words are compared after clean_word() has removed surrounding punctuation,
    and words of text_a that are stopwords (members of `sw`) are never used
    as a bridge.

    Args:
        text_a: list of word strings, searched from index start_a to its end.
        text_b: list of word strings, searched from its beginning.
        start_a: index in text_a where the search and the recorded part start.
        isLast: truthy when text_b is the final text of the corpus; an entry
            for text_b is then recorded as well.

    Returns:
        The index in text_b of the shared word, or 0 when no word is shared.

    Side effects:
        Appends dicts with the keys 'text', 'start', 'end' and 'hasMatch' to
        the module-level list resultSentences.
    """
    # Clean text_b once instead of once per candidate word of text_a.
    cleaned_b = [clean_word(word_b) for word_b in text_b]

    for i in range(start_a, len(text_a)):
        word_a = clean_word(text_a[i])

        # The stopword test uses the cleaned word, so that a stopword with
        # punctuation attached ('other,' or 'it.') is skipped as well.
        if word_a in sw:
            continue

        for j, word_b in enumerate(cleaned_b):
            if word_a != word_b:
                continue

            # text_a is rendered from start_a up to (not including) the
            # shared word at index i.
            resultSentences.append({
                'text': text_a,
                'start': start_a,
                'end': i,
                'hasMatch': 1
            })

            # The final text has no successor: it is rendered from the
            # shared word until its end.
            if isLast:
                resultSentences.append({
                    'text': text_b,
                    'start': j,
                    'end': None,
                    'hasMatch': 1
                })

            # The next comparison starts from the shared word in text_b.
            return j

    # No shared word: text_a is rendered from start_a until its end.
    resultSentences.append({
        'text': text_a,
        'start': start_a,
        'end': None,
        'hasMatch': 0
    })

    # Without a bridge the final text is rendered in full.
    if isLast:
        resultSentences.append({
            'text': text_b,
            'start': 0,
            'end': None,
            'hasMatch': 0
        })

    return 0
In [201]:
def bridge_list(corpus):
    """Chain every text of the corpus to the text that follows it.

    bridge() is called for each pair of consecutive texts. The index it
    returns (the position of the shared word in the second text of the pair)
    is where the search starts when that text becomes the first of the
    next pair.

    Args:
        corpus: list of texts, each text being a list of word strings.

    Returns:
        None. The segments to render are appended to the module-level list
        resultSentences.
    """
    # A single text has no neighbour to bridge to; record it in full so that
    # it is still rendered instead of being silently dropped.
    if len(corpus) == 1:
        resultSentences.append({
            'text': corpus[0],
            'start': 0,
            'end': None,
            'hasMatch': 0
        })
        return

    start_a = 0
    # The last pair is (corpus[-2], corpus[-1]).
    last_pair_index = len(corpus) - 2

    for text_index in range(len(corpus) - 1):
        isLast = text_index == last_pair_index
        text_a = corpus[text_index]
        text_b = corpus[text_index + 1]

        # The returned index is the starting point inside text_b for the
        # next iteration, where text_b takes the role of text_a.
        start_a = bridge(text_a, text_b, start_a, isLast)
        
In [202]:
def render_sentence(sentence, highlightNext):
    """Render one recorded segment as an HTML string.

    Args:
        sentence: dict with the keys 'text' (list of word strings), 'start'
            (index of the first word to render), 'end' (index after the last
            word to render, or None to render until the end of the text) and
            'hasMatch' (1 when the segment ends on a word shared with the
            following text, else 0).
        highlightNext: 1 when the first rendered word has to be wrapped in
            the highlight span, else 0.

    Returns:
        A tuple (html, highlight): the rendered words, and 1 when the first
        word of the following segment has to be highlighted, else 0.
    """
    text = sentence['text']

    # Compare against None explicitly: an end index of 0 is a valid value
    # (the shared word is the first word of the text) and means that nothing
    # of this segment is rendered.
    start = sentence['start'] if sentence['start'] is not None else 0
    end = sentence['end'] if sentence['end'] is not None else len(text)

    pieces = []
    for index in range(start, end):
        word = text[index]

        # Only the first word of a segment can be the highlighted one.
        if index == start and highlightNext == 1:
            pieces.append('<span class="highlighit">' + word + '</span>')
        else:
            pieces.append(" " + word)

    # The flag for the following segment depends only on whether this
    # segment ends on a shared word, also when it holds one word or none.
    highlight = 1 if sentence['hasMatch'] else 0

    return ''.join(pieces), highlight
        
        
In [203]:
# Start from an empty list so that re-running this cell does not render the
# segments recorded by a previous run a second time.
resultSentences.clear()
bridge_list(labels_corpus)

endResult = ''

# 1 when the first word of the next segment is a shared word to highlight.
highlightNext = 0

for segment in resultSentences:
    rendered, highlightNext = render_sentence(segment, highlightNext)
    endResult = endResult + " " + rendered

print(endResult)
  to meet, but also to collect. are you gathering people or a bulk of leaves in your garden when winter is coming?  to gather friends inside your place, to gather information for the police, to gather food for the homeless, to gather <span class="highlighit">objects</span> far away from each other, in different rooms, in different building, in different cities and you want to have all of them on your bed so you just go and pick them up from where they are you put them in a bag and go back home  my favorite activity since 200000000 years, to gather food, to find something and to collect it. since im a racoon i like to gather things around from the street. my flatmate is desperate about it. but i always find nice things: a table, a confy armchair, some baskets, some vases.
In [ ]:
 
In [ ]:
 
In [ ]: