From a3c037aaaa27c20163c52cf2682c4ef647bf1d3a Mon Sep 17 00:00:00 2001 From: nberting Date: Fri, 23 Mar 2018 23:48:02 +0100 Subject: [PATCH] fixed RGBA bug in replace script --- Makefile | 7 +-- ocr/output.txt | 101 ------------------------------------- src/replace_leastcommon.py | 12 +++-- 3 files changed, 11 insertions(+), 109 deletions(-) delete mode 100644 ocr/output.txt diff --git a/Makefile b/Makefile index b1193b6..5caefe7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -images=$(sort $(wildcard images/*.jpg)) +images=$(sort $(wildcard images/*)) # @andre make wildcard so that it takes any image file but doesn't take the listimg.txt file images-tiff=$(sort $(wildcard images-tiff/*.tiff)) input-hocr=$(sort $(wildcard hocr/*)) @@ -26,7 +26,6 @@ help: # CLEAN clean: ## removes output (target) files rm ocr/output.txt - rm $(wildcard output/*) rm $(tmpfile) @@ -60,7 +59,7 @@ tiffs: ## convert images/ to images-tiff/ Depends on IM echo $(images) for i in $(images); \ do tiff=`basename $$i .jpg`.tiff; \ - convert -density 300 $$i -alpha on images-tiff/$$tiff; \ + convert -density 300 $$i -colorspace RGB -type truecolor -alpha on images-tiff/$$tiff; \ echo $$tiff; \ done; @@ -103,10 +102,12 @@ overunder: ocr/output.txt ## Alice: An interpreted language that translate simpl erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF python3 src/erase_leastcommon.py rm $(input-hocr) + rm $(images-tiff) replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF python3 src/replace_leastcommon.py rm $(input-hocr) + rm $(images-tiff) visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer @echo $(tmpfile) diff --git a/ocr/output.txt b/ocr/output.txt deleted file mode 100644 index 7eaefd3..0000000 --- a/ocr/output.txt +++ /dev/null @@ -1,101 +0,0 @@ -Any one is one having been that one Any one is such a one. - -Any one having been that one is one remembering something oi such a thing, is one -remembering having been that one. - -Each one having been one is being one having been that one. Each one haying been - -one is remembering something of this thing, is remembering something or haying been -that one - -Each one is one. Each one has been one. Each one being one, each one havrng been - -one is remembering something or that thing. - -Each one is one. Each one has been one. Each one is remembering that thing. - -Each one is one. Each one has been one. That is something that any one haying been - -one, any one being one is having happen Each one being one is haying it happen that -that one is being that one. Each one having been one is one havrng had it happen that -that one has been that one. - -Each one is one. Any one is the one that one is Each one is one. - -One who is one is remembering that she is one forgetting anything. One who is one is -remembering that she is forgetting everything again and again She is remembering -this thing She is not interested in this thing She is remembering this thing and she is -remembering that this is a quite necessary thing, it is quite a necessary thing that she IS -remembering that she is iorgetting anything. - -She is to getting anything This is not a disturbing thing, this is not a distressing thing, -this is not an important thing She is iorgetting anything and she is remembering that -thing, she is remembering that she is forgetting anything. - -She is ore being one remembering that she is forgetting anything She is one not -objecting to being one remembering that thing, remembering that she is forgetting -anything She is one objecting to there being some objecting to being ones forgetting -anything She is one objecting to any one being one remembering that they are not - -iorgetting anything She is one objecting to any one objecting to her being one -forgetting anything She is not one remembering being one objecting to any one -objecting to her being one iorgetting anything She is one remembering that she is one -objecting to being one remembering that they are not forgetting anything. She is one -remembering something of being one objecting to some being one objecting to -forgetting anything - -She is one forgetting anything. She is one remembering something of this thing She is -one repeating this thing repeating remembering something of forgetting anything, - -She is one remembering that she has been having something. She is one remembering -something of this thing She has been having something, she is having something, she -is remembering something of this thing. She is not objecting to having something, she -is having something she is remembering something of this thing - -She is one being that one being one having something and remembering something of -that thing She is one being one and she is iorgetting anything and she is remembering -being one forgetting anything. - -Any one she is kissing is one she is kissing then, not kissing again and again, not -fissing and kissing, any one she is kissing is one she kissed then, is one she did kiss -then. one she kissed some then - -Any one she is kissing is one needing something then, needing kissing, needing -anytin‘ng first then, needing some kissing then. Any one she is kissing is one having -been kissed then, having been kissed some then and she was the one who was kissing -that one some just then Any one she was kissing was one whom she was kissing just - -from hows Vests to i narrating Ouhry - - - that Any one she was kissing was one who might have been needing something then, - -needing anything then, needing kissing then, needing a little kissing then, needing any ‘» -Mtg anything then, needing kissing then, needing a little kissing then, needing any - -kiss!!!) then, needing something then, needing kissing then - -She was one living and remembering that she had enough for this thing, enough for - -than She was one remembering that she had enough for being livrng and she was -runemben'ng that she could always be needing that thing needing having enough to be -m She could remembering to remind herself and any one of this thing, she could -W that thing, she could remember to be reminded of that thing. She could -W to beone reminding herself, she could remember to be one havmg any one -rewind her quite often of this thing that she could remember that she had enough and -wot“ be always having enough to be livrng. She could remembering that she was -needing this thing needing having enough always enough for livmg. She could -rentemtler enough oi reminding any one of this thing. She could remember this thing -rectum reminding herself of this thing. She could remember something of being -rein-Iced of this thing. She could remember this thing, she could remember a good -deal of knowing that she was havmg enough for berng living and that she could always -be neodng having enough for liVing. She could remember this thing, she could qurte -new that thing. - -She was one forgetting anything. She was remembering something of that thing of -toasting anything She could always remember something of that thing, remember -m of forgetting anything. - -in giving she was giving what she had then remembered to give then In gwing she - - diff --git a/src/replace_leastcommon.py b/src/replace_leastcommon.py index 9cc61b8..faa697b 100644 --- a/src/replace_leastcommon.py +++ b/src/replace_leastcommon.py @@ -40,7 +40,7 @@ def findleastcommon(list): fdist = FreqDist(word.lower() for word in list) leastcommon = fdist.most_common() for i in leastcommon: - if (i[1] <= limit): + if (i[1] <= 1): leastcommon_list.append(i[0]) return leastcommon_list @@ -92,6 +92,8 @@ for i in scanimg: mostcommon_list = findmostcommon(clean_words, 30) #find most common words and add them to list print ('The most common words until text', x+1, 'are:', mostcommon_list) + print ('The least common words until text', x+1, 'are:', leastcommon_list) + print ('') # loop through every word in hocr file to extract coordinates, then remove or paste into output image @@ -128,11 +130,11 @@ for i in scanimg: wimcolor7 = Image.new('RGBA', wimreplace7.size, (250, 230, 0, 90)) wimcolor_more = Image.new('RGBA', wimreplace_more.size, (250, 230, 0, 90)) - out4 = Image.alpha_composite(wimreplace4, wimcolor4) - out7 = Image.alpha_composite(wimreplace7, wimcolor7) - out_more = Image.alpha_composite(wimreplace_more, wimcolor_more) + out4 = Image.alpha_composite(wimreplace4.convert('RGBA'), wimcolor4) + out7 = Image.alpha_composite(wimreplace7.convert('RGBA'), wimcolor7) + out_more = Image.alpha_composite(wimreplace_more.convert('RGBA'), wimcolor_more) - if word.lower() in leastcommon_list and len(word) <= limit: + if word.lower() in leastcommon_list and len(word) <= 3: oim.paste(wim, (c[0], c[1], c[2], c[3])) elif word.lower() in leastcommon_list and len(word) < 8: