|
|
@@ -116,12 +116,12 @@ reading_structure: ocr/output.txt
|
|
|
|
<h3>Chapter 4 - Natasha Berting</h3>
|
|
|
|
<h3>Chapter 4 - Natasha Berting</h3>
|
|
|
|
<em>How Bias Spreads from the Canon to the Web + Erase / Replace</em>
|
|
|
|
<em>How Bias Spreads from the Canon to the Web + Erase / Replace</em>
|
|
|
|
<pre>
|
|
|
|
<pre>
|
|
|
|
erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
|
|
|
|
erase: tiffs hocrs
|
|
|
|
python3 src/erase_leastcommon.py
|
|
|
|
python3 src/erase_leastcommon.py
|
|
|
|
rm $(input-hocr)
|
|
|
|
rm $(input-hocr)
|
|
|
|
rm $(images-tiff)
|
|
|
|
rm $(images-tiff)
|
|
|
|
|
|
|
|
|
|
|
|
replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
|
|
|
|
replace:tiffs hocrs
|
|
|
|
python3 src/replace_leastcommon.py
|
|
|
|
python3 src/replace_leastcommon.py
|
|
|
|
rm $(input-hocr)
|
|
|
|
rm $(input-hocr)
|
|
|
|
rm $(images-tiff)
|
|
|
|
rm $(images-tiff)
|
|
|
|