diff --git a/05/index.html b/05/index.html index bb2a221..e186309 100644 --- a/05/index.html +++ b/05/index.html @@ -116,12 +116,12 @@ reading_structure: ocr/output.txt
-erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF +erase: tiffs hocrs python3 src/erase_leastcommon.py rm $(input-hocr) rm $(images-tiff) -replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF +replace:tiffs hocrs python3 src/replace_leastcommon.py rm $(input-hocr) rm $(images-tiff)