def get_unique_other_books(library_books, author_books, title_similarity=None, threshold=0.7):
    """Return the author's Goodreads books that are not already in the library.

    A book from ``author_books`` is dropped when either

    * its ``isbn`` or its ``gid["#text"]`` equals the ``val`` of any
      identifier (ISBN, Goodreads, etc) attached to a library book, or
    * a similarity function is available and the book's title, with anything
      in parentheses removed, scores above ``threshold`` against the title of
      any library book.

    Note: Not all images will be shown, even though they're available on
    Goodreads.com.
    See https://www.goodreads.com/topic/show/18213769-goodreads-book-images

    :param library_books: iterable of library entries exposing ``identifiers``
        (objects with a ``val`` attribute) and ``title``.
    :param author_books: iterable of Goodreads books exposing ``isbn``,
        ``gid`` (indexable by ``"#text"``) and ``title``.
    :param title_similarity: optional callable ``f(a, b)`` returning a
        similarity score for two titles. When omitted, ``Levenshtein.ratio``
        is used if the module-level ``levenshtein_support`` flag is set;
        otherwise no fuzzy title matching is done.
    :param threshold: a title pair scoring strictly above this value counts
        as a duplicate.
    :return: list of the remaining author books, in their original order.
    """
    # Materialise once: the library is walked twice (identifiers, then titles).
    library_books = list(library_books)

    # Kept as a list (not a set) so membership tests rely on equality only and
    # keep working even if an isbn/identifier value is unhashable.
    identifiers = [identifier.val
                   for book in library_books
                   for identifier in book.identifiers]
    other_books = [book for book in author_books
                   if book.isbn not in identifiers
                   and book.gid["#text"] not in identifiers]

    # Fuzzy match book titles
    if title_similarity is None and levenshtein_support:
        title_similarity = Levenshtein.ratio
    if title_similarity is None:
        return other_books

    library_titles = [book.title for book in library_books]
    parenthesised = re.compile(r"\(.*\)")

    unique_books = []
    for author_book in other_books:
        # Remove items in parentheses before comparing, e.g. "(Series, #1)".
        bare_title = parenthesised.sub("", author_book.title)
        # any() yields a real boolean on both Python 2 and 3; testing the
        # truthiness of filter() does not, because on Python 3 it is a lazy
        # object that is always truthy.
        is_duplicate = any(title_similarity(bare_title, library_title) > threshold
                           for library_title in library_titles)
        if not is_duplicate:
            unique_books.append(author_book)
    return unique_books