|
|
@ -3,6 +3,7 @@ from base64 import b64encode
|
|
|
|
from datetime import datetime
|
|
|
|
from datetime import datetime
|
|
|
|
import io
|
|
|
|
import io
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
|
|
|
|
import multiprocessing
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import random
|
|
|
|
import random
|
|
|
|
import string
|
|
|
|
import string
|
|
|
@ -313,9 +314,7 @@ class Publications(Bureau):
|
|
|
|
en_count += 1
|
|
|
|
en_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
# do this multi-threaded cuz downloads can be slow
|
|
|
|
# do this multi-threaded cuz downloads can be slow
|
|
|
|
threads = []
|
|
|
|
# NOTE: this could be further optimized with 2 threads per host (chunks)
|
|
|
|
for i in range(len(entries)):
|
|
|
|
|
|
|
|
entry = entries[i]
|
|
|
|
|
|
|
|
def fetch_og(entry):
|
|
|
|
def fetch_og(entry):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
get OpenGraph data for entry
|
|
|
|
get OpenGraph data for entry
|
|
|
@ -337,16 +336,10 @@ class Publications(Bureau):
|
|
|
|
urllib.request.urlretrieve(entry.img, filename)
|
|
|
|
urllib.request.urlretrieve(entry.img, filename)
|
|
|
|
entry.img = "file://" + filename
|
|
|
|
entry.img = "file://" + filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fetcher = multiprocessing.pool.ThreadPool(processes=2)
|
|
|
|
|
|
|
|
entries_fetched = fetcher.map(fetch_og, entries)
|
|
|
|
|
|
|
|
|
|
|
|
thread = threading.Thread(target=fetch_og, args=(entry,))
|
|
|
|
return entries_fetched
|
|
|
|
threads.append(thread)
|
|
|
|
|
|
|
|
thread.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# wait till we're done
|
|
|
|
|
|
|
|
for thread in threads:
|
|
|
|
|
|
|
|
thread.join()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return entries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
def main():
|
|
|
|