tweak connection pooling for news

workspace
Brendan Howell 5 years ago
parent 4c54748be3
commit 580c1888f5

@ -3,6 +3,7 @@ from base64 import b64encode
from datetime import datetime from datetime import datetime
import io import io
import json import json
import multiprocessing
import os import os
import random import random
import string import string
@ -313,40 +314,32 @@ class Publications(Bureau):
en_count += 1 en_count += 1
# do this multi-threaded cuz downloads can be slow # do this multi-threaded cuz downloads can be slow
threads = [] # NOTE: this could be further optimized with 2 threads per host (chunks)
for i in range(len(entries)): def fetch_og(entry):
entry = entries[i] """
def fetch_og(entry): get OpenGraph data for entry
""" and download image
get OpenGraph data for entry TODO: add microdata support here to get author
and download image """
TODO: add microdata support here to get author og_data = self._get_ogdata(entry.link)
""" if hasattr(entry, "media_thumbnail"):
og_data = self._get_ogdata(entry.link) entry.img = entry.media_thumbnail[-1]["url"]
if hasattr(entry, "media_thumbnail"): elif "image" in og_data:
entry.img = entry.media_thumbnail[-1]["url"] entry.img = og_data["image"]
elif "image" in og_data: else:
entry.img = og_data["image"] entry.img = " "
else:
entry.img = " "
if entry.img != " ":
fileext = "." + entry.img.rsplit(".",1)[1]
filename = tempfile.mktemp(fileext)
print("fetching", entry.img, filename)
urllib.request.urlretrieve(entry.img, filename)
entry.img = "file://" + filename
thread = threading.Thread(target=fetch_og, args=(entry,))
threads.append(thread)
thread.start()
# wait till we're done if entry.img != " ":
for thread in threads: fileext = "." + entry.img.rsplit(".",1)[1]
thread.join() filename = tempfile.mktemp(fileext)
print("fetching", entry.img, filename)
urllib.request.urlretrieve(entry.img, filename)
entry.img = "file://" + filename
fetcher = multiprocessing.pool.ThreadPool(processes=2)
entries_fetched = fetcher.map(fetch_og, entries)
return entries return entries_fetched
def main(): def main():

Loading…
Cancel
Save