1
0

New threaded scraper implemented.

This commit is contained in:
Alexander Andreev 2020-07-18 04:43:00 +04:00
parent 3223c0721a
commit 87eecf0a09

View File

@ -0,0 +1,31 @@
"""Implementation of a threaded version of a scraper."""
from typing import List, Callable
from multiprocessing import cpu_count, Lock
from multiprocessing.pool import ThreadPool
from scrapthechan.scraper import Scraper
from scrapthechan.fileinfo import FileInfo
# Public names exported by a star-import of this module.
__all__ = ["ThreadedScraper"]
class ThreadedScraper(Scraper):
    """Scraper that downloads its files concurrently on a thread pool.

    A lock-protected counter numbers the files as worker threads pick them
    up; that running number is what the optional progress callback receives.
    """

    def __init__(
        self,
        save_directory: str,
        files: List[FileInfo],
        download_progress_callback: Callable[[int], None] = None,
    ) -> None:
        """Initialise the base scraper and the shared progress state.

        Args:
            save_directory: Passed through unchanged to ``Scraper.__init__``.
            files: Passed through unchanged to ``Scraper.__init__``.
            download_progress_callback: Passed through unchanged to
                ``Scraper.__init__``. Presumably the base class stores it as
                ``self._progress_callback`` (and ``files`` as
                ``self._files``), which is what the methods below read --
                confirm against ``Scraper``.
        """
        super().__init__(save_directory, files, download_progress_callback)
        # Running number of files handed to workers so far; shared between
        # all worker threads, hence the mutex.
        self._files_downloaded = 0
        self._files_downloaded_mutex = Lock()

    def run(self) -> None:
        """Download every file in ``self._files`` using a thread pool.

        The pool is sized at twice the CPU count. The call blocks until all
        files have been processed. ``ThreadPool.map`` re-raises an exception
        raised inside a worker; the pool is closed and joined either way so
        that worker threads are not left behind.
        """
        pool = ThreadPool(cpu_count() * 2)
        try:
            pool.map(self._thread_run, self._files)
        finally:
            # Always release the workers, including when map() raised.
            pool.close()
            pool.join()

    def _thread_run(self, f: FileInfo) -> None:
        """Worker body: report progress for one file, then download it.

        Args:
            f: The file this worker invocation is responsible for.
        """
        # Only the counter update and the callback are serialised; the
        # download itself runs outside the lock so workers stay parallel.
        with self._files_downloaded_mutex:
            self._files_downloaded += 1
            if self._progress_callback is not None:
                self._progress_callback(self._files_downloaded)
        self._download_file(f)