33 lines
955 B
Python
33 lines
955 B
Python
"""Implementation of a threaded version of a scraper."""
|
|
|
|
from typing import List, Callable
|
|
from multiprocessing import cpu_count, Lock
|
|
from multiprocessing.pool import ThreadPool
|
|
|
|
from scrapthechan.scraper import Scraper
|
|
from scrapthechan.fileinfo import FileInfo
|
|
|
|
|
|
# Names exported by `from <this module> import *`: only the scraper class.
__all__ = ["ThreadedScraper"]
|
|
|
|
|
|
class ThreadedScraper(Scraper):
    """Scraper that hands its files to a pool of worker threads.

    Each file in ``self._files`` is passed to ``_thread_run`` on a
    ``ThreadPool`` sized at twice the reported CPU count. A shared counter,
    guarded by a lock, is used to report progress through the optional
    callback.

    NOTE(review): ``self._files``, ``self._progress_callback`` and
    ``self._download_file`` are not defined in this class; they are
    presumably provided by the ``Scraper`` base class -- confirm there.
    """

    def __init__(self, save_directory: str, files: List[FileInfo],
            download_progress_callback: Callable[[int], None] = None) -> None:
        """Initialise the base scraper and the shared progress state.

        Args:
            save_directory: Forwarded unchanged to ``Scraper.__init__``.
            files: Forwarded unchanged to ``Scraper.__init__``.
            download_progress_callback: Optional callable receiving the
                running file count; forwarded to ``Scraper.__init__``.
        """
        super().__init__(save_directory, files, download_progress_callback)
        # Running count reported to the progress callback. It is shared by
        # all worker threads, so every update happens under the mutex below.
        self._files_downloaded = 0
        self._files_downloaded_mutex = Lock()

    def run(self) -> None:
        """Process every file on a thread pool and wait for completion.

        ``pool.map`` blocks until all files have been handled and re-raises
        an exception raised by a worker.
        """
        pool = ThreadPool(cpu_count() * 2)
        try:
            pool.map(self._thread_run, self._files)
        finally:
            # Always shut the pool down, even when a worker raised, so the
            # worker threads are not left behind.
            pool.close()
            pool.join()

    def _thread_run(self, f: FileInfo) -> None:
        """Worker body: report progress (if requested), then fetch ``f``.

        NOTE(review): the counter is bumped and the callback fired *before*
        the download starts, so the reported number means "files started",
        not "files finished" -- confirm this ordering is intended.
        """
        if self._progress_callback is not None:
            # The callback runs while the lock is held so that the values it
            # receives are strictly increasing and never duplicated.
            with self._files_downloaded_mutex:
                self._files_downloaded += 1
                self._progress_callback(self._files_downloaded)
        self._download_file(f)
|