"""Implementation of a threaded version of a scraper.""" from typing import List, Callable from multiprocessing import cpu_count, Lock from multiprocessing.pool import ThreadPool from scrapthechan.scraper import Scraper from scrapthechan.fileinfo import FileInfo __all__ = ["ThreadedScraper"] class ThreadedScraper(Scraper): def __init__(self, save_directory: str, files: List[FileInfo], download_progress_callback: Callable[[int], None] = None) -> None: super().__init__(save_directory, files, download_progress_callback) self._files_downloaded = 0 self._files_downloaded_mutex = Lock() def run(self): pool = ThreadPool(cpu_count() * 2) pool.map(self._thread_run, self._files) pool.close() pool.join() def _thread_run(self, f: FileInfo): if not self._progress_callback is None: with self._files_downloaded_mutex: self._files_downloaded += 1 self._progress_callback(self._files_downloaded) self._download_file(f)