New threaded scraper implemented.
This commit is contained in:
parent
3223c0721a
commit
87eecf0a09
31
scrapthechan/scrapers/threadedscraper.py
Normal file
31
scrapthechan/scrapers/threadedscraper.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
"""Implementation of a threaded version of a scraper."""
|
||||||
|
|
||||||
|
from typing import List, Callable
|
||||||
|
from multiprocessing import cpu_count, Lock
|
||||||
|
from multiprocessing.pool import ThreadPool
|
||||||
|
|
||||||
|
from scrapthechan.scraper import Scraper
|
||||||
|
from scrapthechan.fileinfo import FileInfo
|
||||||
|
|
||||||
|
__all__ = ["ThreadedScraper"]
|
||||||
|
|
||||||
|
class ThreadedScraper(Scraper):
|
||||||
|
def __init__(self, save_directory: str, files: List[FileInfo],
|
||||||
|
download_progress_callback: Callable[[int], None] = None) -> None:
|
||||||
|
super(ThreadedScraper, self).__init__(save_directory, files,
|
||||||
|
download_progress_callback)
|
||||||
|
self._files_downloaded = 0
|
||||||
|
self._files_downloaded_mutex = Lock()
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
pool = ThreadPool(cpu_count() * 2)
|
||||||
|
pool.map(self._thread_run, self._files)
|
||||||
|
pool.close()
|
||||||
|
pool.join()
|
||||||
|
|
||||||
|
def _thread_run(self, f: FileInfo):
|
||||||
|
with self._files_downloaded_mutex:
|
||||||
|
self._files_downloaded += 1
|
||||||
|
if not self._progress_callback is None:
|
||||||
|
self._progress_callback(self._files_downloaded)
|
||||||
|
self._download_file(f)
|
Loading…
Reference in New Issue
Block a user