Compare commits
2 Commits
9ad9fcfd6f
...
7825b53121
Author | SHA1 | Date |
---|---|---|
Alexander Andreev | 7825b53121 | |
Alexander Andreev | b26152f3ca | |
@ -8,6 +8,9 @@ __license__ = \
|
|||
For a copy see COPYING file in a directory of the program, or
|
||||
see <https://opensource.org/licenses/MIT>"""
|
||||
|
||||
|
||||
USER_AGENT = f"ScrapTheChan/{__version__}"
|
||||
|
||||
VERSION = \
|
||||
f"ScrapTheChan ver. {__version__} ({__date__})\n{__copyright__}\n"\
|
||||
f"\n{__license__}"
|
||||
|
|
|
@ -6,10 +6,9 @@ from sys import argv
|
|||
from typing import List
|
||||
|
||||
from scrapthechan import VERSION
|
||||
from scrapthechan.parser import Parser, ParserThreadNotFoundError
|
||||
from scrapthechan.parser import Parser, ThreadNotFoundError
|
||||
from scrapthechan.parsers import get_parser_by_url, get_parser_by_site, \
|
||||
SUPPORTED_IMAGEBOARDS
|
||||
#from scrapthechan.scrapers.basicscraper import BasicScraper
|
||||
from scrapthechan.scrapers.threadedscraper import ThreadedScraper
|
||||
|
||||
|
||||
|
@ -40,12 +39,12 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
|
|||
|
||||
def parse_common_arguments(args: str) -> dict:
|
||||
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
|
||||
argd = search(r, args)
|
||||
if not argd is None:
|
||||
argd = argd.groupdict()
|
||||
args = search(r, args)
|
||||
if not args is None:
|
||||
args = args.groupdict()
|
||||
return {
|
||||
"help": not argd["help"] is None,
|
||||
"version": not argd["version"] is None }
|
||||
"help": not args["help"] is None,
|
||||
"version": not args["version"] is None }
|
||||
return None
|
||||
|
||||
def parse_arguments(args: str) -> dict:
|
||||
|
@ -84,16 +83,13 @@ def main() -> None:
|
|||
print(f"{str(ex)}.")
|
||||
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
|
||||
exit()
|
||||
except ParserThreadNotFoundError:
|
||||
except ThreadNotFoundError:
|
||||
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
|
||||
"is no longer exist.")
|
||||
exit()
|
||||
|
||||
flen = len(parser.files)
|
||||
|
||||
|
||||
print(f"There are {flen} files in " \
|
||||
f"{args['site']}/{args['board']}/{args['thread']}.")
|
||||
files_count = len(parser.files)
|
||||
|
||||
if not args["output-dir"] is None:
|
||||
save_dir = args["output-dir"]
|
||||
|
@ -101,7 +97,9 @@ def main() -> None:
|
|||
save_dir = join(parser.imageboard, parser.board,
|
||||
parser.thread)
|
||||
|
||||
print(f"They will be saved in {save_dir}.")
|
||||
print(f"There are {files_count} files in " \
|
||||
f"{args['site']}/{args['board']}/{args['thread']}." \
|
||||
f"They will be saved in {save_dir}.")
|
||||
|
||||
makedirs(save_dir, exist_ok=True)
|
||||
|
||||
|
@ -119,7 +117,7 @@ def main() -> None:
|
|||
|
||||
|
||||
scraper = ThreadedScraper(save_dir, parser.files, \
|
||||
lambda i: print(f"{i}/{flen}", end="\r"))
|
||||
lambda i: print(f"{i}/{files_count}", end="\r"))
|
||||
scraper.run()
|
||||
|
||||
|
||||
|
|
|
@ -9,10 +9,10 @@ from urllib.request import urlopen, urlretrieve
|
|||
from scrapthechan.fileinfo import FileInfo
|
||||
|
||||
|
||||
__all__ = ["Parser", "ParserThreadNotFoundError"]
|
||||
__all__ = ["Parser", "ThreadNotFoundError"]
|
||||
|
||||
|
||||
class ParserThreadNotFoundError(Exception):
|
||||
class ThreadNotFoundError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -74,7 +74,7 @@ class Parser:
|
|||
with urlopen(thread_url) as url:
|
||||
return loads(url.read().decode('utf-8'))
|
||||
except:
|
||||
raise ParserThreadNotFoundError
|
||||
raise ThreadNotFoundError
|
||||
|
||||
def _parse_post(self, post: dict) -> List[FileInfo]:
|
||||
"""Parses a single post and extracts files into `FileInfo` object."""
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
"""Base Scraper implementation."""
|
||||
"""Base class for all scrapers that will actually do the job."""
|
||||
|
||||
from base64 import b64encode
|
||||
from os import remove, stat
|
||||
|
@ -8,14 +8,14 @@ from typing import List, Callable
|
|||
from urllib.request import urlretrieve, URLopener
|
||||
import hashlib
|
||||
|
||||
from scrapthechan import __version__
|
||||
from scrapthechan import USER_AGENT
|
||||
from scrapthechan.fileinfo import FileInfo
|
||||
|
||||
__all__ = ["Scraper"]
|
||||
|
||||
|
||||
class Scraper:
|
||||
"""Base scraper implementation.
|
||||
"""Base class for all scrapers that will actually do the job.
|
||||
|
||||
Arguments:
|
||||
save_directory -- a path to a directory where file will be
|
||||
|
@ -29,7 +29,7 @@ class Scraper:
|
|||
self._save_directory = save_directory
|
||||
self._files = files
|
||||
self._url_opener = URLopener()
|
||||
self._url_opener.version = f"ScrapTheChan/{__version__}"
|
||||
self._url_opener.version = USER_AGENT
|
||||
self._progress_callback = download_progress_callback
|
||||
|
||||
def run(self):
|
||||
|
|
Loading…
Reference in New Issue