Added option --skip-posts, and messages now take just one line.
This commit is contained in:
parent
751549f575
commit
caf18a1bf0
@ -3,7 +3,7 @@ from os import makedirs
|
|||||||
from os.path import join, exists
|
from os.path import join, exists
|
||||||
from re import search
|
from re import search
|
||||||
from sys import argv
|
from sys import argv
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
from scrapthechan import VERSION
|
from scrapthechan import VERSION
|
||||||
from scrapthechan.parser import Parser, ThreadNotFoundError
|
from scrapthechan.parser import Parser, ThreadNotFoundError
|
||||||
@ -15,7 +15,7 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper
|
|||||||
__all__ = ["main"]
|
__all__ = ["main"]
|
||||||
|
|
||||||
|
|
||||||
USAGE = \
|
USAGE: str = \
|
||||||
f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
|
f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
@ -26,6 +26,7 @@ Options:
|
|||||||
\t <imageboard>/<board>/<thread>;
|
\t <imageboard>/<board>/<thread>;
|
||||||
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
|
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
|
||||||
\t option disables this behaviour;
|
\t option disables this behaviour;
|
||||||
|
\t-S,--skip-posts <num> -- skip given number of posts.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
\tURL -- URL of a thread;
|
\tURL -- URL of a thread;
|
||||||
@ -37,7 +38,7 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def parse_common_arguments(args: str) -> dict:
|
def parse_common_arguments(args: str) -> Optional[dict]:
|
||||||
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
|
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
|
||||||
args = search(r, args)
|
args = search(r, args)
|
||||||
if not args is None:
|
if not args is None:
|
||||||
@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict:
|
|||||||
if not link is None:
|
if not link is None:
|
||||||
link = link.groupdict()
|
link = link.groupdict()
|
||||||
out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
|
out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
|
||||||
|
skip_posts = search(r"(?=(-S|--skip-posts) (?P<skip>\d+))", args)
|
||||||
return {
|
return {
|
||||||
"site": None if link is None else link["site"],
|
"site": None if link is None else link["site"],
|
||||||
"board": None if link is None else link["board"],
|
"board": None if link is None else link["board"],
|
||||||
"thread": None if link is None else link["thread"],
|
"thread": None if link is None else link["thread"],
|
||||||
|
"skip-posts": None if skip_posts is None else int(skip_posts.group('skip')),
|
||||||
"no-op": not search(r"-N|--no-op", args) is None,
|
"no-op": not search(r"-N|--no-op", args) is None,
|
||||||
"output-dir": None if out_dir is None \
|
"output-dir": None if out_dir is None \
|
||||||
else out_dir.groupdict()["outdir"] }
|
else out_dir.groupdict()["outdir"] }
|
||||||
@ -82,17 +85,21 @@ def main() -> None:
|
|||||||
exit()
|
exit()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = get_parser_by_site(args["site"], args["board"], args["thread"])
|
if not args["skip-posts"] is None:
|
||||||
|
parser = get_parser_by_site(args["site"], args["board"],
|
||||||
|
args["thread"], args["skip-posts"])
|
||||||
|
else:
|
||||||
|
parser = get_parser_by_site(args["site"], args["board"],
|
||||||
|
args["thread"])
|
||||||
except NotImplementedError as ex:
|
except NotImplementedError as ex:
|
||||||
print(f"{str(ex)}.")
|
print(f"{str(ex)}.")
|
||||||
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
|
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
|
||||||
exit()
|
exit()
|
||||||
except ThreadNotFoundError:
|
except ThreadNotFoundError as e:
|
||||||
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
|
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
|
||||||
"is no longer exist.")
|
"not found. Reason: {e.reason}")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
files_count = len(parser.files)
|
files_count = len(parser.files)
|
||||||
|
|
||||||
if not args["output-dir"] is None:
|
if not args["output-dir"] is None:
|
||||||
@ -101,23 +108,22 @@ def main() -> None:
|
|||||||
save_dir = join(parser.imageboard, parser.board,
|
save_dir = join(parser.imageboard, parser.board,
|
||||||
parser.thread)
|
parser.thread)
|
||||||
|
|
||||||
print(f"There are {files_count} files in " \
|
print(f"{files_count} files in " \
|
||||||
f"{args['site']}/{args['board']}/{args['thread']}. " \
|
f"{args['site']}/{args['board']}/{args['thread']}. " \
|
||||||
f"They will be saved in {save_dir}.")
|
f"They're going to {save_dir}. ", end="")
|
||||||
|
|
||||||
makedirs(save_dir, exist_ok=True)
|
makedirs(save_dir, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
if not args["no-op"]:
|
if not args["no-op"]:
|
||||||
print("Writing OP... ", end='')
|
|
||||||
if parser.op is None:
|
if parser.op is None:
|
||||||
print("No text's there.")
|
print("OP's empty.")
|
||||||
elif not exists(join(save_dir, "!op.txt")):
|
elif not exists(join(save_dir, "!op.txt")):
|
||||||
with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
|
with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
|
||||||
opf.write(f"{parser.op}\n")
|
opf.write(f"{parser.op}\n")
|
||||||
print("Done.")
|
print("OP's written.")
|
||||||
else:
|
else:
|
||||||
print("Exists.")
|
print("OP exists.")
|
||||||
|
|
||||||
|
|
||||||
scraper = ThreadedScraper(save_dir, parser.files, \
|
scraper = ThreadedScraper(save_dir, parser.files, \
|
||||||
|
Loading…
Reference in New Issue
Block a user