From caf18a1bf0ea8bc3b5d4f8c1cfcfedc41596ccf2 Mon Sep 17 00:00:00 2001
From: "Alexander \"Arav\" Andreev" <me@arav.top>
Date: Mon, 3 May 2021 02:35:31 +0400
Subject: [PATCH] Added option --skip-posts and messages now take just one
 line.

---
 scrapthechan/cli/scraper.py | 64 ++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 29 deletions(-)
diff --git a/scrapthechan/cli/scraper.py b/scrapthechan/cli/scraper.py
index 2f1d44a..97a2cc4 100644
--- a/scrapthechan/cli/scraper.py
+++ b/scrapthechan/cli/scraper.py
@@ -3,7 +3,7 @@ from os import makedirs
 from os.path import join, exists
 from re import search
 from sys import argv
-from typing import List
+from typing import List, Optional
 
 from scrapthechan import VERSION
 from scrapthechan.parser import Parser, ThreadNotFoundError
@@ -15,17 +15,18 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper
 __all__ = ["main"]
 
 
-USAGE = \
+USAGE: str = \
 f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
 
 Options:
-\t-h,--help       -- print this help and exit;
-\t-v,--version    -- print program's version and exit;
-\t-o,--output-dir -- directory where to place scraped files. By default
-\t                   following structure will be created in current directory:
-\t                   <imageboard>/<board>/<thread>;
-\t-N,--no-op      -- by default OP's post will be written in !op.txt file. This
-\t                   option disables this behaviour;
+\t-h,--help             -- print this help and exit;
+\t-v,--version          -- print program's version and exit;
+\t-o,--output-dir       -- directory where to place scraped files. By default
+\t                         following structure will be created in current directory:
+\t                         <imageboard>/<board>/<thread>;
+\t-N,--no-op            -- by default OP's post will be written in !op.txt file. This
+\t                         option disables this behaviour;
+\t-S,--skip-posts <num> -- skip given number of posts.
 
 Arguments:
 \tURL        -- URL of a thread;
@@ -37,15 +38,15 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
 """
 
 
-def parse_common_arguments(args: str) -> dict:
-    r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
-    args = search(r, args)
-    if not args is None:
-        args = args.groupdict()
-        return {
-            "help": not args["help"] is None,
-            "version": not args["version"] is None }
-    return None
+def parse_common_arguments(args: str) -> Optional[dict]:
+	r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
+	args = search(r, args)
+	if not args is None:
+		args = args.groupdict()
+		return {
+			"help": not args["help"] is None,
+			"version": not args["version"] is None }
+	return None
 
 def parse_arguments(args: str) -> dict:
 	rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)"
@@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict:
 	if not link is None:
 		link = link.groupdict()
 	out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
+	skip_posts = search(r"(?=(-S|--skip-posts) (?P<skip>\d+))", args)
 	return {
 		"site": None if link is None else link["site"],
 		"board": None if link is None else link["board"],
 		"thread": None if link is None else link["thread"],
+		"skip-posts": None if skip_posts is None else int(skip_posts.group('skip')),
 		"no-op": not search(r"-N|--no-op", args) is None,
 		"output-dir": None if out_dir is None \
 					  else out_dir.groupdict()["outdir"] }
@@ -82,17 +85,21 @@ def main() -> None:
 		exit()
 
 	try:
-		parser = get_parser_by_site(args["site"], args["board"], args["thread"])
+		if not args["skip-posts"] is None:
+			parser = get_parser_by_site(args["site"], args["board"],
+										args["thread"], args["skip-posts"])
+		else:
+			parser = get_parser_by_site(args["site"], args["board"],
+										args["thread"])
 	except NotImplementedError as ex:
 		print(f"{str(ex)}.")
 		print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
 		exit()
-	except ThreadNotFoundError:
+	except ThreadNotFoundError as e:
 		print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
-			   "is no longer exist.")
+			   f"not found. Reason: {e.reason}")
 		exit()
 
-
 	files_count = len(parser.files)
 
 	if not args["output-dir"] is None:
@@ -101,23 +108,22 @@ def main() -> None:
 		save_dir = join(parser.imageboard, parser.board,
 						parser.thread)
 
-	print(f"There are {files_count} files in " \
-		  f"{args['site']}/{args['board']}/{args['thread']}." \
-		  f"They will be saved in {save_dir}.")
+	print(f"{files_count} files in " \
+		  f"{args['site']}/{args['board']}/{args['thread']}. " \
+		  f"They're going to {save_dir}. ", end="")
 
 	makedirs(save_dir, exist_ok=True)
 
 
 	if not args["no-op"]:
-		print("Writing OP... ", end='')
 		if parser.op is None:
-			print("No text's there.")
+			print("OP's empty.")
 		elif not exists(join(save_dir, "!op.txt")):
 			with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
 				opf.write(f"{parser.op}\n")
-			print("Done.")
+			print("OP's written.")
 		else:
-			print("Exists.")
+			print("OP exists.")
 
 
 	scraper = ThreadedScraper(save_dir, parser.files, \