parsers: add optional skip_posts argument, drop lolifox.cc

* Import Optional from typing.
* Move the URLRX pattern out of get_parser_by_url to module level.
* get_parser_by_url and get_parser_by_site take a new optional
  skip_posts argument (default None), which is forwarded as the third
  positional argument to every parser constructor.
* Remove "lolifox.cc" from SUPPORTED_IMAGEBOARDS and remove the
  LolifoxParser branch from get_parser_by_site.

NOTE(review): get_parser_by_url still calls .groups() directly on the
result of search(); re.search returns None when the URL does not match,
so a malformed URL raises AttributeError. This patch does not change that.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line counts agree with the hunk headers,
but the exact indentation characters and the placement of blank lines
are reconstructed -- verify against the target file before applying.

diff --git a/scrapthechan/parsers/__init__.py b/scrapthechan/parsers/__init__.py
index 0fc99b4..a32f219 100644
--- a/scrapthechan/parsers/__init__.py
+++ b/scrapthechan/parsers/__init__.py
@@ -1,6 +1,6 @@
 """Here are defined the JSON parsers for imageboards."""
 from re import search
-from typing import List
+from typing import List, Optional
 
 from scrapthechan.parser import Parser
 
@@ -8,33 +8,31 @@ from scrapthechan.parser import Parser
 __all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
 
 
+URLRX = r"https?:\/\/(?P<site>[\w\.]+)\/(?P<board>\w+)\/(?:\w+)?\/(?P<thread>\w+)"
 SUPPORTED_IMAGEBOARDS: List[str] = ["4chan.org", "lainchan.org", "2ch.hk", \
-    "8kun.top", "lolifox.cc"]
+    "8kun.top"]
 
 
-def get_parser_by_url(url: str) -> Parser:
+def get_parser_by_url(url: str, skip_posts: Optional[int] = None) -> Parser:
     """Parses URL and extracts from it site name, board and thread.
     And then returns initialised Parser object for detected imageboard."""
-    URLRX = r"https?:\/\/(?P<site>[\w\.]+)\/(?P<board>\w+)\/(?:\w+)?\/(?P<thread>\w+)"
     site, board, thread = search(URLRX, url).groups()
-    return get_parser_by_site(site, board, thread)
+    return get_parser_by_site(site, board, thread, skip_posts)
 
-def get_parser_by_site(site: str, board: str, thread: str) -> Parser:
+def get_parser_by_site(site: str, board: str, thread: str,
+        skip_posts: Optional[int] = None) -> Parser:
     """Returns an initialised parser for `site` with `board` and `thread`."""
     if '4chan' in site:
         from .fourchan import FourChanParser
-        return FourChanParser(board, thread)
+        return FourChanParser(board, thread, skip_posts)
     elif 'lainchan' in site:
         from .lainchan import LainchanParser
-        return LainchanParser(board, thread)
+        return LainchanParser(board, thread, skip_posts)
     elif '2ch' in site:
         from .dvach import DvachParser
-        return DvachParser(board, thread)
+        return DvachParser(board, thread, skip_posts)
     elif '8kun' in site:
         from .eightkun import EightKunParser
-        return EightKunParser(board, thread)
-    elif 'lolifox' in site:
-        from .lolifox import LolifoxParser
-        return LolifoxParser(board, thread)
+        return EightKunParser(board, thread, skip_posts)
     else:
         raise NotImplementedError(f"Parser for {site} is not implemented")