1
0
Fork 0

Lolifox removed. Added skip_posts handling.

This commit is contained in:
Alexander Andreev 2021-05-03 02:40:57 +04:00
parent 78d4a62c17
commit 1213cef776
Signed by: Arav
GPG Key ID: 610DF2574456329F
1 changed file with 11 additions and 13 deletions

View File

@ -1,6 +1,6 @@
"""Here are defined the JSON parsers for imageboards.""" """Here are defined the JSON parsers for imageboards."""
from re import search from re import search
from typing import List from typing import List, Optional
from scrapthechan.parser import Parser from scrapthechan.parser import Parser
@ -8,33 +8,31 @@ from scrapthechan.parser import Parser
__all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"] __all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
URLRX = r"https?:\/\/(?P<s>[\w\.]+)\/(?P<b>\w+)\/(?:\w+)?\/(?P<t>\w+)"
SUPPORTED_IMAGEBOARDS: List[str] = ["4chan.org", "lainchan.org", "2ch.hk", \ SUPPORTED_IMAGEBOARDS: List[str] = ["4chan.org", "lainchan.org", "2ch.hk", \
"8kun.top", "lolifox.cc"] "8kun.top"]
def get_parser_by_url(url: str) -> Parser: def get_parser_by_url(url: str, skip_posts: Optional[int] = None) -> Parser:
"""Parses URL and extracts from it site name, board and thread. """Parses URL and extracts from it site name, board and thread.
And then returns initialised Parser object for detected imageboard.""" And then returns initialised Parser object for detected imageboard."""
URLRX = r"https?:\/\/(?P<s>[\w\.]+)\/(?P<b>\w+)\/(?:\w+)?\/(?P<t>\w+)"
site, board, thread = search(URLRX, url).groups() site, board, thread = search(URLRX, url).groups()
return get_parser_by_site(site, board, thread) return get_parser_by_site(site, board, thread, skip_posts)
def get_parser_by_site(site: str, board: str, thread: str) -> Parser: def get_parser_by_site(site: str, board: str, thread: str,
skip_posts: Optional[int] = None) -> Parser:
"""Returns an initialised parser for `site` with `board` and `thread`.""" """Returns an initialised parser for `site` with `board` and `thread`."""
if '4chan' in site: if '4chan' in site:
from .fourchan import FourChanParser from .fourchan import FourChanParser
return FourChanParser(board, thread) return FourChanParser(board, thread, skip_posts)
elif 'lainchan' in site: elif 'lainchan' in site:
from .lainchan import LainchanParser from .lainchan import LainchanParser
return LainchanParser(board, thread) return LainchanParser(board, thread, skip_posts)
elif '2ch' in site: elif '2ch' in site:
from .dvach import DvachParser from .dvach import DvachParser
return DvachParser(board, thread) return DvachParser(board, thread, skip_posts)
elif '8kun' in site: elif '8kun' in site:
from .eightkun import EightKunParser from .eightkun import EightKunParser
return EightKunParser(board, thread) return EightKunParser(board, thread, skip_posts)
elif 'lolifox' in site:
from .lolifox import LolifoxParser
return LolifoxParser(board, thread)
else: else:
raise NotImplementedError(f"Parser for {site} is not implemented") raise NotImplementedError(f"Parser for {site} is not implemented")