35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
"""Defines the JSON parsers for imageboards and helpers to pick one."""
|
|
from re import search
|
|
from typing import List
|
|
|
|
from scrapthechan.parser import Parser
|
|
|
|
|
|
# Names exported by `from <this module> import *`.
__all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
|
|
|
|
|
|
# Site names of the imageboards this module has parsers for.
# `get_parser_by_site` accepts each of these and a few aliases as well
# (e.g. "boards.4chan.org", "lainchan", "2ch").
SUPPORTED_IMAGEBOARDS: List[str] = ["4chan.org", "lainchan.org", "2ch.hk"]
|
|
|
|
|
|
def get_parser_by_url(url: str) -> Parser:
    """Parses URL and extracts from it site name, board and thread.

    And then returns initialised Parser object for detected imageboard.

    The URL is searched for the shape
    `http(s)://<site>/<board>/<optional word>/<thread>`, where the segment
    between board and thread may be empty but both slashes around it are
    required.

    Raises ValueError if `url` does not contain such a pattern. Any
    exception raised by `get_parser_by_site` (e.g. NotImplementedError for
    an unknown site) is propagated unchanged.
    """
    URLRX = r"https?:\/\/(?P<s>[\w\.]+)\/(?P<b>\w+)\/(?:\w+)?\/(?P<t>\w+)"
    match = search(URLRX, url)
    if match is None:
        # search() returns None when nothing matches; calling .groups() on
        # it would fail with an opaque AttributeError, so report the real
        # problem instead.
        raise ValueError(f"Cannot extract site, board and thread from URL: {url!r}")
    site, board, thread = match.groups()
    return get_parser_by_site(site, board, thread)
|
|
|
|
def get_parser_by_site(site: str, board: str, thread: str) -> Parser:
    """Pick the parser matching `site` and instantiate it.

    `site` is compared against the known names and aliases of each
    imageboard; the matching parser class is imported on demand and
    constructed with `board` and `thread`.

    Raises NotImplementedError when `site` matches none of them.
    """
    fourchan_names = (
        'boards.4chan.org',
        'boards.4channel.org',
        '4chan',
        '4chan.org',
    )
    lainchan_names = ('lainchan.org', 'lainchan')
    dvach_names = ('2ch.hk', '2ch')

    # Parser modules are imported only inside the branch that needs them.
    if site in fourchan_names:
        from .fourchan import FourChanParser
        return FourChanParser(board, thread)

    if site in lainchan_names:
        from .lainchan import LainchanParser
        return LainchanParser(board, thread)

    if site in dvach_names:
        from .dvach import DvachParser
        return DvachParser(board, thread)

    raise NotImplementedError(f"Parser for {site} is not implemented")
|