Added a parser for 8kun.top and changed the site comparisons in __init__.
This commit is contained in:
parent
93d2904a4f
commit
520d88c76a
@ -8,7 +8,8 @@ from scrapthechan.parser import Parser
|
|||||||
__all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
|
__all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
|
||||||
|
|
||||||
|
|
||||||
# Image boards for which a parser is available.
SUPPORTED_IMAGEBOARDS: List[str] = [
    "4chan.org",
    "lainchan.org",
    "2ch.hk",
    "8kun.top",
]
|
||||||
|
|
||||||
|
|
||||||
def get_parser_by_url(url: str) -> Parser:
|
def get_parser_by_url(url: str) -> Parser:
|
||||||
@ -20,15 +21,17 @@ def get_parser_by_url(url: str) -> Parser:
|
|||||||
|
|
||||||
def get_parser_by_site(site: str, board: str, thread: str) -> Parser:
    """Returns an initialised parser for `site` with `board` and `thread`.

    `site` is matched by substring, so both short names ("4chan") and full
    host names ("boards.4chan.org") select the same parser. Matching is
    case-insensitive because host names are.

    The parser modules are imported lazily, only for the branch taken.

    Raises:
        NotImplementedError: if no known imageboard name occurs in `site`.
    """
    # Compare on a lowercased copy; the error message below still reports
    # the value exactly as the caller passed it.
    host = site.lower()

    if '4chan' in host:
        from .fourchan import FourChanParser
        return FourChanParser(board, thread)
    elif 'lainchan' in host:
        from .lainchan import LainchanParser
        return LainchanParser(board, thread)
    elif '2ch' in host:
        from .dvach import DvachParser
        return DvachParser(board, thread)
    elif '8kun' in host:
        from .eightkun import EightKunParser
        return EightKunParser(board, thread)
    else:
        raise NotImplementedError(f"Parser for {site} is not implemented")
|
||||||
|
63
scrapthechan/parsers/eightkun.py
Normal file
63
scrapthechan/parsers/eightkun.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from re import match
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from scrapthechan.fileinfo import FileInfo
|
||||||
|
from scrapthechan.parser import Parser
|
||||||
|
|
||||||
|
__all__ = ["EightKunParser"]
|
||||||
|
|
||||||
|
|
||||||
|
class EightKunParser(Parser):
    """JSON parser for 8kun.top image board."""

    # URL templates for a thread's JSON document and for a stored file.
    __url_thread_json = "https://8kun.top/{board}/res/{thread}.json"
    __url_file_link = "https://media.8kun.top/file_store/{filename}"

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Load the thread's posts and initialise the base parser with them.

        The thread JSON is obtained through `self._get_json` and its
        'posts' entry is passed on to `Parser.__init__` together with
        `board`, `thread` and `skip_posts`.
        """
        url = self.__url_thread_json.format(board=board, thread=thread)
        posts = self._get_json(url)['posts']
        super().__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        """Name of the imageboard this parser handles."""
        return "8kun.top"

    @property
    def op(self) -> Optional[str]:
        """Text of the opening post, or None when it has neither field.

        Built from the 'sub' entry (followed by a newline) and the 'com'
        entry of `self._op_post`, whichever are present.
        """
        op = ""
        if 'sub' in self._op_post:
            op = f"{self._op_post['sub']}\n"
        if 'com' in self._op_post:
            op += self._op_post['com']
        return op if op != "" else None

    def _file_info(self, entry: dict, generic_name: str) -> FileInfo:
        """Build a FileInfo for one file entry (a post or an extra file).

        `entry` must carry 'tim', 'ext', 'fsize' and 'md5'; 'filename' is
        optional. `generic_name` is the regular expression the original
        file name is tested against.
        """
        # Name under which the file is stored on the server.
        dlfname = f"{entry['tim']}{entry['ext']}"

        # Fall back to the server-side name when the entry has no original
        # file name (previously `filename` was left unbound in that case).
        filename = dlfname
        # re.match takes the pattern first, then the string. The original
        # call had them swapped, compiling the uploaded file name as a
        # regular expression.
        # NOTE(review): the branch polarity is kept as written -- only names
        # matching `generic_name` keep their original name. Confirm that
        # this, and not the opposite, is the intended behaviour.
        if "filename" in entry \
                and match(generic_name, entry['filename']) is not None:
            filename = f"{entry['filename']}{entry['ext']}"

        dlurl = self.__url_file_link.format(board=self.board,
                                            filename=dlfname)
        return FileInfo(filename, entry['fsize'], dlurl, entry['md5'], 'md5')

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        """Collect the files attached to `post`.

        Returns None when the post has no 'tim' key (no file attached);
        otherwise a list with the post's own file followed by one item for
        every element of its 'extra_files' list, if present.
        """
        if 'tim' not in post:
            return None

        files = [self._file_info(post, r"^image\.\w{1,4}$")]

        # Each extra file is described by its own dict, so its name and
        # extension are read from that dict rather than from the post.
        for extra in post.get("extra_files") or ():
            files.append(self._file_info(extra, r"^image\.\w+$"))

        return files
|
@ -3,7 +3,7 @@ name = scrapthechan
|
|||||||
version = attr: scrapthechan.__version__
|
version = attr: scrapthechan.__version__
|
||||||
description =
|
description =
|
||||||
Scrap the files posted in a thread on an imageboard. Currently supports
|
Scrap the files posted in a thread on an imageboard. Currently supports
|
||||||
4chan.org, lainchan.org and 2ch.hk.
|
4chan.org, lainchan.org, 2ch.hk and 8kun.top.
|
||||||
long_description = file: README.md
|
long_description = file: README.md
|
||||||
long_description_content_type = text/markdown
|
long_description_content_type = text/markdown
|
||||||
author = Alexander "Arav" Andreev
|
author = Alexander "Arav" Andreev
|
||||||
@ -15,6 +15,7 @@ keywords =
|
|||||||
4chan
|
4chan
|
||||||
2ch
|
2ch
|
||||||
lainchan
|
lainchan
|
||||||
|
8kun.top
|
||||||
license = MIT
|
license = MIT
|
||||||
license_file = COPYING
|
license_file = COPYING
|
||||||
classifiers =
|
classifiers =
|
||||||
|
Loading…
Reference in New Issue
Block a user