# 58 lines, 1.7 KiB, Python
from re import match
|
|
from typing import List, Optional
|
|
|
|
from scrapthechan.parser import Parser
|
|
from scrapthechan.fileinfo import FileInfo
|
|
|
|
# Names exported by a star-import of this module: only the parser class.
__all__ = ["LainchanParser"]
|
|
|
|
|
|
class LainchanParser(Parser):
    """JSON parser for the lainchan.org image board.

    The thread's JSON is downloaded from lainchan.org and its ``posts`` list
    is handed to the base ``Parser``.  This class only supplies the URL
    templates, the image board name and the per-post file extraction.
    The original author notes that the JSON structure is identical to
    4chan.org's.
    """

    # URL templates. The double-underscore names are name-mangled, i.e.
    # private to this class.
    __url_thread_json = "https://lainchan.org/{board}/res/{thread}.json"
    __url_file_link = "https://lainchan.org/{board}/src/{filename}"

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Fetch the thread's JSON and initialise the base parser with it.

        Args:
            board: board name, substituted into the thread URL.
            thread: thread identifier, substituted into the thread URL.
            skip_posts: passed through unchanged to ``Parser.__init__``.
        """
        thread_url = self.__url_thread_json.format(board=board, thread=thread)
        posts = self._get_json(thread_url)['posts']
        super().__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        """Name of the image board this parser handles."""
        return "lainchan.org"

    @staticmethod
    def __choose_filename(entry, dlfname: str) -> str:
        """Return the file name to record for one file entry.

        Args:
            entry: a post, or one item of its ``extra_files``; it is only
                queried for the ``filename`` and ``ext`` keys.
            dlfname: the server-side name (``tim`` + ``ext``), used as the
                fallback.

        Returns:
            ``filename`` + ``ext`` when the entry's ``filename`` matches
            ``image.<word>``; otherwise ``dlfname`` (also when the entry has
            no ``filename`` key at all).
        """
        if "filename" not in entry:
            return dlfname
        # re.match takes (pattern, string); passing the file name as the
        # pattern would compile user-supplied text as a regex.
        # NOTE(review): the branch direction is kept exactly as it was
        # written (only names matching ``image.<word>`` are kept) -- confirm
        # that this, and not the opposite, is the intended behaviour.
        if match(r"^image\.\w+$", entry['filename']) is None:
            return dlfname
        return f"{entry['filename']}{entry['ext']}"

    def _parse_post(self, post) -> Optional[List[FileInfo]]:
        """Extract every file attached to a post.

        Args:
            post: one element of the thread's ``posts`` list.

        Returns:
            ``None`` when the post has no ``tim`` key (no attachment),
            otherwise a list with one ``FileInfo`` for the post's own file
            followed by one per item of ``extra_files``, if present.
        """
        if 'tim' not in post:
            return None

        # The post itself describes its first file; any further files are
        # listed under "extra_files" and carry their own tim/ext/fsize/md5.
        entries = [post]
        if "extra_files" in post:
            entries.extend(post["extra_files"])

        files = []
        for entry in entries:
            dlfname = f"{entry['tim']}{entry['ext']}"
            dlurl = self.__url_file_link.format(board=self.board,
                                                filename=dlfname)
            files.append(FileInfo(self.__choose_filename(entry, dlfname),
                                  entry['fsize'], dlurl, entry['md5'], 'md5'))
        return files
|