"""Base `Parser` class for JSON parsers to inherit."""

from itertools import chain
from json import loads
from re import findall, match
from typing import List, Optional
from urllib.request import urlopen, Request, HTTPError

from scrapthechan import USER_AGENT
from scrapthechan.fileinfo import FileInfo


__all__ = ["Parser", "ThreadNotFoundError"]


class ThreadNotFoundError(Exception):
	"""Raised when a thread's JSON could not be retrieved.

	Arguments:
		reason -- human readable description of the failure.

	The reason is also passed to the base `Exception`, so `str(error)`,
	`error.args` and tracebacks carry it instead of an empty message."""

	def __init__(self, reason: str = ""):
		# Without this call `args` stays empty and the message is lost
		# whenever the exception is printed or logged.
		super().__init__(reason)
		self._reason = reason

	@property
	def reason(self) -> str:
		"""Returns the reason given when the exception was created."""
		return self._reason


class Parser:
	"""Base class for all parsers.

	It fetches JSON of a specified thread and collects all the files from it
	into a list of the `FileInfo` objects.
	Also it extracts OP's post, that may come handy if you do bulk scraping.

	Arguments:
		board      -- is a name of a board on an image board;
		thread     -- is an id of a thread inside a board;
		skip_posts -- number of posts to skip.

	All the extracted files will be stored as the `FileInfo` objects.

	In this base class `json_thread_url`, `file_base_url`, `imageboard` and
	`_parse_post` raise `NotImplementedError`, so child classes have to
	provide them. The thread is fetched and parsed right in the constructor."""

	def __init__(self, board: str, thread: str,
				 skip_posts: Optional[int] = None) -> None:
		self._board: str = board
		self._thread: str = thread

		posts = self._extract_posts_list(self._get_json())
		# OP's post is remembered before any skipping is applied, so `op`
		# stays available even if the first post itself is skipped.
		# A thread without posts gets an empty OP instead of an IndexError.
		self._op_post: dict = posts[0] if posts else {}
		self._posts = posts if skip_posts is None else posts[skip_posts:]

		# `_parse_post` may return None or an empty list for a post without
		# files; such results contribute nothing to the flat list.
		self._files = [
			file
			for post in self._posts
			for file in (self._parse_post(post) or ())
		]

	@property
	def json_thread_url(self) -> str:
		"""Returns a URL template of a thread's JSON. It is formatted with
		the `board` and `thread` keyword arguments. Must be overridden."""
		raise NotImplementedError

	@property
	def file_base_url(self) -> str:
		"""Must be overridden in child classes. Presumably a base URL that
		file links are built from -- it is not used by this class itself."""
		raise NotImplementedError

	@property
	def subject_field(self) -> str:
		"""Returns a key under which a subject is looked up in OP's post."""
		return "sub"

	@property
	def comment_field(self) -> str:
		"""Returns a key under which a comment is looked up in OP's post."""
		return "com"

	@property
	def imageboard(self) -> str:
		"""Returns image board's name."""
		raise NotImplementedError

	@property
	def board(self) -> str:
		"""Returns a name of a board of image board."""
		return self._board

	@property
	def thread(self) -> str:
		"""Returns a name of thread from a board."""
		return self._thread

	@property
	def op(self) -> Optional[str]:
		"""Returns OP's post as combination of subject and comment separated
		by a new line. Returns None if OP's post has neither of them."""
		op = ""
		if self.subject_field in self._op_post:
			op = f"{self._op_post[self.subject_field]}\n"
		if self.comment_field in self._op_post:
			op += self._op_post[self.comment_field]
		return op if op else None

	@property
	def files(self) -> List["FileInfo"]:
		"""Returns a list of retrieved files as `FileInfo` objects."""
		return self._files

	def _extract_posts_list(self, lst: List) -> List[dict]:
		"""This method must be overridden in child classes where you specify
		a path in a JSON document where posts are stored. E.g., on 4chan this is
		['posts'], and on 2ch.hk it's ['threads'][0]['posts'].
		By default the given object is returned as is."""
		return lst

	def _get_json(self) -> dict:
		"""Retrieves a JSON representation of a thread and converts it in
		a dictionary.

		Raises `ThreadNotFoundError` if the server answers with an HTTP error;
		the original `HTTPError` is kept as its cause. Any other exception
		is propagated unchanged."""
		thread_url = self.json_thread_url.format(board=self._board,
			thread=self._thread)
		req = Request(thread_url, headers={'User-Agent': USER_AGENT})
		try:
			with urlopen(req) as response:
				return loads(response.read().decode('utf-8'))
		except HTTPError as e:
			raise ThreadNotFoundError(str(e)) from e

	def _parse_post(self, post: dict) -> Optional[List["FileInfo"]]:
		"""Parses a single post and extracts files into `FileInfo` object.
		Single object is wrapped in a list for convenient insertion into
		a list. Must be overridden in child classes."""
		raise NotImplementedError