from typing import Iterator, List, Optional

import pysbd

from sttts.api.message import ModelError
from sttts.api.model import SentenceSegmenter


class SentenceBoundarySegmenter(SentenceSegmenter):
    """
    Use the `pySBD <https://github.com/nipunsadvilkar/pySBD>`__ module for sentence boundary disambiguation.

    Pushed text is accumulated in an internal buffer. Each :meth:`push` re-segments the whole buffer and emits
    every segment except the last one, which is kept back as it might still be incomplete. :meth:`drain` flushes
    whatever is left.
    """

    def __init__(self, *, language: str = "en") -> None:
        """
        :param str language: Implementation to use, default ``en``.
        :raises ModelError: If pySBD rejects the configuration with a :class:`ValueError`
            (presumably an unsupported language code).
        """

        # Text received so far that has not been emitted as a sentence yet.
        self._buffer: str = ""
        try:
            self._segmenter: pysbd.Segmenter = pysbd.Segmenter(
                language=language,
                clean=False, char_span=False,  # XXX: cannot clean *and* get offsets
            )
        except ValueError as e:
            # Chain explicitly, so the original pySBD error stays attached as cause.
            raise ModelError(self.__class__.__name__, language, str(e)) from e

    def push(self, utterance: str) -> Iterator[str]:
        """
        Append text to the buffer and yield all sentences that are followed by further text.

        This is a generator, so the buffer is only touched once the returned iterator is consumed.

        :param str utterance: Text fragment to append, does not need to align with sentence boundaries.
        :return: Whitespace-stripped, non-empty sentences, in order. The trailing segment is retained.
        """

        self._buffer += utterance
        parts: List[str] = self._segmenter.segment(self._buffer)
        if len(parts) < 2:
            # Zero or one segment: nothing is known to be complete, keep buffering as-is.
            return

        for part in parts[:-1]:
            span: str = part.strip()
            if span:
                yield span
        # Only replace the buffer after all complete sentences have been handed out.
        self._buffer = parts[-1]

    def drain(self, utterance: Optional[str]) -> Iterator[str]:
        """
        Flush and reset the buffer, optionally followed by a final utterance.

        :param utterance: Optional text that is yielded verbatim after the buffered remainder. It is neither
            stripped nor segmented, and skipped if ``None`` or empty.
        :return: The stripped buffer content if non-empty, then the given utterance if any.
        """

        sentence, self._buffer = self._buffer.strip(), ""
        if sentence:
            yield sentence
        if utterance:
            yield utterance