llm_voice_assistant/sttts/synthesizer/sentence/segmenter.py
import re
from typing import Iterator, Optional
from sttts.api.model import SentenceSegmenter
class SentenceSplitSegmenter(SentenceSegmenter):
    """
    Split streamed text into sentences by applying a simple expression that recognizes newlines or certain punctuation
    characters followed by space.

    Text is accumulated in an internal buffer across :meth:`push` calls; everything up to and including a recognized
    delimiter is emitted as one sentence, the remainder stays buffered until more text arrives or :meth:`drain` is
    called.
    """

    def __init__(self, delimiter_chars: str = ".!!??::;") -> None:
        """
        :param str delimiter_chars: Characters that end sentences if followed by a space, default ``.!!??::;``.
            Every character is taken literally. If empty, only newlines split sentences.
        """
        # A line break always ends a sentence, swallowing any whitespace that directly follows it.
        newline_pattern = r"[\r\n]\s*"
        if delimiter_chars:
            # Escape the characters so that ']', '^', '-' or '\' cannot break or alter the character class.
            pattern = rf"[{re.escape(delimiter_chars)}]\s+|{newline_pattern}"
        else:
            # An empty '[]' would not be an empty class but the start of one containing ']', so omit it entirely.
            pattern = newline_pattern
        self._delimiter: re.Pattern = re.compile(pattern)
        # Text received so far that has not been emitted as a sentence yet.
        self._buffer: str = ""

    def push(self, utterance: str) -> Iterator[str]:
        """
        Append text to the internal buffer and yield all sentences that are complete by now.

        This is a generator, so the text is only buffered once the returned iterator is consumed.

        :param str utterance: Next chunk of streamed text.
        :return: Complete sentences with surrounding whitespace stripped, empty ones are skipped.
        """
        self._buffer += utterance
        while True:
            m: Optional[re.Match] = self._delimiter.search(self._buffer)
            if m is None:
                # No further delimiter, keep the remainder for the next push() or drain().
                break
            # The delimiter itself belongs to the sentence, trailing whitespace is removed by strip().
            sentence, self._buffer = self._buffer[:m.end()].strip(), self._buffer[m.end():]
            if sentence:
                yield sentence

    def drain(self, utterance: Optional[str]) -> Iterator[str]:
        """
        Clear the internal buffer and yield what is left of it, followed by the given text, if any.

        This is a generator, so the buffer is only cleared once the returned iterator is consumed.

        :param utterance: Optional text that is yielded unmodified as a separate item after the buffered remainder.
        :return: The stripped buffer content if non-empty, then ``utterance`` if non-empty.
        """
        sentence, self._buffer = self._buffer.strip(), ""
        if sentence:
            yield sentence
        if utterance:
            yield utterance