"""Text recognizer plugin — spine image → raw text + structured fields. Input: book spine image. Output: {"raw_text": "...", "title": "...", "author": "...", "year": "...", "publisher": "...", "other": "..."} raw_text — all visible text verbatim, line-break separated. other fields — VLM interpretation of raw_text. Result added to books.candidates and books.raw_text. """ from models import AIConfig, TextRecognizeResult from ._client import AIClient class TextRecognizerPlugin: """Reads text from a book spine image using a VLM.""" category = "text_recognizers" OUTPUT_FORMAT = ( '{"raw_text": "The Great Gatsby\\nF. Scott Fitzgerald\\nScribner", ' '"title": "The Great Gatsby", "author": "F. Scott Fitzgerald", ' '"year": "", "publisher": "Scribner", "other": ""}' ) def __init__( self, plugin_id: str, name: str, ai_config: AIConfig, prompt_text: str, auto_queue: bool, rate_limit_seconds: float, ): self.plugin_id = plugin_id self.name = name self.auto_queue = auto_queue self.rate_limit_seconds = rate_limit_seconds self._client = AIClient(ai_config, self.OUTPUT_FORMAT) self._prompt_text = prompt_text def recognize(self, image_b64: str, image_mime: str) -> TextRecognizeResult: """Returns TextRecognizeResult with raw_text, title, author, year, publisher, other.""" raw = self._client.call(self._prompt_text, [(image_b64, image_mime)]) return TextRecognizeResult( raw_text=str(raw.get("raw_text") or ""), title=str(raw.get("title") or ""), author=str(raw.get("author") or ""), year=str(raw.get("year") or ""), publisher=str(raw.get("publisher") or ""), other=str(raw.get("other") or ""), ) @property def model(self) -> str: return self._client.cfg["model"] @property def max_image_px(self) -> int: return self._client.cfg["max_image_px"]