Initial commit

Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
2026-03-09 14:17:13 +03:00
commit 084d1aebd5
64 changed files with 8605 additions and 0 deletions
--- a/src/plugins/ai_compat/text_recognizer.py
+++ b/src/plugins/ai_compat/text_recognizer.py
@@ -0,0 +1,56 @@
+"""Text recognizer plugin — spine image → raw text + structured fields.
+
+Input: book spine image.
+Output: {"raw_text": "...", "title": "...", "author": "...", "year": "...",
+         "publisher": "...", "other": "..."}
+  raw_text — all visible text verbatim, line-break separated.
+  other fields — VLM interpretation of raw_text.
+Result added to books.candidates and books.raw_text.
+"""
+
+from models import AIConfig, TextRecognizeResult
+
+from ._client import AIClient
+
+
+class TextRecognizerPlugin:
+    """Reads text from a book spine image using a VLM."""
+
+    category = "text_recognizers"
+    OUTPUT_FORMAT = (
+        '{"raw_text": "The Great Gatsby\\nF. Scott Fitzgerald\\nScribner", '
+        '"title": "The Great Gatsby", "author": "F. Scott Fitzgerald", '
+        '"year": "", "publisher": "Scribner", "other": ""}'
+    )
+
+    def __init__(
+        self,
+        plugin_id: str,
+        name: str,
+        ai_config: AIConfig,
+        prompt_text: str,
+        auto_queue: bool,
+        rate_limit_seconds: float,
+    ):
+        self.plugin_id = plugin_id
+        self.name = name
+        self.auto_queue = auto_queue
+        self.rate_limit_seconds = rate_limit_seconds
+        self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
+        self._prompt_text = prompt_text
+
+    def recognize(self, image_b64: str, image_mime: str) -> TextRecognizeResult:
+        """Returns TextRecognizeResult with raw_text, title, author, year, publisher, other."""
+        raw = self._client.call(self._prompt_text, [(image_b64, image_mime)])
+        return TextRecognizeResult(
+            raw_text=str(raw.get("raw_text") or ""),
+            title=str(raw.get("title") or ""),
+            author=str(raw.get("author") or ""),
+            year=str(raw.get("year") or ""),
+            publisher=str(raw.get("publisher") or ""),
+            other=str(raw.get("other") or ""),
+        )
+
+    @property
+    def max_image_px(self) -> int:
+        return self._client.cfg["max_image_px"]