Initial commit

Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
2026-03-09 14:11:11 +03:00
commit f29678ebf1
64 changed files with 8605 additions and 0 deletions
--- a/src/logic/identification.py
+++ b/src/logic/identification.py
@@ -0,0 +1,245 @@
+"""Book identification logic: status computation, AI result application, plugin runners."""
+
+import json
+
+import db
+from db import now
+from errors import BookNotFoundError, NoRawTextError
+from logic.boundaries import book_spine_source
+from logic.images import prep_img_b64
+from models import (
+    AIIdentifyResult,
+    BookIdentifierPlugin,
+    BookRow,
+    CandidateRecord,
+    TextRecognizeResult,
+    TextRecognizerPlugin,
+)
+
+# Metadata fields compared between the user value (book.<field>) and the AI
+# suggestion (book.ai_<field>) when compute_status decides whether anything differs.
+AI_FIELDS = ("title", "author", "year", "isbn", "publisher")
+# User fields that must all be non-empty for compute_status to return 'user_approved'.
+_APPROVED_REQUIRED = ("title", "author", "year")
+
+
+def compute_status(book: BookRow) -> str:
+    """Derive the identification status of a book from its current field values.
+
+    A book without an AI title is 'unidentified'. Otherwise it is 'user_approved'
+    when every field in _APPROVED_REQUIRED is filled in and no non-empty ai_* value
+    differs from the matching user value; in all other cases it is 'ai_identified'.
+    Values are compared after stripping whitespace, with None treated as empty.
+
+    Args:
+        book: The book row to evaluate.
+
+    Returns:
+        One of 'unidentified', 'ai_identified', or 'user_approved'.
+    """
+
+    def text(attr: str) -> str:
+        # None counts as empty; surrounding whitespace is ignored.
+        return (getattr(book, attr) or "").strip()
+
+    if not text("ai_title"):
+        return "unidentified"
+
+    required_present = all(text(name) for name in _APPROVED_REQUIRED)
+
+    # An AI suggestion only blocks approval when it is non-empty and disagrees
+    # with what the user currently has stored for that field.
+    suggestions_settled = True
+    for name in AI_FIELDS:
+        suggested = text(f"ai_{name}")
+        if suggested and text(name) != suggested:
+            suggestions_settled = False
+            break
+
+    if required_present and suggestions_settled:
+        return "user_approved"
+    return "ai_identified"
+
+
+def build_query(book: BookRow) -> str:
+    """Assemble a search query from the best data available for a book.
+
+    Sources are tried in order: the first stored candidate that yields a non-empty
+    "author title" string, then the ai_author/ai_title pair, then the raw OCR text.
+
+    Args:
+        book: The book row to build a query for.
+
+    Returns:
+        Query string, empty if no usable data is available.
+    """
+
+    def pair(author: str, title: str) -> str:
+        # Join whichever of the two parts is non-empty with a single space.
+        return " ".join(part for part in (author, title) if part)
+
+    for entry in json.loads(book.candidates or "[]"):
+        from_candidate = pair(
+            str(entry.get("author") or "").strip(),
+            str(entry.get("title") or "").strip(),
+        )
+        if from_candidate:
+            return from_candidate
+
+    from_ai = pair((book.ai_author or "").strip(), (book.ai_title or "").strip())
+    # Raw text is the last resort and is only consulted when the AI pair is empty.
+    return from_ai or (book.raw_text or "").strip()
+
+
+def save_user_fields(book_id: str, title: str, author: str, year: str, isbn: str, publisher: str, notes: str) -> str:
+    """Store user-edited fields for a book and refresh its identification status.
+
+    The write, the re-read and the status update all happen in one transaction.
+    NOTE(review): db.set_user_book_fields is expected to also mirror the values into
+    the ai_* fields so they count as approved; that helper is not visible here.
+
+    Args:
+        book_id: ID of the book to update.
+        title: User-provided title.
+        author: User-provided author.
+        year: User-provided year.
+        isbn: User-provided ISBN.
+        publisher: User-provided publisher.
+        notes: User-provided notes.
+
+    Returns:
+        Updated identification_status string ('unidentified' if the book cannot be re-read).
+    """
+    with db.transaction() as tx:
+        db.set_user_book_fields(tx, book_id, title, author, year, isbn, publisher, notes)
+        refreshed = db.get_book(tx, book_id)
+        if refreshed:
+            new_status = compute_status(refreshed)
+        else:
+            new_status = "unidentified"
+        db.set_book_status(tx, book_id, new_status)
+    return new_status
+
+
+def dismiss_field(book_id: str, field: str, value: str) -> tuple[str, list[CandidateRecord]]:
+    """Dismiss a candidate suggestion for a field.
+
+    If value is non-empty: removes every candidate whose stripped value for the field
+    equals it and, when the current ai_<field> equals it too, overwrites ai_<field>
+    with the user's value. If value is empty: overwrites ai_<field> with the current
+    user value. The identification status is recomputed and stored afterwards.
+
+    Args:
+        book_id: ID of the book.
+        field: Field name (one of AI_FIELDS).
+        value: Candidate value to dismiss, or empty string to dismiss the AI suggestion.
+
+    Returns:
+        (identification_status, updated_candidates).
+
+    Raises:
+        BookNotFoundError: If book_id does not exist.
+        ValueError: If field is not one of AI_FIELDS.
+    """
+    with db.transaction() as c:
+        book = db.get_book(c, book_id)
+        if not book:
+            raise BookNotFoundError(book_id)
+        # The field name drives getattr lookups and selects the ai_<field> value the
+        # db layer writes, so reject anything outside the known set explicitly.
+        # Checked after the lookup so a missing book still raises BookNotFoundError.
+        if field not in AI_FIELDS:
+            raise ValueError(f"Unknown field {field!r}; expected one of {AI_FIELDS}")
+        candidates: list[CandidateRecord] = json.loads(book.candidates or "[]")
+        user_value = str(getattr(book, field) or "")
+        if value:
+            candidates = [cand for cand in candidates if (str(cand.get(field) or "")).strip() != value]
+            db.set_book_candidates(c, book_id, json.dumps(candidates))
+            # Only revert the AI value when it is the very suggestion being dismissed.
+            if (getattr(book, f"ai_{field}") or "").strip() == value:
+                db.set_book_ai_field(c, book_id, field, user_value)
+        else:
+            db.set_book_ai_field(c, book_id, field, user_value)
+        # Re-read so the status and the returned candidates reflect the writes above.
+        book = db.get_book(c, book_id)
+        status = compute_status(book) if book else "unidentified"
+        db.set_book_status(c, book_id, status)
+        candidates = json.loads(book.candidates or "[]") if book else []
+    return status, candidates
+
+
+def apply_ai_result(book_id: str, result: AIIdentifyResult, confidence_threshold: float = 0.8) -> None:
+    """Record an AI identification result on a book.
+
+    The confidence (with a timestamp from now()) is always stored. The ai_* fields
+    and the recomputed status are written only when the confidence is not below
+    the threshold.
+
+    Args:
+        book_id: ID of the book to update.
+        result: AI identification result dict.
+        confidence_threshold: Minimum confidence to write ai_* fields (default 0.8).
+    """
+    score = float(result.get("confidence") or 0)
+    with db.transaction() as tx:
+        db.set_book_confidence(tx, book_id, score, now())
+        if score < confidence_threshold:
+            # Low-confidence result: keep the score, leave the ai_* fields untouched.
+            return
+        # Missing or None values are stored as empty strings, in the positional
+        # order title, author, year, isbn, publisher.
+        field_values = [result.get(key) or "" for key in ("title", "author", "year", "isbn", "publisher")]
+        db.set_book_ai_fields(tx, book_id, *field_values)
+        refreshed = db.get_book(tx, book_id)
+        if refreshed:
+            db.set_book_status(tx, book_id, compute_status(refreshed))
+
+
+def run_text_recognizer(plugin: TextRecognizerPlugin, book_id: str) -> BookRow:
+    """Run a text recognizer plugin on a book's spine image and persist its output.
+
+    Stores the recognized raw_text and replaces this plugin's entry in the book's
+    candidates list. A new entry is added only if the plugin returned at least one
+    of title, author, year or publisher; its isbn is always left empty.
+
+    Args:
+        plugin: The text recognizer plugin to execute.
+        book_id: ID of the book to process.
+
+    Returns:
+        Updated BookRow after storing the result.
+
+    Raises:
+        BookNotFoundError: If book_id does not exist.
+    """
+    text_keys = ("title", "author", "year", "publisher")
+    with db.transaction() as tx:
+        book = db.get_book(tx, book_id)
+        if not book:
+            raise BookNotFoundError(book_id)
+
+        image_path, crop = book_spine_source(tx, book_id)
+        image_b64, media_type = prep_img_b64(image_path, crop, max_px=plugin.max_image_px)
+        result: TextRecognizeResult = plugin.recognize(image_b64, media_type)
+
+        recognized_text = result.get("raw_text") or ""
+        candidate: CandidateRecord = {
+            "source": plugin.plugin_id,
+            **{key: (result.get(key) or "").strip() for key in text_keys},
+            "isbn": "",
+        }
+
+        # Drop any earlier entry from this plugin before (maybe) adding the new one.
+        kept: list[CandidateRecord] = [
+            entry for entry in json.loads(book.candidates or "[]") if entry.get("source") != plugin.plugin_id
+        ]
+        if any(candidate[key] for key in text_keys):
+            kept.append(candidate)
+
+        db.set_book_raw_text(tx, book_id, recognized_text)
+        db.set_book_candidates(tx, book_id, json.dumps(kept))
+
+        refreshed = db.get_book(tx, book_id)
+        if not refreshed:
+            raise BookNotFoundError(book_id)
+        return refreshed
+
+
+def run_book_identifier(plugin: BookIdentifierPlugin, book_id: str) -> BookRow:
+    """Run a book identifier plugin on a book's raw text and persist its output.
+
+    Works in three steps, each in its own transaction: read raw_text and call the
+    plugin, apply the result via apply_ai_result, then replace this plugin's entry
+    in the candidates list. Requires raw_text to be populated beforehand.
+
+    Args:
+        plugin: The book identifier plugin to execute.
+        book_id: ID of the book to process.
+
+    Returns:
+        Updated BookRow after storing the identification result.
+
+    Raises:
+        BookNotFoundError: If book_id does not exist.
+        NoRawTextError: If the book has no raw_text (text recognizer has not run).
+    """
+    # NOTE(review): the plugin call runs while the first transaction is still open.
+    with db.transaction() as tx:
+        source_book = db.get_book(tx, book_id)
+        if not source_book:
+            raise BookNotFoundError(book_id)
+        ocr_text = (source_book.raw_text or "").strip()
+        if not ocr_text:
+            raise NoRawTextError(book_id)
+        result: AIIdentifyResult = plugin.identify(ocr_text)
+
+    # apply_ai_result opens and commits its own transaction.
+    apply_ai_result(book_id, result, plugin.confidence_threshold)
+
+    with db.transaction() as tx:
+        current = db.get_book(tx, book_id)
+        if not current:
+            raise BookNotFoundError(book_id)
+        candidate: CandidateRecord = {
+            "source": plugin.plugin_id,
+            **{
+                key: (result.get(key) or "").strip()
+                for key in ("title", "author", "year", "isbn", "publisher")
+            },
+        }
+        kept: list[CandidateRecord] = [
+            entry for entry in json.loads(current.candidates or "[]") if entry.get("source") != plugin.plugin_id
+        ]
+        # The candidate is recorded even when every field came back empty.
+        kept.append(candidate)
+        db.set_book_candidates(tx, book_id, json.dumps(kept))
+        refreshed = db.get_book(tx, book_id)
+        if not refreshed:
+            raise BookNotFoundError(book_id)
+        return refreshed