Initial commit
Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
This commit is contained in:
245
src/logic/identification.py
Normal file
245
src/logic/identification.py
Normal file
@@ -0,0 +1,245 @@
|
||||
"""Book identification logic: status computation, AI result application, plugin runners."""
|
||||
|
||||
import json
|
||||
|
||||
import db
|
||||
from db import now
|
||||
from errors import BookNotFoundError, NoRawTextError
|
||||
from logic.boundaries import book_spine_source
|
||||
from logic.images import prep_img_b64
|
||||
from models import (
|
||||
AIIdentifyResult,
|
||||
BookIdentifierPlugin,
|
||||
BookRow,
|
||||
CandidateRecord,
|
||||
TextRecognizeResult,
|
||||
TextRecognizerPlugin,
|
||||
)
|
||||
|
||||
# Bibliographic fields that have an AI-suggested counterpart stored as ``ai_<field>``.
AI_FIELDS = (
    "title",
    "author",
    "year",
    "isbn",
    "publisher",
)

# User fields that must all be non-blank for a book to count as 'user_approved'.
_APPROVED_REQUIRED = (
    "title",
    "author",
    "year",
)
|
||||
|
||||
|
||||
def compute_status(book: BookRow) -> str:
    """Derive the identification status of a book from its current field values.

    A book whose ``ai_title`` is blank is 'unidentified'. Otherwise it is
    'user_approved' when every field in ``_APPROVED_REQUIRED`` is non-blank and
    no non-blank ``ai_*`` value differs from its user counterpart; in every
    other case it is 'ai_identified'. Values are compared after stripping
    surrounding whitespace, and falsy values (e.g. None) count as blank.

    Args:
        book: The book row to evaluate.

    Returns:
        One of 'unidentified', 'ai_identified', or 'user_approved'.
    """

    def text(attr: str) -> str:
        # Normalise an attribute: falsy -> "", then trim surrounding whitespace.
        return (getattr(book, attr) or "").strip()

    if not text("ai_title"):
        return "unidentified"

    # Every required user field must carry some text.
    required_present = True
    for name in _APPROVED_REQUIRED:
        if not text(name):
            required_present = False
            break

    # A blank AI suggestion never counts as a difference; a non-blank one
    # must equal the user value exactly (after trimming).
    suggestions_accepted = True
    for name in AI_FIELDS:
        suggestion = text(f"ai_{name}")
        if suggestion and text(name) != suggestion:
            suggestions_accepted = False
            break

    if required_present and suggestions_accepted:
        return "user_approved"
    return "ai_identified"
|
||||
|
||||
|
||||
def build_query(book: BookRow) -> str:
    """Assemble a search query from the best data available for a book.

    Sources are tried in this order and the first non-empty result wins:

    1. The stored candidates (a JSON list in ``book.candidates``), in order:
       the first record with a non-blank author and/or title, joined as
       "author title".
    2. The book's ``ai_author`` / ``ai_title`` values, joined the same way.
    3. The book's ``raw_text``, stripped.

    Args:
        book: The book row to build a query for.

    Returns:
        Query string, empty if no usable data is available.
    """

    def joined(author: str, title: str) -> str:
        # Author first, then title; blank parts are dropped so the result
        # never contains a stray leading or trailing space.
        return " ".join(part for part in (author, title) if part)

    for record in json.loads(book.candidates or "[]"):
        author = str(record.get("author") or "").strip()
        title = str(record.get("title") or "").strip()
        query = joined(author, title)
        if query:
            return query

    query = joined((book.ai_author or "").strip(), (book.ai_title or "").strip())
    if query:
        return query

    return (book.raw_text or "").strip()
|
||||
|
||||
|
||||
def save_user_fields(book_id: str, title: str, author: str, year: str, isbn: str, publisher: str, notes: str) -> str:
    """Store the user-edited fields of a book and refresh its identification status.

    Everything happens in one transaction: the fields are written through
    ``db.set_user_book_fields``, the book is read back, its status is
    recomputed with ``compute_status`` and stored. If the book cannot be read
    back, the status 'unidentified' is used.

    NOTE(review): earlier documentation states that the ai_* fields are also
    set to match the user values so they are treated as approved. If so, that
    happens inside ``db.set_user_book_fields`` -- confirm there.

    Args:
        book_id: ID of the book to update.
        title: User-provided title.
        author: User-provided author.
        year: User-provided year.
        isbn: User-provided ISBN.
        publisher: User-provided publisher.
        notes: User-provided notes.

    Returns:
        Updated identification_status string.
    """
    with db.transaction() as conn:
        db.set_user_book_fields(conn, book_id, title, author, year, isbn, publisher, notes)

        # Read the row back so the status reflects what is actually stored.
        refreshed = db.get_book(conn, book_id)
        if refreshed:
            new_status = compute_status(refreshed)
        else:
            new_status = "unidentified"

        db.set_book_status(conn, book_id, new_status)

    return new_status
|
||||
|
||||
|
||||
def dismiss_field(book_id: str, field: str, value: str) -> tuple[str, list[CandidateRecord]]:
    """Dismiss a suggested value for one field of a book.

    With a non-empty ``value``: every candidate record whose ``field`` entry
    (stripped) equals ``value`` is removed, and if the book's ``ai_<field>``
    (stripped) equals ``value`` it is reset to the current user value.
    With an empty ``value``: ``ai_<field>`` is reset to the current user value
    and the candidates are left as they are.

    Afterwards the identification status is recomputed and stored.

    Args:
        book_id: ID of the book.
        field: Field name; must be one of AI_FIELDS.
        value: Candidate value to dismiss, or empty string to dismiss the AI suggestion.

    Returns:
        (identification_status, updated_candidates).

    Raises:
        ValueError: If field is not one of AI_FIELDS.
        BookNotFoundError: If book_id does not exist.
    """
    # ``field`` is used to build an attribute name (``ai_<field>``) and is
    # handed to the DB layer. Reject unknown names up front with a clear error
    # instead of failing part-way through the update.
    if field not in AI_FIELDS:
        raise ValueError(f"field must be one of {AI_FIELDS}, got {field!r}")

    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)

        candidates: list[CandidateRecord] = json.loads(book.candidates or "[]")
        user_value = str(getattr(book, field) or "")

        if value:
            # Drop whole candidate records that propose the dismissed value.
            candidates = [cand for cand in candidates if str(cand.get(field) or "").strip() != value]
            db.set_book_candidates(c, book_id, json.dumps(candidates))
            # Only revert the AI suggestion when it is the value being dismissed.
            if (getattr(book, f"ai_{field}") or "").strip() == value:
                db.set_book_ai_field(c, book_id, field, user_value)
        else:
            db.set_book_ai_field(c, book_id, field, user_value)

        # Re-read so the status and the returned candidates reflect what was just written.
        book = db.get_book(c, book_id)
        status = compute_status(book) if book else "unidentified"
        db.set_book_status(c, book_id, status)
        candidates = json.loads(book.candidates or "[]") if book else []

    return status, candidates
|
||||
|
||||
|
||||
def apply_ai_result(book_id: str, result: AIIdentifyResult, confidence_threshold: float = 0.8) -> None:
    """Record the outcome of an AI identification on a book.

    The confidence (a missing or falsy value counts as 0) is always stored,
    together with the value returned by ``now()``. The ai_* fields are
    overwritten, and the identification status recomputed, only when the
    confidence is not below ``confidence_threshold``. All writes share one
    transaction.

    Args:
        book_id: ID of the book to update.
        result: AI identification result dict.
        confidence_threshold: Minimum confidence to write ai_* fields (default 0.8).
    """
    score = float(result.get("confidence") or 0)

    with db.transaction() as conn:
        db.set_book_confidence(conn, book_id, score, now())

        # Below the threshold only the confidence is recorded.
        if score < confidence_threshold:
            return

        # Missing or falsy entries are stored as empty strings, in the
        # positional order expected by db.set_book_ai_fields.
        values = [result.get(key) or "" for key in ("title", "author", "year", "isbn", "publisher")]
        db.set_book_ai_fields(conn, book_id, *values)

        refreshed = db.get_book(conn, book_id)
        if refreshed:
            db.set_book_status(conn, book_id, compute_status(refreshed))
|
||||
|
||||
|
||||
def run_text_recognizer(plugin: TextRecognizerPlugin, book_id: str) -> BookRow:
    """Recognize text from a book spine image and store the result.

    Works in three steps so that no database transaction stays open while the
    image is prepared and the plugin runs (neither needs the connection):

    1. A short transaction checks that the book exists and looks up its spine
       image source.
    2. Outside any transaction, the image is prepared and passed to
       ``plugin.recognize``.
    3. A second short transaction re-reads the book, stores ``raw_text`` and
       merges the result into the candidates list. Any earlier candidate from
       the same plugin is replaced; a result with no title, author, year or
       publisher adds no candidate.

    Args:
        plugin: The text recognizer plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the result.

    Raises:
        BookNotFoundError: If book_id does not exist, either initially or when
            the result is about to be stored.
    """
    # Step 1: read-only lookup.
    with db.transaction() as c:
        if not db.get_book(c, book_id):
            raise BookNotFoundError(book_id)
        spine_path, spine_crop = book_spine_source(c, book_id)

    # Step 2: image preparation and recognition. Presumably the slow part
    # (external plugin call), so it is kept outside of any transaction.
    b64, mt = prep_img_b64(spine_path, spine_crop, max_px=plugin.max_image_px)
    result: TextRecognizeResult = plugin.recognize(b64, mt)
    raw_text = result.get("raw_text") or ""
    cand: CandidateRecord = {
        "source": plugin.plugin_id,
        "title": (result.get("title") or "").strip(),
        "author": (result.get("author") or "").strip(),
        "year": (result.get("year") or "").strip(),
        "publisher": (result.get("publisher") or "").strip(),
        "isbn": "",
    }

    # Step 3: store. The book is re-read here so the candidate merge is based
    # on the current stored list rather than the one seen before the plugin ran.
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)

        existing: list[CandidateRecord] = json.loads(book.candidates or "[]")
        # One candidate per plugin: drop this plugin's previous entry first.
        existing = [cd for cd in existing if cd.get("source") != plugin.plugin_id]
        if any([cand["title"], cand["author"], cand["year"], cand["publisher"]]):
            existing.append(cand)

        db.set_book_raw_text(c, book_id, raw_text)
        db.set_book_candidates(c, book_id, json.dumps(existing))

        updated = db.get_book(c, book_id)
        if not updated:
            raise BookNotFoundError(book_id)

    return updated
|
||||
|
||||
|
||||
def run_book_identifier(plugin: BookIdentifierPlugin, book_id: str) -> BookRow:
    """Identify a book using AI and update ai_* fields and candidates.

    Requires raw_text to have been populated by a text recognizer first.

    The stored raw_text is read in a short transaction, then passed to
    ``plugin.identify`` outside any transaction. The result is applied through
    ``apply_ai_result`` (which opens its own transaction) and finally merged
    into the candidates list in a further transaction. Any earlier candidate
    from the same plugin is replaced; a result whose title, author, year, isbn
    and publisher are all blank adds no candidate.

    Args:
        plugin: The book identifier plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the identification result.

    Raises:
        BookNotFoundError: If book_id does not exist.
        NoRawTextError: If the book has no raw_text (text recognizer has not run).
    """
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)
        raw_text = (book.raw_text or "").strip()
        if not raw_text:
            raise NoRawTextError(book_id)

    # The plugin call does not need the connection, so it runs with no
    # transaction open.
    result: AIIdentifyResult = plugin.identify(raw_text)

    # apply_ai_result manages its own transaction
    apply_ai_result(book_id, result, plugin.confidence_threshold)

    cand: CandidateRecord = {
        "source": plugin.plugin_id,
        "title": (result.get("title") or "").strip(),
        "author": (result.get("author") or "").strip(),
        "year": (result.get("year") or "").strip(),
        "isbn": (result.get("isbn") or "").strip(),
        "publisher": (result.get("publisher") or "").strip(),
    }
    # A record carrying nothing but its source offers no suggestion, so it is
    # not stored (same rule the text recognizer applies to its candidates).
    has_content = any([cand["title"], cand["author"], cand["year"], cand["isbn"], cand["publisher"]])

    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)

        existing: list[CandidateRecord] = json.loads(book.candidates or "[]")
        # One candidate per plugin: drop this plugin's previous entry first.
        existing = [cd for cd in existing if cd.get("source") != plugin.plugin_id]
        if has_content:
            existing.append(cand)
        db.set_book_candidates(c, book_id, json.dumps(existing))

        updated = db.get_book(c, book_id)
        if not updated:
            raise BookNotFoundError(book_id)

    return updated
|
||||
Reference in New Issue
Block a user