Add per-request AI logging, DB batch queue, WS entity updates, and UI polish
- log_thread.py: thread-safe ContextVar bridge so executor threads can log
individual LLM calls and archive searches back to the event loop
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with batch_queue SQLite table;
run_batch_consumer() reads queue dynamically so new books can be added
while batch is running; add_to_queue() deduplicates
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
multi-file global architecture; all lint errors resolved
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,23 +1,38 @@
|
||||
"""Book identifier plugin — raw spine text → bibliographic metadata.
|
||||
"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks.
|
||||
|
||||
Input: raw_text string (from text_recognizer).
|
||||
Output: {"title": "...", "author": "...", "year": "...", "isbn": "...",
|
||||
"publisher": "...", "confidence": 0.95}
|
||||
confidence — float 0-1; results below confidence_threshold are discarded by logic.py.
|
||||
Result added to books.candidates and books.ai_* fields.
|
||||
Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates),
|
||||
images (list of (b64, mime) pairs if is_vlm).
|
||||
Output: list of IdentifyBlock dicts ranked by descending confidence score.
|
||||
Result stored as books.ai_blocks JSON.
|
||||
"""
|
||||
|
||||
from models import AIConfig, AIIdentifyResult
|
||||
import json
|
||||
from typing import Any, TypeGuard
|
||||
|
||||
from models import AIConfig, CandidateRecord, IdentifyBlock
|
||||
|
||||
from ._client import AIClient
|
||||
|
||||
|
||||
def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]:
|
||||
return isinstance(v, dict)
|
||||
|
||||
|
||||
def _is_any_list(v: object) -> TypeGuard[list[Any]]:
|
||||
return isinstance(v, list)
|
||||
|
||||
|
||||
class BookIdentifierPlugin:
|
||||
"""Identifies a book from spine text using a VLM with web-search capability."""
|
||||
"""Identifies a book by combining VLM spine text with archive search results."""
|
||||
|
||||
category = "book_identifiers"
|
||||
OUTPUT_FORMAT = (
|
||||
'{"title": "...", "author": "...", "year": "...", ' '"isbn": "...", "publisher": "...", "confidence": 0.95}'
|
||||
'[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", '
|
||||
'"year": "1967", "isbn": "", "publisher": "YMCA Press", '
|
||||
'"score": 0.95, "sources": ["rusneb", "openlibrary"]}, '
|
||||
'{"title": "Master i Margarita", "author": "M. Bulgakov", '
|
||||
'"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", '
|
||||
'"score": 0.72, "sources": ["web"]}]'
|
||||
)
|
||||
|
||||
def __init__(
|
||||
@@ -36,21 +51,67 @@ class BookIdentifierPlugin:
|
||||
self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
|
||||
self._prompt_text = prompt_text
|
||||
|
||||
def identify(self, raw_text: str) -> AIIdentifyResult:
|
||||
"""Returns AIIdentifyResult with title/author/year/isbn/publisher/confidence."""
|
||||
raw = self._client.call(self._prompt_text, [], text_vars={"RAW_TEXT": raw_text})
|
||||
result = AIIdentifyResult(
|
||||
title=str(raw.get("title") or ""),
|
||||
author=str(raw.get("author") or ""),
|
||||
year=str(raw.get("year") or ""),
|
||||
isbn=str(raw.get("isbn") or ""),
|
||||
publisher=str(raw.get("publisher") or ""),
|
||||
def identify(
    self,
    raw_text: str,
    archive_results: list[CandidateRecord],
    images: list[tuple[str, str]],
) -> list[IdentifyBlock]:
    """Call the AI model to produce ranked identification blocks.

    Args:
        raw_text: Verbatim text read from the book spine.
        archive_results: Deduplicated candidates from archive searchers.
        images: (base64, mime_type) pairs; non-empty only when is_vlm is True.

    Returns:
        List of IdentifyBlock dicts ranked by descending score. Response items
        that are not dicts are skipped; a missing, non-numeric or NaN score is
        treated as 0.0 so one malformed block cannot abort the whole result.
    """
    archive_json = json.dumps(archive_results, ensure_ascii=False)
    raw = self._client.call(
        self._prompt_text,
        images,
        text_vars={"RAW_TEXT": raw_text, "ARCHIVE_RESULTS": archive_json},
        output_is_list=True,
    )
    # NOTE(review): presumably the client returns a list when output_is_list=True;
    # anything else is treated as "no identification" instead of crashing the loop.
    if not _is_any_list(raw):
        return []

    blocks: list[IdentifyBlock] = []
    for item in raw:
        if not _is_str_dict(item):
            continue

        sources_val = item.get("sources")
        sources: list[str] = (
            [sv for sv in sources_val if isinstance(sv, str)]
            if _is_any_list(sources_val)
            else []
        )

        # Model output is untrusted: "score" may be a word, a list, or absent.
        try:
            score = float(item.get("score") or 0.0)
        except (TypeError, ValueError, OverflowError):
            score = 0.0
        if score != score:  # NaN compares unequal to itself and would break the sort
            score = 0.0

        blocks.append(
            IdentifyBlock(
                title=str(item.get("title") or "").strip(),
                author=str(item.get("author") or "").strip(),
                year=str(item.get("year") or "").strip(),
                isbn=str(item.get("isbn") or "").strip(),
                publisher=str(item.get("publisher") or "").strip(),
                score=score,
                sources=sources,
            )
        )

    return sorted(blocks, key=lambda b: b.get("score", 0.0), reverse=True)
|
||||
|
||||
@property
def model(self) -> str:
    """Name of the AI model used for identification, read from the client config."""
    client_cfg = self._client.cfg
    return client_cfg["model"]
|
||||
|
||||
@property
def max_image_px(self) -> int:
    """Largest pixel dimension allowed for images passed to the AI model."""
    client_cfg = self._client.cfg
    return client_cfg["max_image_px"]
|
||||
|
||||
@property
def confidence_threshold(self) -> float:
    """Minimum score the top block must reach for the ai_* fields to be set."""
    client_cfg = self._client.cfg
    return client_cfg["confidence_threshold"]
|
||||
|
||||
@property
def is_vlm(self) -> bool:
    """Whether images should be included in the request (True for a VLM config)."""
    client_cfg = self._client.cfg
    return client_cfg["is_vlm"]
|
||||
|
||||
Reference in New Issue
Block a user