"""Network integration tests for archive searcher plugins.

Each test queries a live external service for "War and Peace" by Tolstoy,
a book universally catalogued in all supported archives.

Run with:  pytest tests/ -m network
Skip with: pytest tests/ -m "not network"  (default in presubmit)
"""

import re

import pytest

from models import CandidateRecord
from plugins.archives.alib import AlibPlugin
from plugins.archives.openlibrary import OpenLibraryPlugin
from plugins.archives.rsl import RSLPlugin
from plugins.archives.rusneb import RusnebPlugin
from plugins.archives.shpl import ShplPlugin
from plugins.archives.sru_catalog import SRUCatalogPlugin
from plugins.rate_limiter import RateLimiter

# Every test in this module carries the "network" marker so it can be
# selected or deselected with ``-m``.
pytestmark = pytest.mark.network

# One limiter instance is handed to every plugin constructed below.
_RL = RateLimiter()
# Passed as ``timeout=`` to every plugin (presumably seconds — confirm
# against the plugin base class).
_TIMEOUT = 15

_YEAR_PAT = re.compile(r"^\d{4}$")


def _titles(results: list[CandidateRecord]) -> list[str]:
    """Return the ``title`` of every result, in order (used in failure messages)."""
    return [r["title"] for r in results]


def _authors(results: list[CandidateRecord]) -> list[str]:
    """Return the ``author`` of every result, in order (used in failure messages)."""
    return [r["author"] for r in results]


def _years(results: list[CandidateRecord]) -> list[str]:
    """Return the ``year`` of every result, in order (used in failure messages)."""
    return [r["year"] for r in results]


def _has_title(results: list[CandidateRecord], fragment: str) -> bool:
    """Return True if any result title contains fragment (case-insensitive)."""
    low = fragment.lower()
    return any(low in r["title"].lower() for r in results)


def _has_author(results: list[CandidateRecord], fragment: str) -> bool:
    """Return True if any result author contains fragment (case-insensitive)."""
    low = fragment.lower()
    return any(low in r["author"].lower() for r in results)


def _valid_year(year: str) -> bool:
    """Return True if year is a 4-digit string or empty.

    ``fullmatch`` is used rather than ``match`` because the ``$`` anchor in
    ``_YEAR_PAT`` also matches just before a trailing newline, so ``match``
    would accept an unstripped value such as ``"1869\\n"``.
    """
    return year == "" or _YEAR_PAT.fullmatch(year) is not None


# ── OpenLibrary ───────────────────────────────────────────────────────────────


def test_openlibrary_war_and_peace() -> None:
    """Search OpenLibrary for "War and Peace Tolstoy" and sanity-check the records."""
    plugin = OpenLibraryPlugin(
        plugin_id="openlibrary",
        name="OpenLibrary",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config={},
    )
    results = plugin.search("War and Peace Tolstoy")

    assert results, "OpenLibrary returned no results"
    assert all(r["source"] == "openlibrary" for r in results)
    assert _has_title(results, "war and peace"), f"titles={_titles(results)}"
    # OpenLibrary stores authors in their original language; accept both forms.
    assert _has_author(results, "tolstoy") or _has_author(results, "толст"), (
        f"authors={_authors(results)}"
    )
    assert all(_valid_year(r["year"]) for r in results), f"years={_years(results)}"
    # OpenLibrary returns isbn and publisher from its JSON API.
    assert all(isinstance(r["isbn"], str) for r in results)
    assert all(isinstance(r["publisher"], str) for r in results)


# ── RSL (РГБ) ─────────────────────────────────────────────────────────────────


def test_rsl_voina_i_mir() -> None:
    """Search RSL for "Толстой Война и мир" and sanity-check the records."""
    plugin = RSLPlugin(
        plugin_id="rsl",
        name="РГБ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config={},
    )
    results = plugin.search("Толстой Война и мир")

    assert results, "RSL returned no results"
    assert all(r["source"] == "rsl" for r in results)
    assert _has_title(results, "война"), f"titles={_titles(results)}"
    assert all(_valid_year(r["year"]) for r in results), f"years={_years(results)}"
    # This test expects isbn and publisher to be empty strings on every record.
    assert all(r["isbn"] == "" for r in results)
    assert all(r["publisher"] == "" for r in results)


# ── НЭБ (rusneb) ─────────────────────────────────────────────────────────────


def test_rusneb_voina_i_mir() -> None:
    """Search НЭБ for "Война и мир Толстой" and sanity-check the records."""
    plugin = RusnebPlugin(
        plugin_id="rusneb",
        name="НЭБ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config={},
    )
    results = plugin.search("Война и мир Толстой")

    assert results, "НЭБ returned no results"
    assert all(r["source"] == "rusneb" for r in results)
    assert _has_title(results, "война"), f"titles={_titles(results)}"
    assert _has_author(results, "толст"), f"authors={_authors(results)}"
    assert all(_valid_year(r["year"]) for r in results), f"years={_years(results)}"
    # This test expects isbn and publisher to be empty strings on every record.
    assert all(r["isbn"] == "" for r in results)
    assert all(r["publisher"] == "" for r in results)


# ── Alib
# ─────────────────────────────────────────────────────────────────────


def test_alib_voina_i_mir() -> None:
    """Search Alib for "Война и мир Толстой" and sanity-check the records.

    Unlike the other archive tests, an empty year is not accepted here:
    every record must carry a year of exactly four digits.
    """
    plugin = AlibPlugin(
        plugin_id="alib_web",
        name="Alib (web)",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config={},
    )
    results = plugin.search("Война и мир Толстой")

    assert results, "Alib returned no results"
    assert all(r["source"] == "alib_web" for r in results)
    assert _has_title(results, "война"), f"titles={_titles(results)}"
    assert _has_author(results, "толст"), f"authors={_authors(results)}"
    # Alib entries always include a publication year in the bibliographic text.
    # fullmatch (not match): the pattern's ``$`` would otherwise let a value
    # with a trailing newline, e.g. "1869\n", slip through.
    assert all(_YEAR_PAT.fullmatch(r["year"]) for r in results), f"years={_years(results)}"
    assert all(r["isbn"] == "" for r in results)
    assert all(r["publisher"] == "" for r in results)


# ── НЛР (SRU) ────────────────────────────────────────────────────────────────
# The NLR SRU endpoint (www.nlr.ru/search/query) no longer exists (HTTP 404).


@pytest.mark.xfail(
    reason="nlr.ru SRU endpoint no longer available (HTTP 404)",
    strict=False,
)
def test_nlr_voina_i_mir() -> None:
    """Search НЛР through the SRU plugin for "Война и мир".

    Marked as a non-strict expected failure because the endpoint is gone;
    with ``strict=False`` an unexpected pass does not fail the run.
    """
    plugin = SRUCatalogPlugin(
        plugin_id="nlr",
        name="НЛР",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config={
            "url": "http://www.nlr.ru/search/query",
            "query_prefix": "title=",
        },
    )
    results = plugin.search("Война и мир")

    assert results, "НЛР returned no results"
    assert all(r["source"] == "nlr" for r in results)
    assert _has_title(results, "война"), f"titles={_titles(results)}"
    assert all(_valid_year(r["year"]) for r in results), f"years={_years(results)}"
    assert all(r["isbn"] == "" for r in results)
    assert all(r["publisher"] == "" for r in results)


# ── ШПИЛ ─────────────────────────────────────────────────────────────────────
# The ШПИЛ IRBIS64 CGI endpoint no longer exists (HTTP 404).
@pytest.mark.xfail(
    reason="shpl.ru IRBIS64 CGI endpoint no longer available (HTTP 404)",
    strict=False,
)
def test_shpl_voina_i_mir() -> None:
    """Query ШПИЛ for "Война и мир" and check the shape of every hit.

    Registered as a non-strict expected failure, so an unexpected pass
    does not fail the run.
    """
    searcher = ShplPlugin(
        plugin_id="shpl",
        name="ШПИЛ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config={},
    )
    hits = searcher.search("Война и мир")

    # The service must return at least one record.
    assert hits, "ШПИЛ returned no results"

    # Every record is tagged with this plugin's id.
    assert all(hit["source"] == "shpl" for hit in hits)

    # At least one title mentions the book; years are empty or 4 digits.
    assert _has_title(hits, "война"), f"titles={_titles(hits)}"
    assert all(_valid_year(hit["year"]) for hit in hits), f"years={_years(hits)}"

    # isbn and publisher are expected to be empty strings on every record.
    assert all(hit["isbn"] == "" for hit in hits)
    assert all(hit["publisher"] == "" for hit in hits)