"""Live-network integration tests for the archive searcher plugins.

Every test sends a real query for Tolstoy's "War and Peace" (in English or
Russian, depending on the archive) to an external service and checks the
shape of the returned candidate records.

Run with:  pytest tests/ -m network
Skip with: pytest tests/ -m "not network"  (default in presubmit)
"""

import pytest

from models import CandidateRecord
from plugins.archives.html_scraper import HtmlScraperPlugin
from plugins.archives.openlibrary import OpenLibraryPlugin
from plugins.archives.rsl import RSLPlugin
from plugins.archives.sru_catalog import SRUCatalogPlugin
from plugins.rate_limiter import RateLimiter

# Every test in this module carries the ``network`` marker.
pytestmark = pytest.mark.network

# One rate limiter instance and one timeout value shared by all plugins below.
_RL = RateLimiter()
_TIMEOUT = 15


def _titles(results: list[CandidateRecord]) -> list[str]:
    """Return the ``title`` value of each record, in result order."""
    return [record["title"] for record in results]


def _authors(results: list[CandidateRecord]) -> list[str]:
    """Return the ``author`` value of each record, in result order."""
    return [record["author"] for record in results]


def _has_title(results: list[CandidateRecord], fragment: str) -> bool:
    """Report whether *fragment* occurs in any record's title, ignoring case."""
    needle = fragment.lower()
    for record in results:
        if needle in record["title"].lower():
            return True
    return False


def _has_author(results: list[CandidateRecord], fragment: str) -> bool:
    """Report whether *fragment* occurs in any record's author, ignoring case."""
    needle = fragment.lower()
    for record in results:
        if needle in record["author"].lower():
            return True
    return False


# ── OpenLibrary ───────────────────────────────────────────────────────────────


def test_openlibrary_war_and_peace() -> None:
    """Search OpenLibrary in English and check source, title and author."""
    searcher = OpenLibraryPlugin(
        plugin_id="openlibrary",
        name="OpenLibrary",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config={},
    )

    found = searcher.search("War and Peace Tolstoy")

    assert found, "OpenLibrary returned no results"
    assert all(record["source"] == "openlibrary" for record in found)
    assert _has_title(found, "war and peace"), f"titles={_titles(found)}"
    # OpenLibrary stores authors in their original language, so either the
    # Latin or the Cyrillic spelling of the surname is acceptable.
    assert any(
        _has_author(found, spelling) for spelling in ("tolstoy", "толст")
    ), f"authors={_authors(found)}"


# ── RSL (Russian State Library) ───────────────────────────────────────────────


def test_rsl_voina_i_mir() -> None:
    """Search RSL in Russian and check source and title."""
    searcher = RSLPlugin(
        plugin_id="rsl",
        name="РГБ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config={},
    )

    found = searcher.search("Толстой Война и мир")

    assert found, "RSL returned no results"
    assert all(record["source"] == "rsl" for record in found)
    assert _has_title(found, "война"), f"titles={_titles(found)}"


# ── NEB (rusneb) ──────────────────────────────────────────────────────────────


def test_rusneb_voina_i_mir() -> None:
    """Scrape the rusneb.ru search page and check source, title and author."""
    scraper_config = {
        "url": "https://rusneb.ru/search/",
        "search_param": "q",
        "img_alt": True,
        "author_class": "search-list__item_subtext",
    }
    searcher = HtmlScraperPlugin(
        plugin_id="rusneb",
        name="НЭБ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=True,
        timeout=_TIMEOUT,
        config=scraper_config,
    )

    found = searcher.search("Война и мир Толстой")

    assert found, "НЭБ returned no results"
    assert all(record["source"] == "rusneb" for record in found)
    assert _has_title(found, "война"), f"titles={_titles(found)}"
    assert _has_author(found, "толст"), f"authors={_authors(found)}"


# ── Alib ──────────────────────────────────────────────────────────────────────


def test_alib_voina_i_mir() -> None:
    """Scrape the alib.ru search page and check source, title and author."""
    scraper_config = {
        "url": "https://www.alib.ru/find3.php4",
        "search_param": "tfind",
        "extra_params": {"f": "5", "s": "0"},
        "encoding": "cp1251",
        "bold_text": True,
    }
    searcher = HtmlScraperPlugin(
        plugin_id="alib_web",
        name="Alib (web)",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config=scraper_config,
    )

    found = searcher.search("Война и мир Толстой")

    assert found, "Alib returned no results"
    assert all(record["source"] == "alib_web" for record in found)
    assert _has_title(found, "война"), f"titles={_titles(found)}"
    assert _has_author(found, "толст"), f"authors={_authors(found)}"


# ── NLR (SRU) ─────────────────────────────────────────────────────────────────
# The NLR SRU endpoint (www.nlr.ru/search/query) no longer exists (HTTP 404).


@pytest.mark.xfail(reason="nlr.ru SRU endpoint no longer available (HTTP 404)", strict=False)
def test_nlr_voina_i_mir() -> None:
    """Query the NLR SRU catalogue by title; marked as an expected failure."""
    catalog_config = {
        "url": "http://www.nlr.ru/search/query",
        "query_prefix": "title=",
    }
    searcher = SRUCatalogPlugin(
        plugin_id="nlr",
        name="НЛР",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config=catalog_config,
    )

    found = searcher.search("Война и мир")

    assert found, "НЛР returned no results"
    assert all(record["source"] == "nlr" for record in found)
    assert _has_title(found, "война"), f"titles={_titles(found)}"


# ── SHPL (shpl.ru) ────────────────────────────────────────────────────────────
# The SHPL IRBIS64 CGI endpoint no longer exists (HTTP 404).


@pytest.mark.xfail(reason="shpl.ru IRBIS64 CGI endpoint no longer available (HTTP 404)", strict=False)
def test_shpl_voina_i_mir() -> None:
    """Scrape the shpl.ru IRBIS64 search; marked as an expected failure."""
    scraper_config = {
        "url": "https://www.shpl.ru/cgi-bin/irbis64/cgiirbis_64.exe",
        "search_param": "S21ALL",
        "extra_params": {
            "C21COM": "S",
            "I21DBN": "BIBL",
            "P21DBN": "BIBL",
            "S21FMT": "briefWebRus",
            "Z21ID": "",
        },
        "brief_class": "brief",
    }
    searcher = HtmlScraperPlugin(
        plugin_id="shpl",
        name="ШПИЛ",
        rate_limiter=_RL,
        rate_limit_seconds=0,
        auto_queue=False,
        timeout=_TIMEOUT,
        config=scraper_config,
    )

    found = searcher.search("Война и мир")

    assert found, "ШПИЛ returned no results"
    assert all(record["source"] == "shpl" for record in found)
    assert _has_title(found, "война"), f"titles={_titles(found)}"