Fix webscraper bugs from v1.2 review: HTTPStatusError catch, CSS selector guard, relative hrefs, shared _fetch_page refactor, test fixes (18/18 passing)

This commit is contained in:
Patrick Plate
2026-04-03 13:43:44 +02:00
parent 38a2b89bd3
commit bbeca4e27e
4 changed files with 228 additions and 79 deletions
-23
View File
@@ -5,26 +5,3 @@ from pathlib import Path
# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
import pytest
from unittest.mock import MagicMock
@pytest.fixture
def mock_httpx():
    """Install a fake ``httpx`` module for the duration of one test.

    The fake module's ``get`` returns a canned successful HTML response
    (status 200, ``text/html``). The previous ``sys.modules["httpx"]`` entry
    is restored on teardown so the fake does not leak into later tests.

    Yields:
        MagicMock: the stand-in module registered as ``sys.modules["httpx"]``.
    """
    # Canned response object returned by every ``httpx.get(...)`` call.
    response = MagicMock()
    response.status_code = 200
    response.text = "<html><body>Test</body></html>"
    response.headers = {"content-type": "text/html"}

    mock_module = MagicMock()
    mock_module.get.return_value = response

    # A sentinel distinguishes "httpx was not imported" from any real value.
    missing = object()
    previous = sys.modules.get("httpx", missing)
    # NOTE: only affects imports executed after this point; modules that
    # already hold a reference to the real httpx are not rebound.
    sys.modules["httpx"] = mock_module
    try:
        yield mock_module
    finally:
        if previous is missing:
            sys.modules.pop("httpx", None)
        else:
            sys.modules["httpx"] = previous
@pytest.fixture
def mock_bs4():
    """Return a BeautifulSoup tree parsed from a minimal HTML document.

    Despite the name, nothing is mocked: the real ``bs4`` parser is run over
    a fixed one-element page using the built-in ``html.parser`` backend.
    """
    import bs4

    return bs4.BeautifulSoup("<html><body>Test</body></html>", "html.parser")