diff --git a/mcp/webscraper/src/server.py b/mcp/webscraper/src/server.py
index 476a637..6969e2d 100644
--- a/mcp/webscraper/src/server.py
+++ b/mcp/webscraper/src/server.py
@@ -6,13 +6,37 @@ from html2text import html2text
 from urllib.parse import urljoin
 from typing import List, Dict, Tuple
 import re
+import ssl
+import os
+import certifi
+from pathlib import Path
 from fastmcp import FastMCP
 
 mcp = FastMCP("webscraper")
 
+# Build a single SSL context at module load: the certifi CA bundle plus any
+# extra *.pem certificates found in the certs/ directory one level above this
+# file's directory (i.e. a sibling of src/).
+_EXTRA_CERTS_DIR = Path(__file__).resolve().parent.parent / "certs"
+
+def _build_ssl_context() -> ssl.SSLContext:
+    """Build an SSL context from certifi + extra bundled root certs.
+
+    Starts from the certifi CA bundle, then, if the extra-certs directory
+    exists, additionally trusts every ``*.pem`` file directly inside it.
+    """
+    ctx = ssl.create_default_context(cafile=certifi.where())
+    if _EXTRA_CERTS_DIR.is_dir():
+        # Sorted so the load order does not depend on directory listing order.
+        for pem in sorted(_EXTRA_CERTS_DIR.glob("*.pem")):
+            ctx.load_verify_locations(cafile=str(pem))
+    return ctx
+
+_SSL_CTX = _build_ssl_context()
+
 def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
     """Shared fetch helper — returns response and parsed soup."""
-    response = httpx.get(url, timeout=10.0)
+    response = httpx.get(url, timeout=10.0, verify=_SSL_CTX)
     response.raise_for_status()
     soup = BeautifulSoup(response.text, 'lxml')
     return response, soup
diff --git a/mcp/webscraper/tests/test_server.py b/mcp/webscraper/tests/test_server.py
index 404d5ec..ac2d9c9 100644
--- a/mcp/webscraper/tests/test_server.py
+++ b/mcp/webscraper/tests/test_server.py
@@ -180,10 +180,11 @@ def test_empty_page(mock_get):
 @patch('httpx.get')
 def test_404(mock_get):
     """Test 404 response."""
+    mock_req = MagicMock()
     mock_resp = MagicMock()
     mock_resp.status_code = 404
     mock_resp.text = "Not Found"
-    mock_get.side_effect = httpx.HTTPStatusError("Client Error", response=mock_resp)
+    mock_get.side_effect = httpx.HTTPStatusError("404 Not Found", request=mock_req, response=mock_resp)
     result = webscraper_fetch("https://notfound.com")
     assert "Error fetching" in result
     assert "404" in result