From 5a96359bb1fb7c10b6c1f6dbfcb77ed700afba2b Mon Sep 17 00:00:00 2001 From: pplate Date: Sat, 4 Apr 2026 09:52:26 +0200 Subject: [PATCH] fix(mcp-webscraper): use certifi SSL context + bundled Comodo root cert - _build_ssl_context() loads certifi bundle + all *.pem from certs/ dir - _SSL_CTX singleton built at module load, passed to httpx.get(verify=...) - Fixes SSLCertVerificationError on Cloudflare-served sites on Fedora 43 (Comodo AAA root cert missing from system trust store) - test_server.py: fix HTTPStatusError mock to include request= param --- mcp/webscraper/src/server.py | 20 +++++++++++++++++++- mcp/webscraper/tests/test_server.py | 3 ++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/mcp/webscraper/src/server.py b/mcp/webscraper/src/server.py index 476a637..6969e2d 100644 --- a/mcp/webscraper/src/server.py +++ b/mcp/webscraper/src/server.py @@ -6,13 +6,31 @@ from html2text import html2text from urllib.parse import urljoin from typing import List, Dict, Tuple import re +import ssl +import os +import certifi +from pathlib import Path from fastmcp import FastMCP mcp = FastMCP("webscraper") +# Build a single SSL context at module load — certifi bundle + any extra certs +# shipped in the certs/ directory alongside this file's parent directory. 
+_EXTRA_CERTS_DIR = Path(__file__).resolve().parent.parent / "certs" + +def _build_ssl_context() -> ssl.SSLContext: + """Build an SSL context from certifi + extra bundled root certs.""" + ctx = ssl.create_default_context(cafile=certifi.where()) + if _EXTRA_CERTS_DIR.is_dir(): + for pem in _EXTRA_CERTS_DIR.glob("*.pem"): + ctx.load_verify_locations(cafile=str(pem)) + return ctx + +_SSL_CTX = _build_ssl_context() + def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]: """Shared fetch helper — returns response and parsed soup.""" - response = httpx.get(url, timeout=10.0) + response = httpx.get(url, timeout=10.0, verify=_SSL_CTX) response.raise_for_status() soup = BeautifulSoup(response.text, 'lxml') return response, soup diff --git a/mcp/webscraper/tests/test_server.py b/mcp/webscraper/tests/test_server.py index 404d5ec..ac2d9c9 100644 --- a/mcp/webscraper/tests/test_server.py +++ b/mcp/webscraper/tests/test_server.py @@ -180,10 +180,11 @@ def test_empty_page(mock_get): @patch('httpx.get') def test_404(mock_get): """Test 404 response.""" + mock_req = MagicMock() mock_resp = MagicMock() mock_resp.status_code = 404 mock_resp.text = "Not Found" - mock_get.side_effect = httpx.HTTPStatusError("Client Error", response=mock_resp) + mock_get.side_effect = httpx.HTTPStatusError("404 Not Found", request=mock_req, response=mock_resp) result = webscraper_fetch("https://notfound.com") assert "Error fetching" in result assert "404" in result