fix(mcp-webscraper): use certifi SSL context + bundled Comodo root cert

- _build_ssl_context() loads certifi bundle + all *.pem from certs/ dir
- _SSL_CTX singleton built at module load, passed to httpx.get(verify=...)
- Fixes SSLCertVerificationError on Cloudflare-served sites on Fedora 43
  (Comodo AAA root cert missing from system trust store)
- test_server.py: fix HTTPStatusError mock to include request= param
This commit is contained in:
pplate
2026-04-04 09:52:26 +02:00
parent 87e0b9359e
commit 5a96359bb1
2 changed files with 21 additions and 2 deletions
+19 -1
View File
@@ -6,13 +6,31 @@ from html2text import html2text
from urllib.parse import urljoin
from typing import List, Dict, Tuple
import re
import ssl
import os
import certifi
from pathlib import Path
from fastmcp import FastMCP
# FastMCP server instance, registered under the name "webscraper".
mcp = FastMCP("webscraper")
# A single SSL context is built at module load from the certifi bundle plus
# any extra root certificates (*.pem) found in _EXTRA_CERTS_DIR.
# NOTE: the directory is resolved as <dir containing this file>/../certs,
# i.e. a sibling of this file's directory, not a folder beside the file itself.
_EXTRA_CERTS_DIR = Path(__file__).resolve().parent.parent / "certs"
def _build_ssl_context() -> ssl.SSLContext:
    """Create the SSL context used for outgoing HTTPS requests.

    The context trusts the CA bundle shipped with certifi. When the
    ``_EXTRA_CERTS_DIR`` directory exists, every ``*.pem`` file directly
    inside it is added to the trust store as well.

    Returns:
        The configured ``ssl.SSLContext``.
    """
    context = ssl.create_default_context(cafile=certifi.where())

    # Nothing extra to load when the bundled certs directory is absent.
    if not _EXTRA_CERTS_DIR.is_dir():
        return context

    for cert_path in _EXTRA_CERTS_DIR.glob("*.pem"):
        context.load_verify_locations(cafile=str(cert_path))
    return context
# Built once at import time and reused for every request: _fetch_page passes
# it to httpx.get via verify=.
_SSL_CTX = _build_ssl_context()
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
    """Shared fetch helper — returns the response and the parsed soup.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(response, soup)`` tuple: the ``httpx.Response`` and a
        ``BeautifulSoup`` tree built from ``response.text`` with the
        ``lxml`` parser.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status
            (raised by ``response.raise_for_status()``).
    """
    # Fetch exactly once, always verifying against the module-level SSL
    # context. A second, unverified-context call would double the traffic and
    # fail on hosts whose root cert is only present in the bundled certs.
    response = httpx.get(url, timeout=10.0, verify=_SSL_CTX)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return response, soup
+2 -1
View File
@@ -180,10 +180,11 @@ def test_empty_page(mock_get):
@patch('httpx.get')
def test_404(mock_get):
    """Test 404 response.

    Makes ``httpx.get`` raise an ``HTTPStatusError`` carrying a mocked 404
    response and checks that ``webscraper_fetch`` reports the failure in its
    returned text.
    """
    mock_req = MagicMock()
    mock_resp = MagicMock()
    mock_resp.status_code = 404
    mock_resp.text = "Not Found"
    # HTTPStatusError takes request= and response= as required keyword-only
    # arguments; constructing it without request= raises TypeError.
    mock_get.side_effect = httpx.HTTPStatusError(
        "404 Not Found", request=mock_req, response=mock_resp
    )
    result = webscraper_fetch("https://notfound.com")
    assert "Error fetching" in result
    assert "404" in result