fix(mcp-webscraper): use certifi SSL context + bundled Comodo root cert
- _build_ssl_context() loads the certifi bundle plus every *.pem file from the certs/ dir
- _SSL_CTX singleton is built at module load and passed to httpx.get(verify=...)
- Fixes SSLCertVerificationError on Cloudflare-served sites on Fedora 43 (Comodo AAA root cert missing from the system trust store)
- test_server.py: fix HTTPStatusError mock to include the request= param
This commit is contained in:
@@ -6,13 +6,31 @@ from html2text import html2text
|
||||
from urllib.parse import urljoin
|
||||
from typing import List, Dict, Tuple
|
||||
import re
|
||||
import ssl
|
||||
import os
|
||||
import certifi
|
||||
from pathlib import Path
|
||||
from fastmcp import FastMCP
|
||||
|
||||
# FastMCP instance for this module, created with the name "webscraper".
mcp = FastMCP("webscraper")
|
||||
|
||||
# Build a single SSL context at module load — certifi bundle + any extra certs
|
||||
# shipped in the certs/ directory one level above this file's directory.
|
||||
_EXTRA_CERTS_DIR = Path(__file__).resolve().parent.parent / "certs"
|
||||
|
||||
def _build_ssl_context() -> ssl.SSLContext:
    """Create the TLS verification context used for outbound requests.

    The context starts from the certifi CA bundle. Every ``*.pem`` file found
    directly inside ``_EXTRA_CERTS_DIR`` is then added on top of it. If that
    directory does not exist, the certifi-only context is returned as is.

    Returns:
        The configured ``ssl.SSLContext``.
    """
    context = ssl.create_default_context(cafile=certifi.where())

    # Nothing extra to load when the bundled-certs directory is absent.
    if not _EXTRA_CERTS_DIR.is_dir():
        return context

    for cert_path in _EXTRA_CERTS_DIR.glob("*.pem"):
        context.load_verify_locations(cafile=str(cert_path))
    return context
|
||||
|
||||
# Built once when the module is imported; _fetch_page passes it to
# httpx.get(verify=...).
_SSL_CTX = _build_ssl_context()
|
||||
|
||||
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
    """Shared fetch helper — returns response and parsed soup.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(response, soup)`` tuple: the httpx response and a BeautifulSoup
        tree built from ``response.text`` with the ``lxml`` parser.

    Raises:
        httpx.HTTPStatusError: Propagated from ``raise_for_status()`` when
            the server answers with an error status.
    """
    # Issue exactly one request, verified against the module-level SSL
    # context (certifi bundle + bundled extra certs). A second call without
    # verify= would fetch the page twice and bypass that context.
    response = httpx.get(url, timeout=10.0, verify=_SSL_CTX)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return response, soup
|
||||
|
||||
@@ -180,10 +180,11 @@ def test_empty_page(mock_get):
|
||||
@patch('httpx.get')
def test_404(mock_get):
    """Test 404 response.

    httpx.get is patched to raise an HTTPStatusError carrying a 404 response;
    the test asserts that webscraper_fetch returns text containing
    "Error fetching" and "404".
    """
    mock_req = MagicMock()
    mock_resp = MagicMock()
    mock_resp.status_code = 404
    mock_resp.text = "Not Found"
    # HTTPStatusError takes request= and response= as required keyword
    # arguments; constructing it with response= alone raises TypeError.
    mock_get.side_effect = httpx.HTTPStatusError(
        "404 Not Found", request=mock_req, response=mock_resp
    )
    result = webscraper_fetch("https://notfound.com")
    assert "Error fetching" in result
    assert "404" in result
|
||||
|
||||
Reference in New Issue
Block a user