fix(mcp-webscraper): use certifi SSL context + bundled Comodo root cert
- _build_ssl_context() loads the certifi bundle plus every *.pem file from the certs/ dir
- _SSL_CTX singleton is built at module load and passed to httpx.get(verify=...)
- Fixes SSLCertVerificationError on Cloudflare-served sites on Fedora 43 (Comodo AAA root cert missing from the system trust store)
- test_server.py: fix the HTTPStatusError mock to include the request= param
This commit is contained in:
@@ -6,13 +6,31 @@ from html2text import html2text
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from typing import List, Dict, Tuple
|
from typing import List, Dict, Tuple
|
||||||
import re
|
import re
|
||||||
|
import ssl
|
||||||
|
import os
|
||||||
|
import certifi
|
||||||
|
from pathlib import Path
|
||||||
from fastmcp import FastMCP
|
from fastmcp import FastMCP
|
||||||
|
|
||||||
mcp = FastMCP("webscraper")
|
mcp = FastMCP("webscraper")
|
||||||
|
|
||||||
|
# Build a single SSL context at module load — certifi bundle + any extra certs
|
||||||
|
# shipped in the certs/ directory that sits beside this file's directory.
|
||||||
|
_EXTRA_CERTS_DIR = Path(__file__).resolve().parent.parent / "certs"
|
||||||
|
|
||||||
|
def _build_ssl_context() -> ssl.SSLContext:
    """Create the SSL context used to verify outbound HTTPS requests.

    The context starts from the certifi CA bundle. When the directory
    ``_EXTRA_CERTS_DIR`` exists, every ``*.pem`` file directly inside it is
    loaded as an additional trusted CA file.

    Returns:
        The configured ``ssl.SSLContext``.
    """
    context = ssl.create_default_context(cafile=certifi.where())

    # Nothing extra to trust when the bundled certs directory is absent.
    if not _EXTRA_CERTS_DIR.is_dir():
        return context

    for extra_cert in _EXTRA_CERTS_DIR.glob("*.pem"):
        context.load_verify_locations(cafile=str(extra_cert))
    return context
|
||||||
|
|
||||||
|
_SSL_CTX = _build_ssl_context()
|
||||||
|
|
||||||
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
|
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
|
||||||
"""Shared fetch helper — returns response and parsed soup."""
|
"""Shared fetch helper — returns response and parsed soup."""
|
||||||
response = httpx.get(url, timeout=10.0)
|
response = httpx.get(url, timeout=10.0, verify=_SSL_CTX)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.text, 'lxml')
|
soup = BeautifulSoup(response.text, 'lxml')
|
||||||
return response, soup
|
return response, soup
|
||||||
|
|||||||
@@ -180,10 +180,11 @@ def test_empty_page(mock_get):
|
|||||||
@patch('httpx.get')
|
@patch('httpx.get')
|
||||||
def test_404(mock_get):
|
def test_404(mock_get):
|
||||||
"""Test 404 response."""
|
"""Test 404 response."""
|
||||||
|
mock_req = MagicMock()
|
||||||
mock_resp = MagicMock()
|
mock_resp = MagicMock()
|
||||||
mock_resp.status_code = 404
|
mock_resp.status_code = 404
|
||||||
mock_resp.text = "Not Found"
|
mock_resp.text = "Not Found"
|
||||||
mock_get.side_effect = httpx.HTTPStatusError("Client Error", response=mock_resp)
|
mock_get.side_effect = httpx.HTTPStatusError("404 Not Found", request=mock_req, response=mock_resp)
|
||||||
result = webscraper_fetch("https://notfound.com")
|
result = webscraper_fetch("https://notfound.com")
|
||||||
assert "Error fetching" in result
|
assert "Error fetching" in result
|
||||||
assert "404" in result
|
assert "404" in result
|
||||||
|
|||||||
Reference in New Issue
Block a user