diff --git a/webscraper/.coverage b/webscraper/.coverage new file mode 100644 index 0000000..6ca47e4 Binary files /dev/null and b/webscraper/.coverage differ diff --git a/webscraper/coverage.xml b/webscraper/coverage.xml index a2e24d4..0152a61 100644 --- a/webscraper/coverage.xml +++ b/webscraper/coverage.xml @@ -1,12 +1,12 @@ - + /home/pplate/pi_mcps/webscraper/src - + @@ -14,7 +14,7 @@ - + @@ -29,28 +29,28 @@ - - + + + - - - - - - - + + + + + + + - - + - - - + + + @@ -69,68 +69,90 @@ + - - - - - - - - + + + + + + + + + + + - + + + + + + + - + - - - - - - - - - + + + + + - - - + + + + - - - - - + + + + + + + - + - + - - - - + + + + + + + + + + + + + + + + + diff --git a/webscraper/src/server.py b/webscraper/src/server.py index 8432af5..476a637 100644 --- a/webscraper/src/server.py +++ b/webscraper/src/server.py @@ -1,7 +1,7 @@ """Webscraper MCP server — fetch web pages, extract content, links, tables, sitemaps.""" import httpx -from bs4 import BeautifulSoup, SelectorSyntaxError +from bs4 import BeautifulSoup from html2text import html2text from urllib.parse import urljoin from typing import List, Dict, Tuple @@ -170,8 +170,10 @@ def webscraper_fetch_section(url: str, selector: str) -> str: _, soup = _fetch_page(url) try: section = soup.select_one(selector) - except SelectorSyntaxError: - return f"Invalid CSS selector '{selector}' on {url}" + except Exception as e: + if "selector" in str(e).lower(): + return f"Invalid CSS selector '{selector}' on {url}" + raise if not section: return f"No element found for selector '{selector}' on {url}"