Fix SelectorSyntaxError import: use Exception catch with message check, 18/18 tests passing
This commit is contained in:
Binary file not shown.
+72
-50
@@ -1,12 +1,12 @@
|
|||||||
<?xml version="1.0" ?>
|
<?xml version="1.0" ?>
|
||||||
<coverage version="7.13.5" timestamp="1775216453822" lines-valid="115" lines-covered="103" line-rate="0.8957" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0">
|
<coverage version="7.13.5" timestamp="1775217129466" lines-valid="137" lines-covered="120" line-rate="0.8759" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0">
|
||||||
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.13.5 -->
|
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.13.5 -->
|
||||||
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
|
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
|
||||||
<sources>
|
<sources>
|
||||||
<source>/home/pplate/pi_mcps/webscraper/src</source>
|
<source>/home/pplate/pi_mcps/webscraper/src</source>
|
||||||
</sources>
|
</sources>
|
||||||
<packages>
|
<packages>
|
||||||
<package name="." line-rate="0.8957" branch-rate="0" complexity="0">
|
<package name="." line-rate="0.8759" branch-rate="0" complexity="0">
|
||||||
<classes>
|
<classes>
|
||||||
<class name="__init__.py" filename="__init__.py" complexity="0" line-rate="1" branch-rate="0">
|
<class name="__init__.py" filename="__init__.py" complexity="0" line-rate="1" branch-rate="0">
|
||||||
<methods/>
|
<methods/>
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
<line number="2" hits="1"/>
|
<line number="2" hits="1"/>
|
||||||
</lines>
|
</lines>
|
||||||
</class>
|
</class>
|
||||||
<class name="server.py" filename="server.py" complexity="0" line-rate="0.8947" branch-rate="0">
|
<class name="server.py" filename="server.py" complexity="0" line-rate="0.875" branch-rate="0">
|
||||||
<methods/>
|
<methods/>
|
||||||
<lines>
|
<lines>
|
||||||
<line number="3" hits="1"/>
|
<line number="3" hits="1"/>
|
||||||
@@ -29,28 +29,28 @@
|
|||||||
<line number="15" hits="1"/>
|
<line number="15" hits="1"/>
|
||||||
<line number="16" hits="1"/>
|
<line number="16" hits="1"/>
|
||||||
<line number="17" hits="1"/>
|
<line number="17" hits="1"/>
|
||||||
<line number="19" hits="1"/>
|
<line number="18" hits="1"/>
|
||||||
<line number="21" hits="1"/>
|
<line number="20" hits="1"/>
|
||||||
<line number="22" hits="1"/>
|
<line number="22" hits="1"/>
|
||||||
|
<line number="23" hits="1"/>
|
||||||
<line number="24" hits="1"/>
|
<line number="24" hits="1"/>
|
||||||
<line number="25" hits="1"/>
|
<line number="26" hits="1"/>
|
||||||
<line number="35" hits="1"/>
|
<line number="28" hits="1"/>
|
||||||
<line number="36" hits="1"/>
|
<line number="29" hits="1"/>
|
||||||
<line number="37" hits="1"/>
|
<line number="31" hits="1"/>
|
||||||
<line number="39" hits="1"/>
|
<line number="32" hits="1"/>
|
||||||
<line number="40" hits="1"/>
|
|
||||||
<line number="41" hits="1"/>
|
|
||||||
<line number="42" hits="1"/>
|
<line number="42" hits="1"/>
|
||||||
<line number="43" hits="1"/>
|
<line number="43" hits="1"/>
|
||||||
|
<line number="44" hits="1"/>
|
||||||
<line number="45" hits="1"/>
|
<line number="45" hits="1"/>
|
||||||
|
<line number="46" hits="1"/>
|
||||||
<line number="47" hits="1"/>
|
<line number="47" hits="1"/>
|
||||||
<line number="48" hits="1"/>
|
<line number="49" hits="1"/>
|
||||||
<line number="49" hits="0"/>
|
|
||||||
<line number="51" hits="1"/>
|
<line number="51" hits="1"/>
|
||||||
<line number="52" hits="1"/>
|
<line number="52" hits="1"/>
|
||||||
<line number="62" hits="1"/>
|
<line number="53" hits="1"/>
|
||||||
<line number="63" hits="1"/>
|
<line number="55" hits="1"/>
|
||||||
<line number="64" hits="1"/>
|
<line number="56" hits="1"/>
|
||||||
<line number="66" hits="1"/>
|
<line number="66" hits="1"/>
|
||||||
<line number="67" hits="1"/>
|
<line number="67" hits="1"/>
|
||||||
<line number="68" hits="1"/>
|
<line number="68" hits="1"/>
|
||||||
@@ -69,68 +69,90 @@
|
|||||||
<line number="92" hits="1"/>
|
<line number="92" hits="1"/>
|
||||||
<line number="93" hits="1"/>
|
<line number="93" hits="1"/>
|
||||||
<line number="94" hits="1"/>
|
<line number="94" hits="1"/>
|
||||||
|
<line number="95" hits="1"/>
|
||||||
<line number="96" hits="1"/>
|
<line number="96" hits="1"/>
|
||||||
<line number="97" hits="1"/>
|
<line number="97" hits="1"/>
|
||||||
<line number="98" hits="1"/>
|
<line number="98" hits="1"/>
|
||||||
<line number="99" hits="1"/>
|
<line number="99" hits="0"/>
|
||||||
<line number="100" hits="1"/>
|
<line number="100" hits="0"/>
|
||||||
<line number="101" hits="1"/>
|
<line number="102" hits="1"/>
|
||||||
<line number="102" hits="0"/>
|
<line number="103" hits="1"/>
|
||||||
<line number="103" hits="0"/>
|
<line number="113" hits="1"/>
|
||||||
<line number="105" hits="1"/>
|
<line number="114" hits="1"/>
|
||||||
<line number="106" hits="1"/>
|
|
||||||
<line number="116" hits="1"/>
|
|
||||||
<line number="117" hits="1"/>
|
<line number="117" hits="1"/>
|
||||||
<line number="118" hits="1"/>
|
<line number="118" hits="1"/>
|
||||||
<line number="119" hits="1"/>
|
<line number="119" hits="1"/>
|
||||||
|
<line number="120" hits="1"/>
|
||||||
<line number="121" hits="1"/>
|
<line number="121" hits="1"/>
|
||||||
|
<line number="124" hits="1"/>
|
||||||
|
<line number="125" hits="1"/>
|
||||||
|
<line number="126" hits="1"/>
|
||||||
|
<line number="127" hits="1"/>
|
||||||
<line number="128" hits="1"/>
|
<line number="128" hits="1"/>
|
||||||
<line number="129" hits="1"/>
|
<line number="129" hits="1"/>
|
||||||
<line number="139" hits="1"/>
|
<line number="130" hits="1"/>
|
||||||
|
<line number="133" hits="1"/>
|
||||||
|
<line number="134" hits="1"/>
|
||||||
|
<line number="135" hits="1"/>
|
||||||
|
<line number="136" hits="1"/>
|
||||||
|
<line number="137" hits="1"/>
|
||||||
<line number="140" hits="1"/>
|
<line number="140" hits="1"/>
|
||||||
<line number="141" hits="1"/>
|
<line number="141" hits="1"/>
|
||||||
|
<line number="142" hits="1"/>
|
||||||
<line number="143" hits="1"/>
|
<line number="143" hits="1"/>
|
||||||
<line number="144" hits="1"/>
|
<line number="144" hits="1"/>
|
||||||
<line number="145" hits="1"/>
|
<line number="145" hits="1"/>
|
||||||
<line number="146" hits="1"/>
|
<line number="146" hits="1"/>
|
||||||
<line number="148" hits="1"/>
|
<line number="147" hits="1"/>
|
||||||
<line number="149" hits="1"/>
|
<line number="149" hits="1"/>
|
||||||
<line number="150" hits="1"/>
|
<line number="155" hits="0"/>
|
||||||
<line number="151" hits="0"/>
|
<line number="156" hits="0"/>
|
||||||
<line number="152" hits="0"/>
|
<line number="158" hits="1"/>
|
||||||
<line number="154" hits="1"/>
|
<line number="159" hits="1"/>
|
||||||
<line number="155" hits="1"/>
|
|
||||||
<line number="164" hits="1"/>
|
|
||||||
<line number="165" hits="1"/>
|
|
||||||
<line number="166" hits="1"/>
|
|
||||||
<line number="168" hits="1"/>
|
|
||||||
<line number="169" hits="1"/>
|
<line number="169" hits="1"/>
|
||||||
<line number="170" hits="1"/>
|
<line number="170" hits="1"/>
|
||||||
|
<line number="171" hits="1"/>
|
||||||
<line number="172" hits="1"/>
|
<line number="172" hits="1"/>
|
||||||
<line number="173" hits="1"/>
|
<line number="173" hits="0"/>
|
||||||
<line number="175" hits="1"/>
|
<line number="174" hits="0"/>
|
||||||
<line number="176" hits="1"/>
|
<line number="175" hits="0"/>
|
||||||
|
<line number="176" hits="0"/>
|
||||||
<line number="178" hits="1"/>
|
<line number="178" hits="1"/>
|
||||||
<line number="179" hits="1"/>
|
<line number="179" hits="1"/>
|
||||||
<line number="181" hits="1"/>
|
<line number="181" hits="1"/>
|
||||||
<line number="182" hits="0"/>
|
<line number="182" hits="1"/>
|
||||||
<line number="183" hits="0"/>
|
<line number="183" hits="1"/>
|
||||||
<line number="185" hits="1"/>
|
<line number="184" hits="0"/>
|
||||||
<line number="186" hits="1"/>
|
<line number="185" hits="0"/>
|
||||||
<line number="196" hits="1"/>
|
<line number="187" hits="1"/>
|
||||||
|
<line number="188" hits="1"/>
|
||||||
<line number="197" hits="1"/>
|
<line number="197" hits="1"/>
|
||||||
<line number="198" hits="1"/>
|
<line number="198" hits="1"/>
|
||||||
|
<line number="199" hits="1"/>
|
||||||
<line number="200" hits="1"/>
|
<line number="200" hits="1"/>
|
||||||
<line number="201" hits="1"/>
|
|
||||||
<line number="202" hits="1"/>
|
<line number="202" hits="1"/>
|
||||||
<line number="203" hits="1"/>
|
<line number="203" hits="1"/>
|
||||||
|
<line number="205" hits="1"/>
|
||||||
<line number="206" hits="1"/>
|
<line number="206" hits="1"/>
|
||||||
<line number="207" hits="1"/>
|
<line number="208" hits="1"/>
|
||||||
<line number="209" hits="1"/>
|
<line number="209" hits="1"/>
|
||||||
<line number="210" hits="0"/>
|
<line number="211" hits="1"/>
|
||||||
<line number="211" hits="0"/>
|
<line number="212" hits="0"/>
|
||||||
<line number="213" hits="1"/>
|
<line number="213" hits="0"/>
|
||||||
<line number="214" hits="0"/>
|
<line number="215" hits="1"/>
|
||||||
|
<line number="216" hits="1"/>
|
||||||
|
<line number="226" hits="1"/>
|
||||||
|
<line number="227" hits="1"/>
|
||||||
|
<line number="228" hits="1"/>
|
||||||
|
<line number="229" hits="1"/>
|
||||||
|
<line number="230" hits="1"/>
|
||||||
|
<line number="233" hits="1"/>
|
||||||
|
<line number="234" hits="1"/>
|
||||||
|
<line number="236" hits="1"/>
|
||||||
|
<line number="237" hits="0"/>
|
||||||
|
<line number="238" hits="0"/>
|
||||||
|
<line number="240" hits="1"/>
|
||||||
|
<line number="241" hits="0"/>
|
||||||
</lines>
|
</lines>
|
||||||
</class>
|
</class>
|
||||||
</classes>
|
</classes>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""Webscraper MCP server — fetch web pages, extract content, links, tables, sitemaps."""
|
"""Webscraper MCP server — fetch web pages, extract content, links, tables, sitemaps."""
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from bs4 import BeautifulSoup, SelectorSyntaxError
|
from bs4 import BeautifulSoup
|
||||||
from html2text import html2text
|
from html2text import html2text
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from typing import List, Dict, Tuple
|
from typing import List, Dict, Tuple
|
||||||
@@ -170,8 +170,10 @@ def webscraper_fetch_section(url: str, selector: str) -> str:
|
|||||||
_, soup = _fetch_page(url)
|
_, soup = _fetch_page(url)
|
||||||
try:
|
try:
|
||||||
section = soup.select_one(selector)
|
section = soup.select_one(selector)
|
||||||
except SelectorSyntaxError:
|
except Exception as e:
|
||||||
return f"Invalid CSS selector '{selector}' on {url}"
|
if "selector" in str(e).lower():
|
||||||
|
return f"Invalid CSS selector '{selector}' on {url}"
|
||||||
|
raise
|
||||||
|
|
||||||
if not section:
|
if not section:
|
||||||
return f"No element found for selector '{selector}' on {url}"
|
return f"No element found for selector '{selector}' on {url}"
|
||||||
|
|||||||
Reference in New Issue
Block a user