Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2ab847f51d | |||
| d5510f590e | |||
| cf102e8b3e | |||
| 13659fd414 |
@@ -10,11 +10,10 @@
|
||||
"alwaysAllow": [
|
||||
"git_status",
|
||||
"git_diff_unstaged",
|
||||
"git_log",
|
||||
"git_add",
|
||||
"git_commit",
|
||||
"git_branch",
|
||||
"git_create_branch"
|
||||
"git_create_branch",
|
||||
"git_add",
|
||||
"git_commit"
|
||||
]
|
||||
},
|
||||
"filesystem": {
|
||||
@@ -34,7 +33,8 @@
|
||||
"src/server.py"
|
||||
],
|
||||
"alwaysAllow": [
|
||||
"webscraper_fetch"
|
||||
"webscraper_fetch",
|
||||
"webscraper_fetch_links"
|
||||
]
|
||||
},
|
||||
"gitea": {
|
||||
@@ -54,8 +54,10 @@
|
||||
"create_issue_comment",
|
||||
"create_pull_request",
|
||||
"get_repository",
|
||||
"list_my_repositories"
|
||||
]
|
||||
"list_my_repositories",
|
||||
"create_wiki_page"
|
||||
],
|
||||
"disabled": true
|
||||
},
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
@@ -82,7 +84,13 @@
|
||||
"env": {
|
||||
"COMFYUI_URL": "http://localhost:8188",
|
||||
"IMAGE_OUTPUT_DIR": "/home/pplate/Pictures/mcp-generated"
|
||||
}
|
||||
},
|
||||
"alwaysAllow": [
|
||||
"list_available_models",
|
||||
"get_generation_status",
|
||||
"get_output_directory",
|
||||
"generate_image"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -33,7 +33,8 @@ def _render_achievements(achievements: list) -> str:
|
||||
|
||||
if a.get("image"):
|
||||
tier = a["id"].rsplit("_", 1)[-1]
|
||||
visual_html = f'<div class="ach-image tier-{tier}">{lock_overlay}</div>'
|
||||
img_url = _esc(a["image"])
|
||||
visual_html = f'<div class="ach-image tier-{tier}" style="background-image: url({img_url});">{lock_overlay}</div>'
|
||||
else:
|
||||
visual_html = f'<div class="ach-icon">{a["icon"]}{lock_overlay}</div>'
|
||||
|
||||
|
||||
|
After Width: | Height: | Size: 2.0 MiB |
|
After Width: | Height: | Size: 1.6 MiB |
|
After Width: | Height: | Size: 1.9 MiB |
|
After Width: | Height: | Size: 459 KiB |
|
After Width: | Height: | Size: 1.4 MiB |
|
After Width: | Height: | Size: 1.4 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.6 MiB |
|
After Width: | Height: | Size: 1.4 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.4 MiB |
|
After Width: | Height: | Size: 1.3 MiB |
@@ -28,9 +28,16 @@ def _build_ssl_context() -> ssl.SSLContext:
|
||||
|
||||
_SSL_CTX = _build_ssl_context()
|
||||
|
||||
_HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
|
||||
)
|
||||
}
|
||||
|
||||
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
    """Shared fetch helper — returns response and parsed soup.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(response, soup)`` pair: the httpx response and its text parsed
        with the ``lxml`` parser.

    Raises:
        httpx.RequestError: The request could not be completed.
        httpx.HTTPStatusError: ``raise_for_status()`` rejected the response.
    """
    # Issue exactly one request, sent with the module-level browser-like
    # headers; a second header-less call would double the traffic and its
    # result would be discarded anyway.
    response = httpx.get(url, timeout=10.0, verify=_SSL_CTX, headers=_HEADERS)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return response, soup
|
||||
@@ -255,5 +262,51 @@ def webscraper_fetch_sitemap(url: str, max_urls: int = 100) -> List[str]:
|
||||
except (httpx.RequestError, httpx.HTTPStatusError) as e:
|
||||
return [f"Error: {str(e)}"]
|
||||
|
||||
@mcp.tool()
def webscraper_search_hint(query: str, max_results: int = 5) -> Dict:
    """Search Brave Search and return top results as a scraping hint.

    Use this sparingly — once per research task — to get oriented before
    scraping individual pages. Returns top result URLs + snippets so you
    can decide which pages are worth scraping deeply.

    Args:
        query: Search query (e.g. "MacBook Pro M4 price Germany")
        max_results: Maximum number of results to return (default: 5).
            Values below zero are treated as zero.

    Returns:
        Dict with 'query', 'results' (list of {title, url, snippet}), 'hint'.
        On a request or HTTP status error, 'results' is empty and 'hint'
        carries the error message.
    """
    # Imported locally so the block does not depend on the file's import list.
    from urllib.parse import quote_plus

    limit = max(max_results, 0)

    try:
        # quote_plus escapes '&', '#', '+', '%' and non-ASCII text, which a
        # bare space-to-plus replacement would leave to corrupt the URL.
        search_url = f"https://search.brave.com/search?q={quote_plus(query)}&source=web"
        _, soup = _fetch_page(search_url)
    except (httpx.RequestError, httpx.HTTPStatusError) as e:
        return {"query": query, "results": [], "hint": f"Error: {str(e)}"}

    results = []
    # Brave Search result cards: each element with class "snippet" holds
    # the title, link and description of one hit.
    for card in soup.select('.snippet'):
        # Count only accepted results against the limit, so filtered-out
        # cards (e.g. javascript: links) do not shrink the output.
        if len(results) >= limit:
            break

        title_el = card.select_one('.snippet-title')
        url_el = card.select_one('a')
        desc_el = card.select_one('.snippet-description')

        title = title_el.get_text(strip=True) if title_el else ""
        url = url_el['href'] if url_el and url_el.get('href') else ""
        snippet = desc_el.get_text(strip=True) if desc_el else ""

        # Keep absolute http(s) links only.
        if url and url.startswith('http'):
            results.append({"title": title, "url": url, "snippet": snippet})

    hint = "; ".join(
        f"{r['title']}: {r['url']}" for r in results
    ) if results else "No results found"

    return {
        "query": query,
        "results": results,
        "hint": hint,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mcp.run(transport="stdio")
|
||||
|
||||
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch
|
||||
from src.server import (
|
||||
webscraper_fetch, webscraper_fetch_links, webscraper_fetch_tables,
|
||||
webscraper_fetch_all, webscraper_fetch_section, webscraper_fetch_meta,
|
||||
webscraper_fetch_sitemap, clean_soup, filter_junk_links
|
||||
webscraper_fetch_sitemap, webscraper_search_hint, clean_soup, filter_junk_links
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
@@ -203,4 +203,84 @@ def test_sitemap_max_urls(mock_get, mock_sitemap_response):
|
||||
result = webscraper_fetch_sitemap("https://example.com/sitemap.xml", max_urls=1)
|
||||
assert len(result) == 1
|
||||
|
||||
# Total: 18 tests covering all tools and edge cases
|
||||
|
||||
# --- webscraper_search_hint tests ---
|
||||
|
||||
@pytest.fixture
def mock_brave_response():
    """Provide a fake Brave Search response holding three result cards.

    Two cards link to http(s) URLs; the third carries a ``javascript:`` href.
    """
    brave_html = """
    <html><body>
    <div class="snippet">
    <a href="https://example.com/article1" class="snippet-title">Feynman on Electric Fields</a>
    <div class="snippet-title">Feynman on Electric Fields</div>
    <div class="snippet-description">Richard Feynman explains that all matter has an electric field.</div>
    </div>
    <div class="snippet">
    <a href="https://example.com/article2" class="snippet-title">Electric Fields Everywhere</a>
    <div class="snippet-title">Electric Fields Everywhere</div>
    <div class="snippet-description">Everything in the universe is surrounded by electric fields.</div>
    </div>
    <div class="snippet">
    <a href="javascript:void(0)" class="snippet-title">JS Junk</a>
    <div class="snippet-title">JS Junk</div>
    <div class="snippet-description">Should be filtered out.</div>
    </div>
    </body></html>
    """
    # Constructor keyword arguments set the same attributes the test reads.
    return MagicMock(
        status_code=200,
        text=brave_html,
        headers={"content-type": "text/html"},
    )
|
||||
|
||||
|
||||
@patch('httpx.get')
def test_webscraper_search_hint_returns_structure(mock_get, mock_brave_response):
    """The search hint is a dict exposing query, results and hint."""
    mock_get.return_value = mock_brave_response
    payload = webscraper_search_hint("Feynman electric field")

    assert isinstance(payload, dict)
    for expected_key in ("query", "results", "hint"):
        assert expected_key in payload
    # The query is echoed back unchanged.
    assert payload["query"] == "Feynman electric field"
|
||||
|
||||
|
||||
@patch('httpx.get')
def test_webscraper_search_hint_filters_non_http(mock_get, mock_brave_response):
    """javascript: links never appear among the returned result URLs."""
    mock_get.return_value = mock_brave_response
    payload = webscraper_search_hint("Feynman electric field")

    found_urls = [entry["url"] for entry in payload["results"]]
    assert "javascript:void(0)" not in found_urls
    for link in found_urls:
        assert link.startswith("http")
|
||||
|
||||
|
||||
@patch('httpx.get')
def test_webscraper_search_hint_max_results(mock_get, mock_brave_response):
    """Test max_results limits output to exactly the requested count."""
    mock_get.return_value = mock_brave_response
    result = webscraper_search_hint("Feynman electric field", max_results=1)
    # The fixture's first card is a valid http link, so exactly one result
    # must come back; "<= 1" would also pass if nothing was parsed at all.
    assert len(result["results"]) == 1
    assert result["results"][0]["url"] == "https://example.com/article1"
|
||||
|
||||
|
||||
@patch('httpx.get')
def test_webscraper_search_hint_error(mock_get):
    """A failing request yields an error hint and an empty result list."""
    mock_get.side_effect = httpx.RequestError("Connection failed")
    outcome = webscraper_search_hint("something")

    assert "Error" in outcome["hint"]
    assert outcome["results"] == []
|
||||
|
||||
|
||||
@patch('httpx.get')
def test_webscraper_search_hint_hint_string(mock_get, mock_brave_response):
    """With results present, the hint is a non-empty summary string."""
    mock_get.return_value = mock_brave_response
    hint_text = webscraper_search_hint("Feynman electric field")["hint"]

    # The hint should summarise the results, not report their absence.
    assert "No results found" not in hint_text
    assert len(hint_text) > 0
|
||||
|
||||
|
||||
# Total: 23 tests covering all tools and edge cases
|
||||
|
||||