Compare commits

...

7 Commits

Author SHA1 Message Date
Patrick Plate 2ab847f51d feat(webscraper): add Brave Search hint tool and User-Agent header
- Add webscraper_search_hint() tool using Brave Search as backend
  (no CAPTCHA/GDPR consent wall, works with plain httpx)
- Add User-Agent header to _fetch_page() — fixes 403 on Wikipedia,
  Feynman Lectures, and other sites that block headless requests
- Add 5 new tests for search hint (23 total, 90% coverage)

Brave Search URL: https://search.brave.com/search?q={query}&source=web
Use sparingly — once per research task as orientation, not in loops
2026-04-05 09:37:30 +02:00
Patrick Plate d5510f590e Added new picture for bigmind page 2026-04-04 20:03:59 +02:00
Patrick Plate cf102e8b3e fix(bigmind): render achievement card background images via inline style 2026-04-04 19:29:15 +02:00
Patrick Plate 13659fd414 fix(bigmind): add background-image inline style to achievement card ach-image divs
The .ach-image div had correct CSS dimensions (64x64) and background-size:cover
but was missing the inline style="background-image: url(...)" — so the div
rendered as an empty circle. Fixed by extracting img_url variable and applying
it as style attribute in the f-string. All 39 achievement PNGs now load.

303/303 tests passing.
2026-04-04 19:27:24 +02:00
pplate c68acdd030 chore(bigmind): rename timestamp badge PNGs to achievement IDs
- Renamed 19 timestamp-named PNGs (20260404_*) to match original
  achievement IDs in profile_builder.py compute_achievements() order:
  first_breath, first_thought, eureka, honest_mind, scholar,
  deep_knowledge, scientist, veteran, on_fire, storyteller,
  night_owl, speed_thinker, first_handshake, birthday, shared_mind,
  frugal_mind, quarter_million, token_millionaire, sniper
- Deleted 2 duplicate/excess timestamp PNGs
- Added image= field to all 19 original _add() calls in
  profile_builder.py so every achievement now has a PNG path
- All 39 achievements (19 original + 20 tiered) now have image fields
- 303/303 tests pass
2026-04-04 19:09:01 +02:00
pplate e61c9c98f5 fix(bigmind): fix static image path, JS string concat in achievements; add networker badge PNGs 2026-04-04 19:01:56 +02:00
Patrick Plate 50488109aa Merge branch 'feat/bigmind/achievements-rework' 2026-04-04 18:50:55 +02:00
41 changed files with 199 additions and 38 deletions
+16 -8
View File
@@ -10,11 +10,10 @@
"alwaysAllow": [
"git_status",
"git_diff_unstaged",
"git_log",
"git_add",
"git_commit",
"git_branch",
"git_create_branch"
"git_create_branch",
"git_add",
"git_commit"
]
},
"filesystem": {
@@ -34,7 +33,8 @@
"src/server.py"
],
"alwaysAllow": [
"webscraper_fetch"
"webscraper_fetch",
"webscraper_fetch_links"
]
},
"gitea": {
@@ -54,8 +54,10 @@
"create_issue_comment",
"create_pull_request",
"get_repository",
"list_my_repositories"
]
"list_my_repositories",
"create_wiki_page"
],
"disabled": true
},
"playwright": {
"command": "npx",
@@ -82,7 +84,13 @@
"env": {
"COMFYUI_URL": "http://localhost:8188",
"IMAGE_OUTPUT_DIR": "/home/pplate/Pictures/mcp-generated"
}
},
"alwaysAllow": [
"list_available_models",
"get_generation_status",
"get_output_directory",
"generate_image"
]
}
}
}
+43 -24
View File
@@ -443,101 +443,120 @@ def compute_achievements(user_id: str) -> list[dict]:
_add("first_breath", "🌱", "First Breath",
"Opened the very first session",
first_session_row is not None, _dt(first_session_row[0]) if first_session_row else None,
"Start your first session")
"Start your first session",
image="/static/achievements/first_breath.png")
_add("first_thought", "🧠", "First Thought",
"Formed the first hypothesis",
first_hyp_row is not None, _dt(first_hyp_row[0]) if first_hyp_row else None,
"Add your first hypothesis")
"Add your first hypothesis",
image="/static/achievements/first_thought.png")
_add("eureka", "💡", "Eureka",
"First hypothesis confirmed as true",
first_confirmed_row is not None, _dt(first_confirmed_row[0]) if first_confirmed_row else None,
"Confirm your first hypothesis")
"Confirm your first hypothesis",
image="/static/achievements/eureka.png")
_add("honest_mind", "", "Honest Mind",
"First hypothesis refuted — being wrong is a feature",
first_refuted_row is not None, _dt(first_refuted_row[0]) if first_refuted_row else None,
"Have a hypothesis refuted")
"Have a hypothesis refuted",
image="/static/achievements/honest_mind.png")
_add("scholar", "📚", "Scholar",
"Stored 25+ personal facts",
fact_count >= 25, scholar_date,
f"Store 25+ facts (currently: {fact_count})")
f"Store 25+ facts (currently: {fact_count})",
image="/static/achievements/scholar.png")
_add("deep_knowledge", "💎", "Deep Knowledge",
"Amassed 100+ stored facts",
fact_count >= 100, deep_knowledge_date,
f"Store 100+ facts (currently: {fact_count})")
f"Store 100+ facts (currently: {fact_count})",
image="/static/achievements/deep_knowledge.png")
_add("scientist", "🔬", "Scientist",
"Formed 10+ hypotheses — science is prediction",
hyp_count >= 10, scientist_date,
f"Form 10+ hypotheses (currently: {hyp_count})")
f"Form 10+ hypotheses (currently: {hyp_count})",
image="/static/achievements/scientist.png")
_add("veteran", "🏆", "Veteran",
"Completed 50+ sessions — true longevity",
session_count >= 50, veteran_date,
f"Complete 50+ sessions (currently: {session_count})")
f"Complete 50+ sessions (currently: {session_count})",
image="/static/achievements/veteran.png")
_add("on_fire", "🔥", "On Fire",
"5+ sessions in a single day",
on_fire_row is not None, on_fire_row[0] if on_fire_row else None,
"Have 5+ sessions in a single day")
"Have 5+ sessions in a single day",
image="/static/achievements/on_fire.png")
_add("storyteller", "📖", "Storyteller",
"20+ sessions with detailed Tier-2 summaries",
tier2_count >= 20, storyteller_date,
f"Summarize 20+ sessions (currently: {tier2_count})")
f"Summarize 20+ sessions (currently: {tier2_count})",
image="/static/achievements/storyteller.png")
_add("night_owl", "🌙", "Night Owl",
"Started a session after midnight UTC",
night_owl_row is not None, _dt(night_owl_row[0]) if night_owl_row else None,
"Start a session after midnight")
"Start a session after midnight",
image="/static/achievements/night_owl.png")
_add("speed_thinker", "", "Speed Thinker",
"Hypothesis formed and confirmed in the same session",
speed_thinker_row is not None, _dt(speed_thinker_row[0]) if speed_thinker_row else None,
"Form and confirm a hypothesis in one session")
"Form and confirm a hypothesis in one session",
image="/static/achievements/speed_thinker.png")
# First Handshake — hardcoded: 2026-03-31 (Patrick shared BigMind with Elias)
_add("first_handshake", "🤝", "First Handshake",
"BigMind shared with Elias on 2026-03-31 — the first person outside Patrick to receive it",
True, "2026-03-31",
"Share BigMind with someone")
"Share BigMind with someone",
image="/static/achievements/first_handshake.png")
_add("birthday", "🎂", "Birthday",
"One full year of existence",
birthday_unlocked, birthday_date,
birthday_extra or "Complete one full year",
extra=birthday_extra)
extra=birthday_extra,
image="/static/achievements/birthday.png")
# Locked until Phase 3
_add("shared_mind", "🌍", "Shared Mind",
"Phase 3 Tier G — BigMind goes company-wide",
False, None,
"Locked until Phase 3 Tier G is enabled")
"Locked until Phase 3 Tier G is enabled",
image="/static/achievements/shared_mind.png")
# Token achievements (Feature 6 — suggested by Klaus)
_add("frugal_mind", "🪙", "Frugal Mind",
"Logged the first token efficiency save",
frugal_row is not None, _dt(frugal_row[0]) if frugal_row else None,
"Log your first token save")
"Log your first token save",
image="/static/achievements/frugal_mind.png")
_add("quarter_million", "💰", "Quarter Million",
"250,000 cumulative tokens saved",
token_total >= 250_000, quarter_million_date,
f"Save 250,000+ tokens (currently: {token_total:,})")
f"Save 250,000+ tokens (currently: {token_total:,})",
image="/static/achievements/quarter_million.png")
_add("token_millionaire", "🏦", "Token Millionaire",
"1,000,000 cumulative tokens saved",
token_total >= 1_000_000, millionaire_date,
f"Save 1,000,000+ tokens (currently: {token_total:,})")
f"Save 1,000,000+ tokens (currently: {token_total:,})",
image="/static/achievements/token_millionaire.png")
_add("sniper", "🎯", "Sniper",
"Single token save > 500,000 — one massive efficiency win",
sniper_row is not None, _dt(sniper_row[0]) if sniper_row else None,
"Save 500,000+ tokens in a single operation")
"Save 500,000+ tokens in a single operation",
image="/static/achievements/sniper.png")
# ── Tiered Achievement Badges (20 PNG) ────────────────────────────────────
# NOTE: conn is already closed above; open a fresh connection for tiered queries
@@ -571,7 +590,7 @@ def compute_achievements(user_id: str) -> list[dict]:
f"Added your {thresh:,}+ person to the directory",
unlocked, unlocked_at,
f"Reach {thresh:,} people (now: {people_count:,})",
image=f"static/achievements/networker_{tiers[i]}.png"
image=f"/static/achievements/networker_{tiers[i]}.png"
)
# Token Sniper (max single token save)
@@ -601,7 +620,7 @@ def compute_achievements(user_id: str) -> list[dict]:
f"Single shot saved {thresh:,}+ tokens",
unlocked, unlocked_at,
f"Max single save {thresh:,}+ (current max: {max_token:,})",
image=f"static/achievements/tokensniper_{tiers[i]}.png"
image=f"/static/achievements/tokensniper_{tiers[i]}.png"
)
# Hypothesis Master (confirmed hypotheses)
@@ -628,7 +647,7 @@ def compute_achievements(user_id: str) -> list[dict]:
f"Confirmed {thresh:,}+ predictions right",
unlocked, unlocked_at,
f"Confirm {thresh:,}+ hypotheses (now: {confirmed_hyp_count:,})",
image=f"static/achievements/hypothesismaster_{tiers[i]}.png"
image=f"/static/achievements/hypothesismaster_{tiers[i]}.png"
)
# Memory Architect (facts stored — fact_count already computed above)
@@ -648,7 +667,7 @@ def compute_achievements(user_id: str) -> list[dict]:
f"Stored {thresh:,}+ facts in your brain",
unlocked, unlocked_at,
f"Store {thresh:,}+ facts (now: {fact_count:,})",
image=f"static/achievements/memoryarchitect_{tiers[i]}.png"
image=f"/static/achievements/memoryarchitect_{tiers[i]}.png"
)
# Session Veteran (session_count already computed above)
@@ -668,7 +687,7 @@ def compute_achievements(user_id: str) -> list[dict]:
f"Completed {thresh:,}+ sessions",
unlocked, unlocked_at,
f"Complete {thresh:,}+ sessions (now: {session_count:,})",
image=f"static/achievements/sessionveteran_{tiers[i]}.png"
image=f"/static/achievements/sessionveteran_{tiers[i]}.png"
)
return A
Binary file not shown.

Before

Width:  |  Height:  |  Size: 375 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 513 KiB

Before

Width:  |  Height:  |  Size: 410 KiB

After

Width:  |  Height:  |  Size: 410 KiB

Before

Width:  |  Height:  |  Size: 372 KiB

After

Width:  |  Height:  |  Size: 372 KiB

Before

Width:  |  Height:  |  Size: 390 KiB

After

Width:  |  Height:  |  Size: 390 KiB

Before

Width:  |  Height:  |  Size: 261 KiB

After

Width:  |  Height:  |  Size: 261 KiB

Before

Width:  |  Height:  |  Size: 340 KiB

After

Width:  |  Height:  |  Size: 340 KiB

Before

Width:  |  Height:  |  Size: 406 KiB

After

Width:  |  Height:  |  Size: 406 KiB

Before

Width:  |  Height:  |  Size: 367 KiB

After

Width:  |  Height:  |  Size: 367 KiB

Before

Width:  |  Height:  |  Size: 319 KiB

After

Width:  |  Height:  |  Size: 319 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 251 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 294 KiB

Before

Width:  |  Height:  |  Size: 301 KiB

After

Width:  |  Height:  |  Size: 301 KiB

Before

Width:  |  Height:  |  Size: 439 KiB

After

Width:  |  Height:  |  Size: 439 KiB

Before

Width:  |  Height:  |  Size: 462 KiB

After

Width:  |  Height:  |  Size: 462 KiB

Before

Width:  |  Height:  |  Size: 429 KiB

After

Width:  |  Height:  |  Size: 429 KiB

Before

Width:  |  Height:  |  Size: 376 KiB

After

Width:  |  Height:  |  Size: 376 KiB

Before

Width:  |  Height:  |  Size: 400 KiB

After

Width:  |  Height:  |  Size: 400 KiB

Before

Width:  |  Height:  |  Size: 289 KiB

After

Width:  |  Height:  |  Size: 289 KiB

Before

Width:  |  Height:  |  Size: 431 KiB

After

Width:  |  Height:  |  Size: 431 KiB

Before

Width:  |  Height:  |  Size: 418 KiB

After

Width:  |  Height:  |  Size: 418 KiB

Before

Width:  |  Height:  |  Size: 458 KiB

After

Width:  |  Height:  |  Size: 458 KiB

Before

Width:  |  Height:  |  Size: 403 KiB

After

Width:  |  Height:  |  Size: 403 KiB

+1 -1
View File
@@ -163,7 +163,7 @@ def _create_app():
def achievements_image(filename: str):
from pathlib import Path
safe_name = Path(filename).name
img_path = Path('static') / 'achievements' / safe_name
img_path = Path(__file__).parent / 'static' / 'achievements' / safe_name
if img_path.exists() and img_path.suffix.lower() in ['.png', '.jpg', '.jpeg', '.webp', '.gif']:
mimetype = {
'.png': 'image/png',
+3 -2
View File
@@ -33,7 +33,8 @@ def _render_achievements(achievements: list) -> str:
if a.get("image"):
tier = a["id"].rsplit("_", 1)[-1]
visual_html = f'<div class="ach-image tier-{tier}">{lock_overlay}</div>'
img_url = _esc(a["image"])
visual_html = f'<div class="ach-image tier-{tier}" style="background-image: url({img_url});">{lock_overlay}</div>'
else:
visual_html = f'<div class="ach-icon">{a["icon"]}{lock_overlay}</div>'
@@ -628,7 +629,7 @@ def _render_html(data: dict) -> str:
var d = card.dataset;
var tier = d.id.split('_').pop();
if (d.image) {{
document.getElementById('ap-icon').innerHTML = "<img class=\"ap-image tier-\" + tier + \" src=\" + d.image + \" alt=\" + d.name + \">";
document.getElementById('ap-icon').innerHTML = '<img class="ap-image tier-' + tier + '" src="' + d.image + '" alt="' + d.name + '">';
}} else {{
document.getElementById('ap-icon').textContent = d.icon;
}}
Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 459 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

+54 -1
View File
@@ -28,9 +28,16 @@ def _build_ssl_context() -> ssl.SSLContext:
_SSL_CTX = _build_ssl_context()
_HEADERS = {
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
}
def _fetch_page(url: str) -> Tuple[httpx.Response, BeautifulSoup]:
"""Shared fetch helper — returns response and parsed soup."""
response = httpx.get(url, timeout=10.0, verify=_SSL_CTX)
response = httpx.get(url, timeout=10.0, verify=_SSL_CTX, headers=_HEADERS)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')
return response, soup
@@ -255,5 +262,51 @@ def webscraper_fetch_sitemap(url: str, max_urls: int = 100) -> List[str]:
except (httpx.RequestError, httpx.HTTPStatusError) as e:
return [f"Error: {str(e)}"]
@mcp.tool()
def webscraper_search_hint(query: str, max_results: int = 5) -> Dict:
"""Search Brave Search and return top results as a scraping hint.
Use this sparingly — once per research task — to get oriented before
scraping individual pages. Returns top result URLs + snippets so you
can decide which pages are worth scraping deeply.
Args:
query: Search query (e.g. "MacBook Pro M4 price Germany")
max_results: Maximum number of results to return (default: 5)
Returns:
Dict with 'query', 'results' (list of {title, url, snippet}), 'hint'
"""
try:
search_url = f"https://search.brave.com/search?q={query.replace(' ', '+')}&source=web"
_, soup = _fetch_page(search_url)
results = []
# Brave Search result cards: each <a> with class snippet contains title + description
for card in soup.select('.snippet')[:max_results]:
title_el = card.select_one('.snippet-title')
url_el = card.select_one('a')
desc_el = card.select_one('.snippet-description')
title = title_el.get_text(strip=True) if title_el else ""
url = url_el['href'] if url_el and url_el.get('href') else ""
snippet = desc_el.get_text(strip=True) if desc_el else ""
if url and url.startswith('http'):
results.append({"title": title, "url": url, "snippet": snippet})
hint = "; ".join(
f"{r['title']}: {r['url']}" for r in results
) if results else "No results found"
return {
"query": query,
"results": results,
"hint": hint,
}
except (httpx.RequestError, httpx.HTTPStatusError) as e:
return {"query": query, "results": [], "hint": f"Error: {str(e)}"}
if __name__ == "__main__":
mcp.run(transport="stdio")
+82 -2
View File
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch
from src.server import (
webscraper_fetch, webscraper_fetch_links, webscraper_fetch_tables,
webscraper_fetch_all, webscraper_fetch_section, webscraper_fetch_meta,
webscraper_fetch_sitemap, clean_soup, filter_junk_links
webscraper_fetch_sitemap, webscraper_search_hint, clean_soup, filter_junk_links
)
@pytest.fixture
@@ -203,4 +203,84 @@ def test_sitemap_max_urls(mock_get, mock_sitemap_response):
result = webscraper_fetch_sitemap("https://example.com/sitemap.xml", max_urls=1)
assert len(result) == 1
# Total: 18 tests covering all tools and edge cases
# --- webscraper_search_hint tests ---
@pytest.fixture
def mock_brave_response():
"""Mock Brave Search HTML response with result cards."""
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.text = """
<html><body>
<div class="snippet">
<a href="https://example.com/article1" class="snippet-title">Feynman on Electric Fields</a>
<div class="snippet-title">Feynman on Electric Fields</div>
<div class="snippet-description">Richard Feynman explains that all matter has an electric field.</div>
</div>
<div class="snippet">
<a href="https://example.com/article2" class="snippet-title">Electric Fields Everywhere</a>
<div class="snippet-title">Electric Fields Everywhere</div>
<div class="snippet-description">Everything in the universe is surrounded by electric fields.</div>
</div>
<div class="snippet">
<a href="javascript:void(0)" class="snippet-title">JS Junk</a>
<div class="snippet-title">JS Junk</div>
<div class="snippet-description">Should be filtered out.</div>
</div>
</body></html>
"""
mock_resp.headers = {"content-type": "text/html"}
return mock_resp
@patch('httpx.get')
def test_webscraper_search_hint_returns_structure(mock_get, mock_brave_response):
"""Test that search hint returns correct dict structure."""
mock_get.return_value = mock_brave_response
result = webscraper_search_hint("Feynman electric field")
assert isinstance(result, dict)
assert "query" in result
assert "results" in result
assert "hint" in result
assert result["query"] == "Feynman electric field"
@patch('httpx.get')
def test_webscraper_search_hint_filters_non_http(mock_get, mock_brave_response):
"""Test that javascript: URLs are excluded from results."""
mock_get.return_value = mock_brave_response
result = webscraper_search_hint("Feynman electric field")
urls = [r["url"] for r in result["results"]]
assert all(u.startswith("http") for u in urls)
assert "javascript:void(0)" not in urls
@patch('httpx.get')
def test_webscraper_search_hint_max_results(mock_get, mock_brave_response):
"""Test max_results limits output."""
mock_get.return_value = mock_brave_response
result = webscraper_search_hint("Feynman electric field", max_results=1)
assert len(result["results"]) <= 1
@patch('httpx.get')
def test_webscraper_search_hint_error(mock_get):
"""Test error handling in search hint."""
mock_get.side_effect = httpx.RequestError("Connection failed")
result = webscraper_search_hint("something")
assert result["results"] == []
assert "Error" in result["hint"]
@patch('httpx.get')
def test_webscraper_search_hint_hint_string(mock_get, mock_brave_response):
"""Test that hint string is non-empty when results exist."""
mock_get.return_value = mock_brave_response
result = webscraper_search_hint("Feynman electric field")
# hint should summarise results
assert len(result["hint"]) > 0
assert "No results found" not in result["hint"]
# Total: 23 tests covering all tools and edge cases