import { parametersSchema as z, defineCustomTool } from "@roo-code/types"
import { spawnSync } from "child_process"

/** Output length cap used when the caller does not supply a positive, finite `maxChars`. */
const DEFAULT_MAX_CHARS = 15000

/**
 * Character references decoded by `htmlToText`, applied in array order.
 * `&amp;` is deliberately last: decoding it first would turn an escaped
 * reference such as `&amp;lt;` into `<` instead of the literal text `&lt;`.
 */
const HTML_ENTITIES = [
  [/&lt;/g, "<"],
  [/&gt;/g, ">"],
  [/&quot;/g, '"'],
  [/&#(?:0*39|x0*27);/gi, "'"],
  [/&apos;/g, "'"],
  [/&nbsp;/g, " "],
  [/&amp;/g, "&"],
]

/**
 * Reduce an HTML document to whitespace-normalised plain text.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Text with script/style elements, comments and tags removed,
 *   the entities in `HTML_ENTITIES` decoded, and whitespace runs collapsed.
 */
function htmlToText(html) {
  const withoutMarkup = html
    // Script and style bodies are code, not page text, so drop them entirely.
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, "")
    .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, "")
    // Comments may contain ">", which the generic tag pattern below would split on.
    .replace(/<!--[\s\S]*?-->/g, "")
    .replace(/<[^>]+>/g, " ")

  // Entities are decoded only after tag removal so that an escaped "&lt;b&gt;"
  // in the page survives as the literal text "<b>".
  const decoded = HTML_ENTITIES.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    withoutMarkup,
  )

  return decoded.replace(/\s+/g, " ").trim()
}

/**
 * Validate a caller-supplied URL and normalise it for curl.
 *
 * Input without a `scheme://` prefix is treated as `http://…`, mirroring curl's
 * own default. Anything that does not parse as an http(s) URL is rejected, which
 * also prevents option-like values (e.g. `-o file`) and `file://` URLs from
 * reaching curl.
 *
 * @param {string} url - URL as supplied by the caller.
 * @returns {string | null} Normalised absolute URL, or `null` when rejected.
 */
function toHttpUrl(url) {
  if (typeof url !== "string") {
    return null
  }
  const trimmed = url.trim()
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed) ? trimmed : `http://${trimmed}`

  let parsed
  try {
    parsed = new URL(candidate)
  } catch (_err) {
    return null
  }
  return parsed.protocol === "http:" || parsed.protocol === "https:" ? parsed.href : null
}

/**
 * `web_fetch` custom tool: downloads a page with curl and returns its text.
 *
 * `execute` resolves to the cleaned page text (truncated to `maxChars`, default
 * 15000) or, on failure, to a string starting with "Error fetching <url>:".
 * It never throws for fetch failures.
 */
export default defineCustomTool({
  name: "web_fetch",
  description:
    "Fetch a web page and return its content as clean text. Uses curl + HTML stripping. Lightweight alternative to the WebScraper MCP for simple page fetches.",
  parameters: z.object({
    url: z.string().describe("URL to fetch"),
    maxChars: z.number().optional().describe("Truncate output at this many characters. Default: 15000"),
  }),
  async execute({ url, maxChars }) {
    // Missing, zero, negative or non-finite limits all fall back to the default.
    const limit = Number.isFinite(maxChars) && maxChars > 0 ? maxChars : DEFAULT_MAX_CHARS

    const target = toHttpUrl(url)
    if (target === null) {
      return `Error fetching ${url}: only http:// and https:// URLs are supported`
    }

    // NOTE: spawnSync blocks the event loop for up to `timeout` ms.
    const result = spawnSync(
      "curl",
      [
        // -S keeps curl's error message on stderr even though -s silences progress.
        "-sSL",
        "--max-time",
        "15",
        // Restrict both the initial request and any redirect to http(s).
        "--proto",
        "=http,https",
        "--proto-redir",
        "=http,https",
        "-H",
        "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        target,
      ],
      {
        encoding: "utf-8",
        timeout: 20000,
        maxBuffer: 5 * 1024 * 1024,
      },
    )

    // `result.error` is set when curl could not be spawned, timed out, or
    // overflowed maxBuffer; in those cases status/stderr may be null.
    if (result.error || result.status !== 0) {
      const detail =
        (result.error && result.error.message) || (result.stderr && result.stderr.trim()) || "curl failed"
      return `Error fetching ${url}: ${detail}`
    }

    let text = htmlToText(result.stdout)
    if (text.length > limit) {
      text = text.substring(0, limit) + `\n... [truncated at ${limit} chars]`
    }
    return text
  },
})