43 lines
1.4 KiB
TypeScript
43 lines
1.4 KiB
TypeScript
import { parametersSchema as z, defineCustomTool } from "@roo-code/types"
|
|
import { spawnSync } from "child_process"
|
|
|
|
/** Output cap used when `maxChars` is missing or is not a number >= 1. */
const DEFAULT_MAX_CHARS = 15000

/** Matches a leading `scheme://` prefix such as `https://`. */
const URL_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:\/\//i

/** Only these URL protocols may be fetched (initial request and redirects). */
const ALLOWED_PROTOCOLS = ["http:", "https:"]

/**
 * `web_fetch` custom tool: downloads a page with curl and returns it as plain text.
 *
 * execute({ url, maxChars }):
 * - `url`      – address to fetch. A value without a `scheme://` prefix is treated
 *                as `http://<value>`; anything that is not http/https is rejected.
 * - `maxChars` – maximum number of characters of text to return. Values that are
 *                missing, non-finite or below 1 fall back to 15000.
 *
 * Resolves to the extracted text (tags removed, common entities decoded,
 * whitespace collapsed, truncated with a trailing notice when over the limit),
 * or to a string starting with `Error fetching <url>:` when the URL is rejected
 * or curl fails. Failures are reported through the return value, not by throwing.
 *
 * NOTE: `spawnSync` is synchronous, so the calling thread is blocked until curl
 * exits or the 20 s spawn timeout fires, even though `execute` is declared async.
 */
export default defineCustomTool({
  name: "web_fetch",
  description: "Fetch a web page and return its content as clean text. Uses curl + HTML stripping. Lightweight alternative to the WebScraper MCP for simple page fetches.",
  parameters: z.object({
    url: z.string().describe("URL to fetch"),
    maxChars: z.number().optional().describe("Truncate output at this many characters. Default: 15000"),
  }),
  async execute({ url, maxChars }) {
    // Keep the historical "0 / missing means default" behaviour, but also guard
    // against negative, fractional and NaN values reaching substring().
    const limit =
      typeof maxChars === "number" && Number.isFinite(maxChars) && maxChars >= 1
        ? Math.floor(maxChars)
        : DEFAULT_MAX_CHARS

    // Parse the URL before handing it to curl: a raw value beginning with "-"
    // would otherwise be interpreted as a curl option, and schemes such as
    // file:// would expose local resources.
    const candidate = url.trim()
    let target
    try {
      target = new URL(URL_SCHEME_PATTERN.test(candidate) ? candidate : `http://${candidate}`)
    } catch {
      return `Error fetching ${url}: invalid URL`
    }
    if (!ALLOWED_PROTOCOLS.includes(target.protocol)) {
      return `Error fetching ${url}: only http and https URLs are supported`
    }

    const result = spawnSync(
      "curl",
      [
        // -S keeps curl's own error message on stderr despite -s.
        "-sSL",
        // Treat {} and [] in the URL literally instead of expanding them.
        "--globoff",
        // Restrict both the initial request and any redirect to http(s).
        "--proto",
        "=http,https",
        "--proto-redir",
        "=http,https",
        "--max-time",
        "15",
        "-H",
        "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        target.href,
      ],
      {
        encoding: "utf-8",
        timeout: 20000,
        maxBuffer: 5 * 1024 * 1024,
      },
    )

    // result.error is set when curl could not be spawned, timed out, or
    // produced more output than maxBuffer; status is non-zero on curl errors.
    if (result.error || result.status !== 0) {
      const stderrText = (result.stderr || "").trim()
      const spawnText = result.error ? result.error.message : ""
      return `Error fetching ${url}: ${stderrText || spawnText || "curl failed"}`
    }

    let text = result.stdout
      // Drop script/style elements together with their contents.
      .replace(/<script[\s\S]*?<\/script>/gi, "")
      .replace(/<style[\s\S]*?<\/style>/gi, "")
      // Replace every remaining tag with a space so adjacent words stay separated.
      .replace(/<[^>]+>/g, " ")
      // Decode the most common character references.
      .replace(/&lt;/g, "<")
      .replace(/&gt;/g, ">")
      .replace(/&quot;/g, '"')
      .replace(/&#39;|&apos;/g, "'")
      .replace(/&nbsp;/g, " ")
      // Decode &amp; last so that "&amp;lt;" yields the literal text "&lt;", not "<".
      .replace(/&amp;/g, "&")
      .replace(/\s+/g, " ")
      .trim()

    if (text.length > limit) {
      text = text.substring(0, limit) + `\n... [truncated at ${limit} chars]`
    }

    return text
  },
})
|