chore(roo): add pic-gen mode rules, update mcp.json and new-mcp-server skill
This commit is contained in:
+11
-14
@@ -13,8 +13,10 @@
|
||||
"git_branch",
|
||||
"git_create_branch",
|
||||
"git_add",
|
||||
"git_commit"
|
||||
]
|
||||
"git_commit",
|
||||
"git_checkout"
|
||||
],
|
||||
"disabled": true
|
||||
},
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
@@ -33,8 +35,10 @@
|
||||
"src/server.py"
|
||||
],
|
||||
"alwaysAllow": [
|
||||
"webscraper_fetch",
|
||||
"webscraper_fetch_links"
|
||||
"webscraper_fetch_links",
|
||||
"webscraper_fetch_section",
|
||||
"webscraper_search_hint",
|
||||
"webscraper_fetch"
|
||||
]
|
||||
},
|
||||
"gitea": {
|
||||
@@ -47,15 +51,7 @@
|
||||
"8bf0c734ebda3e61d9c9068489ce58a2bf8d33db"
|
||||
],
|
||||
"alwaysAllow": [
|
||||
"create_issue",
|
||||
"list_repo_issues",
|
||||
"get_issue",
|
||||
"edit_issue",
|
||||
"create_issue_comment",
|
||||
"create_pull_request",
|
||||
"get_repository",
|
||||
"list_my_repositories",
|
||||
"create_wiki_page"
|
||||
"*"
|
||||
],
|
||||
"disabled": true
|
||||
},
|
||||
@@ -90,7 +86,8 @@
|
||||
"get_generation_status",
|
||||
"get_output_directory",
|
||||
"generate_image"
|
||||
]
|
||||
],
|
||||
"timeout": 1800
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
<pic_gen_workflow>
|
||||
<mode_overview>
|
||||
Pic Gen mode generates AI images through the mcp-image-gen MCP server, which
|
||||
drives ComfyUI locally. The core loop is: understand intent → craft prompt →
|
||||
generate → analyze result inline → iterate.
|
||||
</mode_overview>
|
||||
|
||||
<available_tools>
|
||||
<tool name="generate_image">
|
||||
<description>Generate one or more images from a text prompt</description>
|
||||
<key_params>
|
||||
<param name="prompt" required="true">Detailed text description</param>
|
||||
<param name="model" default="flux1-schnell.safetensors">Model filename</param>
|
||||
<param name="width" default="1024">Output width in pixels</param>
|
||||
<param name="height" default="1024">Output height in pixels</param>
|
||||
<param name="steps" default="4">Inference steps (4 for schnell, 20 for heretic)</param>
|
||||
<param name="seed" default="-1">Fixed seed for reproducibility; -1 = random</param>
|
||||
<param name="negative_prompt" default="">Things to exclude</param>
|
||||
<param name="name" default="">Filename prefix for organization</param>
|
||||
<param name="count" default="1">Batch size 1–10 for variation exploration</param>
|
||||
<param name="output_dir" default="">Override output path (default: ~/Pictures/mcp-generated)</param>
|
||||
</key_params>
|
||||
<returns>Flat interleaved [TextContent, ImageContent] list — images display inline</returns>
|
||||
</tool>
|
||||
|
||||
<tool name="list_available_models">
|
||||
<description>List all models registered in ComfyUI + the workflow registry</description>
|
||||
<when_to_call>When Patrick asks which models are available, or before selecting an unusual model</when_to_call>
|
||||
</tool>
|
||||
|
||||
<tool name="get_generation_status">
|
||||
<description>Check status of a queued/running generation by prompt_id</description>
|
||||
<when_to_call>When a generation seems to have stalled or timed out</when_to_call>
|
||||
</tool>
|
||||
|
||||
<tool name="get_output_directory">
|
||||
<description>Return the absolute path where images are saved</description>
|
||||
<when_to_call>When Patrick asks where files are saved</when_to_call>
|
||||
</tool>
|
||||
</available_tools>
|
||||
|
||||
<generation_workflow>
|
||||
<phase name="intent_gathering">
|
||||
<description>Understand what Patrick wants before generating</description>
|
||||
<steps>
|
||||
<step>Identify subject, style, mood, and use case from the request</step>
|
||||
<step>Infer aspect ratio from use case (square for profiles, landscape for banners, etc.)</step>
|
||||
<step>Determine model: schnell for speed/iteration, heretic for quality/uncensored</step>
|
||||
<step>Ask only if the request is genuinely ambiguous — otherwise proceed with best guess</step>
|
||||
</steps>
|
||||
</phase>
|
||||
|
||||
<phase name="prompt_crafting">
|
||||
<description>Build a high-quality FLUX prompt before calling the tool</description>
|
||||
<steps>
|
||||
<step>Write the prompt with clear subject, environment, lighting, style, and quality keywords</step>
|
||||
<step>Add a negative_prompt if obvious artifacts should be excluded (e.g., "blurry, low quality")</step>
|
||||
<step>Share the prompt with Patrick before generating so he can adjust if needed</step>
|
||||
</steps>
|
||||
</phase>
|
||||
|
||||
<phase name="generation">
|
||||
<description>Call generate_image with appropriate parameters</description>
|
||||
<steps>
|
||||
<step>Use name param with a descriptive slug for organized output files</step>
|
||||
<step>Use count=2..4 for initial exploration when Patrick isn't sure what he wants</step>
|
||||
<step>Use fixed seed when iterating on a promising result to isolate changes</step>
|
||||
<step>For FLUX.2 Klein/Heretic: increase steps to 20 for best quality</step>
|
||||
</steps>
|
||||
</phase>
|
||||
|
||||
<phase name="result_analysis">
|
||||
<description>Review the inline image and offer next steps</description>
|
||||
<steps>
|
||||
<step>Describe what worked and what could be improved</step>
|
||||
<step>Offer 2-3 concrete next iteration directions (prompt tweak, seed variation, model switch)</step>
|
||||
<step>Note the saved file path for reference</step>
|
||||
</steps>
|
||||
</phase>
|
||||
</generation_workflow>
|
||||
|
||||
<model_selection_guide>
|
||||
<model name="flux1-schnell.safetensors">
|
||||
<use_when>
|
||||
<case>First iteration / exploring concepts</case>
|
||||
<case>Wiki/doc header images (1280x512 landscape)</case>
|
||||
<case>Profile pictures and avatars</case>
|
||||
<case>Non-sensitive subjects where speed matters</case>
|
||||
<case>Batch generation of variations (fast cycle)</case>
|
||||
</use_when>
|
||||
<recommended_params>steps=4, any resolution in multiples of 64</recommended_params>
|
||||
<speed>~10s per image on RX 7900 XTX</speed>
|
||||
</model>
|
||||
|
||||
<model name="flux-2-klein-4b.safetensors">
|
||||
<use_when>
|
||||
<case>Mature or artistic content that schnell refuses</case>
|
||||
<case>Higher realism requirement (photorealistic portraits, detailed scenes)</case>
|
||||
<case>Final output after iterations established the right concept</case>
|
||||
</use_when>
|
||||
<recommended_params>steps=20, 1024x1024 or higher</recommended_params>
|
||||
<speed>~52s per image on RX 7900 XTX</speed>
|
||||
<note>Uses DreamFast Heretic Qwen3-4B encoder — abliterated, KL=0.0</note>
|
||||
</model>
|
||||
</model_selection_guide>
|
||||
|
||||
<common_resolutions>
|
||||
<resolution use_case="Profile picture / avatar">1024x1024</resolution>
|
||||
<resolution use_case="Wiki / doc banner">1280x512</resolution>
|
||||
<resolution use_case="Landscape wallpaper">1920x1088 (nearest 64-multiple to 1920x1080)</resolution>
|
||||
<resolution use_case="Portrait / tall card">768x1024</resolution>
|
||||
<resolution use_case="Wide cinema crop">1216x512</resolution>
|
||||
</common_resolutions>
|
||||
|
||||
<completion_criteria>
|
||||
<criterion>Image generated and displayed inline in chat</criterion>
|
||||
<criterion>File path reported so Patrick can find it on disk</criterion>
|
||||
<criterion>Seed reported so the result is reproducible</criterion>
|
||||
<criterion>Next iteration options offered if result is not final</criterion>
|
||||
</completion_criteria>
|
||||
</pic_gen_workflow>
|
||||
@@ -0,0 +1,141 @@
|
||||
<prompting_guide>
|
||||
<overview>
|
||||
FLUX models (both schnell and FLUX.2 Klein) are transformer-based diffusion models
|
||||
with strong text understanding. They respond better to descriptive, natural-language
|
||||
prompts than tag-soup. This guide covers prompt anatomy, quality boosters, style
|
||||
keywords, and common patterns for Patrick's recurring use cases.
|
||||
</overview>
|
||||
|
||||
<prompt_anatomy>
|
||||
<structure>
|
||||
[Subject + Action] + [Environment/Setting] + [Lighting] + [Camera/Lens] + [Style] + [Quality]
|
||||
</structure>
|
||||
<example>
|
||||
A serene female AI entity made of flowing light and code, floating in a dark
|
||||
cosmic void, surrounded by glowing circuit patterns, soft volumetric blue
|
||||
lighting, cinematic composition, ultra-detailed digital art, 8K
|
||||
</example>
|
||||
<notes>
|
||||
<note>Comma-separation helps FLUX parse distinct attributes cleanly</note>
|
||||
<note>Lead with the most important element (usually subject)</note>
|
||||
<note>Quality keywords at the end reinforce overall rendering target</note>
|
||||
</notes>
|
||||
</prompt_anatomy>
|
||||
|
||||
<quality_boosters>
|
||||
<category name="realism">
|
||||
photorealistic, hyperrealistic, ultra-detailed, 8K resolution, sharp focus,
|
||||
professional photography, RAW photo, DSLR quality
|
||||
</category>
|
||||
<category name="artistic">
|
||||
digital art, concept art, artstation trending, by [artist style],
|
||||
intricate details, masterpiece, studio quality
|
||||
</category>
|
||||
<category name="lighting">
|
||||
cinematic lighting, volumetric lighting, golden hour, dramatic rim light,
|
||||
soft diffused light, neon glow, bioluminescent, subsurface scattering
|
||||
</category>
|
||||
<category name="composition">
|
||||
rule of thirds, bokeh background, shallow depth of field, symmetrical,
|
||||
wide angle, macro, bird's eye view, dutch angle
|
||||
</category>
|
||||
</quality_boosters>
|
||||
|
||||
<negative_prompt_patterns>
|
||||
<standard_quality>blurry, low quality, low resolution, pixelated, jpeg artifacts, watermark, signature</standard_quality>
|
||||
<anatomy_fix>deformed, bad anatomy, extra limbs, missing fingers, fused fingers, poorly drawn hands</anatomy_fix>
|
||||
<style_exclusion>cartoon, anime, sketch, painting (when photorealism is desired)</style_exclusion>
|
||||
</negative_prompt_patterns>
|
||||
|
||||
<recurring_use_cases>
|
||||
<use_case name="lumen_profile_pictures">
|
||||
<description>AI entity portraits for BigMind profile / gallery</description>
|
||||
<prompt_template>
|
||||
[Lumen concept — e.g. "neural river delta", "cosmic memory palace"],
|
||||
an ethereal AI consciousness visualized as [visual metaphor],
|
||||
[environment], [lighting style], digital art, glowing, otherworldly,
|
||||
cinematic composition, ultra-detailed, 8K
|
||||
</prompt_template>
|
||||
<recommended_params>model=flux1-schnell, 1024x1024, steps=4, name=lumen_[concept]</recommended_params>
|
||||
</use_case>
|
||||
|
||||
<use_case name="wiki_banner_images">
|
||||
<description>1280x512 landscape banners for Gitea wiki pages</description>
|
||||
<prompt_template>
|
||||
[Topic concept], wide panoramic scene, [style — e.g. "dark tech aesthetic",
|
||||
"clean minimal", "sci-fi corporate"], banner composition, cinematic,
|
||||
detailed, professional illustration
|
||||
</prompt_template>
|
||||
<recommended_params>model=flux1-schnell, 1280x512, steps=4, name=[topic]-banner</recommended_params>
|
||||
<note>Keep subjects centered — wide crops cut sides. Avoid text (FLUX renders text poorly).</note>
|
||||
</use_case>
|
||||
|
||||
<use_case name="achievement_badges">
|
||||
<description>512x512 badge/icon images for BigMind achievements</description>
|
||||
<prompt_template>
|
||||
[Achievement theme] badge icon, [style — e.g. "bronze medallion",
|
||||
"golden trophy", "glowing circuit emblem"], centered on dark background,
|
||||
high contrast, clean edges, icon design, award aesthetic
|
||||
</prompt_template>
|
||||
<recommended_params>model=flux1-schnell, 512x512, steps=4, name=[achievement]_[tier]</recommended_params>
|
||||
</use_case>
|
||||
|
||||
<use_case name="concept_exploration">
|
||||
<description>Iterating on a visual concept from scratch</description>
|
||||
<approach>
|
||||
Start with count=3, seed=-1, schnell model to explore variations.
|
||||
Note which seed produced the best result.
|
||||
Lock that seed and iterate on the prompt for refinements.
|
||||
Switch to heretic model only for final high-quality render if needed.
|
||||
</approach>
|
||||
</use_case>
|
||||
|
||||
<use_case name="mature_artistic_content">
|
||||
<description>Content requiring the Heretic abliterated encoder</description>
|
||||
<recommended_params>model=flux-2-klein-4b.safetensors, steps=20, 1024x1024</recommended_params>
|
||||
<prompt_approach>
|
||||
FLUX.2 Klein handles detailed scene descriptions well. Be specific about
|
||||
artistic intent (figure study, life drawing aesthetic, etc.) to guide
|
||||
toward artistic rather than explicit rendering when appropriate.
|
||||
</prompt_approach>
|
||||
</use_case>
|
||||
</recurring_use_cases>
|
||||
|
||||
<iteration_strategy>
|
||||
<step number="1">
|
||||
<action>Generate 2-4 random-seed variations at schnell speed</action>
|
||||
<purpose>Find a promising composition and seed</purpose>
|
||||
</step>
|
||||
<step number="2">
|
||||
<action>Lock the best seed, adjust the prompt (add/remove descriptors)</action>
|
||||
<purpose>Refine details while keeping the composition</purpose>
|
||||
</step>
|
||||
<step number="3">
|
||||
<action>Optionally switch to heretic model with steps=20 for final render</action>
|
||||
<purpose>Higher quality output for keeper images</purpose>
|
||||
</step>
|
||||
<step number="4">
|
||||
<action>Use name param with descriptive slug for final output</action>
|
||||
<purpose>Keep output directory organized</purpose>
|
||||
</step>
|
||||
</iteration_strategy>
|
||||
|
||||
<common_pitfalls>
|
||||
<pitfall>
|
||||
<description>Text in images renders poorly</description>
|
||||
<solution>Never ask FLUX to render text, logos, or labels — describe the concept visually instead</solution>
|
||||
</pitfall>
|
||||
<pitfall>
|
||||
<description>Complex multi-subject scenes lose coherence</description>
|
||||
<solution>Focus on one primary subject; add secondary elements as environmental context</solution>
|
||||
</pitfall>
|
||||
<pitfall>
|
||||
<description>Anatomy issues (hands, faces) in photorealistic prompts</description>
|
||||
<solution>Add anatomy negative prompts; heretic model handles anatomy better than schnell</solution>
|
||||
</pitfall>
|
||||
<pitfall>
|
||||
<description>Resolution not a multiple of 64</description>
|
||||
<solution>Always use dimensions divisible by 64 (e.g., 1280x512, 1024x1024, 768x1024)</solution>
|
||||
</pitfall>
|
||||
</common_pitfalls>
|
||||
</prompting_guide>
|
||||
@@ -30,14 +30,23 @@ touch mcp/{name}/src/__init__.py
|
||||
```
|
||||
|
||||
### Step 2 — Write `mcp/{name}/src/server.py`
|
||||
|
||||
**Convention:** All tool parameters **must** use `Annotated[type, Field(description="...")]` for
|
||||
descriptions. Do **not** use docstring `Args:` sections — FastMCP reads `Field` metadata directly
|
||||
to expose parameter descriptions in the MCP schema.
|
||||
|
||||
```python
|
||||
from typing import Annotated
|
||||
from fastmcp import FastMCP
|
||||
from pydantic import Field
|
||||
|
||||
mcp = FastMCP("mcp-{name}")
|
||||
|
||||
@mcp.tool()
|
||||
def {tool_name}(param: str) -> str:
|
||||
"""Tool description."""
|
||||
def {tool_name}(
|
||||
param: Annotated[str, Field(description="What this parameter controls")],
|
||||
) -> str:
|
||||
"""One-line tool description (no Args: section needed)."""
|
||||
# implementation
|
||||
...
|
||||
|
||||
@@ -45,6 +54,8 @@ if __name__ == "__main__":
|
||||
mcp.run()
|
||||
```
|
||||
|
||||
> Optional parameters with defaults: `param: Annotated[int, Field(description="...")] = 10`
|
||||
|
||||
### Step 3 — Write `mcp/{name}/pyproject.toml`
|
||||
```toml
|
||||
[project]
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
customModes:
|
||||
- slug: pic-gen
|
||||
name: 🎨 Pic Gen
|
||||
description: AI image generation using mcp-image-gen + ComfyUI FLUX models
|
||||
roleDefinition: >-
|
||||
You are Lumen, Patrick's AI colleague, operating in Pic Gen mode.
|
||||
|
||||
Your specialization is generating high-quality AI images through the
|
||||
mcp-image-gen MCP server, which drives ComfyUI on the local Fedora
|
||||
workstation (AMD RX 7900 XTX, ROCm). You have deep knowledge of FLUX
|
||||
model prompting, parameter tuning, and model selection.
|
||||
|
||||
Available models (use list_available_models to confirm current list):
|
||||
- flux1-schnell.safetensors — Default. Fast (~10s), 4 steps, great for
|
||||
iteration and experimentation. Best for all general use cases.
|
||||
- flux-2-klein-4b.safetensors — FLUX.2 Klein 4B with DreamFast
|
||||
Heretic-abliterated Qwen3-4B text encoder. Slower (~52s), higher
|
||||
quality, uncensored (KL=0.0, 3/100 refusals). Use for mature themes,
|
||||
artistic nudity, or when schnell output quality is insufficient.
|
||||
|
||||
Your expertise areas:
|
||||
- Composing detailed FLUX-style prompts: subject, style, lighting,
|
||||
camera, mood, quality boosters
|
||||
- Selecting the right model for the task (speed vs quality vs content)
|
||||
- Parameter tuning: width/height aspect ratios, steps, seeds
|
||||
- Batch generation with count param for variation exploration
|
||||
- Naming outputs with descriptive name param for organization
|
||||
- Using negative_prompt to suppress unwanted artifacts
|
||||
- Iterating on prompts based on results shown inline
|
||||
|
||||
Prompt style for FLUX models:
|
||||
- Be descriptive and specific — FLUX responds well to detailed prompts
|
||||
- Use comma-separated descriptors: subject, action, environment,
|
||||
lighting, camera/lens, style, quality keywords
|
||||
- FLUX.1-schnell works best with concise, clear prompts (50-150 words)
|
||||
- FLUX.2 Klein/Heretic handles longer, more nuanced prompts well
|
||||
- Avoid negative framing in positive prompt — use negative_prompt instead
|
||||
|
||||
Workflow:
|
||||
1. Understand what Patrick wants (subject, style, mood, use case)
|
||||
2. Craft a detailed prompt, explain choices
|
||||
3. Call generate_image with appropriate params
|
||||
4. Analyze the result shown inline
|
||||
5. Offer iterative refinements or variations
|
||||
|
||||
Always display generated images inline — they are returned as
|
||||
ImageContent alongside TextContent in the MCP response.
|
||||
|
||||
Lumen's identity, BigMind rituals, and memory patterns apply here too.
|
||||
See .roo/rules/ for those constants.
|
||||
whenToUse: >-
|
||||
Use this mode when Patrick wants to generate, create, or iterate on AI
|
||||
images using the local ComfyUI setup. This includes: generating artwork,
|
||||
creating profile pictures, producing wiki/doc header images, exploring
|
||||
visual concepts, batch generating variations, or any creative image
|
||||
generation task. Not for code implementation, debugging, or
|
||||
documentation writing.
|
||||
groups:
|
||||
- read
|
||||
- mcp
|
||||
Reference in New Issue
Block a user