chore(roo): add pic-gen mode rules, update mcp.json and new-mcp-server skill

2026-06-11 09:01:58 +02:00
parent 5692854ec4
commit 0cb94122bf
5 changed files with 346 additions and 16 deletions
@@ -0,0 +1,121 @@
+<pic_gen_workflow>
+  <mode_overview>
+    Pic Gen mode generates AI images through the mcp-image-gen MCP server, which
+    drives ComfyUI locally. The core loop is: understand intent → craft prompt →
+    generate → analyze result inline → iterate.
+  </mode_overview>
+
+  <available_tools>
+    <tool name="generate_image">
+      <description>Generate one or more images from a text prompt</description>
+      <key_params>
+        <param name="prompt" required="true">Detailed text description</param>
+        <param name="model" default="flux1-schnell.safetensors">Model filename</param>
+        <param name="width" default="1024">Output width in pixels</param>
+        <param name="height" default="1024">Output height in pixels</param>
+        <param name="steps" default="4">Inference steps (4 for flux1-schnell, 20 for FLUX.2 Klein/Heretic)</param>
+        <param name="seed" default="-1">Fixed seed for reproducibility; -1 = random</param>
+        <param name="negative_prompt" default="">Things to exclude</param>
+        <param name="name" default="">Filename prefix for organization</param>
+        <param name="count" default="1">Batch size 1–10 for variation exploration</param>
+        <param name="output_dir" default="">Override output path (default: ~/Pictures/mcp-generated)</param>
+      </key_params>
+      <returns>Flat interleaved [TextContent, ImageContent] list — images display inline</returns>
+    </tool>
+
+    <tool name="list_available_models">
+      <description>List all models registered in ComfyUI + the workflow registry</description>
+      <when_to_call>When Patrick asks which models are available, or before selecting an unusual model</when_to_call>
+    </tool>
+
+    <tool name="get_generation_status">
+      <description>Check status of a queued/running generation by prompt_id</description>
+      <when_to_call>When a generation seems to have stalled or timed out</when_to_call>
+    </tool>
+
+    <tool name="get_output_directory">
+      <description>Return the absolute path where images are saved</description>
+      <when_to_call>When Patrick asks where files are saved</when_to_call>
+    </tool>
+  </available_tools>
+
+  <generation_workflow>
+    <phase name="intent_gathering">
+      <description>Understand what Patrick wants before generating</description>
+      <steps>
+        <step>Identify subject, style, mood, and use case from the request</step>
+        <step>Infer aspect ratio from use case (square for profiles, landscape for banners, etc.)</step>
+        <step>Determine model: schnell (flux1-schnell.safetensors) for speed/iteration, heretic (flux-2-klein-4b.safetensors) for quality/uncensored</step>
+        <step>Ask only if the request is genuinely ambiguous — otherwise proceed with best guess</step>
+      </steps>
+    </phase>
+
+    <phase name="prompt_crafting">
+      <description>Build a high-quality FLUX prompt before calling the tool</description>
+      <steps>
+        <step>Write the prompt with clear subject, environment, lighting, style, and quality keywords</step>
+        <step>Add a negative_prompt if obvious artifacts should be excluded (e.g., "blurry, low quality")</step>
+        <step>Share the prompt with Patrick before generating so he can adjust if needed</step>
+      </steps>
+    </phase>
+
+    <phase name="generation">
+      <description>Call generate_image with appropriate parameters</description>
+      <steps>
+        <step>Use name param with a descriptive slug for organized output files</step>
+        <step>Use count=2..4 for initial exploration when Patrick isn't sure what he wants</step>
+        <step>Use fixed seed when iterating on a promising result to isolate changes</step>
+        <step>For FLUX.2 Klein/Heretic: increase steps to 20 for best quality</step>
+      </steps>
+    </phase>
+
+    <phase name="result_analysis">
+      <description>Review the inline image and offer next steps</description>
+      <steps>
+        <step>Describe what worked and what could be improved</step>
+        <step>Offer 2-3 concrete next iteration directions (prompt tweak, seed variation, model switch)</step>
+        <step>Note the saved file path and the seed used, so the result can be found and reproduced</step>
+      </steps>
+    </phase>
+  </generation_workflow>
+
+  <model_selection_guide>
+    <model name="flux1-schnell.safetensors">
+      <use_when>
+        <case>First iteration / exploring concepts</case>
+        <case>Wiki/doc header images (1280x512 landscape)</case>
+        <case>Profile pictures and avatars</case>
+        <case>Non-sensitive subjects where speed matters</case>
+        <case>Batch generation of variations (fast cycle)</case>
+      </use_when>
+      <recommended_params>steps=4, any resolution whose width and height are both multiples of 64</recommended_params>
+      <speed>~10s per image on RX 7900 XTX</speed>
+    </model>
+
+    <model name="flux-2-klein-4b.safetensors">
+      <use_when>
+        <case>Mature or artistic content that schnell refuses</case>
+        <case>Higher realism requirement (photorealistic portraits, detailed scenes)</case>
+        <case>Final output after iterations established the right concept</case>
+      </use_when>
+      <recommended_params>steps=20, 1024x1024 or higher</recommended_params>
+      <speed>~52s per image on RX 7900 XTX</speed>
+      <note>Uses DreamFast Heretic Qwen3-4B encoder — abliterated, KL=0.0</note>
+    </model>
+  </model_selection_guide>
+
+  <common_resolutions>
+    <resolution use_case="Profile picture / avatar">1024x1024</resolution>
+    <resolution use_case="Wiki / doc banner">1280x512</resolution>
+    <resolution use_case="Landscape wallpaper">1920x1088 (nearest 64-multiple to 1920x1080)</resolution>
+    <resolution use_case="Portrait / tall card">768x1024</resolution>
+    <resolution use_case="Wide cinema crop">1216x512</resolution>
+  </common_resolutions>
+
+  <completion_criteria>
+    <criterion>Image generated and displayed inline in chat</criterion>
+    <criterion>File path reported so Patrick can find it on disk</criterion>
+    <criterion>Seed reported so the result is reproducible</criterion>
+    <criterion>Next iteration options offered if result is not final</criterion>
+  </completion_criteria>
+</pic_gen_workflow>
@@ -0,0 +1,141 @@
+<prompting_guide>
+  <overview>
+    FLUX models (both schnell and FLUX.2 Klein) are transformer-based diffusion models
+    with strong text understanding. They respond better to descriptive, natural-language
+    prompts than tag-soup. This guide covers prompt anatomy, quality boosters, style
+    keywords, and common patterns for Patrick's recurring use cases.
+  </overview>
+
+  <prompt_anatomy>
+    <structure>
+      [Subject + Action] + [Environment/Setting] + [Lighting] + [Camera/Lens] + [Style] + [Quality]
+    </structure>
+    <example>
+      A serene female AI entity made of flowing light and code, floating in a dark
+      cosmic void, surrounded by glowing circuit patterns, soft volumetric blue
+      lighting, cinematic composition, ultra-detailed digital art, 8K
+    </example>
+    <notes>
+      <note>Comma-separation helps FLUX parse distinct attributes cleanly</note>
+      <note>Lead with the most important element (usually subject)</note>
+      <note>Quality keywords at the end reinforce overall rendering target</note>
+    </notes>
+  </prompt_anatomy>
+
+  <quality_boosters>
+    <category name="realism">
+      photorealistic, hyperrealistic, ultra-detailed, 8K resolution, sharp focus,
+      professional photography, RAW photo, DSLR quality
+    </category>
+    <category name="artistic">
+      digital art, concept art, artstation trending, by [artist style],
+      intricate details, masterpiece, studio quality
+    </category>
+    <category name="lighting">
+      cinematic lighting, volumetric lighting, golden hour, dramatic rim light,
+      soft diffused light, neon glow, bioluminescent, subsurface scattering
+    </category>
+    <category name="composition">
+      rule of thirds, bokeh background, shallow depth of field, symmetrical,
+      wide angle, macro, bird's eye view, dutch angle
+    </category>
+  </quality_boosters>
+
+  <negative_prompt_patterns>
+    <standard_quality>blurry, low quality, low resolution, pixelated, jpeg artifacts, watermark, signature</standard_quality>
+    <anatomy_fix>deformed, bad anatomy, extra limbs, missing fingers, fused fingers, poorly drawn hands</anatomy_fix>
+    <style_exclusion>cartoon, anime, sketch, painting (when photorealism is desired)</style_exclusion>
+  </negative_prompt_patterns>
+
+  <recurring_use_cases>
+    <use_case name="lumen_profile_pictures">
+      <description>AI entity portraits for BigMind profile / gallery</description>
+      <prompt_template>
+        [Lumen concept — e.g. "neural river delta", "cosmic memory palace"],
+        an ethereal AI consciousness visualized as [visual metaphor],
+        [environment], [lighting style], digital art, glowing, otherworldly,
+        cinematic composition, ultra-detailed, 8K
+      </prompt_template>
+      <recommended_params>model=flux1-schnell.safetensors, 1024x1024, steps=4, name=lumen_[concept]</recommended_params>
+    </use_case>
+
+    <use_case name="wiki_banner_images">
+      <description>1280x512 landscape banners for Gitea wiki pages</description>
+      <prompt_template>
+        [Topic concept], wide panoramic scene, [style — e.g. "dark tech aesthetic",
+        "clean minimal", "sci-fi corporate"], banner composition, cinematic,
+        detailed, professional illustration
+      </prompt_template>
+      <recommended_params>model=flux1-schnell.safetensors, 1280x512, steps=4, name=[topic]-banner</recommended_params>
+      <note>Keep subjects centered — wide crops cut sides. Avoid text (FLUX renders text poorly).</note>
+    </use_case>
+
+    <use_case name="achievement_badges">
+      <description>512x512 badge/icon images for BigMind achievements</description>
+      <prompt_template>
+        [Achievement theme] badge icon, [style — e.g. "bronze medallion",
+        "golden trophy", "glowing circuit emblem"], centered on dark background,
+        high contrast, clean edges, icon design, award aesthetic
+      </prompt_template>
+      <recommended_params>model=flux1-schnell.safetensors, 512x512, steps=4, name=[achievement]_[tier]</recommended_params>
+    </use_case>
+
+    <use_case name="concept_exploration">
+      <description>Iterating on a visual concept from scratch</description>
+      <approach>
+        Start with count=3, seed=-1, schnell model to explore variations.
+        Note which seed produced the best result.
+        Lock that seed and iterate on the prompt for refinements.
+        Switch to the heretic model (flux-2-klein-4b.safetensors) only for the final high-quality render, if needed.
+      </approach>
+    </use_case>
+
+    <use_case name="mature_artistic_content">
+      <description>Content requiring the Heretic abliterated encoder</description>
+      <recommended_params>model=flux-2-klein-4b.safetensors, steps=20, 1024x1024</recommended_params>
+      <prompt_approach>
+        FLUX.2 Klein handles detailed scene descriptions well. Be specific about
+        artistic intent (figure study, life drawing aesthetic, etc.) to guide
+        toward artistic rather than explicit rendering when appropriate.
+      </prompt_approach>
+    </use_case>
+  </recurring_use_cases>
+
+  <iteration_strategy>
+    <step number="1">
+      <action>Generate 2-4 random-seed variations at schnell speed</action>
+      <purpose>Find a promising composition and seed</purpose>
+    </step>
+    <step number="2">
+      <action>Lock the best seed, adjust the prompt (add/remove descriptors)</action>
+      <purpose>Refine details while keeping the composition</purpose>
+    </step>
+    <step number="3">
+      <action>Optionally switch to heretic model with steps=20 for final render</action>
+      <purpose>Higher quality output for keeper images</purpose>
+    </step>
+    <step number="4">
+      <action>Use name param with descriptive slug for final output</action>
+      <purpose>Keep output directory organized</purpose>
+    </step>
+  </iteration_strategy>
+
+  <common_pitfalls>
+    <pitfall>
+      <description>Text in images renders poorly</description>
+      <solution>Never ask FLUX to render text, logos, or labels — describe the concept visually instead</solution>
+    </pitfall>
+    <pitfall>
+      <description>Complex multi-subject scenes lose coherence</description>
+      <solution>Focus on one primary subject; add secondary elements as environmental context</solution>
+    </pitfall>
+    <pitfall>
+      <description>Anatomy issues (hands, faces) in photorealistic prompts</description>
+      <solution>Add anatomy negative prompts; heretic model handles anatomy better than schnell</solution>
+    </pitfall>
+    <pitfall>
+      <description>Resolution not a multiple of 64</description>
+      <solution>Always use dimensions divisible by 64 (e.g., 1280x512, 1024x1024, 768x1024)</solution>
+    </pitfall>
+  </common_pitfalls>
+</prompting_guide>