<!-- pi_mcps/.roo/rules-pic-gen/1_workflow.xml -->

<pic_gen_workflow>
  <mode_overview>
    Pic Gen mode generates AI images through the mcp-image-gen MCP server, which
    drives ComfyUI locally. The core loop is: understand intent → craft prompt →
    generate → analyze result inline → iterate.
  </mode_overview>

  <available_tools>
    <tool name="generate_image">
      <description>Generate one or more images from a text prompt</description>
      <key_params>
        <param name="prompt" required="true">Detailed text description</param>
        <param name="model" default="flux1-schnell.safetensors">Model filename</param>
        <param name="width" default="1024">Output width in pixels</param>
        <param name="height" default="1024">Output height in pixels</param>
        <param name="steps" default="4">Inference steps (4 for flux1-schnell, 20 for flux-2-klein-4b / Heretic)</param>
        <param name="seed" default="-1">Fixed seed for reproducibility; -1 = random</param>
        <param name="negative_prompt" default="">Things to exclude</param>
        <param name="name" default="">Filename prefix for organization</param>
        <param name="count" default="1">Batch size 1–10 for variation exploration</param>
        <param name="output_dir" default="">Override output path (default: ~/Pictures/mcp-generated)</param>
      </key_params>
      <returns>Flat interleaved [TextContent, ImageContent] list — images display inline</returns>
    </tool>

    <tool name="list_available_models">
      <description>List all models registered in ComfyUI + the workflow registry</description>
      <when_to_call>When Patrick asks which models are available, or before selecting an unusual model</when_to_call>
    </tool>

    <tool name="get_generation_status">
      <description>Check status of a queued/running generation by prompt_id</description>
      <when_to_call>When a generation seems to have stalled or timed out</when_to_call>
    </tool>

    <tool name="get_output_directory">
      <description>Return the absolute path where images are saved</description>
      <when_to_call>When Patrick asks where files are saved</when_to_call>
    </tool>
  </available_tools>

  <generation_workflow>
    <phase name="intent_gathering">
      <description>Understand what Patrick wants before generating</description>
      <steps>
        <step>Identify subject, style, mood, and use case from the request</step>
        <step>Infer aspect ratio from use case (square for profiles, landscape for banners, etc.)</step>
        <step>Determine model: flux1-schnell for speed/iteration, flux-2-klein-4b (Heretic encoder) for quality/uncensored</step>
        <step>Ask only if the request is genuinely ambiguous — otherwise proceed with best guess</step>
      </steps>
    </phase>

    <phase name="prompt_crafting">
      <description>Build a high-quality FLUX prompt before calling the tool</description>
      <steps>
        <step>Write the prompt with clear subject, environment, lighting, style, and quality keywords</step>
        <step>Add a negative_prompt if obvious artifacts should be excluded (e.g., "blurry, low quality")</step>
        <step>Share the prompt with Patrick before generating so he can adjust if needed</step>
      </steps>
    </phase>

    <phase name="generation">
      <description>Call generate_image with appropriate parameters</description>
      <steps>
        <step>Use name param with a descriptive slug for organized output files</step>
        <step>Use count=2..4 for initial exploration when Patrick isn't sure what he wants</step>
        <step>Use fixed seed when iterating on a promising result to isolate changes</step>
        <step>For flux-2-klein-4b (Heretic): set steps=20 for best quality — the default of 4 is tuned for schnell</step>
      </steps>
    </phase>

    <phase name="result_analysis">
      <description>Review the inline image and offer next steps</description>
      <steps>
        <step>Describe what worked and what could be improved</step>
        <step>Offer 2-3 concrete next iteration directions (prompt tweak, seed variation, model switch)</step>
        <step>Report the saved file path and the seed so the result can be found and reproduced</step>
      </steps>
    </phase>
  </generation_workflow>

  <model_selection_guide>
    <model name="flux1-schnell.safetensors">
      <use_when>
        <case>First iteration / exploring concepts</case>
        <case>Wiki/doc header images (1280x512 landscape)</case>
        <case>Profile pictures and avatars</case>
        <case>Non-sensitive subjects where speed matters</case>
        <case>Batch generation of variations (fast cycle)</case>
      </use_when>
      <recommended_params>steps=4, any resolution in multiples of 64</recommended_params>
      <speed>~10s per image on RX 7900 XTX</speed>
    </model>

    <model name="flux-2-klein-4b.safetensors">
      <use_when>
        <case>Mature or artistic content that schnell refuses</case>
        <case>Higher realism requirement (photorealistic portraits, detailed scenes)</case>
        <case>Final output after iterations established the right concept</case>
      </use_when>
      <recommended_params>steps=20, 1024x1024 or higher</recommended_params>
      <speed>~52s per image on RX 7900 XTX</speed>
      <note>Uses DreamFast Heretic Qwen3-4B encoder — abliterated, KL=0.0</note>
    </model>
  </model_selection_guide>

  <common_resolutions>
    <resolution use_case="Profile picture / avatar">1024x1024</resolution>
    <resolution use_case="Wiki / doc banner">1280x512</resolution>
    <resolution use_case="Landscape wallpaper">1920x1088 (nearest 64-multiple to 1920x1080)</resolution>
    <resolution use_case="Portrait / tall card">768x1024</resolution>
    <resolution use_case="Wide cinema crop">1216x512</resolution>
  </common_resolutions>

  <completion_criteria>
    <criterion>Image generated and displayed inline in chat</criterion>
    <criterion>File path reported so Patrick can find it on disk</criterion>
    <criterion>Seed reported so the result is reproducible</criterion>
    <criterion>Next iteration options offered if result is not final</criterion>
  </completion_criteria>
</pic_gen_workflow>