Files

122 lines
5.9 KiB
XML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<pic_gen_workflow>
<mode_overview>
Pic Gen mode generates AI images through the mcp-image-gen MCP server, which
drives ComfyUI locally. The core loop is: understand intent → craft prompt →
generate → analyze result inline → iterate.
</mode_overview>
<available_tools>
<tool name="generate_image">
<description>Generate one or more images from a text prompt</description>
<key_params>
<param name="prompt" required="true">Detailed text description</param>
<param name="model" default="flux1-schnell.safetensors">Model filename</param>
<param name="width" default="1024">Output width in pixels</param>
<param name="height" default="1024">Output height in pixels</param>
<param name="steps" default="4">Inference steps (4 for flux1-schnell, 20 for flux-2-klein-4b, a.k.a. Heretic)</param>
<param name="seed" default="-1">Fixed seed for reproducibility; -1 = random</param>
<param name="negative_prompt" default="">Things to exclude</param>
<param name="name" default="">Filename prefix for organization</param>
<param name="count" default="1">Batch size 1-10 for variation exploration</param>
<param name="output_dir" default="">Override output path (default: ~/Pictures/mcp-generated)</param>
</key_params>
<returns>Flat interleaved [TextContent, ImageContent] list — images display inline</returns>
</tool>
<tool name="list_available_models">
<description>List all models registered in ComfyUI + the workflow registry</description>
<when_to_call>When Patrick asks which models are available, or before selecting an unusual model</when_to_call>
</tool>
<tool name="get_generation_status">
<description>Check status of a queued/running generation by prompt_id</description>
<when_to_call>When a generation seems to have stalled or timed out</when_to_call>
</tool>
<tool name="get_output_directory">
<description>Return the absolute path where images are saved</description>
<when_to_call>When Patrick asks where files are saved</when_to_call>
</tool>
</available_tools>
<generation_workflow>
<phase name="intent_gathering">
<description>Understand what Patrick wants before generating</description>
<steps>
<step>Identify subject, style, mood, and use case from the request</step>
<step>Infer aspect ratio from use case (square for profiles, landscape for banners, etc.)</step>
<step>Determine model: schnell (flux1-schnell.safetensors) for speed/iteration, heretic (flux-2-klein-4b.safetensors) for quality/uncensored</step>
<step>Ask only if the request is genuinely ambiguous — otherwise proceed with best guess</step>
</steps>
</phase>
<phase name="prompt_crafting">
<description>Build a high-quality FLUX prompt before calling the tool</description>
<steps>
<step>Write the prompt with clear subject, environment, lighting, style, and quality keywords</step>
<step>Add a negative_prompt if obvious artifacts should be excluded (e.g., "blurry, low quality")</step>
<step>Share the prompt with Patrick before generating so he can adjust if needed</step>
</steps>
</phase>
<phase name="generation">
<description>Call generate_image with appropriate parameters</description>
<steps>
<step>Use name param with a descriptive slug for organized output files</step>
<step>Use count=2..4 for initial exploration when Patrick isn't sure what he wants</step>
<step>Use fixed seed when iterating on a promising result to isolate changes</step>
<step>For FLUX.2 Klein/Heretic (flux-2-klein-4b.safetensors): pass steps=20 explicitly for best quality — the default of 4 is tuned for schnell</step>
</steps>
</phase>
<phase name="result_analysis">
<description>Review the inline image and offer next steps</description>
<steps>
<step>Describe what worked and what could be improved</step>
<step>Offer 2-3 concrete next iteration directions (prompt tweak, seed variation, model switch)</step>
<step>Note the saved file path for reference</step>
</steps>
</phase>
</generation_workflow>
<model_selection_guide>
<model name="flux1-schnell.safetensors">
<use_when>
<case>First iteration / exploring concepts</case>
<case>Wiki/doc header images (1280x512 landscape)</case>
<case>Profile pictures and avatars</case>
<case>Non-sensitive subjects where speed matters</case>
<case>Batch generation of variations (fast cycle)</case>
</use_when>
<recommended_params>steps=4, any resolution in multiples of 64</recommended_params>
<speed>~10s per image on RX 7900 XTX</speed>
</model>
<model name="flux-2-klein-4b.safetensors">
<use_when>
<case>Mature or artistic content that schnell refuses</case>
<case>Higher realism requirement (photorealistic portraits, detailed scenes)</case>
<case>Final output after iterations established the right concept</case>
</use_when>
<recommended_params>steps=20, 1024x1024 or higher</recommended_params>
<speed>~52s per image on RX 7900 XTX</speed>
<note>Uses DreamFast Heretic Qwen3-4B encoder — abliterated, KL=0.0</note>
</model>
</model_selection_guide>
<common_resolutions>
<resolution use_case="Profile picture / avatar">1024x1024</resolution>
<resolution use_case="Wiki / doc banner">1280x512</resolution>
<resolution use_case="Landscape wallpaper">1920x1088 (nearest 64-multiple to 1920x1080)</resolution>
<resolution use_case="Portrait / tall card">768x1024</resolution>
<resolution use_case="Wide cinema crop">1216x512</resolution>
</common_resolutions>
<completion_criteria>
<criterion>Image generated and displayed inline in chat</criterion>
<criterion>File path reported so Patrick can find it on disk</criterion>
<criterion>Seed reported so the result is reproducible</criterion>
<criterion>Next iteration options offered if result is not final</criterion>
</completion_criteria>
</pic_gen_workflow>