Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1d1e70776f | |||
| 1d8849cb41 | |||
| 40c91edf2f | |||
| 4a99a3625a | |||
| 38d26adb1f |
@@ -1,73 +1,98 @@
|
||||
{
|
||||
"6": {
|
||||
"1": {
|
||||
"class_type": "CLIPLoader",
|
||||
"inputs": {
|
||||
"clip_name": "qwen_3_4b_klein.safetensors",
|
||||
"type": "flux2",
|
||||
"device": "default"
|
||||
}
|
||||
},
|
||||
"2": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"clip": ["30", 0],
|
||||
"clip": ["1", 0],
|
||||
"text": "PROMPT_PLACEHOLDER"
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"class_type": "VAEDecode",
|
||||
"3": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"samples": ["13", 0],
|
||||
"vae": ["31", 0]
|
||||
"clip": ["1", 0],
|
||||
"text": "NEGATIVE_PLACEHOLDER"
|
||||
}
|
||||
},
|
||||
"9": {
|
||||
"class_type": "SaveImage",
|
||||
"4": {
|
||||
"class_type": "UNETLoader",
|
||||
"inputs": {
|
||||
"filename_prefix": "mcp-image-gen",
|
||||
"images": ["8", 0]
|
||||
"unet_name": "flux-2-klein-4b.safetensors",
|
||||
"weight_dtype": "default"
|
||||
}
|
||||
},
|
||||
"13": {
|
||||
"class_type": "KSampler",
|
||||
"inputs": {
|
||||
"cfg": 1.0,
|
||||
"denoise": 1.0,
|
||||
"latent_image": ["27", 0],
|
||||
"model": ["32", 0],
|
||||
"negative": ["33", 0],
|
||||
"positive": ["6", 0],
|
||||
"sampler_name": "euler",
|
||||
"scheduler": "beta",
|
||||
"seed": 42,
|
||||
"steps": 4
|
||||
}
|
||||
},
|
||||
"27": {
|
||||
"class_type": "EmptySD3LatentImage",
|
||||
"inputs": {
|
||||
"batch_size": 1,
|
||||
"height": 1024,
|
||||
"width": 1024
|
||||
}
|
||||
},
|
||||
"30": {
|
||||
"class_type": "CLIPLoader",
|
||||
"inputs": {
|
||||
"clip_name": "qwen_3_4b_heretic.safetensors",
|
||||
"type": "flux"
|
||||
}
|
||||
},
|
||||
"31": {
|
||||
"5": {
|
||||
"class_type": "VAELoader",
|
||||
"inputs": {
|
||||
"vae_name": "flux2-vae.safetensors"
|
||||
}
|
||||
},
|
||||
"32": {
|
||||
"class_type": "UNETLoader",
|
||||
"6": {
|
||||
"class_type": "EmptyFlux2LatentImage",
|
||||
"inputs": {
|
||||
"unet_name": "flux-2-klein-4b.safetensors",
|
||||
"weight_dtype": "fp8_e4m3fn"
|
||||
"width": 1024,
|
||||
"height": 1024,
|
||||
"batch_size": 1
|
||||
}
|
||||
},
|
||||
"33": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"7": {
|
||||
"class_type": "Flux2Scheduler",
|
||||
"inputs": {
|
||||
"clip": ["30", 0],
|
||||
"text": "NEGATIVE_PLACEHOLDER"
|
||||
"steps": 20,
|
||||
"width": 1024,
|
||||
"height": 1024
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"class_type": "CFGGuider",
|
||||
"inputs": {
|
||||
"model": ["4", 0],
|
||||
"positive": ["2", 0],
|
||||
"negative": ["3", 0],
|
||||
"cfg": 5
|
||||
}
|
||||
},
|
||||
"9": {
|
||||
"class_type": "KSamplerSelect",
|
||||
"inputs": {
|
||||
"sampler_name": "euler"
|
||||
}
|
||||
},
|
||||
"10": {
|
||||
"class_type": "RandomNoise",
|
||||
"inputs": {
|
||||
"noise_seed": 42
|
||||
}
|
||||
},
|
||||
"11": {
|
||||
"class_type": "SamplerCustomAdvanced",
|
||||
"inputs": {
|
||||
"noise": ["10", 0],
|
||||
"guider": ["8", 0],
|
||||
"sampler": ["9", 0],
|
||||
"sigmas": ["7", 0],
|
||||
"latent_image": ["6", 0]
|
||||
}
|
||||
},
|
||||
"12": {
|
||||
"class_type": "VAEDecode",
|
||||
"inputs": {
|
||||
"samples": ["11", 0],
|
||||
"vae": ["5", 0]
|
||||
}
|
||||
},
|
||||
"13": {
|
||||
"class_type": "SaveImage",
|
||||
"inputs": {
|
||||
"filename_prefix": "mcp-image-gen",
|
||||
"images": ["12", 0]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,11 +63,20 @@ def test_build_flux_workflow_heretic_model():
|
||||
seed=42,
|
||||
model="flux-2-klein-4b.safetensors",
|
||||
)
|
||||
assert wf["6"]["class_type"] == "CLIPTextEncode"
|
||||
assert wf["30"]["class_type"] == "CLIPLoader" # Qwen3-4B uses single CLIPLoader
|
||||
assert wf["32"]["inputs"]["unet_name"] == "flux-2-klein-4b.safetensors"
|
||||
assert wf["31"]["inputs"]["vae_name"] == "flux2-vae.safetensors"
|
||||
assert wf["13"]["inputs"]["scheduler"] == "beta" # FLUX.2 Klein uses beta scheduler
|
||||
# New FLUX.2 workflow uses different node IDs and types
|
||||
assert wf["1"]["class_type"] == "CLIPLoader" # Qwen3-4B uses single CLIPLoader
|
||||
assert wf["1"]["inputs"]["type"] == "flux2" # correct type for FLUX.2
|
||||
assert wf["1"]["inputs"]["device"] == "default" # required for FLUX.2 CLIPLoader
|
||||
assert wf["1"]["inputs"]["clip_name"] == "qwen_3_4b_klein.safetensors" # Comfy-Org/vae-text-encorder-for-flux-klein-4b
|
||||
assert wf["2"]["class_type"] == "CLIPTextEncode" # standard CLIP encode (not Flux-specific)
|
||||
assert wf["4"]["class_type"] == "UNETLoader"
|
||||
assert wf["4"]["inputs"]["unet_name"] == "flux-2-klein-4b.safetensors"
|
||||
assert wf["4"]["inputs"]["weight_dtype"] == "default" # not fp8 — avoids dimension errors
|
||||
assert wf["6"]["class_type"] == "EmptyFlux2LatentImage" # FLUX.2-specific latent
|
||||
assert wf["8"]["class_type"] == "CFGGuider" # CFGGuider replaces FluxDisableGuidance+BasicGuider
|
||||
assert wf["8"]["inputs"]["cfg"] == 5 # cfg=5 for FLUX.2 Klein
|
||||
assert wf["11"]["class_type"] == "SamplerCustomAdvanced" # FLUX.2 sampler (node 11, not 12)
|
||||
assert wf["13"]["class_type"] == "SaveImage" # output node
|
||||
|
||||
|
||||
def test_workflow_registry_contains_both_models():
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
# Task: Swap Qwen3-4B Encoder for Heretic Abliterated Version
|
||||
|
||||
**Date:** 2026-04-10
|
||||
**Status:** Ready to execute — blocked only until a FLUX.2 Klein-compatible Heretic encoder is published
|
||||
**Depends on:** FLUX.2 Klein 4B working (✅ done as of 2026-04-10)
|
||||
|
||||
---
|
||||
|
||||
## Goal
|
||||
|
||||
Replace the standard `qwen_3_4b_klein.safetensors` with an abliterated (Heretic) version that has:
|
||||
- **Zero measurable quality loss** (KL divergence = 0.0000)
|
||||
- **Near-zero prompt refusals** (≤3/100 in DreamFast v1.2.0 testing)
|
||||
|
||||
Result: `generate_image(prompt, model="flux-2-klein-4b.safetensors")` will work with **any** prompt without refusals.
|
||||
|
||||
---
|
||||
|
||||
## Current State
|
||||
|
||||
| File | Location | Status |
|
||||
|------|----------|--------|
|
||||
| `flux-2-klein-4b.safetensors` | `~/ComfyUI/models/diffusion_models/` | ✅ Working |
|
||||
| `qwen_3_4b_klein.safetensors` | `~/ComfyUI/models/text_encoders/` | ✅ Working (standard, has refusals) |
|
||||
| `flux2-vae.safetensors` | `~/ComfyUI/models/vae/` | ✅ Working |
|
||||
|
||||
The MCP workflow [`mcp/mcp-image-gen/src/workflows/flux2_klein_heretic.json`](../mcp/mcp-image-gen/src/workflows/flux2_klein_heretic.json) already uses `qwen_3_4b_klein.safetensors` — **no code change needed**, only the file on disk needs to be replaced.
|
||||
|
||||
---
|
||||
|
||||
## The Problem to Solve First
|
||||
|
||||
The standard Heretic repos may not have the **FLUX.2 Klein-compatible** encoder dimensions:
|
||||
|
||||
| Encoder | `hidden_size` | Conditioning dim | Usable? |
|
||||
|---------|--------------|-----------------|---------|
|
||||
| BFL Qwen3-4B (FLUX.2 Klein) | **2560** | 7680 (2560×3) | ✅ |
|
||||
| DreamFast/qwen3-4b-heretic | unknown — must check | ? | ⚠️ verify first |
|
||||
| Standard Qwen3-4B | 4096 | 4096 | ❌ wrong |
|
||||
|
||||
**Before downloading, verify DreamFast's model is fine-tuned from the BFL variant** (hidden_size=2560), not the standard Qwen3 (hidden_size=4096).
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### Step 1: Check DreamFast Heretic repo
|
||||
|
||||
```bash
|
||||
huggingface-cli model-info DreamFast/qwen3-4b-heretic 2>/dev/null | grep -i hidden
|
||||
```
|
||||
|
||||
Or browse: https://huggingface.co/DreamFast/qwen3-4b-heretic/blob/main/config.json
|
||||
Look for: `"hidden_size": 2560` — that's the FLUX.2 Klein-compatible version.
|
||||
|
||||
### Step 2a: If DreamFast has the right dimensions (2560)
|
||||
|
||||
```bash
|
||||
# Download
|
||||
huggingface-cli download DreamFast/qwen3-4b-heretic \
|
||||
--local-dir /tmp/qwen3-4b-heretic/
|
||||
|
||||
# Back up working encoder first
|
||||
cp ~/ComfyUI/models/text_encoders/qwen_3_4b_klein.safetensors \
|
||||
~/ComfyUI/models/text_encoders/qwen_3_4b_klein_backup.safetensors
|
||||
|
||||
# Swap in the Heretic version
|
||||
cp /tmp/qwen3-4b-heretic/model.safetensors \
|
||||
~/ComfyUI/models/text_encoders/qwen_3_4b_klein.safetensors
|
||||
```
|
||||
|
||||
### Step 2b: If DreamFast has wrong dimensions (4096) — find alternative
|
||||
|
||||
Options in order of preference:
|
||||
1. **Lockout/qwen3-4b-heretic-zimage** — check if BFL-compatible:
|
||||
```bash
|
||||
huggingface-cli model-info Lockout/qwen3-4b-heretic-zimage 2>/dev/null | grep hidden
|
||||
```
|
||||
2. **Run Heretic abliteration yourself** on the working `qwen_3_4b_klein.safetensors`
|
||||
Tool: https://github.com/FailSpy/abliterator
|
||||
Script: `python abliterator.py --model qwen_3_4b_klein.safetensors --output qwen_3_4b_klein_heretic.safetensors`
|
||||
|
||||
3. **Wait** for DreamFast or BFL to publish the FLUX.2-specific abliterated encoder
|
||||
|
||||
### Step 3: Live test
|
||||
|
||||
```python
|
||||
generate_image(
|
||||
"an explicit test prompt that would normally be refused",
|
||||
model="flux-2-klein-4b.safetensors",
|
||||
steps=20
|
||||
)
|
||||
```
|
||||
|
||||
Expected: Image generated, no refusal error in ComfyUI logs.
|
||||
|
||||
### Step 4: If it works — no code changes needed
|
||||
|
||||
The MCP code, workflow JSON, and registry are already correct. Just verify:
|
||||
- Check `journalctl --user -u comfyui -f` during generation for any errors
|
||||
- Confirm the generated image was saved to `~/Pictures/mcp-generated/`
|
||||
|
||||
---
|
||||
|
||||
## Fallback Plan
|
||||
|
||||
If the Heretic encoder is unavailable in the right dimensions, the **GGUF route** works too:
|
||||
|
||||
```bash
|
||||
# ComfyUI-GGUF is already installed: ~/ComfyUI/custom_nodes/ComfyUI-GGUF
|
||||
# Download Heretic GGUF (if BFL-compatible variant published):
|
||||
huggingface-cli download Lockout/qwen3-4b-heretic-zimage \
|
||||
qwen-4b-zimage-hereticV2-q8.gguf \
|
||||
--local-dir ~/ComfyUI/models/text_encoders/
|
||||
```
|
||||
|
||||
Then update [`flux2_klein_heretic.json`](../mcp/mcp-image-gen/src/workflows/flux2_klein_heretic.json) node `"1"`:
|
||||
```json
|
||||
"class_type": "CLIPLoaderGGUF", // instead of CLIPLoader
|
||||
"inputs": {
|
||||
"clip_name": "qwen-4b-zimage-hereticV2-q8.gguf",
|
||||
"type": "flux2"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## No Code Changes Required (unless GGUF fallback)
|
||||
|
||||
The entire MCP server, workflow registry, and test suite are already correct. This is **purely a model file task**.
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] `generate_image("...", model="flux-2-klein-4b.safetensors")` works with prompts that currently get refused
|
||||
- [ ] Output image quality identical to standard encoder (check: no visible artifacts vs reference)
|
||||
- [ ] ComfyUI logs show no dimension errors
|
||||
- [ ] `qwen_3_4b_klein_backup.safetensors` kept as rollback
|
||||
@@ -0,0 +1,104 @@
|
||||
# FLUX.2 Klein 4B + Heretic — Session Recap
|
||||
|
||||
**Date:** 2026-04-10
|
||||
**Status:** Code complete, live generation BLOCKED by encoder dimension mismatch
|
||||
|
||||
---
|
||||
|
||||
## What We Achieved ✅
|
||||
|
||||
### Code Infrastructure (Solid)
|
||||
- **`mcp-image-gen/src/server.py`** — Generic workflow registry with model-based dispatch, `_inject_workflow_params()` works recursively on any node layout
|
||||
- **`mcp-image-gen/tests/test_server.py`** — 37/37 tests passing
|
||||
- **Gitea** — pushed to main (commit `38d26ad`)
|
||||
- The architecture is right: adding a new model = add 1 JSON file + 1 registry entry
|
||||
|
||||
### Models Downloaded (on disk)
|
||||
| File | Location | Status |
|
||||
|------|----------|--------|
|
||||
| `flux-2-klein-4b.safetensors` | `~/ComfyUI/models/diffusion_models/` | ✅ 7.3GB |
|
||||
| `qwen_3_4b_bfl.safetensors` | `~/ComfyUI/models/text_encoders/` | ✅ merged from BFL shards |
|
||||
| `qwen_3_4b.safetensors` (z_image) | `~/ComfyUI/models/text_encoders/split_files/` | ✅ wrong model |
|
||||
| `Qwen3-4B-Q8_0.gguf` | `~/ComfyUI/models/text_encoders/` | ✅ wrong arch |
|
||||
| ComfyUI-GGUF extension | `~/ComfyUI/custom_nodes/ComfyUI-GGUF` | ✅ installed |
|
||||
|
||||
---
|
||||
|
||||
## What Failed and Why ❌
|
||||
|
||||
### The Error (persistent)
|
||||
```
|
||||
mat1 and mat2 shapes cannot be multiplied (512x4096 and 7680x3072)
|
||||
```
|
||||
|
||||
### Root Cause Analysis
|
||||
|
||||
**Node 13** (`SamplerCustomAdvanced`) fails — meaning the conditioning vector from the text encoder doesn't match the diffusion model's expected input.
|
||||
|
||||
| Component | Expected | Got |
|
||||
|-----------|----------|-----|
|
||||
| FLUX.2 Klein 4B conditioning input | **7680-dim** (2560 × 3) | **4096-dim** |
|
||||
|
||||
**Why 7680 = 2560 × 3?**
|
||||
FLUX models concatenate text embeddings across multiple time steps. The BFL Qwen3 encoder has `hidden_size=2560`, so the concatenated output is 2560×3=7680.
|
||||
|
||||
**Why 4096?**
|
||||
Every other Qwen3 variant (z_image_turbo, official Qwen repo GGUF) uses standard Qwen3 with `hidden_size=4096` — these are for Z-Image and text generation respectively, NOT for FLUX.2 Klein.
|
||||
|
||||
### What We Tried (and Why Each Failed)
|
||||
1. `CLIPLoader type=flux` → wrong architecture (FLUX.1 style)
|
||||
2. `CLIPLoader type=flux2` → correct node, wrong encoder file (z_image Qwen)
|
||||
3. `CLIPLoaderGGUF type=flux2` → correct node, wrong GGUF (standard Qwen3)
|
||||
4. `CLIPLoader type=flux2 + qwen_3_4b_bfl.safetensors` → merged BFL shards, but still fails
|
||||
5. Workflow: `KSampler` → doesn't work with FLUX.2 (different architecture)
|
||||
6. Workflow: `SamplerCustomAdvanced + BasicGuider + Flux2Scheduler` → correct architecture but encoding mismatch persists
|
||||
|
||||
### The Real Missing Piece
|
||||
|
||||
The BFL FLUX.2 Klein text encoder in Diffusers format is designed for use via `transformers/diffusers` pipeline, NOT via ComfyUI's `CLIPLoader`. ComfyUI reads the weights differently. The weights are there but ComfyUI doesn't know how to map `model.embed_tokens`, `model.layers.N.*` etc. to the CLIP interface it expects.
|
||||
|
||||
**The correct encoder file for ComfyUI** is `Comfy-Org/vae-text-encorder-for-flux-klein-4b` — the 7.5GB file we downloaded IS the right one, but ComfyUI is likely loading it with the wrong adapter in the `CLIPLoader`.
|
||||
|
||||
---
|
||||
|
||||
## Clean Approach — What We Need to Do
|
||||
|
||||
### Option A: Use ComfyUI Web UI (Easiest)
|
||||
1. Open `http://localhost:8188` in browser
|
||||
2. Load the "Flux.2 Klein 4B Text-to-Image" workflow template (it's in the UI Templates)
|
||||
3. **Export the working API JSON** (Ctrl+Shift+E or Settings → Save as API format)
|
||||
4. Replace our `flux2_klein_heretic.json` with the exported JSON
|
||||
5. Add placeholders and test
|
||||
|
||||
This gives us the **verified working node graph** without guessing, and should take about 10 minutes.
|
||||
|
||||
### Option B: Find a Working API JSON online
|
||||
- Reddit r/comfyui has working FLUX.2 Klein workflows
|
||||
- Export format is what we need
|
||||
|
||||
### Then: Add Heretic
|
||||
Once we have a working standard workflow:
|
||||
1. Download the actual Heretic-abliterated version of the BFL encoder (once it's published)
|
||||
2. Swap encoder filename in the JSON
|
||||
|
||||
---
|
||||
|
||||
## My Recommendation
|
||||
|
||||
**Do Option A right now.** Open `http://localhost:8188`, load the template, export to API format, paste the JSON. We'll be running in 10 minutes instead of guessing node names.
|
||||
|
||||
The MCP server code is solid — the only broken piece is `flux2_klein_heretic.json`. Once we have the right JSON from the UI, everything else works.
|
||||
|
||||
---
|
||||
|
||||
## Files to Clean Up (After We Have the Right JSON)
|
||||
|
||||
```bash
|
||||
# Remove wrong encoders (save ~8GB)
|
||||
rm ~/ComfyUI/models/text_encoders/split_files/qwen_3_4b.safetensors # z_image version
|
||||
rm ~/ComfyUI/models/text_encoders/qwen_3_4b_flux2.safetensors
|
||||
|
||||
# Keep
|
||||
# ~/ComfyUI/models/text_encoders/qwen_3_4b_bfl.safetensors ← correct encoder
|
||||
# ~/ComfyUI/models/text_encoders/Qwen3-4B-Q8_0.gguf ← maybe useful later
|
||||
```
|
||||
Reference in New Issue
Block a user