Compare commits

...

3410 Commits

Author SHA1 Message Date
59afc39848 ComfyUI v0.3.77 2025-12-03 00:02:09 -05:00
028e17dd7a add check for the format arg type in VideoFromComponents.save_to function (#11046)
* add check for the format var type in VideoFromComponents.save_to function

* convert "format" to VideoContainer enum
2025-12-03 00:01:24 -05:00
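A minimal sketch of the type check described above, assuming a str-backed VideoContainer enum; the class body and save_to signature here are illustrative, not the exact comfy_api definitions:

```python
from enum import Enum

class VideoContainer(str, Enum):
    MP4 = "mp4"
    WEBM = "webm"

def save_to(path: str, format="mp4") -> None:
    # Accept either a VideoContainer or a plain string and normalize to
    # the enum; an unsupported name like "avi" raises ValueError early.
    format = VideoContainer(format)
    print(f"writing {path} as a {format.value} container")

save_to("out.mp4", VideoContainer.MP4)  # enum works
save_to("out.webm", "webm")             # plain string also works
```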
30c259cac8 ComfyUI version v0.3.76 2025-12-01 20:25:35 -05:00
1cb7e22a95 [API Nodes] add Kling O1 model support (#11025)
* feat(api-nodes): add Kling O1 model support

* fix: increase max allowed duration to 10.05 seconds

* fix(VideoInput): respect "format" argument
2025-12-01 16:11:52 -08:00
2640acb31c Update qwen tokenizer to add qwen 3 tokens. (#11029)
Doesn't actually change anything for current workflows because none of the
current models have a template with the think tokens.
2025-12-01 17:13:48 -05:00
7dbd5dfe91 bump comfyui-frontend-package to 1.32.10 (#11018) 2025-12-01 13:27:17 -05:00
f8b981ae9a Next AMD portable will have pytorch with ROCm 7.1.1 (#11002) 2025-11-30 04:21:31 -05:00
4967f81778 update template to 0.7.25 (#10996)
* update template to 0.7.24

* Update template to 0.7.25
2025-11-29 18:07:26 -08:00
0a6746898d Make the ScaleRope node work on Z Image and Lumina. (#10994) 2025-11-29 18:00:55 -05:00
5151cff293 Add some missing z image lora layers. (#10980) 2025-11-28 23:55:00 -05:00
af96d9812d feat(security): add System User protection with __ prefix (#10966)
* feat(security): add System User protection with `__` prefix

Add protected namespace for custom nodes to store sensitive data
(API keys, licenses) that cannot be accessed via HTTP endpoints.

Key changes:
- New API: get_system_user_directory() for internal access
- New API: get_public_user_directory() with structural blocking
- 3-layer defense: header validation, path blocking, creation prevention
- 54 tests covering security, edge cases, and backward compatibility

System Users use `__` prefix (e.g., __system, __cache) following
Python's private member convention. They exist in user_directory/
but are completely blocked from /userdata HTTP endpoints.

* style: remove unused imports
2025-11-28 21:28:42 -05:00
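A hedged sketch of the `__` prefix namespace idea; the function names follow the commit message, but the paths and checks here are illustrative rather than the exact ComfyUI implementation:

```python
import os

def is_system_user(user_id: str) -> bool:
    # System Users use a double-underscore prefix, e.g. __system, __cache.
    return user_id.startswith("__")

def get_public_user_directory(base_dir: str, user_id: str) -> str:
    # Structural blocking: protected users can never be resolved through
    # the HTTP-facing /userdata path helpers.
    if is_system_user(user_id):
        raise PermissionError(f"{user_id!r} is a protected System User")
    return os.path.join(base_dir, user_id)

def get_system_user_directory(base_dir: str, user_id: str) -> str:
    # Internal-only accessor for custom nodes storing API keys, licenses, etc.
    assert is_system_user(user_id)
    return os.path.join(base_dir, user_id)
```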
52a32e2b32 Support some z image lora formats. (#10978) 2025-11-28 21:12:42 -05:00
b907085709 Support video tiny VAEs (#10884)
* Support video tiny VAEs

* lighttaew scaling fix

* Also support video taes in previews

Only first frame for now as live preview playback is currently only available through VHS custom nodes.

* Support Wan 2.1 lightVAE

* Relocate elif block and set Wan VAE dim directly without using pruning rate for lightvae
2025-11-28 19:40:19 -05:00
065a2fbbec Update driver link in AMD portable README (#10974) 2025-11-28 19:37:39 -05:00
0ff0457892 mm: wrap the raw stream in context manager (#10958)
The documentation suggests torch.foo.Stream only became usable as a `with:` context manager in version 2.7. Use the old API for backwards compatibility.
2025-11-28 16:38:12 -05:00
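A small sketch of the compatibility point being made, assuming CUDA: `torch.cuda.stream()` has been a context manager far longer than `with stream:` on a raw Stream object.

```python
import torch

s = torch.cuda.Stream()

# Newer API (documented from around PyTorch 2.7):
# with s:
#     ...

# Older, backwards-compatible API used here instead:
with torch.cuda.stream(s):
    x = torch.ones(4, device="cuda")
```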
6484ac89dc fix QuantizedTensor.is_contiguous (#10956) (#10959) 2025-11-28 16:33:07 -05:00
f55c98a89f Disable offload stream when torch compile. (#10961) 2025-11-28 16:16:46 -05:00
ca7808f240 fix(user_manager): fix typo in move_userdata dest validation (#10967)
Check `dest` instead of `source` when validating destination path
in move_userdata endpoint.
2025-11-28 12:43:17 -08:00
52e778fff3 feat(Kling-API-Nodes): add v2-5-turbo model to FirstLastFrame node (#10938) 2025-11-28 02:52:59 -08:00
9d8a817985 Enable async offloading by default on Nvidia. (#10953)
Add --disable-async-offload to disable it.

If this causes OOMs that go away when you use --disable-async-offload, please report it.
2025-11-27 17:46:12 -05:00
b59750a86a Update template to 0.7.23 (#10949) 2025-11-27 17:12:56 -05:00
3f382a4f98 quant ops: Dequantize weight in-place (#10935)
In flux2 these weights are huge (200MB). As plain_tensor is a throw-away
deep copy, do this multiplication in-place to save VRAM.
2025-11-27 08:06:30 -08:00
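An illustrative sketch of the in-place change (names assumed, not the real quant-ops code): since plain_tensor is already a throw-away copy, mutating it avoids a second ~200MB allocation at the VRAM peak.

```python
import torch

def dequantize(qweight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    plain_tensor = qweight.to(torch.bfloat16)  # throw-away deep copy
    # before: return plain_tensor * scale   (allocates a second buffer)
    plain_tensor.mul_(scale)                   # after: in-place, no intermediate
    return plain_tensor
```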
f17251bec6 Account for the VRAM cost of weight offloading (#10733)
* mm: default to 0 for NUM_STREAMS

Don't count the compute stream as an offload stream. This makes async offload accounting easier.

* mm: remove 128MB minimum

This is from a previous offloading system requirement. Remove it to make the behaviour of the loader and partial unloader consistent.

* mp: order the module list by offload expense

Calculate an approximate temporary VRAM cost to offload a weight and primarily order the module load list by that. In the simple case this is just the module weight's size, but a weight with a lora consumes considerably more VRAM because the lora is applied on the fly.

This will slightly prioritize lora weights, but is really for proper VRAM offload accounting.

* mp: Account for the VRAM cost of weight offloading

When checking the VRAM headroom, assume that the weight needs to be offloaded, and only load if there is space for both the load and the offload cost times the number of streams.

As the weights are ordered from largest to smallest by offload cost, this is guaranteed to fit in VRAM (tm), as all weights that follow will be smaller.

Make the partial unload aware of this system as well by saving the budget for offload VRAM to the model state and accounting accordingly. It's possible that partial unload increases the size of the largest offloaded weights, and thus needs to unload a little more than asked to accommodate the bigger temp buffers.

Honor the existing code's floor on model weight loading of 128MB by having the patcher honor this separately without regard to offloading. Otherwise, when MM specifies its 128MB minimum, MP will see the biggest weights and budget that 128MB to only the offload buffer and load nothing, which isn't the intent of these minimums. The same clamp applies in case of partial offload of the currently loading model.
2025-11-27 01:03:03 -05:00
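A rough sketch of the ordering idea under stated assumptions (the real cost model and module structure differ): estimate a temporary-VRAM cost per weight, with lora-patched weights costing more, and load largest-cost first so everything that follows is guaranteed to fit in the reserved headroom.

```python
def offload_cost(weight_bytes: int, has_lora: bool) -> int:
    # assumption: on-the-fly lora application roughly doubles the temp cost
    return weight_bytes * 2 if has_lora else weight_bytes

def ordered_load_list(modules):
    # modules: iterable of (name, weight_bytes, has_lora) tuples
    return sorted(modules, key=lambda m: offload_cost(m[1], m[2]), reverse=True)

mods = [("ffn", 200, False), ("attn", 150, True), ("norm", 1, False)]
print(ordered_load_list(mods))  # attn's lora-inflated cost (300) sorts first
```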
c38e7d6599 block info (#10841) 2025-11-26 20:28:44 -08:00
eaf68c9b5b Make lora training work on Z Image and remove some redundant nodes. (#10927) 2025-11-26 19:25:32 -05:00
cc6a8dcd1a Dataset Processing Nodes and Improved LoRA Trainer Nodes with multi resolution supports. (#10708)
* Create nodes_dataset.py

* Add encoded dataset caching mechanism

* make training node to work with our dataset system

* allow trainer node to get different resolution dataset

* move all dataset related implementation to nodes_dataset

* Rewrite dataset system with new io schema

* Rewrite training system with new io schema

* add ui pbar

* Add outputs' id/name

* Fix bad id/naming

* use single process instead of input list when no need

* fix wrong output_list flag

* use torch.load/save and fix bad behaviors
2025-11-26 19:18:08 -05:00
a2d60aad0f convert nodes_customer_sampler.py to V3 schema (#10206) 2025-11-26 14:55:31 -08:00
d8433c63fd chore(api-nodes): remove chat widgets from OpenAI/Gemini nodes (#10861) 2025-11-26 14:42:01 -08:00
dd41b74549 Add Z Image to readme. (#10924) 2025-11-26 15:36:38 -05:00
55f654db3d Fix the CSP offline feature. (#10923) 2025-11-26 15:16:40 -05:00
58c6ed541d Merge 3d animation node (#10025) 2025-11-26 14:58:27 -05:00
234c3dc85f Bump frontend to 1.32.9 (#10867) 2025-11-26 14:58:08 -05:00
8908ee2628 fix(gemini): use first 10 images as fileData (URLs) and remaining images as inline base64 (#10918) 2025-11-26 10:38:30 -08:00
1105e0d139 improve UX for batch uploads in upload_images_to_comfyapi (#10913) 2025-11-26 09:23:14 -08:00
8938aa3f30 add Veo3 First-Last-Frame node (#10878) 2025-11-26 09:14:02 -08:00
f16219e3aa Add cheap latent preview for flux 2. (#10907)
Thank you to the person who calculated them. You saved me a percent of my
time.
2025-11-26 04:00:43 -05:00
8402c8700a ComfyUI version v0.3.75 2025-11-26 02:41:13 -05:00
58b8574661 Fix Flux2 reference image mem estimation. (#10905) 2025-11-26 02:36:19 -05:00
90b3995ec8 ComfyUI v0.3.74 2025-11-26 00:34:15 -05:00
bdb10a583f Fix loras not working on mixed fp8. (#10899) 2025-11-26 00:07:58 -05:00
0e24dbb19f Adjustments to Z Image. (#10893) 2025-11-25 19:02:51 -05:00
e9aae31fa2 Z Image model. (#10892) 2025-11-25 18:41:45 -05:00
0c18842acb ComfyUI v0.3.73 2025-11-25 14:59:37 -05:00
d196a905bb Lower vram usage for flux 2 text encoder. (#10887) 2025-11-25 14:58:39 -05:00
18b79acba9 Update workflow templates to v0.7.20 (#10883) 2025-11-25 14:58:21 -05:00
dff996ca39 Fix crash. (#10885) 2025-11-25 14:30:24 -05:00
828b1b9953 ComfyUI version v0.3.72 2025-11-25 12:40:58 -05:00
af81cb962d Add Flux 2 support to README. (#10882) 2025-11-25 11:40:32 -05:00
5c7b08ca58 [API Nodes] add Flux.2 Pro node (#10880) 2025-11-25 11:09:07 -05:00
6b573ae0cb Flux 2 (#10879) 2025-11-25 10:50:19 -05:00
015a0599d0 I found a case where this is needed (#10875) 2025-11-25 03:23:19 -05:00
acfaa5c4a1 Don't try fp8 matrix mult in quantized ops if not supported by hardware. (#10874) 2025-11-25 02:55:49 -05:00
b6805429b9 Allow pinning quantized tensors. (#10873) 2025-11-25 02:48:20 -05:00
25022e0b09 Cleanup and fix issues with text encoder quants. (#10872) 2025-11-25 01:48:53 -05:00
22a2644e57 Bump transformers version in requirements.txt (#10869) 2025-11-24 19:45:54 -05:00
b2ef58e2b1 block info (#10844) 2025-11-24 10:40:09 -08:00
6a6d456c88 block info (#10842) 2025-11-24 10:38:38 -08:00
3d1fdaf9f4 block info (#10843) 2025-11-24 10:30:40 -08:00
1286fcfe40 add get_frame_count and get_frame_rate methods to VideoInput class (#10851) 2025-11-24 10:24:29 -08:00
3bd71554a2 fix(api-nodes): edge cases in responses for Gemini models (#10860) 2025-11-24 09:48:37 -08:00
f66183a541 [fix] Fixes non-async public API access (#10857)
It looks like the synchronous version of the public API broke due to an
addition of `from __future__ import annotations`. This change updates
the async-to-sync adapter to work with both types of type annotations.
2025-11-23 22:56:20 -08:00
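The failure mode being described can be demonstrated in a few lines; the adapter fix itself isn't shown, but this is the difference it has to handle:

```python
from __future__ import annotations
import typing

def example(x: int) -> str:
    return str(x)

# With the future import, raw annotations are strings...
assert example.__annotations__["x"] == "int"
# ...so introspection code must resolve them instead of comparing types:
assert typing.get_type_hints(example)["x"] is int
```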
cbd68e3d58 Add better error message for common error. (#10846) 2025-11-23 04:55:22 -05:00
d89c29f259 Add display names to Hunyuan latent video nodes. (#10837) 2025-11-22 22:51:53 -05:00
a9c35256bc Update requirements.txt (#10834) 2025-11-22 02:28:29 -08:00
532938b16b --disable-api-nodes now sets CSP header to force frontend offline. (#10829) 2025-11-21 17:51:55 -05:00
ecb683b057 update frontend to 1.30 (#10793) 2025-11-21 16:34:47 -05:00
c55fd74816 ComfyUI 0.3.71 2025-11-21 00:49:13 -05:00
3398123752 Fix wrong path. (#10821) 2025-11-20 23:39:37 -05:00
943b3b615d HunyuanVideo 1.5 (#10819)
* init

* update

* Update model.py

* Update model.py

* remove print

* Fix text encoding

* Prevent empty negative prompt

Really doesn't work otherwise

* fp16 works

* I2V

* Update model_base.py

* Update nodes_hunyuan.py

* Better latent rgb factors

* Use the correct sigclip output...

* Support HunyuanVideo1.5 SR model

* whitespaces...

* Proper latent channel count

* SR model fixes

This still needs timestep scheduling based on the noise scale; it can already be used with two samplers.

* vae_refiner: roll the convolution through temporal

Work in progress.

Roll the convolution through time using 2-latent-frame chunks and a
FIFO queue for the convolution seams.

* Support HunyuanVideo15 latent resampler

* fix

* Some cleanup

Co-Authored-By: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>

* Proper hyvid15 I2V channels

Co-Authored-By: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>

* Fix TokenRefiner for fp16

Otherwise x.sum has infs. Just in case, only casting if the input is fp16; I don't know if it's necessary.

* Bugfix for the HunyuanVideo15 SR model

* vae_refiner: roll the convolution through temporal II

Roll the convolution through time using 2-latent-frame chunks and a
FIFO queue for the convolution seams.

Added support for the encoder, lowered to 1 latent frame to save more VRAM, and made it work for Hunyuan Image 3.0 (as the code is shared).

Fixed names, cleaned up code.

* Allow any number of input frames in VAE.

* Better VAE encode mem estimation.

* Lowvram fix.

* Fix hunyuan image 2.1 refiner.

* Fix mistake.

* Name changes.

* Rename.

* Whitespace.

* Fix.

* Fix.

---------

Co-authored-by: kijai <40791699+kijai@users.noreply.github.com>
Co-authored-by: Rattus <rattus128@gmail.com>
2025-11-20 22:44:43 -05:00
10e90a5757 bump comfyui-workflow-templates for nano banana 2 (#10818)
* bump templates

* bump templates
2025-11-20 18:20:52 -08:00
b75d349f25 fix(KlingLipSyncAudioToVideoNode): convert audio to mp3 format (#10811) 2025-11-20 16:33:54 -08:00
7b8389578e feat(api-nodes): add Nano Banana Pro (#10814)
* feat(api-nodes): add Nano Banana Pro

* frontend bump to 1.28.9
2025-11-20 16:17:47 -08:00
9e00ce5b76 Make Batch Images node add alpha channel when one of the inputs has it (#10816)
* When one Batch Image input has alpha and one does not, add empty alpha channel

* Use torch.nn.functional.pad
2025-11-20 17:42:46 -05:00
f5e66d5e47 Fix ImageBatch with different channel count. (#10815) 2025-11-20 15:08:03 -05:00
87b0359392 Update server templates handler to use new multi-package distribution (comfyui-workflow-templates versions >=0.3) (#10791)
* update templates for monorepo

* refactor
2025-11-19 22:36:56 -08:00
cb96d4d18c Disable workaround on newer cudnn. (#10807) 2025-11-19 23:56:23 -05:00
394348f5ca feat(api-nodes): add Topaz API nodes (#10755) 2025-11-19 17:44:04 -08:00
7601e89255 Fix workflow name. (#10806) 2025-11-19 20:17:15 -05:00
6a1d3a1ae1 convert hunyuan3d.py to V3 schema (#10664) 2025-11-19 14:49:01 -08:00
65ee24c978 change display name of PreviewAny node to "Preview as Text" (#10796) 2025-11-19 01:25:28 -08:00
17027f2a6a Add a way to disable the final norm in the llama based TE models. (#10794) 2025-11-18 22:36:03 -05:00
b5c8be8b1d ComfyUI 0.3.70 2025-11-18 19:37:20 -05:00
24fdb92edf feat(api-nodes): add new Gemini model (#10789) 2025-11-18 14:26:44 -08:00
d526974576 Fix hunyuan 3d 2.0 (#10792) 2025-11-18 16:46:19 -05:00
e1ab6bb394 EasyCache: Fix for mismatch in input/output channels with some models (#10788)
Slices the model input to the output channel count so the caching tracks only the noise channels; this resolves a channel mismatch with models like WanVideo I2V.

Also fixes a slicing deprecation in pytorch 2.9.
2025-11-18 07:00:21 -08:00
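A minimal sketch of the slicing fix, with names assumed: when a model's input carries extra conditioning channels (as in WanVideo I2V), the cache diff is tracked only over the first output-channel slice.

```python
import torch

def noise_channels(model_input: torch.Tensor, out_channels: int) -> torch.Tensor:
    # I2V-style inputs concatenate conditioning channels after the noise
    # channels; track caching only on the part the model actually outputs.
    return model_input[:, :out_channels]

x = torch.randn(1, 36, 4, 8, 8)      # 16 noise + 20 conditioning channels (illustrative)
print(noise_channels(x, 16).shape)   # torch.Size([1, 16, 4, 8, 8])
```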
048f49adbd chore(api-nodes): adjusted PR template; set min python version for pylint to 3.10 (#10787) 2025-11-18 03:59:27 -08:00
47bfd5a33f Native block swap custom nodes considered harmful. (#10783) 2025-11-18 00:26:44 -05:00
fdf49a2861 Fix the portable download link for CUDA 12.6 (#10780) 2025-11-17 22:04:06 -05:00
f41e5f398d Update README with new portable download link (#10778) 2025-11-17 19:59:19 -05:00
27cbac865e Add release workflow for NVIDIA cu126 (#10777) 2025-11-17 19:04:04 -05:00
3d0003c24c ComfyUI version 0.3.69 2025-11-17 17:17:24 -05:00
7d6103325e Change ROCm nightly install command to 7.1 (#10764) 2025-11-16 03:01:14 -05:00
2d4a08b717 Revert "chore(api-nodes): mark OpenAIDalle2 and OpenAIDalle3 nodes as deprecated (#10757)" (#10759)
This reverts commit 9a02382568.
2025-11-15 12:37:34 -08:00
9a02382568 chore(api-nodes): mark OpenAIDalle2 and OpenAIDalle3 nodes as deprecated (#10757) 2025-11-15 11:18:49 -08:00
bd01d9f7fd Add left padding support to tokenizers. (#10753) 2025-11-15 06:54:40 -05:00
443056c401 Fix custom nodes import error. (#10747)
This should fix the import errors but will break if the custom nodes actually try to use the class.
2025-11-14 03:26:05 -05:00
f60923590c Use same code for chroma and flux blocks so that optimizations are shared. (#10746) 2025-11-14 01:28:05 -05:00
1ef328c007 Better instructions for the portable. (#10743) 2025-11-13 21:32:39 -05:00
94c298f962 flux: reduce VRAM usage (#10737)
Clean up a bunch of stacked tensors on Flux. This takes me from B=19 to B=22 for 1600x1600 on an RTX 5090.
2025-11-13 16:02:03 -08:00
2fde9597f4 feat: add create_time dict to prompt field in /history and /queue (#10741) 2025-11-13 15:11:52 -08:00
f91078b1ff add PR template for API-Nodes (#10736) 2025-11-13 10:05:26 -08:00
3b3ef9a77a Quantized Ops fixes (#10715)
* offload support, bug fixes, remove mixins

* add readme
2025-11-12 18:26:52 -05:00
8b0b93df51 Update Python 3.14 compatibility notes in README (#10730) 2025-11-12 17:04:41 -05:00
1c7eaeca10 qwen: reduce VRAM usage (#10725)
Clean up a bunch of stacked and no-longer-needed tensors on the QWEN
VRAM peak (currently FFN).

With this I go from OOMing at B=37x1328x1328 to being able to successfully run B=47 (RTX 5090).
2025-11-12 16:20:53 -05:00
18e7d6dba5 mm/mp: always unload re-used but modified models (#10724)
The partial unloader path in the model re-use flow skips straight to the actual unload without any check of the patching UUID. This means that if you do an upscale flow with a model patch on an existing model, it will not apply your patches.

Fix by delaying the partial_unload until after the uuid checks. This is done by making partial_unload a mode of partial_load where extra_mem is negative.
2025-11-12 16:19:53 -05:00
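A sketch of the refactor's shape (illustrative, not the real ModelPatcher API): expressing partial_unload as partial_load with a negative extra_mem means the patch-UUID check always runs before any unloading happens.

```python
class Model:
    def __init__(self):
        self.patch_uuid = self.applied_uuid = 0

    def repatch(self): self.applied_uuid = self.patch_uuid
    def free_vram(self, n): print(f"freeing {n} bytes")
    def load_more(self, n): print(f"loading {n} more bytes")

    def partial_load(self, extra_mem: int):
        # uuid check runs before any unload happens
        if self.patch_uuid != self.applied_uuid:
            self.repatch()
        if extra_mem < 0:
            self.free_vram(-extra_mem)   # negative budget == partial unload
        else:
            self.load_more(extra_mem)

    def partial_unload(self, mem_to_free: int):
        self.partial_load(-mem_to_free)
```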
e1d85e7577 Update README.md for Intel Arc GPU installation, remove IPEX (#10729)
IPEX is no longer needed for Intel Arc GPUs. Removing the instructions to set up IPEX.
2025-11-12 15:21:05 -05:00
1199411747 Don't pin tensor if not a torch.nn.parameter.Parameter (#10718) 2025-11-11 19:33:30 -05:00
5ebcab3c7d Update CI workflow to remove dead macOS runner. (#10704)
* Update CI workflow to remove dead macOS runner.

* revert

* revert
2025-11-10 15:35:29 -05:00
c350009236 ops: Put weight cast on the offload stream (#10697)
This needs to be on the offload stream. This reproduced a black screen
with low resolution images on a slow bus when using FP8.
2025-11-09 22:52:11 -05:00
dea899f221 Unload weights if vram usage goes up between runs. (#10690) 2025-11-09 18:51:33 -05:00
e632e5de28 Add logging for model unloading. (#10692) 2025-11-09 18:06:39 -05:00
2abd2b5c20 Make ScaleROPE node work on Flux. (#10686) 2025-11-08 15:52:02 -05:00
a1a70362ca Only unpin tensor if it was pinned by ComfyUI (#10677) 2025-11-07 11:15:05 -05:00
cf97b033ee mm: guard against double pin and unpin explicitly (#10672)
As commented, if you let cuda be the one to detect double pinning/unpinning, it actually creates an async GPU error.
2025-11-06 21:20:48 -05:00
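A sketch of explicit bookkeeping under the assumption of a module-level registry (the real code differs): the point is that Python, not cuda, detects the double pin/unpin, because the cuda-side error surfaces asynchronously.

```python
import torch

_pinned: set = set()

def pin(tensor: torch.Tensor) -> None:
    if tensor.data_ptr() in _pinned:
        return            # already pinned by us; don't let cuda error out
    # ...actual pinning call would go here...
    _pinned.add(tensor.data_ptr())

def unpin(tensor: torch.Tensor) -> None:
    if tensor.data_ptr() not in _pinned:
        return            # never pinned by ComfyUI; skip
    # ...actual unpinning call would go here...
    _pinned.discard(tensor.data_ptr())
```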
eb1c42f649 Tell users they need to upload their logs in bug reports. (#10671) 2025-11-06 20:24:28 -05:00
e05c907126 Clarify release cycle. (#10667) 2025-11-06 04:11:30 -05:00
09dc24c8a9 Pinned mem also seems to work on AMD. (#10658) 2025-11-05 19:11:15 -05:00
1d69245981 Enable pinned memory by default on Nvidia. (#10656)
Removed the --fast pinned_memory flag.

You can use --disable-pinned-memory to disable it. Please report if it
causes any issues.
2025-11-05 18:08:13 -05:00
97f198e421 Fix qwen controlnet regression. (#10657) 2025-11-05 18:07:35 -05:00
bda0eb2448 feat(API-nodes): move Rodin3D nodes to new client; removed old api client.py (#10645) 2025-11-05 02:16:00 -08:00
c4a6b389de Lower ltxv mem usage to what it was before previous pr. (#10643)
Bring back qwen behavior to what it was before previous pr.
2025-11-04 22:47:35 -05:00
4cd881866b Use single apply_rope function across models (#10547) 2025-11-04 20:10:11 -05:00
265adad858 ComfyUI version v0.3.68 2025-11-04 19:42:23 -05:00
7f3e4d486c Limit amount of pinned memory on windows to prevent issues. (#10638) 2025-11-04 17:37:50 -05:00
a389ee01bb caching: Handle None outputs tuple case (#10637) 2025-11-04 14:14:10 -08:00
9c71a66790 chore: update workflow templates to v0.2.11 (#10634) 2025-11-04 10:51:53 -08:00
af4b7b5edb More fp8 torch.compile regressions fixed. (#10625) 2025-11-03 22:14:20 -05:00
0f4ef3afa0 This seems to slow things down slightly on Linux. (#10624) 2025-11-03 21:47:14 -05:00
6b88478f9f Bring back fp8 torch compile performance to what it should be. (#10622) 2025-11-03 19:22:10 -05:00
e199c8cc67 Fixes (#10621) 2025-11-03 17:58:24 -05:00
0652cb8e2d Speed up torch.compile (#10620) 2025-11-03 17:37:12 -05:00
958a17199a People should update their pytorch versions. (#10618) 2025-11-03 17:08:30 -05:00
e974e554ca chore: update embedded docs to v0.3.1 (#10614) 2025-11-03 10:59:44 -08:00
4e2110c794 feat(Pika-API-nodes): use new API client (#10608) 2025-11-03 00:29:08 -08:00
e617cddf24 convert nodes_openai.py to V3 schema (#10604) 2025-11-03 00:28:13 -08:00
1f3f7a2823 convert nodes_hypernetwork.py to V3 schema (#10583) 2025-11-03 00:21:47 -08:00
88df172790 fix(caching): treat bytes as hashable (#10567) 2025-11-03 00:16:40 -08:00
6d6a18b0b7 fix(api-nodes-cloud): stop using sub-folder and absolute path for output of Rodin3D nodes (#10556) 2025-11-03 00:04:56 -08:00
97ff9fae7e Clarify help text for --fast argument (#10609)
Updated help text for the --fast argument to clarify potential risks.
2025-11-02 13:14:04 -05:00
135fa49ec2 Small speed improvements to --async-offload (#10593)
* ops: don't take an offload stream if you don't need one

* ops: prioritize mem transfer

The async offload stream's reason for existence is to transfer from RAM to GPU. The post-processing compute steps are a bonus on the side stream, but if the compute stream is running a long kernel, it can stall the side stream as it waits to type-cast the bias before transferring the weight. So do a pure transfer of the weight straight up, then do everything bias-related, then go back to fix the weight type and apply weight patches.
2025-11-01 18:48:53 -04:00
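A hedged sketch of the reordering (names and structure are assumptions): issue the raw weight transfer first so the copy engine starts immediately, handle the bias, then cast and patch the weight afterwards.

```python
import torch

def load_on_stream(weight, bias, device, dtype, offload_stream):
    with torch.cuda.stream(offload_stream):
        # 1. pure transfer of the weight first, no casting in the way
        w = weight.to(device, non_blocking=True)
        # 2. everything bias, including its type cast
        b = bias.to(device, dtype, non_blocking=True) if bias is not None else None
        # 3. go back and fix the weight type last
        w = w.to(dtype)
        # ...weight patches (e.g. loras) would be applied here...
    return w, b
```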
44869ff786 Fix issue with pinned memory. (#10597) 2025-11-01 17:25:59 -04:00
20182a393f convert StabilityAI to use new API client (#10582) 2025-11-01 12:14:06 -07:00
5f109fe6a0 added 12s-20s as available output durations for the LTXV API nodes (#10570) 2025-11-01 12:13:39 -07:00
c58c13b2ba Fix torch compile regression on fp8 ops. (#10580) 2025-11-01 00:25:17 -04:00
7f374e42c8 ScaleROPE now works on Lumina models. (#10578) 2025-10-31 15:41:40 -04:00
27d1bd8829 Fix rope scaling. (#10560) 2025-10-30 22:51:58 -04:00
614cf9805e Add a ScaleROPE node. Currently only works on WAN models. (#10559) 2025-10-30 22:11:38 -04:00
513b0c46fb Add RAM Pressure cache mode (#10454)
* execution: Roll the UI cache into the outputs

Currently the UI cache is parallel to the output cache, with the expectation of being a content superset of the output cache. At the same time, the UI and output caches are maintained completely separately, making it awkward to free output cache content without changing the behaviour of the UI cache.

There are two actual users (getters) of the UI cache. The first is the case of a direct content hit on the output cache when executing a node. This case is very naturally handled by merging the UI and outputs cache.

The second case is the history JSON generation at the end of the prompt. This currently works by asking the cache for all_node_ids and then pulling the cache contents for those nodes. all_node_ids is the nodes of the dynamic prompt.

So fold the UI cache into the output cache. The current UI cache setter now writes to a prompt-scope dict. When the output cache is set, just get this value from the dict and tuple it up with the outputs.

When generating the history, simply iterate the prompt-scope dict.

This prepares support for more complex caching strategies (like RAM pressure caching) where less than one workflow will be cached and it will be desirable to keep the UI cache and output cache in sync.

* sd: Implement RAM getter for VAE

* model_patcher: Implement RAM getter for ModelPatcher

* sd: Implement RAM getter for CLIP

* Implement RAM Pressure cache

Implement a cache sensitive to RAM pressure. When RAM headroom drops below a certain threshold, evict RAM-expensive nodes from the cache.

Models and tensors are measured directly for RAM usage. An OOM score is then computed based on the RAM usage of the node.

Note that due to indirection through shared objects (like a model patcher), multiple nodes can account the same RAM as their individual usage. The intent is that this will free chains of nodes, particularly model loaders and associated loras, as they all score similarly and sort close to each other.

This has a bias towards unloading model nodes mid-flow while being able to keep results like text encodings and the VAE.

* execution: Convert the cache entry to NamedTuple

As commented in review.

Convert this to a named tuple and abstract away the tuple type completely from graph.py.
2025-10-30 17:39:02 -04:00
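A condensed sketch of the eviction policy, assuming psutil for headroom measurement; the real OOM scoring is richer, but the shape is: when headroom drops below a threshold, drop the most RAM-expensive cache entries first.

```python
import psutil  # assumption: psutil used here purely for illustration

def ram_headroom() -> int:
    return psutil.virtual_memory().available

def evict_until_headroom(cache: dict, ram_usage: dict, min_headroom: int) -> None:
    # Largest scorers first; shared objects (e.g. a model patcher) may be
    # counted by several nodes, which tends to evict loader+lora chains together.
    for node_id in sorted(cache, key=lambda n: ram_usage.get(n, 0), reverse=True):
        if ram_headroom() >= min_headroom:
            break
        del cache[node_id]
```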
dfac94695b fix img2img operation in Dall2 node (#10552) 2025-10-30 10:22:35 -07:00
163b629c70 use new API client in Pixverse and Ideogram nodes (#10543) 2025-10-29 23:49:03 -07:00
998bf60beb Add units/info for the numbers displayed on 'load completely' and 'load partially' log messages (#10538) 2025-10-29 19:37:06 -04:00
906c089957 Fix small performance regression with fp8 fast and scaled fp8. (#10537) 2025-10-29 19:29:01 -04:00
25de7b1bfa Try to fix slow load issue on low ram hardware with pinned mem. (#10536) 2025-10-29 17:20:27 -04:00
ab7ab5be23 Fix Race condition in --async-offload that can cause corruption (#10501)
* mm: factor out the current stream getter

Make this a reusable function.

* ops: sync the offload stream with the consumption of w&b

This sync is necessary as pytorch will queue cuda async frees on the same stream that created the tensor. In the case of async offload, this will be on the offload stream.

Weights and biases can go out of scope in python, which then triggers the pytorch garbage collector to queue the free operation on the offload stream, possibly before the compute stream has used the weight. This causes a use-after-free on weight data, leading to total corruption of some workflows.

So sync the offload stream with the compute stream after the weight has been used, so the free has to wait for the weight to be used.

cast_bias_weight is extended in a backwards-compatible way, with the new behaviour opt-in via a defaulted parameter. This handles custom node packs calling cast_bias_weight and disables async-offload for them (as they do not handle the race).

The pattern is now:

cast_bias_weight(... , offloadable=True) # This might be offloaded
thing(weight, bias, ...)
uncast_bias_weight(...)

* controlnet: adopt new cast_bias_weight synchronization scheme

This is necessary for safe async weight offloading.

* mm: sync the last stream in the queue, not the next

Currently this peeks ahead to sync the next stream in the queue of streams with the compute stream. This doesn't allow a lot of parallelization, as the end result is you can only get one weight load ahead regardless of how many streams you have.

Rotate the loop logic here to synchronize the end of the queue before returning the next stream. This allows weights to be loaded ahead of the compute stream's position.
2025-10-29 17:17:46 -04:00
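A sketch of why the post-use sync matters (illustrative, not the real ops code): pytorch queues async frees on the stream that created a tensor, so a weight materialized on the offload stream must not be freed until the compute stream has consumed it.

```python
import torch
import torch.nn.functional as F

def linear_with_offload(module, x, offload_stream):
    compute = torch.cuda.current_stream()
    with torch.cuda.stream(offload_stream):
        w = module.weight.to(x.device, x.dtype, non_blocking=True)
    compute.wait_stream(offload_stream)   # weight must be ready before use
    out = F.linear(x, w)
    # the "uncast" step: any async free of w (queued on the offload
    # stream) now has to wait until compute has actually used it
    offload_stream.wait_stream(compute)
    return out
```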
ec4fc2a09a Fix case of weights not being unpinned. (#10533) 2025-10-29 15:48:06 -04:00
1a58087ac2 Reduce memory usage for fp8 scaled op. (#10531) 2025-10-29 15:43:51 -04:00
6c14f3afac use new API client in Luma and Minimax nodes (#10528) 2025-10-29 11:14:56 -07:00
e525673f72 Fix issue. (#10527) 2025-10-29 00:37:00 -04:00
3fa7a5c04a Speed up offloading using pinned memory. (#10526)
To enable this feature use: --fast pinned_memory
2025-10-29 00:21:01 -04:00
210f7a1ba5 convert nodes_recraft.py to V3 schema (#10507) 2025-10-28 14:38:05 -07:00
d202c2ba74 execution: Allow a subgraph nodes to execute multiple times (#10499)
In the case of --cache-none, lazy and subgraph execution can cause anything to be run multiple times per workflow. If that rerun node is itself a subgraph generator, this will crash for two reasons.

First, pending_subgraph_results[] does not clean up entries after use. So when a pending_subgraph_result is consumed, remove it from the list, so that if the corresponding node is fully re-executed the lookup misses and falls through to execute the node as it should.

Secondly, there is an explicit enforcement against duplicates when adding subgraph nodes as ephemerals to the dynprompt. Remove this enforcement as the use case is now valid.
2025-10-28 16:22:08 -04:00
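A tiny sketch of the first fix, with names following the commit message: consuming a pending subgraph result removes it, so a fully re-executed node misses the lookup and runs again as it should.

```python
pending_subgraph_results = {}

def take_pending_subgraph_result(node_id):
    # pop, not get: a consumed entry must not be replayed if the node
    # is re-executed under --cache-none
    return pending_subgraph_results.pop(node_id, None)
```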
8817f8fc14 Mixed Precision Quantization System (#10498)
* Implement mixed precision operations with a registry design and metadata for the quant spec in the checkpoint.

* Updated design using Tensor Subclasses

* Fix FP8 MM

* An actually functional POC

* Remove CK reference and ensure correct compute dtype

* Update unit tests

* ruff lint

* Implement mixed precision operations with a registry design and metadata for the quant spec in the checkpoint.

* Updated design using Tensor Subclasses

* Fix FP8 MM

* An actually functional POC

* Remove CK reference and ensure correct compute dtype

* Update unit tests

* ruff lint

* Fix missing keys

* Rename quant dtype parameter

* Rename quant dtype parameter

* Fix unittests for CPU build
2025-10-28 16:20:53 -04:00
22e40d2ace Tell users to update their nvidia drivers if portable doesn't start. (#10518) 2025-10-28 15:08:08 -04:00
3bea4efc6b Tell users to update nvidia drivers if problem with portable. (#10510) 2025-10-28 04:45:45 -04:00
8cf2ba4ba6 Remove comfy api key from queue api. (#10502) 2025-10-28 03:23:52 -04:00
b61a40cbc9 Bump stable portable to cu130 python 3.13.9 (#10508) 2025-10-28 03:21:45 -04:00
f2bb3230b7 ComfyUI version v0.3.67 2025-10-28 03:03:59 -04:00
614b8d3345 frontend bump to 1.28.8 (#10506) 2025-10-28 03:01:13 -04:00
6abc30aae9 Update template to 0.2.4 (#10505) 2025-10-28 01:56:30 -04:00
55bad30375 feat(api-nodes): add LTXV API nodes (#10496) 2025-10-27 22:25:29 -07:00
c305deed56 Update template to 0.2.3 (#10503) 2025-10-27 22:24:16 -07:00
601ee1775a Add a bat to run comfyui portable without api nodes. (#10504) 2025-10-27 23:54:00 -04:00
c170fd2db5 Bump portable deps workflow to torch cu130 python 3.13.9 (#10493) 2025-10-26 20:23:01 -04:00
9d529e5308 fix(api-nodes): random issues on Windows by capturing general OSError for retries (#10486) 2025-10-25 23:51:06 -07:00
f6bbc1ac84 Fix mistake. (#10484) 2025-10-25 23:07:29 -04:00
098a352f13 Add warning for torch-directml usage (#10482)
Added a warning message about the state of torch-directml.
2025-10-25 20:05:22 -04:00
e86b79ab9e convert Gemini API nodes to V3 schema (#10476) 2025-10-25 14:35:30 -07:00
426cde37f1 Remove useless function (#10472) 2025-10-24 19:56:51 -04:00
dd5af0c587 convert Tripo API nodes to V3 schema (#10469) 2025-10-24 15:48:34 -07:00
388b306a2b feat(api-nodes): network client v2: async ops, cancellation, downloads, refactor (#10390)
* feat(api-nodes): implement new API client for V3 nodes

* feat(api-nodes): implement new API client for V3 nodes

* feat(api-nodes): implement new API client for V3 nodes

* converted WAN nodes to use new client; polishing

* fix(auth): do not leak authentication for absolute urls

* convert BFL API nodes to use new API client; remove deprecated BFL nodes

* converted Google Veo nodes

* fix(Veo3.1 model): take into account "generate_audio" parameter
2025-10-23 22:37:16 -07:00
24188b3141 Update template to 0.2.2 (#10461)
Fix template typo issue
2025-10-24 01:36:30 -04:00
1bcda6df98 WIP way to support multi multi dimensional latents. (#10456) 2025-10-23 21:21:14 -04:00
a1864c01f2 Small readme improvement. (#10442) 2025-10-22 17:26:22 -04:00
4739d7717f execution: fold in dependency aware caching / Fix --cache-none with loops/lazy etc (Resubmit) (#10440)
* execution: fold in dependency aware caching

This makes --cache-none compatible with lazy and expanded subgraphs.

Currently the --cache-none option is powered by the DependencyAwareCache. The cache attempts to maintain a parallel copy of the execution list data structure; however, it is only set up once at the start of execution and does not get meaningful updates to the execution list.

This causes multiple problems when --cache-none is used with lazy and expanded subgraphs, as the DAC does not accurately update its copy of the execution data structure.

DAC has an attempt to handle subgraphs (ensure_subcache); however, this does not accurately connect to nodes outside the subgraph. The current semantics of DAC are to free a node ASAP after the dependent nodes are executed.

This means that if a subgraph refs such a node, it will be re-queued and re-executed by the execution_list, but DAC won't see it in its to-free lists anymore and will leak memory.

Rather than try to cover all the cases where the execution list changes from inside the cache, move the whole problem to the executor, which maintains an always up-to-date copy of the wanted data structure.

The executor now has a fast-moving run-local cache of its own. Each _to node has its own mini cache, and the cache is unconditionally primed at the time of add_strong_link.

add_strong_link is called for all of static workflows, lazy links and expanded subgraphs, so it's the singular source of truth for output dependencies.

In the case of a cache-hit, the executor cache will hold the non-none value (it will respect updates if they happen somehow as well).

In the case of a cache-miss, the executor caches a None and will wait for a notification to update the value when the node completes.

When a node completes execution, it simply releases its mini-cache and in turn its strong refs on its direct ancestor outputs, allowing for ASAP freeing (same as the DependencyAwareCache but a little more automatic).

This now allows for re-implementation of --cache-none with no cache at all. The dependency aware cache was also observing the dependency semantics for the objects and UI caches, which is not accurate (this entire logic was always outputs-specific).

This also prepares for more complex caching strategies (such as RAM pressure based caching), where a cache can implement any freeing strategy completely independently of the dependency-awareness requirement.

* main: re-implement --cache-none as no cache at all

The execution list now tracks dependency-aware caching more correctly than the DependencyAwareCache.

Change it to a cache that does nothing.

* test_execution: add --cache-none to the test suite

--cache-none is now expected to work universally. Run it through the full unit test suite. Propagate the server parameterization for whether or not the server is capable of caching, so that the minority of tests that specifically check for cache hits can if/else. Hard-assert NOT caching in the else branch to give some coverage of the expected --cache-none behaviour of not actually caching.
2025-10-22 15:49:05 -04:00
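A very rough sketch of the run-local cache shape described above (all names illustrative): each destination node's mini cache is primed at add_strong_link time and released wholesale when the node finishes, dropping its strong refs on ancestor outputs.

```python
class RunLocalCache:
    def __init__(self):
        self.per_node = {}  # to_node -> {from_node: output or None}

    def add_strong_link(self, from_node, to_node, known_outputs):
        # primed unconditionally: None on a cache-miss, value on a hit
        mini = self.per_node.setdefault(to_node, {})
        mini[from_node] = known_outputs.get(from_node)

    def notify(self, node_id, value):
        # a cache-miss entry is filled in when the node completes
        for mini in self.per_node.values():
            if node_id in mini and mini[node_id] is None:
                mini[node_id] = value

    def node_done(self, node_id):
        # release the mini cache and, with it, the strong refs on the
        # node's direct ancestor outputs (ASAP freeing)
        self.per_node.pop(node_id, None)
```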
f13cff0be6 Add custom node published subgraphs endpoint (#10438)
* Add get_subgraphs_dir to ComfyExtension and PUBLISHED_SUBGRAPH_DIRS to nodes.py

* Created initial endpoints, although the returned paths are a bit off currently

* Fix path and actually return real data

* Sanitize returned /api/global_subgraphs entries

* Remove leftover function from early prototyping

* Remove added whitespace

* Add None check for sanitize_entry
2025-10-21 23:16:16 -04:00
9cdc64998f Only disable cudnn on newer AMD GPUs. (#10437) 2025-10-21 19:15:23 -04:00
560b1bdfca ComfyUI version v0.3.66 2025-10-21 01:12:32 -04:00
b7992f871a Revert "execution: fold in dependency aware caching / Fix --cache-none with l…" (#10422)
This reverts commit b1467da480.
2025-10-20 19:03:06 -04:00
2c2aa409b0 Log message for cudnn disable on AMD. (#10418) 2025-10-20 15:43:24 -04:00
a4787ac83b Update template to 0.2.1 (#10413)
* Update template to 0.1.97

* Update template to 0.2.1
2025-10-20 15:28:36 -04:00
b5c59b763c Deprecation warning on unused files (#10387)
* only warn for unused files

* include internal extensions
2025-10-19 13:05:46 -07:00
b4f30bd408 Pytorch is stupid. (#10398) 2025-10-19 01:25:35 -04:00
dad076aee6 Speed up chroma radiance. (#10395) 2025-10-18 23:19:52 -04:00
0cf33953a7 Fix batch size above 1 giving bad output in chroma radiance. (#10394) 2025-10-18 23:15:34 -04:00
5b80addafd Turn off cuda malloc by default when --fast autotune is turned on. (#10393) 2025-10-18 22:35:46 -04:00
9da397ea2f Disable torch compiler for cast_bias_weight function (#10384)
* Disable torch compiler for cast_bias_weight function

* Fix torch compile.
2025-10-17 20:03:28 -04:00
92d97380bd Update Python 3.14 installation instructions (#10385)
Removed mention of installing pytorch nightly for Python 3.14.
2025-10-17 18:22:59 -04:00
99ce2a1f66 convert nodes_controlnet.py to V3 schema (#10202) 2025-10-17 14:13:05 -07:00
b1467da480 execution: fold in dependency aware caching / Fix --cache-none with loops/lazy etc (#10368)
* execution: fold in dependency aware caching

This makes --cache-none compatible with lazy and expanded subgraphs.

Currently the --cache-none option is powered by the DependencyAwareCache. The cache attempts to maintain a parallel copy of the execution list data structure; however, it is only set up once at the start of execution and does not get meaningful updates to the execution list.

This causes multiple problems when --cache-none is used with lazy and expanded subgraphs, as the DAC does not accurately update its copy of the execution data structure.

DAC has an attempt to handle subgraphs (ensure_subcache); however, this does not accurately connect to nodes outside the subgraph. The current semantics of DAC are to free a node ASAP after the dependent nodes are executed.

This means that if a subgraph refs such a node, it will be re-queued and re-executed by the execution_list, but DAC won't see it in its to-free lists anymore and will leak memory.

Rather than try to cover all the cases where the execution list changes from inside the cache, move the whole problem to the executor, which maintains an always up-to-date copy of the wanted data structure.

The executor now has a fast-moving run-local cache of its own. Each _to node has its own mini cache, and the cache is unconditionally primed at the time of add_strong_link.

add_strong_link is called for all of static workflows, lazy links and expanded subgraphs, so it's the singular source of truth for output dependencies.

In the case of a cache-hit, the executor cache will hold the non-none value (it will respect updates if they happen somehow as well).

In the case of a cache-miss, the executor caches a None and will wait for a notification to update the value when the node completes.

When a node completes execution, it simply releases its mini-cache and in turn its strong refs on its direct ancestor outputs, allowing for ASAP freeing (same as the DependencyAwareCache but a little more automatic).

This now allows for re-implementation of --cache-none with no cache at all. The dependency aware cache was also observing the dependency semantics for the objects and UI caches, which is not accurate (this entire logic was always outputs-specific).

This also prepares for more complex caching strategies (such as RAM pressure based caching), where a cache can implement any freeing strategy completely independently of the dependency-awareness requirement.

* main: re-implement --cache-none as no cache at all

The execution list now tracks dependency-aware caching more correctly than the DependencyAwareCache.

Change it to a cache that does nothing.

* test_execution: add --cache-none to the test suite

--cache-none is now expected to work universally. Run it through the full unit test suite. Propagate the server parameterization for whether or not the server is capable of caching, so that the minority of tests that specifically check for cache hits can if/else. Hard-assert NOT caching in the else branch to give some coverage of the expected --cache-none behaviour of not actually caching.
2025-10-17 13:55:15 -07:00
d8d60b5609 Do batch_slice in EasyCache's apply_cache_diff (#10376) 2025-10-17 00:39:37 -04:00
b1293d50ef workaround also works on cudnn 91200 (#10375) 2025-10-16 19:59:56 -04:00
19b466160c Workaround for nvidia issue where VAE uses 3x more memory on torch 2.9 (#10373) 2025-10-16 18:16:03 -04:00
bc0ad9bb49 fix(api-nodes): remove "veo2" model from Veo3 node (#10372) 2025-10-16 10:12:50 -07:00
4054b4bf38 feat: deprecated API alert (#10366) 2025-10-16 01:13:31 -07:00
55ac7d333c Bump frontend to 1.28.7 (#10364) 2025-10-15 20:30:39 -07:00
afa8a24fe1 refactor: Replace manual patches merging with merge_nested_dicts (#10360) 2025-10-15 17:16:09 -07:00
493b81e48f Fix order of inputs nested merge_nested_dicts (#10362) 2025-10-15 16:47:26 -07:00
6b035bfce2 Latest pytorch stable is cu130 (#10361) 2025-10-15 18:48:12 -04:00
74b7f0b04b feat(api-nodes): add Veo3.1 model (#10357) 2025-10-15 15:41:45 -07:00
f72c6616b2 Add TemporalScoreRescaling node (#10351)
* Add TemporalScoreRescaling node

* Mention image generation in tsr_k's tooltip
2025-10-15 18:12:25 -04:00
1c10b33f9b gfx942 doesn't support fp8 operations. (#10348) 2025-10-15 00:21:11 -04:00
ddfce1af4f Bump frontend to 1.28.6 (#10345) 2025-10-14 21:08:23 -04:00
7a883849ea api-nodes: fixed dynamic pricing format; import comfy_io directly (#10336) 2025-10-13 23:55:56 -07:00
84867067ea Python 3.14 instructions. (#10337) 2025-10-14 02:09:12 -04:00
3374e900d0 Faster workflow cancelling. (#10301) 2025-10-13 23:43:53 -04:00
51696e3fdc ComfyUI version 0.3.65 2025-10-13 23:39:55 -04:00
dfff7e5332 Better memory estimation for the SD/Flux VAE on AMD. (#10334) 2025-10-13 22:37:19 -04:00
e4ea393666 Fix loading old stable diffusion ckpt files on newer numpy. (#10333) 2025-10-13 22:18:58 -04:00
c8674bc6e9 Enable RDNA4 pytorch attention on ROCm 7.0 and up. (#10332) 2025-10-13 21:19:03 -04:00
3dfdcf66b6 convert nodes_hunyuan.py to V3 schema (#10136) 2025-10-13 12:36:26 -07:00
95ca2e56c8 WAN2.2: Fix cache VRAM leak on error (#10308)
Same change pattern as 7e8dd275c2
applied to WAN2.2

If this suffers an exception (such as a VRAM oom), it will leave the encode() and decode() methods, which skips the cleanup of the WAN feature cache. The comfy node cache then ultimately keeps a reference to this object, which is in turn reffing large tensors from the failed execution.

The feature cache is currently set up as a class variable on the encoder/decoder; however, the encode and decode functions always clear it on both entry and exit of normal execution.

It's likely the design intent is that this is usable as a streaming encoder where the input comes in batches, but the functions as they are today don't support that.

So simplify by bringing the cache back to a local variable, so that if it does VRAM OOM, the cache itself is properly garbage when the encode()/decode() functions disappear from the stack.
2025-10-13 15:23:11 -04:00
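A sketch of the class-variable to local-variable change (illustrative module shape): with the cache local to encode(), an escaping VRAM OOM leaves nothing reachable once the stack frame unwinds.

```python
import torch

class WanEncoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.blocks = torch.nn.ModuleList(torch.nn.Identity() for _ in range(3))

    def encode(self, x):
        # local, not self.feat_cache: if a VRAM OOM escapes this method,
        # the cache (and the large tensors it refs) becomes garbage as
        # soon as the frame disappears from the stack
        feat_cache = [None] * len(self.blocks)
        for i, block in enumerate(self.blocks):
            x = block(x)
            feat_cache[i] = x
        return x
```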
27ffd12c45 add indent=4 kwarg to json.dumps() (#10307) 2025-10-13 12:14:52 -07:00
e693e4db6a Always set diffusion model to eval() mode. (#10331) 2025-10-13 14:57:27 -04:00
d68ece7301 Update the extra_model_paths.yaml.example (#10319) 2025-10-12 23:54:41 -04:00
894837de9a update extra models paths example (#10316) 2025-10-12 23:35:33 -04:00
fdc92863b6 Update node docs to 0.3.0 (#10318) 2025-10-12 23:32:02 -04:00
a125cd84b0 Improve AMD performance. (#10302)
I honestly have no idea why this improves things but it does.
2025-10-12 00:28:01 -04:00
84e9ce32c6 Implement the mmaudio VAE. (#10300) 2025-10-11 22:57:23 -04:00
f43b8ab2a2 Update template to 0.1.95 (#10294) 2025-10-11 10:27:22 -07:00
14d642acd6 feat(api-nodes): add price extractor feature; small fixes to Kling & Pika nodes (#10284) 2025-10-10 16:21:40 -07:00
aa895db7e8 feat(GeminiImage-ApiNode): add aspect_ratio and release version of model (#10255) 2025-10-10 16:17:20 -07:00
cdfc25a160 Fix save audio nodes saving mono audio as stereo. (#10289) 2025-10-10 17:33:51 -04:00
81e4dac107 convert nodes_upscale_model.py to V3 schema (#10149) 2025-10-09 16:08:40 -07:00
90853fb9cd convert nodes_flux to V3 schema (#10122) 2025-10-09 16:07:17 -07:00
f1dd6e50f8 Fix bug with applying loras on fp8 scaled without fp8 ops. (#10279) 2025-10-09 19:02:40 -04:00
fc0fbf141c convert nodes_sd3.py and nodes_slg.py to V3 schema (#10162) 2025-10-09 15:18:23 -07:00
f3d5d328a3 fix(v3,api-nodes): V3 schema typing; corrected Pika API nodes (#10265) 2025-10-09 15:15:03 -07:00
139addd53c More surgical fix for #10267 (#10276) 2025-10-09 16:37:35 -04:00
cbee7d3390 convert nodes_latent.py to V3 schema (#10160) 2025-10-08 23:14:00 -07:00
6732014a0a convert nodes_compositing.py to V3 schema (#10174) 2025-10-08 23:13:15 -07:00
989f715d92 convert nodes_lora_extract.py to V3 schema (#10182) 2025-10-08 23:11:45 -07:00
2ba8d7cce8 convert nodes_model_downscale.py to V3 schema (#10199) 2025-10-08 23:10:23 -07:00
51fb505ffa feat(api-nodes, pylint): use lazy formatting in logging functions (#10248) 2025-10-08 23:06:56 -07:00
72c2071972 Mvly/node update (#10042)
* updated V2V node to allow for control image input
exposing steps in v2v
fixing guidance_scale as input parameter

TODO: allow for motion_intensity as input param.

* refactor: comment out unsupported resolution and adjust default values in video nodes

* set control_after_generate

* adding new defaults

* fixes

* changed control_after_generate back to True

* changed control_after_generate back to False

---------

Co-authored-by: thorsten <thorsten@tripod-digital.co.nz>
2025-10-08 20:30:41 -04:00
6e59934089 Refactor model sampling sigmas code. (#10250) 2025-10-08 17:49:02 -04:00
3e0eb8d33f feat(V3-io): allow Enum classes for Combo options (#10237) 2025-10-08 00:14:04 -07:00
637221995f ComfyUI version 0.3.64 2025-10-08 00:53:43 -04:00
51697d50dc update template to 0.1.94 (#10253) 2025-10-07 19:48:51 -07:00
19f595b788 Bump frontend to 1.27.10 (#10252) 2025-10-07 17:54:00 -07:00
8a15568f10 Temp fix for LTXV custom nodes. (#10251) 2025-10-07 19:55:23 -04:00
9e984c48bc feat(api-nodes): add Sora2 API node (#10249) 2025-10-07 14:11:37 -07:00
fc34c3d112 fix(ReCraft-API-node): allow custom multipart parser to return FormData (#10244) 2025-10-07 13:15:32 -07:00
8aea746212 Implement gemma 3 as a text encoder. (#10241)
Not useful yet.
2025-10-06 22:08:08 -04:00
8c19910427 convert nodes_kling.py to V3 schema (#10236) 2025-10-06 16:26:52 -07:00
e77e0a8f8f convert nodes_pika.py to V3 schema (#10216) 2025-10-06 16:20:26 -07:00
a49007a7b0 fix(api-nodes): allow negative_prompt PixVerse to be multiline (#10196) 2025-10-06 16:13:43 -07:00
6ae3515801 fix(api-nodes): enable more pylint rules (#10213) 2025-10-06 16:05:57 -07:00
6bd3f8eb9f ComfyUI version 0.3.63 2025-10-06 14:49:04 -04:00
7326e46dee Update template to 0.1.93 (#10235)
* Update template to 0.1.92

* Update template to 0.1.93
2025-10-06 10:57:00 -07:00
195e0b0639 Remove useless code. (#10223) 2025-10-05 15:41:19 -04:00
187f43696d fix(api-nodes): disable "std" mode for Kling2.5-turbo (#10212) 2025-10-04 23:34:18 -07:00
caf07331ff Remove soundfile dependency. No more torchaudio load or save. (#10210) 2025-10-04 22:05:05 -04:00
b1fa1922df convert nodes_stable3d.py to V3 schema (#10204) 2025-10-04 12:33:48 -07:00
2ed74f7ac7 convert nodes_rodin.py to V3 schema (#10195) 2025-10-04 12:29:09 -07:00
22f99fb97e fix(api-nodes): enable 2 more pylint rules, removed non needed code (#10192) 2025-10-04 12:22:57 -07:00
bbd683098e Add instructions to install nightly AMD pytorch for windows. (#10190)
* Add instructions to install nightly AMD pytorch for windows.

* Update README.md
2025-10-03 23:37:43 -04:00
08726b64fe Update amd nightly command in readme. (#10189) 2025-10-03 18:22:43 -04:00
93d859cfaa Fix type annotation syntax in MotionEncoder_tc __init__ (#10186)
## Summary
Fixed incorrect type hint syntax in `MotionEncoder_tc.__init__()` parameter list.

## Changes
- Line 647: Changed `num_heads=int` to `num_heads: int` 
- This corrects the parameter annotation from a default value assignment to proper type hint syntax

## Details
The parameter was using assignment syntax (`=`) instead of type annotation syntax (`:`), which would incorrectly set the default value to the `int` class itself rather than annotating the expected type.
2025-10-03 14:32:19 -07:00
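The two spellings side by side; one binds a default value, the other annotates a type:

```python
def bad(num_heads=int): ...    # default value is the `int` class itself
def good(num_heads: int): ...  # proper type annotation, no default

print(bad.__defaults__)        # (<class 'int'>,)
print(good.__annotations__)    # {'num_heads': <class 'int'>}
```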
4614ee09ca convert nodes_edit_model.py to V3 schema (#10147) 2025-10-03 13:24:42 -07:00
5c8e986e27 convert nodes_tomesd.py to V3 schema (#10180) 2025-10-03 11:50:38 -07:00
8c26d7bbe6 convert nodes_pixverse.py to V3 schema (#10177) 2025-10-03 11:48:21 -07:00
d7aa414141 convert nodes_eps.py to V3 schema (#10172) 2025-10-03 11:45:02 -07:00
3e68bc342c convert nodes_torch_compile.py to V3 schema (#10173) 2025-10-03 11:43:54 -07:00
c2c5a7d5f8 fix(api-nodes): bad indentation in Recraft API node function (#10175) 2025-10-03 11:41:06 -07:00
8a293372ec fix(api-nodes): reimport of base64 in Gemini node (#10181) 2025-10-03 11:40:27 -07:00
ed3ca78e08 feat(api-nodes): add kling-2-5-turbo to txt2video and img2video nodes (#10155) 2025-10-03 11:26:34 -07:00
4ffea0e864 feat(linter, api-nodes): add pylint for comfy_api_nodes folder (#10157) 2025-10-02 19:14:28 -04:00
1395bce9f7 update example_node to use V3 schema (#9723) 2025-10-02 15:20:29 -07:00
e9364ee279 Turn on TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL by default. (#10168) 2025-10-02 17:57:15 -04:00
f6e3e9a456 fix(api-nodes): made logging path to be smaller (#10156) 2025-10-02 14:50:31 -07:00
8f4ee9984c convert nodes_morphology.py to V3 schema (#10159) 2025-10-02 13:53:00 -07:00
0e9d1724be Add a .bat to the AMD portable to disable smart memory. (#10153) 2025-10-02 00:33:05 -04:00
4965c0e2ac WAN: Fix cache VRAM leak on error (#10141)
If this suffers an exception (such as a VRAM oom), it will leave the encode() and decode() methods, which skips the cleanup of the WAN feature cache. The comfy node cache then ultimately keeps a reference to this object, which is in turn reffing large tensors from the failed execution.

The feature cache is currently set up as a class variable on the encoder/decoder; however, the encode and decode functions always clear it on both entry and exit of normal execution.

It's likely the design intent is that this is usable as a streaming encoder where the input comes in batches, but the functions as they are today don't support that.

So simplify by bringing the cache back to a local variable, so that if it does VRAM OOM, the cache itself is properly garbage when the encode()/decode() functions disappear from the stack.
2025-10-01 18:42:16 -04:00
911331c06c sd: fix VAE tiled fallback VRAM leak (#10139)
When the VAE catches this VRAM OOM, it launches the fallback logic
straight from the exception context.

Python, however, refs the entire call stack that caused the exception, including any local variables, for the sake of exception reporting and debugging. In the case of tensors, this can hold on to references to GBs of VRAM and prevent the VRAM allocator from freeing them.

So dump the except context completely before going back to the VAE via the tiler, by getting out of the except block with nothing but a flag.

This greatly increases the reliability of the tiler fallback, especially on low VRAM cards: with the bug, if the leak randomly held more than the headroom needed for a single tile, the tiler fallback would OOM and fail the flow.
2025-10-01 18:40:28 -04:00
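A sketch of the "nothing but a flag" pattern, with the decode helpers assumed: inside the except block the whole traceback (and its tensor locals) stays alive, so the tiled fallback runs only after the handler has been exited.

```python
import torch

def decode(vae, latent):
    oom = False
    try:
        return vae.decode_full(latent)    # assumed helper name
    except torch.cuda.OutOfMemoryError:
        oom = True   # leave the except block first: this drops the
                     # traceback and the GBs of tensor locals it was pinning
    if oom:
        return vae.decode_tiled(latent)   # assumed helper name
```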
bb32d4ec31 feat: Add Epsilon Scaling node for exposure bias correction (#10132) 2025-10-01 17:59:07 -04:00
a6f83a4a1a Support the new hunyuan vae. (#10150) 2025-10-01 17:19:13 -04:00
e4f99b479a convert nodes_ip2p.pt to V3 schema (#10097) 2025-10-01 12:20:30 -07:00
d9c0a4053d convert nodes_lt.py to V3 schema (#10084) 2025-10-01 12:19:56 -07:00
11bab7be76 convert nodes_pag.py to V3 schema (#10080) 2025-10-01 12:18:49 -07:00
3af1881455 convert nodes_optimalsteps.py to V3 schema (#10074) 2025-10-01 12:18:04 -07:00
e0210ce0a7 convert nodes_differential_diffusion.py to V3 schema (#10056) 2025-10-01 12:17:33 -07:00
7eb7160db4 convert nodes_gits.py to V3 schema (#9949) 2025-10-01 12:16:59 -07:00
638097829d convert nodes_audio_encoder.py to V3 schema (#10123) 2025-09-30 23:00:22 -07:00
c4a8cf60ab Bump frontend to 1.27.7 (#10133) 2025-09-30 22:12:32 -07:00
bab8ba20bf ComfyUI version 0.3.62. 2025-09-30 15:12:07 -04:00
b682a73c55 enable Seedance Pro model in the FirstLastFrame node (#10120) 2025-09-30 10:43:41 -07:00
631b9ae861 fix(Rodin3D-Gen2): missing "task_uuid" parameter (#10128) 2025-09-30 10:21:47 -07:00
f48d7230de Add new portable links to readme. (#10112) 2025-09-30 12:17:49 -04:00
6e079abc3a Workflow permission fix. (#10110) 2025-09-29 23:11:37 -04:00
977a4ed8c5 ComfyUI version 0.3.61 2025-09-29 23:04:42 -04:00
414a178fb6 Add basic readme for AMD portable. (#10109) 2025-09-29 23:03:02 -04:00
447884b657 Make stable release workflow callable. (#10108) 2025-09-29 20:37:51 -04:00
bed4b49d08 Add action to do the full stable release. (#10107) 2025-09-29 20:31:15 -04:00
342cf644ce Add a way to have different names for stable nvidia portables. (#10106) 2025-09-29 20:05:44 -04:00
3758848423 Different base files for nvidia and amd portables. (#10105) 2025-09-29 19:54:37 -04:00
0db6aabed3 Different base files for different release. (#10104) 2025-09-29 19:54:05 -04:00
1673ace19b Make the final release test optional in the stable release action. (#10103) 2025-09-29 19:08:42 -04:00
7f38e4c538 Add action to create cached deps with manually specified torch. (#10102) 2025-09-29 17:27:52 -04:00
8accf50908 convert nodes_mahiro.py to V3 schema (#10070) 2025-09-29 12:35:51 -07:00
ed0f4a609b dont cache new locale entry points (#10101) 2025-09-29 12:16:02 -07:00
041b8824f5 convert nodes_perpneg.py to V3 schema (#10081) 2025-09-29 12:05:28 -07:00
b1111c2062 convert nodes_mochi.py to V3 schema (#10069) 2025-09-29 12:03:35 -07:00
05a258efd8 add WanImageToImageApi node (#10094) 2025-09-29 12:01:04 -07:00
c8276f8c6b Update template to 0.1.91 (#10096) 2025-09-29 11:59:42 -07:00
6ec1cfe101 [Rodin3d api nodes] Updated the name of the save file path (changed from timestamp to UUID). (#10011)
* Update savepath name from time to uuid

* delete lib
2025-09-29 11:59:12 -07:00
b60dc31627 Update command to install latest nighly pytorch. (#10085) 2025-09-28 13:41:32 -04:00
555f902fc1 Fix stable workflow creating multiple draft releases. (#10067) 2025-09-27 22:43:25 -04:00
1364548c72 feat: ComfyUI can be run on the specified Ascend NPU (#9663)
* feature: Set the Ascend NPU to use a single one

* Enable the `--cuda-device` parameter to support both CUDA and Ascend NPUs simultaneously.

* Make the code just set the ASCEND_RT_VISIBLE_DEVICES environment variable without any other edits to the master branch

---------

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
2025-09-27 22:36:02 -04:00
2dadb34860 convert nodes_hypertile.py to V3 schema (#10061) 2025-09-27 19:16:22 -07:00
1cf86f5ae5 convert nodes_lumina2.py to V3 schema (#10058) 2025-09-27 19:12:51 -07:00
a1127b232d convert nodes_lotus.py to V3 schema (#10057) 2025-09-27 19:11:36 -07:00
896f2e653c Fix typo in release workflow. (#10066) 2025-09-27 21:30:35 -04:00
40ae495ddc Improvements to the stable release workflow. (#10065) 2025-09-27 20:28:49 -04:00
653ceab414 Reduce Peak WAN inference VRAM usage - part II (#10062)
* flux: math: Use addcmul_ to avoid expensive VRAM intermediate

The rope process can be the VRAM peak, and the intermediate for the addition result, held before releasing the original, can OOM. addcmul_ it.

* wan: Delete the self attention before cross attention

This saves VRAM when the cross attention and FFN are in play as the
VRAM peak.
2025-09-27 18:14:16 -04:00
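The fused in-place form, for reference: `x = x + a * b` materializes the product before the add, while `addcmul_` does the multiply-add into x directly.

```python
import torch

x, a, b = (torch.randn(4, 8) for _ in range(3))

# before: an intermediate buffer for a * b at the VRAM peak
# x = x + a * b
x.addcmul_(a, b)   # after: x += a * b with no intermediate
```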
160698eb41 convert nodes_qwen.py to V3 schema (#10049) 2025-09-27 12:25:35 -07:00
7eca95657c convert nodes_photomaker.py to V3 schema (#10017) 2025-09-27 02:36:43 -07:00
ad5aef2d0c convert nodes_pixart.py to V3 schema (#10019) 2025-09-27 02:34:32 -07:00
bcfd80dd79 convert nodes_luma.py to V3 schema (#10030) 2025-09-27 02:28:11 -07:00
6b4b671ce7 convert nodes_bfl.py to V3 schema (#10033) 2025-09-27 02:27:01 -07:00
a9cf1cd249 convert nodes_hidream.py to V3 schema (#9946) 2025-09-26 23:13:05 -07:00
255572188f Add workflow templates version tracking to system_stats (#9089)
Adds installed and required workflow templates version information to the
/system_stats endpoint, allowing the frontend to detect and notify users
when their templates package is outdated.

- Add get_installed_templates_version() and get_required_templates_version()
  methods to FrontendManager
- Include templates version info in system_stats response
- Add comprehensive unit tests for the new functionality
2025-09-26 21:29:13 -07:00
0572029fee Update template to 0.1.88 (#10046) 2025-09-26 21:18:16 -07:00
196954ab8c Add 'input_cond' and 'input_uncond' to the args dictionary passed into sampler_cfg_function (#10044) 2025-09-26 19:55:03 -07:00
1e098d6132 Don't add template to qwen2.5vl when template is in prompt. (#10043)
Make the hunyuan image refiner template_end 36.
2025-09-26 18:34:17 -04:00
cd66d72b46 convert CLIPTextEncodeSDXL nodes to V3 schema (#9716) 2025-09-26 14:15:44 -07:00
2103e39335 convert nodes_post_processing to V3 schema (#9491) 2025-09-26 14:14:42 -07:00
d20576e6a3 convert nodes_sag.py to V3 schema (#9940) 2025-09-26 14:13:52 -07:00
a061b06321 convert nodes_tcfg.py to V3 schema (#9942) 2025-09-26 14:13:05 -07:00
80718908a9 convert nodes_sdupscale.py to V3 schema (#9943) 2025-09-26 14:12:38 -07:00
7ea173c187 convert nodes_fresca.py to V3 schema (#9951) 2025-09-26 14:12:04 -07:00
76eb1d72c3 convert nodes_rebatch.py to V3 schema (#9945) 2025-09-26 14:10:49 -07:00
c4a46e943c Add @kosinkadink as code owner (#10041)
Updated CODEOWNERS to include @kosinkadink as a code owner.
2025-09-26 17:08:16 -04:00
2b7f9a8196 Fix the failing unit test. (#10037) 2025-09-26 14:12:43 -04:00
ce4cb2389c Make LatentCompositeMasked work with basic video latents. (#10023) 2025-09-25 17:20:13 -04:00
c8d2117f02 Fix memory leak by properly detaching model finalizer (#9979)
When unloading models in load_models_gpu(), the model finalizer was not
being explicitly detached, leading to a memory leak. This caused
linear memory consumption increase over time as models are repeatedly
loaded and unloaded.

This change prevents orphaned finalizer references from accumulating in
memory during model switching operations.
2025-09-24 22:35:12 -04:00
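The general pattern behind this fix, sketched with `weakref.finalize`; this is an illustration of the mechanism, not the actual model-management code.

```python
import weakref

class DummyModel:
    pass

def cleanup():
    pass  # teardown that should run if the model is garbage collected

model = DummyModel()
finalizer = weakref.finalize(model, cleanup)

# On a deliberate unload, detach the finalizer so its entry is removed
# from weakref's internal registry; otherwise repeated load/unload cycles
# leave orphaned finalizer references behind (the leak described above).
finalizer.detach()
```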
fccab99ec0 Fix issue with .view() in HuMo. (#10014) 2025-09-24 20:09:42 -04:00
fd79d32f38 Add new audio nodes (#9908)
* Add new audio nodes

- TrimAudioDuration
- SplitAudioChannels
- AudioConcat
- AudioMerge
- AudioAdjustVolume

* Update nodes_audio.py

* Add EmptyAudio -node

* Change duration to Float (allows sub seconds)
2025-09-24 18:59:29 -04:00
341b4adefd Rodin3D - add [Rodin3D Gen-2 generate] api-node (#9994)
* update Rodin api node

* update rodin3d gen2 api node

* fix images limited bug
2025-09-24 14:05:37 -04:00
b8730510db ComfyUI version 0.3.60 2025-09-23 11:50:33 -04:00
e808790799 feat(api-nodes): add wan t2i, t2v, i2v nodes (#9996) 2025-09-23 11:36:47 -04:00
145b0e4f79 update template to 0.1.86 (#9998)
* update template to 0.1.84

* update template to 0.1.85

* Update template to 0.1.86
2025-09-23 11:22:35 -04:00
707b2638ec Fix bug with WanAnimateToVideo. (#9990) 2025-09-22 17:34:33 -04:00
8a5ac527e6 Fix bug with WanAnimateToVideo node. (#9988) 2025-09-22 17:26:58 -04:00
e3206351b0 add offset param (#9977) 2025-09-22 17:12:32 -04:00
1fee8827cb Support for qwen edit plus model. Use the new TextEncodeQwenImageEditPlus. (#9986) 2025-09-22 16:49:48 -04:00
27bc181c49 Set some wan nodes as no longer experimental. (#9976) 2025-09-21 19:48:31 -04:00
d1d9eb94b1 Lower wan memory estimation value a bit. (#9964)
The previous PR reduced the peak memory requirement.
2025-09-20 22:09:35 -04:00
7be2b49b6b Fix LoRA Trainer bugs with FP8 models. (#9854)
* Fix adapter weight init

* Fix fp8 model training

* Avoid inference tensor
2025-09-20 21:24:48 -04:00
9ed3c5cc09 [Reviving #5709] Add strength input to Differential Diffusion (#9957)
* Update nodes_differential_diffusion.py

* Update nodes_differential_diffusion.py

* Make strength optional to avoid validation errors when loading old workflows, adjust step

---------

Co-authored-by: ThereforeGames <eric@sparknight.io>
2025-09-20 21:10:39 -04:00
66241cef31 Add inputs for character replacement to the WanAnimateToVideo node. (#9960) 2025-09-20 02:24:10 -04:00
e8df53b764 Update WanAnimateToVideo to more easily extend videos. (#9959) 2025-09-19 18:48:56 -04:00
852704c81a fix(seedream4): add flag to ignore error on partial success (#9952) 2025-09-19 16:04:51 -04:00
9fdf8c25ab api_nodes: reduce default timeout from 7 days to 2 hours (#9918) 2025-09-19 16:02:43 -04:00
dc95b6acc0 Basic WIP support for the wan animate model. (#9939) 2025-09-19 03:07:17 -04:00
711bcf33ee Bump frontend to 1.26.13 (#9933) 2025-09-19 03:03:30 -04:00
24b0fce099 Do padding of audio embed in model for humo for more flexibility. (#9935) 2025-09-18 19:54:16 -04:00
1ea8c54064 make kernel of same type as image to avoid mismatch issues (#9932) 2025-09-18 19:51:16 -04:00
8d6653fca6 Enable fp8 ops by default on gfx1200 (#9926) 2025-09-18 19:50:37 -04:00
dd611a7700 Support the HuMo 17B model. (#9912) 2025-09-17 18:39:24 -04:00
9288c78fc5 Support the HuMo model. (#9903) 2025-09-17 00:12:48 -04:00
e42682b24e Reduce Peak WAN inference VRAM usage (#9898)
* flux: Do the xq and xk ropes one at a time

This was doing independent but interleaved tensor math on the q and k
tensors, holding more than the minimum number of intermediates in VRAM.
On a bad day, it would OOM on the xk intermediates.

Do everything for q and then everything for k, so torch can garbage
collect all of q's intermediates before k allocates its own.

This reduces peak VRAM usage for some WAN2.2 inferences (at least).

* wan: Optimize qkv intermediates on attention

As commented. The former logic computed independent pieces of QKV in
parallel, which held more inference intermediates in VRAM and spiked
VRAM usage. Fully roping Q and garbage collecting its intermediates
before touching K reduces the peak inference VRAM usage.
2025-09-16 19:21:14 -04:00
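A sketch of the reordering described above; `rope_fn` stands in for the actual rotary-embedding math.

```python
def apply_rope_sequential(xq, xk, freqs_cis, rope_fn):
    # Finish all of q's rope math first so its intermediates become
    # collectible before k starts allocating, instead of interleaving
    # q and k operations and holding both sets of intermediates at once.
    xq = rope_fn(xq, freqs_cis)
    xk = rope_fn(xk, freqs_cis)
    return xq, xk
```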
a39ac59c3e Add encoder part of whisper large v3 as an audio encoder model. (#9894)
Not useful yet but some models use it.
2025-09-16 01:19:50 -04:00
1a85483da1 Fix depending on asserts to raise an exception in BatchedBrownianTree and Flash attn module (#9884)
Correctly handle the case where w0 is passed by kwargs in BatchedBrownianTree
2025-09-15 20:05:03 -04:00
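The general pattern behind this fix: `assert` statements are stripped when Python runs with `-O`, so validation that must always fire needs an explicit raise. A minimal illustration with hypothetical arguments:

```python
def check_times(t0, t1):
    # assert t0 < t1          # silently skipped under `python -O`
    if not t0 < t1:           # always enforced
        raise ValueError(f"expected t0 < t1, got {t0} >= {t1}")
```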
47a9cde5d3 Support the omnigen2 umo lora. (#9886) 2025-09-15 18:10:55 -04:00
4f1f26ac6c Add that hunyuan image is supported to readme. (#9857) 2025-09-14 04:05:38 -04:00
f228367c5e Make ModuleNotFoundError ImportError instead (#9850) 2025-09-13 21:34:21 -04:00
80b7c9455b Changes to the previous radiance commit. (#9851) 2025-09-13 18:03:34 -04:00
c1297f4eb3 Add support for Chroma Radiance (#9682)
* Initial Chroma Radiance support

* Minor Chroma Radiance cleanups

* Update Radiance nodes to ensure latents/images are on the intermediate device

* Fix Chroma Radiance memory estimation.

* Increase Chroma Radiance memory usage factor

* Increase Chroma Radiance memory usage factor once again

* Ensure images are multiples of 16 for Chroma Radiance
Add batch dimension and fix channels when necessary in ChromaRadianceImageToLatent node

* Tile Chroma Radiance NeRF to reduce memory consumption, update memory usage factor

* Update Radiance to support conv nerf final head type.

* Allow setting NeRF embedder dtype for Radiance
Bump Radiance nerf tile size to 32
Support EasyCache/LazyCache on Radiance (maybe)

* Add ChromaRadianceStubVAE node

* Crop Radiance image inputs to multiples of 16 instead of erroring to be in line with existing VAE behavior

* Convert Chroma Radiance nodes to V3 schema.

* Add ChromaRadianceOptions node and backend support.
Cleanups/refactoring to reduce code duplication with Chroma.

* Fix overriding the NeRF embedder dtype for Chroma Radiance

* Minor Chroma Radiance cleanups

* Move Chroma Radiance to its own directory in ldm
Minor code cleanups and tooltip improvements

* Fix Chroma Radiance embedder dtype overriding

* Remove Radiance dynamic nerf_embedder dtype override feature

* Unbork Radiance NeRF embedder init

* Remove Chroma Radiance image conversion and stub VAE nodes
Add a chroma_radiance option to the VAELoader builtin node which uses comfy.sd.PixelspaceConversionVAE
Add a PixelspaceConversionVAE to comfy.sd for converting BHWC 0..1 <-> BCHW -1..1
2025-09-13 17:58:43 -04:00
e5e70636e7 Remove single quote pattern to avoid wrong matches (#9842) 2025-09-13 16:59:19 -04:00
29bf807b0e Cleanup. (#9838) 2025-09-12 21:57:04 -04:00
2559dee492 Support wav2vec base models (#9637)
* Support wav2vec base models

* trim trailing whitespace

* Do interpolation after
2025-09-12 21:52:58 -04:00
a3b04de700 Hunyuan refiner vae now works with tiled. (#9836) 2025-09-12 19:46:46 -04:00
d7f40442f9 Enable Runtime Selection of Attention Functions (#9639)
* Looking into a @wrap_attn decorator to look for 'optimized_attention_override' entry in transformer_options

* Created logging code for this branch so that it can be used to track down all the code paths where transformer_options would need to be added

* Fix memory usage issue with inspect

* Made WAN attention receive transformer_options, test node added to wan to test out attention override later

* Added **kwargs to all attention functions so transformer_options could potentially be passed through

* Make sure wrap_attn doesn't make itself recurse infinitely, attempt to load SageAttention and FlashAttention if not enabled so that they can be marked as available or not, create registry for available attention

* Turn off attention logging for now, make AttentionOverrideTestNode have a dropdown with available attention (this is a test node only)

* Make flux work with optimized_attention_override

* Add logs to verify optimized_attention_override is passed all the way into attention function

* Make Qwen work with optimized_attention_override

* Made hidream work with optimized_attention_override

* Made wan patches_replace work with optimized_attention_override

* Made SD3 work with optimized_attention_override

* Made HunyuanVideo work with optimized_attention_override

* Made Mochi work with optimized_attention_override

* Made LTX work with optimized_attention_override

* Made StableAudio work with optimized_attention_override

* Made optimized_attention_override work with ACE Step

* Made Hunyuan3D work with optimized_attention_override

* Make CosmosPredict2 work with optimized_attention_override

* Made CosmosVideo work with optimized_attention_override

* Made Omnigen 2 work with optimized_attention_override

* Made StableCascade work with optimized_attention_override

* Made AuraFlow work with optimized_attention_override

* Made Lumina work with optimized_attention_override

* Made Chroma work with optimized_attention_override

* Made SVD work with optimized_attention_override

* Fix WanI2VCrossAttention so that it expects to receive transformer_options

* Fixed Wan2.1 Fun Camera transformer_options passthrough

* Fixed WAN 2.1 VACE transformer_options passthrough

* Add optimized to get_attention_function

* Disable attention logs for now

* Remove attention logging code

* Remove _register_core_attention_functions, as we wouldn't want someone to call that, just in case

* Satisfy ruff

* Remove AttentionOverrideTest node, that's something to cook up for later
2025-09-12 18:07:38 -04:00
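A sketch of the override mechanism this PR describes; the exact signature and call convention of the real decorator are assumptions.

```python
import functools

def wrap_attn(attn_fn):
    @functools.wraps(attn_fn)
    def wrapper(q, k, v, *args, transformer_options=None, **kwargs):
        # If the caller supplied an override in transformer_options,
        # dispatch to it instead of the wrapped attention implementation.
        if transformer_options:
            override = transformer_options.get("optimized_attention_override")
            if override is not None:
                return override(q, k, v, *args, **kwargs)
        return attn_fn(q, k, v, *args, **kwargs)
    return wrapper
```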
b149e2e1e3 Better way of doing the generator for the hunyuan image noise aug. (#9834) 2025-09-12 17:53:15 -04:00
581bae2af3 convert Moonvalley API nodes to the V3 schema (#9698) 2025-09-12 17:41:26 -04:00
af99928f22 convert Canny node to V3 schema (#9743) 2025-09-12 17:40:34 -04:00
53c9c7d39a convert CFG nodes to V3 schema (#9717) 2025-09-12 17:39:55 -04:00
ba68e83f1c convert nodes_cond.py to V3 schema (#9719) 2025-09-12 17:39:30 -04:00
dcb8834983 convert Cosmos nodes to V3 schema (#9721) 2025-09-12 17:38:46 -04:00
f9d2e4b742 convert WanCameraEmbedding node to V3 schema (#9714) 2025-09-12 17:38:12 -04:00
45bc1f5c00 convert Minimax API nodes to the V3 schema (#9693) 2025-09-12 17:37:31 -04:00
0aa074a420 add kling-v2-1 model to the KlingStartEndFrame node (#9630) 2025-09-12 17:29:03 -04:00
7757d5a657 Set default hunyuan refiner shift to 4.0 (#9833) 2025-09-12 16:40:12 -04:00
e600520f8a Fix hunyuan refiner blownout colors at noise aug less than 0.25 (#9832) 2025-09-12 16:35:34 -04:00
fd2b820ec2 Add noise augmentation to hunyuan image refiner. (#9831)
This was missing and should help with colors being blown out.
2025-09-12 16:03:08 -04:00
d6b977b2e6 Bump frontend to 1.26.11 (#9809) 2025-09-12 00:46:01 -04:00
15ec9ea958 Add Output to V3 Combo type to match what is possible with V1 (#9813) 2025-09-12 00:44:20 -04:00
33bd9ed9cb Implement hunyuan image refiner model. (#9817) 2025-09-12 00:43:20 -04:00
18de0b2830 Fast preview for hunyuan image. (#9814) 2025-09-11 19:33:02 -04:00
df6850fae8 Update template to 0.1.81 (#9811) 2025-09-11 14:59:26 -04:00
e01e99d075 Support hunyuan image distilled model. (#9807) 2025-09-10 23:17:34 -04:00
72212fef66 ComfyUI version 0.3.59 2025-09-10 17:25:41 -04:00
df34f1549a Update template to 0.1.78 (#9806)
* Update template to 0.1.77

* Update template to 0.1.78
2025-09-10 14:16:41 -07:00
9b0553809c add new ByteDanceSeedream (4.0) node (#9802) 2025-09-10 14:13:18 -07:00
8d7c930246 ComfyUI version v0.3.58 2025-09-10 10:51:02 -04:00
de44b95db6 add StabilityAudio API nodes (#9749) 2025-09-10 05:06:47 -04:00
543888d3d8 Fix lowvram issue with hunyuan image vae. (#9794) 2025-09-10 02:15:34 -04:00
70fc0425b3 Update template to 0.1.76 (#9793) 2025-09-10 02:09:16 -04:00
85e34643f8 Support hunyuan image 2.1 regular model. (#9792) 2025-09-10 02:05:07 -04:00
5c33872e2f Fix issue on old torch. (#9791) 2025-09-10 00:23:47 -04:00
206595f854 Change validate_inputs' output typehint to 'bool | str' and update docstrings (#9786) 2025-09-09 21:33:36 -04:00
b288fb0db8 Small refactor of some vae code. (#9787) 2025-09-09 18:09:56 -04:00
f73b176abd add ByteDance video API nodes (#9712) 2025-09-09 14:40:29 -04:00
103a12cb66 Support qwen inpaint controlnet. (#9772) 2025-09-08 17:30:26 -04:00
97652d26b8 Add explicit casting in apply_rope for Qwen VL (#9759) 2025-09-08 15:08:18 -04:00
bd1d9bcd5f Add ZeroDivisionError catch for EasyCache logging statement (#9768) 2025-09-08 15:07:04 -04:00
fb763d4333 Fix amd_min_version crash when cpu device. (#9754) 2025-09-07 21:16:29 -04:00
bcbd7884e3 Don't enable pytorch attention on AMD if triton isn't available. (#9747) 2025-09-07 00:29:38 -04:00
27a0fcccc3 Enable bf16 VAE on RDNA4. (#9746) 2025-09-06 23:25:22 -04:00
ea6cdd2631 Print all fast options in --help (#9737) 2025-09-06 01:05:05 -04:00
2ee7879a0b Fix lowvram issues with hunyuan3d 2.1 (#9735) 2025-09-05 14:57:35 -04:00
3493b9cb1f fix: add cache headers for images (#9560) 2025-09-05 14:32:25 -04:00
c9ebe70072 Some changes to the previous hunyuan PR. (#9725) 2025-09-04 20:39:02 -04:00
261421e218 Add Hunyuan 3D 2.1 Support (#8714) 2025-09-04 20:36:20 -04:00
a9f1bb10a5 Fix progress update crossover between users (#9706)
* Fix showing progress from other sessions

Because `client_id` was missing from the `progress_state` message, it
was being sent to all connected sessions. This technically meant that if
someone had a graph with the same nodes, they would see the progress
updates for others.

Also added a test to prevent recurrence and moved the tests around to
make CI easier to hook up.

* Fix CI issues related to timing-sensitive tests
2025-09-04 19:13:28 -04:00
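A sketch of the fix, assuming a `send_json(event, data, sid=...)`-style server API; omitting the session id is what caused the message to be broadcast to every connected client.

```python
async def send_progress_state(server, prompt_id, nodes, client_id):
    # Route the message only to the session that owns the prompt.
    await server.send_json(
        "progress_state",
        {"prompt_id": prompt_id, "nodes": nodes},
        sid=client_id,
    )
```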
b0338e930b ComfyUI 0.3.57 2025-09-04 02:15:57 -04:00
b71f9bcb71 Update template to 0.1.75 (#9711) 2025-09-04 02:14:02 -04:00
72855db715 Fix potential rope issue. (#9710) 2025-09-03 22:20:13 -04:00
f48d05a2d1 convert AlignYourStepsScheduler node to V3 schema (#9226) 2025-09-03 21:21:38 -04:00
4368d8f87f Update comment in api example. (#9708) 2025-09-03 18:43:29 -04:00
22da0a83e9 [V3] convert Runway API nodes to the V3 schema (#9487)
* convert RunAway API nodes to the V3 schema

* fixed small typo

* fix: add tooltip for "seed" input
2025-09-03 16:18:27 -04:00
50333f1715 api nodes(Ideogram): add Ideogram Character (#9616)
* api nodes(Ideogram): add Ideogram Character

* rename renderingSpeed default value from 'balanced' to 'default'
2025-09-03 16:17:37 -04:00
26d5b86da8 feat(api-nodes): add ByteDance Image nodes (#9477) 2025-09-03 16:17:07 -04:00
4f5812b937 Update template to 0.1.73 (#9686) 2025-09-02 20:06:41 -04:00
1bcb469089 ImageScaleToMaxDimension node. (#9689) 2025-09-02 20:05:57 -04:00
464ba1d614 Accept prompt_id in interrupt handler (#9607)
* Accept prompt_id in interrupt handler

* remove a log
2025-09-02 19:41:10 -04:00
e3018c2a5a uso -> uxo/uno as requested. (#9688) 2025-09-02 16:12:07 -04:00
3412d53b1d USO style reference. (#9677)
Load the projector.safetensors file with the ModelPatchLoader node and use
the siglip_vision_patch14_384.safetensors "clip vision" model and the
USOStyleReferenceNode.
2025-09-02 15:36:22 -04:00
e2d1e5dad9 Enable Convolution AutoTuning (#9301) 2025-09-01 20:33:50 -04:00
27e067ce50 Implement the USO subject identity lora. (#9674)
Use the lora with FluxContextMultiReferenceLatentMethod node set to "uso"
and a ReferenceLatent node with the reference image.
2025-09-01 18:54:02 -04:00
9b15155972 Probably not necessary anymore. (#9646) 2025-08-31 01:32:10 -04:00
32a627bf1f SEEDS: update noise decomposition and refactor (#9633)
- Update the decomposition to reflect interval dependency
- Extract phi computations into functions
- Use torch.lerp for interpolation
2025-08-31 00:01:45 -04:00
fe442fac2e convert Primitive nodes to V3 schema (#9372) 2025-08-30 23:21:58 -04:00
d2c502e629 convert nodes_stability.py to V3 schema (#9497) 2025-08-30 23:20:17 -04:00
fea9ea8268 convert Video nodes to V3 schema (#9489) 2025-08-30 23:19:54 -04:00
f949094b3c convert Stable Cascade nodes to V3 schema (#9373) 2025-08-30 23:19:21 -04:00
4449e14769 ComfyUI version 0.3.56 2025-08-30 06:31:19 -04:00
885015eecf Lower ram usage on windows. (#9628) 2025-08-29 23:06:04 -04:00
a86aaa4301 ComfyUI v0.3.55 2025-08-29 06:03:41 -04:00
2efb2cbc38 Update template to 0.1.70 (#9620) 2025-08-29 06:03:25 -04:00
15aa9222c4 Trim audio to video when saving video. (#9617) 2025-08-29 04:12:00 -04:00
c7bb3e2bce Support the 5B fun inpaint model. (#9614)
Use the WanFunInpaintToVideo node without the clip_vision_output.
2025-08-28 22:46:57 -04:00
e80a14ad50 Support wan2.2 5B fun control model. (#9611)
Use the Wan22FunControlToVideo node.
2025-08-28 22:13:07 -04:00
d28b39d93d Add a LatentCut node to cut latents. (#9609) 2025-08-28 19:38:28 -04:00
1c184c29eb Fix issue with s2v node when extending past audio length. (#9608) 2025-08-28 18:34:01 -04:00
edde0b5043 WanSoundImageToVideoExtend node to manually extend s2v video. (#9606) 2025-08-28 17:59:48 -04:00
0063610177 ComfyUI version 0.3.54 2025-08-28 10:44:57 -04:00
ce0052c087 Fix diffsynth controlnet regression. (#9597) 2025-08-28 10:37:42 -04:00
0eb821a7b6 ComfyUI 0.3.53 2025-08-27 23:09:06 -04:00
4aa79dbf2c Adjust flux mem usage factor a bit. (#9588) 2025-08-27 23:08:17 -04:00
38f697d953 Add a LatentConcat node. (#9587) 2025-08-27 22:28:10 -04:00
3aad339b63 Add DPM++ 2M SDE Heun (RES) sampler (#9542) 2025-08-27 19:07:31 -04:00
491755325c Better s2v memory estimation. (#9584) 2025-08-27 19:02:42 -04:00
496888fd68 Improve s2v performance when generating videos longer than 120 frames. (#9582) 2025-08-27 16:06:40 -04:00
b5ac6ed7ce Fixes to make controlnet type models work on qwen edit and kontext. (#9581) 2025-08-27 15:26:28 -04:00
b20ba1f27c Fix #9537 (#9576) 2025-08-27 12:45:02 -04:00
31a37686d0 Negative audio in s2v should be zeros. (#9578) 2025-08-27 12:44:29 -04:00
88aee596a3 WIP Wan 2.2 S2V model. (#9568) 2025-08-27 01:10:34 -04:00
6a193ac557 Update template to 0.1.68 (#9569)
* Update template to 0.1.67

* Update template to 0.1.68
2025-08-27 00:10:20 -04:00
47f4db3e84 Adding Google Gemini Image API node (#9566)
* bigcat88's progress on adding Google Gemini Image node

* Made Google Gemini Image node functional

* Bump frontend version to get static pricing badge on Gemini Image node
2025-08-26 22:20:44 -04:00
5352abc6d3 Update template to 0.1.66 (#9557) 2025-08-26 13:33:54 -04:00
39aa06bd5d Make AudioEncoderOutput usable in v3 node schema. (#9554) 2025-08-26 12:50:46 -04:00
914c2a2973 Implement wav2vec2 as an audio encoder model. (#9549)
This is useless on its own but there are multiple models that use it.
2025-08-25 23:26:47 -04:00
e633a47ad1 Add models/audio_encoders directory. (#9548) 2025-08-25 20:13:54 -04:00
f6b93d41a0 Remove models from readme that are not fully implemented. (#9535)
Cosmos model implementations are currently missing the safety part, so they are technically not fully implemented and should not be advertised as such.
2025-08-24 15:40:32 -04:00
95ac7794b7 Fix EasyCache/LazyCache crash when tensor shape/dtype/device changes during sampling (#9528)
* Fix EasyCache/LazyCache crash when tensor shape/dtype/device changes during sampling

* Fix missing LazyCache check_metadata method
Ensure LazyCache reset method resets all the tensor state values
2025-08-24 15:29:49 -04:00
71ed4a399e ComfyUI version 0.3.52 2025-08-23 18:57:09 -04:00
3e316c6338 Update frontend to v1.25.10 and revert navigation mode override (#9522)
- Update comfyui-frontend-package from 1.25.9 to 1.25.10
- Revert forced legacy navigation mode from PR #9518
- Frontend v1.25.10 includes proper navigation mode fixes and improved display text
2025-08-23 17:54:01 -04:00
8be0d22ab7 Don't use the annoying new navigation mode by default. (#9518) 2025-08-23 13:56:17 -04:00
59eddda900 Python 3.13 is well supported. (#9511) 2025-08-23 01:36:44 -04:00
41048c69b4 Fix Conditioning masks on 3d latents. (#9506) 2025-08-22 23:15:44 -04:00
fc247150fe Implement EasyCache and Invent LazyCache (#9496)
* Attempting a universal implementation of EasyCache, starting with flux as test; I screwed up the math a bit, but when I set it just right it works.

* Fixed math to make threshold work as expected, refactored code to use EasyCacheHolder instead of a dict wrapped by object

* Use sigmas from transformer_options instead of timesteps to be compatible with a greater amount of models, make end_percent work

* Make log statement when not skipping useful, preparing for per-cond caching

* Added DIFFUSION_MODEL wrapper around forward function for wan model

* Add subsampling for heuristic inputs

* Add subsampling to output_prev (output_prev_subsampled now)

* Properly consider conds in EasyCache logic

* Created SuperEasyCache to test what happens if caching and reuse is moved outside the scope of conds, added PREDICT_NOISE wrapper to facilitate this test

* Change max reuse_threshold to 3.0

* Mark EasyCache/SuperEasyCache as experimental (beta)

* Make Lumina2 compatible with EasyCache

* Add EasyCache support for Qwen Image

* Fix missing comma, curse you Cursor

* Add EasyCache support to AceStep

* Add EasyCache support to Chroma

* Added EasyCache support to Cosmos Predict t2i

* Make EasyCache not crash with Cosmos Predict ImageToVideo latents, though it does not work well at all

* Add EasyCache support to hidream

* Added EasyCache support to hunyuan video

* Added EasyCache support to hunyuan3d

* Added EasyCache support to LTXV (not very good, but does not crash)

* Implemented EasyCache for aura_flow

* Renamed SuperEasyCache to LazyCache, hardcoded subsample_factor to 8 on nodes

* Extra logging when verbose is true for EasyCache
2025-08-22 22:41:08 -04:00
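A heavily simplified sketch of the reuse heuristic: skip the model call when the input has barely changed since the last real forward pass. The real implementation tracks per-cond state, subsamples inputs, and handles sigmas from transformer_options, none of which is shown here.

```python
class EasyCacheSketch:
    def __init__(self, reuse_threshold: float):
        self.reuse_threshold = reuse_threshold
        self.prev_input = None
        self.prev_output = None

    def __call__(self, model_fn, x, *args, **kwargs):
        if self.prev_output is not None:
            change = (x - self.prev_input).abs().mean()
            scale = self.prev_input.abs().mean() + 1e-8
            if change / scale < self.reuse_threshold:
                return self.prev_output  # skip the expensive forward pass
        out = model_fn(x, *args, **kwargs)
        self.prev_input, self.prev_output = x, out
        return out
```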
fe31ad0276 Add elementwise fusions (#9495)
* Add elementwise fusions

* Add addcmul pattern to Qwen
2025-08-22 19:39:15 -04:00
ca4e96a8ae Update template to 0.1.65 (#9501) 2025-08-22 17:40:18 -04:00
050c67323c feat(api-nodes): add copy button to Gemini Chat node (#9440) 2025-08-22 10:51:14 -07:00
497d41fb50 feat(api-nodes): change "OpenAI Chat" display name to "OpenAI ChatGPT" (#9443) 2025-08-22 10:50:35 -07:00
ff57793659 Support InstantX Qwen controlnet. (#9488) 2025-08-22 00:53:11 -04:00
f7bd5e58dd Make it easier to implement future qwen controlnets. (#9485) 2025-08-21 23:18:04 -04:00
7ed73d12d1 [V3] convert Ideogram API nodes to the V3 schema (#9278)
* convert Ideogram API nodes to the V3 schema

* use auth_kwargs instead of auth_token/comfy_api_key
2025-08-21 22:06:51 -04:00
eb39019daa [V3] convert Google Veo API node to the V3 schema (#9272)
* convert Google Veo API node to the V3 schema

* use own full io.Schema for Veo3VideoGenerationNode

* fixed typo

* use auth_kwargs instead of auth_token/comfy_api_key
2025-08-21 22:06:13 -04:00
bab08f40d1 v3 nodes (part a) (#9149) 2025-08-21 22:05:36 -04:00
bc49106837 convert String nodes to V3 schema (#9370) 2025-08-21 22:03:57 -04:00
1b2de2642d Support diffsynth inpaint controlnet (model patch). (#9471) 2025-08-21 00:33:49 -04:00
9fa1036f60 Forgot this. (#9470) 2025-08-20 23:09:35 -04:00
0737b7e0d2 fix(userdata): catch invalid workflow filenames (#9434) (#9445) 2025-08-20 22:27:57 -04:00
0963493a9c Support for Qwen Diffsynth Controlnets canny and depth. (#9465)
These are not real controlnets but actually a patch on the model so they
will be treated as such.

Put them in the models/model_patches/ folder.

Use the new ModelPatchLoader and QwenImageDiffsynthControlnet nodes.
2025-08-20 22:26:37 -04:00
e73a9dbe30 Add that qwen edit model is supported to readme. (#9463) 2025-08-20 17:34:13 -04:00
fe01885acf LTXV: fix key frame noise mask dimensions for when real noise mask exists (#9425) 2025-08-20 03:33:10 -04:00
7139d6d93f ComfyUI version 0.3.51 2025-08-20 03:15:30 -04:00
2f52e8f05f Bump template to 0.1.62 (#9419)
* Bump template to 0.1.61

* Bump template to 0.1.62
2025-08-20 03:15:09 -04:00
8d38ea3bbf Fix bf16 precision issue with qwen image embeddings. (#9441) 2025-08-20 02:58:54 -04:00
5a8f502db5 Disable prompt weights for qwen. (#9438) 2025-08-20 01:08:11 -04:00
7cd2c4bd6a Qwen rotary embeddings should now match reference code. (#9437) 2025-08-20 00:45:27 -04:00
dfa791eb4b Rope fix for qwen vl. (#9435) 2025-08-19 20:47:42 -04:00
bddd69618b Change the TextEncodeQwenImageEdit node to use logic closer to reference. (#9432) 2025-08-19 16:49:01 -04:00
54d8fdbed0 feat(api-nodes): add Vidu Video nodes (#9368) 2025-08-19 16:30:06 -04:00
d844d8b13b api_nodes: added release version of google's models (#9304) 2025-08-19 16:29:24 -04:00
07a927517c api_nodes: add GPT-5 series models (#9325) 2025-08-19 16:29:01 -04:00
f16a70ba67 api_nodes: add MinimaxHailuoVideoNode node (#9262) 2025-08-19 16:28:27 -04:00
36b5127fd3 api_nodes: add kling-v2-1 and v2-1-master (#9257) 2025-08-19 16:28:07 -04:00
4977f203fa P2 of qwen edit model. (#9412)
* P2 of qwen edit model.

* Typo.

* Fix normal qwen.

* Fix.

* Make the TextEncodeQwenImageEdit also set the ref latent.

If you don't want it to set the ref latent and want to use the
ReferenceLatent node with your custom latent instead just disconnect the
VAE.
2025-08-18 22:38:34 -04:00
bd2ab73976 fix(WAN-nodes): invalid nodeid for WanTrackToVideo (#9396) 2025-08-18 03:26:55 -04:00
da2efeaec6 Bump frontend to 1.25.9 (#9394) 2025-08-17 20:21:02 -07:00
7f3b9b16c6 Make step index detection much more robust (#9392) 2025-08-17 18:54:07 -04:00
d4e353a94e Update template to 0.1.60 (#9377) 2025-08-17 17:38:40 -04:00
ed43784b0d WIP Qwen edit model: The diffusion model part. (#9383) 2025-08-17 16:45:39 -04:00
0f2b8525bc Qwen image model refactor. (#9375) 2025-08-16 17:51:28 -04:00
20a84166d0 record audio node (#8716)
* record audio node

* sf
2025-08-16 02:07:12 -04:00
ed2e33c69a bump frontend version to 1.25.8 (#9361) 2025-08-15 23:32:58 -04:00
1702e6df16 Implement wan2.2 camera model. (#9357)
Use the old WanCameraImageToVideo node.
2025-08-15 17:29:58 -04:00
c308a8840a Add FluxKontextMultiReferenceLatentMethod node. (#9356)
This node is only useful if someone trains the kontext model to properly
use multiple reference images via the index method.

The default is the offset method which feeds the multiple images like if
they were stitched together as one. This method works with the current
flux kontext model.
2025-08-15 15:50:39 -04:00
027c63f63a fix(OpenAIGPTImage1): set correct MIME type for multipart uploads to OpenAI edits (#9348) 2025-08-15 14:57:47 -04:00
e08ecfbd8a Add warning when using old pytorch. (#9347) 2025-08-15 00:22:26 -04:00
4e5c230f6a Fix last commit not working on older pytorch. (#9346) 2025-08-14 23:44:02 -04:00
f0d5d0111f Avoid torch compile graphbreak for older pytorch versions (#9344)
Turns out torch.compile has some gaps in context manager decorator
syntax support. I've sent patches to fix that in PyTorch, but it won't
be available for all the folks running older versions of PyTorch, hence
this trivial patch.
2025-08-14 23:41:37 -04:00
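An illustration of the syntax gap being worked around; the context manager here is a stand-in, not the actual patched code.

```python
import torch
from contextlib import contextmanager

@contextmanager
def some_ctx():
    yield

# Decorator syntax can graph-break on older torch.compile versions:
#   @some_ctx()
#   def forward(x):
#       return x * 2

# The equivalent explicit `with` block compiles cleanly:
def forward(x):
    with some_ctx():
        return x * 2

compiled = torch.compile(forward)
```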
ad19a069f6 Make SLG nodes work on Qwen Image model. (#9345) 2025-08-14 23:16:01 -04:00
5d65d6753b convert WAN nodes to V3 schema (#9201) 2025-08-14 21:48:41 -04:00
deebee4ff6 Update default parameters for Moonvalley video nodes (#9290)
* Update default parameters for Moonvalley video nodes

- Changed default negative prompts to a more extensive list for both BaseMoonvalleyVideoNode and MoonvalleyVideo2VideoNode.
- Updated default guidance scale values for both nodes to enhance prompt adherence.
- Set a fixed default seed value for consistency in video generation.

* no message

* ruff fix

---------

Co-authored-by: thorsten <thorsten@tripod-digital.co.nz>
2025-08-14 21:46:55 -04:00
fa570cbf59 Update CODEOWNERS (#9343) 2025-08-14 19:44:22 -04:00
644b23ac0b Make custom node testing checkbox optional in issue templates (#9342)
The checkbox for confirming custom node testing is now optional in both bug report and user support templates. This allows users to submit issues even if they haven't been able to test with custom nodes disabled, making the reporting process more accessible.
2025-08-14 17:36:53 -04:00
72fd4d22b6 av is an essential dependency. (#9341) 2025-08-14 16:03:21 -04:00
e4f7ea105f Added context window support to core sampling code (#9238)
* Added initial support for basic context windows - in progress

* Add prepare_sampling wrapper for context window to more accurately estimate latent memory requirements, fixed merging wrappers/callbacks dicts in prepare_model_patcher

* Made context windows compatible with different dimensions; works for WAN, but results are bad

* Fix comfy.patcher_extension.merge_nested_dicts calls in prepare_model_patcher in sampler_helpers.py

* Considering adding some callbacks to context window code to allow extensions of behavior without the need to rewrite code

* Made dim slicing cleaner

* Add Wan Context Windows node for testing

* Made context schedule and fuse method functions be stored on the handler instead of needing to be registered in core code to be found

* Moved some code around between node_context_windows.py and context_windows.py

* Change manual context window nodes names/ids

* Added callbacks to IndexListContexHandler

* Adjusted default values for context_length and context_overlap, made schema.inputs definition for WAN Context Windows less annoying

* Make get_resized_cond more robust for various dim sizes

* Fix typo

* Another small fix
2025-08-13 21:33:05 -04:00
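A sketch of basic overlapping context-window index generation along one dimension; the scheme (fixed length and overlap, clamped at the end) is an assumption, not the handler's actual schedule.

```python
def index_windows(length: int, context_length: int, overlap: int):
    step = max(context_length - overlap, 1)
    windows, start = [], 0
    while start < length:
        end = min(start + context_length, length)
        windows.append(list(range(start, end)))
        if end == length:
            break
        start += step
    return windows

# e.g. index_windows(20, 8, 4) ->
# [0..7], [4..11], [8..15], [12..19]
```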
c991a5da65 Fix XPU iGPU regressions (#9322)
* Change the bf16 check, switch non-blocking off by default with an option to force it on to regain speed on certain classes of iGPUs, and refactor the xpu check.

* Turn non_blocking off by default for xpu.

* Update README.md for Intel GPUs.
2025-08-13 19:13:35 -04:00
9df8792d4b Make last PR not crash comfy on old pytorch. (#9324) 2025-08-13 15:12:41 -04:00
3da5a07510 SDPA backend priority (#9299) 2025-08-13 14:53:27 -04:00
afa0a45206 Reduce portable size again. (#9323)
* compress more

* test

* not needed
2025-08-13 14:42:08 -04:00
615eb52049 Put back frontend version. (#9317) 2025-08-13 03:48:06 -04:00
d5c1954d5c ComfyUI version 0.3.50 2025-08-13 03:46:38 -04:00
e400f26c8f Downgrade frontend for release. (#9316) 2025-08-13 03:44:54 -04:00
5ca8e2fac3 Update release workflow to python3.13 pytorch cu129 (#9315)
* Try to reduce size of portable even more.

* Update stable release workflow to python 3.13 cu129

* Update dependencies workflow to python3.13 cu129
2025-08-13 03:01:12 -04:00
3294782d19 Update template to 0.1.59 (#9313) 2025-08-13 02:50:50 -04:00
898d88e10e Make torchaudio exception catching less specific (#9309) 2025-08-12 23:34:58 -04:00
560d38f34c Wan2.2 fun control support. (#9292) 2025-08-12 23:26:33 -04:00
e1d4f36d8d Update test release package workflow with python 3.13 cu129. (#9306) 2025-08-12 20:13:04 -04:00
1e3ae1eed8 Update template to 0.1.58 (#9302) 2025-08-12 17:14:27 -04:00
f4231a80b1 fix(Kling Image API Node): do not pass "image_type" when no image (#9271)
* fix(Kling Image API Node): do not pass "image_type" when no image

* fix(Kling Image API Node): raise client-side error when kling_v1 is used with reference image
2025-08-11 17:15:14 -04:00
2208aa616d Support SimpleTuner lycoris lora for Qwen-Image (#9280) 2025-08-11 16:56:16 -04:00
629b173837 Update template & embedded docs (#9283)
* Update template & embedded docs

* Update embedded docs to 0.2.6
2025-08-11 16:52:12 -04:00
fa340add55 remove creation of non-used asyncio_loop (#9284) 2025-08-11 16:48:17 -04:00
966f3a5206 Only show feature flags log when verbose. (#9281) 2025-08-11 05:53:01 -04:00
0552de7c7d Bump pytorch cuda and rocm versions in readme instructions. (#9273) 2025-08-10 05:03:47 -04:00
5828607ccf Not sure if AMD actually support fp16 acc but it doesn't crash. (#9258) 2025-08-09 12:49:25 -04:00
735bb4bdb1 Users report gfx1201 is buggy on flux with pytorch attention. (#9244) 2025-08-08 04:21:00 -04:00
bf2a1b5b1e async API nodes (#9129)
* converted API nodes to async

* converted BFL API nodes to async

* fixed client bug; converted gemini, ideogram, minimax

* fixed client bug; converted openai nodes

* fixed client bug; converted moonvalley, pika nodes

* fixed client bug; converted kling, luma nodes

* converted pixverse, rodin nodes

* converted tripo, veo2

* converted recraft nodes

* add lost log_request_response call
2025-08-07 23:37:50 -04:00
42974a448c _ui.py import torchaudio safety check (#9234)
* Added safety around torchaudio import in _ui.py

* Trusted cursor too much, fixed torchaudio bool
2025-08-07 17:54:09 -04:00
05df2df489 Fix RepeatLatentBatch not working on multi dim latents. (#9227) 2025-08-07 11:20:40 -04:00
37d620a6b8 Update frontend to v1.24.3 (#9175) 2025-08-06 19:52:39 -04:00
32691b16f4 Update template to 0.1.52 (#9206) 2025-08-06 13:26:29 -04:00
4c3e57b0ae Fixed an issue where qwenLora could not be loaded properly. (#9208) 2025-08-06 13:23:11 -04:00
9126c0cfe4 Qwen Image model merging node. (#9202) 2025-08-06 04:07:04 -04:00
d8c51ba15a Add Qwen Image model to readme. (#9191) 2025-08-05 07:41:18 -04:00
32a95bba8a ComfyUI version 0.3.49 2025-08-05 07:33:02 -04:00
da1ad9b516 Update template to 0.1.51 (#9187) 2025-08-05 07:24:12 -04:00
d044a24398 Fix default shift and any latent size for qwen image model. (#9186) 2025-08-05 06:12:27 -04:00
5be6fd09ff Update template to 0.1.48 (#9182) 2025-08-05 03:48:56 -04:00
f69609bbd6 Add Veo3 video generation node with audio support (#9110)
- Create new Veo3VideoGenerationNode that extends VeoVideoGenerationNode
- Add support for generateAudio parameter (only for Veo3 models)
- Support new Veo3 models: veo-3.0-generate-001, veo-3.0-fast-generate-001
- Fix Veo3 duration constraint to 8 seconds only
- Update original node to be clearly Veo 2 only
- Update API paths to use model parameter: /proxy/veo/{model}/generate
- Regenerate API types from staging to include generateAudio parameter
- Fix TripoModelVersion enum reference after regeneration
- Mark generated API types file in .gitattributes
2025-08-05 01:52:25 -04:00
c012400240 Initial support for qwen image model. (#9179) 2025-08-04 22:53:25 -04:00
03895dea7c Fix another issue with the PR. (#9170) 2025-08-04 04:33:04 -04:00
84f9759424 Add some warnings and prevent crash when cond devices don't match. (#9169) 2025-08-04 04:20:12 -04:00
7991341e89 Various fixes for broken things from earlier PR. (#9168) 2025-08-04 04:02:40 -04:00
140ffc7fdc Fix broken controlnet from last PR. (#9167) 2025-08-04 03:28:12 -04:00
182f90b5ec Lower cond vram use by casting at the same time as device transfer. (#9159) 2025-08-04 03:11:53 -04:00
aebac22193 Cleanup. (#9160) 2025-08-03 07:08:11 -04:00
13aaa66ec2 Make sure context is on the right device. (#9154) 2025-08-02 15:09:23 -04:00
5f582a9757 Make sure all the conds are on the right device. (#9151) 2025-08-02 15:00:13 -04:00
fbcc23945d Update template to 0.1.47 (#9153) 2025-08-02 14:15:29 -04:00
3dfefc88d0 API for Recently Used Items (#8792)
* feat: add file creation time to model file metadata and user file info

* fix linting
2025-08-01 22:02:06 -04:00
bff60b5cfc ComfyUI version 0.3.48 2025-08-01 20:03:22 -04:00
1e638a140b Tiny wan vae optimizations. (#9136) 2025-08-01 05:25:38 -04:00
4696d74305 update template to 0.1.45 (#9135) 2025-08-01 03:06:18 -04:00
5ee381c058 Fix WanFirstLastFrameToVideo node when no clip vision. (#9134) 2025-07-31 23:33:27 -04:00
4887743a2a V3 Node Schema Definition - initial (#8656) 2025-07-31 18:02:12 -04:00
97b8a2c26a More accurate explanation of release process. (#9126) 2025-07-31 05:46:23 -04:00
97eb256a35 Add support for partial execution in backend (#9123)
When a prompt is submitted, it can optionally include
`partial_execution_targets` as a list of ids. If it does, rather than
adding all outputs to the execution list, we add only those in the list.
2025-07-30 22:55:28 -04:00
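A sketch of a prompt submission using the new field; the workflow graph itself is elided and the node ids are illustrative.

```python
import json
from urllib import request

payload = {
    "prompt": {},  # the normal workflow graph goes here
    "partial_execution_targets": ["9", "12"],  # only these outputs run
}
req = request.Request(
    "http://127.0.0.1:8188/prompt",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
# request.urlopen(req) would submit it
```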
61b08d4ba6 Replace manual x * sigmoid(x) with torch silu in VAE nonlinearity (#9057) 2025-07-30 19:25:56 -04:00
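The two forms compute the same value; the built-in op avoids materializing `sigmoid(x)` as a separate tensor.

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 16, 16)
assert torch.allclose(x * torch.sigmoid(x), F.silu(x), atol=1e-6)
```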
da9dab7edd Small wan camera memory optimization. (#9111) 2025-07-30 05:55:26 -04:00
d2aaef029c Update template to 0.1.44 (#9104) 2025-07-29 22:50:49 -04:00
0a3d062e06 ComfyAPI Core v0.0.2 (#8962)
* ComfyAPI Core v0.0.2

* Respond to PR feedback

* Fix Python 3.9 errors

* Fix missing backward compatibility proxy

* Reorganize types a bit

The input types, input impls, and utility types are now all available in
the versioned API. See the change in `comfy_extras/nodes_video.py` for
an example of their usage.

* Remove the need for `--generate-api-stubs`

* Fix generated stubs differing by Python version

* Fix ruff formatting issues
2025-07-29 22:17:22 -04:00
2f74e17975 ComfyUI version 0.3.47 2025-07-29 20:08:25 -04:00
dca6bdd4fa Make wan2.2 5B i2v take a lot less memory. (#9102) 2025-07-29 19:44:18 -04:00
7d593baf91 Extra reserved vram on large cards on windows. (#9093) 2025-07-29 04:07:45 -04:00
c60dc4177c Remove unecessary clones in the wan2.2 VAE. (#9083) 2025-07-28 14:48:19 -04:00
5d4cc3ba1b ComfyUI 0.3.46 2025-07-28 08:04:04 -04:00
9f1388c0a3 Add wan2.2 to readme. (#9081) 2025-07-28 08:01:53 -04:00
a88788dce6 Wan 2.2 support. (#9080) 2025-07-28 08:00:23 -04:00
d0210fe2e5 Update template to 0.1.41 (#9079) 2025-07-28 07:55:02 -04:00
e6d9f62744 Add Moonvalley Marey V2V node with updated input validation (#9069)
* [moonvalley] Update V2V node to match API specification

- Add exact resolution validation for supported resolutions (1920x1080, 1080x1920, 1152x1152, 1536x1152, 1152x1536)
- Change frame count validation from divisible by 32 to 16
- Add MP4 container format validation
- Remove internal parameters (steps, guidance_scale) from V2V inference params
- Update video duration handling to support only 5 seconds (auto-trim if longer)
- Add motion_intensity parameter (0-100) for Motion Transfer control type
- Add get_container_format() method to VideoInput classes

* update negative prompt
2025-07-27 19:51:36 -04:00
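A sketch of the resolution and frame-count validation rules listed above; the function name is illustrative.

```python
SUPPORTED_RESOLUTIONS = {
    (1920, 1080), (1080, 1920), (1152, 1152), (1536, 1152), (1152, 1536),
}

def validate_v2v_input(width: int, height: int, frame_count: int) -> None:
    if (width, height) not in SUPPORTED_RESOLUTIONS:
        raise ValueError(f"unsupported resolution {width}x{height}")
    if frame_count % 16 != 0:
        raise ValueError("frame count must be divisible by 16")
```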
78672d0ee6 Small readme update. (#9071) 2025-07-27 07:42:58 -04:00
1ef70fcde4 Fix the broken link (#9060) 2025-07-26 17:25:33 -04:00
0621d73a9c Remove useless code. (#9059) 2025-07-26 04:44:19 -04:00
b850d9a8bb Add map_function to get_history. (#9056) 2025-07-25 21:25:45 -04:00
c60467a148 Update negative prompt for Moonvalley nodes (#9038)
Co-authored-by: thorsten <thorsten@tripod-digital.co.nz>
2025-07-25 17:27:03 -04:00
c0207b473f Fix issue with line endings github workflow. (#9053) 2025-07-25 17:25:08 -04:00
93bc2f8e4d Update template to 0.1.40 (#9048) 2025-07-25 13:24:23 -04:00
e6e5d33b35 Remove useless code. (#9041)
This is only needed on old pytorch 2.0 and older.
2025-07-25 04:58:28 -04:00
4293e4da21 Add WAN ATI support (#8874)
* Add WAN ATI support

* Fixes

* Fix length

* Remove extra functions

* Fix

* Fix

* Ruff fix

* Remove torch.no_grad

* Add batch trajectory logic

* Scale inputs before and after motion patch

* Batch image/trajectory

* Ruff fix

* Clean up
2025-07-24 20:59:19 -04:00
69cb57b342 Print xpu device name. (#9035) 2025-07-24 15:06:25 -04:00
d03ae077b4 Added parameter required_frontend_version in the /system_stats API response (#8875)
* Added the parameter required_frontend_version to the /system_stats API response

* Update server.py

* Created a function get_required_frontend_version and wrote tests for it

* Refactored the function to return the currently installed frontend package version

* Moved required_frontend to a new function and imported that in server.py

* Corrected test cases using mocking techniques

* Corrected files to comply with ruff formatting
2025-07-24 14:05:54 -04:00
0ccc88b03f Support Iluvatar CoreX (#8585)
* Support Iluvatar CoreX
Co-authored-by: mingjiang.li <mingjiang.li@iluvatar.com>
2025-07-24 13:57:36 -04:00
eb2f78b4e0 [Training Node] algo support, grad acc, optional grad ckpt (#9015)
* Add factorization utils for lokr

* Add lokr train impl

* Add loha train impl

* Add adapter map for algo selection

* Add optional grad ckpt and algo selection

* Update __init__.py

* correct key name for loha

* Use custom fwd/bwd func and better init for loha

* Support gradient accumulation

* Fix bugs of loha

* use more stable init

* Add OFT training

* linting
2025-07-23 20:57:27 -04:00
e729a5cc11 Separate denoised and noise estimation in Euler CFG++ (#9008)
This will change their behavior with the sampling CONST type.
It also combines euler_cfg_pp and euler_ancestral_cfg_pp into one main function.
2025-07-23 19:47:05 -04:00
e78d230496 Only enable cuda malloc on cuda torch. (#9031) 2025-07-23 19:37:43 -04:00
d3504e1778 Enable pytorch attention by default for gfx1201 on torch 2.8 (#9029) 2025-07-23 19:21:29 -04:00
a86a58c308 Fix xpu function not implemented p2. (#9027) 2025-07-23 18:18:20 -04:00
39dda1d40d Fix xpu function not implemented. (#9026) 2025-07-23 18:10:59 -04:00
5ad33787de Add default device argument. (#9023) 2025-07-23 14:20:49 -04:00
255f139863 Add xpu version for async offload and some other things. (#9004) 2025-07-22 15:20:09 -04:00
5ac9ec214b Try to fix line endings workflow. (#9001) 2025-07-22 04:07:51 -04:00
0aa1c58b04 This is not needed. (#8991) 2025-07-21 16:48:25 -04:00
5249e45a1c Add hidream e1.1 example to readme. (#8990) 2025-07-21 15:23:41 -04:00
54a45b9967 Replace torchaudio.load with pyav. (#8989) 2025-07-21 14:19:14 -04:00
9a470e073e ComfyUI version 0.3.45 2025-07-21 14:05:43 -04:00
7d627f764c Update template to 0.1.39 (#8981) 2025-07-20 15:58:35 -04:00
a0c0785635 Document what the fast_fp16_accumulation is in the portable. (#8973) 2025-07-20 01:24:09 -04:00
100c2478ea Add SamplingPercentToSigma node (#8963)
It's helpful to adjust start_percent or end_percent based on the corresponding sigma.
2025-07-19 23:09:11 -04:00
1da5639e86 Update template to 0.1.37 (#8967) 2025-07-19 06:08:00 -04:00
1b96fae1d4 Add nested style of dual cfg to DualCFGGuider node. (#8965) 2025-07-19 04:55:23 -04:00
7f492522b6 Forgot this (#8957) 2025-07-18 05:43:02 -04:00
650838fd6f Experimental CFGNorm node. (#8942)
This is from the new hidream e1 1 model code. Figured it might be useful as a generic cfg trick.
2025-07-17 04:11:07 -04:00
491fafbd64 Silence clip tokenizer warning. (#8934) 2025-07-16 14:42:07 -04:00
9bc2798f72 LTXV VAE decoder: switch default padding mode (#8930) 2025-07-16 13:54:38 -04:00
50afba747c Add attempt to work around the safetensors mmap issue. (#8928) 2025-07-16 03:42:17 -04:00
6b8062f414 Fix MaskComposite error when destination has 2 dimensions (#8915)
Fix code that is using the original `destination` input instead of the reshaped value.
2025-07-15 21:08:27 -04:00
b1ae4126c3 Add action to detect windows line endings. (#8917) 2025-07-15 02:27:18 -04:00
9dabda19f0 Update nodes_gemini.py (#8912) 2025-07-14 20:59:35 -04:00
543c24108c Fix wrong reference bug (#8910) 2025-07-14 20:45:55 -04:00
260a5ca5d9 Allow the prompt request to specify the prompt ID. (#8189)
This makes it easier to write asynchronous clients that submit requests, because they can store the task immediately.
Duplicate prompt IDs are rejected by the job queue.
2025-07-14 14:48:31 -04:00
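A sketch of the client pattern this enables: generate the id up front so the pending task can be stored before the server even responds.

```python
import uuid

prompt_id = str(uuid.uuid4())
payload = {"prompt": {}, "prompt_id": prompt_id}  # graph elided
pending = {prompt_id: {"status": "queued"}}  # track immediately
# Duplicate prompt IDs are rejected by the job queue.
```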
861c3bbb3d Update template to 0.1.36 (#8904) 2025-07-14 13:27:57 -04:00
9ca581c941 Remove windows line endings. (#8902) 2025-07-14 13:10:20 -04:00
4831e9c2c4 Refactor previous pr. (#8893) 2025-07-13 04:59:17 -04:00
480375f349 Remove auth tokens from history storage (#8889)
Remove auth_token_comfy_org and api_key_comfy_org from extra_data before
storing prompt history to prevent sensitive authentication tokens from
being persisted in the history endpoint response.
2025-07-13 04:46:27 -04:00
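A sketch of the scrubbing described above, using the key names from the commit message; the helper name is illustrative.

```python
SENSITIVE_KEYS = ("auth_token_comfy_org", "api_key_comfy_org")

def scrub_extra_data(extra_data: dict) -> dict:
    # Drop auth material before extra_data is persisted to history.
    return {k: v for k, v in extra_data.items() if k not in SENSITIVE_KEYS}
```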
b40143984c Add model detection error hint for lora. (#8880) 2025-07-12 03:49:26 -04:00
b43916a134 Fix fresca's input and output (#8871) 2025-07-11 12:52:58 -04:00
7bc7dd2aa2 Execute async node earlier (#8865) 2025-07-11 12:51:06 -04:00
938d3e8216 Remove windows line endings. (#8866) 2025-07-11 02:37:51 -04:00
8f05fb48ea [fix] increase Kling API polling timeout to prevent user timeouts (#8860)
Extends polling duration from 10 minutes to ~68 minutes (256 attempts × 16 seconds) to accommodate longer Kling API operations that were frequently timing out for users.
2025-07-10 18:00:29 -04:00
b7ff5bd14d Fix python3.9 (#8858) 2025-07-10 15:21:18 -04:00
2b653e8c18 Support for async node functions (#8830)
* Support for async execution functions

This commit adds support for node execution functions defined as async. When
a node's execution function is defined as async, we can continue
executing other nodes while it is processing.

Standard uses of `await` should "just work", but people will still have
to be careful if they spawn actual threads. Because torch doesn't really
have async/await versions of functions, this won't particularly help
with most locally-executing nodes, but it does work for e.g. web
requests to other machines.

In addition to the execute function, the `VALIDATE_INPUTS` and
`check_lazy_status` functions can also be defined as async, though we'll
only resolve one node at a time right now for those.

* Add the execution model tests to CI

* Add a missing file

It looks like this got caught by .gitignore? There's probably a better
place to put it, but I'm not sure what that is.

* Add the websocket library for automated tests

* Add additional tests for async error cases

Also fixes one bug that was found when an async function throws an error
after being scheduled on a task.

* Add a feature flags message to reduce bandwidth

We now only send 1 preview message of the latest type the client can
support.

We'll add a console warning when the client fails to send a feature
flags message at some point in the future.

* Add async tests to CI

* Don't actually add new tests in this PR

Will do it in a separate PR

* Resolve unit test in GPU-less runner

* Just remove the tests that GHA can't handle

* Change line endings to UNIX-style

* Avoid loading model_management.py so early

Because model_management.py has a top-level `logging.info`, we have to
be careful not to import that file before we call `setup_logging`. If we
do, we end up having the default logging handler registered in addition
to our custom one.
2025-07-10 14:46:19 -04:00
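A sketch of what an async-capable node can look like; the node name and endpoint are illustrative, and the class layout follows the usual node conventions.

```python
import aiohttp  # async HTTP client, the kind of workload this helps with

class AsyncFetchNode:
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"url": ("STRING", {"default": "http://example.com"})}}

    RETURN_TYPES = ("STRING",)
    FUNCTION = "fetch"
    CATEGORY = "example"

    async def fetch(self, url):
        # While this await is pending, the executor can run other nodes.
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                text = await resp.text()
        return (text,)
```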
1fd306824d Add warning to catch torch import mistakes. (#8852) 2025-07-10 01:03:27 -04:00
1205afc708 Better training loop implementation (#8820) 2025-07-09 11:41:22 -04:00
5612670ee4 Remove unmaintained notebook. (#8845) 2025-07-09 03:45:48 -04:00
181a9bf26d Support Multi Image-Caption dataset in lora training node (#8819)
* initial impl of multi img/text dataset

* Update nodes_train.py

* Support Kohya-ss structure
2025-07-08 20:18:04 -04:00
aac10ad23a Add SA-Solver sampler (#8834) 2025-07-08 16:17:06 -04:00
974254218a Un-hardcode chroma patch_size (#8840) 2025-07-08 15:56:59 -04:00
c5de4955bb ComfyUI version 0.3.44 2025-07-08 08:56:38 -04:00
9fd0cd7cf7 Add Moonvalley nodes (#8832) 2025-07-08 08:54:30 -04:00
b5e97db9ac Update template to 0.1.35 (#8831) 2025-07-08 08:52:02 -04:00
1359c969e4 Update template to 0.1.34 (#8829) 2025-07-07 23:35:41 -04:00
059cd38aa2 Update template and node docs package version (#8825) 2025-07-07 20:43:56 -04:00
e740dfd806 Fix warning in audio save nodes. (#8818) 2025-07-07 03:16:00 -04:00
7eab7d2944 Remove dependency on deprecated torchaudio.save function (#8815) 2025-07-06 14:01:32 -04:00
75d327abd5 Remove some useless code. (#8812) 2025-07-06 07:07:39 -04:00
ee615ac269 Add warning when loading file unsafely. (#8800) 2025-07-05 14:34:57 -04:00
27870ec3c3 Add that ckpt files are loaded safely to README. (#8791) 2025-07-04 04:49:11 -04:00
f41f323c52 Add the denoising step to several samplers (#8780) 2025-07-03 19:20:53 -04:00
f74fc4d927 Add ImageRotate and ImageFlip nodes. (#8789) 2025-07-03 19:16:30 -04:00
ae26cd99b5 Update template to 0.1.32 (#8782) 2025-07-03 14:41:16 -04:00
e9af97ba1a Use torch cu129 for nvidia pytorch nightly. (#8786)
* update nightly workflow with cu129

* Remove unused file to lower standalone size.
2025-07-03 14:39:11 -04:00
d9277301d2 Initial code for new SLG node (#8759) 2025-07-02 20:13:43 -04:00
34c8eeec06 Fix ImageColorToMask not returning right mask values. (#8771) 2025-07-02 15:35:11 -04:00
9f1069290c nodes_lt: fixes to latent conditioning at index > 0 (#8769) 2025-07-02 15:34:51 -04:00
111f583e00 Fallback to regular op when fp8 op throws exception. (#8761) 2025-07-02 00:57:13 -04:00
79ed752748 support upload 3d model to custom subfolder (#8597) 2025-07-01 20:43:48 -04:00
772de7c006 PerpNeg Guider optimizations. (#8753) 2025-07-01 03:09:07 -04:00
b22e97dcfa Migrate ER-SDE from VE to VP algorithm and add its sampler node (#8744)
Apply alpha scaling in the algorithm for reverse-time SDE and add custom ER-SDE sampler node for other solver types (SDE, ODE).
2025-07-01 02:38:52 -04:00
f02de13316 Add TCFG node (#8730) 2025-07-01 02:33:07 -04:00
c46268bf60 Update requirements.txt (#8741) 2025-06-30 14:18:43 -04:00
cf49a2c5b5 Dual cfg node optimizations when cfg is 1.0 (#8747) 2025-06-30 14:18:25 -04:00
170c7bb90c Fix contiguous issue with pytorch nightly. (#8729) 2025-06-29 06:38:40 -04:00
2a0b138feb build: add gh action to process releases (#8652) 2025-06-28 19:11:40 -04:00
e195c1b13f Make stable release workflow publish drafts. (#8723) 2025-06-28 19:11:16 -04:00
5b4eb021cb Perpneg guider with updated pre and post-cfg (#8698) 2025-06-28 18:13:13 -04:00
396454fa41 Reorder the schedulers so simple is the default one. (#8722) 2025-06-28 18:12:56 -04:00
a3cf272522 Skip custom node logic completely if disabled and no whitelisted nodes. (#8719) 2025-06-28 15:53:40 -04:00
ba9548f756 “--whitelist-custom-nodes” args for comfy core to go with “--disable-all-custom-nodes” for development purposes (#8592)
* feat: “--whitelist-custom-nodes” args for comfy core to go with “--disable-all-custom-nodes” for development purposes

* feat: Simplify custom nodes whitelist logic to use consistent code paths
2025-06-28 15:24:02 -04:00
e18f53cca9 ComfyUI version 0.3.43 2025-06-27 17:22:02 -04:00
c36be0ea09 Fix memory estimation bug with kontext. (#8709) 2025-06-27 17:21:12 -04:00
9093301a49 Don't add tiny bit of random noise when VAE encoding. (#8705)
Shouldn't change outputs but might make things a tiny bit more
deterministic.
2025-06-27 14:14:56 -04:00
bd951a714f Add Flux Kontext and Omnigen 2 models to readme. (#8682) 2025-06-26 12:26:29 -04:00
6493709d6a ComfyUI version 0.3.42 2025-06-26 11:47:07 -04:00
b976f934ae Update frontend to 1.23.4 (#8681) 2025-06-26 11:44:12 -04:00
7d8cf4cacc Update requirements.txt (#8680) 2025-06-26 11:39:40 -04:00
68f4496b8e Update frontend to 1.23.3 (#8678) 2025-06-26 11:29:03 -04:00
ef5266b1c1 Support Flux Kontext Dev model. (#8679) 2025-06-26 11:28:41 -04:00
a96e65df18 Disable omnigen2 fp16 on older pytorch versions. (#8672) 2025-06-26 03:39:09 -04:00
93a49a45de Bump minimum transformers version. (#8671) 2025-06-26 02:33:02 -04:00
ec70ed6aea Omnigen2 model implementation. (#8669) 2025-06-25 19:35:57 -04:00
7a13f74220 unet -> diffusion model (#8659) 2025-06-25 04:52:34 -04:00
8042eb20c6 Singlestep DPM++ SDE for RF (#8627)
Refactor the algorithm, and apply alpha scaling.
2025-06-24 14:59:09 -04:00
bd9f166c12 Cosmos predict2 model merging nodes. (#8647) 2025-06-24 05:17:16 -04:00
dd94416db2 Indicate that directml is not recommended in the README. (#8644) 2025-06-23 14:04:49 -04:00
ae0e7c4dff Resize and pad image node. (#8636) 2025-06-22 17:59:31 -04:00
78f79266a9 Allow padding in ImageStitch node to be white. (#8631) 2025-06-22 00:19:41 -04:00
1883e70b43 Fix exception when using a noise mask with cosmos predict2. (#8621)
* Fix exception when using a noise mask with cosmos predict2.

* Fix ruff.
2025-06-21 03:30:39 -04:00
31ca603ccb Improve the log time function for 10 minute + renders (#6207)
* modified:   main.py

* Update main.py
2025-06-20 23:04:55 -04:00
f7fb193712 Small flux optimization. (#8611) 2025-06-20 05:37:32 -04:00
7e9267fa77 Make flux controlnet work with sd3 text enc. (#8599) 2025-06-19 18:50:05 -04:00
91d40086db Fix pytorch warning. (#8593) 2025-06-19 11:04:52 -04:00
5b12b55e32 Add new fields to the config types (#8507) 2025-06-18 15:12:29 -04:00
e9e9a031a8 Show a better error when the workflow OOMs. (#8574) 2025-06-18 06:55:21 -04:00
d7430c529a Update frontend to 1.22.2 (#8567) 2025-06-17 18:58:28 -04:00
cd88f709ab Update template version (#8563) 2025-06-17 04:11:59 -07:00
4459a17e82 Add Cosmos Predict2 to README. (#8562) 2025-06-17 05:18:01 -04:00
483b3e62e0 ComfyUI version v0.3.41 2025-06-16 23:34:46 -04:00
8e81c507d2 Multistep DPM++ SDE samplers for RF (#8541)
Include alpha in sampling and minor refactoring
2025-06-16 14:47:10 -04:00
e1c6dc720e Allow setting min_length with tokenizer_data. (#8547) 2025-06-16 13:43:52 -04:00
7ea79ebb9d Add correct eps to ltxv rmsnorm. (#8542) 2025-06-15 12:21:25 -04:00
ae75a084df SaveLora now saves in the same filename format as all the other nodes. (#8538) 2025-06-15 03:44:59 -04:00
d6a2137fc3 Support Cosmos predict2 image to video models. (#8535)
Use the CosmosPredict2ImageToVideoLatent node.
2025-06-14 21:37:07 -04:00
53e8d8193c Generalize SEEDS samplers (#8529)
Restore VP algorithm for RF and refactor noise_coeffs and half-logSNR calculations
2025-06-14 16:58:16 -04:00
29596bd53f Small cosmos attention code refactor. (#8530) 2025-06-14 05:02:05 -04:00
803af1e0c3 allow extra settings from pyproject.toml (#8526) 2025-06-13 23:11:55 -04:00
6673939e76 Bump template to 0.1.28 (#8510) 2025-06-13 23:11:00 -04:00
f74778e75d Bump embedded docs to 0.2.2 (#8512) 2025-06-13 23:06:28 -04:00
520eb77b72 LoRA Trainer: LoRA training node in weight adapter scheme (#8446) 2025-06-13 19:25:59 -04:00
5bf69bde35 Add cosmos_rflow option to ModelSamplingContinuousEDM node. (#8523)
This is for the cosmos predict2 model.
2025-06-13 17:47:52 -04:00
c69af655aa Uncap cosmos predict2 res and fix mem estimation. (#8518) 2025-06-13 07:30:18 -04:00
251f54a2ad Basic initial support for cosmos predict2 text to image 2B and 14B models. (#8517) 2025-06-13 07:05:23 -04:00
c6529c0d77 don't validate string inputs with VALIDATE_INPUTS (#8508) 2025-06-12 20:17:10 -04:00
baa8c8cdd3 Add '@prerelease' to use latest test frontend (#8501)
* Add '@prerelease' to use latest test frontend

Allows download of pre-release versions.

Will always get the latest pre-release version - even if it's older than the latest stable release.

* nit
2025-06-12 17:03:27 -07:00
40fd39c7cb debug -> warning (#8506) 2025-06-12 17:14:59 -04:00
4d1c4b9797 Auto register web folder (#8505)
* auto register web folder from pyproject

* need pydantic-settings as dependency

* wrapped try/except for config_parser

* sf
2025-06-12 16:24:39 -04:00
d2566eb4b2 Add a warning for old python versions. (#8504) 2025-06-12 15:38:33 -04:00
ef7e885fe4 Revert "Update requirements.txt (#8487)" (#8502)
This reverts commit 373a9386a4.
2025-06-12 14:10:48 -04:00
ecb8d15e7a Allow specifying any frontend semver suffixes (#8498) 2025-06-11 21:41:30 -04:00
365f9ed157 Revert "auto register web folder from pyproject (#8478)" (#8497)
This reverts commit 9685d4f3c3.
2025-06-11 17:28:04 -04:00
50c605e957 Add support for sqlite database (#8444)
* Add support for sqlite database

* fix
2025-06-11 16:43:39 -04:00
9685d4f3c3 auto register web folder from pyproject (#8478)
* auto register web folder from pyproject

* need pydantic-settings as dependency
2025-06-11 16:21:28 -04:00
8a4ff747bd Fix mistake in last commit. (#8496)
* Move to right place.
2025-06-11 15:13:29 -04:00
af1eb58be8 Fix black images on some flux models in fp16. (#8495) 2025-06-11 15:09:11 -04:00
373a9386a4 Update requirements.txt (#8487) 2025-06-11 05:10:46 -04:00
6e28a46454 Apple most likely is never fixing the fp16 attention bug. (#8485) 2025-06-10 13:06:24 -04:00
c7b25784b1 Fix WebcamCapture IS_CHANGED signature (#8413) 2025-06-09 13:05:54 -04:00
7f800d04fa Enable AMD fp8 and pytorch attention on some GPUs. (#8474)
Information is from the pytorch source code.
2025-06-09 12:50:39 -04:00
97755eed46 Enable fp8 ops by default on gfx1201 (#8464) 2025-06-08 14:15:34 -04:00
daf9d25ee2 Cleaner torch version comparisons. (#8453) 2025-06-07 10:01:15 -04:00
3b4b171e18 Alternate fix for #8435 (#8442) 2025-06-06 09:43:27 -04:00
d8759c772b Update README.md (#8427) 2025-06-05 10:44:29 -07:00
4248b1618f Let chroma TE work on regular flux. (#8429) 2025-06-05 10:07:17 -04:00
866f6cdab4 ComfyUI version 0.3.40 2025-06-04 22:18:54 -04:00
3aa83feeec [refactor] remove version prefixes from Ideogram node categories (#8418)
Simplifies node organization by consolidating all Ideogram nodes under a single category instead of version-specific subcategories.
2025-06-04 21:56:38 -04:00
871749c208 Add batch to GetImageSize node. (#8419) 2025-06-04 09:40:21 -04:00
fcc1643c52 Sub call to deprecated pillow API Image.ANTIALIAS (#8415)
ANTIALIAS was removed in Pillow 10.0.0
2025-06-04 09:03:42 -04:00
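For reference on the substitution in the commit above: `Image.ANTIALIAS` was removed in Pillow 10.0.0, and the same filter now lives under `Image.Resampling.LANCZOS` (available since Pillow 9.1). A minimal sketch of the fix:

```python
from PIL import Image

img = Image.open("input.png")
# Image.ANTIALIAS was removed in Pillow 10.0.0; Image.Resampling.LANCZOS
# is the same filter under its current name.
resized = img.resize((512, 512), Image.Resampling.LANCZOS)
```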
20687293fe Update frontend to 1.21.7 (#8410) 2025-06-04 08:57:13 -04:00
47d55b8b45 add support to read pyproject.toml from custom node (#8357)
* add support to read pyproject.toml from custom node

* sf

* use pydantic instead

* sf

* use pydantic_settings

* remove unnecessary try/catch and handle single-file python node

* sf
2025-06-03 19:59:13 -04:00
310f4b6ef8 Add api nodes to readme. (#8402) 2025-06-03 04:26:44 -04:00
856448060c [feat] Add GetImageSize node (#8386)
* [feat] Add GetImageSize node to return image dimensions

Added a simple GetImageSize node in comfy_extras/nodes_images.py that returns width and height of input images. The node displays dimensions on the UI via PromptServer and provides width/height as outputs for further processing.

* add display name mapping

* [fix] Add server module mock to unit tests for PromptServer import

Updated test to mock server module preventing import errors from the new PromptServer usage in GetImageSize node. Uses direct import pattern consistent with rest of codebase.
2025-06-02 21:57:50 -04:00
312d511630 Style fix. (#8390) 2025-06-02 07:22:02 -04:00
4f4f1c642a Update fix for potential XSS on /view (#8384)
* Update fix for potential XSS on /view

This commit uses mimetypes to add more restricted filetypes and prevent them from being served, since mimetypes are what browsers use to determine how to serve files.

* Fix typo

Fixed a typo that prevented the program from running
2025-06-02 06:52:44 -04:00
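A rough sketch of the mimetype-based approach the commit above describes (not ComfyUI's actual `/view` code; `is_safe_to_serve` and the blocklist are hypothetical): guess the content type a browser would apply and refuse anything it might render as an active page.

```python
import mimetypes

# Hypothetical helper illustrating the idea: block mimetypes a browser
# could execute or render as HTML, which is what enables XSS via /view.
BLOCKED_MIMETYPES = {"text/html", "application/xhtml+xml", "image/svg+xml"}

def is_safe_to_serve(filename: str) -> bool:
    content_type, _ = mimetypes.guess_type(filename)
    return content_type not in BLOCKED_MIMETYPES
```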
010954d277 [BugFix] Update frontend to 1.21.6 (#8383) 2025-06-02 14:57:44 +10:00
6d46bb4b4c [BugFix] Update frontend to 1.21.5 (#8382) 2025-06-01 16:47:14 -04:00
67f57c5bcc [feat] add custom node testing requirement to issue templates (#8374)
Adds mandatory checkbox to bug report and user support templates requiring users to confirm they've tested with custom nodes disabled before submitting issues.
2025-06-01 15:47:07 -04:00
fd943c928f [BugFix] Update frontend to 1.21.4 (#8377) 2025-06-01 13:57:53 -04:00
d3bd983b91 Bump template to 0.1.25 (#8372) 2025-06-01 05:41:17 -04:00
fb4754624d Make the casting in lists the same as regular inputs. (#8373) 2025-06-01 05:39:54 -04:00
180db6753f Add Help Menu in NodeLibrarySidebarTab (#8179) 2025-06-01 04:32:32 -04:00
d062fcc5c0 [feat] Add ImageStitch node for concatenating images (#8369)
* [feat] Add ImageStitch node for concatenating images with borders

Add ImageStitch node that concatenates images in four directions with optional borders and intelligent size handling. Features include optional second image input, configurable borders with color selection, automatic batch size matching, and dimension alignment via padding or resizing.

Upstreamed from https://github.com/kijai/ComfyUI-KJNodes with enhancements for better error handling and comprehensive test coverage.

* [fix] Fix CI issues with CUDA dependencies and linting

- Mock CUDA-dependent modules in tests to avoid CI failures on CPU-only runners
- Fix ruff linting issues for code style compliance

* [fix] Improve CI compatibility by mocking nodes module import

Prevent CUDA initialization chain by mocking the nodes module at import time,
which is cleaner than deep mocking of CUDA-specific functions.

* [refactor] Clean up ImageStitch tests

- Remove unnecessary sys.path manipulation (pythonpath set in pytest.ini)
- Remove metadata tests that test framework internals rather than functionality
- Rename complex scenario test to be more descriptive of what it tests

* [refactor] Rename 'border' to 'spacing' for semantic accuracy

- Change border_width/border_color to spacing_width/spacing_color in API
- Update all tests to use spacing terminology
- Update comments and variable names throughout
- More accurately describes the gap/separator between images
2025-06-01 04:28:52 -04:00
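At its core the ImageStitch node described above is a concatenation along height or width with an optional spacer strip between the two images. A toy sketch under ComfyUI's assumed [B, H, W, C] tensor layout (the real node also handles batch matching and resizing, which this omits):

```python
import torch

def stitch(a: torch.Tensor, b: torch.Tensor, direction: str = "right",
           spacing_width: int = 0, spacing_value: float = 1.0) -> torch.Tensor:
    """Concatenate two [B, H, W, C] image batches with an optional spacer strip."""
    dim = 2 if direction in ("left", "right") else 1  # width vs. height axis
    if direction in ("left", "top"):
        a, b = b, a  # second image goes before the first
    parts = [a, b]
    if spacing_width > 0:
        shape = list(a.shape)
        shape[dim] = spacing_width
        parts.insert(1, torch.full(shape, spacing_value, dtype=a.dtype, device=a.device))
    return torch.cat(parts, dim=dim)
```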
456abad834 Update frontend to 1.21 (#8366) 2025-06-01 01:10:04 -04:00
19e45e9b0e Make it easier to pass lists of tensors to models. (#8358) 2025-05-31 20:00:20 -04:00
97f23b81f3 Bump template to 0.1.23 (#8353)
Correct some error settings in VACE
2025-05-30 23:05:42 -07:00
08b7cc7506 use fused multiply-add pointwise ops in chroma (#8279) 2025-05-30 18:09:54 -04:00
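Fused multiply-add collapses an `a + b * c` pointwise pattern into a single kernel instead of two. A small illustration of the kind of rewrite the commit above refers to, using `torch.addcmul`:

```python
import torch

a, b, c = torch.randn(3, 4), torch.randn(3, 4), torch.randn(3, 4)
# x = a + b * c as one fused pointwise op instead of a mul followed by an add:
x = torch.addcmul(a, b, c)
assert torch.allclose(x, a + b * c)
```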
6c319cbb4e fix: custom comfy-api-base works with subpath (#8332) 2025-05-30 17:51:28 -04:00
df1aebe52e Remove huchenlei from CODEOWNERS (#8350) 2025-05-30 17:27:52 -04:00
704fc78854 Put ROCm version in tuple to make it easier to enable stuff based on it. (#8348) 2025-05-30 15:41:02 -04:00
1d9fee79fd Add node for regex replace(sub) operation (#8340)
* Add node for regex replace(sub) operation

* Apply suggestions from code review

add tooltips

Co-authored-by: Christian Byrne <abolkonsky.rem@gmail.com>

* Fix indentation

---------

Co-authored-by: Christian Byrne <abolkonsky.rem@gmail.com>
2025-05-30 15:08:59 -04:00
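The regex replace node above boils down to Python's `re.sub`. A minimal sketch of the operation (the node's actual input schema lives in the repo):

```python
import re

# re.sub applies `replace` to every non-overlapping match of `pattern`.
def regex_replace(string: str, pattern: str, replace: str) -> str:
    return re.sub(pattern, replace, string)

print(regex_replace("img_001.png", r"\d+", "X"))  # img_X.png
```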
aeba0b3a26 Reduce code duplication for [pro] and [max], rename Pro and Max to [pro] and [max] to be consistent with other BFL nodes, and make the default seed for Kontext nodes 1234, since 0 is interpreted by the API as 'choose random seed' (#8337) 2025-05-29 17:14:27 -04:00
094306b626 ComfyUI version 0.3.39 2025-05-29 14:26:39 -04:00
31260f0275 Update templates 0.1.22 (#8334) 2025-05-30 03:52:27 +10:00
f1c9ca816a Add BFL Kontext API Nodes. (#8333)
* Added initial Flux.1 Kontext Pro Image node - recreated branch to save myself sanity from rebase crap after master got rebased

* Add safety filter to Kontext.

* Make safety = 2 and make the input image optional.

* Add BFL kontext API nodes.

---------

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
2025-05-29 13:27:40 -04:00
f2289a1f59 Delete useless file. (#8327) 2025-05-29 08:29:37 -04:00
fb83eda287 Revert "Add support for Veo3 API node." (#8322)
This reverts commit 592d056100.
2025-05-29 03:03:11 -04:00
5e5e46d40c Not really tested WAN Phantom Support. (#8321) 2025-05-28 23:46:15 -04:00
4eba3161cf Refactor Pika API node imports and fix unique_id issue. (#8319)
Added unique_id to hidden parameters and corrected description formatting in PikAdditionsNode.
2025-05-28 23:42:25 -04:00
592d056100 Add support for Veo3 API node. (#8320) 2025-05-28 23:42:02 -04:00
1c1687ab1c Support HiDream SimpleTuner loras. (#8318) 2025-05-28 18:47:15 -04:00
e6609dacde ComfyUI version 0.3.38 2025-05-28 02:15:11 -04:00
ba37e67964 update frontend patch 1.20.7 (#8312) 2025-05-28 01:42:18 -04:00
06c661004e Memory estimation code can now take into account conds. (#8307) 2025-05-27 15:09:05 -04:00
c9e1821a7b ComfyUI version 0.3.37 2025-05-27 07:07:44 -04:00
f58f0f5696 More API nodes: Gemini/Open AI Chat, Tripo, Rodin, Runway Image (#8295)
* Add Ideogram generate node.

* Add staging api.

* Add API_NODE and common error for missing auth token (#5)

* Add Minimax Video Generation + Async Task queue polling example (#6)

* [Minimax] Show video preview and embed workflow in output (#7)

* Remove uv.lock

* Remove polling operations.

* Revert "Remove polling operations."

This reverts commit 8415404ce8fbc0262b7de54fc700c5c8854a34fc.

* Update stubs.

* Added Ideogram and Minimax back in.

* Added initial BFL Flux 1.1 [pro] Ultra node (#11)

* Manually add BFL polling status response schema (#15)

* Add function for uploading files. (#18)

* Add Luma nodes (#16)

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* Refactor util functions (#20)

* Add rest of Luma node functionality (#19)

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* Fix image_luma_ref not working (#28)

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* [Bug] Remove duplicated option T2V-01 in MinimaxTextToVideoNode (#31)

* add veo2, bump av req (#32)

* Add Recraft nodes (#29)

* Add Kling Nodes (#12)

* Add Camera Concepts (luma_concepts) to Luma Video nodes (#33)

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* Add Runway nodes (#17)

* Convert Minimax node to use VIDEO output type (#34)

* Standard `CATEGORY` system for api nodes (#35)

* Set `Content-Type` header when uploading files (#36)

* add better error propagation to veo2 (#37)

* Add Realistic Image and Logo Raster styles for Recraft v3 (#38)

* Fix runway image upload and progress polling (#39)

* Fix image upload for Luma: only include `Content-Type` header field if it's set explicitly (#40)

* Moved Luma nodes to nodes_luma.py (#47)

* Moved Recraft nodes to nodes_recraft.py (#48)

* Move and fix BFL nodes to node_bfl.py (#49)

* Move and edit Minimax node to nodes_minimax.py (#50)

* Add Recraft Text to Vector node, add Save SVG node to handle its output (#53)

* Added pixverse_template support to Pixverse Text to Video node (#54)

* Added Recraft Controls + Recraft Color RGB nodes (#57)

* split remaining nodes out of nodes_api, make utility lib, refactor ideogram (#61)

* Set request type explicitly (#66)

* Add `control_after_generate` to all seed inputs (#69)

* Fix bug: deleting `Content-Type` when property does not exist (#73)

* Add Pixverse and updated Kling types (#75)

* Added Recraft Style - Infinite Style Library node (#82)

* add ideogram v3 (#83)

* [Kling] Split Camera Control config to its own node (#81)

* Add Pika i2v and t2v nodes (#52)

* Remove Runway nodes (#88)

* Fix: Prompt text can't be validated in Kling nodes when using primitive nodes (#90)

* Update Pika Duration and Resolution options (#94)

* Removed Infinite Style Library until later (#99)

* fix multi image return (#101)

close #96

* Serve SVG files directly (#107)

* Add a bunch of nodes, 3 ready to use, the rest waiting for endpoint support (#108)

* Revert "Serve SVG files directly" (#111)

* Expose 4 remaining Recraft nodes (#112)

* [Kling] Add `Duration` and `Video ID` outputs (#105)

* Add Kling nodes: camera control, start-end frame, lip-sync, video extend (#115)

* Fix error for Recraft ImageToImage error for nonexistent random_seed param (#118)

* Add remaining Pika nodes (#119)

* Make controls input work for Recraft Image to Image node (#120)

* Fix: Nested `AnyUrl` in request model cannot be serialized (Kling, Runway) (#129)

* Show errors and API output URLs to the user (change log levels) (#131)

* Apply small fixes and most prompt validation (if needed to avoid API error) (#135)

* Node name/category modifications (#140)

* Add back Recraft Style - Infinite Style Library node (#141)

* [Kling] Fix: Correct/verify supported subset of input combos in Kling nodes (#149)

* Remove pixverse_template from PixVerse Transition Video node (#155)

* Use 3.9 compat syntax (#164)

* Handle Comfy API key based authorization (#167)

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>

* [BFL] Print download URL of successful task result directly on nodes (#175)

* Show output URL and progress text on Pika nodes (#168)

* [Ideogram] Print download URL of successful task result directly on nodes (#176)

* [Kling] Print download URL of successful task result directly on nodes (#181)

* Merge upstream may 14 25 (#186)

Co-authored-by: comfyanonymous <comfyanonymous@protonmail.com>
Co-authored-by: AustinMroz <austinmroz@utexas.edu>
Co-authored-by: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Co-authored-by: Benjamin Lu <benceruleanlu@proton.me>
Co-authored-by: Andrew Kvochko <kvochko@users.noreply.github.com>
Co-authored-by: Pam <42671363+pamparamm@users.noreply.github.com>
Co-authored-by: chaObserv <154517000+chaObserv@users.noreply.github.com>
Co-authored-by: Yoland Yan <4950057+yoland68@users.noreply.github.com>
Co-authored-by: guill <guill@users.noreply.github.com>
Co-authored-by: Chenlei Hu <hcl@comfy.org>
Co-authored-by: Terry Jia <terryjia88@gmail.com>
Co-authored-by: Silver <65376327+silveroxides@users.noreply.github.com>
Co-authored-by: catboxanon <122327233+catboxanon@users.noreply.github.com>
Co-authored-by: liesen <liesen.dev@gmail.com>
Co-authored-by: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
Co-authored-by: Robin Huang <robin.j.huang@gmail.com>
Co-authored-by: thot experiment <94414189+thot-experiment@users.noreply.github.com>
Co-authored-by: blepping <157360029+blepping@users.noreply.github.com>

* Update instructions on how to develop API Nodes. (#171)

* Add Runway FLF and I2V nodes (#187)

* Add OpenAI chat node (#188)

* Update README.

* Add Google Gemini API node (#191)

* Add Runway Gen 4 Text to Image Node (#193)

* [Runway, Gemini] Update node display names and attributes (#194)

* Update path from "image-to-video" to "image_to_video" (#197)

* [Runway] Split I2V nodes into separate gen3 and gen4 nodes (#198)

* Update runway i2v ratio enum (#201)

* Rodin3D: implement Rodin3D API Nodes (#190)

Co-authored-by: WhiteGiven <c15838568211@163.com>
Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* Add Tripo Nodes. (#189)

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>

* Change casing of categories "3D"  => "3d" (#208)

* [tripo] fix negtive_prompt and mv2model (#212)

* [tripo] set default param to None (#215)

* Add description and tooltip to Tripo Refine model. (#218)

* Update.

* Fix rebase errors.

* Fix rebase errors.

* Update templates.

* Bump frontend.

* Add file type info for file inputs.

---------

Co-authored-by: Christian Byrne <cbyrne@comfy.org>
Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
Co-authored-by: Chenlei Hu <hcl@comfy.org>
Co-authored-by: thot experiment <94414189+thot-experiment@users.noreply.github.com>
Co-authored-by: comfyanonymous <comfyanonymous@protonmail.com>
Co-authored-by: AustinMroz <austinmroz@utexas.edu>
Co-authored-by: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Co-authored-by: Benjamin Lu <benceruleanlu@proton.me>
Co-authored-by: Andrew Kvochko <kvochko@users.noreply.github.com>
Co-authored-by: Pam <42671363+pamparamm@users.noreply.github.com>
Co-authored-by: chaObserv <154517000+chaObserv@users.noreply.github.com>
Co-authored-by: Yoland Yan <4950057+yoland68@users.noreply.github.com>
Co-authored-by: guill <guill@users.noreply.github.com>
Co-authored-by: Terry Jia <terryjia88@gmail.com>
Co-authored-by: Silver <65376327+silveroxides@users.noreply.github.com>
Co-authored-by: catboxanon <122327233+catboxanon@users.noreply.github.com>
Co-authored-by: liesen <liesen.dev@gmail.com>
Co-authored-by: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Co-authored-by: blepping <157360029+blepping@users.noreply.github.com>
Co-authored-by: Changrz <51637999+WhiteGiven@users.noreply.github.com>
Co-authored-by: WhiteGiven <c15838568211@163.com>
Co-authored-by: seed93 <liangding1990@163.com>
2025-05-27 03:00:58 -04:00
3a10b9641c [BugFix] Update frontend to 1.20.6 (#8296) 2025-05-27 02:47:06 -04:00
89a84e32d2 Disable initial GPU load when novram is used. (#8294) 2025-05-26 16:39:27 -04:00
e5799c4899 Enable pytorch attention by default on AMD gfx1151 (#8282) 2025-05-26 04:29:25 -04:00
a0651359d7 Return proper error if diffusion model not detected properly. (#8272) 2025-05-25 05:28:11 -04:00
ad3bd8aa49 ComfyUI version 0.3.36 2025-05-24 17:30:37 -04:00
5a87757ef9 Better error if sageattention is installed but a dependency is missing. (#8264) 2025-05-24 06:43:12 -04:00
464aece92b update frontend package to v1.20.5 (#8260) 2025-05-23 21:53:49 -07:00
0b50d4c0db Add argument to explicitly enable fp8 compute support. (#8257)
This can be used to test if your current GPU/pytorch version supports fp8 matrix mult in combination with --fast or the fp8_e4m3fn_fast dtype.
2025-05-23 17:43:50 -04:00
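Before leaning on a flag like the one above, a quick hedged capability probe can tell you whether your torch build even exposes the fp8 pieces (this is not ComfyUI's detection code, and the private `_scaled_mm` signature varies across torch versions, so treat it only as a probe):

```python
import torch

# Rough capability probe: fp8 matrix multiplication needs the fp8 dtypes and
# the private _scaled_mm op, and in practice a CUDA GPU with compute
# capability >= 8.9 (Ada or newer).
has_fp8_dtype = hasattr(torch, "float8_e4m3fn")
has_scaled_mm = hasattr(torch, "_scaled_mm")
print("fp8 dtype:", has_fp8_dtype, "| _scaled_mm:", has_scaled_mm)
if torch.cuda.is_available():
    print("compute capability:", torch.cuda.get_device_capability())
```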
30b2eb8a93 create arange on-device (#8255) 2025-05-23 16:15:06 -04:00
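"Create arange on-device" means building the index tensor directly on the target device instead of creating it on CPU and copying it over, which saves a host-to-device transfer. A one-line illustration:

```python
import torch

x = torch.randn(8, device="cuda" if torch.cuda.is_available() else "cpu")

# Instead of creating on CPU and copying:
#   idx = torch.arange(8).to(x.device)
# create the tensor directly where it is needed:
idx = torch.arange(8, device=x.device)
```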
f85c08df06 Make VACE conditionings stackable. (#8240) 2025-05-22 19:22:26 -04:00
4202e956a0 Add append feature to conditioning_set_values (#8239)
Refactor unclipconditioning node.
2025-05-22 08:11:13 -04:00
b838c36720 remove mtl from 3d model file list (#8192) 2025-05-22 08:08:36 -04:00
fc39184ea9 Update frontend to 1.20 (#8232) 2025-05-22 02:24:36 -04:00
ded60c33a0 Update templates to 0.1.18 (#8224) 2025-05-21 11:40:08 -07:00
8bb858e4d3 Improve performance with large number of queued prompts (#8176)
* get_current_queue_volatile

* restore get_current_queue method

* remove extra import
2025-05-21 05:14:17 -04:00
57893c843f Code Optimization and Issue Fixes in ComfyUI server (#8196)
* Update server.py

* Update server.py
2025-05-21 04:59:42 -04:00
65da29aaa9 Make torch.compile LoRA/key-compatible (#8213)
* Make torch compile node use wrapper instead of object_patch for the entire diffusion_models object, allowing key associations on diffusion_models to not break (loras, getting attributes, etc.)

* Moved torch compile code into comfy_api so it can be used by custom nodes with a degree of confidence

* Refactor set_torch_compile_wrapper to support a list of keys instead of just diffusion_model, as well as additional torch.compile args

* remove unused import

* Moved torch compile kwargs to be stored in model_options instead of attachments; attachments are more intended for things to be 'persisted', AKA not deepcopied

* Add some comments

* Remove random line of code, not sure how it got there
2025-05-21 04:56:56 -04:00
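The key idea in the commit above is to compile the call rather than replace the module object, so attribute lookups and state_dict keys (which LoRA matching depends on) still resolve against the original module. A hedged toy illustration of that design choice, not the actual `set_torch_compile_wrapper` code:

```python
import torch

class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

model = Model()
# Wrap only the forward call: `model` keeps its original attributes and
# state_dict keys, so key-based patching (e.g. loras) still works.
compiled_forward = torch.compile(model.forward)
out = compiled_forward(torch.randn(2, 8))
```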
10024a38ea ComfyUI version v0.3.35 2025-05-21 04:50:37 -04:00
87f9130778 Revert "This doesn't seem to be needed on chroma. (#8209)" (#8210)
This reverts commit 7e84bf5373.
2025-05-20 05:39:55 -04:00
7e84bf5373 This doesn't seem to be needed on chroma. (#8209) 2025-05-20 05:29:23 -04:00
4f3b50ba51 Update README ROCm text to match link (#8199)
- Follow-up on #8198
2025-05-19 16:40:55 -04:00
e930a387d6 Update AMD instructions in README. (#8198) 2025-05-19 04:58:41 -04:00
d8e5662822 Remove default delimiter. (#8183) 2025-05-18 04:12:12 -04:00
3d44a09812 Update nodes_string.py (#8173) 2025-05-18 04:11:11 -04:00
62690eddec Node to add pixel space noise to an image. (#8182) 2025-05-18 04:09:56 -04:00
05eb10b43a Validate video inputs (#8133)
* validate kling lip sync input video

* add tooltips

* update duration estimates

* decrease epsilon

* fix rebase error
2025-05-18 04:08:47 -04:00
f5e4e976f4 Add missing category for T5TokenizerOption (#8177)
Change it if you need to but it should at least have a category.
2025-05-18 02:59:06 -04:00
aee2908d03 Remove useless log. (#8166) 2025-05-17 06:27:34 -04:00
dc46db7aa4 Make ImagePadForOutpaint return a 3 channel mask. (#8157) 2025-05-16 15:15:55 -04:00
7046983d95 Remove Desktop versioning claim from README (#8155) 2025-05-16 10:45:36 -07:00
1c2d45d2b5 Fix typo in last PR. (#8144)
More robust model detection for future proofing.
2025-05-15 19:02:19 -04:00
c820ef950d Add Wan-FUN Camera Control models and Add WanCameraImageToVideo node (#8013)
* support wan camera models

* fix by ruff check

* change camera_condition type; make camera_condition optional

* support camera trajectory nodes

* fix camera direction

---------

Co-authored-by: Qirui Sun <sunqr0667@126.com>
2025-05-15 19:00:43 -04:00
6a2e4bb9e0 Remove old hack used to fix windows pytorch 2.4 on the portable. (#8139)
Not necessary anymore.
2025-05-15 08:21:47 -04:00
f1f9763b4c Add get_duration method to Comfy VIDEO type (#8122)
* get duration from VIDEO type

* video get_duration unit test

* fix Windows unit test: can't delete opened temp file
2025-05-15 00:11:41 -04:00
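For a components-backed video, duration falls straight out of frame count and frame rate; a hedged sketch of that case only (the real method also has to handle file-backed videos via container metadata):

```python
from fractions import Fraction

# Illustrative only: duration of a components-backed video is
# frame count divided by frame rate.
def get_duration(num_frames: int, frame_rate: Fraction) -> float:
    return float(num_frames / frame_rate)

print(get_duration(48, Fraction(24)))  # 2.0 seconds
```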
08368f8e00 Update comment on ROCm pytorch attention in README. (#8123) 2025-05-14 17:54:50 -04:00
f3ff5c40db don't retry if API returns task failure (#8111) 2025-05-14 01:28:30 -04:00
98ff01e148 Display progress and result URL directly on API nodes (#8102)
* [Luma] Print download URL of successful task result directly on nodes (#177)

[Veo] Print download URL of successful task result directly on nodes (#184)

[Recraft] Print download URL of successful task result directly on nodes (#183)

[Pixverse] Print download URL of successful task result directly on nodes (#182)

[Kling] Print download URL of successful task result directly on nodes (#181)

[MiniMax] Print progress text and download URL of successful task result directly on nodes (#179)

[Docs] Link to docs in `API_NODE` class property type annotation comment (#178)

[Ideogram] Print download URL of successful task result directly on nodes (#176)

[Kling] Print download URL of successful task result directly on nodes (#181)

[Veo] Print download URL of successful task result directly on nodes (#184)

[Recraft] Print download URL of successful task result directly on nodes (#183)

[Pixverse] Print download URL of successful task result directly on nodes (#182)

[MiniMax] Print progress text and download URL of successful task result directly on nodes (#179)

[Docs] Link to docs in `API_NODE` class property type annotation comment (#178)

[Luma] Print download URL of successful task result directly on nodes (#177)

[Ideogram] Print download URL of successful task result directly on nodes (#176)

Show output URL and progress text on Pika nodes (#168)

[BFL] Print download URL of successful task result directly on nodes (#175)

[OpenAI ] Print download URL of successful task result directly on nodes (#174)

* fix ruff errors

* fix 3.10 syntax error
2025-05-14 00:33:18 -04:00
bab836d88d rework client.py to be more robust, add logging of api requests (#7988)
* rework how errors are handled on the client side

* add logging to /temp

* fix ruff

* fix rebase, stupid vscode gui
2025-05-13 20:42:29 -04:00
4a9014e201 Hunyuan Custom initial untested implementation. (#8101) 2025-05-13 15:53:47 -04:00
8a7c894d54 fix negative momentum (#8100) 2025-05-13 10:50:32 -07:00
a814f2e8cc Fix issue with old pytorch RMSNorm. (#8095) 2025-05-13 07:54:28 -04:00
481732a0ed Support official ACE Step loras. (#8094) 2025-05-13 07:32:16 -04:00
2156ce9453 add comment about using api key in headless (#8082) 2025-05-12 23:06:44 -04:00
4136502b7a implement APG guidance (#8081)
* first pass at implementing APG

* rename, cleanup code

* fix ruff

* fix modified cond to match ref impl better, support different cond arity
2025-05-12 21:10:24 -04:00
9ad287ff20 add support to record video as output for 3d node (#7927)
* add support to record video as output for 3d node

* source format

* add support to record video for load3d animation node
2025-05-12 16:47:14 -04:00
f5cacaeb14 Update frontend to v1.19 (#8076)
* Update frontend to v1.19

* Update requirements.txt
2025-05-12 16:47:02 -04:00
b7ed5f57bd string node (#7952) 2025-05-12 16:29:32 -04:00
b4abca828e add opus and mp3 to audio output node (#8019)
* first pass at opus and mp3 as well as migrating flac to pyav

* minor mp3 encoding fix

* fix ruff

* delete dead code

* split out save audio to separate nodes per filetype

* fix ruff
2025-05-12 16:00:01 -04:00
158419f3a0 ComfyUI version 0.3.34 2025-05-12 15:58:28 -04:00
640c47e7de Fix torch warning about deprecated function. (#8075)
Drop support for torch versions below 2.2 on the audio VAEs.
2025-05-12 14:32:01 -04:00
31e9e36c94 remove aspect ratio from kling request (#8062) 2025-05-12 13:32:24 -04:00
577de83ca9 ACE VAE works in fp16. (#8055) 2025-05-11 04:58:00 -04:00
3535909eb8 Add support for Comfy API keys (#8041)
* Handle Comfy API key based authorization (#167)

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>

* Bump frontend version to include API key features (#170)

* bump templates version

---------

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
2025-05-10 22:10:58 -04:00
235d3901fc Add method to stream text to node UI (#8018)
* show text progress preview

* include node id in message
2025-05-10 20:40:02 -04:00
d42613686f Fix issue with fp8 ops on some models. (#8045)
_scaled_mm errors when an input is non-contiguous.
2025-05-10 07:52:56 -04:00
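The usual guard for the failure mode named above is to make a contiguous copy only when needed, since `.contiguous()` on an already-contiguous tensor would still be a no-op but calling it unconditionally hides intent. A sketch, not the actual patch:

```python
import torch

def ensure_contiguous(t: torch.Tensor) -> torch.Tensor:
    # _scaled_mm (like some other fused kernels) errors on non-contiguous
    # inputs, so copy into contiguous memory only when necessary.
    return t if t.is_contiguous() else t.contiguous()
```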
1b3bf0a5da Fix res_multistep_ancestral sampler (#8030) 2025-05-09 20:14:13 -04:00
ae60b150e5 update node tooltips and validation (#8036) 2025-05-09 20:02:45 -04:00
42da274717 Use normal ComfyUI attention in ACE-Steps model (#8023)
* Use normal ComfyUI attention in ACE-Steps model

* Let optimized_attention handle output reshape for ACE
2025-05-09 13:51:02 -04:00
28f178a840 move SVG to core (#7982)
* move SVG to core

* fix workflow embedding w/ unicode characters
2025-05-09 13:46:34 -04:00
8ab15c863c Add --mmap-torch-files to enable use of mmap when loading ckpt/pt (#8021) 2025-05-09 04:52:47 -04:00
924d771e18 Add ACE Step to README. (#8005) 2025-05-08 08:40:57 -04:00
02a1b01aad ComfyUI version 0.3.33 2025-05-08 07:36:48 -04:00
a692c3cca4 Make ACE VAE tiling work. (#8004) 2025-05-08 07:25:45 -04:00
5d3cc85e13 Make japanese hiragana and katakana characters work with ACE. (#7997) 2025-05-08 03:32:36 -04:00
c7c025b8d1 Adjust memory estimation code for ACE VAE. (#7990) 2025-05-08 01:22:23 -04:00
fd08e39588 Make torchaudio not a hard requirement. (#7987)
Some platforms apparently can't install it, so if it's not present it should only break models that actually use it.
2025-05-07 21:37:12 -04:00
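The standard soft-dependency pattern the commit above describes looks roughly like this (a sketch, not the actual ComfyUI code): attempt the import once at module load, remember the failure, and raise only when a model actually needs the library.

```python
# Soft dependency: import failure is tolerated until the feature is used.
try:
    import torchaudio
except ImportError:
    torchaudio = None

def resample(waveform, orig_sr, new_sr):
    if torchaudio is None:
        raise RuntimeError("torchaudio is required for this model but is not installed")
    return torchaudio.functional.resample(waveform, orig_sr, new_sr)
```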
56b6ee6754 Detection code to make ltxv models without config work. (#7986) 2025-05-07 21:28:24 -04:00
cc33cd3422 Experimental lyrics strength for ACE. (#7984) 2025-05-07 19:22:07 -04:00
b9980592c4 Refuse to load api nodes on old pyav version. (#7981) 2025-05-07 17:27:16 -04:00
16417b40d9 Initial ACE-Step model implementation. (#7972) 2025-05-07 08:33:34 -04:00
271c9c5b9e Better mem estimation for the LTXV 13B model. (#7963) 2025-05-06 09:52:37 -04:00
a4e679765e Change chroma to use Flux shift. (#7961) 2025-05-06 09:00:01 -04:00
0cf2e46b17 ComfyUI version 0.3.32 2025-05-06 07:39:54 -04:00
094e9ef126 Add a way to disable api nodes: --disable-api-nodes (#7960) 2025-05-06 04:53:53 -04:00
1271c4ef9d More API Nodes (#7956)
* Add Ideogram generate node.

* Add staging api.

* Add API_NODE and common error for missing auth token (#5)

* Add Minimax Video Generation + Async Task queue polling example (#6)

* [Minimax] Show video preview and embed workflow in output (#7)

* Remove uv.lock

* Remove polling operations.

* Revert "Remove polling operations."

* Update stubs.

* Added Ideogram and Minimax back in.

* Added initial BFL Flux 1.1 [pro] Ultra node (#11)

* Add --comfy-api-base launch arg (#13)

* Add instructions for staging development. (#14)

* remove validation to make it easier to run against LAN copies of the API

* Manually add BFL polling status response schema (#15)

* Add function for uploading files. (#18)

* Add Luma nodes (#16)

* Refactor util functions (#20)

* Add VIDEO type (#21)

* Add rest of Luma node functionality (#19)

* Fix image_luma_ref not working (#28)

* [Bug] Remove duplicated option T2V-01 in MinimaxTextToVideoNode (#31)

* Add utils to map from pydantic model fields to comfy node inputs (#30)

* add veo2, bump av req (#32)

* Add Recraft nodes (#29)

* Add Kling Nodes (#12)

* Add Camera Concepts (luma_concepts) to Luma Video nodes (#33)

* Add Runway nodes (#17)

* Convert Minimax node to use VIDEO output type (#34)

* Standard `CATEGORY` system for api nodes (#35)

* Set `Content-Type` header when uploading files (#36)

* add better error propagation to veo2 (#37)

* Add Realistic Image and Logo Raster styles for Recraft v3 (#38)

* Fix runway image upload and progress polling (#39)

* Fix image upload for Luma: only include `Content-Type` header field if it's set explicitly (#40)

* Moved Luma nodes to nodes_luma.py (#47)

* Moved Recraft nodes to nodes_recraft.py (#48)

* Add Pixverse nodes (#46)

* Move and fix BFL nodes to node_bfl.py (#49)

* Move and edit Minimax node to nodes_minimax.py (#50)

* Add Minimax Image to Video node + Cleanup (#51)

* Add Recraft Text to Vector node, add Save SVG node to handle its output (#53)

* Added pixverse_template support to Pixverse Text to Video node (#54)

* Added Recraft Controls + Recraft Color RGB nodes (#57)

* split remaining nodes out of nodes_api, make utility lib, refactor ideogram (#61)

* Add types and docstrings to utils file (#64)

* Fix: `PollingOperation` progress bar update progress by absolute value (#65)

* Use common download function in kling nodes module (#67)

* Fix: Luma video nodes in `api nodes/image` category (#68)

* Set request type explicitly (#66)

* Add `control_after_generate` to all seed inputs (#69)

* Fix bug: deleting `Content-Type` when property does not exist (#73)

* Add preview to Save SVG node (#74)

* change default poll interval (#76), rework veo2

* Add Pixverse and updated Kling types (#75)

* Added Pixverse Image to Video node (#77)

* Add Pixverse Transition Video node (#79)

* Proper ray-1-6 support as fix has been applied in backend (#80)

* Added Recraft Style - Infinite Style Library node (#82)

* add ideogram v3 (#83)

* [Kling] Split Camera Control config to its own node (#81)

* Add Pika i2v and t2v nodes (#52)

* Temporary Fix for Runway (#87)

* Added Stability Stable Image Ultra node (#86)

* Remove Runway nodes (#88)

* Fix: Prompt text can't be validated in Kling nodes when using primitive nodes (#90)

* Fix: typo in node name "Stabiliy" => "Stability" (#91)

* Add String (Multiline) node (#93)

* Update Pika Duration and Resolution options (#94)

* Change base branch to master. Not main. (#95)

* Fix UploadRequest file_name param (#98)

* Removed Infinite Style Library until later (#99)

* fix ideogram style types (#100)

* fix multi image return (#101)

* add metadata saving to SVG (#102)

* Bump templates version to include API node template workflows (#104)

* Fix: `download_url_to_video_output` return type (#103)

* fix 4o generation bug (#106)

* Serve SVG files directly (#107)

* Add a bunch of nodes, 3 ready to use, the rest waiting for endpoint support (#108)

* Revert "Serve SVG files directly" (#111)

* Expose 4 remaining Recraft nodes (#112)

* [Kling] Add `Duration` and `Video ID` outputs (#105)

* Fix: datamodel-codegen sets string#binary type to non-existent `bytes_aliased` variable  (#114)

* Fix: Dall-e 2 not setting request content-type dynamically (#113)

* Default request timeout: one hour. (#116)

* Add Kling nodes: camera control, start-end frame, lip-sync, video extend (#115)

* Add 8 nodes - 4 BFL, 4 Stability (#117)

* Fix error for Recraft ImageToImage error for nonexistent random_seed param (#118)

* Add remaining Pika nodes (#119)

* Make controls input work for Recraft Image to Image node (#120)

* Use upstream PR: Support saving Comfy VIDEO type to buffer (#123)

* Use Upstream PR: "Fix: Error creating video when sliced audio tensor chunks are non-c-contiguous" (#127)

* Improve audio upload utils (#128)

* Fix: Nested `AnyUrl` in request model cannot be serialized (Kling, Runway) (#129)

* Show errors and API output URLs to the user (change log levels) (#131)

* Fix: Luma I2I fails when weight is <=0.01 (#132)

* Change category of `LumaConcepts` node from image to video (#133)

* Fix: `image.shape` accessed before `image` is null-checked (#134)

* Apply small fixes and most prompt validation (if needed to avoid API error) (#135)

* Node name/category modifications (#140)

* Add back Recraft Style - Infinite Style Library node (#141)

* Fixed Kling: Check attributes of pydantic types. (#144)

* Bump `comfyui-workflow-templates` version (#142)

* [Kling] Print response data when error validating response (#146)

* Fix: error validating Kling image response, trying to use `"key" in` on Pydantic class instance (#147)

* [Kling] Fix: Correct/verify supported subset of input combos in Kling nodes (#149)

* [Kling] Fix typo in node description (#150)

* [Kling] Fix: CFG min/max not being enforced (#151)

* Rebase launch-rebase (private) on prep-branch (public copy of master) (#153)

* Bump templates version (#154)

* Fix: Kling image gen nodes don't return entire batch when `n` > 1 (#152)

* Remove pixverse_template from PixVerse Transition Video node (#155)

* Invert image_weight value on Luma Image to Image node (#156)

* Invert and resize mask for Ideogram V3 node to match masking conventions (#158)

* [Kling] Fix: image generation nodes not returning Tuple (#159)

* [Bug] [Kling] Fix Kling camera control (#161)

* Kling Image Gen v2 + improve node descriptions for Flux/OpenAI (#160)

* [Kling] Don't return video_id from dual effect video (#162)

* Bump frontend to 1.18.8 (#163)

* Use 3.9 compat syntax (#164)

* Use Python 3.10

* add example env var

* Update templates to 0.1.11

* Bump frontend to 1.18.9

---------

Co-authored-by: Robin Huang <robin.j.huang@gmail.com>
Co-authored-by: Christian Byrne <cbyrne@comfy.org>
Co-authored-by: thot experiment <94414189+thot-experiment@users.noreply.github.com>
2025-05-06 04:23:00 -04:00
d9c80a85e5 This should not be a warning. (#7946) 2025-05-05 07:49:07 -04:00
3e62c5513a make audio chunks contiguous before encoding (#7942) 2025-05-04 23:27:23 -04:00
cd18582578 Support saving Comfy VIDEO type to buffer (#7939)
* get output format when saving to buffer

* add unit tests for writing to file or stream with correct fmt

* handle `to_format=None`

* fix formatting
2025-05-04 23:26:57 -04:00
80a44b97f5 Change lumina to native RMSNorm. (#7935) 2025-05-04 06:39:23 -04:00
9187a09483 Change cosmos and hydit models to use the native RMSNorm. (#7934) 2025-05-04 06:26:20 -04:00
3041e5c354 Switch mochi and wan modes to use pytorch RMSNorm. (#7925)
* Switch genmo model to native RMSNorm.

* Switch WAN to native RMSNorm.
2025-05-03 19:07:55 -04:00
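RMSNorm, which the commits above switch several models to, normalizes by the root-mean-square of the last dimension with no mean subtraction, then applies a learned gain. A small sketch comparing a hand-rolled version with the native `torch.nn.RMSNorm` (available in PyTorch 2.4+):

```python
import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6):
    # Scale by the reciprocal RMS of the last dim (no mean subtraction),
    # then apply the learned per-channel gain.
    rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
    return x * rms * weight

x = torch.randn(2, 16)
w = torch.ones(16)
native = torch.nn.RMSNorm(16, eps=1e-6)  # PyTorch >= 2.4
assert torch.allclose(rms_norm(x, w), native(x), atol=1e-5)
```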
7689917113 ComfyUI version 0.3.31 2025-05-03 00:34:01 -04:00
486ad8fdc5 Fix updater issue with newer portable. (#7917) 2025-05-03 00:28:10 -04:00
065d855f14 upstream Preview Any from rgthree-comfy (#7815)
* upstream Preview Any from rgthree-comfy

* use IO.ANY
2025-05-02 13:15:54 -04:00
530494588d [BugFix] Update frontend 1.18.6 (#7910) 2025-05-02 13:14:52 -04:00
2ab9618732 Fix the bugs in OFT/BOFT moule (#7909)
* Correct calculate_weight and load for OFT

* Correct calculate_weight and loading for BOFT
2025-05-02 13:12:37 -04:00
d9a87c1e6a Fix outdated comment about Internet connectivity (#7827) 2025-05-02 05:28:27 -04:00
551fe8dcee Add node to extend sigmas (#7901)
* Add ExpandSigmas node

* Rename, add interpolation functions

Co-authored-by: liesen <liesen.dev@gmail.com>

* Move computed interpolation outside loop

* Add type hints

---------

Co-authored-by: liesen <liesen.dev@gmail.com>
2025-05-02 05:28:05 -04:00
ff99861650 Make clipsave work with more TE models. (#7908) 2025-05-02 05:15:32 -04:00
8d0661d0ba Lint instance methods (#7903) 2025-05-01 19:32:04 -04:00
6d32dc049e Update frontend to v1.18 (#7898) 2025-05-01 10:57:54 -04:00
aa9d759df3 Switch ltxv to use the pytorch RMSNorm. (#7897) 2025-05-01 06:33:42 -04:00
c6c19e9980 fix bug (#7894) 2025-05-01 03:24:32 -04:00
08ff5fa08a Cleanup chroma PR. 2025-04-30 20:57:30 -04:00
4ca3d84277 Support for Chroma - Flux1 Schnell distilled with CFG (#7355)
* Upload files for Chroma Implementation

* Remove trailing whitespace

* trim more trailing whitespace..oops

* remove unused imports

* Add supported_inference_dtypes

* Set min_length to 0 and remove attention_mask=True

* Set min_length to 1

* get_modulations added from blepping and minor changes

* Add lora conversion if statement in lora.py

* Update supported_models.py

* update model_base.py

* add uptream commits

* set modelType.FLOW, will cause beta scheduler to work properly

* Adjust memory usage factor and remove unnecessary code

* fix mistake

* reduce code duplication

* remove unused imports

* refactor for upstream sync

* sync chroma-support with upstream via syncbranch patch

* Update sd.py

* Add Chroma as option for the OptimalStepsScheduler node
2025-04-30 20:57:00 -04:00
39c27a3705 Add updater test to stable release workflow. (#7887) 2025-04-30 14:42:18 -04:00
b1c7291569 Test updater in the windows release workflow. (#7886) 2025-04-30 14:18:20 -04:00
dbc726f80c Better vace memory estimation. (#7875) 2025-04-29 20:42:00 -04:00
7ee96455e2 Bump minimum pyav version to 14.2.0 (#7874) 2025-04-29 20:38:45 -04:00
0a66d4b0af Per device stream counters for async offload. (#7873) 2025-04-29 20:28:52 -04:00
5c5457a4ef support more example folders (#7836)
* support more example folders

* add warning message
2025-04-29 11:28:04 -04:00
45503f6499 Add release process section to README (#7855)
* Add release process section to README

* move

* Update README.md
2025-04-29 06:32:34 -04:00
005a91ce2b Latest desktop and portable should work on blackwell. (#7861)
Removed the mention about the cards from the readme.
2025-04-29 06:29:38 -04:00
68f0d35296 Add support for VIDEO as a built-in type (#7844)
* Add basic support for videos as types

This PR adds support for VIDEO as first-class types. In order to avoid
unnecessary costs, VIDEO outputs must implement the `VideoInput` ABC,
but their implementation details can vary. Included are two
implementations of this type which can be returned by other nodes:

* `VideoFromFile` - Created with either a path on disk (as a string) or
  a `io.BytesIO` containing the contents of a file in a supported format
  (like .mp4). This implementation won't actually load the video unless
  necessary. It will also avoid re-encoding when saving if possible.
* `VideoFromComponents` - Created from an image tensor and an optional
  audio tensor.

Currently, only h264 encoded videos in .mp4 containers are supported for
saving, but the plan is to add additional encodings/containers in the
near future (particularly .webm).

* Add optimization to avoid parsing entire video

* Improve type declarations to reduce warnings

* Make sure bytesIO objects can be read many times

* Fix a potential issue when saving long videos

* Fix incorrect type annotation

* Add a `LoadVideo` node to make testing easier

* Refactor new types out of the base comfy folder

I've created a new `comfy_api` top-level module. The intention is that
anything within this folder would be covered by semver-style versioning
that would allow custom nodes to rely on them not introducing breaking
changes.

* Fix linting issue
2025-04-29 05:58:00 -04:00
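The rough shape of the design the PR text above describes is an abstract base class with lazily-loading concrete implementations. The sketch below follows the names in the PR description, but the real `comfy_api` interface may differ in signatures and details:

```python
from abc import ABC, abstractmethod
from io import BytesIO

class VideoInput(ABC):
    @abstractmethod
    def save_to(self, path: str):
        """Write the video to disk, re-encoding only when required."""

class VideoFromFile(VideoInput):
    """Lazily wraps a path string or a BytesIO holding e.g. an .mp4 file."""
    def __init__(self, source):
        self.source = source

    def save_to(self, path: str):
        if isinstance(self.source, BytesIO):
            data = self.source.getvalue()
        else:
            with open(self.source, "rb") as f:
                data = f.read()
        with open(path, "wb") as f:
            f.write(data)  # same container: copy bytes, no re-encode
```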
83d04717b6 Support HiDream E1 model. (#7857) 2025-04-28 15:01:15 -04:00
7d329771f9 Add moderation level option to OpenAIGPTImage1 node and update api_call method signature (#7804) 2025-04-28 13:59:22 -04:00
c15909bb62 CFG++ for gradient estimation sampler (#7809) 2025-04-28 13:51:35 -04:00
772b4c5945 ltxv: overwrite existing mask on conditioned frame. (#7845)
This commit overwrites the noise mask on the latent frame that is being
conditioned with keyframe conditioning, setting it to one.
2025-04-28 13:42:04 -04:00
5a50c3c7e5 Fix stream priority to support older pytorch. (#7856) 2025-04-28 13:07:21 -04:00
30159a7fe6 Save v pred zsnr metadata (#7840) 2025-04-28 13:03:21 -04:00
cb9ac3db58 ltxv: add strength parameter to conditioning. (#7849)
This commit adds strength parameter to the LTXVImgToVideo node.
2025-04-28 12:59:17 -04:00
8115a7895b Add /api/v2/userdata endpoint (#7817)
* Add list_userdata_v2

* nit

* nit

* nit

* nit

* please set me free

* \\\\

* \\\\
2025-04-27 20:06:55 -04:00
c8cd7ad795 Use stream for casting if enabled. (#7833) 2025-04-27 05:38:11 -04:00
542b4b36b6 Prevent custom nodes from hooking certain functions. (#7825) 2025-04-26 20:52:56 -04:00
ac10a0d69e Make loras work with --async-offload (#7824) 2025-04-26 19:56:22 -04:00
0dcc75ca54 Add experimental --async-offload lowvram weight offloading. (#7820)
This should speed up the lowvram mode a bit. It currently is only enabled when --async-offload is used but it will be enabled by default in the future if there are no problems.
2025-04-26 16:11:21 -04:00
b685b8a4e0 Update portable package workflow to cu128 (#7812) 2025-04-26 04:43:12 -04:00
23e39f2ba7 Add a T5TokenizerOptions node to set options for the T5 tokenizer. (#7803) 2025-04-25 19:36:00 -04:00
78992c4b25 [NodeDef] Add documentation on widgetType (#7768)
* [NodeDef] Add documentation on widgetType

* Document required version for widgetType
2025-04-25 13:35:07 -04:00
f935d42d8e Support SimpleTuner lycoris lora format for HiDream. 2025-04-25 03:11:14 -04:00
a97f2f850a ComfyUI version 0.3.30 2025-04-24 16:03:01 -04:00
5acb705857 Switch LTXVPreprocess to libx264 (#7776) 2025-04-24 13:58:31 -04:00
5c80da31db fix multiple image return from api nodes (#7772) 2025-04-24 03:29:05 -04:00
e2eed9eb9b throw away alpha channel in clip vision preprocessor (#7769)
saves users having to explicitly discard the channel
2025-04-23 21:28:36 -04:00
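Discarding the alpha channel, as in the commit above, is a slice on the channel dimension. A one-liner assuming ComfyUI's [B, H, W, C] image layout:

```python
import torch

image = torch.rand(1, 224, 224, 4)  # RGBA batch in [B, H, W, C] layout
rgb = image[..., :3]                # keep only the first three channels
assert rgb.shape[-1] == 3
```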
11b68ebd22 [BugFix] Update frontend to 1.17.11 (#7766) 2025-04-23 18:16:12 -04:00
188b383c35 change timeout to 7 days (#7765) 2025-04-23 17:53:34 -04:00
2c1d686ec6 implement multi image prompting for gpt-image-1 and fix transparency in outputs (#7763)
* implement multi-image prompting for GPT Image 1

* fix transparency not working

* fix ruff
2025-04-23 16:10:10 -04:00
e8ddc2be95 [BugFix] Update frontend to 1.17.10 (#7762) 2025-04-23 16:02:41 -04:00
dea1c7474a Add support for API Nodes in ComfyUI. (#7726)
* Add Ideogram generate node.

* Add staging api.

* COMFY_API_NODE_NAME node property

* switch to boolean flag and use original node name for id

* add optional to type

* Add API_NODE and common error for missing auth token (#5)

* Add Minimax Video Generation + Async Task queue polling example (#6)

* [Minimax] Show video preview and embed workflow in output (#7)

* [API Nodes] Send empty request body instead of empty dictionary. (#8)

* Fixed: removed function from rebase.

* Add pydantic.

* Remove uv.lock

* Remove polling operations.

* Update stubs workflow.

* Remove polling comments.

* Update stubs.

* Use pydantic v2.

* Use pydantic v2.

* Add basic OpenAITextToImage node

* Add.

* convert image to tensor.

* Improve types.

* Ruff.

* Push tests.

* Handle multi-form data.

- Don't set content-type for multi-part/form
- Use data field instead of JSON

* Change to api.comfy.org

* Handle error code 409.

* separate out nodes per openai model

* Update error message.

* fix wrong output type

* re-categorize nodes, remove ideogram (for now)

* oops, fix mappings

* fix ruff

* Update frontend  to 1.17.9

* embargo lift rename nodes

* remove unused autogenerated model code

* fix API type error and add b64 support for 4o

* fix ruff

* oops forgot mask scaling code

* Remove unused types.

---------

Co-authored-by: bymyself <cbyrne@comfy.org>
Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com>
Co-authored-by: thot-experiment <thot@thiic.cc>
2025-04-23 15:38:34 -04:00
154f2911aa Lower size of release package more. (#7754) 2025-04-23 06:33:09 -04:00
3eaad0590e Lower size of release package. (#7751) 2025-04-23 05:47:09 -04:00
7eaff81be1 fp16 accumulation can now be enabled on the stable package. (#7750) 2025-04-23 05:28:24 -04:00
21a11ef817 Pytorch stable 2.7 is out and support cu128 (#7749) 2025-04-23 05:12:59 -04:00
552615235d Fix for dino lowvram. (#7748) 2025-04-23 04:12:52 -04:00
0738e4ea5d [API nodes] Add backbone for supporting api nodes in ComfyUI (#7745)
* Add Ideogram generate node.

* Add staging api.

* COMFY_API_NODE_NAME node property

* switch to boolean flag and use original node name for id

* add optional to type

* Add API_NODE and common error for missing auth token (#5)

* Add Minimax Video Generation + Async Task queue polling example (#6)

* [Minimax] Show video preview and embed workflow in ouput (#7)

* [API Nodes] Send empty request body instead of empty dictionary. (#8)

* Fixed: removed function from rebase.

* Add pydantic.

* Remove uv.lock

* Remove polling operations.

* Update stubs workflow.

* Remove polling comments.

* Update stubs.

* Use pydantic v2.

* Use pydantic v2.

* Add basic OpenAITextToImage node

* Add.

* convert image to tensor.

* Improve types.

* Ruff.

* Push tests.

* Handle multi-form data.

- Don't set content-type for multi-part/form
- Use data field instead of JSON

* Change to api.comfy.org

* Handle error code 409.

* Remove nodes.

---------

Co-authored-by: bymyself <cbyrne@comfy.org>
Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com>
2025-04-23 02:18:08 -04:00
92cdc692f4 Replace aom-av1 with svt-av1 for saving webm videos, use preset 6 + yuv420p10le pixel format (#7736)
* Add support for saving svt-av1 webm videos & yuv420p10le pixel format

* Replace aom-av1 with svt-av1

Use yuv420p10le for av1
2025-04-22 17:57:17 -04:00
2d6805ce57 Add option for using fp8_e8m0fnu for model weights. (#7733)
Seems to break every model I have tried but worth testing?
2025-04-22 06:17:38 -04:00
a8f63c0d5b Support dora_scale on both axis (#7727) 2025-04-22 05:01:27 -04:00
454a635c1b upstream MaskPreview from ComfyUI_essentials (#7719) 2025-04-22 05:00:28 -04:00
966c43ce26 Add OFT/BOFT algorithm in weight adapter (#7725) 2025-04-22 04:59:47 -04:00
3ab231f01f Fix issue with WAN VACE implementation. (#7724) 2025-04-21 23:36:12 -04:00
1f3fba2af5 Unified Weight Adapter system for better maintainability and future feature of Lora system (#7540) 2025-04-21 20:15:32 -04:00
5d0d4ee98a Add strength control for vace. (#7717) 2025-04-21 19:36:20 -04:00
9d57b8afd8 Update nodes_primitive.py (#7716)
Allow FLOAT and INT types to support negative numbers. 
Caps the numbers at the user's own system min and max.
2025-04-21 18:51:31 -04:00
5d51794607 Add node type hint for socketless option (#7714)
* Add node type hint for socketless option

* nit - Doc
2025-04-21 16:13:00 -04:00
ce22f687cc Support for WAN VACE preview model. (#7711)
* Support for WAN VACE preview model.

* Remove print.
2025-04-21 14:40:29 -04:00
b6fd3ffd10 Populate AUTH_TOKEN_COMFY_ORG hidden input (#7709) 2025-04-21 14:39:45 -04:00
11b72c9c55 CLIPTextEncodeHiDream. (#7703) 2025-04-21 02:41:51 -04:00
2c735c13b4 Slightly better fix for #7687 2025-04-20 11:33:27 -04:00
fd27494441 Use empty t5 of size 128 for hidream, seems to give closer results. 2025-04-19 19:49:40 -04:00
f43e1d7f41 Hidream: Allow loading hidream text encoders in CLIPLoader and DualCLIPLoader (#7676)
* Hidream: Allow partial loading text encoders

* reformat code for ruff check.
2025-04-19 19:47:30 -04:00
4486b0d0ff Update CODEOWNERS and add christian-byrne (#7663) 2025-04-19 17:23:31 -04:00
636d4bfb89 Fix hard crash when the spiece tokenizer path is bad. 2025-04-19 15:55:43 -04:00
dc300a4569 Add wanfun template workflows. (#7678) 2025-04-19 15:21:46 -04:00
f3b09b9f2d [BugFix] Update frontend to 1.16.9 (#7655)
Backport https://github.com/Comfy-Org/ComfyUI_frontend/pull/3505
2025-04-18 15:12:42 -04:00
7ecd5e9614 Increase freq_cutoff in FreSca node. 2025-04-18 03:16:16 -04:00
2383a39e3b Replace CLIPType if with getattr (#7589)
* Replace CLIPType if with getattr

* Forgot to remove breakpoint from testing
2025-04-18 02:53:36 -04:00
34e06bf7ec add support to output camera state (#7582) 2025-04-18 02:52:18 -04:00
55822faa05 [Type] Annotate graph.get_input_info (#7386)
* [Type] Annotate graph.get_input_info

* nit

* nit
2025-04-17 21:02:24 -04:00
880c205df1 Add hidream to readme. 2025-04-17 16:58:27 -04:00
3dc240d089 Make fresca work on multi dim. 2025-04-17 15:46:41 -04:00
19373aee75 Add FreSca node (#7631) 2025-04-17 15:24:33 -04:00
93292bc450 ComfyUI version 0.3.29 2025-04-17 14:45:01 -04:00
05d5a75cdc Update frontend to 1.16 (Install templates as pip package) (#7623)
* install templates as pip package

* Update requirements.txt

* bump templates version to include hidream

---------

Co-authored-by: Chenlei Hu <hcl@comfy.org>
2025-04-17 14:25:33 -04:00
eba7a25e7a Add WanFirstLastFrameToVideo node to use the new model. 2025-04-17 13:23:22 -04:00
dbcfd092a2 Set default context_img_len to 257 2025-04-17 12:42:34 -04:00
c14429940f Support loading WAN FLF model. 2025-04-17 12:04:48 -04:00
0d720e4367 Don't hardcode length of context_img in wan code. 2025-04-17 06:25:39 -04:00
1fc00ba4b6 Make hidream work with any latent resolution. 2025-04-16 18:34:14 -04:00
9899d187b1 Limit T5 to 128 tokens for HiDream: #7620 2025-04-16 18:07:55 -04:00
f00f340a56 Reuse code from flux model. 2025-04-16 17:43:55 -04:00
cce1d9145e [Type] Mark input options NotRequired (#7614) 2025-04-16 15:41:00 -04:00
b4dc03ad76 Fix issue on old torch. 2025-04-16 04:53:56 -04:00
9ad792f927 Basic support for hidream i1 model. 2025-04-15 17:35:05 -04:00
6fc5dbd52a Cleanup. 2025-04-15 12:13:28 -04:00
3e8155f7a3 More flexible long clip support.
Add clip g long clip support.

Text encoder refactor.

Support llama models with different vocab sizes.
2025-04-15 10:32:21 -04:00
8a438115fb add RMSNorm to comfy.ops 2025-04-14 18:00:33 -04:00
a14c2fc356 ComfyUI version v0.3.28 2025-04-13 12:21:12 -07:00
9ee6ca99d8 add_optimalsteps (#7584)
Co-authored-by: bebebe666 <jianningpei@tencent.com>
2025-04-12 20:33:36 -04:00
bb495cc9b8 Print python version in log. 2025-04-12 18:58:34 -04:00
e51d9ba5fc Add SEEDS (stage 2 & 3 DP) sampler (#7580)
* Add seeds stage 2 & 3 (DP) sampler

* Change the name to SEEDS in comment
2025-04-12 18:36:08 -04:00
c87a06f934 Update filter_files_content_types to support filtering 3d models (#7572)
* support 3d model filtering

* fix lint error: blank line contains whitespace

* add model extensions to test runner mimetype cache manually

* use unittest.mock.patch

* remove mtl file from testcase (actually plaintext support file)
2025-04-12 18:30:39 -04:00
1714a4c158 Add CublasOps support (#7574)
* CublasOps support

* Guard CublasOps behind --fast arg
2025-04-12 18:29:15 -04:00
73ecb75a3d filter image files in load image dropdown (#7573) 2025-04-12 18:27:59 -04:00
22ad513c72 Refactor node cache code to more easily add other types of cache. 2025-04-11 07:16:52 -04:00
ed945a1790 Dependency Aware Node Caching for low RAM/VRAM machines (#7509)
* add dependency-aware cache that removes a cached node as soon as all of its descendants have executed. This allows users with lower RAM to run workflows they would otherwise not be able to run. The downside is that every workflow will fully run each time even if no nodes have changed.

* remove test code

* tidy code
2025-04-11 06:55:51 -04:00
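The idea in the PR above is essentially reference counting over the execution graph: each cached output tracks how many direct descendants still need it and is freed the moment the count reaches zero. A toy sketch of that scheme (hypothetical class, not the actual implementation):

```python
# Toy dependency-aware cache: keep a node's output only until every
# direct descendant has consumed it, trading recomputation for RAM/VRAM.
class DependencyAwareCache:
    def __init__(self, graph):
        # graph: node -> list of descendant nodes that consume its output
        self.remaining = {n: len(deps) for n, deps in graph.items()}
        self.values = {}

    def put(self, node, value):
        if self.remaining.get(node, 0) > 0:  # discard outputs nobody will read
            self.values[node] = value

    def consume(self, node):
        value = self.values[node]
        self.remaining[node] -= 1
        if self.remaining[node] == 0:
            del self.values[node]  # free the memory as soon as possible
        return value
```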
f9207c6936 Update frontend to 1.15 (#7564) 2025-04-11 06:46:20 -04:00
8ad7477647 dont cache templates index (#7569) 2025-04-11 06:06:53 -04:00
98bdca4cb2 Deprecate InputTypeOptions.defaultInput (#7551)
* Deprecate InputTypeOptions.defaultInput

* nit

* nit
2025-04-10 06:57:06 -04:00
a26da20a76 Fix custom nodes not importing when path contains a dot. 2025-04-10 03:37:52 -04:00
e346d8584e Add prepare_sampling wrapper allowing custom nodes to more accurately report noise_shape (#7500) 2025-04-09 09:43:35 -04:00
ab31b64412 Make "surface net" the default in the VoxelToMesh node. 2025-04-09 09:42:08 -04:00
fe29739c68 add VoxelToMesh node w/ surfacenet meshing (#7446)
* add VoxelToMesh node w/ surfacenet meshing

could delete the VoxelToMeshBasic node now probably?

* fix ruff
2025-04-09 09:41:03 -04:00
e8345a9b7b Align /prompt response schema (#7423) 2025-04-09 09:10:36 -04:00
8c6b9f4481 Prevent custom nodes from accidentally overwriting global modules. (#7167)
* Prevent custom nodes from accidentally overwriting global modules.

* Improve.
2025-04-09 09:08:57 -04:00
cc7e023a4a handle palette mode in loadimage node (#7539) 2025-04-09 09:07:07 -04:00
2f7d8159c3 Show the user an error when the controlnet file is invalid. 2025-04-08 08:11:59 -04:00
70d7242e57 Support the wan fun reward loras. 2025-04-07 05:01:47 -04:00
49b732afd5 Show a proper error to the user when a vision model file is invalid. 2025-04-06 22:43:56 -04:00
3bfe4e5276 Support 512 siglip model. 2025-04-05 07:01:01 -04:00
89e4ea0175 Add activations_shape info in UNet models (#7482)
* Add activations_shape info in UNet models

* activations_shape should be a list
2025-04-04 21:27:54 -04:00
3a100b9a55 Disable partial offloading of audio VAE. 2025-04-04 21:24:56 -04:00
721253cb05 Fix problem. 2025-04-03 20:57:59 -04:00
3d2e3a6f29 Fix alpha image issue in more nodes. 2025-04-02 19:32:49 -04:00
2222cf67fd MLU memory optimization (#7470)
Co-authored-by: huzhan <huzhan@cambricon.com>
2025-04-02 19:24:04 -04:00
ab5413351e Fix comment.
This function does not support quads.
2025-04-01 14:09:31 -04:00
2b71aab299 User missing (#7439)
* Ensuring a 401 error is returned when user data is not found in multi-user context.

* Returning a 401 error when the provided comfy-user does not exist on the server side.
2025-04-01 13:53:52 -04:00
301e26b131 Add option to store TE in bf16 (#7461) 2025-04-01 13:48:53 -04:00
548457bac4 Fix alpha channel mismatch on destination in ImageCompositeMasked 2025-03-31 20:59:12 -04:00
0b4584c741 Fix latent composite node not working when source has alpha. 2025-03-30 21:47:05 -04:00
a3100c8452 Remove useless code. 2025-03-29 20:12:56 -04:00
832fc02330 ltxv: fix preprocessing exception when compression is 0. (#7431) 2025-03-29 20:03:02 -04:00
2d17d8910c Don't error if wan concat image has extra channels. 2025-03-28 08:49:29 -04:00
a40fcfc2d5 Update frontend to 1.14.6 (#7416)
Cherry-pick the fix: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3252
2025-03-28 02:27:01 -04:00
0a1f8869c9 Add WanFunInpaintToVideo node for the Wan fun inpaint models. 2025-03-27 11:13:27 -04:00
3661c833bc Support the WAN 2.1 fun control models.
Use the new WanFunControlToVideo node.
2025-03-26 19:54:54 -04:00
84fdaf7b0e Add CFGZeroStar node.
Works on all models that use a negative prompt but is meant for rectified
flow models.
2025-03-26 05:09:52 -04:00
8edc1f44c1 Support more float8 types. 2025-03-25 05:23:49 -04:00
eade1551bb Add Hunyuan3D to readme. 2025-03-24 07:14:32 -04:00
581a9991ff Add model merging node for WAN 2.1 2025-03-23 08:06:36 -04:00
e471c726e5 Fallback to pytorch attention if sage attention fails. 2025-03-22 15:45:56 -04:00
75c1c757d9 ComfyUI version v0.3.27 2025-03-21 20:09:54 -04:00
ce9b084279 [nit] Format error strings (#7345) 2025-03-21 19:08:25 -04:00
2206246055 support output normal and lineart once (#7290) 2025-03-21 16:24:13 -04:00
d9fa9d307f Automatically set the right sampling type for lotus. 2025-03-21 14:19:37 -04:00
83e839a89b Native LotusD Implementation (#7125)
* draft pass at a native comfy implementation of Lotus-D depth and normal est

* fix model_sampling kludges

* fix ruff

---------

Co-authored-by: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
2025-03-21 14:04:15 -04:00
0cf2274699 Update frontend to 1.14 (#7343) 2025-03-21 13:50:09 -04:00
0956107170 Nodes to convert images to YUV and back.
Can be used to convert an image to black and white.
2025-03-21 06:32:44 -04:00
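RGB to YUV is a fixed linear transform, and black-and-white conversion simply keeps the luma (Y) plane. A sketch with the standard BT.601 coefficients; the node's exact convention may differ:

```python
import torch

# BT.601 luma/chroma coefficients (the node's exact matrix may differ).
def rgb_to_yuv(image: torch.Tensor) -> torch.Tensor:
    r, g, b = image[..., 0], image[..., 1], image[..., 2]
    y = 0.299 * r + 0.587 * g + 0.114 * b
    u = -0.14713 * r - 0.28886 * g + 0.436 * b
    v = 0.615 * r - 0.51499 * g - 0.10001 * b
    return torch.stack((y, u, v), dim=-1)

# Black and white: keep Y only and replicate it across the RGB channels.
def to_grayscale(image: torch.Tensor) -> torch.Tensor:
    y = rgb_to_yuv(image)[..., :1]
    return y.expand(*y.shape[:-1], 3)
```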
a4a956dbbd Add backend primitive nodes (#7328)
* Add backend primitive nodes

* Add control after generate to int primitive
2025-03-21 01:47:18 -04:00
8b9ce4ed18 Update frontend to 1.13 (#7331) 2025-03-21 00:17:36 -04:00
3872b43d4b A few fixes for the hunyuan3d models. 2025-03-20 04:52:31 -04:00
32ca0805b7 Fix orientation of hunyuan 3d model. 2025-03-19 19:55:24 -04:00
11f1b41bab Initial Hunyuan3Dv2 implementation.
Supports the multiview, mini, turbo models and VAEs.
2025-03-19 16:52:58 -04:00
3b19fc76e3 Allow disabling pe in flux code for some other models. 2025-03-18 05:09:25 -04:00
50614f1b79 Fix regression with clip vision. 2025-03-17 13:56:11 -04:00
6dc7b0bfe3 Add support for giant dinov2 image encoder. 2025-03-17 05:53:54 -04:00
e8e990d6b8 Cleanup code. 2025-03-16 06:29:12 -04:00
2e24a15905 Call unpatch_hooks at the start of ModelPatcher.partially_unload (#7253)
* Call unpatch_hooks at the start of ModelPatcher.partially_unload

* Only call unpatch_hooks in partially_unload if lowvram is possible
2025-03-16 06:02:45 -04:00
fd5297131f Guard the edge cases of noise term in er_sde (#7265) 2025-03-16 06:02:25 -04:00
55a1b09ddc Allow loading diffusion model files with the "Load Checkpoint" node. 2025-03-15 08:27:49 -04:00
3c3988df45 Show a better error message if the VAE is invalid. 2025-03-15 08:26:36 -04:00
7ebd8087ff hotfix fe (#7244) 2025-03-15 01:38:10 -04:00
c624c29d66 Update frontend to 1.12.9 (#7236)
* Update frontend to 1.12.9

* Update requirements.txt
2025-03-14 18:17:26 -04:00
a2448fc527 Remove useless code. 2025-03-14 18:10:37 -04:00
6a0daa79b6 Make the SkipLayerGuidanceDIT node work on WAN. 2025-03-14 10:55:19 -04:00
9c98c6358b Tolerate missing @torch.library.custom_op (#7234)
This can happen on Pytorch versions older than 2.4.
2025-03-14 09:51:26 -04:00
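The usual shape of the compatibility shim for the commit above (a sketch, not the actual code): `torch.library.custom_op` was added in PyTorch 2.4, so on older versions fall back to a decorator that leaves the function unchanged.

```python
import torch

# Fallback sketch: if torch.library.custom_op is missing (PyTorch < 2.4),
# substitute a no-op decorator so decorated functions still run uncompiled.
if hasattr(torch.library, "custom_op"):
    custom_op = torch.library.custom_op
else:
    def custom_op(name, *args, **kwargs):
        def decorator(fn):
            return fn
        return decorator
```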
7aceb9f91c Add --use-flash-attention flag. (#7223)
* Add --use-flash-attention flag.
This is useful on AMD systems, as FA builds are still 10% faster than Pytorch cross-attention.
2025-03-14 03:22:41 -04:00
35504e2f93 Fix. 2025-03-13 15:03:18 -04:00
299436cfed Print mac version. 2025-03-13 10:05:40 -04:00
52e566d2bc Add codeowner for comfy/comfy_types (#7213) 2025-03-12 17:30:00 -04:00
9b6cd9b874 [NodeDef] Add documentation on multi_select input option (#7212) 2025-03-12 17:29:39 -04:00
3fc688aebd Ensure the extra_args in dpmpp sde series (#7204) 2025-03-12 17:28:59 -04:00
f4411250f3 Repeat frontend version warning at the end.
This way someone running ComfyUI with the command line is more likely to
actually see it.
2025-03-12 07:13:40 -04:00
d2a0fb6bb0 Add unwrap widget value support (#7197)
* Add unwrap widget value support

* nit
2025-03-12 06:39:14 -04:00
01015bff16 Add er_sde sampler (#7187) 2025-03-12 02:42:37 -04:00
2330754b0e Fix error saving some latents. 2025-03-11 15:07:16 -04:00
bc219a6487 Merge pull request #7143 from christian-byrne/fix-remote-widget-node
Fix LoadImageOutput node
2025-03-11 04:30:25 -04:00
94689766ad Merge pull request #7179 from comfyanonymous/ignore_fe_package
Only check frontend package if using default frontend
2025-03-11 03:45:02 -04:00
cfbe4b49ca Access package version 2025-03-10 20:43:59 -04:00
ca8efab79f Support control loras on Wan. 2025-03-10 17:23:13 -04:00
65ea778a5e nit 2025-03-10 15:19:59 -04:00
db9f2a34fc Fix unit test 2025-03-10 15:19:52 -04:00
7946049794 nit 2025-03-10 15:14:40 -04:00
6f6349b6a7 nit 2025-03-10 15:10:40 -04:00
1f138dd382 Only check frontend package if using default frontend 2025-03-10 15:07:44 -04:00
b779349b55 Temporarily revert fix to give time for people to update their nodes. 2025-03-10 06:30:17 -04:00
35e2dcf5d7 Hack to fix broken manager. 2025-03-10 06:15:17 -04:00
67c7184b74 ltxv: relax frame_idx divisibility for single frames. (#7146)
This commit relaxes divisibility constraint for single-frame
conditionings. For single frames, the index can be arbitrary, while
multi-frame conditionings (>= 9 frames) must still be aligned to 8
frames.

Co-authored-by: Andrew Kvochko <a.kvochko@lightricks.com>
2025-03-10 04:11:48 -04:00
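A hypothetical validator mirroring the relaxed rule might look like this (the function name and error text are illustrative):

```python
def check_frame_idx(frame_idx: int, num_frames: int) -> None:
    # A single-frame conditioning may sit at any index; clips of 9+ frames
    # must stay aligned to the 8-frame latent grid.
    if num_frames >= 9 and frame_idx % 8 != 0:
        raise ValueError(
            f"frame_idx {frame_idx} must be divisible by 8 "
            f"for {num_frames}-frame conditionings"
        )
```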
6f8e766509 Prevent custom nodes from accidentally overwriting global modules. 2025-03-10 03:33:41 -04:00
e1da98a14a remove unused params (#6931) 2025-03-09 14:07:09 -04:00
a73410aafa remove overrides 2025-03-09 03:46:08 -07:00
9aac21f894 Fix issues with new hunyuan img2vid model and bump version to v0.3.26 2025-03-09 05:07:22 -04:00
528d1b3563 When cached_hook_patches contain weights for hooks, only use hook_backup for unused keys (#7067) 2025-03-09 04:26:31 -04:00
2bc4b5968f ComfyUI version v0.3.25 2025-03-09 03:30:20 -04:00
7395b0c0d1 Support new hunyuan video i2v model.
Use the new "v2 (replace)" guidance type in HunyuanImageToVideo and set
image_interleave to 4 on the "Text Encode Hunyuan Video" node.
2025-03-08 20:34:47 -05:00
0952569493 Fix stable cascade VAE on some lowvram machines. 2025-03-08 20:24:04 -05:00
29832b3b61 Warn if frontend package is older than the one in requirements.txt 2025-03-08 03:51:36 -05:00
be4e760648 Add an image_interleave option to the Hunyuan image to video encode node.
See the tooltip for what it does.
2025-03-07 19:56:26 -05:00
c3d9cc4592 Print the frontend version in the log. 2025-03-07 19:56:26 -05:00
84cc9cb528 Update frontend to 1.11.8 (#7119)
* Update frontend to 1.11.7

* Update requirements.txt
2025-03-07 19:02:13 -05:00
ebbb920163 Add back taesd to nightly package. 2025-03-07 14:56:09 -05:00
d60fe0af4a Reduce size of nightly package. 2025-03-07 08:30:01 -05:00
5dbd250965 Update nightly instructions in readme. 2025-03-07 07:57:59 -05:00
4ab1875283 Add .bat file to nightly package to run with fp16 accumulation. 2025-03-07 07:45:40 -05:00
11b1f27cb1 Set WAN default compute dtype to fp16. 2025-03-07 04:52:36 -05:00
70e15fd743 No need for scale_input when fp8 matrix mult is disabled. 2025-03-07 04:49:20 -05:00
e1474150de Support fp8_scaled diffusion models that don't use fp8 matrix mult. 2025-03-07 04:39:21 -05:00
e62d72e8ca Typo in node_typing.py (#7092) 2025-03-06 15:24:04 -05:00
1650cda030 Fixed: Incorrect guide message for missing frontend. (#7105)
`{sys.executable} -m pip` -> `{sys.executable} -s -m pip`

https://github.com/comfyanonymous/ComfyUI/pull/7047#issuecomment-2697876793
2025-03-06 15:23:23 -05:00
a13125840c ComfyUI version v0.3.24 2025-03-06 13:53:48 -05:00
dfa36e6855 Fix some things breaking when embeddings fail to apply. 2025-03-06 13:31:55 -05:00
0124be4d93 ComfyUI version v0.3.23 2025-03-06 04:10:12 -05:00
29a70ca101 Support HunyuanVideo image to video model. 2025-03-06 03:07:15 -05:00
0bef826a98 Support llava clip vision model. 2025-03-06 00:24:43 -05:00
85ef295069 Make applying embeddings more efficient.
Adding new tokens no longer makes a whole copy of the embeddings weight
which can be massive on certain models.
2025-03-05 17:34:38 -05:00
5d84607bf3 Add type hint for FileLocator (#6968)
* Add type hint for FileLocator

* nit
2025-03-05 15:35:26 -05:00
c1909f350f Better argument handling of front-end-root (#7043)
* Better argument handling of front-end-root

Improves handling of the front-end-root launch argument. In several instances users set it, yet ComfyUI launched as normal and completely disregarded the launch arg, which doesn't make sense. Better to indicate to the user that something is incorrect.

* Removed unused import

There was no real reason to use "Optional" typing in the front-end-root argument.
2025-03-05 15:34:22 -05:00
52b3469606 [NodeDef] Explicitly add control_after_generate to seed/noise_seed (#7059)
* [NodeDef] Explicitly add control_after_generate to seed/noise_seed

* Update comfy/comfy_types/node_typing.py

Co-authored-by: filtered <176114999+webfiltered@users.noreply.github.com>

---------

Co-authored-by: filtered <176114999+webfiltered@users.noreply.github.com>
2025-03-05 15:33:23 -05:00
889519971f Bump ComfyUI version to v0.3.22 2025-03-05 10:06:37 -05:00
76739c23c3 Revert "Partially revert last commit."
This reverts commit a80bc822a2.
2025-03-05 09:57:40 -05:00
a80bc822a2 Partially revert last commit. 2025-03-05 08:58:44 -05:00
872780d236 fix: ltxv crop guides works with 0 keyframes (#7085)
This patch fixes a bug in LTXVCropGuides when the latent has no
keyframes. Additionally, the first frame is always added as a keyframe.

Co-authored-by: Andrew Kvochko <a.kvochko@lightricks.com>
2025-03-05 08:47:32 -05:00
6d45ffbe23 Bump ComfyUI version to v0.3.21 2025-03-05 08:05:22 -05:00
77633ba77d Remove unused variable. 2025-03-05 07:31:47 -05:00
30e6cfb1a0 Fix LTXVPreprocess on resolutions that are not multiples of 2. 2025-03-05 07:18:13 -05:00
dc134b2fdb Bump ComfyUI version to v0.3.20 2025-03-05 06:28:14 -05:00
369b079ff6 Fix lowvram issue with ltxv vae. 2025-03-05 05:26:08 -05:00
9c9a7f012a Adjust ltxv memory factor. 2025-03-05 05:16:05 -05:00
93fedd92fe Support LTXV 0.9.5.
Credits: Lightricks team.
2025-03-05 00:13:49 -05:00
745b13649b Add update instructions for the portable. 2025-03-04 23:34:36 -05:00
2b140654c7 suggest absolute full path to the requirements.txt instead of just requirements.txt (#7079)
For users of the portable version, there are occasional instances where commands are misinterpreted.
2025-03-04 23:29:34 -05:00
65042f7d39 Make it easier to set a custom template for hunyuan video. 2025-03-04 09:26:05 -05:00
7c7c70c400 Refactor skyreels i2v code. 2025-03-04 00:15:45 -05:00
8362199ee7 Bump ComfyUI version to v0.3.19 2025-03-03 19:18:37 -05:00
f86c724ef2 Temporal area composition.
New ConditioningSetAreaPercentageVideo node.
2025-03-03 06:50:31 -05:00
d6e5d487ad improved: better frontend package installation guide (#7047)
* improved: better installation guide
- change `pip` to `{sys.executable} -m pip`
modified: To prevent the guide message from being obscured by a complex error message, apply `exit` instead of `raise`.

* ruff fix
2025-03-03 04:40:23 -05:00
6752a826f6 Make the missing frontend package error more obvious. 2025-03-02 15:43:56 -05:00
04cf0ccb51 Use comfyui_frontend_package pypi package to manage frontend dependency (Frontend v1.10.17) (#7021)
* Use frontend pypi package

* Remove web/

* nit

* nit

* Update importlib logic

* Remove unused gh action

* Update code owners

* Update codeowners

* error message
2025-03-02 14:18:33 -05:00
9af6320ec9 Make 2d area composition nodes work on video models. 2025-03-02 08:19:16 -05:00
6f81cd8973 Change defaults in WanImageToVideo node. 2025-03-01 19:26:48 -05:00
4dc6709307 Rename argument in last commit and document the options. 2025-03-01 02:43:49 -05:00
4d55f16ae8 Use enum list for --fast options (#7024) 2025-03-01 02:37:35 -05:00
cf0b549d48 --fast now takes a number as argument to indicate how fast you want it.
The idea is that you can indicate how much quality vs speed you want.

At the moment:

--fast 2 enables fp16 accumulation if your pytorch supports it.
--fast 5 enables fp8 matrix mult on fp8 models and the optimization above.

--fast without a number enables all optimizations.
2025-02-28 02:48:20 -05:00
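A minimal argparse sketch of the optional-number pattern described here (the parser was later moved to an enum list, per commit 4d55f16ae8 above); the threshold values mirror the commit message, while the variable names are illustrative:

```python
import argparse

parser = argparse.ArgumentParser()
# "--fast" alone enables everything; "--fast N" picks a speed/quality level.
# nargs="?" makes the number optional; const is used when the flag is given bare.
parser.add_argument("--fast", type=int, nargs="?", const=10, default=0)

args = parser.parse_args(["--fast", "2"])
enable_fp16_accumulation = args.fast >= 2   # --fast 2 and above
enable_fp8_matrix_mult = args.fast >= 5     # --fast 5 and above
```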
eb4543474b Use fp16 for intermediate for fp8 weights with --fast if supported. 2025-02-28 02:17:50 -05:00
1804397952 Use fp16 if checkpoint weights are fp16 and the model supports it. 2025-02-27 16:39:57 -05:00
f4dac8ab6f Wan code small cleanup. 2025-02-27 07:22:42 -05:00
b07f116dea Bump ComfyUI version to v0.3.18 2025-02-26 21:19:14 -05:00
714f728820 Add to README that the Wan model is supported. 2025-02-26 20:48:50 -05:00
92d8d15300 Readme changes.
Instructions shouldn't recommend to run comfyui with --listen
2025-02-26 20:47:08 -05:00
89253e9fe5 Support Cambricon MLU (#6964)
Co-authored-by: huzhan <huzhan@cambricon.com>
2025-02-26 20:45:13 -05:00
3ea3bc8546 Fix wan issues when prompt length is long. 2025-02-26 20:34:02 -05:00
8e69e2ddfd Bump ComfyUI version to v0.3.17 2025-02-26 17:59:10 -05:00
0270a0b41c Reduce artifacts on Wan by doing the patch embedding in fp32. 2025-02-26 16:59:26 -05:00
26c7baf789 Bump ComfyUI version to v0.3.16 2025-02-26 14:30:32 -05:00
c37f15f98e Add fast preview support for Wan models. 2025-02-26 08:56:23 -05:00
4bca7367f3 Don't try to use clip_fea on t2v model. 2025-02-26 08:38:09 -05:00
b6fefe686b Better wan memory estimation. 2025-02-26 07:51:22 -05:00
fa62287f1f More code reuse in wan.
Fix bug when changing the compute dtype on wan.
2025-02-26 05:22:29 -05:00
0844998db3 Slightly better wan i2v mask implementation. 2025-02-26 03:49:50 -05:00
4ced06b879 WIP support for Wan I2V model. 2025-02-26 01:49:43 -05:00
cb06e9669b Wan seems to work with fp16. 2025-02-25 21:37:12 -05:00
0c32f82298 Fix missing frames in SaveWEBM node. 2025-02-25 20:21:03 -05:00
189da3726d Update README.md (#6960) 2025-02-25 17:17:18 -08:00
9a66bb972d Make wan work with all latent resolutions.
Cleanup some code.
2025-02-25 19:56:04 -05:00
ea0f939df3 Fix issue with wan and other attention implementations. 2025-02-25 19:13:39 -05:00
f37551c1d2 Change wan rope implementation to the flux one.
Should be more compatible.
2025-02-25 19:11:14 -05:00
63023011b9 WIP support for Wan t2v model. 2025-02-25 17:20:35 -05:00
f40076096e Cleanup some lumina te code. 2025-02-25 04:10:26 -05:00
96d891cb94 Speedup on some models by not upcasting bfloat16 to float32 on mac. 2025-02-24 05:41:32 -05:00
4553891bbd Update installation documentation to include desktop + cli. (#6899)
* Update installation documentation.

* Add portable to description.

* Move cli further down.
2025-02-23 19:13:39 -05:00
ace899e71a Prioritize fp16 compute when using allow_fp16_accumulation 2025-02-23 04:45:54 -05:00
aff16532d4 Remove some useless code. 2025-02-22 04:45:14 -05:00
b50ab153f9 Bump ComfyUI version to v0.3.15 2025-02-21 20:28:28 -05:00
072db3bea6 Assume the mac black image bug won't be fixed before v16. 2025-02-21 20:24:07 -05:00
a6deca6d9a Latest mac still has the black image bug. 2025-02-21 20:14:30 -05:00
41c30e92e7 Let all model memory be offloaded on nvidia. 2025-02-21 06:32:21 -05:00
f579a740dd Update frontend release schedule in README. (#6908)
Changes release schedule from weekly to fortnightly.
2025-02-21 05:58:12 -05:00
d37272532c Add discord channel to support section. (#6900) 2025-02-20 18:26:16 -05:00
12da6ef581 Apparently directml supports fp16. 2025-02-20 09:30:24 -05:00
29d4384a75 Normalize extra_model_config.yaml paths to prevent duplicates. (#6885)
* Normalize extra_model_config.yaml paths before adding.

* Fix tests.

* Fix tests.
2025-02-20 07:09:45 -05:00
c5be423d6b Fix link pointing to non-existing docs (#6891)
* Fix link pointing to non-existing docs

The current link points to a path that no longer exists.
I changed it to point to the correct path for custom node datatypes.

* Update node_typing.py
2025-02-20 07:07:07 -05:00
b4d3652d88 fixed: crash caused by outdated incompatible aiohttp dependency (#6841)
https://github.com/comfyanonymous/ComfyUI/issues/6038#issuecomment-2661776795
https://github.com/comfyanonymous/ComfyUI/issues/5814#issue-2700816845
2025-02-19 07:15:36 -05:00
5715be2ca9 Fix Hunyuan unet config detection for some models. (#6877)
The change to support 32 channel hunyuan models is missing the `key_prefix` on the key.

This addresses a complaint in the comments of acc152b674.
2025-02-19 07:14:45 -05:00
0d4d9222c6 Add early experimental SaveWEBM node to save .webm files.
The frontend part isn't done yet so there is no video preview on the node
or dragging the webm on the interface to load the workflow yet.

This uses a new dependency: PyAV.
2025-02-19 07:12:15 -05:00
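Since PyAV is the new dependency, writing frames to a .webm can be sketched as below; the codec, frame rate, and frame source are assumptions for illustration, not the SaveWEBM node's actual settings.

```python
import av
import numpy as np

# Minimal PyAV sketch: encode 8-bit RGB numpy frames into a VP9 .webm file.
frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(24)]

container = av.open("output.webm", mode="w")
stream = container.add_stream("libvpx-vp9", rate=24)
stream.width = 256
stream.height = 256
stream.pix_fmt = "yuv420p"

for array in frames:
    frame = av.VideoFrame.from_ndarray(array, format="rgb24")
    for packet in stream.encode(frame):   # the encoder may buffer, emitting packets lazily
        container.mux(packet)

for packet in stream.encode():            # flush any buffered packets
    container.mux(packet)
container.close()
```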
afc85cdeb6 Add Load Image Output node (#6790)
* add LoadImageOutput node

* add route for input/output/temp files

* update node_typing.py

* use literal type for image_folder field

* mark node as beta
2025-02-18 17:53:01 -05:00
acc152b674 Support loading and using SkyReels-V1-Hunyuan-I2V (#6862)
* Support SkyReels-V1-Hunyuan-I2V

* VAE scaling

* Fix T2V

oops

* Proper latent scaling
2025-02-18 17:06:54 -05:00
b07258cef2 Fix typo.
Let me know if this slows things down on 2000 series and below.
2025-02-18 07:28:33 -05:00
31e54b7052 Improve AMD arch detection. 2025-02-17 04:53:40 -05:00
8c0bae50c3 bf16 manual cast works on old AMD. 2025-02-17 04:42:40 -05:00
530412cb9d Refactor torch version checks to be more future proof. 2025-02-17 04:36:45 -05:00
61c8c70c6e support system prompt and cfg renorm in Lumina2 (#6795)
* support system prompt and cfg renorm in Lumina2

* fix issues with the ruff style check
2025-02-16 18:15:43 -05:00
d0399f4343 Update frontend to v1.9.18 (#6828)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-02-16 11:45:47 -05:00
e2919d38b4 Disable bf16 on AMD GPUs that don't support it. 2025-02-16 05:46:10 -05:00
93c8607d51 remove light_intensity and fov from load3d (#6742) 2025-02-15 15:34:36 -05:00
b3d6ae15b3 Update frontend to v1.9.17 (#6814)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-02-15 04:32:47 -05:00
2e21122aab Add a node to set the model compute dtype for debugging. 2025-02-15 04:15:37 -05:00
1cd6cd6080 Disable pytorch attention in VAE for AMD. 2025-02-14 05:42:14 -05:00
d7b4bf21a2 Auto enable mem efficient attention on gfx1100 on pytorch nightly 2.7
I'm not sure which arches are supported yet. If you see improvements in
memory usage while using --use-pytorch-cross-attention on your AMD GPU let
me know and I will add it to the list.
2025-02-14 04:18:14 -05:00
042a905c37 Open yaml files with utf-8 encoding for extra_model_paths.yaml (#6807)
* Using utf-8 encoding for yaml files.

* Fix test assertion.
2025-02-13 20:39:04 -05:00
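A small sketch of the fix's effect, assuming a plain yaml.safe_load of the config:

```python
import yaml

# An explicit encoding avoids mojibake on systems whose default locale
# encoding is not UTF-8 (e.g. cp1252 on Windows).
with open("extra_model_paths.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
```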
019c7029ea Add a way to set a different compute dtype for the model at runtime.
Currently only works for diffusion models.
2025-02-13 20:34:03 -05:00
8773ccf74d Better memory estimation for ROCm cards that support mem efficient attention.
There is no way to check whether the card actually supports it, so it is assumed
to if you use --use-pytorch-cross-attention on your card.
2025-02-13 08:32:36 -05:00
1d5d6586f3 Fix ruff. 2025-02-12 06:49:16 -05:00
35740259de mix_ascend_bf16_infer_err (#6794) 2025-02-12 06:48:11 -05:00
ab888e1e0b Add add_weight_wrapper function to model patcher.
Functions can now easily be added to wrap/modify model weights.
2025-02-12 05:55:35 -05:00
d9f0fcdb0c Cleanup. 2025-02-11 17:17:03 -05:00
b124256817 Fix for running via DirectML (#6542)
* Fix for running via DirectML

Fix DirectML empty image generation issue with Flux1. Add CPU fallback for unsupported paths. Verified the model works on AMD GPUs.

* fix formatting

* update causal mask calculation
2025-02-11 17:11:32 -05:00
af4b7c91be Make --force-fp16 actually force the diffusion model to be fp16. 2025-02-11 08:33:09 -05:00
e57d2282d1 Fix incorrect Content-Type for WebP images (#6752) 2025-02-11 04:48:35 -05:00
4027466c80 Make lumina model work with any latent resolution. 2025-02-10 00:24:20 -05:00
095d867147 Remove useless function. 2025-02-09 07:02:57 -05:00
Pam caeb27c3a5 res_multistep: Fix cfgpp and add ancestral samplers (#6731) 2025-02-08 19:39:58 -05:00
3d06e1c555 Make error more clear to user. 2025-02-08 18:57:24 -05:00
43a74c0de1 Allow FP16 accumulation with --fast (#6453)
Currently only applies to PyTorch nightly releases. (>=20250208)
2025-02-08 17:00:56 -05:00
af93c8d1ee Document which text encoder to use for lumina 2. 2025-02-08 06:57:25 -05:00
832e3f5ca3 Fix another small bug in attention_bias redux (#6737)
* fix a bug in the attn_masked redux code when using weight=1.0

* oh shit wait there was another bug
2025-02-07 14:44:43 -05:00
079eccc92a Don't compress http response by default.
Remove argument to disable it.

Add new --enable-compress-response-body argument to enable it.
2025-02-07 03:29:21 -05:00
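For illustration, an opt-in aiohttp compression middleware can be as small as the following; whether ComfyUI's middleware matches this is an assumption.

```python
from aiohttp import web

@web.middleware
async def compress_middleware(request, handler):
    # enable_compression() negotiates gzip/deflate from the request's
    # Accept-Encoding header; without it, responses are sent uncompressed.
    response = await handler(request)
    if isinstance(response, web.StreamResponse):
        response.enable_compression()
    return response

app = web.Application(middlewares=[compress_middleware])
```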
b6951768c4 fix a bug in the attn_masked redux code when using weight=1.0 (#6721) 2025-02-06 16:51:16 -05:00
fca304debf Update frontend to v1.8.14 (#6724)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-02-06 10:43:10 -05:00
14880e6dba Remove some useless code. 2025-02-06 05:00:37 -05:00
f1059b0b82 Remove unused GET /files API endpoint (#6714) 2025-02-05 18:48:36 -05:00
debabccb84 Bump ComfyUI version to v0.3.14 2025-02-05 15:48:13 -05:00
37cd448529 Set the shift for Lumina back to 6. 2025-02-05 14:49:52 -05:00
94f21f9301 Upcasting rope to fp32 seems to make no difference in this model. 2025-02-05 04:32:47 -05:00
60653004e5 Use regular numbers for rope in lumina model. 2025-02-05 04:17:25 -05:00
a57d635c5f Fix lumina 2 batches. 2025-02-04 21:48:11 -05:00
016b219dcc Add Lumina Image 2.0 to Readme. 2025-02-04 08:08:36 -05:00
8ac2dddeed Lower the default shift of lumina to reduce artifacts. 2025-02-04 06:50:37 -05:00
3e880ac709 Fix on python 3.9 2025-02-04 04:20:56 -05:00
e5ea112a90 Support Lumina 2 model. 2025-02-04 04:16:30 -05:00
8d88bfaff9 allow searching for new .pt2 extension, which can contain AOTI compiled modules (#6689) 2025-02-03 17:07:35 -05:00
ed4d92b721 Model merging nodes for cosmos. 2025-02-03 03:31:39 -05:00
932ae8d9ca Update frontend to v1.8.13 (#6682)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-02-02 17:54:44 -05:00
44e19a28d3 Use maximum negative value instead of -inf for masks in text encoders.
This is probably more correct.
2025-02-02 09:46:00 -05:00
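A sketch of the idea, with an illustrative helper name: a row masked entirely with float("-inf") makes softmax return NaN (0/0), while the dtype's finite minimum merely drives those weights toward zero.

```python
import torch

def make_attention_mask(pad_mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    # pad_mask: True where tokens should be attended to, False where padded.
    mask = torch.zeros(pad_mask.shape, dtype=dtype)
    # Finite "minus infinity": the most negative representable value.
    mask.masked_fill_(~pad_mask, torch.finfo(dtype).min)
    return mask
```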
0a0df5f136 better guide message for sageattention (#6634) 2025-02-02 09:26:47 -05:00
24d6871e47 add disable-compres-response-body cli args; add compress middleware; (#6672) 2025-02-02 09:24:55 -05:00
9e1d301129 Only use stable cascade lora format with cascade model. 2025-02-01 06:35:22 -05:00
768e035868 Add node for preview 3d animation (#6594)
* Add node for preview 3d animation

* remove bg_color param

* remove animation_speed param
2025-01-31 10:09:07 -08:00
669e0497ea Update frontend to v1.8.12 (#6662)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-01-31 10:07:37 -08:00
541dc08547 Update Readme. 2025-01-31 08:35:48 -05:00
8d8dc9a262 Allow batch of different sigmas when noise scaling. 2025-01-30 06:49:52 -05:00
2f98c24360 Update Readme with link to instruction for Nvidia 50 series. 2025-01-30 02:12:43 -05:00
ef85058e97 Bump ComfyUI version to v0.3.13 2025-01-29 16:07:12 -05:00
f9230bd357 Update the python version in some workflows. 2025-01-29 15:54:13 -05:00
537c27cbf3 Bump default cuda version in standalone package to 126. 2025-01-29 08:13:33 -05:00
6ff2e4d550 Remove logging call added in last commit.
This is called before the logging is set up so it messes up some things.
2025-01-29 08:08:01 -05:00
222f48c0f2 Allow changing folder_paths.base_path via command line argument. (#6600)
* Reimpl. CLI arg directly inside folder_paths.

* Update tests to use CLI arg mocking.

* Revert last-minute refactor.

* Fix test state pollution.
2025-01-29 08:06:28 -05:00
13fd4d6e45 More friendly error messages for corrupted safetensors files. 2025-01-28 09:41:09 -05:00
1210d094c7 Convert latents_ubyte to 8-bit unsigned int before converting to CPU (#6300)
* Convert latents_ubyte to 8-bit unsigned int before converting to CPU

* Only convert to uint8 if directml_enabled
2025-01-28 08:22:54 -05:00
255edf2246 Lower minimum ratio of loaded weights on Nvidia. 2025-01-27 05:26:51 -05:00
4f011b9a00 Better CLIPTextEncode error when clip input is None. 2025-01-26 06:04:57 -05:00
67feb05299 Remove redundant code. 2025-01-25 19:04:53 -05:00
6d21740346 Print ComfyUI version. 2025-01-25 15:03:57 -05:00
7fbf4b72fe Update nightly pytorch ROCm command in Readme. 2025-01-24 06:15:54 -05:00
14ca5f5a10 Remove useless code. 2025-01-24 06:15:54 -05:00
ce557cfb88 Remove redundant code (#6576) 2025-01-23 05:57:41 -05:00
96e2a45193 Remove useless code. 2025-01-23 05:56:23 -05:00
dfa2b6d129 Remove unused function lcm in conds.py (#6572) 2025-01-23 05:54:09 -05:00
f3566f0894 remove some params from load 3d node (#6436) 2025-01-22 17:23:51 -05:00
ca69b41cee Add utils/ to web server developer codeowner (#6570) 2025-01-22 17:16:54 -05:00
a058f52090 [i18n] Add /i18n endpoint to provide all custom node translations (#6558)
* [i18n] Add /i18n endpoint to provide all custom node translations

* Sort glob result for deterministic ordering

* Update comment
2025-01-22 17:15:45 -05:00
d6bbe8c40f Remove support for python 3.8. 2025-01-22 17:04:30 -05:00
a7fe0a94de Refactor and fixes for video latents. 2025-01-22 06:37:46 -05:00
e857dd48b8 Add gradient estimation sampler (#6554) 2025-01-22 05:29:40 -05:00
d303cb5341 Add missing case to CLIPLoader. 2025-01-21 08:57:04 -05:00
fb2ad645a3 Add FluxDisableGuidance node to disable using the guidance embed. 2025-01-20 14:50:24 -05:00
d8a7a32779 Cleanup old TODO. 2025-01-20 03:44:13 -05:00
a00e1489d2 LatentBatch fix for video latents 2025-01-19 06:02:14 -05:00
ebf038d4fa Use torch.special.expm1 (#6388)
* Use `torch.special.expm1`

This function provides greater precision than `exp(x) - 1` for small values of `x`.

Found with TorchFix https://github.com/pytorch-labs/torchfix/

* Use non-alias
2025-01-19 04:54:32 -05:00
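A tiny demonstration of the precision gap the commit cites:

```python
import torch

x = torch.tensor(1e-10, dtype=torch.float32)
naive = torch.exp(x) - 1          # exp(x) rounds to 1.0 in float32, so this is 0.0
precise = torch.special.expm1(x)  # ~1.0000e-10, retains the small-x precision
print(naive.item(), precise.item())
```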
b4de04a1c1 Update frontend to v1.7.14 (#6522)
Co-authored-by: huchenlei <20929282+huchenlei@users.noreply.github.com>
2025-01-18 21:43:37 -05:00
b1a02131c9 Remove comfy.samplers self-import (#6506) 2025-01-18 17:49:51 -05:00
3a3910f91d PromptServer: Return 400 for empty filename param (#6504) 2025-01-18 17:47:33 -05:00
507199d9a8 Uni pc sampler now works with audio and video models. 2025-01-18 05:27:58 -05:00
2f3ab40b62 Add warning when using old pytorch versions. 2025-01-17 18:47:27 -05:00
7fc3ccdcc2 Add that nvidia cosmos is supported to the README. 2025-01-16 21:17:18 -05:00
55add50220 Bump ComfyUI version to v0.3.12 2025-01-16 18:11:57 -05:00
0aa2368e46 Fix some cosmos fp8 issues. 2025-01-16 17:45:37 -05:00
cca96a85ae Fix cosmos VAE failing with videos longer than 121 frames. 2025-01-16 16:30:06 -05:00
619b8cde74 Bump ComfyUI version to 0.3.11 2025-01-16 14:54:48 -05:00
31831e6ef1 Code refactor. 2025-01-16 07:23:54 -05:00
88ceb28e20 Tweak hunyuan memory usage factor. 2025-01-16 06:31:03 -05:00
23289a6a5c Clean up some debug lines. 2025-01-16 04:24:39 -05:00
9d8b6c1f46 More accurate memory estimation for cosmos and hunyuan video. 2025-01-16 03:48:40 -05:00
6320d05696 Slightly lower hunyuan video memory usage. 2025-01-16 00:23:01 -05:00
25683b5b02 Lower cosmos diffusion model memory usage. 2025-01-15 23:46:42 -05:00
4758fb64b9 Lower cosmos VAE memory usage by a bit. 2025-01-15 22:57:52 -05:00
008761166f Optimize first attention block in cosmos VAE. 2025-01-15 21:48:46 -05:00
bfd5dfd611 3.13 doesn't work yet. 2025-01-15 20:32:44 -05:00
55ade36d01 Remove python 3.8 from test-build workflow. 2025-01-15 20:24:55 -05:00
2e20e399ea Add minimum numpy version to requirements.txt 2025-01-15 20:19:56 -05:00
3baf92d120 CosmosImageToVideoLatent batch_size now does something. 2025-01-15 17:19:59 -05:00
1709a8441e Use latest python 3.12.8 for the portable release. 2025-01-15 14:50:40 -05:00
cba58fff0b Remove unsafe embedding load for very old pytorch. 2025-01-15 04:32:23 -05:00
2feb8d0b77 Force safe loading of files in torch format on pytorch 2.4+
If this breaks something for you make an issue.
2025-01-15 03:50:27 -05:00
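Safe loading of torch-format pickles is typically done through the weights_only flag; a minimal sketch, assuming that is the mechanism used here:

```python
import torch

# weights_only=True refuses to unpickle arbitrary objects, so a malicious
# checkpoint cannot execute code at load time.
state_dict = torch.load("model.ckpt", weights_only=True, map_location="cpu")
```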
5b657f8c15 Allow setting start and end image in CosmosImageToVideoLatent. 2025-01-15 00:41:35 -05:00
2cdbaf5169 Add SetFirstSigma node (#6459)
Useful for models utilizing ztSNR. See: https://arxiv.org/abs/2409.15997
2025-01-14 19:05:45 -05:00
Pam c78a45685d Rewrite res_multistep sampler and implement res_multistep_cfg_pp sampler. (#6462) 2025-01-14 18:20:06 -05:00
3aaabb12d4 Implement Cosmos Image/Video to World (Video) diffusion models.
Use CosmosImageToVideoLatent to set the input image/video.
2025-01-14 05:14:10 -05:00
1f1c7b7b56 Remove useless code. 2025-01-13 03:52:37 -05:00
90f349f93d Add res_multistep sampler from the cosmos code.
This sampler should work with all models.
2025-01-12 03:10:07 -05:00
b9d9bcba14 fixed a bug where a relative path was not converted to a full path (#6395)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2025-01-11 19:19:51 -05:00
42086af123 Merge ruff.toml into pyproject.toml (#6431) 2025-01-11 12:52:46 -05:00
6c9bd11fa3 Hooks Part 2 - TransformerOptionsHook and AdditionalModelsHook (#6377)
* Add 'sigmas' to transformer_options so that downstream code can know about the full scope of current sampling run, fix Hook Keyframes' guarantee_steps=1 inconsistent behavior with sampling split across different Sampling nodes/sampling runs by referencing 'sigmas'

* Cleaned up hooks.py, refactored Hook.should_register and add_hook_patches to use target_dict instead of target so that more information can be provided about the current execution environment if needed

* Refactor WrapperHook into TransformerOptionsHook, as there is no need to separate out Wrappers/Callbacks/Patches into different hook types (all affect transformer_options)

* Refactored HookGroup to also store a dictionary of hooks separated by hook_type, modified necessary code to no longer need to manually separate out hooks by hook_type

* In inner_sample, change "sigmas" to "sampler_sigmas" in transformer_options to not conflict with the "sigmas" that will overwrite "sigmas" in _calc_cond_batch

* Refactored 'registered' to be HookGroup instead of a list of Hooks, made AddModelsHook operational and compliant with should_register result, moved TransformerOptionsHook handling out of ModelPatcher.register_all_hook_patches, support patches in TransformerOptionsHook properly by casting any patches/wrappers/hooks to proper device at sample time

* Made hook clone code sane, made clear ObjectPatchHook and SetInjectionsHook are not yet operational

* Fix performance of hooks when hooks are appended via Cond Pair Set Props nodes by properly caching between positive and negative conds, make hook_patches_backup behave as intended (in the case that something pre-registers WeightHooks on the ModelPatcher instead of registering it at sample time)

* Filter only registered hooks on self.conds in CFGGuider.sample

* Make hook_scope functional for TransformerOptionsHook

* removed 4 whitespace lines to satisfy Ruff

* Add a get_injections function to ModelPatcher

* Made TransformerOptionsHook contribute to registered hooks properly, added some doc strings and removed a so-far unused variable

* Rename AddModelsHooks to AdditionalModelsHook, rename SetInjectionsHook to InjectionsHook (not yet implemented, but at least getting the naming figured out)

* Clean up a typehint
2025-01-11 12:20:23 -05:00
ee8a7ab69d Fast latent preview for Cosmos. 2025-01-11 04:41:24 -05:00
9c773a241b Add pyproject.toml (#6386)
* Add pyproject.toml

* doc

* Static version file

* Add github action to sync version.py

* Change trigger to PR

* Fix commit

* Grant pr write permission

* nit

* nit

* Don't run on fork PRs

* Rename version.py to comfyui_version.py
2025-01-11 03:09:25 -05:00
adea2beb5c Add edm option to ModelSamplingContinuousEDM for Cosmos.
You can now use this node with "edm" selected to control the sigma_max and
sigma_min of the Cosmos model sampling.
2025-01-11 02:18:42 -05:00
2ff3104f70 WIP support for Nvidia Cosmos 7B and 14B text to world (video) models. 2025-01-10 09:14:16 -05:00
129d8908f7 Add argument to skip the output reshaping in the attention functions. 2025-01-10 06:27:37 -05:00
ff838657fa Cleaner handling of attention mask in ltxv model code. 2025-01-09 07:12:03 -05:00
2307ff6746 Improve some logging messages. 2025-01-08 19:05:22 -05:00
d0f3752e33 Properly calculate inner dim for t5 model.
This is required to support some different types of t5 models.
2025-01-07 17:33:03 -05:00
c515bdf371 fixed: robust loading of comfy.settings.json (#6383)
https://github.com/comfyanonymous/ComfyUI/issues/6371
2025-01-07 16:03:56 -05:00
4209edf48d Make a few more samplers deterministic. 2025-01-07 02:12:32 -05:00
d055325783 Document get_attr and get_model_object (#6357)
* Document get_attr and get_model_object

* Update model_patcher.py

* Update model_patcher.py

* Update model_patcher.py
2025-01-06 20:12:22 -05:00
eeab420c70 Update frontend to v1.6.18 (#6368) 2025-01-06 18:42:45 -05:00
916d1e14a9 Make ancestral samplers more deterministic. 2025-01-06 03:04:32 -05:00
c496e53519 In inner_sample, change "sigmas" to "sampler_sigmas" in transformer_options to not conflict with the "sigmas" that will overwrite "sigmas" in _calc_cond_batch (#6360) 2025-01-06 01:36:47 -05:00
7da85fac3f Update CODEOWNERS (#6338)
Adding yoland and robin to web dir
2025-01-05 04:33:49 -05:00
b65b83af6f Add update-frontend github action (#6336)
* Add update-frontend github action

* Update secrets

* nit
2025-01-05 04:32:11 -05:00
c8a3492c22 Make the device an optional parameter in the clip loaders. 2025-01-05 04:29:36 -05:00
5cbf79787f Add advanced device option to clip loader nodes.
Right click the "Load CLIP" or DualCLIPLoader node and "Show Advanced".
2025-01-05 01:46:11 -05:00
d45ebb63f6 Remove old unused function. 2025-01-04 07:20:54 -05:00
caa6476a69 Update web content to release v1.6.17 (#6337)
* Update web content to release v1.6.17

* Remove js maps
2025-01-03 16:22:08 -05:00
45671cda0b Update web content to release v1.6.16 (#6335)
* Update web content to release v1.6.16
2025-01-03 13:56:46 -05:00
8f29664057 Change defaults in nightly package workflow. 2025-01-03 12:12:17 -05:00
0b9839ef43 Update web content to release v1.6.15 (#6324) 2025-01-02 19:20:48 -05:00
953693b137 add fov and mask for load 3d node (#6308)
* add fov and mask for load 3d node

* some comments
2025-01-02 19:20:34 -05:00
a39ea87bca Update web content to release v1.6.14 (#6312) 2025-01-02 16:18:54 -05:00
9e9c8a1c64 Clear cache as often on AMD as Nvidia.
I think the issue this was working around has been solved.

If you notice that this change slows things down or causes stutters on
your AMD GPU with ROCm on Linux please report it.
2025-01-02 08:44:16 -05:00
0f11d60afb Fix temporal tiling for decoder, remove redundant tiles. (#6306)
This commit fixes the temporal tile size calculation, and removes
a redundant tile at the end of the range when its elements are
completely covered by the previous tile.

Co-authored-by: Andrew Kvochko <a.kvochko@lightricks.com>
2025-01-01 16:29:01 -05:00
79eea51a1d Fix and enforce all ruff W rules. 2025-01-01 03:08:33 -05:00
c0338a46a4 Fix unknown sampler error handling in calculate_sigmas function (#6280)
Modernize calculate_sigmas function
2024-12-31 17:33:50 -05:00
1c99734e5a Add missing model_options param (#6296) 2024-12-31 14:46:55 -05:00
67758f50f3 Fix custom node type-hinting examples (#6281)
* Fix import in comfy_types doc / sample

* Clarify docstring
2024-12-31 03:41:09 -05:00
02eef72bf5 fixed "verbose" argument (#6289)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2024-12-31 03:27:09 -05:00
b7572b2f87 Fix and enforce no trailing whitespace. 2024-12-31 03:16:37 -05:00
a90aafafc1 Add kl_optimal scheduler (#6206)
* Add kl_optimal scheduler

* Rename kl_optimal_schedule to kl_optimal_scheduler to be more consistent
2024-12-30 05:09:38 -05:00
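As a sketch of the schedule (my reading of the approach, not necessarily the merged code): interpolate linearly between atan(sigma_max) and atan(sigma_min), then map back through tan.

```python
import math
import torch

def kl_optimal_sigmas(n: int, sigma_min: float, sigma_max: float) -> torch.Tensor:
    # Linear interpolation in arctan-sigma space, mapped back through tan:
    # t=0 gives sigma_max, t=1 gives sigma_min.
    t = torch.linspace(0, 1, n)
    sigmas = torch.tan(t * math.atan(sigma_min) + (1 - t) * math.atan(sigma_max))
    return torch.cat([sigmas, sigmas.new_zeros(1)])  # final sigma of 0 ends sampling
```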
d9b7cfac7e Fix and enforce new lines at the end of files. 2024-12-30 04:14:59 -05:00
3507870535 Add 'sigmas' to transformer_options so that downstream code can know about the full scope of current sampling run, fix Hook Keyframes' guarantee_steps=1 inconsistent behavior with sampling split across different Sampling nodes/sampling runs by referencing 'sigmas' (#6273) 2024-12-30 03:42:49 -05:00
82ecb02c1e Remove duplicate calls to INPUT_TYPES (#6249) 2024-12-29 20:06:49 -05:00
a618f768e0 Auto reshape 2d to 3d latent for single image generation on video model. 2024-12-29 02:26:49 -05:00
e1dec3c792 Fix formatting. 2024-12-28 05:33:17 -05:00
96697c4bc5 serve workflow templates from custom_nodes (#6193)
* add GET /workflow_templates

* serve workflow templates from custom_nodes

* refactor into custom_node_manager, add test

* remove unused import

* revert changes in folder_paths

* Remove trailing whitespace.

* account for multiple custom_nodes paths
2024-12-28 05:30:04 -05:00
b504bd606d Add ruff rule for empty line with trailing whitespace. 2024-12-28 05:23:08 -05:00
d170292594 Remove some trailing white space. 2024-12-27 18:02:30 -05:00
9cfd185676 Add option to log non-error output to stdout (#6243)
* nit

* Add option to log non-error output to stdout

- No change to default behaviour
- Adds CLI argument: --log-stdout
- With this arg present, any logging of a level below logging.ERROR will be sent to stdout instead of stderr
2024-12-27 14:40:05 -05:00
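The standard-library pattern for this split is two stream handlers with a level filter; an illustrative setup, not necessarily the repo's exact code:

```python
import logging
import sys

# Records below ERROR go to stdout; ERROR and above go to stderr.
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR)

stderr_handler = logging.StreamHandler(sys.stderr)
stderr_handler.setLevel(logging.ERROR)

logging.basicConfig(level=logging.INFO, handlers=[stdout_handler, stderr_handler])
```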
4b5bcd8ac4 Closer memory estimation for hunyuan dit model. 2024-12-27 07:37:00 -05:00
ceb50b2cbf Closer memory estimation for pixart models. 2024-12-27 07:30:09 -05:00
160ca08138 Use python 3.9 in launch test instead of 3.8
Fix ruff check.
2024-12-26 20:05:54 -05:00
c4bfdba330 Support ascend npu (#5436)
* support ascend npu

Co-authored-by: YukMingLaw <lymmm2@163.com>
Co-authored-by: starmountain1997 <guozr1997@hotmail.com>
Co-authored-by: Ginray <ginray0215@gmail.com>
2024-12-26 19:36:50 -05:00
ee9547ba31 Improve temporal VAE Encode (Tiled) math. 2024-12-26 07:18:49 -05:00
19a64d6291 Cleanup some mac related code. 2024-12-25 05:32:51 -05:00
b486885e08 Disable bfloat16 on older mac. 2024-12-25 05:18:50 -05:00
0229228f3f Clean up the VAE dtypes code. 2024-12-25 04:50:34 -05:00
1ed75ab30e Update nightly pytorch instructions in readme for nvidia. 2024-12-25 03:29:03 -05:00
99a1fb6027 Make fast fp8 take a bit less peak memory. 2024-12-24 18:05:19 -05:00
73e04987f7 Prevent black images in VAE Decode (Tiled) node.
Overlap should be minimum 1 with tiling 2 for tiled temporal VAE decoding.
2024-12-24 07:36:30 -05:00
5388df784a Add temporal tiling to VAE Encode (Tiled) node. 2024-12-24 07:10:09 -05:00
26e0ba8f8c Enable External Event Loop Integration for ComfyUI [refactor] (#6114)
* Refactor main.py to support external event loop integration

* added optional "asyncio_loop" argument to allow using existing event loop

---------

Signed-off-by: bigcat88 <bigcat88@icloud.com>
2024-12-24 06:38:52 -05:00
bc6dac4327 Add temporal tiling to VAE Decode (Tiled) node.
You can now do tiled VAE decoding on the temporal direction for videos.
2024-12-23 20:03:37 -05:00
f18ebbd316 Use raw dir name to serve static web content (#6107) 2024-12-23 03:29:42 -05:00
15564688ed Add a try except block so if torch version is weird it won't crash. 2024-12-23 03:22:48 -05:00
c6b9c11ef6 Add oneAPI device selector for xpu and some other changes. (#6112)
* Add oneAPI device selector and some other minor changes.

* Fix device selector variable name.

* Flip minor version check sign.

* Undo changes to README.md.
2024-12-23 03:18:32 -05:00
e44d0ac7f7 Make --novram completely offload weights.
This flag is mainly used for testing the weight offloading, it shouldn't
actually be used in practice.

Remove useless import.
2024-12-23 01:51:08 -05:00
56bc64f351 Comment out some useless code. 2024-12-22 23:51:14 -05:00
f7d83b72e0 fixed a bug in ldm/pixart/blocks.py (#6158) 2024-12-22 23:44:20 -05:00
80f07952d2 Fix lowvram issue with ltxv vae. 2024-12-22 23:20:17 -05:00
57f330caf9 Relax minimum ratio of weights loaded in memory on nvidia.
This should make it possible to do higher res images/longer videos by
further offloading weights to CPU memory.

Please report an issue if this slows down things on your system.
2024-12-22 03:06:37 -05:00
601ff9e3db Add that Hunyuan Video and Pixart are supported to readme.
Clean up the supported models part of the readme.
2024-12-21 11:31:39 -05:00
341667c4d5 remove minimum step count for AYS (#6137)
The 10 step minimum for the AYS scheduler is pointless; it works well at lower step counts, like 8 or even 4 steps.

For example with LCM or DMD2.

Example here: https://i.ibb.co/56CSPMj/image.png
2024-12-21 10:05:09 -05:00
1419dee915 Update README.md for Intel GPUs (#6069) 2024-12-20 18:04:03 -05:00
da13b6b827 Get rid of meshgrid warning. 2024-12-20 18:02:12 -05:00
c86cd58573 Remove useless code. 2024-12-20 17:50:03 -05:00
b5fe39211a Remove some useless code. 2024-12-20 17:43:50 -05:00
e946667216 Some fixes/cleanups to pixart code.
Commented out the masking related code because it is never used in this
implementation.
2024-12-20 17:10:52 -05:00
d7969cb070 Replace print with logging (#6138)
* Replace print with logging

* nit

* nit

* nit

* nit

* nit

* nit
2024-12-20 16:24:55 -05:00
bddb02660c Add PixArt model support (#6055)
* PixArt initial version

* PixArt Diffusers convert logic

* pos_emb and interpolation logic

* Reduce duplicate code

* Formatting

* Use optimized attention

* Edit empty token logic

* Basic PixArt LoRA support

* Fix aspect ratio logic

* PixArtAlpha text encode with conds

* Use same detection key logic for PixArt diffusers
2024-12-20 15:25:00 -05:00
418eb7062d Support new LTXV VAE. 2024-12-20 04:38:29 -05:00
cac68ca813 Fix some more video tiled encode issues.
The downscale_ratio formula for the temporal dimension had issues with some
frame counts.
2024-12-19 23:14:03 -05:00
52c1d933b2 Fix tiled hunyuan video VAE encode issue.
Some shapes like 1024x1024 with tile_size 256 and overlap 64 had issues.
2024-12-19 22:55:15 -05:00
3cacd3fca5 Support preview images embedded in safetensors metadata (#6119)
* Support preview images embedded in safetensors metadata

* Add unit test for safetensors embedded image previews
2024-12-19 14:01:56 -08:00
2dda7c11a3 More proper fix for the memory issue. 2024-12-19 16:21:56 -05:00
3ad3248ad7 Fix lowvram bug when using a model multiple times in a row.
The memory system would load an extra 64MB each time until either the
model was completely in memory or OOM.
2024-12-19 16:04:56 -05:00
c441048a4f Make VAE Encode tiled node work with video VAE. 2024-12-19 05:31:39 -05:00
9f4b181ab3 Add fast previews for hunyuan video. 2024-12-18 18:24:23 -05:00
cbbf077593 Small optimizations. 2024-12-18 18:23:28 -05:00
0c04a6ae78 Add .github folder to maintainer owner list (#6027) 2024-12-18 15:06:53 -05:00
416ccc9e45 Update web content to release v1.5.19 (#6105) 2024-12-18 15:06:20 -05:00
ff2ff02168 Support old diffusion-pipe hunyuan video loras. 2024-12-18 06:23:54 -05:00
4c5c4ddeda Fix regression in VAE code on old pytorch versions. 2024-12-18 03:08:28 -05:00
79badea452 Add ConditioningStableAudio.
This lets you control the seconds_start and seconds_total parameters for
the Stable Audio model.
2024-12-18 03:01:12 -05:00
37e5390f5f Add: --use-sage-attention to enable SageAttention.
You need to have the library installed first.
2024-12-18 01:56:10 -05:00
a4f59bc65e Pick attention implementation based on device in llama code. 2024-12-18 01:30:20 -05:00
ca457f7ba1 Properly tokenize the template for hunyuan video. 2024-12-17 16:22:02 -05:00
cd6f615038 Fix tiled vae not working with some shapes. 2024-12-17 16:22:02 -05:00
517669aaa3 add preview 3d node (#6070)
* add preview 3d node

* mark 3d nodes as EXPERIMENTAL
2024-12-17 10:42:24 -08:00
e4e1bff605 Support diffusion-pipe hunyuan video lora format. 2024-12-17 07:14:21 -05:00
d6656b0c0c Support llama hunyuan video text encoder in scaled fp8 format. 2024-12-17 04:19:22 -05:00
f4cdedea62 Fix regression with ltxv VAE. 2024-12-17 02:17:31 -05:00
39b1fc4ccc Adjust used dtypes for hunyuan video VAE and diffusion model. 2024-12-16 23:31:10 -05:00
0b25f47bd9 Add some missing imports. 2024-12-16 19:42:01 -05:00
bda1482a27 Basic Hunyuan Video model support. 2024-12-16 19:35:40 -05:00
19ee5d9d8b Don't expand mask when not necessary.
Expanding seems to slow down inference.
2024-12-16 18:22:50 -05:00
61b50720d0 Add support for attention masking in Flux (#5942)
* fix attention OOM in xformers

* allow passing attention mask in flux attention

* allow an attn_mask in flux

* attn masks can be done using replace patches instead of a separate dict

* fix return types

* fix return order

* enumerate

* patch the right keys

* arg names

* fix a silly bug

* fix xformers masks

* replace match with if, elif, else

* mask with image_ref_size

* remove unused import

* remove unused import 2

* fix pytorch/xformers attention

This corrects a weird inconsistency with skip_reshape.
It also allows masks of various shapes to be passed, which will be
automatically expanded (in a memory-efficient way) to a size that is
compatible with xformers or pytorch sdpa respectively.

* fix mask shapes
2024-12-16 18:21:17 -05:00
0f954f34af Update README.md (#6071)
The last ROCM 6.2 build was November 22nd; after that date new builds use ROCM 6.2.4.

The builds from the new URL have been tested and work without problems.
2024-12-16 15:24:54 -05:00
5262901c5c Update web content to release v1.5.18 (#6075) 2024-12-16 11:38:24 -08:00
cc550d5908 use String directly to set bg color for load 3d canvas (#6057) 2024-12-16 10:51:40 -08:00
6d1a3f7d00 Fix case of ExecutionBlocker not handled correctly with INPUT_IS_LIST. 2024-12-15 08:41:35 -05:00
1b3a650f19 (fix): added "model_type" to photomaker node (#6047) 2024-12-15 00:18:02 -05:00
e83063bf24 Support conv3d in PatchEmbed. 2024-12-14 05:46:04 -05:00
558b7d8b22 fix: prestartup script is not applied due to extra_model_paths.yaml and ensure custom paths are used during startup (#5872)
* fix: The custom nodes installed in the paths specified in `extra_model_paths.yaml` encounter a bug where the prestartup script is not imported.

* Ensure custom paths are used during startup
https://github.com/comfyanonymous/ComfyUI/pull/5794
2024-12-13 18:21:32 -05:00
caf2074773 add_model_folder_path: ensure unique paths by removing duplicates (#5998)
* add_model_folder_path: ensure unique paths by removing duplicates

Signed-off-by: bigcat88 <bigcat88@icloud.com>

* refactored "add_model_folder_path" and added tests

---------

Signed-off-by: bigcat88 <bigcat88@icloud.com>
2024-12-13 18:19:22 -05:00
bdf393792d add load 3d node support (#5564)
* add load 3d node support

* remove Preview3D from BE
2024-12-13 18:13:52 -05:00
4e14032c02 Make pad_to_patch_size function work on multi dim. 2024-12-13 07:22:05 -05:00
59d58b1158 [Security] Fix potential XSS on /view (#6034) 2024-12-13 04:56:43 -05:00
563291ee51 Enforce all pyflake lint rules (#6033)
* Enforce F821 undefined-name

* Enforce all pyflake lint rules
2024-12-12 19:29:37 -05:00
6c0377f43e Enforce F821 undefined-name (#6032) 2024-12-12 19:24:41 -05:00
2cddbf0821 Lint and fix undefined names (1/N) (#6028) 2024-12-12 18:55:26 -05:00
60749f345d Lint and fix undefined names (3/N) (#6030) 2024-12-12 18:49:40 -05:00
d4426dce7c Lint and fix undefined names (2/N) (#6029) 2024-12-12 18:48:21 -05:00
d9d7f3c619 Lint all unused variables (#5989)
* Enable F841

* Autofix

* Remove all unused variable assignment
2024-12-12 17:59:16 -05:00
fd5dfb812c Set initial load devices for te and model to mps device on mac. 2024-12-12 06:00:31 -05:00
3dfdddcc91 Update README (Add new keybinding entries) (#6020) 2024-12-11 15:55:38 -08:00
5747bc6457 Optimize model library (#5841)
* Move model manager routes

* Add experiment model manager api

* Fix cache causing returns to be empty

* Fix unable to compare sub-dir caches

* Skip non-existent folders

* Add model preview

* Revert 'Move model manager routes'

* move model_filemanager.py to app/

* Update model_manager.py

3.8 compatibility

---------
2024-12-11 18:12:04 -05:00
5bea1d2ec9 Add MaHiRo (improved/alternate CFG) (#5975)
* Add MaHiRo (improved CFG)

A long explanation of what it is can be found [here](https://huggingface.co/spaces/yoinked/blue-arxiv) (2024-1208.1)

note: if the node name has encoding issues (utf 8/whatever), I'd suggest replacing the face at the end with `(>w<)`

* add it to nodes.py, add description, and make it a post_cfg function

* fix

* revert the sampler_cfg_function thing

* switch cfg to args["denoised"]
2024-12-11 16:51:51 -05:00
5def9fbc83 Update CI workflow to remove Windows testing configuration (#6007)
- Commented out Windows OS from the CI matrix in test-ci.yml.
- Removed the test-win-nightly job to streamline testing on macOS and Linux only.
- Adjusted the matrix strategy to focus on Python versions and CUDA compatibility without Windows support.
2024-12-11 16:48:41 -05:00
7a7efe8424 Support loading some checkpoint files with nested dicts. 2024-12-11 08:04:54 -05:00
44db978531 Fix a few things in text enc code for models with no eos token. 2024-12-10 23:07:26 -05:00
1c8d11e48a Support different types of tokenizers.
Support tokenizers without an eos token.

Pass full sentences to tokenizer for more efficient tokenizing.
2024-12-10 15:03:39 -05:00
a220d11e6b Replace pylint with ruff (#5987) 2024-12-09 22:04:23 -05:00
23827ca312 Add cond_scale to sampler_post_cfg_function (#5985) 2024-12-09 20:13:18 -05:00
0fd4e6c778 Lint unused import (#5973)
* Lint unused import

* nit

* Remove unused imports

* revert fix_torch import

* nit
2024-12-09 15:24:39 -05:00
e2fafe0686 Make CLIP set last layer node work with t5 models. 2024-12-09 03:57:14 -05:00
6579632201 Remove unused imports and variables. 2024-12-08 08:08:12 -05:00
ac2f0523ca Set env vars to disable telemetry in libs used by some custom nodes. 2024-12-07 14:51:45 -05:00
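The commit doesn't name the variables; as an illustration, these are common opt-outs such a change might set (the exact set used is an assumption):

```python
import os

# Opt out of telemetry before importing libraries that honor these variables.
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")  # huggingface_hub
os.environ.setdefault("DO_NOT_TRACK", "1")              # generic opt-out honored by several tools
```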
fbf68c4e52 clamp input (#5928) 2024-12-07 14:00:31 -05:00
93477f8efe Add code owners (#5873)
* Add code owners

* Update owners

* nit

* Inline owners

* Remove team links

* Add Kosinkadink
2024-12-06 22:00:54 -05:00
8af9a91e0c A few improvements to #5937. 2024-12-06 05:49:15 -05:00
005d2d3a13 ltxv: add noise to guidance image to ensure generated motion. (#5937) 2024-12-06 05:46:08 -05:00
1e21f4c14e Make timestep ranges more usable on rectified flow models.
This breaks some old workflows but should make the nodes actually useful.
2024-12-05 16:40:58 -05:00
9a616b81c1 Add rescaling_scale from STG to SkipLayerGuidanceDiT. 2024-12-04 19:25:50 -05:00
3bed56bb13 Add another ROCm tip. 2024-12-04 15:14:12 -05:00
4e402b11c6 Reland union type (#5900)
* Reapply "Add union link connection type support (#5806)" (#5889)

This reverts commit bf9a90a145.

* Fix union type breaks existing type workarounds

* Add non-string test

* Add tests for hacks and non-string types

* Support python versions lower than 3.11
2024-12-04 15:12:10 -05:00
48272448ad [Developer Experience] Add node typing (#5676)
* [Developer Experience] Add node typing

* Shim StrEnum

* nit

* nit

* nit
2024-12-04 15:01:00 -05:00
f7695b5f9e Add Create Hook Keyframes Interp. node to simplify creating groups of keyframes without external nodes (#5896) 2024-12-03 21:03:09 -05:00
452179fe4f Make ModelPatcher class clone function work with inheritance. 2024-12-03 13:57:57 -05:00
bf9a90a145 Revert "Add union link connection type support (#5806)" (#5889)
This reverts commit 8d4e06324f.
2024-12-03 13:06:34 -05:00
c1b92b719d Some optimizations to euler a. 2024-12-03 06:11:52 -05:00
cdc3b97dd5 resolve relative paths in YAML configuration for extra model paths (#5847)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2024-12-03 06:02:01 -05:00
8d4e06324f Add union link connection type support (#5806)
* Add union type support

* Move code

* nit
2024-12-03 05:46:00 -05:00
57e8bf6a9f Fix case where a memory leak could cause crash.
Now the only symptom of code messing up and keeping references to a model
object when it should not will be endless prints in the log instead of the
next workflow crashing ComfyUI.
2024-12-02 19:49:49 -05:00
0ee322ec5f ModelPatcher Overhaul and Hook Support (#5583)
* Added hook_patches to ModelPatcher for weights (model)

* Initial changes to calc_cond_batch to eventually support hook_patches

* Added current_patcher property to BaseModel

* Consolidated add_hook_patches_as_diffs into add_hook_patches func, fixed fp8 support for model-as-lora feature

* Added call to initialize_timesteps on hooks in process_conds func, and added call prepare current keyframe on hooks in calc_cond_batch

* Added default_conds support in calc_cond_batch func

* Added initial set of hook-related nodes, added code to register hooks for loras/model-as-loras, small renaming/refactoring

* Made CLIP work with hook patches

* Added initial hook scheduling nodes, small renaming/refactoring

* Fixed MaxSpeed and default conds implementations

* Added support for adding weight hooks that aren't registered on the ModelPatcher at sampling time

* Made Set Clip Hooks node work with hooks from Create Hook nodes, began work on better Create Hook Model As LoRA node

* Initial work on adding 'model_as_lora' lora type to calculate_weight

* Continued work on simpler Create Hook Model As LoRA node, started to implement ModelPatcher callbacks, attachments, and additional_models

* Fix incorrect ref to create_hook_patches_clone after moving function

* Added injections support to ModelPatcher + necessary bookkeeping, added additional_models support in ModelPatcher, conds, and hooks

* Added wrappers to ModelPatcher to facilitate standardized function wrapping

* Started scaffolding for other hook types, refactored get_hooks_from_cond to organize hooks by type

* Fix skip_until_exit logic bug breaking injection after first run of model

* Updated clone_has_same_weights function to account for new ModelPatcher properties, improved AutoPatcherEjector usage in partially_load

* Added WrapperExecutor for non-classbound functions, added calc_cond_batch wrappers

* Refactored callbacks+wrappers to allow storing lists by id

* Added forward_timestep_embed_patch type, added helper functions on ModelPatcher for emb_patch and forward_timestep_embed_patch, added helper functions for removing callbacks/wrappers/additional_models by key, added custom_should_register prop to hooks

* Added get_attachment func on ModelPatcher

* Implement basic MemoryCounter system for determining which cached weights due to hooks should be offloaded in hooks_backup

* Modified ControlNet/T2IAdapter get_control function to receive transformer_options as additional parameter, made the model_options stored in extra_args in inner_sample be a clone of the original model_options instead of same ref

* Added create_model_options_clone func, modified type annotations to use __future__ so that I can use the better type annotations

* Refactored WrapperExecutor code to remove need for WrapperClassExecutor (now gone), added sampler.sample wrapper (pending review, will likely keep but will see what hacks this could currently let me get rid of in ACN/ADE)

* Added Combine versions of Cond/Cond Pair Set Props nodes, renamed Pair Cond to Cond Pair, fixed default conds never applying hooks (due to hooks key typo)

* Renamed Create Hook Model As LoRA nodes to make the test node the main one (more changes pending)

* Added uuid to conds in CFGGuider and uuids to transformer_options to allow uniquely identifying conds in batches during sampling

* Fixed models not being unloaded properly due to current_patcher reference; the current ComfyUI model cleanup code requires that nothing else has a reference to the ModelPatcher instances

* Fixed default conds not respecting hook keyframes, made keyframes not reset cache when strength is unchanged, fixed Cond Set Default Combine throwing error, fixed model-as-lora throwing error during calculate_weight after a recent ComfyUI update, small refactoring/scaffolding changes for hooks

* Changed CreateHookModelAsLoraTest to be the new CreateHookModelAsLora, rename old ones as 'direct' and will be removed prior to merge

* Added initial support within CLIP Text Encode (Prompt) node for scheduling weight hook CLIP strength via clip_start_percent/clip_end_percent on conds, added schedule_clip toggle to Set CLIP Hooks node, small cleanup/fixes

* Fix range check in get_hooks_for_clip_schedule so that proper keyframes get assigned to corresponding ranges

* Optimized CLIP hook scheduling to treat same strength as same keyframe

* Less fragile memory management.

* Make encode_from_tokens_scheduled call cleaner, rollback change in model_patcher.py for hook_patches_backup dict

* Fix issue.

* Remove useless function.

* Prevent and detect some types of memory leaks.

* Run garbage collector when switching workflow if needed.

* Moved WrappersMP/CallbacksMP/WrapperExecutor to patcher_extension.py

* Refactored code to store wrappers and callbacks in transformer_options, added apply_model and diffusion_model.forward wrappers

* Fix issue.

* Refactored hooks in calc_cond_batch to be part of get_area_and_mult tuple, added extra_hooks to ControlBase to allow custom controlnets w/ hooks, small cleanup and renaming

* Fixed inconsistency of results when schedule_clip is set to False, small renaming/typo fixing, added initial support for ControlNet extra_hooks to work in tandem with normal cond hooks, initial work on calc_cond_batch merging all subdicts in returned transformer_options

* Modified callbacks and wrappers so that unregistered types can be used, allowing custom_nodes to have their own unique callbacks/wrappers if desired

* Updated different hook types to reflect actual progress of implementation, initial scaffolding for working WrapperHook functionality

* Fixed existing weight hook_patches (pre-registered) not working properly for CLIP

* Removed Register/Direct hook nodes since they were present only for testing, removed diff-related weight hook calculation as improved_memory removes unload_model_clones and using sample time registered hooks is less hacky

* Added clip scheduling support to all other native ComfyUI text encoding nodes (sdxl, flux, hunyuan, sd3)

* Made WrapperHook functional, added another wrapper/callback getter, added ON_DETACH callback to ModelPatcher

* Made opt_hooks append by default instead of replace, renamed comfy.hooks set functions to be more accurate

* Added apply_to_conds to Set CLIP Hooks, modified relevant code to allow text encoding to automatically apply hooks to output conds when apply_to_conds is set to True

* Fix cached_hook_patches not respecting target_device/memory_counter results

* Fixed issue with setting weights from hooks instead of copying them, added additional memory_counter check when caching hook patches

* Remove unnecessary torch.no_grad calls for hook patches

* Increased MemoryCounter minimum memory to leave free by *2 until a better way to get inference memory estimate of currently loaded models exists

* For encode_from_tokens_scheduled, allow start_percent and end_percent in add_dict to limit which scheduled conds get encoded for optimization purposes

* Removed a .to call on results of calculate_weight in patch_hook_weight_to_device that was screwing up the intermediate results for fp8 prior to being passed into stochastic_rounding call

* Made encode_from_tokens_scheduled work when no hooks are set on patcher

* Small cleanup of comments

* Turn off hook patch caching when only 1 hook present in sampling, replace some current_hook = None with calls to self.patch_hooks(None) instead to avoid a potential edge case

* On Cond/Cond Pair nodes, removed opt_ prefix from optional inputs

* Allow both FLOATS and FLOAT for floats_strength input

* Revert change, does not work

* Made patch_hook_weight_to_device respect set_func and convert_func

* Make discard_model_sampling True by default

* Add changes manually from 'master' so merge conflict resolution goes more smoothly

* Cleaned up text encode nodes with just a single clip.encode_from_tokens_scheduled call

* Make sure encode_from_tokens_scheduled will respect use_clip_schedule on clip

* Made nodes in nodes_hooks be marked as experimental (beta)

* Add get_nested_additional_models for cases where additional_models could have their own additional_models, and add robustness for circular additional_models references

* Made finalize_default_conds area math consistent with other sampling code

* Changed 'opt_hooks' input of Cond/Cond Pair Set Default Combine nodes to 'hooks'

* Remove a couple old TODO's and a no longer necessary workaround
2024-12-02 14:51:02 -05:00
79d5ceae6e Improved memory management. (#5450)
* Less fragile memory management.

* Fix issue.

* Remove useless function.

* Prevent and detect some types of memory leaks.

* Run garbage collector when switching workflow if needed.

* Fix issue.
2024-12-02 14:39:34 -05:00
2d5b3e0078 Remove useless code. 2024-12-02 06:49:55 -05:00
8e4118c0de make dpm_2_ancestral work with rectified flow. 2024-12-01 07:37:41 -05:00
3fc6ebcdd7 Add basic style model "multiply" strength. 2024-11-30 07:27:11 -05:00
20a560eb97 How to enable experimental memory efficient attention on ROCm RDNA3. 2024-11-29 06:19:49 -05:00
82c5308561 Backward compatibility patch for changes in the method signature of InpaintModelConditioning. (#5825)
https://github.com/comfyanonymous/ComfyUI/issues/5813
2024-11-28 20:30:28 -05:00
26fb2c68e8 Add a way to disable cropping in the CLIPVisionEncode node. 2024-11-28 20:24:47 -05:00
bf2650a80e Fast previews for ltxv. 2024-11-28 06:46:15 -05:00
53646e0f32 Update web content to release v1.4.13 (#5807) 2024-11-28 04:59:06 -05:00
20879c78f9 Remove internal model download endpoint (#5432) 2024-11-28 04:57:06 -05:00
b666539595 Remove print. 2024-11-27 20:28:39 -05:00
95d8713482 Missing parentheses. 2024-11-27 13:45:32 -05:00
0d4e29f13f LTXV model merging node. 2024-11-27 01:43:31 -05:00
497db6212f Alternative fix for #5767 2024-11-26 17:53:04 -05:00
lky
24dc581dc3 fix multi add makedirs error (#5786)
Starting multiple ComfyUI servers at the same time triggered this error.
2024-11-26 15:34:19 -05:00
4c82741b54 Support official SD3.5 Controlnets. 2024-11-26 11:31:25 -05:00
15c39ea757 Support for the official mochi lora format. 2024-11-26 03:34:36 -05:00
b7143b74ce Flux inpaint model does not work in fp16. 2024-11-26 01:33:01 -05:00
61196d8857 Add option to inference the diffusion model in fp32 and fp64. 2024-11-25 05:00:23 -05:00
b4526d3fc3 Skip layer guidance now works on hydit model. 2024-11-24 05:54:30 -05:00
3d802710e7 Update README.md (#5707) 2024-11-24 04:12:07 -05:00
7126ecffde set LTX min length to 1 for t2i (#5750)
At length=1, the LTX model can do txt2img and img2img with no other changes required.
2024-11-23 21:33:08 -05:00
ab885b33ba Skip layer guidance node now works on LTX-Video. 2024-11-23 10:33:05 -05:00
839ed3368e Some improvements to the lowvram unloading. 2024-11-22 20:59:15 -05:00
6e8cdcd3cb Fix some tiled VAE decoding issues with LTX-Video. 2024-11-22 18:00:34 -05:00
e5c3f4b87f LTXV lowvram fixes. 2024-11-22 17:17:11 -05:00
bc6be6c11e Some fixes to the lowvram system. 2024-11-22 16:40:04 -05:00
94323a26a7 Remove prints. 2024-11-22 10:51:31 -05:00
5818f6cf51 Remove print. 2024-11-22 10:49:15 -05:00
0b734de449 Add LTX-Video support to the Readme. 2024-11-22 09:24:20 -05:00
5e16f1d24b Support Lightricks LTX-Video model. 2024-11-22 08:46:39 -05:00
2fd9c1308a Fix mask issue in some attention functions. 2024-11-22 02:10:09 -05:00
8f0009aad0 Support new flux model variants. 2024-11-21 08:38:23 -05:00
41444b5236 Add some new weight patching functionality.
Add a way to reshape lora weights.

Allow weight patches to all weight not just .weight and .bias

Add a way for a lora to set a weight to a specific value.
2024-11-21 07:19:17 -05:00
772e620e32 Update readme. 2024-11-20 20:42:51 -05:00
07f6eeaa13 Fix mask issue with attention_xformers. 2024-11-20 17:07:46 -05:00
22535d0589 Skip layer guidance now works on stable audio model. 2024-11-20 07:33:06 -05:00
898615122f Rename add_noise_mask -> noise_mask. 2024-11-19 15:31:09 -05:00
156a28786b Add boolean to InpaintModelConditioning to disable the noise mask. 2024-11-19 07:31:29 -05:00
f498d855ba Add terminal size fallback (#5623) 2024-11-19 03:34:20 -05:00
b699a15062 Refactor inpaint/ip2p code. 2024-11-19 03:25:25 -05:00
9cc90ee3eb Update UI screenshot in README (#5666)
* Update UI ScreenShot in README

* Remove legacy UI screenshot file

* nit

* nit
2024-11-18 16:50:34 -05:00
9a0a5d32ee Add a skip layer guidance node that can also skip single layers.
This one should work for skipping the single layers of models like Flux
and Auraflow.

If you want to see how these models work and how many double/single layers
they have see the "ModelMerge*" nodes for the specific model.
2024-11-18 02:20:43 -05:00
d9f90965c8 Support block replace patches in auraflow. 2024-11-17 08:19:59 -05:00
41886af138 Add transformer options blocks replace patch to mochi. 2024-11-16 20:48:14 -05:00
22a1d7ce78 Fix 3.8 compatibility in user_manager.py (#5645) 2024-11-16 20:42:21 -05:00
4ac401af2b Update web content to release v1.3.44 (#5620)
* Update web content to release v1.3.44

* nit
2024-11-15 20:17:15 -05:00
5fb59c8475 Add a node to block merge auraflow models. 2024-11-15 12:47:55 -05:00
122c9ca1ce Add advanced model merging node for mochi. 2024-11-14 07:51:20 -05:00
3b9a6cf2b1 Fix issue with 3d masks. 2024-11-13 07:18:30 -05:00
3748e7ef7a Fix regression. 2024-11-13 04:24:48 -05:00
8ebf2d8831 Add block replace transformer_options to flux. 2024-11-12 08:00:39 -05:00
a72d152b0c fix --cuda-device arg for AMD/HIP devices (#5586)
* fix --cuda-device arg for AMD/HIP devices

CUDA_VISIBLE_DEVICES is ignored for HIP devices/backends; they use HIP_VISIBLE_DEVICES instead. Setting this environment variable has no side effect for CUDA/NVIDIA, so it can safely be set in either case, and vice versa.

* deleted accidental if
2024-11-12 06:53:36 -05:00
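A minimal sketch of what this fix amounts to, assuming the flag value is applied before torch is imported (the env var names come straight from the commit description):

    import os

    device_id = "1"  # value passed to --cuda-device
    os.environ["CUDA_VISIBLE_DEVICES"] = device_id
    os.environ["HIP_VISIBLE_DEVICES"] = device_id  # harmless no-op on CUDA/NVIDIA
    import torch  # import only after the environment is set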
eb476e6ea9 Allow 1D masks for 1D latents. 2024-11-11 14:44:52 -05:00
2d28b0b479 improve: add descriptions for clip loaders (#5576) 2024-11-11 05:37:23 -05:00
8b275ce5be Support auto detecting some zsnr anime checkpoints. 2024-11-11 05:34:11 -05:00
2a18e98ccf Refactor so that zsnr can be set in the sampling_settings. 2024-11-11 04:55:56 -05:00
8a5281006f Fix some custom nodes. 2024-11-10 22:41:00 -05:00
bdeb1c171c Fast previews for mochi. 2024-11-10 03:39:35 -05:00
9c1ed58ef2 proper fix for sag. 2024-11-10 00:10:45 -05:00
8b90e50979 Properly handle and reshape masks when used on 3d latents. 2024-11-09 15:30:19 -05:00
6ee066a14f Live terminal output (#5396)
* Add /logs/raw and /logs/subscribe for getting logs on frontend
Hijacks stderr/stdout to send all output data to the client on flush

* Use existing send sync method

* Fix get_logs should return string

* Fix bug

* pass no server

* fix tests

* Fix output flush on linux
2024-11-08 19:13:34 -05:00
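A rough sketch of the stderr/stdout hijacking described above, not the actual implementation; the on_flush callback stands in for whatever forwards output to /logs/subscribe clients:

    import sys

    class InterceptedStream:
        def __init__(self, wrapped, on_flush):
            self.wrapped = wrapped
            self.on_flush = on_flush
            self.pending = []

        def write(self, text):
            self.pending.append(text)
            return self.wrapped.write(text)

        def flush(self):
            self.wrapped.flush()
            if self.pending:
                self.on_flush("".join(self.pending))  # send buffered text to subscribers
                self.pending = []

    logs = []
    sys.stdout = InterceptedStream(sys.stdout, logs.append)
    print("hello")
    sys.stdout.flush()               # forwards "hello\n" into logs
    sys.stdout = sys.stdout.wrapped  # restore the real stream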
dd5b57e3d7 fix for SAG with Kohya HRFix/ Deep Shrink (#5546)
now works with arbitrary downscale factors
2024-11-08 18:16:29 -05:00
75a818c720 Move mochi latent node to: latent/video. 2024-11-08 08:33:44 -05:00
2865f913f7 Free memory before doing tiled decode. 2024-11-07 04:01:24 -05:00
b49616f951 Make VAEDecodeTiled node work with video VAEs. 2024-11-07 03:47:12 -05:00
5e29e7a488 Remove scaled_fp8 key after reading it to silence warning. 2024-11-06 04:56:42 -05:00
8afb97cd3f Fix unknown VAE being detected as the mochi VAE. 2024-11-05 03:43:27 -05:00
69694f40b3 fix dynamic shape export (#5490) 2024-11-04 14:59:28 -05:00
c49025f01b Allow POST /userdata/{file} endpoint to return full file info (#5446)
* Refactor listuserdata

* Full info param

* Add tests

* Fix mock

* Add full_info support for move user file
2024-11-04 13:57:21 -05:00
696672905f Add mochi support to readme. 2024-11-04 04:55:07 -05:00
6c9dbde7de Fix mochi all in one checkpoint t5xxl key names. 2024-11-03 01:40:42 -05:00
ee8abf0cff Update folder paths: "clip" -> "text_encoders"
You can still use models/clip but the folder might get removed eventually
on new installs of ComfyUI.
2024-11-02 15:35:38 -04:00
fabf449feb Mochi VAE encoder. 2024-11-01 17:33:09 -04:00
cc9cf6d1bd Rename some nodes in Display Name Mappings (nodes.py) (#5439)
* Update nodes_images.py

The nodes menu has inconsistent names: some have spaces between words, others do not.

* Update nodes.py

Include the node mapping name line for Image Crop Node

* Update nodes_images.py

* Rename image nodes

Add spaces between words for consistency in the display name mappings.
2024-10-31 15:18:05 -04:00
1c8286a44b Avoid SyntaxWarning in UniPC docstring (#5442) 2024-10-31 15:17:26 -04:00
1af4a47fd1 Bump up mac version for attention upcast bug workaround. 2024-10-31 15:15:31 -04:00
f2aaa0a475 Rename ImageCrop to Image Crop (#5424)
* Update nodes_images.py

The nodes menu has inconsistent names: some have spaces between words, others do not.

* Update nodes.py

Include the node mapping name line for Image Crop Node

* Update nodes_images.py
2024-10-31 00:35:34 -04:00
daa1565b93 Fix diffusers flux controlnet regression. 2024-10-30 13:11:34 -04:00
09fdb2b269 Support SD3.5 medium diffusers format weights and loras. 2024-10-30 04:24:00 -04:00
65a8659182 Update web content to release v1.3.26 (#5413)
* Update web content to release v1.3.26

* nit
2024-10-29 14:14:06 -04:00
770ab200f2 Cleanup SkipLayerGuidanceSD3 node. 2024-10-29 10:11:46 -04:00
954683d0db SLG first implementation for SD3.5 (#5404)
* SLG first implementation for SD3.5

* * Simplify and align with comfy style
2024-10-29 09:59:21 -04:00
30c0c81351 Add a way to patch blocks in SD3. 2024-10-29 00:48:32 -04:00
13b0ff8a6f Update SD3 code. 2024-10-28 21:58:52 -04:00
c320801187 Remove useless line. 2024-10-28 17:41:12 -04:00
c0b0cfaeec Update web content to release v1.3.21 (#5351)
* Update web content to release v1.3.21

* nit
2024-10-28 14:29:38 -04:00
669d9e4c67 Set default shift on mochi to 6.0 2024-10-27 22:21:04 -04:00
9ee0a6553a float16 inference is a bit broken on mochi. 2024-10-27 04:56:40 -04:00
5cbb01bc2f Basic Genmo Mochi video model support.
To use:
"Load CLIP" node with t5xxl + type mochi
"Load Diffusion Model" node with the mochi dit file.
"Load VAE" with the mochi vae file.

EmptyMochiLatentVideo node for the latent.
euler + linear_quadratic in the KSampler node.
2024-10-26 06:54:00 -04:00
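As a sketch, the workflow above expressed as an API prompt; the KSampler, CLIPLoader, UNETLoader, VAELoader, CLIPTextEncode, VAEDecode and SaveImage class names are standard ComfyUI nodes, but the file names and the EmptyMochiLatentVideo input names here are assumptions:

    import json, urllib.request

    prompt = {
        "1": {"class_type": "CLIPLoader",
              "inputs": {"clip_name": "t5xxl_fp16.safetensors", "type": "mochi"}},
        "2": {"class_type": "UNETLoader",
              "inputs": {"unet_name": "mochi_dit.safetensors", "weight_dtype": "default"}},
        "3": {"class_type": "VAELoader", "inputs": {"vae_name": "mochi_vae.safetensors"}},
        "4": {"class_type": "CLIPTextEncode",
              "inputs": {"clip": ["1", 0], "text": "a cat playing piano"}},
        "5": {"class_type": "CLIPTextEncode", "inputs": {"clip": ["1", 0], "text": ""}},
        "6": {"class_type": "EmptyMochiLatentVideo",  # input names assumed
              "inputs": {"width": 848, "height": 480, "length": 25, "batch_size": 1}},
        "7": {"class_type": "KSampler",
              "inputs": {"model": ["2", 0], "positive": ["4", 0], "negative": ["5", 0],
                         "latent_image": ["6", 0], "seed": 0, "steps": 30, "cfg": 4.5,
                         "sampler_name": "euler", "scheduler": "linear_quadratic",
                         "denoise": 1.0}},
        "8": {"class_type": "VAEDecode", "inputs": {"samples": ["7", 0], "vae": ["3", 0]}},
        "9": {"class_type": "SaveImage",
              "inputs": {"images": ["8", 0], "filename_prefix": "mochi"}},
    }
    req = urllib.request.Request("http://127.0.0.1:8188/prompt",
                                 data=json.dumps({"prompt": prompt}).encode(),
                                 headers={"Content-Type": "application/json"})
    urllib.request.urlopen(req)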
c3ffbae067 Make LatentUpscale nodes work on 3d latents. 2024-10-26 01:50:51 -04:00
d605677b33 Make euler_ancestral work on flow models (credit: Ashen). 2024-10-25 19:53:44 -04:00
ce759b7db6 Revert download to .tmp in frontend_management (#5369) 2024-10-25 19:26:13 -04:00
52810907e2 Add a model merge node for SD3.5 large. 2024-10-24 16:46:21 -04:00
af8cf79a2d support SimpleTuner lycoris lora for SD3 (#5340) 2024-10-24 01:18:32 -04:00
66b0961a46 Fix ControlLora issue with last commit. 2024-10-23 17:02:40 -04:00
754597c8a9 Clean up some controlnet code.
Remove self.device which was useless.
2024-10-23 14:19:05 -04:00
915fdb5745 Fix lowvram edge case. 2024-10-22 16:34:50 -04:00
5a8a48931a remove attention abstraction (#5324) 2024-10-22 14:02:38 -04:00
8ce2a1052c Optimizations to --fast and scaled fp8. 2024-10-22 02:12:28 -04:00
f82314fcfc Fix duplicate sigmas on beta scheduler. 2024-10-21 20:19:45 -04:00
0075c6d096 Mixed precision diffusion models with scaled fp8.
This change adds support for diffusion models where all the linear layers are
scaled fp8 while the other weights stay in the original precision.
2024-10-21 18:12:51 -04:00
83ca891118 Support scaled fp8 t5xxl model. 2024-10-20 22:27:00 -04:00
f9f9faface Fixed model merging issue with scaled fp8. 2024-10-20 06:24:31 -04:00
471cd3eace fp8 casting is fast on GPUs that support fp8 compute. 2024-10-20 00:54:47 -04:00
a68bbafddb Support diffusion models with scaled fp8 weights. 2024-10-19 23:47:42 -04:00
73e3a9e676 Clamp output when rounding weight to prevent Nan. 2024-10-19 19:07:10 -04:00
518c0dc2fe Add tooltips to LoraSave node. 2024-10-18 06:01:09 -04:00
ce0542e10b Add a note that python 3.13 is not yet supported to the README. 2024-10-17 19:27:37 -04:00
8473019d40 Pytorch can be shipped with numpy 2 now. 2024-10-17 19:15:17 -04:00
89f15894dd Ignore more network related errors during websocket communication. (#5269)
Intermittent network issues during websocket communication should not crash the ComfyUI process.

Co-authored-by: Xiaodong Xie <xie.xiaodong@frever.com>
2024-10-17 18:31:45 -04:00
67158994a4 Use the lowvram cast_to function for everything. 2024-10-17 17:25:56 -04:00
7390ff3b1e Add missing import. 2024-10-16 14:58:30 -04:00
0bedfb26af Revert "Fix Transformers FutureWarning (#5140)"
This reverts commit 95b7cf9bbe.
2024-10-16 12:36:19 -04:00
f71cfd2687 Add an experimental node to sharpen latents.
Can be used with LatentApplyOperationCFG for interesting results.
2024-10-16 05:25:31 -04:00
c695c4af7f Frontend Manager: avoid redundant gh calls for static versions (#5152)
* Frontend Manager: avoid redundant gh calls for static versions

* actually, removing old tmpdir isn't needed

I tested - downloader code handles this case well already
(also rmdir was wrong func anyway, needed shutil.rmtree if it had content)

* add code comment
2024-10-16 03:35:37 -04:00
0dbba9f751 Add some latent operation nodes.
This is a port of the ModelSamplerTonemapNoiseTest from the experiments
repo.

To replicate that node use LatentOperationTonemapReinhard and
LatentApplyOperationCFG together.
2024-10-15 15:00:36 -04:00
f584758271 Cleanup some useless lines. 2024-10-14 21:02:39 -04:00
95b7cf9bbe Fix Transformers FutureWarning (#5140)
* Update sd1_clip.py

Fix Transformers FutureWarning

* Update sd1_clip.py

Fix comment
2024-10-14 20:12:20 -04:00
191a0d56b4 Switch default packaging workflows to python 3.12 2024-10-13 06:59:31 -04:00
3c60ecd7a8 Fix fp8 ops staying enabled. 2024-10-12 14:10:13 -04:00
7ae6626723 Remove useless argument. 2024-10-12 07:16:21 -04:00
6632365e16 model_options consistency between functions.
weight_dtype -> dtype
2024-10-11 20:51:19 -04:00
ad07796777 🐛 Add device to variable c (#5210) 2024-10-11 20:37:50 -04:00
1b80895285 Make clip loader nodes support loading sd3 t5xxl in lower precision.
Add attention mask support in the SD3 text encoder code.
2024-10-10 15:06:15 -04:00
5f9d5a244b Hotfix for the div zero occurrence when memory_used_encode is 0 (#5121)
https://github.com/comfyanonymous/ComfyUI/issues/5069#issuecomment-2382656368
2024-10-09 23:34:34 -04:00
14eba07acd Update web content to release v1.3.11 (#5189)
* Update web content to release v1.3.11

* nit
2024-10-09 22:37:04 -04:00
4b2f0d9413 Increase maximum macOS version to 15.0.1 when forcing upcast attention (#5191) 2024-10-09 22:21:41 -04:00
25eac1d780 Change runner label for the new runners (#5197) 2024-10-09 20:08:57 -04:00
e38c94228b Add a weight_dtype fp8_e4m3fn_fast to the Diffusion Model Loader node.
This is used to load weights in fp8 and use fp8 matrix multiplication.
2024-10-09 19:43:17 -04:00
203942c8b2 Fix flux doras with diffusers keys. 2024-10-08 19:03:40 -04:00
3c72c89a52 Update folder_paths.py - try/catch for special file_name values (#5187)
Somehow managed to drop a file called "nul" into a windows checkpoints subdirectory. This caused all sorts of havoc with many nodes that needed the list of checkpoints.
2024-10-08 15:04:32 -04:00
614377abd6 Update web content to release v1.2.64 (#5124) 2024-10-07 17:15:29 -04:00
8dfa0cc552 Make SD3 fast previews a little better. 2024-10-07 09:19:59 -04:00
e5ecdfdd2d Make fast previews for SDXL a little better by adding a bias. 2024-10-06 19:27:04 -04:00
7d29fbf74b Slightly improve the fast previews for flux by adding a bias. 2024-10-06 17:55:46 -04:00
Lex
2c641e64ad IS_CHANGED should be a classmethod (#5159) 2024-10-06 05:47:51 -04:00
7d2467e830 Some minor cleanups. 2024-10-05 13:22:39 -04:00
6f021d8aa0 Let --verbose have an argument for the log level. 2024-10-04 10:05:34 -04:00
d854ed0bcf Allow using SD3 type te output on flux model. 2024-10-03 09:44:54 -04:00
abcd006b8c Allow more permutations of clip/t5 in dual clip loader. 2024-10-03 09:26:11 -04:00
d985d1d7dc CLIP Loader node now supports clip_l and clip_g only for SD3. 2024-10-02 04:25:17 -04:00
d1cdf51e1b Refactor some of the TE detection code. 2024-10-01 07:08:41 -04:00
b4626ab93e Add simpletuner lycoris format for SD unet. 2024-09-30 06:03:27 -04:00
a9e459c2a4 Use torch.nn.functional.linear in RGB preview code.
Add an optional bias to the latent RGB preview code.
2024-09-29 11:27:49 -04:00
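The preview math in isolation, as a sketch; the factor and bias values below are placeholders, not the real per-model tables:

    import torch
    import torch.nn.functional as F

    latent = torch.randn(1, 4, 64, 64)      # [B, C, H, W]
    latent_rgb_factors = torch.randn(3, 4)  # [3, C] per-model constants (placeholder)
    latent_rgb_bias = torch.zeros(3)        # the optional bias this commit adds
    rgb = F.linear(latent.movedim(1, -1), latent_rgb_factors, latent_rgb_bias)
    rgb = ((rgb + 1.0) / 2.0).clamp(0, 1)   # [B, H, W, 3], mapped to [0, 1] for display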
3bb4dec720 Fix issue with loras, lowvram and --fast fp8. 2024-09-28 14:42:32 -04:00
8733191563 Flux torch.compile fix (#5082) 2024-09-27 22:07:51 -04:00
83b01f960a Add backend option to TorchCompileModel.
If you want to use the cudagraphs backend you need to: --disable-cuda-malloc

If you get other backends working feel free to make a PR to add them.
2024-09-27 02:12:37 -04:00
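What the backend option boils down to, roughly; selecting a backend is plain torch.compile usage:

    import torch

    model = torch.nn.Linear(64, 64)
    compiled = torch.compile(model)  # default "inductor" backend
    if torch.cuda.is_available():
        # cudagraphs requires CUDA (and --disable-cuda-malloc inside ComfyUI)
        compiled = torch.compile(model.cuda(), backend="cudagraphs")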
d72e871cfa Add a note that the experimental model downloader api will be removed. 2024-09-26 03:17:52 -04:00
037c3159b6 Move some nodes out of _for_testing. 2024-09-25 08:41:22 -04:00
bdd4a22a2e Fix flux TE not loading t5 embeddings. 2024-09-24 22:57:22 -04:00
fdf37566ef Add batch size to EmptyLatentAudio. 2024-09-24 04:32:55 -04:00
08c8968482 Internal download API: Add proper validated directory input (#4981)
* add internal /folder_paths route

returns a JSON map of folder paths

* (minor) format download_models.py

* initial folder path input on download api

* actually, require folder_path and clean up some code

* partial tests update

* fix & logging

* also download to a tmp file not the live file

to avoid compounding errors from network failure

* update tests again

* test tweaks

* workaround the first tests blocker

* fix file handling in tests

* rewrite test for create_model_path

* minor doc fix

* avoid 'mock_directory'

use temp dir to avoid accidental fs pollution from tests
2024-09-24 03:50:45 -04:00
479a427a48 Add dpmpp_2m_cfg_pp (#4992) 2024-09-24 02:42:56 -04:00
3a0eeee320 Make --listen listen on both ipv4 and ipv6 at the same time by default. 2024-09-23 04:38:19 -04:00
447da7ea86 Support listening on multiple addresses. 2024-09-23 04:36:59 -04:00
9c41bc8d10 Remove useless line. 2024-09-23 02:32:29 -04:00
6ad0ddbae4 Run unit tests on Windows/MacOS as well. (#5018)
* Run unit tests on Windows as well.

* Test on mac.

* Continue running on error.

* Compared normalized paths to work cross platform.

* Only test common set of mimetypes across operating systems.
2024-09-22 05:01:39 -04:00
a55142f904 Add ws.close() to the websocket examples (#5020)
* add ws.close() to websocket examples

* add and explain ws.close() in websocket examples
2024-09-22 04:59:10 -04:00
5718ef69bb Add total and free ram to /system_stats. 2024-09-22 03:42:11 -04:00
13ecf10a92 Added to the websockets_api_example.py to show how to decode latent previews from the binary stream (#5016)
* Update websockets_api_example.py

* even more simplified
2024-09-22 02:30:44 -04:00
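A condensed sketch of the example's flow, covering both websocket commits above; the 8-byte binary header layout (big-endian event type, then image format) is my reading of the server protocol, so verify it against server.py:

    import struct
    import websocket  # websocket-client package

    ws = websocket.WebSocket()
    ws.connect("ws://127.0.0.1:8188/ws?clientId=demo")
    try:
        while True:
            msg = ws.recv()
            if isinstance(msg, bytes):  # binary frame = preview data
                event, image_format = struct.unpack(">II", msg[:8])
                if event == 1:          # assumed PREVIEW_IMAGE event id
                    image_bytes = msg[8:]  # JPEG or PNG depending on image_format
                    break
    finally:
        ws.close()  # always release the connection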
7a415f47a9 Add an optional VAE input to the ControlNetApplyAdvanced node.
Deprecate the other controlnet nodes.
2024-09-22 01:24:52 -04:00
89fa2fca24 Update web content to release v1.2.60 (#5017)
* Update web content to release v1.2.60

* Remove dist.zip
2024-09-21 23:28:54 -04:00
364b69e931 Make SD3 empty latent image zeros.
This shouldn't change anything. The reason it was not zeros is that it
mattered in early versions of the code.
2024-09-21 09:13:10 -04:00
dc96a1ae19 Load controlnet in fp8 if weights are in fp8. 2024-09-21 04:50:12 -04:00
2d810b081e Add load_controlnet_state_dict function. 2024-09-21 01:51:51 -04:00
9f7e9f0547 Add an error message when a controlnet needs a VAE but none is given. 2024-09-21 01:33:18 -04:00
a355f38ecc Make the SD3 controlnet node the default one. 2024-09-21 01:32:46 -04:00
38c69080c7 Add docstring 2024-09-20 03:16:23 -04:00
70a708d726 Fix model merging issue. 2024-09-20 02:31:44 -04:00
e7d4782736 add laplace scheduler [2407.03297] (#4990)
* add laplace scheduler [2407.03297]

* should be here instead lol

* better settings
2024-09-19 23:23:09 -04:00
3326bdfd4e add internal /folder_paths route (#4980)
returns a JSON map of folder paths
2024-09-19 09:52:55 -04:00
68bb885d22 add 'is_default' to model paths config (#4979)
* add 'is_default' to model paths config

including impl and doc in example file

* update weirdly overspecific test expectations

* oh there's two

* sigh
2024-09-19 08:59:55 -04:00
ad66f7c7d8 Add model_options to load_controlnet function. 2024-09-19 08:23:35 -04:00
de8e8e3b0d Fix xpu Pytorch nightly build from calling optimize which doesn't exist. (#4978) 2024-09-19 05:11:42 -04:00
a1e71cfad1 very simple strong-cache on model list (#4969)
* very simple strong-cache on model list

* store the cache after validation too

* only cache object_info for now

* use a 'with' context
2024-09-19 04:40:14 -04:00
0bfc7cc998 Create the temp directory on ComfyUI startup instead. 2024-09-18 09:55:57 -04:00
Tom
7183fd1665 Add route to list model types (#4846)
* Add list models route

* Better readable model types list
2024-09-17 04:22:05 -04:00
254838f23c add simple error check to model loading (#4950) 2024-09-17 03:57:17 -04:00
0b7dfa986d Improve tiling calculations to reduce number of tiles that need to be processed. (#4944) 2024-09-17 03:51:10 -04:00
d514bb38ee Add some option to model_options for the text encoder.
load_device, offload_device and the initial_device can now be set.
2024-09-17 03:49:54 -04:00
0849c80e2a get_key_patches now works without unloading the model. 2024-09-17 01:57:59 -04:00
56e8f5e4fd VAEDecodeAudio now does some normalization on the audio. 2024-09-16 00:30:36 -04:00
e813abbb2c Long CLIP L support for SDXL, SD3 and Flux.
Use the *CLIPLoader nodes.
2024-09-15 07:59:38 -04:00
5e68a4ce67 Reduce repeated calls of INPUT_TYPES in cache (#4922) 2024-09-15 01:03:09 -04:00
ca08597670 Make the inpaint controlnet node work with non inpaint ones. 2024-09-14 09:17:13 -04:00
f48e390032 Support AliMama SD3 and Flux inpaint controlnets.
Use the ControlNetInpaintingAliMamaApply node.
2024-09-14 09:05:16 -04:00
369a6dd2c4 Remove empty spaces in user_manager.py (#4917) 2024-09-13 23:30:44 -04:00
b3ce8fb9fd Revert "Reduce repeated calls of get_immediate_node_signature for ancestors in cache (#4871)"
This reverts commit f6b7194f64.
2024-09-13 23:24:47 -04:00
cf80d28689 Support loading controlnets with different input. 2024-09-13 09:54:37 -04:00
6fb44c4b7c Make adding links/nodes to ExecutionList non-recursive (#4886)
Graphs with 300+ chained nodes run into maximum recursion depth error (limit is 1000 in CPython)
2024-09-13 08:25:11 -04:00
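The general shape of the fix, sketched with a hypothetical get_inputs helper: replace recursion with an explicit stack so graph depth is bounded by memory rather than the interpreter's frame limit:

    def collect_ancestors(node_id, get_inputs):
        seen, stack, order = set(), [node_id], []
        while stack:
            current = stack.pop()
            if current in seen:
                continue
            seen.add(current)
            order.append(current)
            stack.extend(get_inputs(current))  # push upstream nodes instead of recursing
        return order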
d2247c1e61 Normalize path returned by /userdata to always use / as separator (#4906) 2024-09-13 03:45:31 -04:00
cb12ad7049 Add full_info flag in /userdata endpoint to list out file size and last modified timestamp (#4905)
* Add full_info flag in /userdata endpoint to list out file size and last modified timestamp

* nit
2024-09-13 02:40:59 -04:00
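A hedged usage example for the flag above; the dir/full_info query parameter names are my recollection of user_manager.py, so treat them as assumptions:

    import requests

    r = requests.get("http://127.0.0.1:8188/userdata",
                     params={"dir": "workflows", "full_info": "true"})
    for entry in r.json():
        print(entry)  # expected to include the path, file size and modified time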
f6b7194f64 Reduce repeated calls of get_immediate_node_signature for ancestors in cache (#4871) 2024-09-12 23:02:52 -04:00
7c6eb4fb29 Set some nodes as DEPRECATED. 2024-09-12 20:27:07 -04:00
b962db9952 Add cli arg to override user directory (#4856)
* Override user directory.

* Use overridden user directory.

* Remove prints.

* Remove references to global user_files.

* Remove unused replace_folder function.

* Remove newline.

* Remove global during get_user_directory.

* Add validation.
2024-09-12 08:10:27 -04:00
d0b7ab88ba Add a simple experimental TorchCompileModel node.
It probably only works on Linux.

For maximum speed on Flux with Nvidia 40 series/ada and newer try using
this node with fp8_e4m3fn and the --fast argument.
2024-09-12 05:24:25 -04:00
405b529545 Minor: update tests-unit README.md (#4896) 2024-09-12 04:53:08 -04:00
9d720187f1 types -> comfy_types to fix import issue. 2024-09-12 03:57:46 -04:00
d247bc5a9c Expand variables in base_path for extra_config_paths.yaml. (#4893)
* Expand variables in base_path for extra_config_paths.yaml.

* Fix comments.
2024-09-12 01:52:06 -04:00
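The effect in isolation; the path value is illustrative:

    import os

    base_path = "$COMFYUI_HOME/models"         # as read from the yaml file
    base_path = os.path.expandvars(base_path)  # "$VAR" -> environment value
    base_path = os.path.expanduser(base_path)  # "~"    -> home directory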
9f4daca9d9 Doesn't really make sense for cfg_pp sampler to call regular one. 2024-09-11 02:51:36 -04:00
b5d0f2a908 Add CFG++ to DPM++ 2S Ancestral (#3871)
* Update sampling.py

* Update samplers.py

* my bad

* "fix" the sampler

* Update samplers.py

* i named it wrong

* minor sampling improvements

mainly using a dynamic rho value (hey this sounds a lot like smea!!!)

* revert rho change

rho? r? its just 1/2
2024-09-11 02:49:44 -04:00
e760bf5c40 Add content-type filter method to folder_paths (#4054)
* Add content-type filter method to folder_paths

* Add unit tests

* Hardcode webp content-type

* Annotate content_types as Literal["image", "video", "audio"]
2024-09-11 02:00:07 -04:00
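A sketch of mimetypes-based filtering in the spirit of the helper above, including the hardcoded webp entry the PR mentions; the function name here is illustrative, not the real signature:

    import mimetypes

    def filter_by_content_type(files, content_types):
        cache = {"webp": "image"}  # mimetypes may not know webp
        kept = []
        for name in files:
            ext = name.rsplit(".", 1)[-1].lower()
            if ext not in cache:
                mime, _ = mimetypes.guess_type(name)
                cache[ext] = mime.split("/")[0] if mime else ""
            if cache[ext] in content_types:
                kept.append(name)
        return kept

    print(filter_by_content_type(["a.png", "b.mp4", "c.txt"], ["image", "video"]))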
36c83cdbba Limit origin check to when host is loopback.
This should still prevent the exploit without breaking things for people
who use reverse proxies.
2024-09-11 01:06:37 -04:00
81778a7feb [🗻 Mount Fuji Commit] Add unit tests for folder path utilities (#4869)
All of the past 30 minutes of commits were done at the top of Mt Fuji
By Comfy, Robin, and Yoland
All other comfy org members died on the way

Introduced unit tests to verify the correctness of various folder path
utility functions such as `get_directory_by_type`, `annotated_filepath`,
and `recursive_search` among others. These tests cover scenarios
including directory retrieval, filepath annotation, recursive file
searches, and filtering files by extensions, enhancing the robustness
and reliability of the codebase.
2024-09-10 00:44:49 -04:00
bc94662b31 Cleanup. 2024-09-10 00:43:37 -04:00
9fa8faa44a Expand user directory for basepath in extra_models_paths.yaml (#4857)
* Expand user path.

* Add test.

* Add unit test for expanding base path.

* Simplify unit test.

* Remove comment.

* Remove comment.

* Checkpoints.

* Refactor.
2024-09-10 00:33:44 -04:00
9a7444e39f Add diffusion_models to the extra_model_paths.yaml.example 2024-09-10 00:21:33 -04:00
54fca4a218 If host does not contain a port only compare the hostnames. 2024-09-09 16:28:23 -04:00
cd4955367e Add back CI action for tests-ui (#4859) 2024-09-09 04:32:55 -04:00
8354203d95 Add .venv to gitignore (#4756) 2024-09-09 04:31:18 -04:00
e0b41243b4 Fix issue where sometimes origin doesn't contain the port. 2024-09-09 03:18:17 -04:00
619263d4a6 allow current timestamp in save image prefix (#4030) 2024-09-09 02:55:51 -04:00
e3b0402bb7 Ignore origin domain when it's empty. 2024-09-09 01:04:56 -04:00
967867d48c fix: url decode filename from API (#4801) 2024-09-08 21:02:32 -04:00
cbaac71bf5 Fix issue with last commit. 2024-09-08 19:35:23 -04:00
3ab3516e46 By default only accept requests where origin header matches the host.
Browsers are dumb and let any website make requests to localhost; this should
prevent that without breaking things. CORS prevents the JavaScript from
reading the response, but it can still write one.

At the moment this is only enabled when the --enable-cors-header argument
is not used.
2024-09-08 18:17:29 -04:00
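A minimal aiohttp sketch of the idea (not ComfyUI's exact code): compare the Origin header's hostname against the Host header and reject mismatches:

    from aiohttp import web
    from urllib.parse import urlparse

    @web.middleware
    async def same_origin_only(request, handler):
        origin = request.headers.get("Origin")
        if origin:
            origin_host = urlparse(origin).hostname
            host = request.host.split(":")[0]  # compare hostnames only
            if origin_host and origin_host != host:
                return web.Response(status=403, text="Forbidden")
        return await handler(request)

    app = web.Application(middlewares=[same_origin_only])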
9c5fca75f4 Fix lora issue. 2024-09-08 10:10:47 -04:00
a5da4d0b3e Fix error with ExecutionBlocker and OUTPUT_IS_LIST (#4836)
This change resolves an error when a node with OUTPUT_IS_LIST=(True,)
receives an ExecutionBlocker. I've also added a unit test for this case.
2024-09-08 09:48:47 -04:00
32a60a7bac Support onetrainer text encoder Flux lora. 2024-09-08 09:31:41 -04:00
bb52934ba4 Fix import issue (#4815) 2024-09-07 05:28:32 -04:00
8aabd7c8c0 SaveLora node can now save "full diff" lora format.
This isn't actually a lora format and is saving the full diff of the
weights in a format that can be used in the lora loader nodes.
2024-09-07 03:21:02 -04:00
a09b29ca11 Add an option to the SaveLora node to store the bias diff. 2024-09-07 03:03:30 -04:00
9bfee68773 LoraSave node now supports generating text encoder loras.
text_encoder_diff should be connected to a CLIPMergeSubtract node.

model_diff and text_encoder_diff are optional inputs so you can create
model only loras, text encoder only loras or a lora that contains both.
2024-09-07 02:30:12 -04:00
ea77750759 Support a generic Comfy format for text encoder loras.
This is a format with keys like:
text_encoders.clip_l.transformer.text_model.encoder.layers.9.self_attn.v_proj.lora_up.weight

Instead of waiting for me to add support for specific lora formats, you can
convert your text encoder loras to this format.

If you want to see an example save a text encoder lora with the SaveLora
node with the commit right after this one.
2024-09-07 02:20:39 -04:00
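An illustrative conversion sketch: only the target key prefix comes from the commit message, while the source layout and file names are hypothetical, so a real converter needs a key mapping for its specific input format:

    from safetensors.torch import load_file, save_file

    sd = load_file("my_te_lora.safetensors")  # hypothetical input file
    converted = {}
    for key, value in sd.items():
        # assumes the input already uses dotted module paths; real formats
        # usually need an explicit key mapping here
        converted["text_encoders.clip_l.transformer." + key] = value
    save_file(converted, "my_te_lora_comfy.safetensors")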
c27ebeb1c2 Fix onnx export not working on flux. 2024-09-06 03:21:52 -04:00
0c7c98a965 Nodes using UNIQUE_ID as input are NOT_IDEMPOTENT (#4793)
As suggested by @ltdrdata, we can automatically consider nodes that take
the UNIQUE_ID hidden input to be NOT_IDEMPOTENT.
2024-09-05 19:33:02 -04:00
dc2eb75b85 Update stable release workflow to latest pytorch with cuda 12.4. 2024-09-05 19:21:52 -04:00
fa34efe3bd Update frontend to v1.2.47 (#4798)
* Update web content to release v1.2.47

* Update shortcut list
2024-09-05 18:56:01 -04:00
5cbaa9e07c Mistoline flux controlnet support. 2024-09-05 00:05:17 -04:00
c7427375ee Prioritize freeing partially offloaded models first. 2024-09-04 19:47:32 -04:00
22d1241a50 Add an experimental LoraSave node to extract model loras.
The model_diff input should be connected to the output of a
ModelMergeSubtract node.
2024-09-04 16:38:38 -04:00
f04229b84d Add emb_patch support to UNetModel forward (#4779) 2024-09-04 14:35:15 -04:00
f067ad15d1 Make live preview size a configurable launch argument (#4649)
* Make live preview size a configurable launch argument

* Remove import from testing phase

* Update cli_args.py
2024-09-03 19:16:38 -04:00
483004dd1d Support newer glora format. 2024-09-03 17:02:19 -04:00
00a5d08103 Lower fp8 lora memory usage. 2024-09-03 01:25:05 -04:00
d043997d30 Flux onetrainer lora. 2024-09-02 08:22:15 -04:00
f1c2301697 fix typo in stale-issues (#4735) 2024-09-01 17:44:49 -04:00
8d31a6632f Speed up inference on nvidia 10 series on Linux. 2024-09-01 17:29:31 -04:00
b643eae08b Make minimum_inference_memory() depend on --reserve-vram 2024-09-01 01:18:34 -04:00
baa6b4dc36 Update manual install instructions. 2024-08-31 04:37:23 -04:00
d4aeefc297 add github action to automatically handle stale user support issues (#4683)
* add github action to automatically handle stale user support issues

* improve stale message

* remove token part
2024-08-31 01:57:18 -04:00
587e7ca654 Remove github buttons. 2024-08-31 01:53:10 -04:00
c90459eba0 Update ComfyUI_frontend to 1.2.40 (#4691)
* Update ComfyUI_frontend to 1.2.40

* Add files
2024-08-30 19:32:10 -04:00
04278afb10 feat: return import_failed from init_extra_nodes function (#4694) 2024-08-30 19:26:47 -04:00
935ae153e1 Cleanup. 2024-08-30 12:53:59 -04:00
e91662e784 Get logs endpoint & system_stats additions (#4690)
* Add route for getting output logs

* Include ComfyUI version

* Move to own function

* Changed to memory logger

* Unify logger setup logic

* Fix get version git fallback

---------

Co-authored-by: pythongosssss <125205205+pythongosssss@users.noreply.github.com>
2024-08-30 12:46:37 -04:00
63fafaef45 Fix potential issue with hydit controlnets. 2024-08-30 04:58:41 -04:00
ec28cd9136 swap legacy sdv15 link (#4682)
* swap legacy sdv15 link

* swap v15 ckpt examples to safetensors

* link the fp16 copy of the model by default
2024-08-29 19:48:48 -04:00
6eb5d64522 Fix glora lowvram issue. 2024-08-29 19:07:23 -04:00
10a79e9898 Implement model part of flux union controlnet. 2024-08-29 18:41:22 -04:00
ea3f39bd69 InstantX depth flux controlnet. 2024-08-29 02:14:19 -04:00
b33cd61070 InstantX canny controlnet. 2024-08-28 19:02:50 -04:00
34eda0f853 fix: remove redundant useless loop (#4656)
fix: potential undefined-variable error

https://github.com/comfyanonymous/ComfyUI/discussions/4650
2024-08-28 17:46:30 -04:00
d31e226650 Unify RMSNorm code. 2024-08-28 16:56:38 -04:00
b79fd7d92c ComfyUI supports more than just stable diffusion. 2024-08-28 16:12:24 -04:00
38c22e631a Fix case where model was not properly unloaded in merging workflows. 2024-08-27 19:03:51 -04:00
6bbdcd28ae Support weight padding on diff weight patch (#4576) 2024-08-27 13:55:37 -04:00
ab130001a8 Do RMSNorm in native type. 2024-08-27 02:41:56 -04:00
ca4b8f30e0 Cleanup empty dir if frontend zip download failed (#4574) 2024-08-27 02:07:25 -04:00
70b84058c1 Add relative file path to the progress report. (#4621) 2024-08-27 02:06:12 -04:00
2ca8f6e23d Make the stochastic fp8 rounding reproducible. 2024-08-26 15:12:06 -04:00
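Stochastic rounding in general, as a self-contained sketch (not the repository's implementation): round to a neighbouring representable value with probability proportional to proximity, with reproducibility coming from a seeded generator:

    import torch

    def stochastic_round_fp8_e4m3(x, generator=None):
        # e4m3 has 3 mantissa bits; normal exponents reach down to -6, below
        # which values are subnormal with a fixed spacing of 2**-9
        e = torch.floor(torch.log2(x.abs().clamp(min=2**-9))).clamp(min=-6)
        spacing = torch.exp2(e - 3)  # gap between representable values near x
        scaled = x / spacing
        low = scaled.floor()
        frac = scaled - low          # distance to the lower neighbour
        noise = torch.rand(x.shape, generator=generator, device=x.device)
        up = (noise < frac).to(x.dtype)  # round up with probability frac
        return ((low + up) * spacing).to(torch.float8_e4m3fn)

    gen = torch.Generator().manual_seed(0)  # fixed seed -> reproducible rounding
    print(stochastic_round_fp8_e4m3(torch.randn(4), gen))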
7985ff88b9 Use less memory in float8 lora patching by doing calculations in fp16. 2024-08-26 14:45:58 -04:00
c6812947e9 Fix potential memory leak. 2024-08-26 02:07:32 -04:00
9230f65823 Fix some controlnets OOMing when loading. 2024-08-25 05:54:29 -04:00
6ab1e6fd4a [Bug #4529] Fix graph partial validation failure (#4588)
Currently, if a graph partially fails validation (i.e. some outputs are
valid while others have links from missing nodes), the execution loop
could get an exception resulting in server lockup.

This isn't actually possible to reproduce via the default UI, but is a
potential issue for people using the API to construct invalid graphs.
2024-08-24 15:34:58 -04:00
07dcbc3a3e Clarify how to use high quality previews. 2024-08-24 02:31:03 -04:00
8ae23d8e80 Fix onnx export. 2024-08-23 17:52:47 -04:00
7df42b9a23 Fix dora. 2024-08-23 04:58:59 -04:00
5d8bbb7281 Cleanup. 2024-08-23 04:06:27 -04:00
2c1d2375d6 Fix. 2024-08-23 04:04:55 -04:00
64ccb3c7e3 Rework IPEX check for future inclusion of XPU into Pytorch upstream and do a bit more optimization of ipex.optimize(). (#4562) 2024-08-23 03:59:57 -04:00
9465b23432 Added SD15_Inpaint_Diffusers model support for unet_config_from_diffusers_unet function (#4565) 2024-08-23 03:57:08 -04:00
bb4416dd5b Fix task.status.status_str caused by #2666 (#4551)
* Fix task.status.status_str caused by 2666 regression

* fix

* fix
2024-08-22 17:38:30 -04:00
c0b0da264b Missing imports. 2024-08-22 17:20:51 -04:00
c26ca27207 Move calculate function to comfy.lora 2024-08-22 17:12:00 -04:00
7c6bb84016 Code cleanups. 2024-08-22 17:05:12 -04:00
c54d3ed5e6 Fix issue with models staying loaded in memory. 2024-08-22 15:58:20 -04:00
c7ee4b37a1 Try to fix some lora issues. 2024-08-22 15:32:18 -04:00
7b70b266d8 Generalize MacOS version check for force-upcast-attention (#4548)
This code automatically forces upcasting attention for MacOS versions 14.5 and 14.6. My computer returns the string "14.6.1" for `platform.mac_ver()[0]`, so this generalizes the comparison to catch more versions.

I am running MacOS Sonoma 14.6.1 (latest version) and was seeing black image generation on previously functional workflows after recent software updates. This PR solved the issue for me.

See comfyanonymous/ComfyUI#3521
2024-08-22 13:24:21 -04:00
8f60d093ba Fix issue. 2024-08-22 10:38:24 -04:00
dafbe321d2 Fix a bug where cached outputs affected IS_CHANGED (#4535)
This change fixes a bug where non-constant values could be passed to the
IS_CHANGED function. This would result in workflows taking an extra
execution before they acted as if they were cached.

The actual change is like 4 characters -- the rest is adding unit tests.
2024-08-21 23:38:46 -04:00
5f84ea63e8 Add a shortcut to the nightly package to run with --fast. 2024-08-21 23:36:58 -04:00
843a7ff70c fp16 is actually faster than fp32 on a GTX 1080. 2024-08-21 23:23:50 -04:00
a60620dcea Fix slow performance on 10 series Nvidia GPUs. 2024-08-21 16:39:02 -04:00
015f73dc49 Try a different type of flux fp16 fix. 2024-08-21 16:17:15 -04:00
904bf58e7d Make --fast work on pytorch nightly. 2024-08-21 14:01:41 -04:00
5f50263088 Replace use of .view with .reshape (#4522)
When generating images with fp8_e4m3fn Flux and batch size >1, using --fast, ComfyUI throws a "view size is not compatible with input tensor's size and stride" error pointing at the first of these two calls to view.

As reshape is semantically equivalent to view except for working on a broader set of inputs, there should be no downside to changing this. The only difference is that it clones the underlying data in cases where .view would error out. I have confirmed that the output still looks as expected, but cannot confirm that no mutable use is made of the tensors anywhere.

Note that --fast is only marginally faster than the default.
2024-08-21 11:21:48 -04:00
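The difference in two lines:

    import torch

    t = torch.arange(6).reshape(2, 3).t()  # transposed -> non-contiguous
    try:
        t.view(-1)                         # raises the size/stride error above
    except RuntimeError as e:
        print(e)
    print(t.reshape(-1))                   # works; copies only when it must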
5e806f555d add a get models list api route (#4519)
* get models list api route

* remove copypasta
2024-08-21 02:04:42 -04:00
f07e5bb522 Add GET /internal/files. (#4295)
* Create internal route table.

* List files.

* Add GET /internal/files.

Retrieves list of files in models, output, and user directories.

* Refactor file names.

* Use typing_extensions for Python 3.8

* Fix tests.

* Remove print statements.

* Update README.

* Add output and user to valid directory test.

* Add missing type hints.
2024-08-21 01:25:06 -04:00
03ec517afb Remove useless line, adjust windows default reserved vram. 2024-08-21 00:47:19 -04:00
f257fc999f Add optional deprecated/experimental flag to node class (#4506)
* Add optional deprecated flag to node class

* nit

* Add experimental flag
2024-08-21 00:01:34 -04:00
bb50e69839 Update frontend to 1.2.30 (#4513) 2024-08-21 00:00:49 -04:00
510f3438c1 Speed up fp8 matrix mult by using better code. 2024-08-20 22:53:26 -04:00
ea63b1c092 Simpletrainer lycoris format. 2024-08-20 12:05:13 -04:00
9953f22fce Add --fast argument to enable experimental optimizations.
Optimizations that might break things/lower quality will be put behind
this flag first and might be enabled by default in the future.

Currently the only optimization is float8_e4m3fn matrix multiplication on
4000/ADA series Nvidia cards or later. If you have one of these cards you
will see a speed boost when using fp8_e4m3fn flux for example.
2024-08-20 11:55:51 -04:00
d1a6bd6845 Support loading long clipl model with the CLIP loader node. 2024-08-20 10:46:36 -04:00
83dbac28eb Properly set if clip text pooled projection instead of using hack. 2024-08-20 10:46:36 -04:00
538cb068bc Make cast_to a nop if weight is already good. 2024-08-20 10:46:36 -04:00
1b3eee672c Fix potential issue with multi devices. 2024-08-20 10:46:36 -04:00
5a69f84c3c Update README.md (Add shield badges) (#4490) 2024-08-19 18:25:20 -04:00
9eee470244 New load_text_encoder_state_dicts function.
Now you can load text encoders straight from a list of state dicts.
2024-08-19 17:36:35 -04:00
045377ea89 Add a --reserve-vram argument if you don't want comfy to use all of it.
--reserve-vram 1.0 for example will make ComfyUI try to keep 1GB vram free.

This can also be useful if workflows are failing because of OOM errors but
in that case please report it if --reserve-vram improves your situation.
2024-08-19 17:16:18 -04:00
4d341b78e8 Bug fixes. 2024-08-19 16:28:55 -04:00
6138f92084 Use better dtype for the lowvram lora system. 2024-08-19 15:35:25 -04:00
be0726c1ed Remove duplication. 2024-08-19 15:26:50 -04:00
766ae119a8 CheckpointSave node name. 2024-08-19 15:06:12 -04:00
fc90ceb6ba Update issue template config.yml to direct frontend issues to frontend repos (#4486)
* Update config.yml

* Typos
2024-08-19 13:41:30 -04:00
4506ddc86a Better subnormal fp8 stochastic rounding. Thanks Ashen. 2024-08-19 13:38:03 -04:00
20ace7c853 Code cleanup. 2024-08-19 12:48:59 -04:00
b29b3b86c5 Update README to include frontend section (#4468)
* Update README to include frontend section

* nit
2024-08-19 07:12:32 -04:00
22ec02afc0 Handle subnormal numbers in float8 rounding. 2024-08-19 05:51:08 -04:00
39f114c44b Less broken non blocking? 2024-08-18 16:53:17 -04:00
6730f3e1a3 Disable non blocking.
It fixed some perf issues but caused other issues that need to be debugged.
2024-08-18 14:38:09 -04:00
73332160c8 Enable non blocking transfers in lowvram mode. 2024-08-18 10:29:33 -04:00
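The technique in isolation: page-locked host memory lets the copy overlap with compute on CUDA:

    import torch

    if torch.cuda.is_available():
        cpu_weight = torch.randn(4096, 4096).pin_memory()      # page-locked memory
        gpu_weight = cpu_weight.to("cuda", non_blocking=True)  # async H2D copy
        torch.cuda.synchronize()  # wait before relying on the result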
2622c55aff Automatically use RF variant of dpmpp_2s_ancestral if RF model. 2024-08-18 00:47:25 -04:00
1beb348ee2 dpmpp_2s_ancestral_RF for rectified flow (Flux, SD3 and Auraflow). 2024-08-18 00:33:30 -04:00
9aa39e743c Add new shortcuts to readme (#4442) 2024-08-17 23:52:56 -04:00
d31df04c8a Indentation. 2024-08-17 23:00:44 -04:00
e68763f40c Add Flux model support for InstantX style controlnet residuals (#4444)
* Add Flux model support for InstantX style controlnet residuals

* Refactor Flux controlnet residual step to a separate method

* Rollback minor change

* New format for applying controlnet residuals: input->double_blocks, output->single_blocks

* Adjust XLabs Flux controlnet to fit new syntax of applying Flux controlnet residuals

* Remove unnecessary import and minor style change
2024-08-17 22:58:23 -04:00
310ad09258 Add a ModelSave node. 2024-08-17 21:43:07 -04:00
4f7a3cb6fb unet -> diffusion_models. 2024-08-17 21:31:04 -04:00
bb222ceddb Fix loras having a weak effect when applied on fp8. 2024-08-17 15:20:17 -04:00
14af129c55 Improve execution UX.
Some branches with VAELoader -> VAEDecode -> Preview were being executed
last. With this change they will be executed earlier.
2024-08-17 11:37:21 -04:00
fca42836f2 Add model_options for text encoder. 2024-08-17 11:17:20 -04:00
858d51f91a Fix VAEDecode -> Preview not being executed first. 2024-08-17 04:08:54 -04:00
cd5017c1c9 calculate_weight function to use a different dtype. 2024-08-17 01:06:08 -04:00
83f343146a Fix potential lowvram issue. 2024-08-16 17:12:42 -04:00
b021cf67c7 Update frontend to 1.2.26 (#4415) 2024-08-16 15:25:02 -04:00
1770fc77ed Implement support for taef1 latent previews (#4409)
* add taef1 handling to several places

* remove guess_latent_channels and add latent_channels info directly to flux model

* remove TODO

* fix numbers
2024-08-16 12:53:13 -04:00
05a9f3faa1 Log a warning when there's an issue with IS_CHANGED. 2024-08-16 08:50:17 -04:00
86c5970ac0 Fix custom nodes hooking the map_node_over_list and breaking things. 2024-08-16 08:40:31 -04:00
bfc214f434 Use new TS frontend uncompressed (#4379)
* Swap frontend uncompressed

* Add uncompressed files
2024-08-15 16:50:25 -04:00
3f5939add6 Tell github not to count the web directory in language stats. 2024-08-15 13:48:56 -04:00
5960f946a9 Move a few files from comfy -> comfy_execution.
Python code in the comfy folder should not import things from outside it.
2024-08-15 11:21:14 -04:00
5cfe38f41c Execution Model Inversion (#2666)
* Execution Model Inversion

This PR inverts the execution model -- from recursively calling nodes to
using a topological sort of the nodes. This change allows for
modification of the node graph during execution. This allows for two
major advantages:

    1. The implementation of lazy evaluation in nodes. For example, if a
    "Mix Images" node has a mix factor of exactly 0.0, the second image
input doesn't even need to be evaluated (and vice versa if the mix
    factor is 1.0).

    2. Dynamic expansion of nodes. This allows for the creation of dynamic
    "node groups". Specifically, custom nodes can return subgraphs that
    replace the original node in the graph. This is an incredibly
    powerful concept. Using this functionality, it was easy to
    implement:
        a. Components (a.k.a. node groups)
        b. Flow control (i.e. while loops) via tail recursion
        c. All-in-one nodes that replicate the WebUI functionality
        d. and more
    All of those were able to be implemented entirely via custom nodes,
    so those features are *not* a part of this PR. (There are some
    front-end changes that should occur before that functionality is
    made widely available, particularly around variant sockets.)

The custom nodes associated with this PR can be found at:
https://github.com/BadCafeCode/execution-inversion-demo-comfyui

Note that some of them require that variant socket types ("*") be
enabled.

* Allow `input_info` to be of type `None`

* Handle errors (like OOM) more gracefully

* Add a command-line argument to enable variants

This allows the use of nodes that have sockets of type '*' without
applying a patch to the code.

* Fix an overly aggressive assertion.

This could happen when attempting to evaluate `IS_CHANGED` for a node
during the creation of the cache (in order to create the cache key).

* Fix Pyright warnings

* Add execution model unit tests

* Fix issue with unused literals

Behavior should now match the master branch with regard to undeclared
inputs. Undeclared inputs that are socket connections will be used while
undeclared inputs that are literals will be ignored.

* Make custom VALIDATE_INPUTS skip normal validation

Additionally, if `VALIDATE_INPUTS` takes an argument named `input_types`,
that variable will be a dictionary of the socket type of all incoming
connections. If that argument exists, normal socket type validation will
not occur. This removes the last hurdle for enabling variant types
entirely from custom nodes, so I've removed that command-line option.

I've added appropriate unit tests for these changes.

* Fix example in unit test

This wouldn't have caused any issues in the unit test, but it would have
bugged the UI if someone copy+pasted it into their own node pack.

* Use fstrings instead of '%' formatting syntax

* Use custom exception types.

* Display an error for dependency cycles

Previously, dependency cycles that were created during node expansion
would cause the application to quit (due to an uncaught exception). Now,
we'll throw a proper error to the UI. We also make an attempt to 'blame'
the most relevant node in the UI.

* Add docs on when ExecutionBlocker should be used

* Remove unused functionality

* Rename ExecutionResult.SLEEPING to PENDING

* Remove superfluous function parameter

* Pass None for uneval inputs instead of default

This applies to `VALIDATE_INPUTS`, `check_lazy_status`, and lazy values
in evaluation functions.

* Add a test for mixed node expansion

This test ensures that a node that returns a combination of expanded
subgraphs and literal values functions correctly.

* Raise exception for bad get_node calls.

* Minor refactor of IsChangedCache.get

* Refactor `map_node_over_list` function

* Fix ui output for duplicated nodes

* Add documentation on `check_lazy_status`

* Add file for execution model unit tests

* Clean up Javascript code as per review

* Improve documentation

Converted some comments to docstrings as per review

* Add a new unit test for mixed lazy results

This test validates that when an output list is fed to a lazy node, the
node will properly evaluate previous nodes that are needed by any inputs
to the lazy node.

No code in the execution model has been changed. The test already
passes.

* Allow kwargs in VALIDATE_INPUTS functions

When kwargs are used, validation is skipped for all inputs as if they
had been mentioned explicitly.

* List cached nodes in `execution_cached` message

This was previously just bugged in this PR.
2024-08-15 11:21:11 -04:00
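A minimal lazy node in the style this PR enables, following the "Mix Images" example from the description; the {"lazy": True} input option and the check_lazy_status contract are my reading of the custom-node API this PR introduced:

    class MixImages:
        @classmethod
        def INPUT_TYPES(cls):
            return {"required": {
                "image1": ("IMAGE", {"lazy": True}),
                "image2": ("IMAGE", {"lazy": True}),
                "mix_factor": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0}),
            }}

        RETURN_TYPES = ("IMAGE",)
        FUNCTION = "mix"
        CATEGORY = "example"

        def check_lazy_status(self, image1, image2, mix_factor):
            # tell the executor which unevaluated inputs are actually needed
            needed = []
            if mix_factor != 1.0 and image1 is None:
                needed.append("image1")
            if mix_factor != 0.0 and image2 is None:
                needed.append("image2")
            return needed

        def mix(self, image1, image2, mix_factor):
            if mix_factor == 0.0:
                return (image1,)
            if mix_factor == 1.0:
                return (image2,)
            return (image1 * (1.0 - mix_factor) + image2 * mix_factor,)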
0f9c2a7822 Try to fix SDXL OOM issue on some configurations. 2024-08-14 23:08:54 -04:00
153d0a8142 Add a update/update_comfyui_stable.bat to the standalones. 2024-08-14 22:29:23 -04:00
ab4dd19b91 Remove legacy ui test files (#4316) 2024-08-14 21:01:06 -04:00
f1d6cef71c Revert "Disable cuda malloc by default."
This reverts commit 50bf66e5c4.
2024-08-14 08:38:07 -04:00
33fb282d5c Fix issue. 2024-08-14 02:51:47 -04:00
50bf66e5c4 Disable cuda malloc by default. 2024-08-14 02:49:25 -04:00
e60e19b175 Add support for simple tooltips (#3842)
* Add support for simple tooltips

* Fix overflow

* Add tooltips for nodes in the default workflow

* new line

* Prevent potential crash

* PR feedback

* Hide tooltip when clicking (e.g. combo widget)

* Refactor tooltips, add node level support

* Fix

* move

* Fix test (and undo last change)

* Fixed indent

* Fix dom widgets, dont show tooltip if not over canvas
2024-08-14 01:22:10 -04:00
a5af64d3ce Revert "Not sure if this actually changes anything but it can't hurt."
This reverts commit 34608de2e9.
2024-08-14 01:05:17 -04:00
3e52e0364c Add model downloading endpoint. (#4248)
* Add model downloading endpoint.

* Move client session init to async function.

* Break up large function.

* Send "download_progress" as websocket event.

* Fixed

* Fixed.

* Use async mock.

* Move server set up to right before run call.

* Validate that model subdirectory cannot contain relative paths.

* Add download_model test checking for invalid paths.

* Remove DS_Store.

* Consolidate DownloadStatus and DownloadModelResult

* Add progress_interval as an optional parameter.

* Use tuple type from annotations.

* Use pydantic.

* Update comment.

* Revert "Use pydantic."

This reverts commit 7461e8eb00.

* Add new line.

* Add newline EOF.

* Validate model filename as well.

* Add comment to not reply on internal.

* Restrict downloading to safetensor files only.
2024-08-13 15:48:52 -04:00
34608de2e9 Not sure if this actually changes anything but it can't hurt. 2024-08-13 13:29:16 -04:00
39fb74c5bd Fix bug when model cannot be partially unloaded. 2024-08-13 03:57:55 -04:00
74e124f4d7 Fix some issues with TE being in lowvram mode. 2024-08-12 23:42:21 -04:00
a562c17e8a load_unet -> load_diffusion_model with a model_options argument. 2024-08-12 23:20:57 -04:00
5942c17d55 Order of operations matters. 2024-08-12 21:56:18 -04:00
c032b11e07 xlabs Flux controlnet implementation. (#4260)
* xlabs Flux controlnet.

* Fix not working on old python.

* Remove comment.
2024-08-12 21:22:22 -04:00
b8ffb2937f Memory tweaks. 2024-08-12 15:07:11 -04:00
ce37c11164 add DS_Store to gitignore (#4324) 2024-08-12 12:32:34 -04:00
b5c3906b38 Automatically link the Comfy CI page on PRs (#4326)
also uses use_prior_commit so it doesn't get a janky merge commit instead of the real one
2024-08-12 12:32:16 -04:00
5d43e75e5b Fix some issues with the model sometimes not getting patched. 2024-08-12 12:27:54 -04:00
517f4a94e4 Fix some lora loading slowdowns. 2024-08-12 11:50:32 -04:00
52a471c5c7 Change name of log. 2024-08-12 10:35:06 -04:00
ad76574cb8 Fix some potential issues with the previous commits. 2024-08-12 00:23:29 -04:00
9acfe4df41 Support loading directly to vram with CLIPLoader node. 2024-08-12 00:06:01 -04:00
9829b013ea Fix mistake in last commit. 2024-08-12 00:00:17 -04:00
5c69cde037 Load TE model straight to vram if certain conditions are met. 2024-08-11 23:52:43 -04:00
e9589d6d92 Add a way to set model dtype and ops from load_checkpoint_guess_config. 2024-08-11 08:50:34 -04:00
0d82a798a5 Remove the ckpt_path from load_state_dict_guess_config. 2024-08-11 08:37:35 -04:00
925fff26fd alternative to load_checkpoint_guess_config that accepts a loaded state dict (#4249)
* make alternative fn

* add back ckpt path as 2nd argument?
2024-08-11 08:36:52 -04:00
75b9b55b22 Fix issues with #4302 and support loading diffusers format flux. 2024-08-10 21:28:24 -04:00
1765f1c60c FLUX: Added full diffusers mapping for FLUX.1 schnell and dev. Adds full LoRA support from diffusers LoRAs. (#4302) 2024-08-10 21:26:41 -04:00
1de69fe4d5 Fix some issues with inference slowing down. 2024-08-10 16:21:25 -04:00
ae197f651b Speed up hunyuan dit inference a bit. 2024-08-10 07:36:27 -04:00
1b5b8ca81a Fix regression. 2024-08-09 21:45:21 -04:00
6678d5cf65 Fix regression. 2024-08-09 14:02:38 -04:00
e172564eea Update controlnet.py to fix the default controlnet weight as constant (#4285) 2024-08-09 13:40:05 -04:00
a3cc326748 Better fix for lowvram issue. 2024-08-09 12:16:25 -04:00
86a97e91fc Fix controlnet regression. 2024-08-09 12:08:58 -04:00
5acdadc9f3 Fix issue with some lowvram weights. 2024-08-09 03:58:28 -04:00
55ad9d5f8c Fix regression. 2024-08-09 03:36:40 -04:00
a9f04edc58 Implement text encoder part of HunyuanDiT loras. 2024-08-09 03:21:10 -04:00
a475ec2300 Cleanup HunyuanDit controlnets.
Use the: ControlNetApply SD3 and HunyuanDiT node.
2024-08-09 02:59:34 -04:00
06eb9fb426 feat: add support for HunYuanDit ControlNet (#4245)
* add support for HunYuanDit ControlNet

* fix hunyuandit controlnet

* fix typo in hunyuandit controlnet

* fix typo in hunyuandit controlnet

* fix code format style

* add control_weight support for HunyuanDit Controlnet

* use control_weights in HunyuanDit Controlnet

* fix typo
2024-08-09 02:59:24 -04:00
413322645e Raw torch is faster than einops? 2024-08-08 22:09:29 -04:00
11200de970 Cleaner code. 2024-08-08 20:07:09 -04:00
037c38eb0f Try to improve inference speed on some machines. 2024-08-08 17:29:27 -04:00
1e11d2d1f5 Better prints. 2024-08-08 17:29:27 -04:00
65ea6be38f PullRequest CI Run: use pull_request_target to allow the CI Dashboard to work (#4277)
'_target' allows secrets to pass through; we're only using the secret that allows uploading to the dashboard, and we manually vet PRs before running this workflow anyway
2024-08-08 17:20:48 -04:00
5df6f57b5d minor fix on copypasta action name (#4276)
my bad sorry
2024-08-08 16:30:59 -04:00
6588bfdef9 add GitHub workflow for CI tests of PRs (#4275)
When the 'Run-CI-Test' label is added to a PR, it will be tested by the CI, on a small matrix of stable versions.
2024-08-08 16:24:49 -04:00
50ed2879ef Add full CI test matrix GitHub Workflow (#4274)
automatically runs a matrix of full GPU-enabled tests on all new commits to the ComfyUI master branch
2024-08-08 15:40:07 -04:00
66d4233210 Fix. 2024-08-08 15:16:51 -04:00
591010b7ef Support diffusers text attention flux loras. 2024-08-08 14:45:52 -04:00
08f92d55e9 Partial model shift support. 2024-08-08 14:45:06 -04:00
8115d8cce9 Add Flux fp16 support hack. 2024-08-07 15:08:39 -04:00
6969fc9ba4 Make supported_dtypes a priority list. 2024-08-07 15:00:06 -04:00
cb7c4b4be3 Workaround for lora OOM on lowvram mode. 2024-08-07 14:30:54 -04:00
1208863eca Fix "Comfy" lora keys.
They are in this format now:
diffusion_model.full.model.key.name.lora_up.weight
2024-08-07 13:49:31 -04:00
e1c528196e Fix bundled embed. 2024-08-07 13:30:45 -04:00
17030fd4c0 Support for "Comfy" lora format.
The keys are just: model.full.model.key.name.lora_up.weight

It is supported by all comfyui supported models.

Now people can just convert loras to this format instead of having to ask
me to implement them.
2024-08-07 13:18:32 -04:00
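What a loader does with this format, roughly sketched: for each model weight, look up the matching lora_up/lora_down pair under the prefix and add the low-rank product back on (the helper name and prefix handling are illustrative):

    import torch

    def apply_comfy_lora(model_sd, lora_sd, strength=1.0, prefix="model."):
        for name, w in model_sd.items():
            stem = prefix + name.removesuffix(".weight")
            up = lora_sd.get(stem + ".lora_up.weight")      # [out, rank]
            down = lora_sd.get(stem + ".lora_down.weight")  # [rank, in]
            if up is not None and down is not None:
                w += strength * (up @ down).reshape(w.shape)
        return model_sd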
c19dcd362f Controlnet code refactor. 2024-08-07 12:59:28 -04:00
1c08bf35b4 Support format for embeddings bundled in loras. 2024-08-07 03:45:25 -04:00
2a02546e20 Add type hints to folder_paths.py (#4191)
* add type hints to folder_paths.py

* replace deprecated standard collections type hints

* fix type error when using Python 3.8
2024-08-06 21:59:34 -04:00
b334605a66 Fix OOMs happening in some cases.
A cloned model patcher sometimes reported a model was loaded on a device
when it wasn't.
2024-08-06 13:36:04 -04:00
de17a9755e Unload all models if there's an OOM error. 2024-08-06 03:30:28 -04:00
c14ac98fed Unload models and load them back in lowvram mode when there is no free vram. 2024-08-06 03:22:39 -04:00
2894511893 Clone taesd with depth of 1 to reduce download size. (#4232) 2024-08-06 01:46:09 -04:00
f3bc40223a Add format metadata to CLIP save to make compatible with diffusers safetensors loading (#4233) 2024-08-06 01:45:24 -04:00
841e74ac40 Change browser test CI python to 3.8 (#4234) 2024-08-06 01:27:28 -04:00
2d75df45e6 Flux tweak memory usage. 2024-08-05 21:58:28 -04:00
1abc9c8703 Stable release uses cached dependencies (#4231)
* Release stable based on existing tag.

* Update default cuda to 12.1.
2024-08-05 20:07:16 -04:00
8edbcf5209 Improve performance on some lowend GPUs. 2024-08-05 16:24:04 -04:00
e545a636ba This probably doesn't work anymore. 2024-08-05 12:31:42 -04:00
33e5203a2a Don't cache index.html (#4211) 2024-08-05 12:25:28 -04:00
a178e25912 Fix Flux FP64 math on XPU (#4210) 2024-08-05 01:26:20 -04:00
78e133d041 Support simple diffusers Flux loras. 2024-08-04 22:05:48 -04:00
7afa985fba Correct spelling 'token_weight_pars_t5' to 'token_weight_pairs_t5' (#4200) 2024-08-04 17:10:02 -04:00
ddb6a9f47c Set the step in EmptySD3LatentImage to 16.
These models work better when the res is a multiple of 16.
2024-08-04 15:59:02 -04:00
3b71f84b50 ONNX tracing fixes. 2024-08-04 15:45:43 -04:00
0a6b008117 Fix issue with some custom nodes. 2024-08-04 10:03:33 -04:00
56f3c660bf ModelSamplingFlux now takes a resolution and adjusts the shift with it.
If you want to sample Flux dev exactly how the reference code does use
the same resolution as your image in this node.
2024-08-04 04:06:00 -04:00
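The resolution-dependent shift from the Flux reference code, sketched from memory (the constants are the reference defaults as I recall them, so treat them as assumptions):

    import math

    def flux_shift_for_resolution(width, height,
                                  x1=256, y1=0.5, x2=4096, y2=1.15):
        seq_len = (width // 16) * (height // 16)  # number of latent patches
        m = (y2 - y1) / (x2 - x1)
        mu = m * seq_len + (y1 - m * x1)          # linear in sequence length
        return math.exp(mu)  # applied as sigma' = s*sigma / (1 + (s - 1)*sigma)

    print(flux_shift_for_resolution(1024, 1024))  # ~3.16 at 1024x1024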
f7a5107784 Fix crash. 2024-08-03 16:55:38 -04:00
91be9c2867 Tweak lowvram memory formula. 2024-08-03 16:44:50 -04:00
03c5018c98 Lower lowvram memory to 1/3 of free memory. 2024-08-03 15:14:07 -04:00
2ba5cc8b86 Fix some issues. 2024-08-03 15:06:40 -04:00
1e68002b87 Cap lowvram to half of free memory. 2024-08-03 14:50:20 -04:00
ba9095e5bd Automatically use fp8 for diffusion model weights if:
Checkpoint contains weights in fp8.

There isn't enough memory to load the diffusion model in GPU vram.
2024-08-03 13:45:19 -04:00
f123328b82 Load T5 in fp8 if it's in fp8 in the Flux checkpoint. 2024-08-03 12:39:33 -04:00
63a7e8edba More aggressive batch splitting. 2024-08-03 11:53:30 -04:00
0eea47d580 Add ModelSamplingFlux to experiment with the shift value.
Default shift on Flux Schnell is 0.0
2024-08-03 03:54:38 -04:00
7cd0cdfce6 Add advanced model merge node for Flux model. 2024-08-02 23:20:53 -04:00
ea03c9dcd2 Better per model memory usage estimations. 2024-08-02 18:09:24 -04:00
3a9ee995cf Tweak regular SD memory formula. 2024-08-02 17:34:30 -04:00
47da42d928 Better Flux vram estimation. 2024-08-02 17:02:35 -04:00
17bbd83176 Fix bug loading flac workflow when it contains = character. 2024-08-02 13:14:28 -04:00
bfb52de866 Lower SAG scale step for finer control (#4158)
* Lower SAG step for finer control

Since the introduction of CFG++, which uses very low cfg values, a step of 0.1 in SAG might be too coarse for fine control. Even a SAG scale of 0.1 can be too high when cfg is only 0.6, so I changed the step to 0.01.

* Lower PAG step as well.

* Update nodes_sag.py
2024-08-02 10:29:03 -04:00
eca962c6da Add FluxGuidance node.
This lets you adjust the guidance on the dev model, which is a parameter passed to the diffusion model.
2024-08-02 10:25:49 -04:00
c1696cd1b5 Add missing import (#4174) 2024-08-02 09:34:12 -04:00
369f459b20 Fix no longer working on old pytorch. 2024-08-01 22:20:24 -04:00
ce9ac2fe05 Fix clip_g/clip_l mixup (#4168) 2024-08-01 21:40:56 -04:00
e638f2858a Hack to make all resolutions work on Flux models. 2024-08-01 21:39:18 -04:00
a531001cc7 Add CLIPTextEncodeFlux. 2024-08-01 18:53:25 -04:00
d420bc792a Tweak the memory usage formulas for Flux and SD. 2024-08-01 17:53:45 -04:00
d965474aaa Make ComfyUI split batches a higher priority than weight offload. 2024-08-01 16:39:59 -04:00
1c61361fd2 Fast preview support for Flux. 2024-08-01 16:28:11 -04:00
a6decf1e62 Fix bfloat16 potentially not being enabled on mps. 2024-08-01 16:18:44 -04:00
48eb1399c0 Try to fix mac issue. 2024-08-01 13:41:27 -04:00
b4f6ebb2e8 Rename UNETLoader node to "Load Diffusion Model". 2024-08-01 13:33:30 -04:00
d7430a1651 Add a way to load the diffusion model in fp8 with UNETLoader node. 2024-08-01 13:30:51 -04:00
f2b80f95d2 Better Mac support on flux model. 2024-08-01 13:10:50 -04:00
1aa9cf3292 Make lowvram more aggressive on low memory machines. 2024-08-01 12:11:57 -04:00
2f88d19ef3 Add link to Flux examples to readme. 2024-08-01 11:48:19 -04:00
eb96c3bd82 Fix .sft file loading (they are safetensors files). 2024-08-01 11:32:58 -04:00
5f98de7697 Load flux t5 in fp8 if weights are in fp8. 2024-08-01 11:05:56 -04:00
8d34211a7a Fix old python versions no longer working. 2024-08-01 09:57:20 -04:00
1589b58d3e Basic Flux Schnell and Flux Dev model implementation. 2024-08-01 09:49:29 -04:00
7ad574bffd Mac supports bf16 just make sure you are using the latest pytorch. 2024-08-01 09:42:17 -04:00
e2382b6adb Make lowvram less aggressive when there are large amounts of free memory. 2024-08-01 03:58:58 -04:00
c24f897352 Fix to get fp8 working on T5 base. 2024-07-31 02:00:19 -04:00
a5991a7aa6 Fix hunyuan dit text encoder weights always being in fp32. 2024-07-31 01:34:57 -04:00
2c038ccef0 Lower CLIP memory usage by a bit. 2024-07-31 01:32:35 -04:00
b85216a3c0 Lower T5 memory usage by a few hundred MB. 2024-07-31 00:52:34 -04:00
82cae45d44 Fix potential issue with non clip text embeddings. 2024-07-30 14:41:13 -04:00
25853d0be8 Use common function for casting weights to input. 2024-07-30 10:49:14 -04:00
79040635da Remove unnecessary code. 2024-07-30 05:01:34 -04:00
66d35c07ce Improve artifacts on hydit, auraflow and SD3 on specific resolutions.
This breaks seeds for resolutions that are not a multiple of 16 in pixel
resolution by using circular padding instead of reflection padding but
should lower the amount of artifacts when doing img2img at those
resolutions.
2024-07-29 20:48:50 -04:00
c75b50607b Less confusing exception if pillow() function fails. 2024-07-29 11:15:37 -04:00
4ba7fa0244 Refactor: Move sd2_clip.py to text_encoders folder. 2024-07-28 01:19:20 -04:00
ab76abc767 Active workflow use primary fg color (#4090) 2024-07-27 23:34:19 -04:00
9300058026 Add dpmpp_2s_ancestral as custom sampler (#4101)
Adding dpmpp_2s_ancestral as custom sampler node to enable its use with eta and s_noise when using custom sampling.
2024-07-27 16:19:50 -04:00
f82d09c9b4 Update packaging workflow. 2024-07-27 04:48:19 -04:00
e6829e7ac5 Add a way to set custom dependencies in the release workflow. 2024-07-27 04:41:46 -04:00
07f6a1a685 Handle case in the updater when master branch is not in local repo. 2024-07-27 03:15:22 -04:00
e746965c50 Update nightly package workflow. 2024-07-27 01:20:18 -04:00
45a2842d7f Set stable releases as a prerelease initially.
This should give time to test the standalone package before making it live.
2024-07-26 14:52:20 -04:00
17b41f622e Change windows standalone URL to stable release. (#4065) 2024-07-26 14:37:40 -04:00
cf4418b806 Don't treat Bert model like CLIP.
Bert can accept up to 512 tokens, so any prompt with more than 77 tokens should just be passed to it as is instead of being split up like for CLIP.
2024-07-26 13:08:12 -04:00
6225a7827c Add CLIPTextEncodeHunyuanDiT.
Useful for testing what each text encoder does.
2024-07-26 13:08:06 -04:00
b6779d8df3 Fix undo incorrectly undoing text input (#4114)
Fixes an issue where, under certain conditions, the ComfyUI custom undo / redo functions would not run when intended to.

When trying to undo an action like deleting several nodes, instead the native browser undo runs - e.g. a textarea gets focus and the last typed text is undone.  Clicking outside the text area and typing again just keeps doing the same thing.
2024-07-26 12:25:42 -04:00
8328a2d8cd Let hunyuan dit work with all prompt lengths. 2024-07-26 12:11:32 -04:00
afe732bef9 Hunyuan dit can now accept longer prompts. 2024-07-26 11:52:58 -04:00
a9ac56fc0d Own BertModel implementation that works with lowvram. 2024-07-26 04:47:17 -04:00
25b51b1a8b Hunyuan DiT lora support. 2024-07-25 22:42:54 -04:00
61a2b00bc2 Add HunyuanDiT support to readme. 2024-07-25 19:06:43 -04:00
a5f4292f9f Basic hunyuan dit implementation. (#4102)
* Let tokenizers return weights to be stored in the saved checkpoint.

* Basic hunyuan dit implementation.

* Fix some resolutions not working.

* Support hydit checkpoint save.

* Init with right dtype.

* Switch to optimized attention in pooler.

* Fix black images on hunyuan dit.
2024-07-25 18:21:08 -04:00
f87810cd3e Let tokenizers return weights to be stored in the saved checkpoint. 2024-07-25 10:52:09 -04:00
10c919f4c7 Make it possible to load tokenizer data from checkpoints. 2024-07-24 16:43:53 -04:00
ce80e69fb8 Avoid loading the dll when it's not necessary. 2024-07-24 13:50:34 -04:00
19944ad252 Add code to fix issues with new pytorch version on the standalone. 2024-07-24 12:49:29 -04:00
10b43ceea5 Remove duplicate code. 2024-07-24 01:12:59 -04:00
0a4c49c57c Support MT5. 2024-07-23 15:35:28 -04:00
88ed893034 Allow SPieceTokenizer to load model from a byte string. 2024-07-23 14:17:42 -04:00
334ba48cea More generic unet prefix detection code. 2024-07-23 14:13:32 -04:00
14764aa2e2 Rename LLAMATokenizer to SPieceTokenizer. 2024-07-22 12:21:45 -04:00
b2c995f623 "auto" type is only relevant to the SetUnionControlNetType node. 2024-07-22 11:30:38 -04:00
4151fbfa8a Add error message on union controlnet (#4081) 2024-07-22 11:27:32 -04:00
6045ed31f8 Suppress frontend exception on unhandled message type (#4078)
* Suppress frontend exception on unhandled message type

* nit
2024-07-21 21:15:01 -04:00
f836e69346 Fix bug with SaveAudio node with --gpu-only 2024-07-21 16:16:45 -04:00
5b69cfe7c3 Add timestamp to execution messages (#4076)
* Add timestamp to execution messages

* Add execution_end message

* Rename to execution_success
2024-07-21 15:29:10 -04:00
95fa9545f1 Only append zero to noise schedule if last sigma isn't zero. 2024-07-20 12:37:30 -04:00
11b74147ee Fix/webp exif little endian (#4061)
* Fix for isLittleEndian flag in parseExifData.

* Add break after reading first exif chunk in getWebpMetadata.
2024-07-19 18:39:04 -04:00
6ab8cad22e Implement beta sampling scheduler.
It is based on: https://arxiv.org/abs/2407.12173

Add "beta" to the list of schedulers and the BetaSamplingScheduler node.
2024-07-19 18:05:09 -04:00
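A sketch of the idea behind such a beta schedule, assuming alpha/beta defaults of 0.6 and an ascending per-timestep sigma table; details may differ from the actual scheduler:

```python
import numpy as np
import torch
from scipy.stats import beta as beta_dist

def beta_scheduler(model_sigmas, steps, alpha=0.6, beta=0.6):
    # Timestep indices are picked by pushing uniform quantiles through the
    # Beta(alpha, beta) inverse CDF, which concentrates sampling steps at
    # both ends of the noise schedule.
    total = len(model_sigmas) - 1
    ts = 1.0 - np.linspace(0.0, 1.0, steps, endpoint=False)
    idx = np.rint(beta_dist.ppf(ts, alpha, beta) * total).astype(int)
    return torch.FloatTensor([float(model_sigmas[i]) for i in idx] + [0.0])

# toy ascending sigma table (index = timestep), 1000 entries:
print(beta_scheduler(np.linspace(0.03, 14.6, 1000), steps=10))
```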
011b11d8d7 LoadAudio restores file value from workflow (#4043)
* LoadAudio restores file value from workflow

* use onAfterGraphConfigured

* Don't use anonymous function
2024-07-18 21:59:18 -04:00
ff6ca2a892 Move PAG to model_patches/unet section.
Move other unet model_patches nodes to model_patches/unet section.
2024-07-18 17:22:51 -04:00
374e093e09 Disable audio widget trying to get previews (#4044) 2024-07-17 16:11:10 -04:00
855789403b support clip-vit-large-patch14-336 (#4042)
* support clip-vit-large-patch14-336

* support clip-vit-large-patch14-336
2024-07-17 13:12:50 -04:00
6f7869f365 Get clip vision image size from config. 2024-07-17 13:05:38 -04:00
281ad42df4 Fix lowvram union controlnet bug. 2024-07-17 10:16:31 -04:00
1cde6b2eff Disallow use of eval with pylint (#4033) 2024-07-16 21:15:08 -04:00
c5a48b15bd Make default hash lib configurable without code changes via CLI argument (#3947)
* cli_args: Add --duplicate-check-hash-function.

* server.py: compare_image_hash configurable hash function

Uses an argument added in cli_args to specify the type of hashing to default to for duplicate hash checking. Uses an `eval()` to identify the specific hashlib class to utilize, but this operates safely because the arg parser only accepts those specific options/choices, so there is no unsafe input.

* Add hasher() to node_helpers

* hashlib selection moved to node_helpers

* default-hashing-function instead of dupe checking hasher

This makes a default-hashing-function option instead of previous selected option.

* Use args.default_hashing_function

* Use safer handling for node_helpers.hasher()

Uses a safer handling method than `eval` to evaluate default hashing function.

* Stray parentheses are evil.

* Indentation fix.

Somehow when I hit save I didn't notice I missed a space to make the indentation work properly. Oops!
2024-07-16 18:27:09 -04:00
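A minimal sketch of the safer lookup the later bullets describe: resolve the CLI choice through an explicit table instead of `eval()`. The exact table and default are assumptions:

```python
import hashlib

# Only known constructors are reachable; no arbitrary code can be evaluated.
HASH_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha1": hashlib.sha1,
    "sha256": hashlib.sha256,
    "sha512": hashlib.sha512,
}

def hasher(name="sha256"):
    return HASH_FUNCTIONS[name]

print(hasher("sha256")(b"some image bytes").hexdigest())
```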
f2298799ba Fix annotation (#4035) 2024-07-16 18:20:39 -04:00
60383f3b64 Move controlnet nodes to conditioning/controlnet. 2024-07-16 17:08:25 -04:00
8270c62530 Add SetUnionControlNetType to set the type of the union controlnet model. 2024-07-16 17:04:53 -04:00
821f93872e Allow model sampling to set number of timesteps. 2024-07-16 15:18:40 -04:00
e1630391d6 Allow version names like v0.0.1 for the FrontendManager. 2024-07-16 11:29:38 -04:00
99458e8aca Add FrontendManager to manage non-default front-end impl (#3897)
* Add frontend manager

* Add tests

* nit

* Add unit test to github CI

* Fix path

* nit

* ignore

* Add logging

* Install test deps

* Remove 'stable' keyword support

* Update test

* Add web-root arg

* Rename web-root to front-end-root

* Add test on non-exist version number

* Use repo owner/name to replace hard coded provider list

* Inline cmd args

* nit

* Fix unit test
2024-07-16 11:26:11 -04:00
33346fd9b8 Fix bug with custom nodes on other drives. 2024-07-15 20:38:26 -04:00
136c93cb47 Fix bug with workflow not registering change.
There was an issue when only the class type of a node changed with all the
inputs staying the same.
2024-07-15 20:01:49 -04:00
1305fb294c Refactor: Move some code to the comfy/text_encoders folder. 2024-07-15 17:36:24 -04:00
7914c47d5a Quick fix for the promax controlnet. 2024-07-14 10:07:36 -04:00
79547efb65 New menu fixes - fix send to workflow (#3909)
* Fix send to workflow
Fix center align of close workflow dialog
Better support for elements around canvas

* More resilient to extra elements added to body
2024-07-14 02:04:40 -04:00
a3dffc447a Support AuraFlow Lora and loading model weights in diffusers format.
You can load model weights in diffusers format using the UNETLoader node.
2024-07-13 13:51:40 -04:00
ce2473bb01 Add link to AuraFlow example in Readme. 2024-07-12 15:25:07 -04:00
4ca9b9cc29 Add Github Workflow for releasing stable versions and standalone bundle. (#3949)
* Add stable release.

* Only build CUDA 12.1 + 3.11 Python.

* Upgrade checkout and setup-python to latest version.

* lzma2

* Update artifact name to be ComfyUI_windows_portable_nvidia.7z
2024-07-12 13:33:57 -04:00
29c2e26724 Better tokenizing code for AuraFlow. 2024-07-12 01:15:25 -04:00
b6f09cf649 Add sentencepiece dependency. 2024-07-11 22:58:03 -04:00
8e012043a9 Add a ModelSamplingAuraFlow node to change the shift value.
Set the default AuraFlow shift value to 1.73 (sqrt(3)).
2024-07-11 17:57:36 -04:00
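For context, a sketch of the usual discrete-flow timestep shift (the exact form used here is an assumption); shift = 1.0 is the identity, and larger values push sampling toward higher-noise timesteps:

```python
import math

def time_snr_shift(t, shift=math.sqrt(3)):
    # maps a normalized timestep t in [0, 1] to a shifted timestep
    return shift * t / (1 + (shift - 1) * t)

print(time_snr_shift(0.5))  # ~0.634 with shift = sqrt(3)
```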
9f291d75b3 AuraFlow model implementation. 2024-07-11 16:52:26 -04:00
f45157e3ac Fix error message never being shown. 2024-07-11 11:46:51 -04:00
5e1fced639 Cleaner support for loading different diffusion model types. 2024-07-11 11:37:31 -04:00
ffe0bb0a33 Remove useless code. 2024-07-10 20:33:12 -04:00
391c1046cf More flexibility with text encoder return values.
Text encoders can now return other values to the CONDITIONING besides the cond and pooled output.
2024-07-10 20:06:50 -04:00
e44fa5667f Support returning text encoder attention masks. 2024-07-10 19:31:22 -04:00
90389b3b8a Update bug issue template (#3996)
* Update issue template

* nit
2024-07-10 11:28:15 -04:00
8d3f979b63 Check unhandled exception in test log in test action (#3987)
* Upload console logs

* Check unhandled exception
2024-07-09 17:12:57 -04:00
83f70a88fb Add __module__ to node info (#3936)
Use more explicit name 'python_module'

Parse abs path

Move parse to nodes.py
2024-07-09 17:07:15 -04:00
f1a01c2c7e Add sampler_pre_cfg_function (#3979)
* Update samplers.py

* Update model_patcher.py
2024-07-09 16:20:49 -04:00
c3db344746 Fix ConditioningZeroOut when there is no pooled output. 2024-07-09 11:52:31 -04:00
d160073829 Fix loadGraphData call during restore (#3976) 2024-07-09 11:23:26 -04:00
ade7aa1b0c Remove useless import. 2024-07-09 11:05:05 -04:00
faa57430b0 Controlnet union model basic implementation.
This is only the model code itself, it currently defaults to an empty
embedding [0] * 6 which seems to work better than treating it like a
regular controlnet.

TODO: Add nodes to select the image type.
2024-07-08 23:49:02 -04:00
bb663bcd6c Rename clip_t5base to t5base for stable audio text encoder. 2024-07-08 08:53:55 -04:00
628f0b8ebc Move audio nodes out of _for_testing. 2024-07-07 09:22:32 -04:00
2dc84d1444 Add a way to set the timestep multiplier in the flow sampling. 2024-07-06 04:06:03 -04:00
ff63893d10 Support other types of T5 models. 2024-07-06 02:42:53 -04:00
4040491149 Better T5xxl detection. 2024-07-06 00:53:33 -04:00
b8e58a9394 Cleanup T5 code a bit. 2024-07-06 00:36:49 -04:00
80c4590998 Allow specifying the padding token for the tokenizer. 2024-07-06 00:06:49 -04:00
ce649d61c0 Allow zeroing out of embeds with unused attention mask. 2024-07-05 23:48:17 -04:00
b4c2d03d47 Remove duplicate import. 2024-07-05 12:10:22 -04:00
1dc87df4c5 Readme changes. 2024-07-04 22:03:37 -04:00
cedbc94cc0 Forgot this in last commit. 2024-07-04 21:49:50 -04:00
bd2d3e27d7 Show comfy_extras warning at the end.
Remove code.
2024-07-04 21:44:27 -04:00
720b17442d Temporary revert. 2024-07-04 21:09:58 -04:00
0e3dfd9e34 Use relative path for custom/extra node module name (#3944)
* Fix module name for comfy extra nodes

* Use module name relative to root dir
2024-07-04 20:49:07 -04:00
739b76630e Remove useless code. 2024-07-04 15:14:13 -04:00
24b969d3da Skip state check hook on first load (#3915) 2024-07-03 20:30:07 -04:00
086ac75228 3.8 Compatible type annotation (#3938) 2024-07-03 19:31:46 -04:00
d7484ef30c Support loading checkpoints with the UNETLoader node. 2024-07-03 11:34:32 -04:00
537f35c7bc Don't update dict if contiguous. 2024-07-02 20:21:51 -04:00
3f46362d22 fix non-contiguous tensor saving (from channels-last) (#3932) 2024-07-02 20:16:33 -04:00
01991f72ce Fix SamplerEulerCFGpp node. 2024-07-02 12:21:08 -04:00
2f03201690 Remove some empty lines. 2024-07-02 01:32:23 -04:00
52aaee251f Fix to #3465. Prevent, resaving of duplicate images if overwrite not specified (#3472)
* Fix to #3465. Prevent the resaving of duplicate images if overwrite not specified

This is a fix to #3465 

Adds function compare_image_hash to do a sha256 hash comparison between an uploaded image and existing images with matching file names. 

This changes the behavior so that only images with the same filename that are actually different are saved to input; existing images are now opened instead of being resaved with an increment.

Currently, exact duplicates with the same filename are resaved with an incremented filename in the format:

<filename> (n).ext 

with the code: 

```
while os.path.exists(filepath):
    filename = f"{split[0]} ({i}){split[1]}"
    filepath = os.path.join(full_output_folder, filename)
    i += 1
```

This commit changes this to: 

```
while os.path.exists(filepath):
    if compare_image_hash(filepath, image):
        image_is_duplicate = True
        break
    filename = f"{split[0]} ({i}){split[1]}"
    filepath = os.path.join(full_output_folder, filename)
    i += 1
```

A check that image_is_duplicate is False is done before saving the file.

Currently, if you load the same image of a cat named cat.jpg into the LoadImage node 3 times, you will get 3 new files in your input folder with incremented file names.

With this change, you will now only have the single copy of cat.jpg, which will be re-opened instead of re-saved.

However if you load 3 different images of cats named cat.jpg, you will get the expected behavior of having:
cat.jpg
cat (1).jpg
cat (2).jpg

This saves space and clutter. After checking my own input folder, I have 800+ images that are duplicates that were resaved with incremented file names amounting to more than 5GB of duplicated data.

* fixed typo in expression
2024-07-02 01:30:33 -04:00
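A hedged sketch of the compare_image_hash idea described above; the signature is an assumption (the real function likely works on the upload's file object):

```python
import hashlib

def compare_image_hash(existing_path: str, uploaded_bytes: bytes) -> bool:
    # Identical sha256 digests of the file already on disk and the uploaded
    # bytes mean the upload is an exact duplicate.
    with open(existing_path, "rb") as f:
        disk_digest = hashlib.sha256(f.read()).digest()
    return disk_digest == hashlib.sha256(uploaded_bytes).digest()
```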
1ef66b0955 Add example for how to add custom API routes (#3597) 2024-07-01 18:02:42 -04:00
9dd549e253 Add --no-custom-node cmd flag (#3903)
* Add --no-custom-node cmd flag

* nit
2024-07-01 17:54:03 -04:00
b82d67d5bf Add SamplerEulerAncestralCFG++ custom sampler node (#3901)
(for eta and s_noise)
2024-07-01 17:42:17 -04:00
755c48d78e Fix several typos in example_node.py.example (#3204)
This change includes corrections for several spelling errors in the
documentation of example_node.py.example file.

These were previously raised by #3157, but they missed a few.
2024-07-01 17:21:12 -04:00
5dccfefe8d Switch nightly pytorch standalone package to lzma2. 2024-07-01 17:17:25 -04:00
0cd4a6a5e5 Fine-tuning GitHub Actions (#3169)
* Bumping GitHub Actions versions

* Using LZMA2 for 7zip compression in Windows packaging
2024-07-01 17:15:49 -04:00
601b4b63e1 Add CONTRIBUTING.md (#3910)
* Create CONTRIBUTING.md

* Add feature-request channel link.

* Remove discord links for channels.
2024-07-01 13:51:00 -04:00
e53b1592ba enable cmd shortcuts for mac (mute & bypass) (#3792) 2024-07-01 13:45:34 -04:00
7c5fa7f4a2 Fix loadGraphData func call (#3918) 2024-07-01 12:10:44 -04:00
521421f53e Fix workflow not importing from flac files on some systems. 2024-06-30 15:51:54 -04:00
dbb7dd3b5e Add to readme that Stable Audio is supported. 2024-06-30 00:15:49 -04:00
05e831697a Switch to the real cfg++ method in the samplers.
The old _pp ones will be updated automatically to the regular ones with 2x
the cfg.

My fault for not checking what the "_pp" samplers actually did.
2024-06-29 11:59:48 -04:00
fbb7a1f1b6 PreviewAudio node. 2024-06-29 01:33:22 -04:00
c39cf7fff0 Revert "Add integration test for Linux with Nvidia GPU. #3884 (#3895)" (#3905)
This reverts commit 449bf52923.
2024-06-28 16:09:55 -04:00
02cac1d487 Revert "Add macOs integration test for default workflow. (#3898)" (#3904)
This reverts commit 97b409cd48.
2024-06-28 16:09:39 -04:00
7ecb2ec169 Audio second setting in EmptyLatentAudio. 2024-06-28 02:55:36 -04:00
0d9009c96e New menu/workflows fixes (#3900)
* Fix auto queue

* Detect added nodes via search

* Fix loading workflows

* Add button click style
2024-06-28 01:07:19 -04:00
264caca20e ControlNetApplySD3 node can now be used to use SD3 controlnets. 2024-06-27 18:43:11 -04:00
f8f7568d03 Basic SD3 controlnet implementation.
Still missing the node to properly use it.
2024-06-27 18:43:11 -04:00
66aaa14001 Controlnet refactor. 2024-06-27 18:43:11 -04:00
97b409cd48 Add macOs integration test for default workflow. (#3898) 2024-06-27 16:10:16 -04:00
449bf52923 Add integration test for Linux with Nvidia GPU. #3884 (#3895)
* Add linux integration test.

* Fix directory path.

* Add paths ignore.

* Fix conda env directory path.
2024-06-27 16:08:26 -04:00
8ceb5a02a3 Support saving stable audio checkpoint that can be loaded back. 2024-06-27 11:06:52 -04:00
5ff3d4eb3a Fix audio upload when no audio in input dir (#3891) 2024-06-27 09:13:52 -04:00
4f9d2b057c Remove print. 2024-06-27 02:54:15 -04:00
4650e7d6e9 Save and load workflow from the flac files output by SaveAudio. 2024-06-27 02:07:29 -04:00
3b423afcca Add audio widget (#3863)
* Add audio widget

* Fix audio bugs

* Add CSS

* Populate audio widget when load history
2024-06-27 00:22:55 -04:00
44947e7ad4 Add DEIS order 3 sampler.
Order 4 seems to give bad results.
2024-06-26 22:40:05 -04:00
175fe02522 Ignore .vscode/ (#3879) 2024-06-26 19:59:19 -04:00
bc5a0f10db Ignore *.log (#3880) 2024-06-26 19:59:09 -04:00
a3e83f695d Update test ref (#3882)
* Update ref

* Disable some tests
2024-06-26 19:58:56 -04:00
f12fa1d8d7 Enable browser tests on push (#3878) 2024-06-26 09:09:21 -04:00
e3579f3360 Fix merge issue breaking api json loading (#3876) 2024-06-26 09:08:48 -04:00
edfce78c86 add issue templates for ComfyUI Issues Page (#3868) 2024-06-26 01:37:27 -04:00
e99d97a9d9 Remove duplicated Reset View button (#3865)
* Remove duplicated Reset View button

* Disable flaky test
2024-06-26 01:23:55 -04:00
69d710e40f Implement my alternative take on CFG++ as the euler_pp sampler.
Add euler_ancestral_pp which is the ancestral version of euler with the
same modification.
2024-06-25 07:41:52 -04:00
90aebb6c86 New Menu & Workflow Management (#3112)
* menu

* wip

* wip

* wip

* wip

* wip

* workflow saving/loading

* Support inserting workflows
Move buttons to top of lists

* fix session storage
implement renaming

* temp

* refactor, better workflow instance management

* wip

* progress on progress

* added send to workflow
various fixes

* Support multiple image loaders

* Support dynamic size breakpoints based on content

* various fixes
add close unsaved warning

* Add filtering tree

* prevent renaming unsaved

* fix zindex on hover

* fix top offset

* use filename as workflow name

* resize on setting change

* hide element until it is drawn

* remove glow

* Fix export name

* Fix test, revert accidental changes to groupNode

* Fix colors on all themes

* show hover items on smaller screen (mobile)

* remove debugging code

* dialog fix

* Don't reorder open workflows
Allow elements around canvas

* Toggle body display on setting change

* Fix menu disappearing on chrome

* Increase delay when typing, remove margin on Safari, fix dialog location

* Fix overflow issue on iOS

* Add reset view button
Prevent view changes causing history entries

* Bottom menu wip

* Various fixes

* Fix merge

* Fix breaking old menu position

* Fix merge adding restore view to loadGraphData
2024-06-25 06:49:25 -04:00
eab211bb1e Resample audio to 44100 when VAE encoding it. 2024-06-24 16:55:20 -04:00
866f54da8d Add browser test action synced with TS repo (#3852)
* Add browser test action

* Add npm install task
2024-06-24 14:47:28 -04:00
73ca780019 Add SamplerEulerCFG++ node.
This node should match the DDIM implementation of CFG++ when "regular" is
selected.

"alternative" is a slightly different take on CFG++
2024-06-23 13:21:18 -04:00
2f360ae898 Support OneTrainer SD3 lora format. 2024-06-22 13:08:04 -04:00
4ef1479dcd Multi dimension tiled scale function and tiled VAE audio encoding fallback. 2024-06-22 11:57:49 -04:00
887a6341ed Proper ModelMergeSD3_2B node. 2024-06-21 08:41:31 -04:00
1e2839f4d9 More proper tiled audio decoding. 2024-06-20 16:50:31 -04:00
d5efde89b7 Add ipndm_v sampler, works best with the exponential scheduler. 2024-06-20 08:51:49 -04:00
45e10cac19 feat: add gits scheduler (#3769) 2024-06-20 08:12:15 -04:00
d7f0964266 Fix routes (#3790) 2024-06-19 22:36:31 -04:00
028a583bef Fix issue with full diffusers SD3 loras. 2024-06-19 22:32:04 -04:00
0d6a57938e Support loading diffusers SD3 model format with UNETLoader node. 2024-06-19 22:21:18 -04:00
b08a9dd04b Remove empty line. 2024-06-19 20:20:35 -04:00
eee815ec99 Update sd1_clip.py (#3684)
Made token instance check more flexible so it also works with integers from numpy arrays or long tensors
2024-06-19 16:42:41 -04:00
e11052afcf Add ipndm sampler. 2024-06-19 16:32:30 -04:00
97ae6ef460 Add api/ prefix to api endpoints (#3779) 2024-06-19 10:39:17 -04:00
3914d5a2ae Support full SD3 loras. 2024-06-19 10:13:33 -04:00
55f0dc124e Add soundfile dependency so that windows can save audio. 2024-06-18 09:57:40 -04:00
a45df69570 Basic tiled decoding for audio VAE. 2024-06-17 22:48:23 -04:00
379ff92e9e fix app.js no graph defined (#3754)
* local test

* fix "graph" not found

* fix

---------

Co-authored-by: Xiujuan Li <xiujuali@amazon.com>
2024-06-17 07:56:53 -04:00
b7c473d1ab Fix lora keys for SimpleTuner (#3759) 2024-06-17 07:55:06 -04:00
6425252c4f Use fp16 as the default vae dtype for the audio VAE. 2024-06-16 13:12:54 -04:00
8ddc151a4c Squash deprecation warning on new pytorch. 2024-06-16 13:06:23 -04:00
ca9d300a80 Better estimation for memory usage during audio VAE encoding/decoding. 2024-06-16 11:47:32 -04:00
746a0410d4 Fix VAEEncode with taesd3. 2024-06-16 03:10:04 -04:00
04e8798c37 Improvements to the TAESD3 implementation. 2024-06-16 02:04:24 -04:00
df7db0e027 support TAESD3 (#3738) 2024-06-16 02:03:53 -04:00
bb1969cab7 Initial support for the stable audio open model. 2024-06-15 12:14:56 -04:00
1281f933c1 Small optimization. 2024-06-15 02:44:38 -04:00
f2e844e054 Optimize some unneeded if conditions in the sampling code. 2024-06-15 02:26:19 -04:00
0ec513d877 Add a --force-channels-last to inference models in channel last mode. 2024-06-15 01:08:12 -04:00
0e06b370db Print key names for easier debugging. 2024-06-14 18:18:53 -04:00
5eb98f0092 Exempt IPEX from non_blocking previews fixing segmentation faults. (#3708) 2024-06-13 18:51:14 -04:00
ac151ac169 Support SD3 diffusers lora. 2024-06-13 18:26:10 -04:00
37a08a41b3 Support setting weight offsets in weight patcher. 2024-06-13 17:21:26 -04:00
605e64f6d3 Fix lowvram issue. 2024-06-12 10:39:33 -04:00
0eaa34ec5b Fix regular empty latent image not working with SD3 and custom sampler. 2024-06-12 10:32:34 -04:00
321e509e0a Add link to SD3 example page to README. 2024-06-12 09:48:27 -04:00
c8b5e08dc3 Default shift value on SD3 is 3.0 2024-06-12 02:24:39 -04:00
1ddf512fdc Don't auto convert clip and vae weights to fp16 when saving checkpoint. 2024-06-12 01:07:58 -04:00
32be358213 Save SD3 modelspec.architecture in CheckpointSave node. 2024-06-12 01:02:07 -04:00
694e0b48e0 SD3 better memory usage estimation. 2024-06-12 00:49:00 -04:00
69c8d6d8a6 Single and dual clip loader nodes support SD3.
You can use the CLIPLoader to use the t5xxl only or the DualCLIPLoader to
use CLIP-L and CLIP-G only for sd3.
2024-06-11 23:27:39 -04:00
0e49211a11 Load the SD3 T5xxl model in the same dtype stored in the checkpoint. 2024-06-11 17:03:26 -04:00
5889b7ca0a Support multiple text encoder configurations on SD3. 2024-06-11 13:14:43 -04:00
1c34d338d7 Update EmptySD3LatentImage to use 1024 resolution by default. 2024-06-11 07:37:22 -04:00
9424522ead Reuse code. 2024-06-11 07:20:26 -04:00
73ce178021 Remove redundancy in mmdit.py (#3685) 2024-06-11 06:30:25 -04:00
4134564dc1 Require safetensors library to be at least 0.4.2 for fp8 support. 2024-06-11 06:26:13 -04:00
a82fae2375 Fix bug with cosxl edit model. 2024-06-10 16:00:03 -04:00
8c4a9befa7 SD3 Support. 2024-06-10 14:06:23 -04:00
a5e6a632f9 Support sampling non 2D latents. 2024-06-10 01:31:09 -04:00
742d5720d1 Support zeroing out text embeddings with the attention mask. 2024-06-09 16:51:58 -04:00
6cd8ffc465 Reshape the empty latent image to the right amount of channels if needed. 2024-06-08 02:35:08 -04:00
56333d4850 Use the end token for the text encoder attention mask. 2024-06-07 03:05:23 -04:00
0dccb4617d Remove some unnecessary arguments. 2024-06-06 14:49:45 -04:00
104fcea0c8 Add function to get the list of currently loaded models. 2024-06-05 23:25:16 -04:00
b1fd26fe9e pytorch xpu should be flash or mem efficient attention? 2024-06-04 17:44:14 -04:00
20447e9ec9 Fix alpha in PorterDuffImageComposite. (#3411)
There were two bugs in PorterDuffImageComposite.

The first one is the fact that it uses the mask input directly as alpha, missing the conversion (`1-a`). The fix is similar to c16f5744.

The second one is that all color composition formulas assume alpha premultiplied values, while the input is not premultiplied.

This change fixes both of these issues.
2024-06-04 16:37:11 -04:00
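A minimal sketch of "source over" done on premultiplied alpha, illustrating both fixes described above; tensor shapes are assumptions:

```python
import torch

def source_over(src_rgb, src_a, dst_rgb, dst_a):
    # Fix 1: alpha must come from an inverted mask (a = 1 - mask).
    # Fix 2: colors are premultiplied before the compositing formula.
    src_p, dst_p = src_rgb * src_a, dst_rgb * dst_a   # premultiply
    out_a = src_a + dst_a * (1 - src_a)
    out_p = src_p + dst_p * (1 - src_a)
    return out_p / out_a.clamp(min=1e-8), out_a       # un-premultiply

rgb = torch.rand(1, 64, 64, 3)
mask = torch.rand(1, 64, 64, 1)
out_rgb, out_a = source_over(rgb, 1.0 - mask,
                             torch.rand(1, 64, 64, 3), torch.ones(1, 64, 64, 1))
```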
cb8d0ebccc Don't load the view coordinates when loading a workflow from the history.
I think this makes things slightly less annoying for some users.
2024-06-03 19:48:27 -04:00
809cc85a8e Remove useless code. 2024-06-02 19:23:37 -04:00
b249862080 Add an annoying print to a function I want to remove. 2024-06-01 12:47:31 -04:00
e2c585f3be Fix to allow use of PerpNegGuider with cfg_function_post hooks (like PAG) (#3618) 2024-06-01 12:36:08 -04:00
04b308229e Small refactor of preview code. 2024-05-31 11:18:37 -04:00
bf3e334d46 Disable non_blocking when --deterministic or directml. 2024-05-30 11:07:38 -04:00
71ec5b144e Update commands to install nightly pytorch in readme. 2024-05-29 00:20:02 -04:00
91542d4f8b Import spandrel_extra_arches if present.
I will not add this dependency to the default ones because models in the
spandrel_extra_arches package are non-commercial and therefore not
compatible with free software licenses like the one ComfyUI uses.

If you don't mind this you can install it manually yourself.
2024-05-28 01:42:11 -04:00
b26da2245f Fix UnetParams annotation typo (#3589) 2024-05-27 19:30:35 -04:00
0920e0e5fe Remove some unused imports. 2024-05-27 19:08:27 -04:00
34030fed92 improve dom widget performance (#3584) 2024-05-27 14:26:07 -04:00
f6a203951f Extend core snapToGrid to LiteGraph Groups. (#3393)
Extends the core Comfy.SnapToGrid behavior for nodes to apply to LiteGraph's LGraphGroup with the same behavior. Also, pulls out redundant rounding code into util function.
2024-05-27 14:05:51 -04:00
16a493a190 Keep compatibility with some custom nodes. 2024-05-26 15:37:41 -04:00
9a151b7def Fix issue and unpin spandrel package. 2024-05-26 13:44:47 -04:00
8cfd677cc0 Replace chainner_models with Spandrel package (#2146)
* Replace chainner_models with Spandrel

* Update to latest spandrel

* Use spandrel_foss instead

* update spandrel to new FOSS-compliant version
2024-05-26 13:44:17 -04:00
ffc4b7c30e Fix DORA strength.
This is a different version of #3298 with more correct behavior.
2024-05-25 02:50:11 -04:00
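For illustration, a sketch of DoRA weight merging following the DoRA paper's decomposition; the strength handling (a lerp between original and decomposed weights, per the fix above) and the normalization axis are assumptions:

```python
import torch

def apply_dora(weight, up, down, dora_scale, strength=1.0):
    # Apply the low-rank delta, renormalize each output row, rescale by the
    # learned magnitude vector, then blend with the original weight.
    merged = weight + up @ down
    norm = merged.norm(dim=1, keepdim=True)
    decomposed = merged * (dora_scale / norm)
    return weight + strength * (decomposed - weight)

w = torch.randn(8, 8)
out = apply_dora(w, torch.randn(8, 2), torch.randn(2, 8),
                 torch.ones(8, 1), strength=0.7)
```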
5b87369474 Load titles from API format JSON (#3563) 2024-05-24 23:53:15 -04:00
efa5a711b2 Reduce memory usage when applying DORA: #3557 2024-05-24 23:36:48 -04:00
58c9838274 Speed up TAESD preview. 2024-05-24 02:37:57 -04:00
b02bcced05 Fix FreeU not working when shape is tensor. 2024-05-23 11:48:04 -04:00
6507a9c716 Remove the CTRL-Delete keybind.
On some keyboards it's apparently too easy to accidentally do CTRL-Delete
when pressing CTRL-Enter repeatedly.

CTRL-Backspace can still be used to clear the workflow.
2024-05-23 01:29:22 -04:00
6c23854f54 Fix OSX latent2rgb previews. 2024-05-22 13:56:28 -04:00
7718ada4ed Add type annotation UnetWrapperFunction (#3531)
* Add type annotation UnetWrapperFunction

* nit

* Add types.py
2024-05-22 02:07:27 -04:00
8508df2569 Work around black image bug on Mac 14.5 by forcing attention upcasting. 2024-05-21 16:56:33 -04:00
83d969e397 Disable xformers when tracing model. 2024-05-21 13:55:49 -04:00
1900e5119f Fix potential issue. 2024-05-20 08:19:54 -04:00
276f8fce9f Print error when node is missing. 2024-05-20 07:04:08 -04:00
4bc1884478 Provide a better error message when attempting to execute the workflow with a missing node. (#3517) 2024-05-20 06:58:46 -04:00
09e069ae6c Log the pytorch version. 2024-05-20 06:22:29 -04:00
11a2ad5110 Fix controlnet not upcasting on models that have it enabled. 2024-05-19 17:58:03 -04:00
4ae1515f14 Slightly faster latent2rgb previews. 2024-05-19 17:42:35 -04:00
f37a47110b Make --preview-method auto default to the fast latent2rgb previews. 2024-05-19 11:45:36 -04:00
0bdc2b15c7 Cleanup. 2024-05-18 10:11:44 -04:00
98f828fad9 Remove unnecessary code. 2024-05-18 09:36:44 -04:00
1c4af5918a Better error message if the webcam node doesn't work. 2024-05-17 14:02:09 -04:00
91590adf04 Add webcam node (#3497)
* Add webcam node

* unused import
2024-05-17 13:16:08 -04:00
19300655dd Don't automatically switch to lowvram mode on GPUs with low memory. 2024-05-17 00:31:32 -04:00
46daf0a9a7 Add debug options to force on and off attention upcasting. 2024-05-16 04:09:41 -04:00
58f8388020 More proper fix for #3484. 2024-05-16 00:11:01 -04:00
2d41642716 Fix lowvram dora issue. 2024-05-15 02:47:40 -04:00
ec6f16adb6 Fix SAG. 2024-05-14 18:02:27 -04:00
bb4940d837 Only enable attention upcasting on models that actually need it. 2024-05-14 17:00:50 -04:00
b0ab31d06c Refactor attention upcasting code part 1. 2024-05-14 12:47:31 -04:00
2de3b69b30 Support saving some more modelspec types. 2024-05-13 21:54:11 -04:00
cf6e1efb69 Show message on error when loading wf from file (works on drag and drop) (#3466) 2024-05-13 15:22:22 -04:00
ece5acb8e8 Fix nightly package workflow. 2024-05-12 16:05:10 -04:00
794a357f7a Update the nightly workflow. 2024-05-12 07:24:12 -04:00
22edd3add5 Fix to LoadImage Node for #3416 HDR images loading additional smaller… (#3454)
* Fix to LoadImage Node for #3416 HDR images loading additional smaller images. 

Added a blocking if statement in the ImageSequence.Iterator that checks if subsequent images after the first match dimensionally, and prevents them from being appended to output_images if they do not match.

This does not fix or change current behavior for PIL 10.2.0 where the images are loaded at the same size, but it does for 10.3.0 where they are loaded at their correct smaller sizes.

* added list of excluded formats that should return 1 image

added an explicit check for the image format so that additional formats can be added to the list that have problematic behavior.
2024-05-12 07:07:38 -04:00
f509c6fe21 Fix Intel GPU memory allocation accuracy and documentation update. (#3459)
* Change calculation of memory total to be more accurate; allocated is actually smaller than reserved.

* Update README.md install documentation for Intel GPUs.
2024-05-12 06:36:30 -04:00
fa6dd7e5bb Fix lowvram issue with saving checkpoints.
The previous fix didn't cover the case where the model was loaded in
lowvram mode right before.
2024-05-12 06:13:45 -04:00
49c20cdc70 No longer necessary. 2024-05-12 05:34:43 -04:00
e1489ad257 Fix issue with lowvram mode breaking model saving. 2024-05-11 21:55:20 -04:00
4f63ee99f1 Add a button to reset the view. 2024-05-10 17:30:52 -04:00
f374ea714d Setting for saving and restoring canvas position and zoom level (#3437) 2024-05-10 17:07:46 -04:00
0fecfd2b1a Added generic wrapper function node_helpers.pillow to fix PIL issues #4472 and #2445 (#3422)
* Update node_helpers.py to use generic pillow wrapper to resolve multiple meta-data related issues.

Replaced the open_image function with a generic pillow function that takes PIL functions as a dependency injection and applies the ImageFile.LOAD_TRUNCATED_IMAGES try/except fix to them.

This provides an extensible function to handle related errors that can wrap offending functions when discovered without the need to repeat code.

* Update a few PIL functions to use node_helpers.pillow wrapper

Updated PIL function calls in a few locations to use the generic node_helpers.pillow wrapper, which takes the function as a dependency injection and uses the try/except method with the ImageFile.LOAD_TRUNCATED_IMAGES solution

* Corrected comment in issue #s fixed.

* Update node_helpers.py to remove import of Image from PIL

The import of Image is no longer required as functions are injected
2024-05-09 05:38:00 -04:00
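A sketch of what such a wrapper can look like, combining this commit's dependency-injection idea with the retry-only-on-failure behavior from the May 4 commit below; the exception list is an assumption:

```python
from PIL import Image, ImageFile

def pillow(fn, arg):
    # Try the PIL call normally; only on failure retry with
    # LOAD_TRUNCATED_IMAGES set, restoring the previous flag value after.
    try:
        return fn(arg)
    except (ValueError, OSError):
        prev = ImageFile.LOAD_TRUNCATED_IMAGES
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        try:
            return fn(arg)
        finally:
            ImageFile.LOAD_TRUNCATED_IMAGES = prev

# usage: img = pillow(Image.open, "input/cat.png")  # hypothetical path
```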
93e876a3be Remove warnings that confuse people. 2024-05-09 05:29:42 -04:00
cd07340d96 Typo fix. 2024-05-08 18:36:56 -04:00
c33412288f Fix issue with loading some JPG: #3416 2024-05-07 05:41:06 -04:00
d7fa417bfa feat: shortcuts for zoom in/out (#3410)
* feat: shortcuts for zoom in/out

* feat: pen support for canvas zoom

ctrl + LMB + vertical drag

* Ctrl+LMB+Drag -> ctrl+Shift+LMB+Drag

---------

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2024-05-07 04:40:56 -04:00
c61eadf69a Make the load checkpoint with config function call the regular one.
I was going to completely remove this function because it is unmaintainable
but I think this is the best compromise.

The clip skip and v_prediction parts of the configs should still work but
not the fp16 vs fp32.
2024-05-06 20:04:39 -04:00
3787b4f246 Use get_model_object in Deep Shrink node (#3408) 2024-05-06 18:39:39 -04:00
565eb6d176 Add a SplitSigmasDenoise node as an alternative to SplitSigmas. 2024-05-05 05:24:36 -04:00
9a70b70de4 add opacity slider in maskeditor (#3404)
Co-authored-by: vilanele <nomail@email.com>
2024-05-05 05:01:06 -04:00
72508a8d19 Only set LOAD_TRUNCATED_IMAGES if the Image open fails.
Document which PIL issues this works around.
2024-05-04 03:51:03 -04:00
0d45efb7d6 Fixed Issue with LoadImage node when loading PNG files with embedded ICC profiles. (#3316)
* Fix issue with how PIL loads small PNG files in nodes.py

Added flag to prevent ValueError: Decompressed Data Too Large when loading PNG images with large metadata, such as large embedded color profiles

* Update LoadImage node to fix error when loading PNGs in nodes.py

Fixed ValueError: Decompressed Data Too Large thrown by PIL when attempting to open PNG files with large embedded ICC colorspaces, by setting the following flag to true when loading png images: ImageFile.LOAD_TRUNCATED_IMAGES = True

* Update node_helpers.py to include open_image helper function

open_image includes a try/except to catch Pillow ValueErrors that occur when large ICC profiles are embedded in images.

* Update LoadImage node to use open_image helper function in place of Image.open

The open_image helper function in node_helpers.py fixes a Pillow error when attempting to open images with large embedded ICC profiles by adding an exception handler to load the image with truncated metadata if regular loading is not possible.
2024-05-04 03:32:41 -04:00
daa92a8ff4 Fix potential issues with the int rounding fix. 2024-05-03 05:49:21 -04:00
89d0e9abeb Fix int widgets rounding. 2024-05-02 03:34:19 -04:00
a56d02efc7 Change torch.xpu to ipex.optimize, xpu device initialization and remove workaround for text node issue from older IPEX. (#3388) 2024-05-02 03:26:50 -04:00
f81a6fade8 Fix some edge cases with samplers and arrays with a single sigma. 2024-05-01 17:05:30 -04:00
94d5a12801 Don't load the model in SDTurboScheduler 2024-05-01 16:57:10 -04:00
2aed53c4ac Workaround xformers bug. 2024-04-30 21:23:40 -04:00
bacce529fb Add TLS Support (#3312)
* Add TLS Support

* Add to readme

* Add guidance for windows users on generating certificates

* Add guidance for windows users on generating certificates

* Fix typo
2024-04-30 20:17:02 -04:00
bb8b48a260 Update Readme. 2024-04-30 20:11:34 -04:00
eecd69b53a Add a SamplerLCMUpscale node.
This sampler is an LCM sampler that upscales the latent during sampling.

It can be used to generate at a higher resolution with an LCM model very
quickly.

To try it, use it with a basic 5 step LCM workflow with scale_ratio 1.5 or 2.0.
2024-04-29 20:00:47 -04:00
059773a6df Add some nodes to multiply the attention in UNet and Clip models. 2024-04-28 13:03:43 -04:00
10fcd09f4a Add a denoise value to AlignYourStepsScheduler. 2024-04-27 00:48:41 -04:00
8cab3be673 Update command for AMD stable pytorch install in README. 2024-04-26 15:44:12 -04:00
7990ae18c1 Fix error when more cond masks passed in than batch size (#3353) 2024-04-26 12:51:12 -04:00
16eabdf70d Free more vram for upscale models. 2024-04-25 17:04:19 -04:00
8dc19e40d1 Don't init a VAE model when there are no VAE weights. 2024-04-24 09:20:31 -04:00
27d5808fc4 Increase max lora strength to 100.0 2024-04-23 13:07:39 -04:00
b8218522f1 Increase sigma_min/sigma_max range for custom schedulers (#3317) 2024-04-23 09:40:10 -04:00
d09b5ef4ef Free some memory before loading upscale models. 2024-04-22 18:51:15 -04:00
4ee9aad6ca Speed up Sharpen node. 2024-04-21 09:02:06 -04:00
644a3ae58d Implement Align Your Steps as a AlignYourStepsScheduler node. 2024-04-20 04:34:12 -04:00
133dc3351b Faster blur. 2024-04-19 03:52:02 -04:00
5d08802f78 Sync some minor changes from the other repo. 2024-04-19 03:43:09 -04:00
c59fe9f254 Support VAE without quant_conv. 2024-04-18 21:05:33 -04:00
a88b0ebc2d Improve node input/widget conversion sub-menus (#3281)
* Make input/widget conversion sub-menus optional

* Improve input/widget conversion sub-menu text

- Fix incorrect text for conversion from widget to input, previously it
  effectively said "convert input to input"
- Use "input" instead of "🔘".  The former is clearer and consistent
  with the rest of the application.
- Use title case (consistent with the rest of the menu entries).
- Strip the trailing periods. There is already a visual indicator for
  sub-menus, and no other sub-menus use trailing periods.
2024-04-18 16:41:23 -04:00
d64e217427 Fix annoying float issue causing the value to be rounded to above the max. 2024-04-17 17:34:02 -04:00
072e3bd2b5 Fixed an issue where the main menu disappears intermittently as the coordinates become negative. (#3269) 2024-04-17 16:36:49 -04:00
abc69cab45 Add a helpful warning for links that don't point anywhere. 2024-04-17 12:28:05 -04:00
45ec1cbe96 Implement PerpNeg as a guider. 2024-04-16 02:57:34 -04:00
8903dce862 This can be removed since PAG doesn't use the uncond. 2024-04-15 12:14:00 -04:00
719fb2c81d Add basic PAG node. 2024-04-14 23:49:50 -04:00
258dbc06c3 Fix some memory related issues. 2024-04-14 12:08:58 -04:00
744ac944db Don't make dynamicPrompts the default on multiline string inputs.
This should be less confusing to those who want to use multiline input
without them.
2024-04-13 16:18:00 -04:00
58812ab8ca Support SDXS 512 model. 2024-04-12 22:12:35 -04:00
0256e7f769 Fix tests. 2024-04-12 20:02:53 -04:00
2bef134ebf change Convert.. input (#3246) 2024-04-12 17:02:17 -04:00
4bd7d55b90 Add some colors to SamplerCustom links.
If you don't like them I am open to a PR.
2024-04-11 22:43:05 -04:00
fd7c636680 Add an AddNoise node to add noise depending on the sigma. 2024-04-10 23:40:31 -04:00
831511a1ee Fix issue with sampling_settings persisting across models. 2024-04-09 23:20:43 -04:00
4201181b35 Add ModelMergeSD1, ModelMergeSD2 and ModelMergeSDXL. 2024-04-09 04:31:14 -04:00
30abc324c2 Support properly saving CosXL checkpoints. 2024-04-08 00:36:22 -04:00
d644b6bcd8 Cleanup some more conditioning nodes. 2024-04-07 14:40:43 -04:00
c9fc242e2c The middle prompt should be treated more as a negative prompt. 2024-04-07 14:34:43 -04:00
80bda6c163 Cleanup a few conditioning nodes. 2024-04-07 14:27:40 -04:00
0a03009808 Fix issue with controlnet models getting loaded multiple times. 2024-04-06 18:38:39 -04:00
de172f8be7 Improve A1111 metadata parsing (#3216)
* A1111 import: Set VAE name

This patch sets the VAE name for the `VAELoader` when present in the png
metadata.

* A1111 import: Skip all hashes

When importing from A1111 the parsing assumes that values of a key will
never contain a ":", which is not correct.

There are 2 cases where we can have ":" in the value:

- Inside a string. E.g.:
  Lora hashes: "xl_more_art-full_v1: fe3b4816be83, add-detail-xl: 9c783c8ce46c"

- When the value is a json dictionary. E.g.:
  Hashes: {"vae": "63aeecb90f", "embed:negativeXL_D": "fff5d51ab6"}

This patch changes how we parse the metadata to take those 2 cases into
account and also skips the following additional keys that are present in
some Forge images:

- Version
- VAE hash
- TI hashes
- Lora hashes
- Hashes

* A1111 import: Parse Hires steps

This patch parses the `Hires steps` parameter that is part of the High
Resolution Upscale configuration when it is present, and falls back to
the one from the `samplerNode` (like the code currently does) if it's
not present.
2024-04-06 12:10:17 -04:00
d8dea4cdb8 Fix DisableNoise node. 2024-04-05 21:36:23 -04:00
a7dd82e668 Fix copy paste issue with litegraph. 2024-04-05 14:59:05 -04:00
38ed2da2dd Fix typo in lowvram patcher (#3209) 2024-04-05 12:02:13 -04:00
ea9ac9d30b Fix PerpNeg node. 2024-04-05 11:46:54 -04:00
1088d1850f Support for CosXL models. 2024-04-05 10:53:41 -04:00
41ed7e85ea Fix object_patches_backup not being the same object across clones. 2024-04-05 00:22:44 -04:00
0f5768e038 Fix missing arguments in cfg_function. 2024-04-04 23:38:57 -04:00
1f4fc9ea0c Fix issue with get_model_object on patched model. 2024-04-04 23:01:02 -04:00
1a0486bb96 Fix model needing to be loaded on GPU to generate the sigmas. 2024-04-04 22:08:49 -04:00
1f8d8e6c77 Add InstructPixToPixConditioning node. 2024-04-04 15:06:17 -04:00
5272fd4b03 Add DualCFGGuider used in IP2P models for example. 2024-04-04 14:57:44 -04:00
cfbf3be54b Add basic guider for models with no cfg. 2024-04-04 13:57:32 -04:00
c6bd456c45 Make zero denoise a NOP. 2024-04-04 11:41:27 -04:00
fcfd2bdf8a Small cleanup. 2024-04-04 11:16:49 -04:00
f117566299 SamplerCustomAdvanced node.
This node enables the creation of nodes to change the guider/denoiser and
the noise algorithm.
2024-04-04 01:32:25 -04:00
0542088ef8 Refactor sampler code for more advanced sampler nodes part 2. 2024-04-04 01:26:41 -04:00
57753c964a Refactor sampling code for more advanced sampler nodes. 2024-04-03 22:09:51 -04:00
6c6a39251f Fix saving text encoder in fp8. 2024-04-02 11:46:34 -04:00
e6482fbbfc Refactor calc_cond_uncond_batch into calc_cond_batch.
calc_cond_batch can take an arbitrary amount of cond inputs.

Added a calc_cond_uncond_batch wrapper with a warning so custom nodes
won't break.
2024-04-01 18:07:47 -04:00
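A sketch of what such a compatibility shim can look like; the signatures and the stand-in body are assumptions, not the repository's actual code:

```python
import logging

def calc_cond_batch(model, conds, x_in, timestep, model_options):
    # stand-in body: the real function batches an arbitrary list of conds
    return [model(x_in, timestep, c) for c in conds]

def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
    # deprecated two-cond entry point forwards to the generic function
    logging.warning("calc_cond_uncond_batch() is deprecated, use calc_cond_batch()")
    out = calc_cond_batch(model, [cond, uncond], x_in, timestep, model_options)
    return out[0], out[1]
```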
1306464538 --force-fp16 is no longer necessary on Mac. 2024-03-31 12:50:28 -04:00
575acb69e4 IP2P model loading support.
This is the code to load the model and inference it with only a text
prompt. This commit does not contain the nodes to properly use it with an
image input.

This supports both the original SD1 instructpix2pix model and the
diffusers SDXL one.
2024-03-31 03:10:28 -04:00
96b4c757cf Add log to debug custom nodes that hang when imported. 2024-03-30 11:52:11 -04:00
94a5a67c32 Cleanup to support different types of inpaint models. 2024-03-29 14:44:13 -04:00
9bf6061dfc Switch prints to logging in folder_paths and add some extra debug. 2024-03-29 03:07:13 -04:00
5d8898c056 Fix some performance issues with weight loading and unloading.
Lower peak memory usage when changing model.

Fix case where model weights would be unloaded and reloaded.
2024-03-28 18:04:42 -04:00
327ca1313d Support SDXS 0.9 2024-03-27 23:58:58 -04:00
8ae1e4d125 Make step on sharpen node smaller. 2024-03-27 01:28:31 -04:00
2f93b91646 Add Tesla GPUs to cuda malloc blacklist. 2024-03-26 23:09:28 -04:00
c9673926fb Fix test. 2024-03-26 04:07:30 -04:00
11838e60f4 Increase the max resolution. 2024-03-26 04:00:53 -04:00
ae77590b4e dora_scale support for lora file. 2024-03-25 18:09:23 -04:00
c6de09b02e Optimize memory unload strategy for more optimized performance. 2024-03-24 02:36:30 -04:00
6a32c06f06 Move cleanup_models to improve performance. 2024-03-23 17:27:10 -04:00
a28a9dc836 Add an example to use the SaveImageWebsocket node and enable it. 2024-03-22 12:56:48 -04:00
0624838237 Add inverse noise scaling function. 2024-03-21 14:49:11 -04:00
5d875d77fe Fix regression with lcm not working with batches. 2024-03-20 20:48:54 -04:00
4b9005e949 Fix regression with model merging. 2024-03-20 13:56:12 -04:00
c18a203a8a Don't unload model weights for non weight patches. 2024-03-20 02:27:58 -04:00
150a3e946f Make LCM sampler use the model noise scaling function. 2024-03-20 01:35:59 -04:00
d14bdb1896 Revert, NOTE: this will be removed again soon please fix your nodes. 2024-03-19 11:17:49 -04:00
0c55f16c9e Remove code that should be useless now. 2024-03-19 09:47:14 -04:00
40e124c6be SV3D support. 2024-03-18 16:54:13 -04:00
0b78213bda Fix neg scale step. 2024-03-18 15:51:23 -04:00
b1a16d4500 Fix stable cascade img2img not working with all resolutions. 2024-03-18 13:51:38 -04:00
cacb022c4a Make saved SD1 checkpoints match more closely the official one. 2024-03-18 00:26:23 -04:00
d3406d8d58 Increase image batch nodes maximum values. 2024-03-17 08:57:49 -04:00
d7897fff2c Move cascade scale factor from stage_a to latent_formats.py 2024-03-16 14:49:35 -04:00
f2fe635c9f SamplerDPMAdaptative node to test the different options. 2024-03-15 22:36:10 -04:00
448d9263a2 Fix control loras breaking. 2024-03-14 09:30:21 -04:00
db8b59ecff Lower memory usage for loras in lowvram mode at the cost of perf. 2024-03-13 20:07:27 -04:00
eda8704386 Add SamplerDPMPP_3M_SDE node. 2024-03-12 12:16:37 -04:00
e7b8e240f7 Add SamplerLMS node. 2024-03-12 04:34:34 -04:00
2a813c3b09 Switch some more prints to logging. 2024-03-11 16:34:58 -04:00
0ed72befe1 Change log levels.
Logging level now defaults to info. --verbose sets it to debug.
2024-03-11 13:54:56 -04:00
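A minimal sketch of this kind of setup (flag name from the commit, wiring assumed):

```python
import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true")
args = parser.parse_args()

# default to INFO; --verbose switches to DEBUG
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
logging.debug("only shown with --verbose")
```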
dc6d4151a2 Not needed anymore. 2024-03-11 12:30:11 -04:00
03f4cfb7cd Replace more prints with logging. 2024-03-11 00:58:49 -04:00
65397ce601 Replace prints with logging and add --verbose argument. 2024-03-10 12:14:23 -04:00
4656273e72 Added additional nodes for CLIP merging 2024-03-09 19:32:33 +01:00
a9ee9589b7 Add SamplerEulerAncestral node. 2024-03-09 08:21:43 -05:00
0a4675266e Make message about missing dependencies more clear. 2024-03-08 18:43:13 -05:00
314d28c251 Pass extra_pnginfo as None when not in input data. 2024-03-07 15:07:47 -05:00
55f37baae8 Move some stable cascade nodes outside of _for_testing. 2024-03-07 01:49:20 -05:00
3f75419e2e Add a node to use the super resolution controlnet. 2024-03-07 01:48:31 -05:00
5f60ee246e Support loading the sr cascade controlnet. 2024-03-07 01:22:48 -05:00
03e6e81629 Set upscale algorithm to bilinear for stable cascade controlnet. 2024-03-06 02:59:40 -05:00
03e83bb5d0 Support stable cascade canny controlnet. 2024-03-06 02:25:42 -05:00
10860bcd28 Add compression_ratio to controlnet code. 2024-03-05 15:15:20 -05:00
a38b9b3ac1 Add debugging info for when comfy_extra nodes fail to import. 2024-03-04 13:24:08 -05:00
b7b5593166 Fix nightly workflow and update other workflows. 2024-03-04 13:06:13 -05:00
6d8834f08f Add Morphology nodes from kornia (#2781)
* import kornia

* Added morphology nodexs

* Add kornia to requirements

* fix choices

* options, also move to postprocessors

* fix placing and step
2024-03-04 12:50:28 -05:00
caddef8d88 Auto disable cuda malloc on unsupported GPUs on Linux. 2024-03-04 09:03:59 -05:00
478f71a249 Remove useless check. 2024-03-04 08:51:25 -05:00
0490ce8244 Fix differential diffusion node for batches. 2024-03-04 00:43:09 -05:00
b2e1744a16 Add a ThresholdMask node. 2024-03-04 00:31:59 -05:00
0db3111b5f Disable site dir in updater when doing pip install. 2024-03-03 16:25:16 -05:00
12c1080ebc Simplify differential diffusion code. 2024-03-03 15:34:42 -05:00
727021bdea Implement Differential Diffusion (#2876)
* Implement Differential Diffusion

* Cleanup.

* Fix.

* Masks should be applied at full strength.

* Fix colors.

* Register the node.

* Cleaner code.

* Fix issue with getting unipc sampler.

* Adjust thresholds.

* Switch to linear thresholds.

* Only calculate nearest_idx on valid thresholds.
2024-03-03 15:34:13 -05:00
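A hedged sketch of the per-step masking idea behind Differential Diffusion, matching the "linear thresholds" bullet above; names and the re-noising step are assumptions:

```python
import torch

def differential_mask(change_map, step, total_steps):
    # A linear threshold sweeps from 1 down to 0 over the run, so regions
    # with a high change-map value are denoised for more steps than regions
    # with a low one. The unmasked regions would be re-noised from the
    # original latent at the current noise level (omitted here).
    threshold = 1.0 - step / max(total_steps - 1, 1)
    return (change_map >= threshold).float()

cm = torch.rand(1, 1, 64, 64)
masks = [differential_mask(cm, s, 20) for s in range(20)]
```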
1abf8374ec utils.set_attr can now be used to set any attribute.
The old set_attr has been renamed to set_attr_param.
2024-03-02 17:27:23 -05:00
dce3555339 Add some tesla pascal GPUs to the fp16 working but slower list. 2024-03-02 17:16:31 -05:00
51df846598 Let conditioning specify custom concat conds. 2024-03-02 11:44:06 -05:00
9f71e4b62d Let model patches patch sub objects. 2024-03-02 11:43:27 -05:00
00425563c0 Cleanup: Use sampling noise scaling function for inpainting. 2024-03-01 14:24:41 -05:00
c62e836167 Move noise scaling to object with sampling math. 2024-03-01 12:54:38 -05:00
cb7c3a2921 Allow image_only_indicator to be None. 2024-02-29 13:11:30 -05:00
b3e97fc714 Koala 700M and 1B support.
Use the UNET Loader node to load the unet file to use them.
2024-02-28 12:10:11 -05:00
37a86e4618 Remove duplicate text_projection key from some saved models. 2024-02-28 03:57:41 -05:00
8daedc5bf2 Auto detect playground v2.5 model. 2024-02-27 18:03:03 -05:00
d46583ecec Playground V2.5 support with ModelSamplingContinuousEDM node.
Use ModelSamplingContinuousEDM with edm_playground_v2.5 selected.
2024-02-27 15:12:33 -05:00
1e0fcc9a65 Make XL checkpoints save in a more standard format. 2024-02-27 02:07:40 -05:00
b416be7d78 Make the text projection saved in the checkpoint the right format. 2024-02-27 01:52:23 -05:00
03c47fc0f2 Add a min_length property to tokenizer class. 2024-02-26 21:36:37 -05:00
e61755ead0 Update the old updater if present when running on the windows standalone. 2024-02-26 13:32:14 -05:00
36f7face37 Update the standalone package updater so it can self update. 2024-02-26 08:51:16 -05:00
8ac69f62e5 Make return_projected_pooled setable from the __init__ 2024-02-25 14:49:13 -05:00
ca7c310a0e Support loading old CLIP models saved with CLIPSave. 2024-02-25 08:29:12 -05:00
8d7910cee9 disable follow_symlinks in static serving for security reason (#2902) 2024-02-25 07:43:26 -05:00
4a7e751ce6 Add example for how to use WEB_DIRECTORY to add frontend extensions. 2024-02-25 07:34:22 -05:00
c2cb8e889b Always return unprojected pooled output for gligen. 2024-02-25 07:33:13 -05:00
1cb3f6a83b Move text projection into the CLIP model code.
Fix issue with not loading the SSD1B clip correctly.
2024-02-25 01:41:08 -05:00
6533b172c1 Support text encoder text_projection in lora. 2024-02-24 23:50:46 -05:00
1e5f0f66be Support lora keys with lora_prior_unet_ and lora_prior_te_ 2024-02-23 12:21:20 -05:00
e1cb93c383 Fix model and cond transformer options merge 2024-02-23 01:19:43 -07:00
10847dfafe Cleanup uni_pc inpainting.
This causes some small changes to the uni pc inpainting behavior but it
seems to improve results slightly.
2024-02-23 02:39:35 -05:00
877a8f7a3c Merge branch 'patch-1' of https://github.com/feffy380/ComfyUI 2024-02-22 16:23:50 -05:00
f81dbe26e2 FIX recursive_will_execute performance (simple ~300x performance increase) (#2852)
* FIX recursive_will_execute performance

* Minimize code changes

* memo must be created outside lambda
2024-02-21 20:21:24 -05:00
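A sketch of the memoization idea behind this fix; the function name and the "memo created outside the lambda" detail come from the commit, but the body and prompt structure here are assumptions:

```python
def recursive_will_execute(prompt, outputs, node_id, memo):
    # Without `memo`, shared upstream nodes are re-walked once per
    # downstream path, which blows up combinatorially in deep graphs;
    # with it, each node is fully visited only once.
    if node_id in memo:
        return memo[node_id]
    will_execute = []
    if node_id not in outputs:
        for value in prompt[node_id].get("inputs", {}).values():
            if isinstance(value, list):  # a link: [upstream_node_id, slot]
                will_execute += recursive_will_execute(prompt, outputs, value[0], memo)
        will_execute.append(node_id)
    memo[node_id] = will_execute
    return will_execute

prompt = {"1": {"inputs": {}}, "2": {"inputs": {"img": ["1", 0]}}}
print(recursive_will_execute(prompt, {}, "2", {}))  # ['1', '2']
```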
7faa4507ec ModelSamplingDiscrete: x0 model support that predict a denoised image. 2024-02-21 08:05:43 -05:00
820807c8ed Fix Perp-Neg math
adjust perp-neg implementation to match the paper
2024-02-21 10:33:03 +01:00
18c151b3e3 Add some latent2rgb matrices for previews. 2024-02-20 10:57:24 -05:00
0d0fbabd1d Pass pooled CLIP to stage b. 2024-02-20 04:24:45 -05:00
c6b7a157ed Align simple scheduling closer to official stable cascade scheduler. 2024-02-20 04:24:39 -05:00
ec4d89cee9 Add to Readme that stable cascade is supported. 2024-02-19 13:41:55 -05:00
a311524969 Node to make stable cascade image to image easier. 2024-02-19 13:36:20 -05:00
88f300401c Enable fp16 by default on mps. 2024-02-19 12:00:48 -05:00
e93cdd0ad0 Remove print. 2024-02-19 11:47:26 -05:00
3711b31dff Support Stable Cascade in checkpoint format. 2024-02-19 11:20:48 -05:00
d91f45ef28 Some cleanups to how the text encoders are loaded. 2024-02-19 10:46:30 -05:00
dbe0979b3f Larger range for min/max compression for StableCascade_EmptyLatentImage. 2024-02-19 08:59:53 -05:00
a7b5eaa7e3 Forgot to commit this. 2024-02-19 04:25:46 -05:00
3b2e579926 Support loading the Stable Cascade effnet and previewer as a VAE.
The effnet can be used to encode images for img2img with Stage C.
2024-02-19 04:10:01 -05:00
2e4628ac8d Merge branch 'iTXt-png-metadata-support' of https://github.com/shiimizu/ComfyUI 2024-02-18 23:44:58 -05:00
5171414143 Support additional PNG info. 2024-02-18 17:57:53 -08:00
dccca1daa5 Fix gligen lowvram mode. 2024-02-18 02:20:23 -05:00
8b60d33bb7 Add ModelSamplingStableCascade to control the shift sampling parameter.
shift is 2.0 by default on Stage C and 1.0 by default on Stage B.
2024-02-18 00:55:23 -05:00
6bcf57ff10 Fix attention masks properly for multiple batches. 2024-02-17 16:15:18 -05:00
11e3221f1f fp8 weight support for Stable Cascade. 2024-02-17 15:27:31 -05:00
f8706546f3 Fix attention mask batch size in some attention functions. 2024-02-17 15:22:21 -05:00
3b9969c1c5 Properly fix attention masks in CLIP with batches. 2024-02-17 12:13:13 -05:00
5b40e7a5ed Implement shift schedule for cascade stage C. 2024-02-17 11:38:47 -05:00
929e266f3e Manual cast for bf16 on older GPUs. 2024-02-17 09:01:17 -05:00
6c875d846b Fix clip attention mask issues on some hardware. 2024-02-17 07:53:52 -05:00
805c36ac9c Make Stable Cascade work on old pytorch 2.0 2024-02-17 00:42:30 -05:00
f2d1d16f4f Support Stable Cascade Stage B lite. 2024-02-16 23:41:23 -05:00
0b3c50480c Make --force-fp32 disable loading models in bf16. 2024-02-16 23:01:54 -05:00
97d03ae04a StableCascade CLIP model support. 2024-02-16 13:29:04 -05:00
667c92814e Stable Cascade Stage B. 2024-02-16 13:02:03 -05:00
f83109f09b Stable Cascade Stage C. 2024-02-16 10:55:08 -05:00
5e06baf112 Stable Cascade Stage A. 2024-02-16 06:30:39 -05:00
c2c885261a Merge branch 'batch-number-in-filename' of https://github.com/freakabcd/ComfyUI 2024-02-16 05:45:48 -05:00
aeaeca10bd Small refactor of is_device_* functions. 2024-02-15 21:10:10 -05:00
7f89cb48bf Add a disabled SaveImageWebsocket custom node.
This node can be used to efficiently get images without saving them to
disk when using ComfyUI as a backend.
2024-02-14 03:01:25 -05:00
38b7ac6e26 Don't init the CLIP model when the checkpoint has no CLIP weights. 2024-02-13 00:01:08 -05:00
0c9bc19768 Add ImageFromBatch. 2024-02-12 12:46:15 -05:00
cf4910a3a4 Prevent hideWidget being called twice for same widget
Fix for #2766
2024-02-12 08:59:25 +11:00
02409c30d9 Safari: Draws certain elements on CPU. In case of search popup, can cause 10 seconds+ main thread lock due to painting. (#2763)
* lets toggle this setting first.

* also makes it easier to debug. Honestly, this is generally my preferred behavior as well, but I'm no power user.

* attempting a trick to put the work for filter: brightness on the GPU as a first attempt, before falling back to not using the filter for large lists!

* revert litegraph.core.js changes from branch

* oops
2024-02-12 03:44:53 +09:00
7dd352cbd7 Merge branch 'feature_expose_discard_penultimate_sigma' of https://github.com/blepping/ComfyUI 2024-02-11 12:23:30 -05:00
20e3da6b31 Add a node to give the controlnet a prompt different from the unet. 2024-02-10 08:27:05 -05:00
f44225fd5f Fix infinite while loop being possible in ddim_scheduler 2024-02-09 17:11:34 -06:00
25a4805e51 Add a way to set different conditioning for the controlnet. 2024-02-09 14:13:31 -05:00
2ccc0be28f Add batch number to filename with %batch_num%
Allow configurable addition of batch number to output file name.
2024-02-08 22:03:11 +10:00
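For illustration, a tiny sketch (hypothetical helper, not the actual node code) of how such a placeholder can be expanded per image in a batch:

```python
def expand_prefix(prefix: str, batch_num: int) -> str:
    # Replace the %batch_num% placeholder with the image's index in the batch.
    return prefix.replace("%batch_num%", str(batch_num))

for i in range(3):
    print(expand_prefix("ComfyUI_%batch_num%", i))  # ComfyUI_0, ComfyUI_1, ComfyUI_2
```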
a352c021ec Allow custom samplers to request discard penultimate sigma 2024-02-08 02:24:23 -07:00
fd73b5ee3a Merge branch 'improved-mobile-support' of https://github.com/pythongosssss/ComfyUI 2024-02-08 01:06:33 -05:00
c661a8b118 Don't use numpy for calculating sigmas. 2024-02-07 18:52:51 -05:00
7daad468ec Sync litegraph to repo.
https://github.com/comfyanonymous/litegraph.js/pull/6
2024-02-06 12:43:06 -05:00
d2e7f1b04b Support linking converted inputs from api json 2024-02-06 16:55:55 +00:00
236bda2683 Make minimum tile size the size of the overlap. 2024-02-05 01:29:26 -05:00
74b7233f57 Document IS_CHANGED in the example custom node. 2024-02-04 23:15:49 -05:00
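A hedged sketch of the convention this documents: IS_CHANGED lets a custom node report when its output would differ, so ComfyUI re-executes it only when the returned value changes. The node class and hashing scheme below are hypothetical.

```python
import hashlib

class LoadTextFile:
    # Hypothetical custom node; only the IS_CHANGED convention matters here.
    @classmethod
    def IS_CHANGED(cls, path):
        # ComfyUI re-runs the node whenever this return value differs from
        # the previous run; hashing the file catches on-disk edits.
        with open(path, "rb") as f:
            return hashlib.sha256(f.read()).hexdigest()
```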
66e28ef45c Don't use is_bf16_supported to check for fp16 support. 2024-02-04 20:53:35 -05:00
24129d78e6 Speed up SDXL on 16xx series with fp16 weights and manual cast. 2024-02-04 13:23:43 -05:00
98b80ad1f5 Merge branch 'feature/maskeditor_brush_modes' of https://github.com/UltimaBeaR/ComfyUI 2024-02-03 15:06:10 -05:00
5f3dbede58 Mask editor: semitransparent brush, brush color modes 2024-02-03 10:29:44 +03:00
4b0239066d Always use fp16 for the text encoders. 2024-02-02 10:02:49 -05:00
d0e2354c28 Merge branch 'LatentSeed_update' of https://github.com/FizzleDorf/ComfyUI 2024-02-02 04:38:18 -05:00
f2bae7463e changed default of LatentBatchSeedBehavior to fixed 2024-02-02 18:31:35 +09:00
951a2064a3 Fix frontend webp prompt handling 2024-02-02 13:27:03 +08:00
4c54c2ec0f Merge branch 'increment-wrap' of https://github.com/pksebben/ComfyUI 2024-02-01 17:01:21 -05:00
53a22e1ab9 add increment-wrap as option to ValueControlWidget when isCombo, which loops back to 0 when at end of list 2024-01-31 16:14:50 -08:00
6ab4205422 feat: better pen support for mask editor
- alt-drag: erase
- shift-drag(up/down): zoom in/out
2024-01-31 18:28:36 +09:00
c5a369a33d Update readme for new pytorch 2.2 release. 2024-01-31 02:27:12 -05:00
6565c9ad4d Litegraph node search improvements.
See: https://github.com/comfyanonymous/litegraph.js/pull/5
2024-01-31 02:26:27 -05:00
eeca72488b Merge branch 'group-manage-fixes' of https://github.com/pythongosssss/ComfyUI 2024-01-31 00:25:03 -05:00
4ce587bcd3 Merge branch 'fix/mask-editor-inpaint' of https://github.com/Meowu/ComfyUI 2024-01-30 23:15:31 -05:00
af6165ab69 Fix scrolling with lots of nodes 2024-01-30 18:00:01 +00:00
29558fb3ac Fix crash when no widgets on customized group node 2024-01-30 17:59:47 +00:00
da7a8df0d2 Put VAE key name in model config. 2024-01-30 02:24:38 -05:00
364ef19354 fix: inpaint on mask editor bottom area 2024-01-30 14:23:01 +08:00
ed2fa105ae Make auto saved workflow stored per tab 2024-01-29 18:43:59 +00:00
9321198da6 Add node to set only the conditioning area strength. 2024-01-29 00:24:53 -05:00
079dbf9198 Remove useless code. 2024-01-28 19:36:32 -05:00
7f4725f6b3 Fix some issues with --gpu-only 2024-01-27 02:51:27 -05:00
fc196aac80 Add a LatentBatchSeedBehavior node.
This lets you set it so the latents can use the same seed for the sampling
on every image in the batch.
2024-01-26 23:13:02 -05:00
2d105066df Cleanups. 2024-01-26 21:31:13 -05:00
89507f8adf Remove some unused imports. 2024-01-25 23:42:37 -05:00
d1533d9c0f Add experimental photomaker nodes.
Put the model file in models/photomaker and use PhotoMakerLoader.

Then use PhotoMakerEncode with the keyword "photomaker" to apply the image
2024-01-24 09:51:42 -05:00
b9911dcb2f Sync litegraph with repo.
https://github.com/comfyanonymous/litegraph.js/pull/4
2024-01-23 20:01:37 -05:00
3762e676a9 Support refresh on group node combos (#2625)
* Support refresh on group node combos

* fix check
2024-01-23 14:15:52 -05:00
05cd00695a typo fix - calculate_sigmas_scheduler (#2619)
self.scheduler -> scheduler_name

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2024-01-23 03:47:01 -05:00
8a92ac2120 Ability to hide menu
Responsive setting screen
Touch events for zooming/context menu
2024-01-22 18:56:43 +00:00
f2d432f9a7 Fix potential turbo scheduler model patching issue. 2024-01-22 00:28:13 -05:00
4871a36458 Cleanup some unused imports. 2024-01-21 21:51:22 -05:00
45bf88d8ef Fix queue on change to respect auto queue checkbox (#2608)
* Fix render on change not respecting auto queue checkbox

Fix issue where autoQueueEnabled checkbox is ignored for changes if autoQueueMode is left on `change`

* Make check more specific
2024-01-21 21:34:39 -05:00
ef5a28b597 Merge branch 'patch-1' of https://github.com/TFWol/ComfyUI 2024-01-20 20:17:57 -05:00
5823f18a79 Fix for the extracting issue on windows. 2024-01-19 23:08:15 -05:00
78a70fda87 Remove useless import. 2024-01-19 15:38:05 -05:00
9fff3c46b4 Move some nodes to model_patches section. 2024-01-18 15:57:35 -05:00
d76a04b6ea Add unfinished ImageOnlyCheckpointSave node to save an SVD checkpoint.
This node is unfinished; SVD checkpoints saved with this node will
work with ComfyUI but not with anything else.
2024-01-17 19:46:21 -05:00
fad02dc2df Don't use PEP 604 type hints, to stay compatible with Python<3.10. 2024-01-17 17:16:34 -05:00
ee2c5fa72d Fix renaming upload widget (#2554)
* Fix renaming upload widget

* Allow custom name
2024-01-16 08:58:54 -05:00
818d0c01b2 Merge branch 'fix-logging-setting' of https://github.com/pythongosssss/ComfyUI 2024-01-16 08:29:38 -05:00
93bbe3f4c0 Auto queue on change (#2542)
* Add toggle to enable auto queue when graph is changed

* type fix

* better

* better alignment

* Change undoredo to not ignore inputs when autoqueue in change mode
2024-01-16 08:27:40 -05:00
23687da9a9 Fix logging not checking onChange 2024-01-15 17:45:48 +00:00
f9e55d8463 Only auto enable bf16 VAE on nvidia GPUs that actually support it. 2024-01-15 03:10:22 -05:00
1dab412c79 Add error handling to initial fix to keep cache intact 2024-01-14 15:06:33 -08:00
2395ae740a Make unclip more deterministic.
Pass a seed argument; note that this might make old unclip images different.
2024-01-14 17:28:31 -05:00
270daa02a8 Adds copy image option if browser feature available (#2544)
* Adds copy image option if browser feature available

* refactor
2024-01-14 14:53:52 -05:00
432ba1c179 Merge branch 'control_before_generate' of https://github.com/pythongosssss/ComfyUI 2024-01-13 16:06:43 -05:00
b5ece6354d Merge branch 'undoredo-fix-modifiers' of https://github.com/pythongosssss/ComfyUI 2024-01-13 16:03:44 -05:00
9bddc9d94b Fix crash on group render 2024-01-13 21:02:51 +00:00
18511dd581 Manage group nodes (#2455)
* wip group manage

* prototyping ui

* tweaks

* wip

* wip

* more wip

* fixes
add deletion

* Fix tests

* fixes

* Remove test code

* typo

* fix crash when link is invalid
2024-01-13 15:43:20 -05:00
8e916735c0 export function 2024-01-13 18:57:59 +00:00
32034217ae add setting to change control after generate to run before 2024-01-13 18:57:47 +00:00
df49a727ff Fix modifiers triggering key down checks 2024-01-13 17:00:30 +00:00
56d9496b18 Rename status notes to status messages.
I think message describes them better.
2024-01-12 18:17:06 -05:00
bcc0bde2af Clear status notes on execution start. 2024-01-12 17:21:22 -05:00
1805cb2d69 Merge branch 'enhanced-history-status' of https://github.com/realazthat/ComfyUI 2024-01-12 16:36:56 -05:00
53c8a99e6c Make server storage the default.
Remove --server-storage argument.
2024-01-11 17:21:40 -05:00
d4edd9bfa8 Fix hypertile issue with high depths. 2024-01-11 15:13:38 -05:00
1b3d65bd84 Add error, status to /history endpoint 2024-01-11 10:16:42 -05:00
4ab0392f70 Resolved crashing nodes caused by FileNotFoundError during directory traversal
- Implemented a `try-except` block in the `recursive_search` function to handle `FileNotFoundError` gracefully.
- When encountering a file or directory path that cannot be accessed (causing `FileNotFoundError`), the code now logs a warning and skips processing for that specific path instead of crashing the node (CheckpointLoaderSimple was usually the first to break). This allows the rest of the directory traversal to proceed without interruption.
2024-01-11 06:34:33 -08:00
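A minimal sketch of the pattern described (names are illustrative, not the actual ComfyUI code): catch `FileNotFoundError` per path, warn, and keep walking.

```python
import logging
import os

def recursive_search(directory):
    files = []
    for root, _, names in os.walk(directory):
        for name in names:
            path = os.path.join(root, name)
            try:
                os.stat(path)  # can raise FileNotFoundError (e.g. broken symlink)
            except FileNotFoundError:
                logging.warning("Skipping inaccessible path: %s", path)
                continue  # skip this path, keep traversing the rest
            files.append(path)
    return files
```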
977eda19a6 Don't round noise mask. 2024-01-11 03:29:58 -05:00
10f2609fdd Add InpaintModelConditioning node.
This is an alternative to VAE Encode for inpaint that should work with
lower denoise.

This is a different take on #2501
2024-01-11 03:15:27 -05:00
b4e915e745 Skip SAG when latent is too small. 2024-01-10 04:08:43 -05:00
1a57423d30 Fix issue when using multiple t2i adapters with batched images. 2024-01-10 04:00:49 -05:00
2c80d9acb9 Round up to nearest power of 2 in SAG node to fix some resolution issues. 2024-01-09 15:12:12 -05:00
6a7bc35db8 Use basic attention implementation for small inputs on old pytorch. 2024-01-09 13:46:52 -05:00
b3b5ddb07a Support I mode images in LoadImageMask. 2024-01-08 17:08:17 -05:00
2d74fc4360 Fix issue with user manager parent dir not being created. 2024-01-08 17:08:00 -05:00
235727fed7 Store user settings/data on the server and multi user support (#2160)
* wip per user data

* Rename, hide menu

* better error
rework default user

* store pretty

* Add userdata endpoints
Change nodetemplates to userdata

* add multi user message

* make normal arg

* Fix tests

* Ignore user dir

* user tests

* Changed to default to browser storage and add server-storage arg

* fix crash on empty templates

* fix settings added before load

* ignore parse errors
2024-01-08 17:06:44 -05:00
6a10640f0d Support properly loading images with mode I. 2024-01-08 03:46:36 -05:00
c6951548cf Update the optimized_attention_for_device function for the new
attention functions that support masked attention.
2024-01-07 13:52:08 -05:00
aaa9017302 Add attention mask support to sub quad attention. 2024-01-07 04:13:58 -05:00
0c2c9fbdfa Support attention mask in split attention. 2024-01-06 13:16:48 -05:00
3ad0191bfb Implement attention mask on xformers. 2024-01-06 04:33:03 -05:00
af94eb14e3 fix: /free handler function name 2024-01-06 04:27:09 +02:00
7c9a0f7e0a Fix BasicScheduler issue with Loras. 2024-01-05 12:31:13 -05:00
35322a3766 StableZero123_Conditioning_Batched node.
This node lets you generate a batch of images with different elevations or
azimuths by setting the elevation_batch_increment and/or
azimuth_batch_increment.

It also sets the batch index for the latents so that the same init noise is
used on each frame.
2024-01-05 04:20:03 -05:00
6d281b4ff4 Add a /free route to unload models or free all memory.
A POST request to /free with: {"unload_models":true}
will unload models from vram.

A POST request to /free with: {"free_memory":true}
will unload models and free all cached data from the last run workflow.
2024-01-04 17:15:22 -05:00
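A small client-side example of the route exactly as described above, assuming a local server on ComfyUI's default port 8188:

```python
import json
from urllib import request

def post_free(payload):
    req = request.Request(
        "http://127.0.0.1:8188/free",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    request.urlopen(req)

post_free({"unload_models": True})  # unload models from vram
post_free({"free_memory": True})    # also free cached data from the last run
```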
8c6493578b Implement noise augmentation for SD 4X upscale model. 2024-01-03 14:27:11 -05:00
ef4f6037cb Fix model patches not working in custom sampling scheduler nodes. 2024-01-03 12:16:30 -05:00
a7874d1a8b Add support for the stable diffusion x4 upscaling model.
This is an old model.

Load the checkpoint like a regular one and use the new
SD_4XUpscale_Conditioning node.
2024-01-03 03:37:56 -05:00
2c4e92a98b Fix regression. 2024-01-02 14:41:33 -05:00
5eddfdd80c Refactor VAE code.
Replace constants with downscale_ratio and latent_channels.
2024-01-02 13:24:34 -05:00
8e2c99e3cf Fix issue when websocket is deleted when data is being sent. 2024-01-02 11:50:00 -05:00
a47f609f90 Auto detect out_channels from model. 2024-01-02 01:50:57 -05:00
79f73a4b33 Remove useless code. 2024-01-02 01:50:29 -05:00
66831eb6e9 Add node id and prompt id to websocket progress packet. 2024-01-01 14:27:56 -05:00
d1f3637a5a Add a denoise parameter to BasicScheduler node. 2023-12-31 15:37:20 -05:00
36e15f2507 Reregister nodes when pressing refresh button. 2023-12-31 05:05:14 -05:00
1b103e0cb2 Add argument to run the VAE on the CPU. 2023-12-30 05:49:07 -05:00
144e6580a4 This cache timeout is pretty useless in practice. 2023-12-29 17:47:24 -05:00
04b713dda1 Fix VALIDATE_INPUTS getting called multiple times.
Allow VALIDATE_INPUTS to only validate specific inputs.
2023-12-29 17:36:40 -05:00
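A hedged sketch of the convention on a hypothetical node: VALIDATE_INPUTS receives inputs by name, so naming only some of them validates only those; returning True accepts, and returning a string is treated as the error message.

```python
class ResizeLatent:
    # Hypothetical node class; only VALIDATE_INPUTS is sketched.
    @classmethod
    def VALIDATE_INPUTS(cls, width, height):
        # Only "width" and "height" are checked here; other inputs
        # go through the default validation.
        if width % 8 != 0 or height % 8 != 0:
            return f"dimensions must be multiples of 8, got {width}x{height}"
        return True
```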
12e822c6c8 Use function to calculate model size in model patcher. 2023-12-28 21:46:20 -05:00
e1e322cf69 Load weights that can't be lowvramed to target device. 2023-12-28 21:41:10 -05:00
a8baa40d85 Cleanup. 2023-12-28 12:23:07 -05:00
c782144433 Fix clip vision lowvram mode not working. 2023-12-27 13:50:57 -05:00
e478b1794e Only add _meta title to api prompt when dev mode is enabled in UI. 2023-12-27 01:07:02 -05:00
f15dce71fd Add title to the API workflow json. (#2380)
* Add `title` to the API workflow json.

* API: Move `title` to `_meta` dictionary to imply it is unused.
2023-12-27 00:55:11 -05:00
f21bb41787 Fix taesd VAE in lowvram mode. 2023-12-26 12:52:21 -05:00
61b3f15f8f Fix lowvram mode not working with unCLIP and Revision code. 2023-12-26 05:02:02 -05:00
392878a262 Fix hiding dom widgets. 2023-12-25 19:17:40 -08:00
257c2eaaa4 Merge branch 'patch-1' of https://github.com/savolla/ComfyUI 2023-12-25 12:24:31 -05:00
d0165d819a Fix SVD lowvram mode. 2023-12-24 07:13:18 -05:00
a252963f95 --disable-smart-memory now unloads everything like it did originally. 2023-12-23 04:25:06 -05:00
36a7953142 Greatly improve lowvram sampling speed by getting rid of accelerate.
Let me know if this breaks anything.
2023-12-22 14:38:45 -05:00
261bcbb0d9 A few missing comfy ops in the VAE. 2023-12-22 04:05:42 -05:00
d35267e85a Litegraph updates.
Update from upstream repo.

Auto select value in prompt.

Increase maximum number of nodes to 10k.
2023-12-21 13:21:25 -05:00
6781b181ef Fix potential tensor device issue with ImageCompositeMasked. 2023-12-21 02:35:01 -05:00
a1e1c69f7d LoadImage now loads all the frames from animated images as a batch. 2023-12-20 16:39:09 -05:00
5f54614e7f Add a RebatchImages node. 2023-12-20 16:22:18 -05:00
e82942cc29 Add a denoise parameter to the SDTurboScheduler. 2023-12-20 02:54:25 -05:00
ba3f3aa1ca Merge branch 'test-reliability' of https://github.com/pythongosssss/ComfyUI 2023-12-19 16:32:53 -05:00
8680ac3dfd try to improve test reliability 2023-12-19 20:38:07 +00:00
e65110fd93 Fix dom widgets not being hidden 2023-12-19 20:22:01 +00:00
40ea2bd011 Update requirements.txt
The UI launches with one missing module, `torchvision`, and spits out a `ModuleNotFoundError`; installing the `torchvision` module fixed it.
2023-12-19 17:07:55 +03:00
9a7619b72d Fix regression with inpaint model. 2023-12-19 02:32:59 -05:00
571ea8cdcc Fix SAG not working with cfg 1.0 2023-12-18 17:03:32 -05:00
8cf1daa108 Fix SDXL area composition sometimes not using the right pooled output. 2023-12-18 12:54:23 -05:00
d2f322902c Fix wrong Stable Zero123 node name. 2023-12-18 03:59:50 -05:00
2258f85159 Support stable zero 123 model.
To use it use the ImageOnlyCheckpointLoader to load the checkpoint and
the new Stable_Zero123 node.
2023-12-18 03:48:04 -05:00
2f9d6a97ec Add --deterministic option to make pytorch use deterministic algorithms. 2023-12-17 16:59:21 -05:00
a036b94075 Move SaveAnimated nodes to image->animation. 2023-12-17 02:37:22 -05:00
6453dc1ca2 Fix name counter preventing more than 3 of the same node
Fix linked widget offset when populating values
2023-12-16 14:16:12 +00:00
e45d920ae3 Don't resize clip vision image when the size is already good. 2023-12-16 03:06:10 -05:00
13e6d5366e Switch clip vision to manual cast.
Make it use the same dtype as the text encoder.
2023-12-16 02:47:26 -05:00
574efd3782 Fix perpneg not working on SDXL. 2023-12-16 02:30:16 -05:00
172984db01 Fix SAG not working on certain resolutions. 2023-12-16 01:29:57 -05:00
6596654d47 Add a LatentBatch node. 2023-12-16 01:21:00 -05:00
719fa0866f Set clip vision model in eval mode so it works without inference mode. 2023-12-15 18:53:08 -05:00
adc40e3d7b Forgot this. 2023-12-15 15:46:23 -05:00
014c8bf2f2 Refactor LCM to support more model types. 2023-12-15 15:26:12 -05:00
9cad2f06ff Make perp neg take a conditioning input instead of a CLIP one. 2023-12-15 14:40:57 -05:00
574363a8a6 Implement Perp-Neg 2023-12-16 00:28:16 +05:30
a5056cfb1f Remove useless code. 2023-12-15 01:28:16 -05:00
b12b48e170 cleanup. 2023-12-14 20:11:46 -05:00
329c571993 Improve code legibility. 2023-12-14 11:41:49 -05:00
6c5990f7db Fix cfg being calculated more than once if sampler_cfg_function. 2023-12-13 20:28:04 -05:00
ba04a87d10 Refactor and improve the sag node.
Moved all the sag related code to comfy_extras/nodes_sag.py
2023-12-13 16:11:26 -05:00
6761233e9d Implement Self-Attention Guidance (#2201)
* First SAG test

* need to put extra options on the model instead of patcher

* no errors and results seem not-broken

* Use @ashen-uncensored formula, which works better!!!

* Fix a crash when using weird resolutions. Remove an unnecessary UNet call

* Improve comments, optimize memory in blur routine

* SAG works with sampler_cfg_function
2023-12-13 15:52:11 -05:00
390078904c Group node fixes (#2259)
* Prevent cleaning graph state on undo/redo

* Remove pause rendering due to LG bug

* Fix crash on disconnected internal reroutes

* Fix widget inputs being incorrect order and value

* Fix initial primitive values on connect

* basic support for basic rerouted converted inputs

* Populate primitive to reroute input

* dont crash on bad primitive links

* Fix convert to group changing control value

* reduce restrictions

* fix random crash in tests
2023-12-13 00:56:39 -05:00
b454a67bb9 Support segmind vega model. 2023-12-12 19:09:53 -05:00
824e4935f5 Add dtype parameter to VAE object. 2023-12-12 12:03:29 -05:00
32b7e7e769 Add manual cast to controlnet. 2023-12-12 11:32:42 -05:00
3152023fbc Use inference dtype for unet memory usage estimation. 2023-12-11 23:50:38 -05:00
77755ab8db Refactor comfy.ops
comfy.ops -> comfy.ops.disable_weight_init

This should make it more clear what they actually do.

Some unused code has also been removed.
2023-12-11 23:27:13 -05:00
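A rough sketch of what a "disable weight init" op amounts to (illustrative, not the actual comfy.ops code): subclass the torch layer and make reset_parameters a no-op, since the loaded checkpoint overwrites the random init anyway.

```python
import torch

class Linear(torch.nn.Linear):
    # Skipping init saves time when constructing large models whose
    # weights are immediately replaced by a loaded state dict.
    def reset_parameters(self):
        return None
```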
b0aab1e4ea Add an option --fp16-unet to force using fp16 for the unet. 2023-12-11 18:36:29 -05:00
ba07cb748e Use faster manual cast for fp8 in unet. 2023-12-11 18:24:44 -05:00
ab93abd4b2 Prevent cleaning graph state on undo/redo (#2255)
* Prevent cleaning graph state on undo/redo

* Remove pause rendering due to LG bug
2023-12-11 12:33:35 -05:00
57926635e8 Switch text encoder to manual cast.
Use fp16 text encoder weights for CPU inference to lower memory usage.
2023-12-10 23:00:54 -05:00
69033081c5 mask editor bugfix
- Addressing the issue where an unnecessary hidden panel disrupts the drawing.
2023-12-11 00:24:28 +09:00
340177e6e8 Disable non blocking on mps. 2023-12-10 01:30:35 -05:00
614b7e731f Implement GLora. 2023-12-09 18:15:26 -05:00
cb63e230b4 Make lora code a bit cleaner. 2023-12-09 14:15:09 -05:00
9e411073e9 Add instructions for those that have python 3.12 2023-12-09 13:41:30 -05:00
eccc9e64a6 Merge branch 'group-reroute-fix' of https://github.com/pythongosssss/ComfyUI 2023-12-09 12:01:26 -05:00
da74e3bbe3 Update pytorch nightly packaging workflow. 2023-12-09 12:01:17 -05:00
174eba8e95 Use own clip vision model implementation. 2023-12-09 11:56:31 -05:00
080ef75c31 fix 2023-12-09 13:19:21 +00:00
9aaf368a41 Fix internal reroutes connected to other groups 2023-12-09 13:04:35 +00:00
97015b6b38 Cleanup. 2023-12-08 16:02:08 -05:00
a4ec54a40d Add linear_start and linear_end to model_config.sampling_settings 2023-12-08 02:49:30 -05:00
9ac0b487ac Make --gpu-only put intermediate values in GPU memory instead of cpu. 2023-12-08 02:35:45 -05:00
cdff081023 Fix hypertile. 2023-12-07 15:22:35 -05:00
efb704c758 Support attention masking in CLIP implementation. 2023-12-07 02:51:02 -05:00
248d9125b0 Merge branch 'ht_deterministic' of https://github.com/asagi4/ComfyUI 2023-12-07 01:45:11 -05:00
fbdb14d4c4 Cleaner CLIP text encoder implementation.
Use a simple CLIP model implementation instead of the one from
transformers.

This will allow some interesting things that would be too hackish to implement
using the transformers implementation.
2023-12-06 23:50:03 -05:00
03eadbb53c Make HyperTile deterministic 2023-12-06 21:17:56 +02:00
2db86b4676 Slightly faster lora applying. 2023-12-06 05:13:14 -05:00
e134547341 Merge branch 'reroute-converted-inputs' of https://github.com/pythongosssss/ComfyUI
# Conflicts:
#	web/extensions/core/widgetInputs.js
2023-12-06 03:01:35 -05:00
8112a0d9fc improve: Mask Editor (#2171)
* renewal mask editor

* fix: ignoring keydown when 2nd open
2023-12-06 01:56:03 -05:00
ef29542030 Merge branch 'primitive-text-replacement' of https://github.com/pythongosssss/ComfyUI 2023-12-05 23:11:03 -05:00
8de6f94f5c Allow widget placeholder replacement on primitives 2023-12-05 21:02:10 +00:00
bcc469a2c9 try to stop test failing 2023-12-05 20:28:52 +00:00
a99da6667f reroute + primitive tests 2023-12-05 20:28:05 +00:00
44265e0810 Allow connecting primitivenode to reroutes 2023-12-05 20:27:13 +00:00
1bbd65ab30 Missed this one. 2023-12-05 12:48:41 -05:00
9b655d4fd7 Fix memory issue with control loras. 2023-12-04 21:55:19 -05:00
26b1c0a771 Fix control lora on fp8. 2023-12-04 13:47:41 -05:00
be3468ddd5 Less useless downcasting. 2023-12-04 12:53:46 -05:00
ca82ade765 Use .itemsize to get dtype size for fp8. 2023-12-04 11:52:06 -05:00
31b0f6f3d8 UNET weights can now be stored in fp8.
--fp8_e4m3fn-unet and --fp8_e5m2-unet are the two different formats
supported by pytorch.
2023-12-04 11:10:00 -05:00
af365e4dd1 All the unet ops with weights are now handled by comfy.ops 2023-12-04 03:12:18 -05:00
6efe561c2a Merge branch 'fix-template-sorting' of https://github.com/pythongosssss/ComfyUI 2023-12-03 22:51:23 -05:00
77ab2c3f69 fix template sorting 2023-12-03 17:17:23 +00:00
44d8abadf0 allow muting group node 2023-12-03 17:04:16 +00:00
496de0891d Allow removing erroring embedded groups
Unregister group nodes on workflow change
2023-12-03 16:49:48 +00:00
61a123a1e0 A different way of handling multiple images passed to SVD.
Previously when a list of 3 images [0, 1, 2] was used for a 6 frame video
they were concatenated like this:
[0, 1, 2, 0, 1, 2]

now they are concatenated like this:
[0, 0, 1, 1, 2, 2]
2023-12-03 03:31:47 -05:00
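In torch terms, the change is roughly the difference between repeat and repeat_interleave (stand-in tensors below):

```python
import torch

imgs = torch.tensor([0, 1, 2])   # stand-ins for 3 encoded images
old = imgs.repeat(2)             # tensor([0, 1, 2, 0, 1, 2])
new = imgs.repeat_interleave(2)  # tensor([0, 0, 1, 1, 2, 2])
```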
b2517b4ceb Load api workflow if regular workflow isn't in loaded image. 2023-12-02 13:56:11 -05:00
88e2c9746b Merge branch 'image-cache' of https://github.com/jn-jairo/ComfyUI 2023-12-02 13:02:33 -05:00
28220fa839 Fix node growing with DOM widgets when adding image even if enough space 2023-12-02 12:02:03 +00:00
c92f3dca73 Merge branch 'master' into image-cache 2023-12-02 05:16:21 -03:00
2995a24725 Update readme. 2023-12-01 18:29:33 -05:00
8491280504 Add Extension tests (#2125)
* Add test for extension hooks
Add afterConfigureGraph callback

* fix comment
2023-12-01 17:24:20 -05:00
ec7a00aa96 Fix extension widgets not working. 2023-12-01 04:13:04 -05:00
5d5c320054 Fix right click not working for some users. 2023-12-01 02:03:34 -05:00
c97be4db91 Support SD2.1 turbo checkpoint. 2023-11-30 19:27:03 -05:00
6b769bca01 Do a garbage collect after the interval even if nothing is running. 2023-11-30 15:22:32 -05:00
7f469203b7 Group nodes (#1776)
* setup ui unit tests

* Refactoring, adding connections

* Few tweaks

* Fix type

* Add general test

* Refactored and extended test

* move to describe

* for groups

* wip group nodes

* Relink nodes
Fixed widget values
Convert to nodes

* Reconnect on convert back

* add via node menu + canvas
refactor

* Add ws event handling

* fix using wrong node on widget serialize

* allow reroute pipe
fix control_after_generate configure

* allow multiple images

* Add test for converted widgets on missing nodes + fix crash

* tidy

* more tests + refactor

* throw earlier to get less confusing error

* support outputs

* more tests

* add ci action

* use lts node

* Fix?

* Prevent connecting non matching combos

* update

* accidentally removed npm i

* Disable logging extension

* fix naming
allow control_after_generate custom name
allow convert from reroutes

* group node tests

* Add executing info, custom node icon
Tidy

* internal reroute just works

* Fix crash on virtual nodes e.g. note

* Save group nodes to templates

* Fix template nodes not being stored

* Fix aborting convert

* tidy

* Fix reconnecting output links on convert to group

* Fix links on convert to nodes

* Handle missing internal nodes

* Trigger callback on text change

* Apply value on connect

* Fix converted widgets not reconnecting

* Group node updates
- persist internal ids in current session
- copy widget values when converting to nodes
- fix issue serializing converted inputs

* Resolve issue with sanitized node name

* Fix internal id

* allow outputs to be used internally and externally

* order widgets on group node
various fixes

* fix imageupload widget requiring a specific name

* groupnode imageupload test
give widget unique name

* Fix issue with external node links

* Add VAE model

* Fix internal node id check

* fix potential crash

* wip widget input support

* more wip group widget inputs

* Group node refactor
Support for primitives/converted widgets

* Fix convert to nodes with internal reroutes

* fix applying primitive

* Fix control widget values

* fix test
2023-11-30 14:13:27 -05:00
d19de2753e Merge branch 'fix_folders_handling' of https://github.com/fazo96/ComfyUI 2023-11-29 14:10:30 -05:00
777f6b1522 Add to README that SDXL Turbo is supported. 2023-11-28 14:45:00 -05:00
b911eefc42 Limit gc.collect() to once every 10 seconds. 2023-11-28 14:20:56 -05:00
57d7f4464f Add SDTurboScheduler node. 2023-11-28 13:35:32 -05:00
21063fa35b Lower compress level of png sent on websocket. 2023-11-28 11:01:05 -05:00
983ebc5792 Use smart model management for VAE to decrease latency. 2023-11-28 04:58:51 -05:00
798a34d009 Lower compress level for image preview. 2023-11-28 04:57:59 -05:00
a667638442 Merge branch 'undo-redo' of https://github.com/pythongosssss/ComfyUI 2023-11-27 22:29:46 -05:00
c45d1b9b67 Add a function to load a unet from a state dict. 2023-11-27 17:41:29 -05:00
f30b992b18 .sigma and .timestep now return tensors on the same device as the input. 2023-11-27 16:41:33 -05:00
488de0b4df ModelSamplingDiscreteLCM -> ModelSamplingDiscreteDistilled 2023-11-27 16:32:03 -05:00
13fdee6abf Try to free memory for both cond+uncond before inference. 2023-11-27 14:55:40 -05:00
be71bb5e13 Tweak memory inference calculations a bit. 2023-11-27 14:04:16 -05:00
9be0b30cf1 fix formatting 2023-11-27 14:02:50 +00:00
34eccd863b Add simple undo redo history 2023-11-27 14:00:15 +00:00
96c2deeefb Merge branch 'path_error_fix' of https://github.com/jeske/ComfyUI 2023-11-27 02:06:08 -05:00
edd6f75d3a better error for invalid output paths 2023-11-26 13:10:31 -07:00
6aa1bcd601 Remove hard coded max_items in history API 2023-11-26 17:23:11 +04:00
39e75862b2 Fix regression from last commit. 2023-11-26 03:43:02 -05:00
50dc39d6ec Clean up the extra_options dict for the transformer patches.
Now everything in transformer_options gets put in extra_options.
2023-11-26 03:13:56 -05:00
5b37270d3a Add a lora loader node for models with no CLIP. 2023-11-25 02:26:50 -05:00
5d6dfce548 Fix importing diffusers unets. 2023-11-24 20:35:29 -05:00
e020ab61f9 Fix output APNG not working with ffmpeg. 2023-11-24 18:24:19 -05:00
8ad5d494d5 Fix APNG not working in ffmpeg. 2023-11-24 18:14:17 -05:00
916e9c998c Use same default fps as webp node. 2023-11-24 11:19:23 -05:00
eff24ea6aa Add a node to save animated PNG files. These work in ffmpeg unlike webp. 2023-11-24 11:12:10 -05:00
3e5ea74ad3 Make buggy xformers fall back on pytorch attention. 2023-11-24 03:55:35 -05:00
982338b9bb Fix issue loading webp files in UI. 2023-11-24 02:08:08 -05:00
c782cf3ea9 Add to Readme that Stable Video Diffusion is supported. 2023-11-24 00:27:08 -05:00
02ffbb2de3 Fix typo. 2023-11-23 23:20:07 -05:00
42dfae6331 Nodes to properly use the SDV img2vid checkpoint.
The img2vid model is conditioned on clip vision output only, which means
there's no CLIP model; that is why I added an ImageOnlyCheckpointLoader to
load it. Note that the unClipCheckpointLoader can also load it because it
also has a CLIP_VISION output.

SDV_img2vid_Conditioning is the node used to pass the right conditioning
to the img2vid model.

VideoLinearCFGGuidance applies a linearly decreasing CFG scale to each
video frame from the cfg set in the sampler node to min_cfg.

SDV_img2vid_Conditioning can be found in conditioning->video_models
ImageOnlyCheckpointLoader can be found in loaders->video_models
VideoLinearCFGGuidance can be found in sampling->video_models
2023-11-23 19:48:49 -05:00
871cc20e13 Support SVD img2vid model. 2023-11-23 19:41:33 -05:00
1964bf1e78 fix: folder handling issues 2023-11-23 22:24:58 +01:00
022033a0e7 Fix SaveAnimatedWEBP not working when metadata is disabled. 2023-11-23 15:39:35 -05:00
4d2437e681 Call widget onRemove to remove element 2023-11-23 19:43:55 +00:00
a657f96c5c Add a node to save animated webp. 2023-11-23 14:28:41 -05:00
87031a1945 Update readme with link to LCM example page. 2023-11-23 11:59:11 -05:00
d03d8aa2e3 Fix loading groups. 2023-11-23 01:09:15 -05:00
410bf07771 Make VAE memory estimation take dtype into account. 2023-11-22 18:17:19 -05:00
32447f0c39 Add sampling_settings so models can specify specific sampling settings. 2023-11-22 17:24:00 -05:00
70d2ea0faa Control filter list (#2009)
* Add control_filter_list to filter items after queue

* fix regex

* backwards compatibility

* formatting

* revert

* Add and fix test
2023-11-22 12:52:20 -05:00
1ca4802e8c Merge branch 'hide-if-collapsed' of https://github.com/pythongosssss/ComfyUI 2023-11-22 11:46:21 -05:00
ab7d4f7848 Handle collapsing to hide element 2023-11-22 13:53:30 +00:00
c3ae99a749 Allow controlling downscale and upscale methods in PatchModelAddDownscale. 2023-11-22 03:23:16 -05:00
72741105a6 Remove useless code. 2023-11-21 17:27:28 -05:00
6a491ebe27 Allow model config to preprocess the vae state dict on load. 2023-11-21 16:29:18 -05:00
d66b631d74 Merge branch 'fix-collapsed-clip' of https://github.com/pythongosssss/ComfyUI 2023-11-21 13:26:26 -05:00
cd4fc77d5f Add taesd and taesdxl to VAELoader node.
They will show up if both the taesd_encoder and taesd_decoder or taesdxl
model files are present in the models/vae_approx directory.
2023-11-21 12:54:19 -05:00
89e31abc46 Fix clipping of collapsed nodes 2023-11-21 17:54:01 +00:00
6ff06fa796 Animated image output support (#2008)
* Refactor multiline widget into generic DOM widget

* wip webp preview

* webp support

* fix check

* fix sizing

* show image when zoomed out

* Swap webp check to generic animated image flag

* remove duplicate

* Fix falsy check
2023-11-21 01:33:58 -05:00
ce67dcbcda Make it easy for models to process the unet state dict on load. 2023-11-20 23:17:53 -05:00
2dd5b4dd78 Only show last 200 elements in the UI history tab. 2023-11-20 16:56:29 -05:00
a03dde190e Cap maximum history size at 10000. Delete oldest entry when reached. 2023-11-20 16:38:39 -05:00
31c5ea7b2c Add LatentInterpolate to interpolate between latents. 2023-11-20 03:55:51 -05:00
dba4f3b4fc Add a RepeatImageBatch node. 2023-11-19 06:09:01 -05:00
d9d8702d8d percent_to_sigma now returns a float instead of a tensor. 2023-11-18 23:20:29 -05:00
8a451234b3 Add ImageCrop node. 2023-11-18 04:44:17 -05:00
0cf4e86939 Add some command line arguments to store text encoder weights in fp8.
Pytorch supports two variants of fp8:
--fp8_e4m3fn-text-enc (the one that seems to give better results)
--fp8_e5m2-text-enc
2023-11-17 02:56:59 -05:00
107e78b1cb Add support for loading SSD1B diffusers unet version.
Improve diffusers model detection.
2023-11-16 23:12:55 -05:00
7e3fe3ad28 Make deep shrink behave like it should. 2023-11-16 15:26:28 -05:00
9f00a18095 Fix potential issues. 2023-11-16 14:59:54 -05:00
bd07ad1861 Add PatchModelAddDownscale (Kohya Deep Shrink) node.
By adding a downscale to the unet in the first timesteps this node lets
you generate images at higher resolutions with fewer consistency issues.
2023-11-16 13:25:46 -05:00
7ea6bb038c Print warning when controlnet can't be applied instead of crashing. 2023-11-16 12:57:12 -05:00
dcec1047e6 Invert the start and end percentages in the code.
This doesn't affect how percentages behave in the frontend but breaks
things if you relied on them in the backend.

percent_to_sigma goes from 0 to 1.0 instead of 1.0 to 0 for less confusion.

Make percent 0 return an extremely large sigma and percent 1.0 return
zero to fix imprecision.
2023-11-16 04:23:44 -05:00
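A sketch of the boundary behavior described above; the interior mapping is a hypothetical stand-in for the model's own schedule.

```python
def percent_to_sigma(percent, sigma_fn):
    if percent <= 0.0:
        return 999999999.9  # "extremely large": before sampling starts
    if percent >= 1.0:
        return 0.0          # exactly zero: after sampling ends
    return sigma_fn(percent)  # model-specific mapping in between
```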
7114cfec0e Always clone graph data when loading to fix some load issues. 2023-11-15 15:55:02 -05:00
629e4c552c Merge branch 'master' of https://github.com/42lux/ComfyUI 2023-11-15 01:47:21 -05:00
57eea0efbb heunpp2 sampler. 2023-11-14 23:50:55 -05:00
7b87c825a3 Added Colorschemes. Arc, North and Github. 2023-11-15 02:37:35 +01:00
728613bb3e Fix last pr. 2023-11-14 14:41:31 -05:00
ec3d0ab432 Merge branch 'master' of https://github.com/Jannchie/ComfyUI 2023-11-14 14:38:07 -05:00
c962884a5c Make bislerp work on GPU. 2023-11-14 11:38:36 -05:00
420beeeb05 Clean up and refactor sampler code.
This should make it much easier to write custom nodes with kdiffusion type
samplers.
2023-11-14 00:39:34 -05:00
f2e49b1d57 fix: adaptation to older versions of pytorch 2023-11-14 14:32:05 +09:00
94cc718e9c Add a way to add patches to the input block. 2023-11-14 00:08:12 -05:00
8509bd58b4 Reorganize custom_sampling nodes. 2023-11-13 21:45:23 -05:00
61112c81b9 Add a node to flip the sigmas for unsampling. 2023-11-13 21:45:08 -05:00
eb0407e806 Update litegraph to latest. 2023-11-13 16:26:28 -05:00
7339479b10 Disable xformers when it can't load properly. 2023-11-13 12:31:10 -05:00
f12ec55983 Allow boolean widgets to have no options dict. 2023-11-13 00:42:34 -05:00
4aeef781a3 Support number/text ids when importing API JSON (#1952)
* support numeric/text ids
2023-11-12 14:49:23 -05:00
4781819a85 Make memory estimation aware of model dtype. 2023-11-12 04:28:26 -05:00
dd4ba68b6e Allow different models to estimate memory usage differently. 2023-11-12 04:03:52 -05:00
2c9dba8dc0 sampling_function now has the model object as the argument. 2023-11-12 03:45:10 -05:00
8d80584f6a Remove useless argument from uni_pc sampler. 2023-11-12 01:25:33 -05:00
006b24cc32 Prevent image cache 2023-11-11 15:56:14 -03:00
248aa3e563 Fix bug. 2023-11-11 12:20:16 -05:00
4a8a839b40 Add option to use in place weight updating in ModelPatcher. 2023-11-11 01:11:12 -05:00
412d3ff57d Refactor. 2023-11-11 01:11:06 -05:00
ca2812bae0 Fix RescaleCFG for batch size > 1. 2023-11-10 22:05:25 -05:00
58d5d71a93 Working RescaleCFG node.
This was broken because of recent changes so I fixed it and moved it from
the experiments repo.
2023-11-10 20:52:10 -05:00
3e0033ef30 Fix model merge bug.
Unload models before getting weights for model patching.
2023-11-10 03:19:05 -05:00
002aefa382 Support lcm models.
Use the "lcm" sampler to sample them, you also have to use the
ModelSamplingDiscrete node to set them as lcm models to use them properly.
2023-11-09 18:30:22 -05:00
ca71e542d2 Lower cfg step to 0.1 in sampler nodes. 2023-11-09 17:35:17 -05:00
72e3feb573 Load API JSON (#1932)
* added loading api json

* revert async change

* reorder
2023-11-09 13:33:43 -05:00
cd6df8b323 Fix sanitize node name removing the "/" character. 2023-11-09 13:10:19 -05:00
ec12000136 Add support for full diff lora keys. 2023-11-08 22:05:31 -05:00
064d7583eb Add a CONDConstant for passing non tensor conds to unet. 2023-11-08 01:59:09 -05:00
794dd2064d Fix typo. 2023-11-07 23:41:55 -05:00
0a6fd49a3e Print leftover keys when using the UNETLoader. 2023-11-07 22:15:55 -05:00
fe40109b57 Fix issue with object patches not being copied with patcher. 2023-11-07 22:15:15 -05:00
a527d0c795 Code refactor. 2023-11-07 19:33:40 -05:00
2a23ba0b8c Fix unet ops not entirely on GPU. 2023-11-07 04:30:37 -05:00
844dbf97a7 Add: advanced->model->ModelSamplingDiscrete node.
This allows changing the sampling parameters of the model (eps or vpred)
or set the model to use zsnr.
2023-11-07 03:28:53 -05:00
d07cd44272 Merge branch 'master' of https://github.com/cubiq/ComfyUI 2023-11-07 01:52:13 -05:00
656c0b5d90 CLIP code refactor and improvements.
More generic clip model class that can be used on more types of text
encoders.

Don't apply weighting algorithm when weight is 1.0

Don't compute an empty token output when it's not needed.
2023-11-06 14:17:41 -05:00
b3fcd64c6c Make SDTokenizer class work with more types of tokenizers. 2023-11-06 01:09:18 -05:00
4acfc11a80 add difference blend mode 2023-11-05 19:00:23 +01:00
a6c83b3cd0 Merge branch 'fix_unet_wrapper_function_name' of https://github.com/gameltb/ComfyUI 2023-11-05 12:41:38 -05:00
02f062b5b7 Sanitize unknown node types on load to prevent XSS. 2023-11-05 12:29:28 -05:00
7e455adc07 fix unet_wrapper_function name in ModelPatcher 2023-11-05 17:11:44 +08:00
1ffa8858e7 Move model sampling code to comfy/model_sampling.py 2023-11-04 01:32:23 -04:00
ae2acfc21b Don't convert NaN to zero.
Converting NaN to zero is a bad idea because it makes it hard to tell when
something went wrong.
2023-11-03 13:13:15 -04:00
ee74ef5c9e Increase maximum batch size in LatentRebatch. 2023-11-02 13:07:41 -04:00
6e84a01ecc Refactor the template manager (#1878)
* add drag-drop to node template manager

* better dnd, save field on change

* actually save templates

---------

Co-authored-by: matt3o <matt3o@gmail.com>
2023-11-02 12:29:57 -04:00
dd116abfc4 Merge branch 'quantize-dither' of https://github.com/tsone/ComfyUI 2023-11-02 00:57:00 -04:00
d2e27b48f1 sampler_cfg_function now gets the noisy output as argument again.
This should make things that use sampler_cfg_function behave like before.

Added an input argument for those that want the denoised output.

This means you can calculate the x0 prediction of the model by doing:
(input - cond) for example.
2023-11-01 21:24:08 -04:00
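A hypothetical cfg function under the assumptions above: that the args dict exposes noisy "cond"/"uncond" outputs, "cond_scale", and the new "input" key, so the x0 prediction falls out as input - cond as the commit describes.

```python
def my_cfg_function(args):
    cond, uncond = args["cond"], args["uncond"]
    scale, x = args["cond_scale"], args["input"]
    x0 = x - cond  # x0 prediction of the model, as described above
    # Plain CFG here; x0 is available for custom guidance schemes.
    return uncond + scale * (cond - uncond)
```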
2455aaed8a Allow model or clip to be None in load_lora_for_models. 2023-11-01 20:27:20 -04:00
45a3df1cde Merge branch 'filter-widgets-crash-fix' of https://github.com/Jantolick/ComfyUI 2023-11-01 20:17:25 -04:00
ecb80abb58 Allow ModelSamplingDiscrete to be instantiated without a model config. 2023-11-01 19:13:03 -04:00
88410ace9b fix: handle null case for currentNode widgets to prevent scroll error 2023-11-01 16:52:51 -04:00
e73ec8c4da Not used anymore. 2023-11-01 00:01:30 -04:00
111f1b5255 Fix some issues with sampling precision. 2023-10-31 23:49:29 -04:00
7c0f255de1 Clean up percent start/end and make controlnets work with sigmas. 2023-10-31 22:14:32 -04:00
a268a574fa Remove a bunch of useless code.
DDIM is the same as euler with a small difference in the inpaint code.
DDIM uses randn_like but I set a fixed seed instead.

I'm keeping it in because I'm sure if I remove it people are going to
complain.
2023-10-31 18:11:29 -04:00
1777b54d02 Sampling code changes.
apply_model in model_base now returns the denoised output.

This means that sampling_function now computes things on the denoised
output instead of the model output. This should make things more consistent
across current and future models.
2023-10-31 17:33:43 -04:00
23c5d17837 Added Bayer dithering to Quantize node. 2023-10-31 22:22:40 +01:00
c837a173fa Fix some memory issues in sub quad attention. 2023-10-30 15:30:49 -04:00
125b03eead Fix some OOM issues with split attention. 2023-10-30 13:14:11 -04:00
41b07ff8d7 Fix TAESD preview to only decode first latent, instead of all 2023-10-29 13:30:23 -05:00
a12cc05323 Add --max-upload-size argument, the default is 100MB. 2023-10-29 03:55:46 -04:00
aac8fc99d6 Cleanup webp import code a bit. 2023-10-28 12:24:50 -04:00
2a134bfab9 Fix checkpoint loader with config. 2023-10-27 22:13:55 -04:00
e60ca6929a SD1 and SD2 clip and tokenizer code is now more similar to the SDXL one. 2023-10-27 15:54:04 -04:00
6ec3f12c6e Support SSD1B model and make it easier to support asymmetric unets. 2023-10-27 14:45:15 -04:00
434ce25ec0 Restrict loading embeddings from embedding folders. 2023-10-27 02:54:13 -04:00
40963b5a16 Apply primitive nodes to graph before serializing workflow. 2023-10-26 19:52:41 -04:00
723847f6b3 Faster clip image processing. 2023-10-26 01:53:01 -04:00
a373367b0c Fix some OOM issues with split and sub quad attention. 2023-10-25 20:17:28 -04:00
7fbb217d3a Fix uni_pc returning noisy image when steps <= 3 2023-10-25 16:08:30 -04:00
3783cb8bfd change 'c_adm' to 'y' in ControlNet.get_control 2023-10-25 08:24:32 -05:00
d1d2fea806 Pass extra conds directly to unet. 2023-10-25 00:07:53 -04:00
036f88c621 Refactor to make it easier to add custom conds to models. 2023-10-24 23:31:12 -04:00
3fce8881ca Sampling code refactor to make it easier to add more conds. 2023-10-24 03:38:41 -04:00
5c65da312a Remove prints. 2023-10-23 23:39:22 -04:00
b935bea3a0 The frontend can now load workflows from webp exif. 2023-10-23 21:13:50 -04:00
2ec6158e9e Call widget callback on value control to fix primitive node issue. 2023-10-22 23:38:18 -04:00
8594c8be4d Empty the cache when torch cache is more than 25% free mem. 2023-10-22 13:58:12 -04:00
8b65f5de54 attention_basic now works with hypertile. 2023-10-22 03:59:53 -04:00
e6bc42df46 Make sub_quad and split work with hypertile. 2023-10-22 03:51:29 -04:00
8cfce083c4 Fix primitive node control value not getting loaded. 2023-10-21 22:36:04 -04:00
a0690f9df9 Fix t2i adapter issue. 2023-10-21 20:31:24 -04:00
9906e3efe3 Make xformers work with hypertile. 2023-10-21 13:23:03 -04:00
1443caf373 HyperTile node, can be found in: _for_testing->HyperTile 2023-10-21 05:16:38 -04:00
8d50f0890d Merge branch 'templates-export-import' of https://github.com/jn-jairo/ComfyUI 2023-10-21 01:29:24 -04:00
77c893350a Fix previous commit that broke tests. 2023-10-20 23:13:54 -04:00
e0c0029fc1 Try to speed up the test-ui workflow. 2023-10-20 23:00:05 -04:00
25e3e5af68 Use npm ci for ci instead of npm install in tests. 2023-10-20 22:52:12 -04:00
5818ca83a2 Unit tests + widget input fixes (#1760)
* setup ui unit tests

* Refactoring, adding connections

* Few tweaks

* Fix type

* Add general test

* Refactored and extended test

* move to describe

* for groups

* Add test for converted widgets on missing nodes + fix crash

* tidy

* more tests + refactor

* throw earlier to get less confusing error

* support outputs

* more tests

* add ci action

* use lts node

* Fix?

* Prevent connecting non matching combos

* update

* accidentally removed npm i

* Disable logging extension

* added step to generate object_info

* fix python

* install python

* install deps

* fix cwd?

* logging

* Fix double resolve

* create dir

* update pkg
2023-10-20 22:49:04 -04:00
484bfe46c2 Clear importInput after import so change event works with same file 2023-10-20 15:19:29 -03:00
4185324a1d Fix uni_pc sampler math. This changes the images this sampler produces. 2023-10-20 04:16:53 -04:00
f1062be622 fix: intermittent crashes with undefined graphs in the Firefox browser. 2023-10-20 00:07:08 +09:00
e6962120c6 Make sure cond_concat is on the right device. 2023-10-19 01:14:25 -04:00
45c972aba8 Refactor cond_concat into conditioning. 2023-10-18 20:36:58 -04:00
430a8334c5 Fix some potential issues. 2023-10-18 19:48:36 -04:00
782a24fce6 Refactor cond_concat into model object. 2023-10-18 16:48:37 -04:00
0d45a565da Fix memory issue related to control loras.
The cleanup function was not getting called.
2023-10-18 02:43:01 -04:00
c2bb34d865 Implement updated FreeU as _for_testing->FreeU_V2 node 2023-10-18 02:06:49 -04:00
a555074737 Use name from input to export single node template 2023-10-17 19:44:26 -03:00
6dbb18df92 Export and import templates 2023-10-17 17:53:57 -03:00
d44a2de49f Make VAE code closer to sgm. 2023-10-17 15:18:51 -04:00
f8caa24bcc Support hypernetwork with mish activation function and layer norm. 2023-10-17 12:08:03 -04:00
92f0318630 Try to fix notebook. 2023-10-17 11:39:15 -04:00
88ceeb3f29 Merge branch 'fix-node-bounding' of https://github.com/jn-jairo/ComfyUI 2023-10-17 03:23:49 -04:00
23680a9155 Refactor the attention stuff in the VAE. 2023-10-17 03:19:29 -04:00
c8013f73e5 Add some Quadro cards to the list of cards with broken fp16. 2023-10-16 16:48:46 -04:00
5a608aa37c Fix node getBounding for collapsed nodes 2023-10-16 17:29:23 -03:00
142aac3003 Merge branch 'group-options' of https://github.com/jn-jairo/ComfyUI 2023-10-16 16:18:32 -04:00
682c84ccf3 Fix fit group to nodes with reroute and collapsed nodes 2023-10-16 16:00:01 -03:00
e8c02219ee Fix add selected nodes to empty group 2023-10-16 15:26:36 -03:00
7d5d0fd577 Group options
- Add Group For Selected Nodes
- Add Selected Nodes To Group
- Fit Group To Nodes
2023-10-16 15:12:40 -03:00
bb064c9796 Add a separate optimized_attention_masked function. 2023-10-16 02:31:24 -04:00
7e09e889e3 Make clear that the old CheckpointLoader is deprecated. 2023-10-15 02:22:22 -04:00
2231edec21 Merge branch 'filter-files-extensions' of https://github.com/jn-jairo/ComfyUI 2023-10-14 14:30:24 -04:00
1b782f2494 Merge branch 'group-select-nodes' of https://github.com/jn-jairo/ComfyUI 2023-10-14 14:28:59 -04:00
a0ce8a443e Merge branch 'shortcut-collapse' of https://github.com/jn-jairo/ComfyUI 2023-10-14 14:28:17 -04:00
a7b65b9505 Group menu option select nodes 2023-10-14 12:11:49 -03:00
8d04978298 Allow all extensions if extension list is empty 2023-10-14 11:59:35 -03:00
2e6270e328 Stop auto queue on error 2023-10-14 11:56:44 -03:00
25f0f4e9c8 Shortcut Alt + C to collapse/uncollapse selected nodes 2023-10-14 11:54:33 -03:00
3fcab0c642 Merge branch 'fix-mask-nodes' of https://github.com/jn-jairo/ComfyUI 2023-10-14 02:42:06 -04:00
fd4c5f07e7 Add a --bf16-unet to test running the unet in bf16. 2023-10-13 14:51:10 -04:00
9a55dadb4c Refactor code so model can be a dtype other than fp32 or fp16. 2023-10-13 14:41:17 -04:00
b5fa3d28d7 Fix MaskComposite 2023-10-13 13:40:53 -03:00
87097a11c3 Fix FeatherMask 2023-10-13 12:26:54 -03:00
fee3b0c070 Move and comment out. 2023-10-12 20:54:43 -04:00
851a4bdb80 Update extra_model_paths.yaml.example with comfy specific example 2023-10-12 21:26:27 +08:00
536799d172 Merge branch 'fix-1723' of https://github.com/chrisgoringe/ComfyUI 2023-10-11 23:35:24 -04:00
41d2c5660d add query 2023-10-12 14:26:53 +11:00
88733c997f pytorch_attention_enabled can now return True when xformers is enabled. 2023-10-11 21:30:57 -04:00
20d3852aa1 Pull some small changes from the other repo. 2023-10-11 20:38:48 -04:00
ac7d8cfa87 Allow attn_mask in attention_pytorch. 2023-10-11 20:38:48 -04:00
1a4bd9e9a6 Refactor the attention functions.
There's no reason for the whole CrossAttention object to be repeated when
only the operation in the middle changes.
2023-10-11 20:38:48 -04:00
8cc75c64ff Let unet wrapper functions have .to attributes. 2023-10-11 01:34:38 -04:00
5e885bd9c8 Cleanup. 2023-10-10 21:46:53 -04:00
851bb87ca9 Merge branch 'taesd_safetensors' of https://github.com/mochiya98/ComfyUI 2023-10-10 21:42:35 -04:00
be903eb2e2 Add default CheckpointSave, CLIPSave and VAESave paths to model paths. 2023-10-10 01:25:47 -04:00
877553843f Add a CLIPSave node to save CLIP model weights. 2023-10-10 01:24:49 -04:00
9eb621c95a Supports TAESD models in safetensors format 2023-10-10 13:21:44 +09:00
d1a0abd40b Merge branch 'input-directory' of https://github.com/jn-jairo/ComfyUI 2023-10-09 01:53:29 -04:00
4308862ce0 Add a note to README about pytorch 3.12 not being supported. 2023-10-09 01:51:01 -04:00
7bb9f6b7e8 Add a VAESave node. 2023-10-09 01:42:15 -04:00
c16f5744e3 Fix SplitImageWithAlpha and JoinImageWithAlpha. 2023-10-08 15:52:10 -04:00
1f2f4eaa6f Fix bug when copying node with converted input. 2023-10-08 04:04:25 -04:00
69a824e9a4 Move _for_testing/custom_sampling nodes to sampling/custom_sampling. 2023-10-08 03:20:35 -04:00
a0b1d4f21d improve: image preview (#1683)
* improve image preview
- grid mode: align in rectangle instead of first image, show cell border
- individual mode: proper ratio handling

* improve: fix preview button position instead of relative

* improve: image preview - compact mode for same aspect ratio
2023-10-08 03:00:33 -04:00
1c5d6663fa Update standalone download link. 2023-10-07 16:13:35 -04:00
0986cc7c38 Fix issues with the packaging. 2023-10-07 11:57:32 -04:00
ae3e4e9ad8 access getConfig via a symbol so structuredClone works (#1677) 2023-10-06 16:48:30 -04:00
72188dffc3 load_checkpoint_guess_config can now optionally output the model. 2023-10-06 13:48:18 -04:00
5b828258f1 Merge branch 'widget-input-updates' of https://github.com/pythongosssss/ComfyUI 2023-10-06 12:51:08 -04:00
0134d7ab49 Generate update script with right settings. 2023-10-06 12:49:40 -04:00
d761eaa486 if the output type is an array, use combo 2023-10-06 17:47:46 +01:00
1497528de8 Fix workflow. 2023-10-06 10:43:12 -04:00
640d5080e5 Make xformers optional in packaging. 2023-10-06 10:29:52 -04:00
34b36e3207 More configurable workflows to package windows release. 2023-10-06 10:26:51 -04:00
6f464f801f Update nightly workflow to python 3.11.6 2023-10-06 03:32:00 -04:00
11b404766e Merge branch 'widget-input-updates' of https://github.com/pythongosssss/ComfyUI 2023-10-05 14:20:47 -04:00
b9b178b839 More cleanup of old type data
Fix connecting combos of same type from different types of node
2023-10-05 19:16:39 +01:00
80932ddf40 updated messages 2023-10-05 17:13:13 +01:00
48242be508 Update readme for pytorch 2.1 2023-10-05 08:25:15 -04:00
63e5fd1790 Option to input directory 2023-10-04 19:45:15 -03:00
0e763e880f JoinImageWithAlpha now works with any mask shape. 2023-10-04 15:54:34 -04:00
0b9246d9fa allow connecting numbers merging config 2023-10-04 20:48:55 +01:00
9212bea87c Change a few things in #1578. 2023-10-04 15:43:41 -04:00
214ca7197e Corrected joining images with alpha (for RGBA input), and checking scaling conditions 2023-10-04 19:04:52 +02:00
585fb0475b Adding default alpha when splitting RGB images 2023-10-04 19:04:52 +02:00
ece69bf28c Change channel type to MASK (reduced redundancy, increased usability) 2023-10-04 19:04:52 +02:00
d06cd2805d Added support for Porter-Duff image compositing 2023-10-04 19:04:48 +02:00
9bfec2bdbf Fix quality loss due to low precision 2023-10-04 15:40:59 +02:00
6fc7314393 support refreshing primitive combos
no longer uses combo list as type name
2023-10-03 20:19:12 +01:00
4103f7fad5 Merge branch 'fix/robust_object_info' of https://github.com/ltdrdata/ComfyUI 2023-10-03 11:14:58 -04:00
1f38de1fb3 If an error occurs while retrieving object_info, only the node that encountered the error should be handled as an exception, while the information for the other nodes should continue to be processed normally. 2023-10-03 18:30:38 +09:00
fe1e2dbe90 pytorch nightly is now ROCm 5.7 2023-10-03 00:01:49 -04:00
ec454c771b Refactor with code from comment of #1588 2023-10-02 17:26:59 -04:00
2ef459b1d4 Add VPScheduler node 2023-10-01 03:48:07 -04:00
8ab49dc0a4 DPMPP_SDE node. 2023-09-30 01:51:22 -04:00
213976f8c3 Add ExponentialScheduler and PolyexponentialScheduler nodes. 2023-09-29 09:05:30 -04:00
0f17993d05 fix: typo in extra sampler 2023-09-29 06:09:59 +03:00
1c8ae9dbb2 Allow GrowMask node to work with batches (for AnimateDiff) (#1623)
* Allow mask batches

This allows the LatentCompositeMasked node to work with AnimateDiff. I tried to keep the old functionality too; unsure if it's correct, but both a single mask and a batch of masks seem to work with this change.

* Update nodes_mask.py
2023-09-28 22:01:19 -04:00
66756de100 Add SamplerDPMPP_2M_SDE node. 2023-09-28 21:56:23 -04:00
26b7372805 Fix SplitSigmas. 2023-09-28 01:11:22 -04:00
71713888c4 Print missing VAE keys. 2023-09-28 00:54:57 -04:00
76e0f8fc8f Add function to split sigmas. 2023-09-28 00:40:09 -04:00
2bf051fda8 Add a basic node to generate sigmas from scheduler. 2023-09-28 00:30:45 -04:00
d234ca558a Add missing samplers to KSamplerSelect. 2023-09-28 00:17:03 -04:00
1d7dfc07d5 Make add_noise in SamplerCustom a boolean. 2023-09-27 22:42:23 -04:00
1adcc4c3a2 Add a SamplerCustom Node.
This node takes a list of sigmas and a sampler object as input.

This lets people easily implement custom schedulers and samplers as nodes.

More nodes will be added to it in the future.
2023-09-27 22:21:18 -04:00
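A hedged sketch of a custom scheduler node that could feed such a sampler; the class layout follows the usual custom node conventions, but the schedule itself is made up.

```python
import torch

class LinearScheduler:
    # Hypothetical scheduler node producing a SIGMAS output.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"steps": ("INT", {"default": 20, "min": 1})}}

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "get_sigmas"
    CATEGORY = "sampling/custom_sampling"

    def get_sigmas(self, steps):
        # steps + 1 boundaries, descending to 0 like typical sigma schedules.
        return (torch.linspace(14.6, 0.0, steps + 1),)
```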
bf3fc2f1b7 Refactor sampling related code. 2023-09-27 16:45:22 -04:00
fff491b032 Model patches can now know which batch is positive and negative. 2023-09-27 12:04:07 -04:00
1d6dd83184 Scheduler code refactor. 2023-09-26 17:07:07 -04:00
446caf711c Sampling code refactor. 2023-09-26 13:45:15 -04:00
aeba1cc2a0 Merge branch 'chore/update-actions-versions' of https://github.com/M1kep/ComfyUI 2023-09-26 02:58:55 -04:00
9546a798fb Make LoadImage and LoadImageMask return masks in batch format. 2023-09-26 02:56:40 -04:00
1d36dfb9fe GrowMask now works with mask batches. 2023-09-26 02:53:57 -04:00
d76d71de3f GrowMask can now be used with negative numbers to erode it. 2023-09-26 02:45:31 -04:00
e0efa78b71 chore(CI): Update test-build to use updated version of actions 2023-09-25 21:20:51 -07:00
d2cec6cdbf Make mask functions work with batches of masks and images. 2023-09-25 16:19:37 -04:00
046b4fe0ee Support batches of masks in mask composite nodes. 2023-09-25 16:02:21 -04:00
ba7dfd60f2 Merge branch 'proportional-scale' of https://github.com/jn-jairo/ComfyUI 2023-09-25 12:39:53 -04:00
2381d36e6d 1024 wasn't enough. 2023-09-25 01:46:44 -04:00
42f6d1ebe2 Increase maximum batch sizes of empty image nodes. 2023-09-25 01:22:37 -04:00
f00471cdc8 Do FreeU fft on CPU if the device doesn't support fft functions. 2023-09-24 18:09:44 -04:00
77c124c5a1 Fix typo. 2023-09-24 13:27:57 -04:00
593b7069e7 Proportional scale latent and image 2023-09-24 12:08:54 -03:00
76cdc809bf Support more controlnet models. 2023-09-23 18:47:46 -04:00
05e661e5ef FreeU now works with the refiner. 2023-09-23 12:19:08 -04:00
ae87543653 Merge branch 'cast_intel' of https://github.com/simonlui/ComfyUI 2023-09-23 00:57:17 -04:00
fd93c759e2 Implement FreeU: Free Lunch in Diffusion U-Net node.
_for_testing->FreeU
2023-09-23 00:56:09 -04:00
eec449ca8e Allow Intel GPUs to LoRA cast on GPU since it supports BF16 natively. 2023-09-22 21:11:27 -07:00
afa2399f79 Add a way to set output block patches to modify the h and hsp. 2023-09-22 20:26:47 -04:00
29ccf9f471 Fix typo. 2023-09-22 01:33:46 -04:00
422d16c027 Add some nodes to add, subtract and multiply latents. 2023-09-21 22:23:01 -04:00
492db2de8d Allow having a different pooled output for each image in a batch. 2023-09-21 01:14:42 -04:00
0793eb9269 Only clear clipboard when copying nodes. 2023-09-20 23:16:01 -04:00
4d41bd595c Fix loading group titles. 2023-09-20 21:46:41 -04:00
1122df1a20 Increase range of lora strengths. 2023-09-20 17:58:54 -04:00
1cdfb3dba4 Only do the cast on the device if the device supports it. 2023-09-20 17:52:41 -04:00
b92a86d737 Update litegraph to upstream. 2023-09-20 13:24:08 -04:00
f895260e5e Merge branch 'escape-glob' of https://github.com/seanlynch/ComfyUI 2023-09-19 13:13:40 -04:00
7c9a92f552 Don't depend on torchvision. 2023-09-19 13:12:47 -04:00
8321592408 Escape paths when passing them to globs
Try to prevent JS search from breaking on pathnames with square
brackets.
2023-09-19 08:18:29 -04:00
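Square brackets are character classes in glob patterns, so an unescaped directory name silently matches nothing. A small sketch of the fix (the paths are made up):

```python
import glob
import os

folder = "models/loras/[test] pack"  # hypothetical path containing brackets
# Raw pattern: "[test]" is treated as a character class and matches nothing.
broken = glob.glob(os.path.join(folder, "*.safetensors"))
# Escaping the fixed part of the path leaves only the * as a wildcard.
fixed = glob.glob(os.path.join(glob.escape(folder), "*.safetensors"))
```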
2b6b178173 Added support for lanczos scaling 2023-09-19 10:40:38 +02:00
6d3dee9d16 Clean up #1541. 2023-09-18 23:33:52 -04:00
f32463936d Unhardcode sampler and scheduler list in test. 2023-09-18 23:24:14 -04:00
7c93afd2cd Manual float precision, toggle for old behavior (#1541)
* Add toggle for float rounding

* Add manual precision override
2023-09-18 23:20:00 -04:00
26cd8405dd Ci quality workflows (#1423)
* Add inference tests

* Clean up

* Rename test graph file

* Add readme for tests

* Separate server fixture

* test file name change

* Assert images are generated

* Clean up comments

* Add __init__.py so tests can run with command line `pytest`

* Fix command line args for pytest

* Loop all samplers/schedulers in test_inference.py

* Ci quality workflows compare (#1)

* Add image comparison tests

* Comparison tests do not pass with empty metadata

* Ensure tests are run in correct order

* Save image files  with test name

* Update tests readme

* Reduce step counts in tests to ~halve runtime

* Ci quality workflows build (#2)

* Add build test github workflow
2023-09-18 23:18:06 -04:00
b92bf8196e Do lora cast on GPU instead of CPU for higher performance. 2023-09-18 23:04:49 -04:00
0109431626 Lower the minimum resolution of EmptyLatentImage. 2023-09-18 16:20:03 -04:00
db63aa7e53 Nodes can now control the rounding in the UI. 2023-09-17 12:49:06 -04:00
321c5fa295 Enable pytorch attention by default on xpu. 2023-09-17 04:09:19 -04:00
0665749b1a Move ModelSubtract and ModelAdd to advanced/model_merging 2023-09-17 02:10:06 -04:00
d6d9b83447 Merge branch 'fix/preview_ratio' of https://github.com/ltdrdata/ComfyUI 2023-09-16 15:43:42 -04:00
61b1f67734 Support models without previews. 2023-09-16 12:59:54 -04:00
4d5e057bb2 fix indent 2023-09-16 20:37:42 +09:00
69680fede7 fix: thumbnail ratio fix for mixed ratio images 2023-09-16 20:36:00 +09:00
43d4935a1d Add cond_or_uncond array to transformer_options so hooks can check what is
cond and what is uncond.
2023-09-15 22:21:14 -04:00
415abb275f Add DDPM sampler. 2023-09-15 19:22:47 -04:00
099226015e Merge branch 'Fix-structuredClone-error-with-early-chrome-version-browser' of https://github.com/KarryCharon/ComfyUI 2023-09-15 15:48:22 -04:00
94e4fe39d8 This isn't used anywhere. 2023-09-15 12:03:03 -04:00
076f3e6310 fix structuredClone undefined error; 2023-09-15 16:37:58 +08:00
44361f6344 Support for text encoder models that need attention_mask. 2023-09-15 02:02:05 -04:00
0d8f376446 Setting the last layer on SD2.x models now uses the proper indexes.
Before, I had made the last layer the penultimate layer because some
checkpoints don't have it, but that's not consistent with the other models.

TLDR: for SD2.x models only: CLIPSetLastLayer -1 is now -2.
2023-09-14 20:28:22 -04:00
0966d3ce82 Don't run text encoders on xpu because there are issues. 2023-09-14 12:16:07 -04:00
0e4395a8a3 Allow pasting nodes with connections in firefox 2023-09-13 18:42:44 +01:00
3039b08eb1 Only parse command line args when main.py is called. 2023-09-13 11:38:20 -04:00
30de95e4b4 Add some nodes to subtract and add model weights. 2023-09-13 01:10:31 -04:00
0b829fe35b .gitignore refactor. 2023-09-12 18:44:05 -04:00
ed58730658 Don't leave very large hidden states in the clip vision output. 2023-09-12 15:09:10 -04:00
fb3b728203 Fix issue where autocast fp32 CLIP gave different results from regular. 2023-09-11 21:49:56 -04:00
7d401ed1d0 Add ldm format support to UNETLoader. 2023-09-11 16:36:50 -04:00
9562a6b49e Fix a few clipboard issues. 2023-09-10 11:19:31 -04:00
d4b2bc0964 Merge branch 'master' of https://github.com/miabrahams/ComfyUI 2023-09-10 10:15:02 -04:00
122fd5d37f Merge branch 'add-defaultInput' of https://github.com/chrisgoringe/ComfyUI 2023-09-10 03:18:05 -04:00
7df822212f Allow checkpoints with .pt and .bin extensions. 2023-09-10 02:36:04 -04:00
07691e80c3 Does it make sense to allow configuring the round and precision? 2023-09-09 03:15:31 -04:00
5c8b7ea03c Merge branch 'round-float-widgets' of https://github.com/chrisgoringe/ComfyUI 2023-09-09 03:07:57 -04:00
7372255e49 Specify the precision and rounding based on step 2023-09-09 15:21:38 +10:00
cc2fa311dd fix(server): Disable access logs 2023-09-08 21:11:53 -07:00
e85be36bd2 Add a penultimate_hidden_states to the clip vision output. 2023-09-08 14:06:58 -04:00
10de64af7f Google doesn't want people to use ComfyUI on colab anymore. 2023-09-08 14:02:03 -04:00
264867bf87 Clear clipboard on copy 2023-09-08 12:42:13 -04:00
1e6b67101c Support diffusers format t2i adapters. 2023-09-08 11:36:51 -04:00
3ebe6b539a round float widgets (by default to 0.001) 2023-09-08 20:40:27 +10:00
ff962098fd Fixed Load Image preview not displaying some files (issue #1158) 2023-09-08 08:43:17 +02:00
0782ac2a96 defaultInput 2023-09-08 14:53:59 +10:00
326577d04c Allow cancelling of everything with a progress bar. 2023-09-07 23:37:03 -04:00
9261587d89 Small refactor. 2023-09-07 18:14:30 -04:00
d6d1a8998f Properly check upload filename for directory traversal. 2023-09-07 18:06:22 -04:00
e464fa8f04 Merge branch 'fix-validate' of https://github.com/pythongosssss/ComfyUI 2023-09-07 15:15:52 -04:00
62799c8585 fix crash on node with VALIDATE_INPUTS and actual inputs 2023-09-07 18:42:21 +01:00
f65db2981b Merge branch 'description' of https://github.com/chrisgoringe/ComfyUI 2023-09-07 12:50:46 -04:00
8be46438be Support DiffBIR SwinIR models. 2023-09-07 03:31:43 -04:00
694c705f52 get class description 2023-09-07 12:22:39 +10:00
adb9eb94b0 Send class description if any 2023-09-07 12:22:39 +10:00
cb080e771e Lower refresh timeout for search in litegraph. 2023-09-06 16:18:02 -04:00
f88f7f413a Add a ConditioningSetAreaPercentage node. 2023-09-06 03:28:27 -04:00
21a563d385 Remove prints. 2023-09-05 23:46:37 -04:00
eb2349822b Merge branch 'folder_paths_ignore_git' of https://github.com/M1kep/ComfyUI 2023-09-05 23:37:22 -04:00
bc1f6e2185 fix(ui/widgets): Only set widget forceInput option if a widget is added 2023-09-05 15:06:46 -07:00
f368e5ac7d Don't paste nodes when target is a textarea or a text box. 2023-09-05 01:22:26 -04:00
3e00fa4332 feat: Exclude .git when retrieving filename lists
In the future could support user provided excluded dirs via config file
2023-09-04 17:50:32 -07:00
d196847079 feat: Add support for excluded_dirs to folder_paths.recursive_search
Refactored variable names to better match what they represent
2023-09-04 17:50:32 -07:00
2d9d3ca38b Merge branch 'master' of https://github.com/miabrahams/ComfyUI 2023-09-04 14:51:19 -04:00
1938f5c5fe Add a force argument to soft_empty_cache to force a cache empty. 2023-09-04 00:58:18 -04:00
7746bdf7b0 Merge branch 'generalize_fixes' of https://github.com/simonlui/ComfyUI 2023-09-04 00:43:11 -04:00
2419901e6c Merge branch 'addOnExecutionStart' of https://github.com/chrisgoringe/ComfyUI 2023-09-03 16:59:41 -04:00
6f70227b8c Add support for pasting images into the graph
It can be useful to paste images from the clipboard directly into the node graph.
This commit modifies copy and paste handling to support this.

When an image file is found in the clipboard, we check whether an image node is selected.
If so, paste the image into that node. Otherwise, a new node is created.
If no image data are found in the clipboard, we call the original Litegraph paste.
To ensure that onCopy and onPaste events are fired, we override Litegraph's ctrl+c and ctrl+v handling.

Try to detect whether the pasted image is a real file on disk, or just pixel data copied from e.g. Photoshop.
Pasted pixel data will be called 'image.png' and have a creation time of now.
If it is simply pasted data, we store it in the subfolder /input/clipboard/.

This also adds support for the subfolder property in the IMAGEUPLOAD widget.
2023-09-03 12:08:04 -04:00
2da73b7073 Revert changes in comfy/ldm/modules/diffusionmodules/util.py, which is unused. 2023-09-02 20:07:52 -07:00
a74c5dbf37 Move some functions to utils.py 2023-09-02 22:33:37 -04:00
766c7b3815 Update upscale model code to latest Chainner model code.
Don't add SRFormer because the code license is incompatible with the GPL.

Remove MAT because it's unused and the license is incompatible with GPL.
2023-09-02 22:27:40 -04:00
4a0c4ce4ef Some fixes to generalize CUDA specific functionality to Intel or other GPUs. 2023-09-02 18:22:10 -07:00
dfd6489c96 onExecutionStart 2023-09-03 07:53:02 +10:00
62efc78a4b Display history in reverse order to make it easier to load last gen. 2023-09-02 15:49:16 -04:00
6962cb46a9 Fix issue when node_input is undefined. 2023-09-02 12:17:30 -04:00
7291e303f6 Fix issue with some workflows not getting serialized. 2023-09-02 11:48:44 -04:00
77a176f9e0 Use common function to reshape batch to. 2023-09-02 03:42:49 -04:00
36ea8784a8 Only return tuple of 3 args in CheckpointLoaderSimple. 2023-09-02 03:34:57 -04:00
7891d13329 Added label for autoQueueCheckbox. (#1295)
* Added label for autoQueueCheckbox.

* Menu gets behind of some custom nodes.

* Edited extraOptions.
Options divided in to different divs to manage them with ease.
2023-09-02 02:58:23 -04:00
7931ff0fd9 Support SDXL inpaint models. 2023-09-01 15:22:52 -04:00
c335fdf200 Merge branch 'pixelass-patch-1' of https://github.com/pixelass/ComfyUI 2023-09-01 11:48:11 -04:00
43f2505389 Merge branch 'fix/widget-wonkyness' of https://github.com/M1kep/ComfyUI 2023-09-01 03:07:10 -04:00
0e3b641172 Remove xformers related print. 2023-09-01 02:12:03 -04:00
5c363a9d86 Fix controlnet bug. 2023-09-01 02:01:08 -04:00
69c5e6de85 fix(widgets): Add options object if not present when forceInput: true 2023-08-31 17:58:43 -07:00
9a7a52f8b5 refactor/fix: Treat forceInput widgets as standard widgets 2023-08-31 17:58:43 -07:00
cfe1c54de8 Fix controlnet issue. 2023-08-31 15:16:58 -04:00
57beace324 Fix VAEDecodeTiled minimum. 2023-08-31 14:26:16 -04:00
1c012d69af It doesn't make sense for c_crossattn and c_concat to be lists. 2023-08-31 13:25:00 -04:00
5f101f4da1 Update litegraph with upstream: middle mouse dragging. 2023-08-31 02:39:34 -04:00
2cd3980199 Remove forced lowercase on embeddings endpoint 2023-08-30 20:48:55 +02:00
7e941f9f24 Clean up DiffusersLoader node. 2023-08-30 12:57:07 -04:00
18617967e5 Fix error message in model_patcher.py
Found while tinkering.
2023-08-30 00:25:04 -07:00
fe4c07400c Fix "Load Checkpoint with config" node. 2023-08-29 23:58:32 -04:00
d70b0bc43c Use the GPU for the canny preprocessor when available. 2023-08-29 17:58:40 -04:00
81d9200e18 Add node to convert a specific colour in an image to a mask. 2023-08-29 17:55:42 -04:00
f2f5e5dcbb Support SDXL t2i adapters with 3 channel input. 2023-08-29 16:44:57 -04:00
15adc3699f Move beta_schedule to model_config and allow disabling unet creation. 2023-08-29 14:22:53 -04:00
968078b149 Merge branch 'feat/mute_bypass_nodes_in_group' of https://github.com/M1kep/ComfyUI 2023-08-29 11:33:40 -04:00
66c690e698 Merge branch 'preserve-pnginfo' of https://github.com/chrisgoringe/ComfyUI 2023-08-29 11:32:58 -04:00
bed116a1f9 Remove optimization that caused border. 2023-08-29 11:21:36 -04:00
18379dea36 check for text attr and save 2023-08-29 18:50:28 +10:00
edcff9ab8a copy metadata into modified image 2023-08-29 18:50:28 +10:00
6944288aff refactor(ui): Switch statement, and handle other modes in group actions 2023-08-29 00:24:31 -07:00
e30d546e38 feat(ui): Add node mode toggles to group context menu 2023-08-28 23:49:25 -07:00
8ddd081b09 Use the same units for tile size in VAEDecodeTiled and VAEEncodeTiled. 2023-08-29 01:51:35 -04:00
fbf375f161 Merge branch 'master' of https://github.com/bvhari/ComfyUI 2023-08-29 01:42:00 -04:00
65cae62c71 No need to check filename extensions to detect shuffle controlnet. 2023-08-28 16:49:06 -04:00
4e89b2c25a Put clip vision outputs on the CPU. 2023-08-28 16:26:11 -04:00
a094b45c93 Load clipvision model to GPU for faster performance. 2023-08-28 15:29:27 -04:00
1300a1bb4c Text encoder should initially load on the offload_device not the regular. 2023-08-28 15:08:45 -04:00
f92074b84f Move ModelPatcher to model_patcher.py 2023-08-28 14:51:31 -04:00
d86b222fe9 Reduce min tile size for encode 2023-08-28 22:39:09 +05:30
4798cf5a62 Implement loras with norm keys. 2023-08-28 11:20:06 -04:00
9196588088 Make tile size in Tiled VAE encode/decode user configurable 2023-08-28 19:57:22 +05:30
0faee1186f support on prompt event handler (#765)
Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-08-28 00:52:22 -04:00
b8c7c770d3 Enable bf16-vae by default on ampere and up. 2023-08-27 23:06:19 -04:00
1c794a2161 Fallback to slice attention if xformers doesn't support the operation. 2023-08-27 22:24:42 -04:00
d935ba50c4 Make --bf16-vae work on torch 2.0 2023-08-27 21:33:53 -04:00
412596d325 Merge branch 'increase_client_max_size' of https://github.com/ramyma/ComfyUI 2023-08-27 13:12:39 -04:00
d9f4922993 fix: cannot disable dynamicPrompts (#1327)
* fix: cannot disable dynamicPrompts

* indent fix

---------

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-08-27 12:34:24 -04:00
0b6cf7a558 Increase client_max_size to allow bigger request bodies 2023-08-26 19:48:20 +03:00
a57b0c797b Fix lowvram model merging. 2023-08-26 11:52:07 -04:00
f72780a7e3 The new smart memory management makes this unnecessary. 2023-08-25 18:02:15 -04:00
c77f02e1c6 Move controlnet code to comfy/controlnet.py 2023-08-25 17:33:04 -04:00
15a7716fa6 Move lora code to comfy/lora.py 2023-08-25 17:11:51 -04:00
ec96f6d03a Move text_projection to base clip model. 2023-08-24 23:43:48 -04:00
30eb92c3cb Code cleanups. 2023-08-24 19:39:18 -04:00
51dde87e97 Try to free enough vram for control lora inference. 2023-08-24 17:20:54 -04:00
e3d0a9a490 Fix potential issue with text projection matrix multiplication. 2023-08-24 00:54:16 -04:00
cc44ade79e Always shift text encoder to GPU when the device supports fp16. 2023-08-23 21:45:00 -04:00
a6ef08a46a Even with forced fp16 the cpu device should never use it. 2023-08-23 21:38:28 -04:00
00c0b2c507 Initialize text encoder to target dtype. 2023-08-23 21:01:15 -04:00
f081017c1a Save memory by storing text encoder weights in fp16 in most situations.
Do inference in fp32 to make sure quality stays the exact same.
2023-08-23 01:08:51 -04:00
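A minimal sketch of the trick described above, assuming a plain Linear layer: weights are stored in fp16 to halve memory, and each forward upcasts to fp32 so the matmul itself runs in full precision.

```python
import torch

class FP16StoredLinear(torch.nn.Module):
    def __init__(self, linear: torch.nn.Linear):
        super().__init__()
        # Store the weights in half precision to save memory.
        self.weight = torch.nn.Parameter(linear.weight.detach().half(), requires_grad=False)
        self.bias = None
        if linear.bias is not None:
            self.bias = torch.nn.Parameter(linear.bias.detach().half(), requires_grad=False)

    def forward(self, x):
        # Upcast for the computation so the math itself runs in fp32.
        w = self.weight.float()
        b = self.bias.float() if self.bias is not None else None
        return torch.nn.functional.linear(x.float(), w, b)
```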
d7b3b0f8c1 Don't hardcode node names for image upload widget. 2023-08-22 19:41:49 -04:00
afcb9cb1df All resolutions now work with t2i adapter for SDXL. 2023-08-22 16:23:54 -04:00
85fde89d7f T2I adapter SDXL. 2023-08-22 14:40:43 -04:00
f2a7cc9121 Add control lora links to colab notebook. 2023-08-22 01:55:09 -04:00
e2256b4087 Add clip_vision_g download command to colab notebook for ReVision. 2023-08-22 01:44:31 -04:00
cf5ae46928 Controlnet/t2iadapter cleanup. 2023-08-22 01:06:26 -04:00
763b0cf024 Fix control lora not working in fp32. 2023-08-21 20:38:31 -04:00
bc76b3829f Merge branch 'custom-node-js' of https://github.com/pythongosssss/ComfyUI 2023-08-21 00:58:38 -04:00
199d73364a Fix ControlLora on lowvram. 2023-08-21 00:54:04 -04:00
d08e53de2e Remove autocast from controlnet code. 2023-08-20 21:47:32 -04:00
cdaf65ceb1 remove log 2023-08-20 20:01:25 +01:00
0d7b0a4dc7 Small cleanups. 2023-08-20 14:56:47 -04:00
9b1d5a587c Allow loading js extensions without copying to /web folder 2023-08-20 19:55:48 +01:00
9225465975 Further tuning and fix mem_free_total. 2023-08-20 14:19:53 -04:00
2c096e4260 Add ipex optimize and other enhancements for Intel GPUs based on recent memory changes. 2023-08-20 14:19:51 -04:00
8ee0473687 Merge branch 'parallel-extensions-load' of https://github.com/NoCrypt/ComfyUI 2023-08-20 14:14:01 -04:00
e9469e732d --disable-smart-memory now disables loading model directly to vram. 2023-08-20 04:00:53 -04:00
c9b562aed1 Free more memory before VAE encode/decode. 2023-08-19 12:13:13 -04:00
81ccacaa7c Make the extensions load in parallel instead of waiting one by one 2023-08-19 17:36:13 +07:00
b80c3276dc Fix issue with gligen. 2023-08-18 16:32:23 -04:00
d6e4b342e6 Support for Control Loras.
Control loras are controlnets where some of the weights are stored in
"lora" format: an up and a down low-rank matrix that, when multiplied
together and added to the unet weight, give the controlnet weight.

This allows a much smaller memory footprint depending on the rank of the
matrices.

These controlnets are used just like regular ones.
2023-08-18 11:59:51 -04:00
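A sketch of the weight reconstruction the message describes, for a rank-r factorization of an (out, in) weight (the tensor names are illustrative):

```python
import torch

def control_lora_weight(unet_w, up, down):
    # up is (out, r), down is (r, in); their product added to the unet
    # weight recovers the full controlnet weight, so only the two small
    # factors need to be stored on disk.
    return unet_w + up.float() @ down.float()
```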
39ac856a33 ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 11:59:36 -04:00
76d53c4622 Add support for clip g vision model to CLIPVisionLoader. 2023-08-18 11:13:29 -04:00
fc99fa56a9 Add node to scale image to a total amount of pixels keeping aspect. 2023-08-18 02:32:39 -04:00
eb5c991a8c Merge branch 'add-user-css' of https://github.com/pythongosssss/ComfyUI 2023-08-17 16:41:54 -04:00
bd7321c8ac Update aiohttp in nightly workflow. 2023-08-17 16:41:24 -04:00
e59fe0537a Fix referenced before assignment
For https://github.com/BlenderNeko/ComfyUI_TiledKSampler/issues/13
2023-08-17 22:30:07 +02:00
be9c5e25bc Fix issue with not freeing enough memory when sampling. 2023-08-17 15:59:56 -04:00
ac0758a1a4 Fix bug with lowvram and controlnet advanced node. 2023-08-17 13:38:51 -04:00
c28db1f315 Fix potential issues with patching models when saving checkpoints. 2023-08-17 11:07:08 -04:00
c828543a77 Allow user customizable css 2023-08-17 13:36:55 +01:00
1498f1a342 Merge branch 'add-growmask-node' of https://github.com/coreyryanhanson/ComfyUI 2023-08-17 03:21:20 -04:00
3aee33b54e Add --disable-smart-memory for those that want the old behaviour. 2023-08-17 03:12:37 -04:00
2be2742711 Fix issue with regular torch version. 2023-08-17 01:58:54 -04:00
89a0767abf Smarter memory management.
Try to keep models on the vram when possible.

Better lowvram mode for controlnets.
2023-08-17 01:06:34 -04:00
2c97c30256 Support small diffusers controlnet so both types are now supported. 2023-08-16 12:45:56 -04:00
53f326a3d8 Support diffusers mini controlnets. 2023-08-16 12:28:01 -04:00
58f0c616ed Fix clip vision issue with old transformers versions. 2023-08-16 11:36:22 -04:00
ae270f79bc Fix potential issue with batch size and clip vision. 2023-08-16 11:05:11 -04:00
18e86a4010 add a node to allow growing of masks through dilation 2023-08-16 10:57:14 -04:00
27b87c25a1 Add an EmptyImage node.
TODO: implement color picker in the frontend.
2023-08-15 17:53:10 -04:00
6dc02c7bac Add a "resize_source" option to Image and Latent CompositeMasked. 2023-08-15 17:51:52 -04:00
7567c4ac8f Add bypass to readme and add a Bypass menu option to the nodes. 2023-08-15 13:28:34 -04:00
a2ce9655ca Refactor unclip code. 2023-08-14 23:48:47 -04:00
94fceb8700 Make Blur node use the image device for processing. 2023-08-14 21:08:45 -04:00
e7d88855f4 Add node to batch images together. 2023-08-14 20:23:38 -04:00
d4380f3aa3 Add option to use different xformers version in the github workflow. 2023-08-14 18:13:11 -04:00
06681ee035 Add codeowners file. 2023-08-14 16:54:30 -04:00
9cc12c833d CLIPVisionEncode can now encode multiple images. 2023-08-14 16:54:05 -04:00
0cb6dac943 Remove 3m from PR #1213 because of some small issues. 2023-08-14 00:48:45 -04:00
e244b2df83 Add sgm_uniform scheduler that acts like the default one in sgm. 2023-08-14 00:29:03 -04:00
58c7da3665 Gpu variant of dpmpp_3m_sde. Note: use 3m with exponential or karras. 2023-08-14 00:28:50 -04:00
ba319a34e4 Merge branch 'dpmpp3m' of https://github.com/FizzleDorf/ComfyUI 2023-08-14 00:23:15 -04:00
3cfad03a68 dpmpp 3m + dpmpp 3m sde added 2023-08-13 22:29:04 -04:00
192ca0676c Add some more cards to the cuda malloc blacklist. 2023-08-13 16:08:11 -04:00
861fd58819 Add a warning if a card that doesn't support cuda malloc has it enabled. 2023-08-13 12:37:53 -04:00
585a062910 Print unet config when model isn't detected. 2023-08-13 01:39:48 -04:00
8c730dc4a7 Add an ImageCompositeMasked node. 2023-08-12 01:02:36 -04:00
c8a23ce9e8 Support for yet another lora type based on diffusers. 2023-08-11 13:04:21 -04:00
2bc12d3d22 Add --temp-directory argument to set temp directory. 2023-08-11 05:13:03 -04:00
00877b0363 Don't ignore extra paths that don't exist. 2023-08-11 02:41:04 -04:00
c20583286f Support diffuser text encoder loras. 2023-08-10 20:28:28 -04:00
f7e6a5ed07 Fix litegraph button being black on light theme. 2023-08-10 12:29:56 -04:00
cf10c5592c Disable calculating uncond when CFG is 1.0 2023-08-09 20:55:03 -04:00
5ac96897e9 Images can now be uploaded by dragging from another window in chromium. 2023-08-09 11:31:27 -04:00
af32197067 feat(extensions): Allow hiding link connectors
Thank you for adding this feature (linksRenderMode) to core. I would like to add a "Hidden" option (the invalid value 3 simply hides the connector lines) so that I can remove that extension from my extension pack to prevent conflicts.

https://github.com/failfa-st/failfast-comfyui-extensions
2023-08-09 13:03:30 +02:00
a5599ed42c Add missing direct dep that gets pulled in by another. 2023-08-08 10:45:35 -04:00
5e2b4893da Fix path issue. 2023-08-07 19:29:36 -04:00
285ea7b790 Add "display" to custom node example. 2023-08-07 08:29:50 -04:00
1f0f4cc0bd Add argument to disable auto launching the browser. 2023-08-07 02:25:12 -04:00
0ce8a540ce Update litegraph to latest. 2023-08-06 14:36:43 -04:00
d8e58f0a7e Detect hint_channels from controlnet. 2023-08-06 14:08:59 -04:00
0cb14a33f6 Fix issue with logging missing nodes. 2023-08-05 21:54:58 -04:00
fc71cf656e Add some 800M gpus to cuda malloc blacklist. 2023-08-05 21:54:52 -04:00
c9ef919e29 Formatting issue. 2023-08-05 17:20:35 -04:00
435577457a Add a way to use cloudflared tunnel to the colab notebook. 2023-08-05 17:18:45 -04:00
b948b2cf41 handle value missing 2023-08-05 11:04:04 +01:00
32e115b818 prevent crashing if the widget can't be found 2023-08-05 11:00:18 +01:00
c5d7593ccf Support loras in diffusers format. 2023-08-05 01:40:24 -04:00
5a90d3cea5 GeForce MX110 + MX130 are maxwell. 2023-08-04 21:44:37 -04:00
8918f1085c Add setting to change link render mode
Add support for combo settings
2023-08-04 21:26:11 +01:00
cb25b88329 Merge branch 'logging' of https://github.com/pythongosssss/ComfyUI 2023-08-04 12:12:39 -04:00
1ce0d8ad68 Add CMP 30HX card to the nvidia_16_series list. 2023-08-04 12:08:45 -04:00
3d614dde49 Fix bug with reroutes and bypass. 2023-08-04 03:47:45 -04:00
b2ea0cbd5c add logging 2023-08-04 08:30:01 +01:00
43ae9fe721 add system stats function 2023-08-04 08:29:51 +01:00
0bbd9dd4d9 add system info to stats endpoint 2023-08-04 08:29:25 +01:00
d7638c47fc Fix ui inconsistency. 2023-08-04 03:22:47 -04:00
fa962e86c1 Make LatentBlend more consistent with other nodes. 2023-08-04 02:51:28 -04:00
11ad6060fc Merge branch 'LatentBlend' of https://github.com/fuami/ComfyUI 2023-08-04 02:35:53 -04:00
c99d8002f8 Make sure the pooled output stays at the EOS token with added embeddings. 2023-08-03 20:27:50 -04:00
9534f0f8a5 allows convert to widget for boolean type (#1063) 2023-08-03 20:24:52 -04:00
d1347544bc Make context menu filter import from relative path. 2023-08-03 16:51:37 -04:00
077617e8c9 Fix bypassed nodes with no inputs. 2023-08-03 02:57:40 -04:00
19fbab6ce3 Fix reroute nodes not working with bypassed nodes. 2023-08-03 02:38:11 -04:00
05321fd947 Add an experimental CTRL-B shortcut to bypass nodes. 2023-08-03 01:57:00 -04:00
9ccc965899 Merge branch 'fix/no-required-input' of https://github.com/M1kep/ComfyUI into prs 2023-08-02 15:06:09 -04:00
e4a3e9e54c Add an option in the UI to disable sliders. 2023-08-01 18:50:06 -04:00
90b0163524 fix(execution): Fix support for input-less nodes 2023-08-01 12:29:01 -07:00
7785d073f0 chore: Fix typo 2023-08-01 12:27:50 -07:00
834ab278d2 Update instructions for mac. 2023-08-01 03:17:04 -04:00
38cfba0430 Rename toggle to boolean. 2023-08-01 03:08:35 -04:00
d712193885 Add LatentBlend node, allowing for blending between two Latent inputs. 2023-08-01 01:23:14 -05:00
eb5191f911 0.0.0.0 doesn't work on windows. 2023-08-01 01:15:18 -04:00
076d2db60f display_as -> display. 2023-07-31 22:41:54 -04:00
730a5d170f Merge branch 'slider_toggle' of https://github.com/Guillaume-Fgt/ComfyUI into prs 2023-07-31 15:24:09 -04:00
41cf43f89e Merge branch 'SaveLatent_outputs' of https://github.com/fuami/ComfyUI 2023-07-31 15:23:02 -04:00
6cdc9afc7c pass slider type as option 2023-07-31 08:48:44 +02:00
4a77fcd6ab Only shift text encoder to vram when CPU cores are under 8. 2023-07-31 00:08:54 -04:00
3dcad78fe1 SaveLatent reports its outputs so they are visible to API 2023-07-30 16:36:55 -05:00
3cd31d0e24 Lower CPU thread check for running the text encoder on the CPU vs GPU. 2023-07-30 17:18:24 -04:00
2b13939044 Remove some useless code. 2023-07-30 14:13:33 -04:00
95d796fc85 Faster VAE loading. 2023-07-29 16:28:30 -04:00
4b957a0010 Initialize the unet directly on the target device. 2023-07-29 14:51:56 -04:00
ad5866b02b Fix ROCm nightly install command. 2023-07-29 14:48:29 -04:00
d3d9ad00d8 added slider and toggle widget 2023-07-29 14:48:00 +02:00
c910b4a01c Remove unused code and torchdiffeq dependency. 2023-07-28 21:32:27 -04:00
1141029a4a Add --disable-metadata argument to disable saving metadata in files. 2023-07-28 12:31:41 -04:00
fbf5c51c1c Merge branch 'fix_batch_timesteps' of https://github.com/asagi4/ComfyUI 2023-07-27 16:13:48 -04:00
68be24eead Remove some prints. 2023-07-27 16:12:43 -04:00
1ea4d84691 Fix timestep ranges when batch_size > 1 2023-07-27 21:14:09 +03:00
4ab75d9cb8 Update colab notebook with SDXL links. 2023-07-26 21:50:44 -04:00
5379051d16 Fix diffusers VAE loading. 2023-07-26 18:26:39 -04:00
00da9b3268 Merge branch 'fix/types' of https://github.com/melMass/ComfyUI 2023-07-26 01:55:55 -04:00
5e3ac1928a Implement modelspec metadata in CheckpointSave for SDXL and refiner. 2023-07-25 22:02:34 -04:00
727588d076 Fix some new loras. 2023-07-25 16:39:15 -04:00
315ba30c81 Update nightly ROCm pytorch command in readme to 5.6 2023-07-25 15:48:26 -04:00
4f9b6f39d1 Fix potential issue with Save Checkpoint. 2023-07-25 00:45:20 -04:00
7c0a5a3e0e Disable cuda malloc on a bunch of quadro cards. 2023-07-25 00:09:01 -04:00
a51f33ee49 Use bigger tiles when upscaling with model and fallback on OOM. 2023-07-24 19:47:32 -04:00
5f75d784a1 Start is now 0.0 and end is now 1.0 for the timestep ranges. 2023-07-24 18:38:17 -04:00
7ff14b62f8 ControlNetApplyAdvanced can now define when controlnet gets applied. 2023-07-24 17:50:49 -04:00
d191c4f9ed Add a ControlNetApplyAdvanced node.
The controlnet can be applied to the positive or negative prompt only by
connecting it correctly.
2023-07-24 13:35:20 -04:00
0240946ecf Add a way to set which range of timesteps the cond gets applied to. 2023-07-24 09:25:02 -04:00
30de083dd0 Disable cuda malloc on all the 9xx series. 2023-07-23 13:29:14 -04:00
22f29d66ca Try to fix memory issue with lora. 2023-07-22 21:38:56 -04:00
67be7eb81d Nodes can now patch the unet function. 2023-07-22 17:01:12 -04:00
12a6e93171 Del the right object when applying lora. 2023-07-22 11:25:49 -04:00
85a8900a14 Disable cuda malloc on regular GTX 960. 2023-07-22 11:05:33 -04:00
78e7958d17 Support controlnet in diffusers format. 2023-07-21 22:58:16 -04:00
09386a3697 Fix issue with lora in some cases when combined with model merging. 2023-07-21 21:27:27 -04:00
58b2364f58 Properly support SDXL diffusers unet with UNETLoader node. 2023-07-21 14:38:56 -04:00
5190aa284d fix: ️ small type fix
getCustomWidgets expects a plain record and not an array of records
2023-07-21 13:19:05 +02:00
0115018695 Print errors and continue when lora weights are not compatible. 2023-07-20 19:56:22 -04:00
4760c29380 Merge branch 'fix-AttributeError-module-'torch'-has-no-attribute-'mps'' of https://github.com/KarryCharon/ComfyUI 2023-07-20 00:34:54 -04:00
ccb6b70de1 Move image encoding outside of sampling loop for better preview perf. 2023-07-19 18:06:58 -04:00
39c58b227f Disable cuda malloc on GTX 750 Ti. 2023-07-19 15:14:10 -04:00
d5c0765f4e Update how to get the prompt in api format in the example. 2023-07-19 15:07:12 -04:00
799c08a4ce Auto disable cuda malloc on some GPUs on windows. 2023-07-19 14:43:55 -04:00
0b284f650b Fix typo. 2023-07-19 10:20:32 -04:00
e032ca6138 Fix ddim issue with older torch versions. 2023-07-19 10:16:00 -04:00
18885f803a Add MX450 and MX550 to list of cards with broken fp16. 2023-07-19 03:08:30 -04:00
9ba440995a It's actually possible to torch.compile the unet now. 2023-07-18 21:36:35 -04:00
51d5477579 Add key to indicate checkpoint is v_prediction when saving. 2023-07-18 00:25:53 -04:00
ff6b047a74 Fix device print on old torch version. 2023-07-17 15:18:58 -04:00
9871a15cf9 Enable --cuda-malloc by default on torch 2.0 and up.
Add --disable-cuda-malloc to disable it.
2023-07-17 15:12:10 -04:00
55d0fca9fa --windows-standalone-build now enables --cuda-malloc 2023-07-17 14:10:36 -04:00
1679abd86d Add a command line argument to enable backend:cudaMallocAsync 2023-07-17 11:00:14 -04:00
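PYTORCH_CUDA_ALLOC_CONF is PyTorch's documented switch for the async allocator, and it has to be set before torch initializes CUDA, which is why it is wired up as a startup flag. A sketch (the flag handling is illustrative):

```python
import os
import sys

if "--cuda-malloc" in sys.argv:
    # Must happen before the first CUDA initialization.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"

import torch  # imported only after the allocator choice is made
```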
3a150bad15 Only calculate randn in some samplers when it's actually being used. 2023-07-17 10:11:08 -04:00
ee8f8ee07f Fix regression with ddim and uni_pc when batch size > 1. 2023-07-17 09:35:19 -04:00
3ded1a3a04 Refactor of sampler code to deal more easily with different model types. 2023-07-17 01:22:12 -04:00
ac9c038ac2 Merge branch 'master' of https://github.com/ComfyUI-Community/ComfyUI 2023-07-16 03:04:45 -04:00
5f57362613 Lower lora ram usage when in normal vram mode. 2023-07-16 02:59:04 -04:00
a8f3bbc35d Patch del self.loaded_lora to prevent error with persistent lora_name swapping 2023-07-15 17:11:12 -07:00
490771b7f4 Speed up lora loading a bit. 2023-07-15 13:25:22 -04:00
50b1180dde Fix CLIPSetLastLayer not reverting when removed. 2023-07-15 01:41:21 -04:00
6fb084f39d Reduce floating point rounding errors in loras. 2023-07-15 00:53:00 -04:00
91ed2815d5 Add a node to merge CLIP models. 2023-07-14 02:41:18 -04:00
907c9fbf0d Refactor to make it easier to set the api path. 2023-07-14 00:50:49 -04:00
30ea187160 Merge branch 'use-relative-paths' of https://github.com/mcmonkey4eva/ComfyUI 2023-07-13 23:56:29 -04:00
eed3042830 Move conditioning concat node to conditioning section. 2023-07-13 21:44:56 -04:00
8a577966c5 Enables a way to save workflows in api format in frontend.
Enable the dev mode in the settings to see it.
2023-07-13 21:08:54 -04:00
bdba394290 Add a canny preprocessor node. 2023-07-13 13:26:48 -04:00
6f914fb77d Print prestartup times for custom nodes. 2023-07-13 13:01:45 -04:00
3bc8be33e4 Don't let custom nodes overwrite base nodes. 2023-07-13 12:56:38 -04:00
876dadca84 Highlight nodes with errors in red even when workflow works fine. 2023-07-13 10:07:50 -04:00
b2f03164c7 Prevent the clip_g position_ids key from being saved in the checkpoint.
This is to make it match the official checkpoint.
2023-07-12 20:15:02 -04:00
46dc050c9f Fix potential tensors being on different devices issues. 2023-07-12 19:29:27 -04:00
90aa597099 Add back roundRect to fix issue on firefox ESR. 2023-07-12 02:07:48 -04:00
3e2309f149 fix mps miss import 2023-07-12 10:06:34 +08:00
f4b9390623 Add a random string to the temp prefix for PreviewImage. 2023-07-11 17:35:55 -04:00
2b2a1474f7 Move to litegraph. 2023-07-11 03:12:00 -04:00
cef30cc6b6 Merge branch 'hidpi-canvas' of https://github.com/EHfive/ComfyUI 2023-07-11 03:04:10 -04:00
880c9b928b Update litegraph to latest. 2023-07-11 03:00:52 -04:00
05e6eac7b3 Scale graph canvas based on DPI factor
Similar to fixes in litegraph.js editor demo:
3ef215cf11/editor/js/code.js (L19-L28)

Also includes workarounds to address the viewport problem of litegraph.js in DPI scaling scenarios.

Fixes #161
2023-07-11 14:47:58 +08:00
99abcbef41 feat/startup-script: Feature to avoid package installation errors when installing custom nodes. (#856)
* support startup script for installation without locking on windows

* modified: instead of executing scripts from the startup-scripts directory, it now executes the prestartup_script.py of each custom node.
2023-07-11 02:33:21 -04:00
606a537090 Support SDXL embedding format with 2 CLIP. 2023-07-10 10:34:59 -04:00
5797ff89b0 use relative paths for all web connections
This enables local reverse proxies to host ComfyUI on a path, e.g. "http://example.com/ComfyUI/", in such a way that at least everything I tested works. Without this patch, proxying ComfyUI this way will yield errors.
2023-07-10 02:09:03 -07:00
6ad0a6d7e2 Don't patch weights when multiplier is zero. 2023-07-09 17:46:56 -04:00
af15add967 Fix annoyance with textbox unselecting in chromium. 2023-07-09 15:41:19 -04:00
d5323d16e0 latent2rgb matrix for SDXL. 2023-07-09 13:59:09 -04:00
0ae81c03bb Empty cache after model unloading for normal vram and lower. 2023-07-09 09:56:03 -04:00
d3f5998218 Support loading clip_g from diffusers in CLIP Loader nodes. 2023-07-09 09:33:53 -04:00
a9a4ba7574 Fix merging not working when model2 of model merge node was a merge. 2023-07-08 22:31:10 -04:00
febea8c101 Merge branch 'bugfix/img-offset' of https://github.com/ltdrdata/ComfyUI 2023-07-08 03:45:37 -04:00
9caab9380d fix: Image.ANTIALIAS is no longer available. (#847)
* modify deprecated api call

* prevent breaking old Pillow users

* change LANCZOS to BILINEAR
2023-07-08 02:36:48 -04:00
d43cff2105 bugfix: image widget was misaligned when the node has a multiline widget 2023-07-08 01:42:33 +09:00
c2d407b0f7 Merge branch 'Yaruze66-patch-1' of https://github.com/Yaruze66/ComfyUI 2023-07-07 01:55:10 -04:00
bb5fbd29e9 Merge branch 'condmask-fix' of https://github.com/vmedea/ComfyUI 2023-07-07 01:52:25 -04:00
2c9d98f3e6 CLIPTextEncodeSDXL now works when prompts are of very different sizes. 2023-07-06 23:23:54 -04:00
e7bee85df8 Add arguments to run the VAE in fp16 or bf16 for testing. 2023-07-06 23:23:46 -04:00
f5232c4869 Fix 7z error when extracting package. 2023-07-06 04:18:36 -04:00
608fcc2591 Fix bug with weights when prompt is long. 2023-07-06 02:43:40 -04:00
ddc6f12ad5 Disable autocast in unet for increased speed. 2023-07-05 21:58:29 -04:00
603f02d613 Fix loras not working when loading checkpoint with config. 2023-07-05 19:42:24 -04:00
ccb1b25908 Add a conditioning concat node. 2023-07-05 17:40:22 -04:00
af7a49916b Support loading unet files in diffusers format. 2023-07-05 17:38:59 -04:00
e57cba4c61 Add gpu variations of the sde samplers that are less deterministic
but faster.
2023-07-05 01:39:38 -04:00
f81b192944 Add logit scale parameter so it's present when saving the checkpoint. 2023-07-04 23:01:28 -04:00
acf95191ff Properly support SDXL diffusers loras for unet. 2023-07-04 21:15:23 -04:00
c61a95f9f7 Fix size check for conditioning mask
The wrong dimensions were being checked: [1] and [2] are the image size,
not [2] and [3]. This results in an out-of-bounds error if one of them
actually matches.
2023-07-04 16:34:42 +02:00
8d694cc450 Fix issue with OSX. 2023-07-04 02:09:02 -04:00
c02f3baeaf Now the model merge blocks node will use the longest match. 2023-07-04 00:51:17 -04:00
3a09fac835 ConditioningAverage now also averages the pooled output. 2023-07-03 21:44:37 -04:00
d94ddd8548 Add text encode nodes to control the extra parameters in SDXL. 2023-07-03 19:11:36 -04:00
c3e96e637d Pass device to CLIP model. 2023-07-03 16:09:37 -04:00
5e6bc824aa Allow passing custom path to clip-g and clip-h. 2023-07-03 15:45:04 -04:00
dc9d1f31c8 Improvements for OSX. 2023-07-03 00:08:30 -04:00
9ae6ff65bc Update extra_model_paths.yaml.example: add RealESRGAN path 2023-07-02 22:59:55 +05:00
103c487a89 Cleanup. 2023-07-02 11:58:23 -04:00
ae948b42fa Add taesd weights to standalones. 2023-07-02 11:47:30 -04:00
2c4e0b49b7 Switch to fp16 on some cards when the model is too big. 2023-07-02 10:00:57 -04:00
6f3d9f52db Add a --force-fp16 argument to force fp16 for testing. 2023-07-01 22:42:35 -04:00
1c1b0e7299 --gpu-only now keeps the VAE on the device. 2023-07-01 15:22:40 -04:00
ce35d8c659 Lower latency by batching some text encoder inputs. 2023-07-01 15:07:39 -04:00
3b6fe51c1d Leave text_encoder on the CPU when it can handle it. 2023-07-01 14:38:51 -04:00
b6a60fa696 Try to keep text encoders loaded and patched to increase speed.
load_model_gpu() is now used with the text encoder models instead of just
the unet.
2023-07-01 13:28:07 -04:00
97ee230682 Make highvram and normalvram shift the text encoders to vram and back.
This is faster on big text encoder models than running it on the CPU.
2023-07-01 12:37:23 -04:00
fa1959e3ef Fix nightly packaging. 2023-07-01 01:31:03 -04:00
9f2986318f Move model merging nodes to advanced and add to readme. 2023-06-30 15:21:55 -04:00
5a9ddf94eb LoraLoader node now caches the lora file between executions. 2023-06-29 23:40:51 -04:00
6e9f28401f Persist node instances between executions instead of deleting them.
If the same node id with the same class exists between two executions the
same instance will be used.

This means you can now cache things in nodes for more efficiency.
2023-06-29 23:38:56 -04:00
9920367d3c Fix embeddings not working with --gpu-only 2023-06-29 20:43:06 -04:00
62db11683b Move unet to device right after loading on highvram mode. 2023-06-29 20:43:06 -04:00
e7ed507d3d Add link to 7z in README (#809)
* Add link to 7z in README

* Change 7z to 7-Zip
2023-06-29 04:09:59 -04:00
4376b125eb Remove useless code. 2023-06-29 00:26:33 -04:00
89120f1fbe This is unused but it should be 1280. 2023-06-28 18:04:23 -04:00
2c7c14de56 Support for SDXL text encoder lora. 2023-06-28 02:22:49 -04:00
fcef47f06e Fix bug. 2023-06-28 00:38:07 -04:00
2d880fec3a Add a node to zero out the cond to advanced/conditioning
The stability streamlit example passes a zero cond as the negative input,
so using this node for the negative input makes outputs match the streamlit example.
2023-06-27 23:30:52 -04:00
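A minimal sketch of zeroing out a conditioning, assuming ComfyUI's list of (tensor, options-dict) pairs with an optional pooled output:

```python
import torch

def zero_out(conditioning):
    out = []
    for cond, options in conditioning:
        options = options.copy()
        if "pooled_output" in options:
            options["pooled_output"] = torch.zeros_like(options["pooled_output"])
        # Zeros of the same shape stand in for the "empty" negative cond.
        out.append([torch.zeros_like(cond), options])
    return out
```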
50abf7c938 Merge branch 'patch-1' of https://github.com/jjangga0214/ComfyUI 2023-06-27 01:42:16 -04:00
8248babd44 Use pytorch attention by default on nvidia when xformers isn't present.
Add a new argument --use-quad-cross-attention
2023-06-26 13:03:44 -04:00
9b93b920be Add CheckpointSave node to save checkpoints.
The created checkpoints contain workflow metadata that can be loaded by
dragging them on top of the UI or loading them with the "Load" button.

Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI
is using for inference on your hardware. To force fp32 use: --force-fp32

Anything that patches the model weights like merging or loras will be
saved.

The output directory is currently set to: output/checkpoints but that might
change in the future.
2023-06-26 12:22:27 -04:00
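Safetensors files carry a string-to-string metadata map in their header, which is how a workflow can ride along with the weights. A sketch, assuming the workflow is JSON-serializable; the metadata key name is illustrative, not the exact ComfyUI format:

```python
import json
from safetensors.torch import save_file

def save_checkpoint(state_dict, workflow, path, force_fp32=False):
    if not force_fp32:
        # Match the inference format: store float tensors in fp16.
        state_dict = {k: (v.half() if v.is_floating_point() else v)
                      for k, v in state_dict.items()}
    # Metadata values must be strings, so the workflow is serialized.
    save_file(state_dict, path, metadata={"workflow": json.dumps(workflow)})
```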
b72a7a835a Support loras based on the stability unet implementation. 2023-06-26 02:56:11 -04:00
c71a7e6b20 Fix ddim + inpainting not working. 2023-06-26 00:48:48 -04:00
530e408ab8 docs(extra model paths): add LyCORIS path 2023-06-25 20:11:28 +09:00
4eab00e14b Set the seed in the SDE samplers to make them more reproducible. 2023-06-25 03:04:57 -04:00
cef6aa62b2 Add support for TAESD decoder for SDXL. 2023-06-25 02:38:14 -04:00
20f579d91d Add DualClipLoader to load clip models for SDXL.
Update LoadClip to load clip models for SDXL refiner.
2023-06-25 01:40:38 -04:00
b7933960bb Fix CLIPLoader node. 2023-06-24 13:56:46 -04:00
78d8035f73 Fix bug with controlnet. 2023-06-24 11:02:38 -04:00
c9f5d5b2e1 optimize: support preview mode for mask editor. (#755)
* support preview mode for mask editor.
* use original file reference instead of loaded frontend blob

bugfix:
* prevent file open dialog when save to load image

* bugfix: cannot clear previous mask painted image's alpha

* bugfix

* bugfix

---------

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-06-24 03:45:41 -04:00
05676942b7 Add some more transformer hooks and move tomesd to comfy_extras.
Tomesd now uses q instead of x to decide which tokens to merge because
it seems to give better results.
2023-06-24 03:30:22 -04:00
fa28d7334b Remove useless code. 2023-06-23 12:35:26 -04:00
8607c2d42d Move latent scale factor from VAE to model. 2023-06-23 02:33:31 -04:00
30a3861946 Fix bug when yaml config has no clip params. 2023-06-23 01:12:59 -04:00
9e37f4c7d5 Fix error with ClipVision loader node. 2023-06-23 01:08:05 -04:00
3e0686ce94 Add SDXL support to readme and improve the Running section. 2023-06-22 19:33:48 -04:00
7573897a3e Merge branch 'master' of https://github.com/VladislavNekto/ComfyUI 2023-06-22 19:28:18 -04:00
9f83b098c9 Don't merge weights when shapes don't match and print a warning. 2023-06-22 19:08:31 -04:00
f87ec10a97 Support base SDXL and SDXL refiner models.
Large refactor of the model detection and loading code.
2023-06-22 13:03:50 -04:00
ca485d2328 Update README.md
Information about running at RX7600
2023-06-22 22:23:47 +06:00
9fccf4aa03 Add original_shape parameter to transformer patch extra_options. 2023-06-21 13:22:01 -04:00
852cf4db99 Merge branch 'widget-input-overlapping' of https://github.com/ssitu/ComfyUI 2023-06-21 02:45:59 -04:00
6f0f8aa7aa Merge branch 'reroute-disconnect-fix' of https://github.com/ssitu/ComfyUI 2023-06-21 02:45:11 -04:00
51581dbfa9 Fix last commits causing an issue with the text encoder lora. 2023-06-20 19:44:39 -04:00
bf3f271775 Add some nodes for basic model merging. 2023-06-20 19:17:03 -04:00
8125b51a62 Keep a set of model_keys for faster add_patches. 2023-06-20 19:08:48 -04:00
45beebd33c Add a type of model patch useful for model merging. 2023-06-20 17:34:11 -04:00
6f54b01954 Fix reroute node connecting different types 2023-06-20 15:25:56 -04:00
8c3d24434a Fix overlapping when converting widgets to inputs 2023-06-20 12:03:46 -04:00
186f92042b Merge branch 'improve-keyboard' of https://github.com/reaper47/ComfyUI 2023-06-20 00:54:04 -04:00
96e8307bd3 Clean keybinds extension 2023-06-19 21:32:21 +02:00
036a22077c Fix k_diffusion math being off by a tiny bit during txt2img. 2023-06-19 15:28:54 -04:00
8883cb0f67 Add a way to set patches that modify the attn2 output.
Change the transformer patches function format to be more future proof.
2023-06-18 22:58:22 -04:00
cd930d4e7f pop clip vision keys after loading them. 2023-06-18 21:21:17 -04:00
c9e4a8c9e5 Not needed anymore. 2023-06-18 13:06:59 -04:00
fb4bf7f591 This is not needed anymore and causes issues with alphas_cumprod. 2023-06-18 03:18:25 -04:00
45be2e92c1 Fix DDIM v-prediction. 2023-06-17 20:48:21 -04:00
e619278730 Merge branch 'html5-dialog' of https://github.com/reaper47/ComfyUI 2023-06-17 18:39:55 -04:00
8c9c94b5f3 Add bicubic upscale method. 2023-06-17 01:54:33 -04:00
e6e50ab2dd Fix an issue when alphas_cumprod are half floats. 2023-06-16 17:16:51 -04:00
ae43f09ef7 All the unet weights should now be initialized with the right dtype. 2023-06-15 18:42:30 -04:00
cf3974c829 Update readme with command to install pytorch with ROCm5.5.
Remove mentions of python 3.10 since 3.11 works fine now.
2023-06-15 18:11:28 -04:00
f7edcfd927 Add a --gpu-only argument to keep and run everything on the GPU.
Make the CLIP model work on the GPU.
2023-06-15 15:38:52 -04:00
7bf89ba923 Initialize more unet weights as the right dtype. 2023-06-15 15:00:10 -04:00
e21d9ad445 Initialize transformer unet block weights in right dtype at the start. 2023-06-15 14:29:26 -04:00
3fbd0abc5f Add missed .comfy-table in CSS 2023-06-15 18:39:18 +02:00
34ddbfdc8a Beautify settings dialog 2023-06-15 18:36:52 +02:00
6253ec4aef Fix server crashing because of terminated websocket connection. 2023-06-15 11:01:56 -04:00
bb1f45d6e8 Properly disable weight initialization in clip models. 2023-06-14 20:13:08 -04:00
21f04fe632 Disable default weight values in unet conv2d for faster loading. 2023-06-14 19:46:08 -04:00
9d54066ebc This isn't needed for inference. 2023-06-14 13:05:08 -04:00
fa2cca056c Don't initialize CLIPVision weights to default values. 2023-06-14 12:57:02 -04:00
6b774589a5 Set model to fp16 before loading the state dict to lower ram bump. 2023-06-14 12:48:02 -04:00
0c7cad404c Don't initialize clip weights to default values. 2023-06-14 12:47:36 -04:00
6971646b8b Speed up model loading a bit.
The default pytorch Linear initializes its weights, which is useless here and slow.
2023-06-14 12:09:41 -04:00
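torch.nn.utils.skip_init is one way to avoid that wasted work: it constructs the module with uninitialized parameters, which the checkpoint load then overwrites. A sketch of the idea, not necessarily the exact mechanism used here:

```python
import torch

# Build the layer without running the (slow, soon-to-be-discarded)
# default weight initialization.
layer = torch.nn.utils.skip_init(torch.nn.Linear, 4096, 4096)
# The real weights arrive from the checkpoint afterwards.
layer.load_state_dict({"weight": torch.randn(4096, 4096),
                       "bias": torch.zeros(4096)})
```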
84f13f828a Merge branch 'issue-752' of https://github.com/reaper47/ComfyUI 2023-06-14 00:17:25 -04:00
388567f20b sampler_cfg_function now uses a dict for the argument.
This means arguments can be added without issues.
2023-06-13 16:10:36 -04:00
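With a dict, new fields can be added later without breaking existing callbacks. A sketch of a classic CFG callback; the exact key names are an assumption:

```python
def my_cfg_function(args):
    cond = args["cond"]          # conditioned model output (assumed key)
    uncond = args["uncond"]      # unconditioned model output (assumed key)
    scale = args["cond_scale"]   # CFG scale (assumed key)
    # Classifier-free guidance: push the result away from uncond.
    return uncond + (cond - uncond) * scale
```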
d52ed407a7 Send websocket message only when prompt is actually done executing. 2023-06-13 13:38:43 -04:00
ff9b22d79e Turn on safe load for a few models. 2023-06-13 10:12:03 -04:00
735ac4cf81 Remove pytorch_lightning dependency. 2023-06-13 10:11:33 -04:00
cb180b9998 Add some missing direct dependencies that were getting pulled indirectly. 2023-06-13 02:45:26 -04:00
2b14041d4b Remove useless code. 2023-06-13 02:40:58 -04:00
aba886e9da Issue 741: Darken white background 2023-06-13 08:27:26 +02:00
274dff3257 Remove more useless files. 2023-06-13 02:22:19 -04:00
f0a2b81cd0 Cleanup: Remove a bunch of useless files. 2023-06-13 02:19:08 -04:00
74297f5f9d Merge branch 'master' of https://github.com/ssitu/ComfyUI 2023-06-13 01:41:27 -04:00
0c874e604c Fix unhandled message "execution_cached" 2023-06-12 17:16:03 -04:00
2803e78bd0 Add a note to script about which websocket library is used. 2023-06-12 17:05:28 -04:00
f5d8aadb22 Add script example that downloads the images after a prompt is executed. 2023-06-12 14:36:45 -04:00
af91df85c2 Add a /history/{prompt_id} endpoint. 2023-06-12 14:34:30 -04:00
3402ec0c0d Issue 752: Fix background 2023-06-12 15:58:05 +02:00
67833c83d8 Add ImageScaleBy node. 2023-06-12 01:14:04 -04:00
f8c5931053 Split the batch in VAEEncode if there's not enough memory. 2023-06-12 00:21:50 -04:00
c069fc0730 Auto switch to tiled VAE encode if regular one runs out of memory. 2023-06-11 23:25:39 -04:00
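The fallback pattern behind these two commits, sketched with hypothetical method names on the VAE object (torch.cuda.OutOfMemoryError requires a reasonably recent PyTorch):

```python
import torch

def encode_with_fallback(vae, pixels):
    try:
        return vae.encode(pixels)
    except torch.cuda.OutOfMemoryError:
        # Free what we can and retry in tiles that fit in memory.
        torch.cuda.empty_cache()
        return vae.encode_tiled(pixels, tile_size=512)
```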
c64ca8c0b2 Refactor unCLIP noise augment out of samplers.py 2023-06-11 04:01:18 -04:00
7b2f09b5fa Issue 742: Extension folder should be ignored 2023-06-10 21:53:49 +02:00
656f62569d Make the sections in the others install section more clearly separate. 2023-06-10 04:19:33 -04:00
b18946c53b Merge branch 'next-task' of https://github.com/reaper47/ComfyUI 2023-06-10 03:23:25 -04:00
ba23753670 DirectML is for Windows. 2023-06-10 03:23:01 -04:00
2bcdd6c7d4 Add install instructions for Apple silicon 2023-06-09 22:25:33 +02:00
de142eaad5 Simpler base model code. 2023-06-09 12:31:16 -04:00
bfebe2d6c3 Improve ContextMenuFilter extension 2023-06-09 13:29:15 +02:00
4b0b516544 Add code to handle primitive nodes connected to reroute nodes.
Revert last commit because I noticed it broke a few things.
2023-06-09 02:49:13 -04:00
8e14c46a38 allows connect primitive node to reroute if primitive node has type (#751)
Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-06-09 02:21:30 -04:00
8b82f79cb2 Merge branch 'comment-syntax' of https://github.com/space-nuko/ComfyUI 2023-06-09 02:15:44 -04:00
23cf8ca7c5 Fix bug when embedding gets ignored because of mismatched size. 2023-06-08 23:48:14 -04:00
65922419e2 Add comment note in README 2023-06-08 12:12:07 -05:00
eed4f62cc5 Add comment support to dynamic prompts nodes 2023-06-08 12:08:00 -05:00
29c50954ea Add some quick instructions how to use directml. 2023-06-08 02:00:44 -04:00
631132c8c5 Merge branch 'bugfix/paste-clipspace' of https://github.com/ltdrdata/ComfyUI 2023-06-08 01:23:35 -04:00
28677342c1 robust paste for image 2023-06-08 00:06:56 +09:00
70e02b443f robust patch on pasteFromClipspace 2023-06-07 22:56:08 +09:00
5cf4079923 Give linux some love 2023-06-07 15:15:38 +02:00
ee62b4ecc2 Merge branch 'bugfix/widget_size_conflict' of https://github.com/ltdrdata/ComfyUI 2023-06-07 02:08:07 -04:00
4f1d8c3370 Merge branch 'update-gitignore' of https://github.com/reaper47/ComfyUI 2023-06-07 02:07:52 -04:00
0e425603fb Small refactor. 2023-06-06 13:23:01 -04:00
3b5b095d04 Add .idea/ to .gitignore 2023-06-06 17:40:07 +02:00
422163c2ba bugfix: Fixing the calculation issue when an image widget is added to the size calculation of the text widget. 2023-06-06 22:29:19 +09:00
a3a713b6c5 Refactor previews into one command line argument.
Clean up a few things.
2023-06-06 02:13:05 -04:00
081134f5c8 Merge branch 'taesd-preview' of https://github.com/space-nuko/ComfyUI 2023-06-05 23:53:36 -04:00
2b2ea5194e Add readme note 2023-06-05 19:16:51 -05:00
8b4a6c19c2 Fix 2023-06-05 19:00:51 -05:00
3e17971acb preview method autodetection 2023-06-05 18:59:10 -05:00
d5a28fadaa Add latent2rgb preview 2023-06-05 18:39:56 -05:00
70d72c4336 Slightly less vibrant sample 2023-06-05 15:26:56 -05:00
48f7ec750c Make previews into cli option 2023-06-05 13:19:02 -05:00
f326a0a468 Make new LATENT_PREVIEWER type for declaring KSampler preview methods 2023-06-05 09:20:20 -05:00
a9fa2d3727 Fix 2023-06-05 09:20:20 -05:00
38bc02bb40 Fix 2023-06-05 09:20:20 -05:00
1c40296d74 Fix 2023-06-05 09:20:20 -05:00
b4f434ee66 Preview sampled images with TAESD 2023-06-05 09:20:17 -05:00
2ec980bb9f Limit preview to webp and RGB jpeg. 2023-06-05 01:50:14 -04:00
9f3a19b728 improve: lightweight preview to reduce network traffic (#733)
* To reduce bandwidth traffic in a remote environment, a lossy compression-based preview mode is provided for displaying simple visualizations in node-based widgets.

* Added 'preview=[image format]' option to the '/view' API.
* Updated node to use preview for displaying images as widgets.
* Excluded preview usage in the open image, save image, mask editor where the original data is required.

* Made preview_format parameterizable for extensibility.

* default preview format changed: jpeg -> webp

* Support advanced preview_format option.
- grayscale option for visual debugging
- quality option for aggressive reducing

L?;format;quality?

ex)
jpeg => rgb, jpeg, quality 90
L;webp;80 => grayscale, webp, quality 80
L;png => grayscale, png, quality 90
webp;50 => rgb, webp, quality 50

* move comment

* * add settings for preview_format
* default value is ''(= don't reencode)

---------

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-06-05 01:49:43 -04:00
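A sketch of parsing the "L?;format;quality?" spec described above (not the actual parser of the /view endpoint):

```python
def parse_preview_format(spec, default_quality=90):
    parts = spec.split(";")
    grayscale = parts[0] == "L"     # optional leading "L" means grayscale
    if grayscale:
        parts = parts[1:]
    image_format = parts[0]         # e.g. "jpeg", "webp", "png"
    quality = int(parts[1]) if len(parts) > 1 else default_quality
    return grayscale, image_format, quality

assert parse_preview_format("jpeg") == (False, "jpeg", 90)
assert parse_preview_format("L;webp;80") == (True, "webp", 80)
assert parse_preview_format("webp;50") == (False, "webp", 50)
```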
fed0a4dd29 Some comments to say what the vram state options mean. 2023-06-04 17:51:04 -04:00
126b4050dc Crash fix for intermittent crashes that occur when opening MaskEditor. (#732) 2023-06-03 12:25:49 -04:00
0764bb5218 Move node properties panel from double click to menu option. 2023-06-03 11:47:20 -04:00
c092ffcc18 Latest litegraph from upstream. 2023-06-03 11:46:52 -04:00
32f282c861 Search box style fix. 2023-06-03 11:19:10 -04:00
0a5fefd621 Cleanups and fixes for model_management.py
Hopefully fix regression on MPS and CPU.
2023-06-03 11:05:37 -04:00
700491d81a Implement global average pooling for controlnet. 2023-06-03 01:49:03 -04:00
66e588d837 Ignore folder path directories that don't exist. 2023-06-02 16:48:56 -04:00
871a86593a Smarter filename list caching. 2023-06-02 16:34:47 -04:00
67892b5ac5 Refactor and improve model_management code related to free memory. 2023-06-02 15:21:33 -04:00
499641ebf1 More accurate total 2023-06-02 00:14:41 -05:00
b5dd15c67a System stats endpoint 2023-06-01 23:26:23 -05:00
1bbd3f7fe1 Send back prompt number from prompt/ endpoint 2023-06-01 22:15:06 -05:00
5c38958e49 Tweak lowvram model memory so it's closer to what it was before. 2023-06-01 04:04:35 -04:00
94680732d3 Empty cache on mps. 2023-06-01 03:52:51 -04:00
d200fa1314 Prevent callers from mutating folder lists 2023-05-31 21:07:27 -04:00
b06c5259db Merge branch 'refactor/registerNodes' of https://github.com/ltdrdata/ComfyUI 2023-05-31 13:26:28 -04:00
03da8a3426 This is useless for inference. 2023-05-31 13:03:24 -04:00
8e8d6070f2 race condition patch 2023-05-31 23:26:56 +09:00
1f34bf08f0 To support dynamic custom loading, separate the node registration
process based on the defs in the registerNodes function.
2023-05-31 22:01:25 +09:00
606446d030 Merge branch 'fix-litegraph-css' of https://github.com/space-nuko/ComfyUI 2023-05-30 23:42:00 -04:00
8ef197f028 Keep list of filenames and only refresh it when something changes. 2023-05-30 18:48:50 -04:00
468c27afea Fix litegraph dialog z-index/font 2023-05-30 16:06:17 -05:00
04f4fba013 Fix litegraph dialog CSS 2023-05-30 16:01:49 -05:00
2260802d90 Check if folder_name is valid instead of just throwing exception. 2023-05-30 16:44:09 -04:00
9af7033c5e Merge branch 'hotfix/refresh-primitive-conflict' of https://github.com/ltdrdata/ComfyUI 2023-05-30 12:38:26 -04:00
eb448dd8e1 Auto load model in lowvram if not enough memory. 2023-05-30 12:36:41 -04:00
08abd838b8 HOTFIX: Patched the conflict issue between the Combo Refresh feature and PrimitiveNodes. 2023-05-30 15:26:45 +09:00
560e9f7a43 Disable repo owner validation in update.py 2023-05-29 11:29:00 -04:00
b9818eb910 Add route to get safetensors metadata:
/view_metadata/loras?filename=lora.safetensors
2023-05-29 02:48:50 -04:00
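The safetensors format makes such a route cheap to serve: the file begins with an 8-byte little-endian header length followed by a JSON header, with user metadata under "__metadata__". A sketch of the read:

```python
import json
import struct

def read_safetensors_metadata(path):
    with open(path, "rb") as f:
        header_len = struct.unpack("<Q", f.read(8))[0]  # 8-byte LE length
        header = json.loads(f.read(header_len))         # JSON header
    return header.get("__metadata__", {})
```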
23ffafeb5d typo fix: field name in error message 2023-05-28 23:31:40 +09:00
a532888846 Support VAEs in diffusers format. 2023-05-28 02:02:09 -04:00
0fc483dcfd Refactor diffusers model convert code to be able to reuse it. 2023-05-28 01:55:40 -04:00
f3ac938b4a Round the mask values for bitwise operations. 2023-05-28 00:42:53 -04:00
ad81fd682a Fix issue with cancelling prompt. 2023-05-28 00:32:26 -04:00
1cfb2a733f Merge branch 'error-improvements' of https://github.com/space-nuko/ComfyUI 2023-05-27 23:09:40 -04:00
00646b0813 Bitwise operations for masks 2023-05-27 21:48:49 -05:00
03f2d0a764 Rename exception message field 2023-05-27 21:06:07 -05:00
52c9590b7b Exception message 2023-05-27 21:06:07 -05:00
62bdd9d26a Catch typecast errors 2023-05-27 21:06:07 -05:00
a9e7e23724 Fix 2023-05-27 21:06:07 -05:00
e2d080b694 Return null for value format 2023-05-27 21:06:07 -05:00
6b2a8a3845 Show message in the frontend if prompt execution raises an exception 2023-05-27 21:06:07 -05:00
ffec815257 Send back more information about exceptions that happen during execution 2023-05-27 21:06:07 -05:00
0d834e3a2b Add missing input name/config 2023-05-27 21:06:07 -05:00
c33b7c5549 Improve invalid prompt error message 2023-05-27 21:06:07 -05:00
cc4d3435d3 Highlight failing nodes/inputs in frontend 2023-05-27 21:06:07 -05:00
73e85fb3f4 Improve error output for failed nodes 2023-05-27 21:06:07 -05:00
9144947244 Merge branch 'zero-lora-weights' of https://github.com/space-nuko/ComfyUI 2023-05-26 22:32:10 -04:00
679bd2845a Safetensors isn't optional anymore. 2023-05-26 21:46:11 -04:00
4d1ed829d9 Don't load some model types if weight is zero 2023-05-26 19:33:30 -05:00
eb4bd7711a Remove einops. 2023-05-25 18:42:56 -04:00
87ab25fac7 Do operations in same order as the one it replaces. 2023-05-25 18:31:27 -04:00
2b1fac9708 Merge branch 'master' of https://github.com/BlenderNeko/ComfyUI 2023-05-25 14:44:16 -04:00
e1278fa925 Support old pytorch versions that don't have weights_only. 2023-05-25 13:30:59 -04:00
8b4b0c3188 vectorized bislerp 2023-05-25 19:23:47 +02:00
9b1396e93a Fix issue importing other ui prompts. 2023-05-24 14:01:11 -04:00
7310290f17 Pull in latest upscale model code from chainner. 2023-05-23 22:26:50 -04:00
c00bb1a0b7 Add a latent upscale by node. 2023-05-23 12:53:38 -04:00
b8ccbec6d8 Various improvements to bislerp. 2023-05-23 11:40:24 -04:00
451fb4169a Fix 'git pull' not working on the standalones. 2023-05-23 11:35:32 -04:00
34887b8885 Add experimental bislerp algorithm for latent upscaling.
It's like bilinear but with slerp.
2023-05-23 03:12:56 -04:00
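The building block is a single slerp between two vectors; bislerp applies it along both spatial axes in place of bilinear's lerps. A minimal sketch for 1-D tensors:

```python
import torch

def slerp(a, b, t):
    # Interpolate along the arc between a and b instead of the chord,
    # which preserves vector magnitude better than a plain lerp.
    omega = torch.arccos(torch.clamp(
        torch.dot(a / a.norm(), b / b.norm()), -1.0, 1.0))
    so = torch.sin(omega)
    if so.abs() < 1e-6:  # nearly parallel: lerp is the stable limit
        return (1.0 - t) * a + t * b
    return (torch.sin((1.0 - t) * omega) / so) * a + (torch.sin(t * omega) / so) * b
```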
48fcc5b777 Parsing error crash. 2023-05-22 20:51:30 -04:00
bfb13f5eee Remove useless call to /object_info 2023-05-22 17:05:23 -04:00
db27b0405a object_info now returns if node is an output_node or not. 2023-05-22 13:25:50 -04:00
ffc56c53c9 Add a node_errors to the /prompt error json response.
"node_errors" contains a dict keyed by node ids. The contents are a message
and a list of dependent outputs.
2023-05-22 13:22:38 -04:00
6cc450579b Auto transpose images from exif data. 2023-05-22 00:22:24 -04:00
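With Pillow this kind of auto-transpose is a one-liner; a sketch (the file path is made up):

```python
from PIL import Image, ImageOps

img = Image.open("photo_from_phone.jpg")
# Apply the rotation/flip encoded in the EXIF Orientation tag,
# so portrait photos load upright.
img = ImageOps.exif_transpose(img)
```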
dc198650c0 sample_dpmpp_2m_sde no longer crashes when step == 1. 2023-05-21 11:34:29 -04:00
4796e615dd Revert DPI fix since it caused more issues than it solved. 2023-05-21 10:34:26 -04:00
069657fbf3 Add DPM-Solver++(2M) SDE and exponential scheduler.
exponential scheduler is the one recommended with this sampler.
2023-05-21 01:46:03 -04:00
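An exponential schedule spaces the noise levels evenly in log-space; a sketch mirroring k-diffusion's version:

```python
import math
import torch

def get_sigmas_exponential(n, sigma_min, sigma_max):
    # Evenly spaced in log(sigma), descending, terminated by 0.
    sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n).exp()
    return torch.cat([sigmas, torch.zeros(1)])
```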
516119ad83 Print min and max values in validation error message. 2023-05-21 00:24:28 -04:00
3c76f43057 Cleaner code. 2023-05-20 23:06:33 -04:00
b8636a44aa Make scaled_dot_product switch to sliced attention on OOM. 2023-05-20 16:01:02 -04:00
797c4e8d3b Simplify and improve some vae attention code. 2023-05-20 15:07:21 -04:00
71666f248f Fix padding in Blur. 2023-05-20 10:08:47 -04:00
36af98d755 improve sharpen and blur nodes 2023-05-20 15:23:28 +02:00
b9daf4e30f Add a /object_info/{node_class} route to get only the info of one node. 2023-05-19 22:40:28 -04:00
e6e1999f96 Render UI at a higher resolution when viewing with a higher pixel ratio 2023-05-19 20:04:36 +01:00
2998e232cb Make multiline widget work with different canvas dimensions.
It now scales the textarea positioning using the canvas height/width.
2023-05-19 19:57:15 +01:00
8bbd9815a9 Support loading fp16 latent files. 2023-05-19 02:15:32 -04:00
62a371e12b Load workflow from latent file. 2023-05-18 02:41:21 -04:00
faf899ad5a LoadLatent and SaveLatent should behave like LoadImage and SaveImage. 2023-05-18 00:09:12 -04:00
a7375103b9 Some small changes to Load/SaveLatent. 2023-05-17 23:41:57 -04:00
e7f2816c6f feat:Latent Save/Load (#662)
* wip

* latent dir

* fix

* fix

* now working

* mark todo

* remove server.py changes to separate PR

---------

Co-authored-by: Lt.Dr.Data <lt.dr.data@gmail.com>
2023-05-17 23:40:28 -04:00
4088e61aa6 Update litegraph from upstream. 2023-05-16 15:35:07 -04:00
6a12094345 Merge branch 'patch/touch' of https://github.com/ltdrdata/ComfyUI 2023-05-16 11:55:20 -04:00
11e7168d56 Remove print. 2023-05-16 11:55:16 -04:00
7ada9e7d85 allows touch drag 2023-05-16 22:55:00 +09:00
13d94caf49 Add control_after_generate to combo primitive. 2023-05-16 03:18:11 -04:00
5f7968f1fa Print the endpoint ip for localtunnel in the colab notebook. 2023-05-16 01:12:44 -04:00
652 changed files with 883313 additions and 54667 deletions

View File

@ -1,3 +0,0 @@
..\python_embeded\python.exe .\update.py ..\ComfyUI\
..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -r ../ComfyUI/requirements.txt pygit2
pause

View File

@ -1,6 +1,9 @@
import pygit2
from datetime import datetime
import sys
import os
import shutil
import filecmp
def pull(repo, remote_name='origin', branch='master'):
for remote in repo.remotes:
@ -25,41 +28,124 @@ def pull(repo, remote_name='origin', branch='master'):
if repo.index.conflicts is not None:
for conflict in repo.index.conflicts:
print('Conflicts found in:', conflict[0].path)
print('Conflicts found in:', conflict[0].path) # noqa: T201
raise AssertionError('Conflicts, ahhhhh!!')
user = repo.default_signature
tree = repo.index.write_tree()
commit = repo.create_commit('HEAD',
user,
user,
'Merge!',
tree,
[repo.head.target, remote_master_id])
repo.create_commit('HEAD',
user,
user,
'Merge!',
tree,
[repo.head.target, remote_master_id])
# We need to do this or git CLI will think we are still merging.
repo.state_cleanup()
else:
raise AssertionError('Unknown merge analysis result')
repo = pygit2.Repository(str(sys.argv[1]))
pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0)
repo_path = str(sys.argv[1])
repo = pygit2.Repository(repo_path)
ident = pygit2.Signature('comfyui', 'comfy@ui')
try:
print("stashing current changes")
print("stashing current changes") # noqa: T201
repo.stash(ident)
except KeyError:
print("nothing to stash")
print("nothing to stash") # noqa: T201
backup_branch_name = 'backup_branch_{}'.format(datetime.today().strftime('%Y-%m-%d_%H_%M_%S'))
print("creating backup branch: {}".format(backup_branch_name))
repo.branches.local.create(backup_branch_name, repo.head.peel())
print("creating backup branch: {}".format(backup_branch_name)) # noqa: T201
try:
repo.branches.local.create(backup_branch_name, repo.head.peel())
except:
pass
print("checking out master branch")
print("checking out master branch") # noqa: T201
branch = repo.lookup_branch('master')
ref = repo.lookup_reference(branch.name)
repo.checkout(ref)
if branch is None:
try:
ref = repo.lookup_reference('refs/remotes/origin/master')
except:
print("pulling.") # noqa: T201
pull(repo)
ref = repo.lookup_reference('refs/remotes/origin/master')
repo.checkout(ref)
branch = repo.lookup_branch('master')
if branch is None:
repo.create_branch('master', repo.get(ref.target))
else:
ref = repo.lookup_reference(branch.name)
repo.checkout(ref)
print("pulling latest changes")
print("pulling latest changes") # noqa: T201
pull(repo)
print("Done!")
if "--stable" in sys.argv:
def latest_tag(repo):
versions = []
for k in repo.references:
try:
prefix = "refs/tags/v"
if k.startswith(prefix):
version = list(map(int, k[len(prefix):].split(".")))
versions.append((version[0] * 10000000000 + version[1] * 100000 + version[2], k))
except:
pass
versions.sort()
if len(versions) > 0:
return versions[-1][1]
return None
latest_tag = latest_tag(repo)
if latest_tag is not None:
repo.checkout(latest_tag)
print("Done!") # noqa: T201
self_update = True
if len(sys.argv) > 2:
self_update = '--skip_self_update' not in sys.argv
update_py_path = os.path.realpath(__file__)
repo_update_py_path = os.path.join(repo_path, ".ci/update_windows/update.py")
cur_path = os.path.dirname(update_py_path)
req_path = os.path.join(cur_path, "current_requirements.txt")
repo_req_path = os.path.join(repo_path, "requirements.txt")
def files_equal(file1, file2):
try:
return filecmp.cmp(file1, file2, shallow=False)
except:
return False
def file_size(f):
try:
return os.path.getsize(f)
except:
return 0
if self_update and not files_equal(update_py_path, repo_update_py_path) and file_size(repo_update_py_path) > 10:
shutil.copy(repo_update_py_path, os.path.join(cur_path, "update_new.py"))
exit()
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
import subprocess
try:
subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_req_path])
shutil.copy(repo_req_path, req_path)
except:
pass
stable_update_script = os.path.join(repo_path, ".ci/update_windows/update_comfyui_stable.bat")
stable_update_script_to = os.path.join(cur_path, "update_comfyui_stable.bat")
try:
if not file_size(stable_update_script_to) > 10:
shutil.copy(stable_update_script, stable_update_script_to)
except:
pass

View File

@ -1,2 +1,8 @@
@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\
pause
if exist update_new.py (
move /y update_new.py update.py
echo Running updater again since it got updated.
..\python_embeded\python.exe .\update.py ..\ComfyUI\ --skip_self_update
)
if "%~1"=="" pause

View File

@ -1,3 +0,0 @@
..\python_embeded\python.exe .\update.py ..\ComfyUI\
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 xformers -r ../ComfyUI/requirements.txt pygit2
pause

View File

@ -0,0 +1,8 @@
@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\ --stable
if exist update_new.py (
move /y update_new.py update.py
echo Running updater again since it got updated.
..\python_embeded\python.exe .\update.py ..\ComfyUI\ --skip_self_update --stable
)
if "%~1"=="" pause

View File

@ -1,11 +0,0 @@
@echo off
..\python_embeded\python.exe .\update.py ..\ComfyUI\
echo
echo This will try to update pytorch and all python dependencies, if you get an error wait for pytorch/xformers to fix their stuff
echo You should not be running this anyways unless you really have to
echo
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo
pause
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 xformers -r ../ComfyUI/requirements.txt pygit2
pause

View File

@ -0,0 +1,28 @@
As of the time of writing this you need this driver for best results:
https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-7-1-1.html
HOW TO RUN:
If you have an AMD gpu:
run_amd_gpu.bat
If you have memory issues you can try disabling the smart memory management by running comfyui with:
run_amd_gpu_disable_smart_memory.bat
IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints
You can download the stable diffusion XL one from: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0_0.9vae.safetensors
RECOMMENDED WAY TO UPDATE:
To update the ComfyUI code: update\update_comfyui.bat
TO SHARE MODELS BETWEEN COMFYUI AND ANOTHER UI:
In the ComfyUI directory you will find a file: extra_model_paths.yaml.example
Rename this file to: extra_model_paths.yaml and edit it with your favorite text editor.

View File

@ -1,2 +1,2 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --use-pytorch-cross-attention
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
pause

View File

@ -0,0 +1,2 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast
pause

View File

@ -4,6 +4,9 @@ if you have a NVIDIA gpu:
run_nvidia_gpu.bat
if you want to enable the fast fp16 accumulation (faster for fp16 models with slightly less quality):
run_nvidia_gpu_fast_fp16_accumulation.bat
To run it in slow CPU mode:
@ -14,7 +17,7 @@ run_cpu.bat
IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints
You can download the stable diffusion 1.5 one from: https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt
You can download the stable diffusion 1.5 one from: https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/blob/main/v1-5-pruned-emaonly-fp16.safetensors
RECOMMENDED WAY TO UPDATE:

View File

@ -0,0 +1,3 @@
..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

View File

@ -0,0 +1,3 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

View File

@ -0,0 +1,3 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

3
.gitattributes vendored Normal file
View File

@ -0,0 +1,3 @@
/web/assets/** linguist-generated
/web/** linguist-vendored
comfy_api_nodes/apis/__init__.py linguist-generated

58
.github/ISSUE_TEMPLATE/bug-report.yml vendored Normal file
View File

@ -0,0 +1,58 @@
name: Bug Report
description: "Something is broken inside of ComfyUI. (Do not use this if you're just having issues and need help, or if the issue relates to a custom node)"
labels: ["Potential Bug"]
body:
- type: markdown
attributes:
value: |
Before submitting a **Bug Report**, please ensure the following:
- **1:** You are running the latest version of ComfyUI.
- **2:** You have your ComfyUI logs and relevant workflow on hand and will post them in this bug report.
- **3:** You confirmed that the bug is not caused by a custom node. You can disable all custom nodes by passing the
`--disable-all-custom-nodes` command line argument. If you have custom nodes, try updating them to the latest version.
- **4:** This is an actual bug in ComfyUI, not just a support question. A bug is when you can specify exact
steps to replicate what went wrong and others will be able to repeat your steps and see the same issue happen.
## Very Important
Please make sure that you post ALL your ComfyUI logs in the bug report. A bug report without logs will likely be ignored.
- type: checkboxes
id: custom-nodes-test
attributes:
label: Custom Node Testing
description: Please confirm you have tried to reproduce the issue with all custom nodes disabled.
options:
- label: I have tried disabling custom nodes and the issue persists (see [how to disable custom nodes](https://docs.comfy.org/troubleshooting/custom-node-issues#step-1%3A-test-with-all-custom-nodes-disabled) if you need help)
required: false
- type: textarea
attributes:
label: Expected Behavior
description: "What you expected to happen."
validations:
required: true
- type: textarea
attributes:
label: Actual Behavior
description: "What actually happened. Please include a screenshot of the issue if possible."
validations:
required: true
- type: textarea
attributes:
label: Steps to Reproduce
description: "Describe how to reproduce the issue. Please be sure to attach a workflow JSON or PNG, ideally one that doesn't require custom nodes to test. If the bug open happens when certain custom nodes are used, most likely that custom node is what has the bug rather than ComfyUI, in which case it should be reported to the node's author."
validations:
required: true
- type: textarea
attributes:
label: Debug Logs
description: "Please copy the output from your terminal logs here."
render: powershell
validations:
required: true
- type: textarea
attributes:
label: Other
description: "Any other additional information you think might be helpful."
validations:
required: false

11
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@ -0,0 +1,11 @@
blank_issues_enabled: true
contact_links:
- name: ComfyUI Frontend Issues
url: https://github.com/Comfy-Org/ComfyUI_frontend/issues
about: Issues related to the ComfyUI frontend (display issues, user interaction bugs), please go to the frontend repo to file the issue
- name: ComfyUI Matrix Space
url: https://app.element.io/#/room/%23comfyui_space%3Amatrix.org
about: The ComfyUI Matrix Space is available for support and general discussion related to ComfyUI (Matrix is like Discord but open source).
- name: Comfy Org Discord
url: https://discord.gg/comfyorg
about: The Comfy Org Discord is available for support and general discussion related to ComfyUI.

View File

@ -0,0 +1,32 @@
name: Feature Request
description: "You have an idea for something new you would like to see added to ComfyUI's core."
labels: [ "Feature" ]
body:
- type: markdown
attributes:
value: |
Before submitting a **Feature Request**, please ensure the following:
**1:** You are running the latest version of ComfyUI.
**2:** You have looked to make sure there is not already a feature that does what you need, and there is not already a Feature Request listed for the same idea.
**3:** This is something that makes sense to add to ComfyUI Core, and wouldn't make more sense as a custom node.
If unsure, ask on the [ComfyUI Matrix Space](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) or the [Comfy Org Discord](https://discord.gg/comfyorg) first.
- type: textarea
attributes:
label: Feature Idea
description: "Describe the feature you want to see."
validations:
required: true
- type: textarea
attributes:
label: Existing Solutions
description: "Please search through available custom nodes / extensions to see if there are existing custom solutions for this. If so, please link the options you found here as a reference."
validations:
required: false
- type: textarea
attributes:
label: Other
description: "Any other additional information you think might be helpful."
validations:
required: false

40
.github/ISSUE_TEMPLATE/user-support.yml vendored Normal file
View File

@ -0,0 +1,40 @@
name: User Support
description: "Use this if you need help with something, or you're experiencing an issue."
labels: [ "User Support" ]
body:
- type: markdown
attributes:
value: |
Before submitting a **User Report** issue, please ensure the following:
**1:** You are running the latest version of ComfyUI.
**2:** You have made an effort to find public answers to your question before asking here. In other words, you googled it first, and scrolled through recent help topics.
If unsure, ask on the [ComfyUI Matrix Space](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) or the [Comfy Org Discord](https://discord.gg/comfyorg) first.
- type: checkboxes
id: custom-nodes-test
attributes:
label: Custom Node Testing
description: Please confirm you have tried to reproduce the issue with all custom nodes disabled.
options:
- label: I have tried disabling custom nodes and the issue persists (see [how to disable custom nodes](https://docs.comfy.org/troubleshooting/custom-node-issues#step-1%3A-test-with-all-custom-nodes-disabled) if you need help)
required: false
- type: textarea
attributes:
label: Your question
description: "Post your question here. Please be as detailed as possible."
validations:
required: true
- type: textarea
attributes:
label: Logs
description: "If your question relates to an issue you're experiencing, please go to `Server` -> `Logs` -> potentially set `View Type` to `Debug` as well, then copypaste all the text into here."
render: powershell
validations:
required: false
- type: textarea
attributes:
label: Other
description: "Any other additional information you think might be helpful."
validations:
required: false

View File

@ -0,0 +1,21 @@
<!-- API_NODE_PR_CHECKLIST: do not remove -->
## API Node PR Checklist
### Scope
- [ ] **Is API Node Change**
### Pricing & Billing
- [ ] **Need pricing update**
- [ ] **No pricing update**
If **Need pricing update**:
- [ ] Metronome rate cards updated
- [ ] Autobilling tests updated and passing
### QA
- [ ] **QA done**
- [ ] **QA not required**
### Comms
- [ ] Informed **Kosinkadink**

58
.github/workflows/api-node-template.yml vendored Normal file
View File

@ -0,0 +1,58 @@
name: Append API Node PR template
on:
pull_request_target:
types: [opened, reopened, synchronize, ready_for_review]
paths:
- 'comfy_api_nodes/**' # only run if these files changed
permissions:
contents: read
pull-requests: write
jobs:
inject:
runs-on: ubuntu-latest
steps:
- name: Ensure template exists and append to PR body
uses: actions/github-script@v7
with:
script: |
const { owner, repo } = context.repo;
const number = context.payload.pull_request.number;
const templatePath = '.github/PULL_REQUEST_TEMPLATE/api-node.md';
const marker = '<!-- API_NODE_PR_CHECKLIST: do not remove -->';
const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: number });
let templateText;
try {
const res = await github.rest.repos.getContent({
owner,
repo,
path: templatePath,
ref: pr.base.ref
});
const buf = Buffer.from(res.data.content, res.data.encoding || 'base64');
templateText = buf.toString('utf8');
} catch (e) {
core.setFailed(`Required PR template not found at "${templatePath}" on ${pr.base.ref}. Please add it to the repo.`);
return;
}
// Enforce the presence of the marker inside the template (for idempotence)
if (!templateText.includes(marker)) {
core.setFailed(`Template at "${templatePath}" does not contain the required marker:\n${marker}\nAdd it so we can detect duplicates safely.`);
return;
}
// If the PR already contains the marker, do not append again.
const body = pr.body || '';
if (body.includes(marker)) {
core.info('Template already present in PR body; nothing to inject.');
return;
}
const newBody = (body ? body + '\n\n' : '') + templateText + '\n';
await github.rest.pulls.update({ owner, repo, pull_number: number, body: newBody });
core.notice('API Node template appended to PR description.');

View File

@ -0,0 +1,40 @@
name: Check for Windows Line Endings
on:
pull_request:
branches: ['*'] # Trigger on all pull requests to any branch
jobs:
check-line-endings:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0 # Fetch all history to compare changes
- name: Check for Windows line endings (CRLF)
run: |
# Get the list of changed files in the PR
CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }})
# Flag to track if CRLF is found
CRLF_FOUND=false
# Loop through each changed file
for FILE in $CHANGED_FILES; do
# Check if the file exists and is a text file
if [ -f "$FILE" ] && file "$FILE" | grep -q "text"; then
# Check for CRLF line endings
if grep -UP '\r$' "$FILE"; then
echo "Error: Windows line endings (CRLF) detected in $FILE"
CRLF_FOUND=true
fi
fi
done
# Exit with error if CRLF was found
if [ "$CRLF_FOUND" = true ]; then
exit 1
fi

View File

@ -0,0 +1,53 @@
# This is the GitHub Workflow that drives full-GPU-enabled tests of pull requests to ComfyUI, when the 'Run-CI-Test' label is added
# Results are reported as checkmarks on the commits, as well as onto https://ci.comfy.org/
name: Pull Request CI Workflow Runs
on:
pull_request_target:
types: [labeled]
jobs:
pr-test-stable:
if: ${{ github.event.label.name == 'Run-CI-Test' }}
strategy:
fail-fast: false
matrix:
os: [macos, linux, windows]
python_version: ["3.9", "3.10", "3.11", "3.12"]
cuda_version: ["12.1"]
torch_version: ["stable"]
include:
- os: macos
runner_label: [self-hosted, macOS]
flags: "--use-pytorch-cross-attention"
- os: linux
runner_label: [self-hosted, Linux]
flags: ""
- os: windows
runner_label: [self-hosted, Windows]
flags: ""
runs-on: ${{ matrix.runner_label }}
steps:
- name: Test Workflows
uses: comfy-org/comfy-action@main
with:
os: ${{ matrix.os }}
python_version: ${{ matrix.python_version }}
torch_version: ${{ matrix.torch_version }}
google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
comfyui_flags: ${{ matrix.flags }}
use_prior_commit: 'true'
comment:
if: ${{ github.event.label.name == 'Run-CI-Test' }}
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:
- uses: actions/github-script@v6
with:
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: '(Automated Bot Message) CI Tests are running, you can view the results at https://ci.comfy.org/?branch=${{ github.event.pull_request.number }}%2Fmerge'
})

View File

@ -0,0 +1,78 @@
name: "Release Stable All Portable Versions"
on:
workflow_dispatch:
inputs:
git_tag:
description: 'Git tag'
required: true
type: string
jobs:
release_nvidia_default:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA Default (cu130)"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu130"
python_minor: "13"
python_patch: "9"
rel_name: "nvidia"
rel_extra_name: ""
test_release: true
secrets: inherit
release_nvidia_cu128:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA cu128"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu128"
python_minor: "12"
python_patch: "10"
rel_name: "nvidia"
rel_extra_name: "_cu128"
test_release: true
secrets: inherit
release_nvidia_cu126:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA cu126"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu126"
python_minor: "12"
python_patch: "10"
rel_name: "nvidia"
rel_extra_name: "_cu126"
test_release: true
secrets: inherit
release_amd_rocm:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release AMD ROCm 7.1.1"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "rocm711"
python_minor: "12"
python_patch: "10"
rel_name: "amd"
rel_extra_name: ""
test_release: false
secrets: inherit

108
.github/workflows/release-webhook.yml vendored Normal file
View File

@ -0,0 +1,108 @@
name: Release Webhook
on:
release:
types: [published]
jobs:
send-webhook:
runs-on: ubuntu-latest
steps:
- name: Send release webhook
env:
WEBHOOK_URL: ${{ secrets.RELEASE_GITHUB_WEBHOOK_URL }}
WEBHOOK_SECRET: ${{ secrets.RELEASE_GITHUB_WEBHOOK_SECRET }}
run: |
# Generate UUID for delivery ID
DELIVERY_ID=$(uuidgen)
HOOK_ID="release-webhook-$(date +%s)"
# Create webhook payload matching GitHub release webhook format
PAYLOAD=$(cat <<EOF
{
"action": "published",
"release": {
"id": ${{ github.event.release.id }},
"node_id": "${{ github.event.release.node_id }}",
"url": "${{ github.event.release.url }}",
"html_url": "${{ github.event.release.html_url }}",
"assets_url": "${{ github.event.release.assets_url }}",
"upload_url": "${{ github.event.release.upload_url }}",
"tag_name": "${{ github.event.release.tag_name }}",
"target_commitish": "${{ github.event.release.target_commitish }}",
"name": ${{ toJSON(github.event.release.name) }},
"body": ${{ toJSON(github.event.release.body) }},
"draft": ${{ github.event.release.draft }},
"prerelease": ${{ github.event.release.prerelease }},
"created_at": "${{ github.event.release.created_at }}",
"published_at": "${{ github.event.release.published_at }}",
"author": {
"login": "${{ github.event.release.author.login }}",
"id": ${{ github.event.release.author.id }},
"node_id": "${{ github.event.release.author.node_id }}",
"avatar_url": "${{ github.event.release.author.avatar_url }}",
"url": "${{ github.event.release.author.url }}",
"html_url": "${{ github.event.release.author.html_url }}",
"type": "${{ github.event.release.author.type }}",
"site_admin": ${{ github.event.release.author.site_admin }}
},
"tarball_url": "${{ github.event.release.tarball_url }}",
"zipball_url": "${{ github.event.release.zipball_url }}",
"assets": ${{ toJSON(github.event.release.assets) }}
},
"repository": {
"id": ${{ github.event.repository.id }},
"node_id": "${{ github.event.repository.node_id }}",
"name": "${{ github.event.repository.name }}",
"full_name": "${{ github.event.repository.full_name }}",
"private": ${{ github.event.repository.private }},
"owner": {
"login": "${{ github.event.repository.owner.login }}",
"id": ${{ github.event.repository.owner.id }},
"node_id": "${{ github.event.repository.owner.node_id }}",
"avatar_url": "${{ github.event.repository.owner.avatar_url }}",
"url": "${{ github.event.repository.owner.url }}",
"html_url": "${{ github.event.repository.owner.html_url }}",
"type": "${{ github.event.repository.owner.type }}",
"site_admin": ${{ github.event.repository.owner.site_admin }}
},
"html_url": "${{ github.event.repository.html_url }}",
"clone_url": "${{ github.event.repository.clone_url }}",
"git_url": "${{ github.event.repository.git_url }}",
"ssh_url": "${{ github.event.repository.ssh_url }}",
"url": "${{ github.event.repository.url }}",
"created_at": "${{ github.event.repository.created_at }}",
"updated_at": "${{ github.event.repository.updated_at }}",
"pushed_at": "${{ github.event.repository.pushed_at }}",
"default_branch": "${{ github.event.repository.default_branch }}",
"fork": ${{ github.event.repository.fork }}
},
"sender": {
"login": "${{ github.event.sender.login }}",
"id": ${{ github.event.sender.id }},
"node_id": "${{ github.event.sender.node_id }}",
"avatar_url": "${{ github.event.sender.avatar_url }}",
"url": "${{ github.event.sender.url }}",
"html_url": "${{ github.event.sender.html_url }}",
"type": "${{ github.event.sender.type }}",
"site_admin": ${{ github.event.sender.site_admin }}
}
}
EOF
)
# Generate HMAC-SHA256 signature
SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" -hex | cut -d' ' -f2)
# Send webhook with required headers
curl -X POST "$WEBHOOK_URL" \
-H "Content-Type: application/json" \
-H "X-GitHub-Event: release" \
-H "X-GitHub-Delivery: $DELIVERY_ID" \
-H "X-GitHub-Hook-ID: $HOOK_ID" \
-H "X-Hub-Signature-256: sha256=$SIGNATURE" \
-H "User-Agent: GitHub-Actions-Webhook/1.0" \
-d "$PAYLOAD" \
--fail --silent --show-error
echo "✅ Release webhook sent successfully"

48
.github/workflows/ruff.yml vendored Normal file
View File

@ -0,0 +1,48 @@
name: Python Linting
on: [push, pull_request]
jobs:
ruff:
name: Run Ruff
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.x
- name: Install Ruff
run: pip install ruff
- name: Run Ruff
run: ruff check .
pylint:
name: Run Pylint
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install requirements
run: |
python -m pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -r requirements.txt
- name: Install Pylint
run: pip install pylint
- name: Run Pylint
run: pylint comfy_api_nodes

170
.github/workflows/stable-release.yml vendored Normal file
View File

@ -0,0 +1,170 @@
name: "Release Stable Version"
on:
workflow_call:
inputs:
git_tag:
description: 'Git tag'
required: true
type: string
cache_tag:
description: 'Cached dependencies tag'
required: true
type: string
default: "cu129"
python_minor:
description: 'Python minor version'
required: true
type: string
default: "13"
python_patch:
description: 'Python patch version'
required: true
type: string
default: "6"
rel_name:
description: 'Release name'
required: true
type: string
default: "nvidia"
rel_extra_name:
description: 'Release extra name'
required: false
type: string
default: ""
test_release:
description: 'Test Release'
required: true
type: boolean
default: true
workflow_dispatch:
inputs:
git_tag:
description: 'Git tag'
required: true
type: string
cache_tag:
description: 'Cached dependencies tag'
required: true
type: string
default: "cu129"
python_minor:
description: 'Python minor version'
required: true
type: string
default: "13"
python_patch:
description: 'Python patch version'
required: true
type: string
default: "6"
rel_name:
description: 'Release name'
required: true
type: string
default: "nvidia"
rel_extra_name:
description: 'Release extra name'
required: false
type: string
default: ""
test_release:
description: 'Test Release'
required: true
type: boolean
default: true
jobs:
package_comfy_windows:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.git_tag }}
fetch-depth: 150
persist-credentials: false
- uses: actions/cache/restore@v4
id: cache
with:
path: |
${{ inputs.cache_tag }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-${{ inputs.cache_tag }}-${{ inputs.python_minor }}
- shell: bash
run: |
mv ${{ inputs.cache_tag }}_python_deps.tar ../
mv update_comfyui_and_python_dependencies.bat ../
cd ..
tar xf ${{ inputs.cache_tag }}_python_deps.tar
pwd
ls
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/3.${{ inputs.python_minor }}.${{ inputs.python_patch }}/python-3.${{ inputs.python_minor }}.${{ inputs.python_patch }}-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo ${{ env.MINOR_VERSION }}
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*
grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
./python.exe -s -m pip install -r requirements_comfyui.txt
rm requirements_comfyui.txt
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
if test -f ./Lib/site-packages/torch/lib/dnnl.lib; then
rm ./Lib/site-packages/torch/lib/dnnl.lib #I don't think this is actually used and I need the space
rm ./Lib/site-packages/torch/lib/libprotoc.lib
rm ./Lib/site-packages/torch/lib/libprotobuf.lib
fi
cd ..
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable
mv python_embeded ComfyUI_windows_portable
mv ComfyUI_copy ComfyUI_windows_portable/ComfyUI
cd ComfyUI_windows_portable
mkdir update
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/windows_${{ inputs.rel_name }}_base_files/* ./
cp ../update_comfyui_and_python_dependencies.bat ./update/
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=768m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable
mv ComfyUI_windows_portable.7z ComfyUI/ComfyUI_windows_portable_${{ inputs.rel_name }}${{ inputs.rel_extra_name }}.7z
- shell: bash
if: ${{ inputs.test_release }}
run: |
cd ..
cd ComfyUI_windows_portable
python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
python_embeded/python.exe -s ./update/update.py ComfyUI/
ls
- name: Upload binaries to release
uses: softprops/action-gh-release@v2
with:
files: ComfyUI_windows_portable_${{ inputs.rel_name }}${{ inputs.rel_extra_name }}.7z
tag_name: ${{ inputs.git_tag }}
draft: true
overwrite_files: true

21
.github/workflows/stale-issues.yml vendored Normal file
View File

@ -0,0 +1,21 @@
name: 'Close stale issues'
on:
schedule:
# Run daily at 4:30 am PT
- cron: '30 11 * * *'
permissions:
issues: write
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v9
with:
stale-issue-message: "This issue is being marked stale because it has not had any activity for 30 days. Reply below within 7 days if your issue still isn't solved, and it will be left open. Otherwise, the issue will be closed automatically."
days-before-stale: 30
days-before-close: 7
stale-issue-label: 'Stale'
only-labels: 'User Support'
exempt-all-assignees: true
exempt-all-milestones: true

31
.github/workflows/test-build.yml vendored Normal file
View File

@ -0,0 +1,31 @@
name: Build package
#
# This workflow is a test of the python package build.
# Install Python dependencies across different Python versions.
#
on:
push:
paths:
- "requirements.txt"
- ".github/workflows/test-build.yml"
jobs:
build:
name: Build Test
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt

98
.github/workflows/test-ci.yml vendored Normal file
View File

@ -0,0 +1,98 @@
# This is the GitHub Workflow that drives automatic full-GPU-enabled tests of all new commits to the master branch of ComfyUI
# Results are reported as checkmarks on the commits, as well as onto https://ci.comfy.org/
name: Full Comfy CI Workflow Runs
on:
push:
branches:
- master
paths-ignore:
- 'app/**'
- 'input/**'
- 'output/**'
- 'notebooks/**'
- 'script_examples/**'
- '.github/**'
- 'web/**'
workflow_dispatch:
jobs:
test-stable:
strategy:
fail-fast: false
matrix:
# os: [macos, linux, windows]
# os: [macos, linux]
os: [linux]
python_version: ["3.10", "3.11", "3.12"]
cuda_version: ["12.1"]
torch_version: ["stable"]
include:
# - os: macos
# runner_label: [self-hosted, macOS]
# flags: "--use-pytorch-cross-attention"
- os: linux
runner_label: [self-hosted, Linux]
flags: ""
# - os: windows
# runner_label: [self-hosted, Windows]
# flags: ""
runs-on: ${{ matrix.runner_label }}
steps:
- name: Test Workflows
uses: comfy-org/comfy-action@main
with:
os: ${{ matrix.os }}
python_version: ${{ matrix.python_version }}
torch_version: ${{ matrix.torch_version }}
google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
comfyui_flags: ${{ matrix.flags }}
# test-win-nightly:
# strategy:
# fail-fast: true
# matrix:
# os: [windows]
# python_version: ["3.9", "3.10", "3.11", "3.12"]
# cuda_version: ["12.1"]
# torch_version: ["nightly"]
# include:
# - os: windows
# runner_label: [self-hosted, Windows]
# flags: ""
# runs-on: ${{ matrix.runner_label }}
# steps:
# - name: Test Workflows
# uses: comfy-org/comfy-action@main
# with:
# os: ${{ matrix.os }}
# python_version: ${{ matrix.python_version }}
# torch_version: ${{ matrix.torch_version }}
# google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
# comfyui_flags: ${{ matrix.flags }}
test-unix-nightly:
strategy:
fail-fast: false
matrix:
# os: [macos, linux]
os: [linux]
python_version: ["3.11"]
cuda_version: ["12.1"]
torch_version: ["nightly"]
include:
# - os: macos
# runner_label: [self-hosted, macOS]
# flags: "--use-pytorch-cross-attention"
- os: linux
runner_label: [self-hosted, Linux]
flags: ""
runs-on: ${{ matrix.runner_label }}
steps:
- name: Test Workflows
uses: comfy-org/comfy-action@main
with:
os: ${{ matrix.os }}
python_version: ${{ matrix.python_version }}
torch_version: ${{ matrix.torch_version }}
google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
comfyui_flags: ${{ matrix.flags }}

30
.github/workflows/test-execution.yml vendored Normal file
View File

@ -0,0 +1,30 @@
name: Execution Tests
on:
push:
branches: [ main, master ]
pull_request:
branches: [ main, master ]
jobs:
test:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}
continue-on-error: true
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install requirements
run: |
python -m pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -r requirements.txt
pip install -r tests-unit/requirements.txt
- name: Run Execution Tests
run: |
python -m pytest tests/execution -v --skip-timing-checks

45
.github/workflows/test-launch.yml vendored Normal file
View File

@ -0,0 +1,45 @@
name: Test server launches without errors
on:
push:
branches: [ main, master ]
pull_request:
branches: [ main, master ]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout ComfyUI
uses: actions/checkout@v4
with:
repository: "comfyanonymous/ComfyUI"
path: "ComfyUI"
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install requirements
run: |
python -m pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -r requirements.txt
pip install wait-for-it
working-directory: ComfyUI
- name: Start ComfyUI server
run: |
python main.py --cpu 2>&1 | tee console_output.log &
wait-for-it --service 127.0.0.1:8188 -t 30
working-directory: ComfyUI
- name: Check for unhandled exceptions in server log
run: |
if grep -qE "Exception|Error" console_output.log; then
echo "Unhandled exception/error found in server log."
exit 1
fi
working-directory: ComfyUI
- uses: actions/upload-artifact@v4
if: always()
with:
name: console-output
path: ComfyUI/console_output.log
retention-days: 30

30
.github/workflows/test-unit.yml vendored Normal file
View File

@ -0,0 +1,30 @@
name: Unit Tests
on:
push:
branches: [ main, master ]
pull_request:
branches: [ main, master ]
jobs:
test:
strategy:
matrix:
os: [ubuntu-latest, windows-2022, macos-latest]
runs-on: ${{ matrix.os }}
continue-on-error: true
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install requirements
run: |
python -m pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -r requirements.txt
- name: Run Unit Tests
run: |
pip install -r tests-unit/requirements.txt
python -m pytest tests-unit

56
.github/workflows/update-api-stubs.yml vendored Normal file
View File

@ -0,0 +1,56 @@
name: Generate Pydantic Stubs from api.comfy.org
on:
schedule:
- cron: '0 0 * * 1'
workflow_dispatch:
jobs:
generate-models:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install 'datamodel-code-generator[http]'
npm install @redocly/cli
- name: Download OpenAPI spec
run: |
curl -o openapi.yaml https://api.comfy.org/openapi
- name: Filter OpenAPI spec with Redocly
run: |
npx @redocly/cli bundle openapi.yaml --output filtered-openapi.yaml --config comfy_api_nodes/redocly.yaml --remove-unused-components
- name: Generate API models
run: |
datamodel-codegen --use-subclass-enum --input filtered-openapi.yaml --output comfy_api_nodes/apis --output-model-type pydantic_v2.BaseModel
- name: Check for changes
id: git-check
run: |
git diff --exit-code comfy_api_nodes/apis || echo "changes=true" >> $GITHUB_OUTPUT
- name: Create Pull Request
if: steps.git-check.outputs.changes == 'true'
uses: peter-evans/create-pull-request@v5
with:
commit-message: 'chore: update API models from OpenAPI spec'
title: 'Update API models from api.comfy.org'
body: |
This PR updates the API models based on the latest api.comfy.org OpenAPI specification.
Generated automatically by a GitHub workflow.
branch: update-api-stubs
delete-branch: true
base: master

58
.github/workflows/update-version.yml vendored Normal file
View File

@ -0,0 +1,58 @@
name: Update Version File
on:
pull_request:
paths:
- "pyproject.toml"
branches:
- master
jobs:
update-version:
runs-on: ubuntu-latest
# Don't run on fork PRs
if: github.event.pull_request.head.repo.full_name == github.repository
permissions:
pull-requests: write
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- name: Update comfyui_version.py
run: |
# Read version from pyproject.toml and update comfyui_version.py
python -c '
import tomllib
# Read version from pyproject.toml
with open("pyproject.toml", "rb") as f:
config = tomllib.load(f)
version = config["project"]["version"]
# Write version to comfyui_version.py
with open("comfyui_version.py", "w") as f:
f.write("# This file is automatically generated by the build process when version is\n")
f.write("# updated in pyproject.toml.\n")
f.write(f"__version__ = \"{version}\"\n")
'
- name: Commit changes
run: |
git config --local user.name "github-actions"
git config --local user.email "github-actions@github.com"
git fetch origin ${{ github.head_ref }}
git checkout -B ${{ github.head_ref }} origin/${{ github.head_ref }}
git add comfyui_version.py
git diff --quiet && git diff --staged --quiet || git commit -m "chore: Update comfyui_version.py to match pyproject.toml"
git push origin HEAD:${{ github.head_ref }}

View File

@ -1,71 +0,0 @@
name: "Windows Release cu118 dependencies"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
build_dependencies:
env:
# you need at least cuda 5.0 for some of the stuff compiled here.
TORCH_CUDA_ARCH_LIST: "5.0+PTX 6.0 6.1 7.0 7.5 8.0 8.6 8.9"
FORCE_CUDA: 1
MAX_JOBS: 1 # will crash otherwise
DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
XFORMERS_BUILD_TYPE: "Release"
runs-on: windows-latest
steps:
- name: Cache Built Dependencies
uses: actions/cache@v3
id: cache-cu118_python_stuff
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: actions/checkout@v3
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
uses: comfyanonymous/cuda-toolkit@test
id: cuda-toolkit
with:
cuda: '11.8.0'
# copied from xformers github
- name: Setup MSVC
uses: ilammy/msvc-dev-cmd@v1
- name: Configure Pagefile
# windows runners will OOM with many CUDA architectures
# we cheat here with a page file
uses: al-cheb/configure-pagefile-action@v1.3
with:
minimum-size: 2GB
# really unfortunate: https://github.com/ilammy/msvc-dev-cmd#name-conflicts-with-shell-bash
- name: Remove link.exe
shell: bash
run: rm /usr/bin/link
- if: steps.cache-cu118_python_stuff.outputs.cache-hit != 'true'
shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
git clone --recurse-submodules https://github.com/facebookresearch/xformers.git
cd xformers
python -m pip install --no-cache-dir wheel setuptools twine
echo building xformers
python setup.py bdist_wheel -d ../temp_wheel_dir/
cd ..
rm -rf xformers
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps

View File

@ -1,30 +0,0 @@
name: "Windows Release cu118 dependencies 2"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps
- uses: actions/cache/save@v3
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118

View File

@ -1,76 +0,0 @@
name: "Windows Release cu118 packaging"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
package_comfyui:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
runs-on: windows-latest
steps:
- uses: actions/cache/restore@v3
id: cache
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118
- shell: bash
run: |
mv cu118_python_deps.tar ../
cd ..
tar xf cu118_python_deps.tar
pwd
ls
- uses: actions/checkout@v3
with:
fetch-depth: 0
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/3.10.9/python-3.10.9-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo 'import site' >> ./python310._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
./python.exe -s -m pip install ../cu118_python_deps/*
sed -i '1i../ComfyUI' ./python310._pth
cd ..
mkdir ComfyUI_windows_portable
mv python_embeded ComfyUI_windows_portable
mv ComfyUI_copy ComfyUI_windows_portable/ComfyUI
cd ComfyUI_windows_portable
mkdir update
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/update_windows_cu118/* ./update/
cp -r ComfyUI/.ci/windows_base_files/* ./
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma -mx=8 -mfb=64 -md=32m -ms=on ComfyUI_windows_portable.7z ComfyUI_windows_portable
mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z
cd ComfyUI_windows_portable
python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
ls
- name: Upload binaries to release
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: new_ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z
tag: "latest"
overwrite: true

View File

@ -0,0 +1,72 @@
name: "Windows Release dependencies"
on:
workflow_dispatch:
inputs:
xformers:
description: 'xformers version'
required: false
type: string
default: ""
extra_dependencies:
description: 'extra dependencies'
required: false
type: string
default: ""
cu:
description: 'cuda version'
required: true
type: string
default: "130"
python_minor:
description: 'python minor version'
required: true
type: string
default: "13"
python_patch:
description: 'python patch version'
required: true
type: string
default: "9"
# push:
# branches:
# - master
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.${{ inputs.python_minor }}.${{ inputs.python_patch }}
- shell: bash
run: |
echo "@echo off
call update_comfyui.bat nopause
echo -
echo This will try to update pytorch and all python dependencies.
echo -
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo -
pause
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2
pause" > update_comfyui_and_python_dependencies.bat
grep -v comfyui requirements.txt > requirements_nocomfyui.txt
python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} ${{ inputs.extra_dependencies }} --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cu }} -r requirements_nocomfyui.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu${{ inputs.cu }}_python_deps
tar cf cu${{ inputs.cu }}_python_deps.tar cu${{ inputs.cu }}_python_deps
- uses: actions/cache/save@v4
with:
path: |
cu${{ inputs.cu }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-cu${{ inputs.cu }}-${{ inputs.python_minor }}

View File

@ -0,0 +1,64 @@
name: "Windows Release dependencies Manual"
on:
workflow_dispatch:
inputs:
torch_dependencies:
description: 'torch dependencies'
required: false
type: string
default: "torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128"
cache_tag:
description: 'Cached dependencies tag'
required: true
type: string
default: "cu128"
python_minor:
description: 'python minor version'
required: true
type: string
default: "12"
python_patch:
description: 'python patch version'
required: true
type: string
default: "10"
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.${{ inputs.python_minor }}.${{ inputs.python_patch }}
- shell: bash
run: |
echo "@echo off
call update_comfyui.bat nopause
echo -
echo This will try to update pytorch and all python dependencies.
echo -
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo -
pause
..\python_embeded\python.exe -s -m pip install --upgrade ${{ inputs.torch_dependencies }} -r ../ComfyUI/requirements.txt pygit2
pause" > update_comfyui_and_python_dependencies.bat
grep -v comfyui requirements.txt > requirements_nocomfyui.txt
python -m pip wheel --no-cache-dir ${{ inputs.torch_dependencies }} -r requirements_nocomfyui.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir ${{ inputs.cache_tag }}_python_deps
tar cf ${{ inputs.cache_tag }}_python_deps.tar ${{ inputs.cache_tag }}_python_deps
- uses: actions/cache/save@v4
with:
path: |
${{ inputs.cache_tag }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-${{ inputs.cache_tag }}-${{ inputs.python_minor }}

View File

@ -2,6 +2,24 @@ name: "Windows Release Nightly pytorch"
on:
workflow_dispatch:
inputs:
cu:
description: 'cuda version'
required: true
type: string
default: "129"
python_minor:
description: 'python minor version'
required: true
type: string
default: "13"
python_patch:
description: 'python patch version'
required: true
type: string
default: "5"
# push:
# branches:
# - master
@ -14,28 +32,33 @@ jobs:
pull-requests: "read"
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v4
fetch-depth: 30
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: '3.11.3'
python-version: 3.${{ inputs.python_minor }}.${{ inputs.python_patch }}
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/3.11.3/python-3.11.3-embed-amd64.zip -o python_embeded.zip
curl https://www.python.org/ftp/python/3.${{ inputs.python_minor }}.${{ inputs.python_patch }}/python-3.${{ inputs.python_minor }}.${{ inputs.python_patch }}-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo 'import site' >> ./python311._pth
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
ls ../temp_wheel_dir
./python.exe -s -m pip install --pre ../temp_wheel_dir/*
sed -i '1i../ComfyUI' ./python311._pth
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
rm ./Lib/site-packages/torch/lib/dnnl.lib #I don't think this is actually used and I need the space
cd ..
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable_nightly_pytorch
mv python_embeded ComfyUI_windows_portable_nightly_pytorch
@ -45,13 +68,15 @@ jobs:
mkdir update
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/windows_base_files/* ./
cp -r ComfyUI/.ci/nightly/update_windows/* ./update/
cp -r ComfyUI/.ci/nightly/windows_base_files/* ./
cp -r ComfyUI/.ci/windows_nvidia_base_files/* ./
cp -r ComfyUI/.ci/windows_nightly_base_files/* ./
echo "call update_comfyui.bat nopause
..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2
pause" > ./update/update_comfyui_and_python_dependencies.bat
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma -mx=8 -mfb=64 -md=32m -ms=on ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch
mv ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI/ComfyUI_windows_portable_nvidia_or_cpu_nightly_pytorch.7z
cd ComfyUI_windows_portable_nightly_pytorch

View File

@ -0,0 +1,106 @@
name: "Windows Release packaging"
on:
workflow_dispatch:
inputs:
cu:
description: 'cuda version'
required: true
type: string
default: "129"
python_minor:
description: 'python minor version'
required: true
type: string
default: "13"
python_patch:
description: 'python patch version'
required: true
type: string
default: "6"
# push:
# branches:
# - master
jobs:
package_comfyui:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
runs-on: windows-latest
steps:
- uses: actions/cache/restore@v4
id: cache
with:
path: |
cu${{ inputs.cu }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-cu${{ inputs.cu }}-${{ inputs.python_minor }}
- shell: bash
run: |
mv cu${{ inputs.cu }}_python_deps.tar ../
mv update_comfyui_and_python_dependencies.bat ../
cd ..
tar xf cu${{ inputs.cu }}_python_deps.tar
pwd
ls
- uses: actions/checkout@v4
with:
fetch-depth: 150
persist-credentials: false
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/3.${{ inputs.python_minor }}.${{ inputs.python_patch }}/python-3.${{ inputs.python_minor }}.${{ inputs.python_patch }}-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
./python.exe -s -m pip install ../cu${{ inputs.cu }}_python_deps/*
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
rm ./Lib/site-packages/torch/lib/dnnl.lib #I don't think this is actually used and I need the space
rm ./Lib/site-packages/torch/lib/libprotoc.lib
rm ./Lib/site-packages/torch/lib/libprotobuf.lib
cd ..
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable
mv python_embeded ComfyUI_windows_portable
mv ComfyUI_copy ComfyUI_windows_portable/ComfyUI
cd ComfyUI_windows_portable
mkdir update
cp -r ComfyUI/.ci/update_windows/* ./update/
cp -r ComfyUI/.ci/windows_nvidia_base_files/* ./
cp ../update_comfyui_and_python_dependencies.bat ./update/
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=768m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable
mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z
cd ComfyUI_windows_portable
python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
python_embeded/python.exe -s ./update/update.py ComfyUI/
ls
- name: Upload binaries to release
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z
tag: "latest"
overwrite: true

27
.gitignore vendored
View File

@ -1,11 +1,26 @@
__pycache__/
*.py[cod]
output/
input/
!input/example.png
models/
temp/
custom_nodes/
/output/
/input/
!/input/example.png
/models/
/temp/
/custom_nodes/
!custom_nodes/example_node.py.example
extra_model_paths.yaml
/.vs
.vscode/
.idea/
venv/
.venv/
/web/extensions/*
!/web/extensions/logging.js.example
!/web/extensions/core/
/tests-ui/data/object_info.json
/user/
*.log
web_custom_versions/
.DS_Store
openapi.yaml
filtered-openapi.yaml
uv.lock

3
CODEOWNERS Normal file
View File

@ -0,0 +1,3 @@
# Admins
* @comfyanonymous
* @kosinkadink

CONTRIBUTING.md

@ -0,0 +1,41 @@
# Contributing to ComfyUI
Welcome, and thank you for your interest in contributing to ComfyUI!
There are several ways in which you can contribute, beyond writing code. The goal of this document is to provide a high-level overview of how you can get involved.
## Asking Questions
Have a question? Instead of opening an issue, please ask on [Discord](https://comfy.org/discord) or [Matrix](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) channels. Our team and the community will help you.
## Providing Feedback
Your comments and feedback are welcome, and the development team is available via a handful of different channels.
See the `#bug-report`, `#feature-request` and `#feedback` channels on Discord.
## Reporting Issues
Have you identified a reproducible problem in ComfyUI? Do you have a feature request? We want to hear about it! Here's how you can report your issue as effectively as possible.
### Look For an Existing Issue
Before you create a new issue, please do a search in [open issues](https://github.com/comfyanonymous/ComfyUI/issues) to see if the issue or feature request has already been filed.
If you find your issue already exists, make relevant comments and add your [reaction](https://github.com/blog/2119-add-reactions-to-pull-requests-issues-and-comments). Use a reaction in place of a "+1" comment:
* 👍 - upvote
* 👎 - downvote
If you cannot find an existing issue that describes your bug or feature, create a new issue. We have an issue template in place to organize new issues.
### Creating Pull Requests
* Please refer to the article on [creating pull requests](https://github.com/comfyanonymous/ComfyUI/wiki/How-to-Contribute-Code) and contributing to this project.
## Thank You
Your contributions to open source, large or small, make great projects like this possible. Thank you for taking the time to contribute.

QUANTIZATION.md

@ -0,0 +1,168 @@
# The Comfy guide to Quantization
## How does quantization work?
Quantization aims to map a high-precision value x_f to a lower-precision format with minimal loss in accuracy. These smaller formats reduce the model's memory footprint and increase throughput by using specialized hardware.
When simply converting a value from FP16 to FP8 using the round-to-nearest method, we might hit two issues:
- The dynamic range of FP16 (-65,504, 65,504) far exceeds FP8 formats like E4M3 (-448, 448) or E5M2 (-57,344, 57,344), potentially resulting in clipped values
- The original values are concentrated in a small range (e.g. (-1, 1)), leaving many FP8 bits "unused"
By using a scaling factor, we aim to map these values into the quantized dtype's range, making use of the full spectrum. One of the easiest and most common approaches is per-tensor absolute-maximum (absmax) scaling.
```
absmax = max(abs(tensor))
scale = absmax / max_dynamic_range_low_precision
# Quantization
tensor_q = (tensor / scale).to(low_precision_dtype)
# De-Quantization
tensor_dq = tensor_q.to(fp16) * scale
tensor_dq ~ tensor
```
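The same recipe in runnable form, as a minimal sketch assuming a recent PyTorch build that ships the `float8_e4m3fn` dtype (the values are illustrative):
```python
import torch

tensor = torch.randn(4, 4, dtype=torch.float16)

FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # 448.0 for E4M3
absmax = tensor.abs().max()
scale = absmax / FP8_MAX

# Quantization: scale into the FP8 range, then cast (round-to-nearest)
tensor_q = (tensor / scale).to(torch.float8_e4m3fn)

# De-Quantization: cast back and undo the scaling
tensor_dq = tensor_q.to(torch.float16) * scale

# tensor_dq approximates tensor; the gap is the quantization error
print((tensor - tensor_dq).abs().max())
```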
Given that additional information (the scaling factor) is needed to "interpret" the quantized values, we describe these as derived datatypes.
## Quantization in Comfy
```
QuantizedTensor (torch.Tensor subclass)
↓ __torch_dispatch__
Two-Level Registry (generic + layout handlers)
↓
MixedPrecisionOps + Metadata Detection
```
### Representation
To represent these derived datatypes, ComfyUI implements them as a subclass of torch.Tensor via the `QuantizedTensor` class found in `comfy/quant_ops.py`.
A `Layout` class defines how a specific quantization format behaves:
- Required parameters
- Quantize method
- De-Quantize method
```python
from comfy.quant_ops import QuantizedLayout
class MyLayout(QuantizedLayout):
@classmethod
def quantize(cls, tensor, **kwargs):
# Convert to quantized format
qdata = ...
params = {'scale': ..., 'orig_dtype': tensor.dtype}
return qdata, params
@staticmethod
def dequantize(qdata, scale, orig_dtype, **kwargs):
return qdata.to(orig_dtype) * scale
```
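Following that template, a concrete per-tensor absmax FP8 layout might look like the sketch below (the class name and parameter choices are hypothetical, not an existing ComfyUI layout):
```python
import torch
from comfy.quant_ops import QuantizedLayout

class AbsMaxFP8Layout(QuantizedLayout):
    """Illustrative per-tensor absmax layout targeting float8_e4m3fn."""
    @classmethod
    def quantize(cls, tensor, **kwargs):
        scale = tensor.abs().max() / torch.finfo(torch.float8_e4m3fn).max
        qdata = (tensor / scale).to(torch.float8_e4m3fn)
        params = {'scale': scale, 'orig_dtype': tensor.dtype}
        return qdata, params

    @staticmethod
    def dequantize(qdata, scale, orig_dtype, **kwargs):
        return qdata.to(orig_dtype) * scale
```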
To run operations on these QuantizedTensors, we use two registry systems to define the supported operations.
The first is a **generic registry** that handles operations common to all quantized formats (e.g., `.to()`, `.clone()`, `.reshape()`).
The second registry is layout-specific and lets each layout implement fast paths such as `nn.Linear`.
```python
from comfy.quant_ops import register_layout_op
@register_layout_op(torch.ops.aten.linear.default, MyLayout)
def my_linear(func, args, kwargs):
# Extract tensors, call optimized kernel
...
```
When `torch.nn.functional.linear()` is called with QuantizedTensor arguments, `__torch_dispatch__` automatically routes to the registered implementation.
For any unsupported operation, QuantizedTensor falls back to calling `dequantize` and dispatching to the high-precision implementation.
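The fallback mechanism is easiest to see in a self-contained toy (this is an illustrative tensor subclass, not ComfyUI's actual `QuantizedTensor`, and it uses the private `_make_wrapper_subclass` helper):
```python
import torch

class ToyQuantTensor(torch.Tensor):
    """Wraps FP8 data plus a scale; dequantizes as a fallback for every op."""
    @staticmethod
    def __new__(cls, qdata, scale):
        return torch.Tensor._make_wrapper_subclass(cls, qdata.shape, dtype=torch.float16)

    def __init__(self, qdata, scale):
        self.qdata, self.scale = qdata, scale

    def dequant(self):
        return self.qdata.to(torch.float16) * self.scale

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        # No fast path registered here: unwrap every ToyQuantTensor argument
        # and re-dispatch the op on plain high-precision tensors.
        unwrap = lambda a: a.dequant() if isinstance(a, ToyQuantTensor) else a
        return func(*[unwrap(a) for a in args],
                    **{k: unwrap(v) for k, v in (kwargs or {}).items()})

t = torch.randn(4, 4, dtype=torch.float16)
q = ToyQuantTensor((t / 0.1).to(torch.float8_e4m3fn), torch.tensor(0.1))
print(q.sum())  # routed through the dequantize fallback
```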
### Mixed Precision
The `MixedPrecisionOps` class (lines 542-648 in `comfy/ops.py`) enables per-layer quantization decisions, allowing different layers in a model to use different precisions. This is activated when a model config contains a `layer_quant_config` dictionary that specifies which layers should be quantized and how.
**Architecture:**
```python
class MixedPrecisionOps(disable_weight_init):
_layer_quant_config = {} # Maps layer names to quantization configs
_compute_dtype = torch.bfloat16 # Default compute / dequantize precision
```
**Key mechanism:**
The custom `Linear._load_from_state_dict()` method inspects each layer during model loading:
- If the layer name is **not** in `_layer_quant_config`: load the weight as a regular tensor in `_compute_dtype`
- If the layer name **is** in `_layer_quant_config`:
  - Load the weight as a `QuantizedTensor` with the specified layout (e.g., `TensorCoreFP8Layout`)
  - Load the associated quantization parameters (scales, block_size, etc.)
**Why it's needed:**
Not all layers tolerate quantization equally. Sensitive operations like final projections can be kept in higher precision, while compute-heavy matmuls are quantized. This provides most of the performance benefits while maintaining quality.
The system is selected in `pick_operations()` when `model_config.layer_quant_config` is present, making it the highest-priority operation mode.
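As an illustration, a per-layer config consumed by this mechanism might look like the following sketch (the exact schema of `layer_quant_config` is simplified here; treat the keys as hypothetical):
```python
layer_quant_config = {
    # compute-heavy matmuls: quantize to FP8
    "model.layers.0.mlp.up_proj":   {"format": "float8_e4m3fn"},
    "model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"},
    # sensitive layers (e.g. final projections) are simply omitted,
    # so they load as regular tensors in _compute_dtype
}
```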
## Checkpoint Format
Quantized checkpoints are stored as standard safetensors files with quantized weight tensors and associated scaling parameters, plus a `_quantization_metadata` JSON entry describing the quantization scheme.
The quantized checkpoint will contain the same layers as the original checkpoint but:
- The weights are stored as quantized values, sometimes using a different storage datatype, e.g. a uint8 container for fp8.
- For each quantized weight, a number of additional scaling parameters are stored alongside it, depending on the recipe.
- The metadata of the final safetensors file contains a `_quantization_metadata` entry describing which layers are quantized and which layout was used.
### Scaling Parameters details
We define 4 possible scaling parameters that should cover most recipes in the near future:
- **weight_scale**: quantization scalers for the weights
- **weight_scale_2**: global scalers in the context of double scaling
- **pre_quant_scale**: scalers used for smoothing salient weights
- **input_scale**: quantization scalers for the activations
| Format | Storage dtype | weight_scale | weight_scale_2 | pre_quant_scale | input_scale |
|--------|---------------|--------------|----------------|-----------------|-------------|
| float8_e4m3fn | float32 | float32 (scalar) | - | - | float32 (scalar) |
You can find the defined formats in `comfy/quant_ops.py` (QUANT_ALGOS).
### Quantization Metadata
The metadata stored alongside the checkpoint contains:
- **format_version**: String to define a version of the standard
- **layers**: A dictionary mapping layer names to their quantization format. The format string maps to the definitions found in `QUANT_ALGOS`.
Example:
```json
{
"_quantization_metadata": {
"format_version": "1.0",
"layers": {
"model.layers.0.mlp.up_proj": "float8_e4m3fn",
"model.layers.0.mlp.down_proj": "float8_e4m3fn",
"model.layers.1.mlp.up_proj": "float8_e4m3fn"
}
}
}
```
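To inspect this entry in an existing checkpoint, something like the sketch below works, assuming the metadata is stored as a JSON string under the `_quantization_metadata` key of the safetensors header (`model.safetensors` is a placeholder path):
```python
import json
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    meta = f.metadata() or {}

quant_meta = json.loads(meta["_quantization_metadata"])
print(quant_meta["format_version"])
print(len(quant_meta["layers"]), "quantized layers")
```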
## Creating Quantized Checkpoints
To create compatible checkpoints, use any quantization tool, provided the output follows the checkpoint format described above and uses a layout defined in `QUANT_ALGOS`.
### Weight Quantization
Weight quantization is straightforward: compute the scaling factor directly from the weight tensor using the absolute-maximum method described earlier. Each layer's weights are quantized independently and stored with their corresponding `weight_scale` parameter.
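A minimal sketch of that per-layer loop, assuming per-tensor absmax FP8 (the key-naming convention below is illustrative, not the exact checkpoint schema):
```python
import torch

def quantize_weight(weight: torch.Tensor):
    """Per-tensor absmax quantization of a single weight matrix."""
    weight_scale = weight.abs().max().float() / torch.finfo(torch.float8_e4m3fn).max
    weight_q = (weight / weight_scale).to(torch.float8_e4m3fn)
    return weight_q, weight_scale

state_dict = {"layer.weight": torch.randn(32, 16)}
quantized = {}
for name, w in state_dict.items():
    qw, scale = quantize_weight(w)
    quantized[name] = qw
    quantized[name.replace("weight", "weight_scale")] = scale
```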
### Calibration (for Activation Quantization)
Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_scale` parameters that cannot be determined from the static weights alone. Since activation values depend on the actual inputs, we use **post-training quantization (PTQ) calibration**:
1. **Collect statistics**: Run inference on N representative samples
2. **Track activations**: Record the absolute maximum (`amax`) of inputs to each quantized layer
3. **Compute scales**: Derive `input_scale` from collected statistics
4. **Store in checkpoint**: Save `input_scale` parameters alongside weights
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.
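A sketch of steps 1-3 using forward pre-hooks; the tiny model and random calibration batches below are stand-ins for a real model and representative data:
```python
import torch

model = torch.nn.Sequential(torch.nn.Linear(16, 32), torch.nn.ReLU(),
                            torch.nn.Linear(32, 16))
calibration_batches = [torch.randn(8, 16) for _ in range(4)]

amax = {}

def make_hook(name):
    def hook(module, inputs):
        # Track the running absolute maximum of each layer's input
        a = inputs[0].abs().max().item()
        amax[name] = max(a, amax.get(name, 0.0))
    return hook

handles = [m.register_forward_pre_hook(make_hook(n))
           for n, m in model.named_modules()
           if isinstance(m, torch.nn.Linear)]

with torch.no_grad():
    for batch in calibration_batches:  # steps 1 and 2: collect amax statistics
        model(batch)

for h in handles:
    h.remove()

FP8_MAX = torch.finfo(torch.float8_e4m3fn).max
input_scales = {n: a / FP8_MAX for n, a in amax.items()}  # step 3: derive input_scale
print(input_scales)
```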

README.md

@ -1,104 +1,270 @@
ComfyUI
=======
A powerful and modular stable diffusion GUI and backend.
-----------
![ComfyUI Screenshot](comfyui_screenshot.png)
<div align="center">
This ui will let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. For some workflow examples and see what ComfyUI can do you can check out:
### [ComfyUI Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
# ComfyUI
**The most powerful and modular visual AI engine and application.**
### [Installing ComfyUI](#installing)
[![Website][website-shield]][website-url]
[![Dynamic JSON Badge][discord-shield]][discord-url]
[![Twitter][twitter-shield]][twitter-url]
[![Matrix][matrix-shield]][matrix-url]
<br>
[![][github-release-shield]][github-release-link]
[![][github-release-date-shield]][github-release-link]
[![][github-downloads-shield]][github-downloads-link]
[![][github-downloads-latest-shield]][github-downloads-link]
[matrix-shield]: https://img.shields.io/badge/Matrix-000000?style=flat&logo=matrix&logoColor=white
[matrix-url]: https://app.element.io/#/room/%23comfyui_space%3Amatrix.org
[website-shield]: https://img.shields.io/badge/ComfyOrg-4285F4?style=flat
[website-url]: https://www.comfy.org/
<!-- Workaround to display total user from https://github.com/badges/shields/issues/4500#issuecomment-2060079995 -->
[discord-shield]: https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fdiscord.com%2Fapi%2Finvites%2Fcomfyorg%3Fwith_counts%3Dtrue&query=%24.approximate_member_count&logo=discord&logoColor=white&label=Discord&color=green&suffix=%20total
[discord-url]: https://www.comfy.org/discord
[twitter-shield]: https://img.shields.io/twitter/follow/ComfyUI
[twitter-url]: https://x.com/ComfyUI
[github-release-shield]: https://img.shields.io/github/v/release/comfyanonymous/ComfyUI?style=flat&sort=semver
[github-release-link]: https://github.com/comfyanonymous/ComfyUI/releases
[github-release-date-shield]: https://img.shields.io/github/release-date/comfyanonymous/ComfyUI?style=flat
[github-downloads-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/total?style=flat
[github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
[github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases
![ComfyUI Screenshot](https://github.com/user-attachments/assets/7ccaf2c1-9b72-41ae-9a89-5688c94b7abe)
</div>
ComfyUI lets you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. Available on Windows, Linux, and macOS.
## Get Started
#### [Desktop Application](https://www.comfy.org/download)
- The easiest way to get started.
- Available on Windows & macOS.
#### [Windows Portable Package](#installing)
- Gets you the latest commits and is completely portable.
- Available on Windows.
#### [Manual Install](#manual-install-windows-linux)
Supports all operating systems and GPU types (NVIDIA, AMD, Intel, Apple Silicon, Ascend).
## [Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).
## Features
- Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
- Fully supports SD1.x and SD2.x
- Image Models
- SD1.x, SD2.x ([unCLIP](https://comfyanonymous.github.io/ComfyUI_examples/unclip/))
- [SDXL](https://comfyanonymous.github.io/ComfyUI_examples/sdxl/), [SDXL Turbo](https://comfyanonymous.github.io/ComfyUI_examples/sdturbo/)
- [Stable Cascade](https://comfyanonymous.github.io/ComfyUI_examples/stable_cascade/)
- [SD3 and SD3.5](https://comfyanonymous.github.io/ComfyUI_examples/sd3/)
- Pixart Alpha and Sigma
- [AuraFlow](https://comfyanonymous.github.io/ComfyUI_examples/aura_flow/)
- [HunyuanDiT](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_dit/)
- [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/)
- [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
- [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
- [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
- [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
- [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/)
- [Z Image](https://comfyanonymous.github.io/ComfyUI_examples/z_image/)
- Image Editing Models
- [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
- [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
- [HiDream E1.1](https://comfyanonymous.github.io/ComfyUI_examples/hidream/#hidream-e11)
- [Qwen Image Edit](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/#edit-model)
- Video Models
- [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/)
- [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)
- [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
- [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
- [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
- [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/)
- Audio Models
- [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
- [ACE Step](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
- 3D Models
- [Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2)
- Asynchronous Queue system
- Many optimizations: Only re-executes the parts of the workflow that change between executions.
- Command line option: ```--lowvram``` to make it work on GPUs with less than 3GB vram (enabled automatically on GPUs with low vram)
- Smart memory management: can automatically run large models on GPUs with as low as 1GB vram with smart offloading.
- Works even if you don't have a GPU with: ```--cpu``` (slow)
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models.
- Can load ckpt and safetensors: All in one checkpoints or standalone diffusion models, VAEs and CLIP models.
- Safe loading of ckpt, pt, pth, etc.. files.
- Embeddings/Textual inversion
- [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/)
- [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/)
- Loading full workflows (with seeds) from generated PNG files.
- Loading full workflows (with seeds) from generated PNG, WebP and FLAC files.
- Saving/Loading workflows as Json files.
- Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones.
- [Area Composition](https://comfyanonymous.github.io/ComfyUI_examples/area_composition/)
- [Inpainting](https://comfyanonymous.github.io/ComfyUI_examples/inpaint/) with both regular and inpainting models.
- [ControlNet and T2I-Adapter](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
- [Upscale Models (ESRGAN, ESRGAN variants, SwinIR, Swin2SR, etc...)](https://comfyanonymous.github.io/ComfyUI_examples/upscale_models/)
- [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/)
- [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/)
- Starts up very fast.
- Works fully offline: will never download anything.
- [Model Merging](https://comfyanonymous.github.io/ComfyUI_examples/model_merging/)
- [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
- Latent previews with [TAESD](#how-to-show-high-quality-previews)
- Works fully offline: core will never download anything unless you want to.
- Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview).
- [Config file](extra_model_paths.yaml.example) to set the search paths for models.
Workflow examples can be found on the [Examples page](https://comfyanonymous.github.io/ComfyUI_examples/)
## Release Process
ComfyUI follows a weekly release cycle targeting Monday, but this regularly changes because of model releases or large changes to the codebase. There are three interconnected repositories:
1. **[ComfyUI Core](https://github.com/comfyanonymous/ComfyUI)**
- Releases a new stable version (e.g., v0.7.0) roughly every week.
- Commits outside of the stable release tags may be very unstable and break many custom nodes.
- Serves as the foundation for the desktop release
2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)**
- Builds a new release using the latest stable core version
3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)**
- Weekly frontend updates are merged into the core repository
- Features are frozen for the upcoming core release
- Development continues for the next release cycle
## Shortcuts
| Keybind | Explanation |
| - | - |
| Ctrl + Enter | Queue up current graph for generation |
| Ctrl + Shift + Enter | Queue up current graph as first for generation |
| Ctrl + S | Save workflow |
| Ctrl + O | Load workflow |
| Ctrl + A | Select all nodes |
| Ctrl + M | Mute/unmute selected nodes |
| Delete/Backspace | Delete selected nodes |
| Ctrl + Delete/Backspace | Delete the current graph |
| Space | Move the canvas around when held and moving the cursor |
| Ctrl/Shift + Click | Add clicked node to selection |
| Ctrl + C/Ctrl + V | Copy and paste selected nodes (without maintaining connections to outputs of unselected nodes) |
| Ctrl + C/Ctrl + Shift + V| Copy and paste selected nodes (maintaining connections from outputs of unselected nodes to inputs of pasted nodes) |
| Shift + Drag | Move multiple selected nodes at the same time |
| Ctrl + D | Load default graph |
| Q | Toggle visibility of the queue |
| H | Toggle visibility of history |
| R | Refresh graph |
| Double-Click LMB | Open node quick search palette |
| Keybind | Explanation |
|------------------------------------|--------------------------------------------------------------------------------------------------------------------|
| `Ctrl` + `Enter` | Queue up current graph for generation |
| `Ctrl` + `Shift` + `Enter` | Queue up current graph as first for generation |
| `Ctrl` + `Alt` + `Enter` | Cancel current generation |
| `Ctrl` + `Z`/`Ctrl` + `Y` | Undo/Redo |
| `Ctrl` + `S` | Save workflow |
| `Ctrl` + `O` | Load workflow |
| `Ctrl` + `A` | Select all nodes |
| `Alt `+ `C` | Collapse/uncollapse selected nodes |
| `Ctrl` + `M` | Mute/unmute selected nodes |
| `Ctrl` + `B` | Bypass selected nodes (acts like the node was removed from the graph and the wires reconnected through) |
| `Delete`/`Backspace` | Delete selected nodes |
| `Ctrl` + `Backspace` | Delete the current graph |
| `Space` | Move the canvas around when held and moving the cursor |
| `Ctrl`/`Shift` + `Click` | Add clicked node to selection |
| `Ctrl` + `C`/`Ctrl` + `V` | Copy and paste selected nodes (without maintaining connections to outputs of unselected nodes) |
| `Ctrl` + `C`/`Ctrl` + `Shift` + `V` | Copy and paste selected nodes (maintaining connections from outputs of unselected nodes to inputs of pasted nodes) |
| `Shift` + `Drag` | Move multiple selected nodes at the same time |
| `Ctrl` + `D` | Load default graph |
| `Alt` + `+` | Canvas Zoom in |
| `Alt` + `-` | Canvas Zoom out |
| `Ctrl` + `Shift` + LMB + Vertical drag | Canvas Zoom in/out |
| `P` | Pin/Unpin selected nodes |
| `Ctrl` + `G` | Group selected nodes |
| `Q` | Toggle visibility of the queue |
| `H` | Toggle visibility of history |
| `R` | Refresh graph |
| `F` | Show/Hide menu |
| `.` | Fit view to selection (Whole graph when nothing is selected) |
| Double-Click LMB | Open node quick search palette |
| `Shift` + Drag | Move multiple wires at once |
| `Ctrl` + `Alt` + LMB | Disconnect all wires from clicked slot |
Ctrl can also be replaced with Cmd instead for MacOS users
`Ctrl` can also be replaced with `Cmd` instead for macOS users
# Installing
## Windows
## Windows Portable
There is a portable standalone build for Windows that should work for running on Nvidia GPUs or for running on your CPU only on the [releases page](https://github.com/comfyanonymous/ComfyUI/releases).
### [Direct link to download](https://github.com/comfyanonymous/ComfyUI/releases/download/latest/ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z)
### [Direct link to download](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia.7z)
Just download, extract and run. Make sure you put your Stable Diffusion checkpoints/models (the huge ckpt/safetensors files) in: ComfyUI\models\checkpoints
Simply download, extract with [7-Zip](https://7-zip.org) or with Windows Explorer on recent Windows versions, and run. For smaller models you normally only need to put the checkpoints (the huge ckpt/safetensors files) in: ComfyUI\models\checkpoints, but many of the larger models have multiple files. Make sure to follow the instructions to know which subfolder of ComfyUI\models\ to put them in.
If you have trouble extracting it, right click the file -> properties -> unblock
Update your Nvidia drivers if it doesn't start.
#### Alternative Downloads:
[Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z)
[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z).
[Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).
#### How do I share models between another UI and ComfyUI?
See the [Config file](extra_model_paths.yaml.example) to set the search paths for models. In the standalone windows build you can find this file in the ComfyUI directory. Rename this file to extra_model_paths.yaml and edit it with your favorite text editor.
## Colab Notebook
To run it on colab or paperspace you can use my [Colab Notebook](notebooks/comfyui_colab.ipynb) here: [Link to open with google colab](https://colab.research.google.com/github/comfyanonymous/ComfyUI/blob/master/notebooks/comfyui_colab.ipynb)
## [comfy-cli](https://docs.comfy.org/comfy-cli/getting-started)
You can install and start ComfyUI using comfy-cli:
```bash
pip install comfy-cli
comfy install
```
## Manual Install (Windows, Linux)
Python 3.14 works but you may encounter issues with the torch compile node. The free threaded variant is still missing some dependencies.
Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12
### Instructions:
Git clone this repo.
Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints
Put your VAE in: models/vae
At the time of writing this pytorch has issues with python versions higher than 3.10 so make sure your python/pip versions are 3.10.
### AMD GPUs (Linux only)
### AMD GPUs (Linux)
AMD users can install ROCm and pytorch with pip if you don't have it already installed. This is the command to install the stable version:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.4.2```
```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
This is the command to install the nightly with ROCm 7.0 which might have some performance improvements:
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1```
### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only.
These have less hardware support than the builds above, but they work on Windows. You also need to install the pytorch version specific to your hardware.
RDNA 3 (RX 7000 series):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-dgpu/```
RDNA 3.5 (Strix halo/Ryzen AI Max+ 365):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx1151/```
RDNA 4 (RX 9000 series):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx120X-all/```
### Intel GPUs (Windows and Linux)
Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
1. To install PyTorch xpu, use the following command:
```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu```
This is the command to install the Pytorch xpu nightly which might have some performance improvements:
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu```
### NVIDIA
Nvidia users should install torch and xformers using this command:
Nvidia users should install stable pytorch using this command:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 xformers```
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu130```
This is the command to install pytorch nightly instead which might have performance improvements.
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130```
#### Troubleshooting
@ -118,33 +284,60 @@ After this you should have everything installed and can proceed to running Comfy
### Others:
[Intel Arc](https://github.com/comfyanonymous/ComfyUI/discussions/476)
#### Apple Mac silicon
Mac/MPS: There is basic support in the code but until someone makes some install instruction you are on your own.
You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS version.
### I already have another UI for Stable Diffusion installed do I really have to install all of these dependencies?
1. Install pytorch nightly. For instructions, read the [Accelerated PyTorch training on Mac](https://developer.apple.com/metal/pytorch/) Apple Developer guide (make sure to install the latest pytorch nightly).
1. Follow the [ComfyUI manual installation](#manual-install-windows-linux) instructions for Windows and Linux.
1. Install the ComfyUI [dependencies](#dependencies). If you have another Stable Diffusion UI [you might be able to reuse the dependencies](#i-already-have-another-ui-for-stable-diffusion-installed-do-i-really-have-to-install-all-of-these-dependencies).
1. Launch ComfyUI by running `python main.py`
You don't. If you have another UI installed and working with its own python venv you can use that venv to run ComfyUI. You can open up your favorite terminal and activate it:
> **Note**: Remember to add your models, VAE, LoRAs etc. to the corresponding Comfy folders, as discussed in [ComfyUI manual installation](#manual-install-windows-linux).
```source path_to_other_sd_gui/venv/bin/activate```
#### Ascend NPUs
or on Windows:
For models compatible with Ascend Extension for PyTorch (torch_npu). To get started, ensure your environment meets the prerequisites outlined on the [installation](https://ascend.github.io/docs/sources/ascend/quick_install.html) page. Here's a step-by-step guide tailored to your platform and installation method:
With Powershell: ```"path_to_other_sd_gui\venv\Scripts\Activate.ps1"```
1. Begin by installing the recommended or newer kernel version for Linux as specified in the Installation page of torch-npu, if necessary.
2. Proceed with the installation of Ascend Basekit, which includes the driver, firmware, and CANN, following the instructions provided for your specific platform.
3. Next, install the necessary packages for torch-npu by adhering to the platform-specific instructions on the [Installation](https://ascend.github.io/docs/sources/pytorch/install.html#pytorch) page.
4. Finally, adhere to the [ComfyUI manual installation](#manual-install-windows-linux) guide for Linux. Once all components are installed, you can run ComfyUI as described earlier.
With cmd.exe: ```"path_to_other_sd_gui\venv\Scripts\activate.bat"```
#### Cambricon MLUs
And then you can use that terminal to run ComfyUI without installing any dependencies. Note that the venv folder might be called something else depending on the SD UI.
For models compatible with Cambricon Extension for PyTorch (torch_mlu). Here's a step-by-step guide tailored to your platform and installation method:
1. Install the Cambricon CNToolkit by adhering to the platform-specific instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cntoolkit_3.7.2/cntoolkit_install_3.7.2/index.html)
2. Next, install the PyTorch(torch_mlu) following the instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cambricon_pytorch_1.17.0/user_guide_1.9/index.html)
3. Launch ComfyUI by running `python main.py`
#### Iluvatar Corex
For models compatible with Iluvatar Extension for PyTorch. Here's a step-by-step guide tailored to your platform and installation method:
1. Install the Iluvatar Corex Toolkit by adhering to the platform-specific instructions on the [Installation](https://support.iluvatar.com/#/DocumentCentre?id=1&nameCenter=2&productId=520117912052801536)
2. Launch ComfyUI by running `python main.py`
# Running
```python main.py```
### For AMD 6700, 6600 and maybe others
### For AMD cards not officially supported by ROCm
Try running it with this command if you have issues:
```HSA_OVERRIDE_GFX_VERSION=10.3.0 python main.py```
For 6700, 6600 and maybe other RDNA2 or older: ```HSA_OVERRIDE_GFX_VERSION=10.3.0 python main.py```
For AMD 7600 and maybe other RDNA3 cards: ```HSA_OVERRIDE_GFX_VERSION=11.0.0 python main.py```
### AMD ROCm Tips
You can enable experimental memory-efficient attention in ComfyUI on some AMD GPUs with recent pytorch using this command; it should already be enabled by default on RDNA3. If this improves speed for you on the latest pytorch on your GPU, please report it so that I can enable it by default.
```TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 python main.py --use-pytorch-cross-attention```
You can also try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run.
# Notes
@ -158,39 +351,78 @@ You can use () to change emphasis of a word or phrase like: (good code:1.2) or (
You can use {day|night}, for wildcard/dynamic prompts. With this syntax "{wild|card|test}" will be randomly replaced by either "wild", "card" or "test" by the frontend every time you queue the prompt. To use {} characters in your actual prompt escape them like: \\{ or \\}.
Dynamic prompts also support C-style comments, like `// comment` or `/* comment */`.
To use a textual inversion concepts/embeddings in a text prompt put them in the models/embeddings directory and use them in the CLIPTextEncode node like this (you can omit the .pt extension):
```embedding:embedding_filename.pt```
### Fedora
To get python 3.10 on fedora:
```dnf install python3.10```
## How to show high-quality previews?
Then you can:
Use ```--preview-method auto``` to enable previews.
```python3.10 -m ensurepip```
The default installation includes a fast latent preview method that's low-resolution. To enable higher-quality previews with [TAESD](https://github.com/madebyollin/taesd), download the [taesd_decoder.pth, taesdxl_decoder.pth, taesd3_decoder.pth and taef1_decoder.pth](https://github.com/madebyollin/taesd/) and place them in the `models/vae_approx` folder. Once they're installed, restart ComfyUI and launch it with `--preview-method taesd` to enable high-quality previews.
This will let you use: pip3.10 to install all the dependencies.
## How to use TLS/SSL?
Generate a self-signed certificate (not appropriate for shared/production use) and key by running the command: `openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=StateName/L=CityName/O=CompanyName/OU=CompanySectionName/CN=CommonNameOrHostname"`
## How to increase generation speed?
Use `--tls-keyfile key.pem --tls-certfile cert.pem` to enable TLS/SSL, the app will now be accessible with `https://...` instead of `http://...`.
Make sure you use the regular loaders/Load Checkpoint node to load checkpoints. It will auto pick the right settings depending on your GPU.
You can set this command line setting to disable the upcasting to fp32 in some cross attention operations which will increase your speed. Note that this will very likely give you black images on SD2.x models. If you use xformers this option does not do anything.
```--dont-upcast-attention```
> Note: Windows users can use [alexisrolland/docker-openssl](https://github.com/alexisrolland/docker-openssl) or one of the [3rd party binary distributions](https://wiki.openssl.org/index.php/Binaries) to run the command example above.
<br/><br/>If you use a container, note that the volume mount `-v` can be a relative path so `... -v ".\:/openssl-certs" ...` would create the key & cert files in the current directory of your command prompt or powershell terminal.
## Support and dev channel
[Discord](https://comfy.org/discord): Try the #help or #feedback channels.
[Matrix space: #comfyui_space:matrix.org](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) (it's like discord but open source).
See also: [https://www.comfy.org/](https://www.comfy.org/)
## Frontend Development
As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). This repository now hosts the compiled JS (from TS/Vue) under the `web/` directory.
### Reporting Issues and Requesting Features
For any bugs, issues, or feature requests related to the frontend, please use the [ComfyUI Frontend repository](https://github.com/Comfy-Org/ComfyUI_frontend). This will help us manage and address frontend-specific concerns more efficiently.
### Using the Latest Frontend
The new frontend is now the default for ComfyUI. However, please note:
1. The frontend in the main ComfyUI repository is updated fortnightly.
2. Daily releases are available in the separate frontend repository.
To use the most up-to-date frontend version:
1. For the latest daily release, launch ComfyUI with this command line argument:
```
--front-end-version Comfy-Org/ComfyUI_frontend@latest
```
2. For a specific version, replace `latest` with the desired version number:
```
--front-end-version Comfy-Org/ComfyUI_frontend@1.2.2
```
This approach allows you to easily switch between the stable fortnightly release and the cutting-edge daily updates, or even specific versions for testing purposes.
### Accessing the Legacy Frontend
If you need to use the legacy frontend for any reason, you can access it using the following command line argument:
```
--front-end-version Comfy-Org/ComfyUI_legacy_frontend@latest
```
This will use a snapshot of the legacy frontend preserved in the [ComfyUI Legacy Frontend repository](https://github.com/Comfy-Org/ComfyUI_legacy_frontend).
# QA
### Why did you make this?
### Which GPU should I buy for this?
I wanted to learn how Stable Diffusion worked in detail. I also wanted something clean and powerful that would let me experiment with SD without restrictions.
### Who is this for?
This is for anyone that wants to make complex workflows with SD or that wants to learn more how SD works. The interface follows closely how SD works and the code should be much more simple to understand than other SD UIs.
[See this page for some recommendations](https://github.com/comfyanonymous/ComfyUI/wiki/Which-GPU-should-I-buy-for-ComfyUI)

alembic.ini

@ -0,0 +1,84 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
# Use forward slashes (/) also on windows to provide an os agnostic path
script_location = alembic_db
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to alembic_db/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic_db/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
# version_path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
version_path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = sqlite:///user/comfyui.db
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

alembic_db/README.md

@ -0,0 +1,4 @@
## Generate new revision
1. Update models in `/app/database/models.py`
2. Run `alembic revision --autogenerate -m "{your message}"`

alembic_db/env.py

@ -0,0 +1,64 @@
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
from app.database.models import Base
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection, target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

alembic_db/script.py.mako

@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}


@ -0,0 +1,3 @@
# ComfyUI Internal Routes
All routes under the `/internal` path are designated for **internal use by ComfyUI only**. These routes are not intended for use by external applications and may change at any time without notice.


@ -0,0 +1,73 @@
from aiohttp import web
from typing import Optional
from folder_paths import folder_names_and_paths, get_directory_by_type
from api_server.services.terminal_service import TerminalService
import app.logger
import os
class InternalRoutes:
'''
The top level web router for internal routes: /internal/*
The endpoints here should NOT be depended upon. It is for ComfyUI frontend use only.
Check README.md for more information.
'''
def __init__(self, prompt_server):
self.routes: web.RouteTableDef = web.RouteTableDef()
self._app: Optional[web.Application] = None
self.prompt_server = prompt_server
self.terminal_service = TerminalService(prompt_server)
def setup_routes(self):
@self.routes.get('/logs')
async def get_logs(request):
return web.json_response("".join([(l["t"] + " - " + l["m"]) for l in app.logger.get_logs()]))
@self.routes.get('/logs/raw')
async def get_raw_logs(request):
self.terminal_service.update_size()
return web.json_response({
"entries": list(app.logger.get_logs()),
"size": {"cols": self.terminal_service.cols, "rows": self.terminal_service.rows}
})
@self.routes.patch('/logs/subscribe')
async def subscribe_logs(request):
json_data = await request.json()
client_id = json_data["clientId"]
enabled = json_data["enabled"]
if enabled:
self.terminal_service.subscribe(client_id)
else:
self.terminal_service.unsubscribe(client_id)
return web.Response(status=200)
@self.routes.get('/folder_paths')
async def get_folder_paths(request):
response = {}
for key in folder_names_and_paths:
response[key] = folder_names_and_paths[key][0]
return web.json_response(response)
@self.routes.get('/files/{directory_type}')
async def get_files(request: web.Request) -> web.Response:
directory_type = request.match_info['directory_type']
if directory_type not in ("output", "input", "temp"):
return web.json_response({"error": "Invalid directory type"}, status=400)
directory = get_directory_by_type(directory_type)
sorted_files = sorted(
(entry for entry in os.scandir(directory) if entry.is_file()),
key=lambda entry: -entry.stat().st_mtime
)
return web.json_response([entry.name for entry in sorted_files], status=200)
def get_app(self):
if self._app is None:
self._app = web.Application()
self.setup_routes()
self._app.add_routes(self.routes)
return self._app


@ -0,0 +1,60 @@
from app.logger import on_flush
import os
import shutil
class TerminalService:
def __init__(self, server):
self.server = server
self.cols = None
self.rows = None
self.subscriptions = set()
on_flush(self.send_messages)
def get_terminal_size(self):
try:
size = os.get_terminal_size()
return (size.columns, size.lines)
except OSError:
try:
size = shutil.get_terminal_size()
return (size.columns, size.lines)
except OSError:
return (80, 24) # fallback to 80x24
def update_size(self):
columns, lines = self.get_terminal_size()
changed = False
if columns != self.cols:
self.cols = columns
changed = True
if lines != self.rows:
self.rows = lines
changed = True
if changed:
return {"cols": self.cols, "rows": self.rows}
return None
def subscribe(self, client_id):
self.subscriptions.add(client_id)
def unsubscribe(self, client_id):
self.subscriptions.discard(client_id)
def send_messages(self, entries):
if not len(entries) or not len(self.subscriptions):
return
new_size = self.update_size()
for client_id in self.subscriptions.copy(): # prevent: Set changed size during iteration
if client_id not in self.server.sockets:
# Automatically unsub if the socket has disconnected
self.unsubscribe(client_id)
continue
self.server.send_sync("logs", {"entries": entries, "size": new_size}, client_id)


@ -0,0 +1,42 @@
import os
from typing import List, Union, TypedDict, Literal
from typing_extensions import TypeGuard
class FileInfo(TypedDict):
name: str
path: str
type: Literal["file"]
size: int
class DirectoryInfo(TypedDict):
name: str
path: str
type: Literal["directory"]
FileSystemItem = Union[FileInfo, DirectoryInfo]
def is_file_info(item: FileSystemItem) -> TypeGuard[FileInfo]:
return item["type"] == "file"
class FileSystemOperations:
@staticmethod
def walk_directory(directory: str) -> List[FileSystemItem]:
file_list: List[FileSystemItem] = []
for root, dirs, files in os.walk(directory):
for name in files:
file_path = os.path.join(root, name)
relative_path = os.path.relpath(file_path, directory)
file_list.append({
"name": name,
"path": relative_path,
"type": "file",
"size": os.path.getsize(file_path)
})
for name in dirs:
dir_path = os.path.join(root, name)
relative_path = os.path.relpath(dir_path, directory)
file_list.append({
"name": name,
"path": relative_path,
"type": "directory"
})
return file_list

app/app_settings.py

@ -0,0 +1,65 @@
import os
import json
from aiohttp import web
import logging
class AppSettings():
def __init__(self, user_manager):
self.user_manager = user_manager
def get_settings(self, request):
try:
file = self.user_manager.get_request_user_filepath(
request,
"comfy.settings.json"
)
except KeyError as e:
logging.error("User settings not found.")
raise web.HTTPUnauthorized() from e
if os.path.isfile(file):
try:
with open(file) as f:
return json.load(f)
except:
logging.error(f"The user settings file is corrupted: {file}")
return {}
else:
return {}
def save_settings(self, request, settings):
file = self.user_manager.get_request_user_filepath(
request, "comfy.settings.json")
with open(file, "w") as f:
f.write(json.dumps(settings, indent=4))
def add_routes(self, routes):
@routes.get("/settings")
async def get_settings(request):
return web.json_response(self.get_settings(request))
@routes.get("/settings/{id}")
async def get_setting(request):
value = None
settings = self.get_settings(request)
setting_id = request.match_info.get("id", None)
if setting_id and setting_id in settings:
value = settings[setting_id]
return web.json_response(value)
@routes.post("/settings")
async def post_settings(request):
settings = self.get_settings(request)
new_settings = await request.json()
self.save_settings(request, {**settings, **new_settings})
return web.Response(status=200)
@routes.post("/settings/{id}")
async def post_setting(request):
setting_id = request.match_info.get("id", None)
if not setting_id:
return web.Response(status=400)
settings = self.get_settings(request)
settings[setting_id] = await request.json()
self.save_settings(request, settings)
return web.Response(status=200)

app/custom_node_manager.py

@ -0,0 +1,145 @@
from __future__ import annotations
import os
import folder_paths
import glob
from aiohttp import web
import json
import logging
from functools import lru_cache
from utils.json_util import merge_json_recursive
# Extra locale files to load into main.json
EXTRA_LOCALE_FILES = [
"nodeDefs.json",
"commands.json",
"settings.json",
]
def safe_load_json_file(file_path: str) -> dict:
if not os.path.exists(file_path):
return {}
try:
with open(file_path, "r", encoding="utf-8") as f:
return json.load(f)
except json.JSONDecodeError:
logging.error(f"Error loading {file_path}")
return {}
class CustomNodeManager:
@lru_cache(maxsize=1)
def build_translations(self):
"""Load all custom nodes translations during initialization. Translations are
expected to be loaded from `locales/` folder.
The folder structure is expected to be the following:
- custom_nodes/
- custom_node_1/
- locales/
- en/
- main.json
- commands.json
- settings.json
returned translations are expected to be in the following format:
{
"en": {
"nodeDefs": {...},
"commands": {...},
"settings": {...},
...{other main.json keys}
}
}
"""
translations = {}
for folder in folder_paths.get_folder_paths("custom_nodes"):
# Sort glob results for deterministic ordering
for custom_node_dir in sorted(glob.glob(os.path.join(folder, "*/"))):
locales_dir = os.path.join(custom_node_dir, "locales")
if not os.path.exists(locales_dir):
continue
for lang_dir in glob.glob(os.path.join(locales_dir, "*/")):
lang_code = os.path.basename(os.path.dirname(lang_dir))
if lang_code not in translations:
translations[lang_code] = {}
# Load main.json
main_file = os.path.join(lang_dir, "main.json")
node_translations = safe_load_json_file(main_file)
# Load extra locale files
for extra_file in EXTRA_LOCALE_FILES:
extra_file_path = os.path.join(lang_dir, extra_file)
key = extra_file.split(".")[0]
json_data = safe_load_json_file(extra_file_path)
if json_data:
node_translations[key] = json_data
if node_translations:
translations[lang_code] = merge_json_recursive(
translations[lang_code], node_translations
)
return translations
def add_routes(self, routes, webapp, loadedModules):
example_workflow_folder_names = ["example_workflows", "example", "examples", "workflow", "workflows"]
@routes.get("/workflow_templates")
async def get_workflow_templates(request):
"""Returns a web response that contains the map of custom_nodes names and their associated workflow templates. The ones without templates are omitted."""
files = []
for folder in folder_paths.get_folder_paths("custom_nodes"):
for folder_name in example_workflow_folder_names:
pattern = os.path.join(folder, f"*/{folder_name}/*.json")
matched_files = glob.glob(pattern)
files.extend(matched_files)
workflow_templates_dict = (
{}
) # custom_nodes folder name -> example workflow names
for file in files:
custom_nodes_name = os.path.basename(
os.path.dirname(os.path.dirname(file))
)
workflow_name = os.path.splitext(os.path.basename(file))[0]
workflow_templates_dict.setdefault(custom_nodes_name, []).append(
workflow_name
)
return web.json_response(workflow_templates_dict)
# Serve workflow templates from custom nodes.
for module_name, module_dir in loadedModules:
for folder_name in example_workflow_folder_names:
workflows_dir = os.path.join(module_dir, folder_name)
if os.path.exists(workflows_dir):
if folder_name != "example_workflows":
logging.debug(
"Found example workflow folder '%s' for custom node '%s', consider renaming it to 'example_workflows'",
folder_name, module_name)
webapp.add_routes(
[
web.static(
"/api/workflow_templates/" + module_name, workflows_dir
)
]
)
@routes.get("/i18n")
async def get_i18n(request):
"""Returns translations from all custom nodes' locales folders."""
return web.json_response(self.build_translations())

app/database/db.py

@ -0,0 +1,112 @@
import logging
import os
import shutil
from app.logger import log_startup_warning
from utils.install_util import get_missing_requirements_message
from comfy.cli_args import args
_DB_AVAILABLE = False
Session = None
try:
from alembic import command
from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
_DB_AVAILABLE = True
except ImportError as e:
log_startup_warning(
f"""
------------------------------------------------------------------------
Error importing dependencies: {e}
{get_missing_requirements_message()}
This error is happening because ComfyUI now uses a local sqlite database.
------------------------------------------------------------------------
""".strip()
)
def dependencies_available():
"""
Temporary function to check if the dependencies are available
"""
return _DB_AVAILABLE
def can_create_session():
"""
Temporary function to check if the database is available to create a session
During initial release there may be environmental issues (or missing dependencies) that prevent the database from being created
"""
return dependencies_available() and Session is not None
def get_alembic_config():
root_path = os.path.join(os.path.dirname(__file__), "../..")
config_path = os.path.abspath(os.path.join(root_path, "alembic.ini"))
scripts_path = os.path.abspath(os.path.join(root_path, "alembic_db"))
config = Config(config_path)
config.set_main_option("script_location", scripts_path)
config.set_main_option("sqlalchemy.url", args.database_url)
return config
def get_db_path():
url = args.database_url
if url.startswith("sqlite:///"):
return url.split("///")[1]
else:
raise ValueError(f"Unsupported database URL '{url}'.")
def init_db():
db_url = args.database_url
logging.debug(f"Database URL: {db_url}")
db_path = get_db_path()
db_exists = os.path.exists(db_path)
config = get_alembic_config()
# Check if we need to upgrade
engine = create_engine(db_url)
conn = engine.connect()
context = MigrationContext.configure(conn)
current_rev = context.get_current_revision()
script = ScriptDirectory.from_config(config)
target_rev = script.get_current_head()
if target_rev is None:
logging.warning("No target revision found.")
elif current_rev != target_rev:
# Backup the database pre upgrade
backup_path = db_path + ".bkp"
if db_exists:
shutil.copy(db_path, backup_path)
else:
backup_path = None
try:
command.upgrade(config, target_rev)
logging.info(f"Database upgraded from {current_rev} to {target_rev}")
except Exception as e:
if backup_path:
# Restore the database from backup if upgrade fails
shutil.copy(backup_path, db_path)
os.remove(backup_path)
logging.exception("Error upgrading database: ")
raise e
global Session
Session = sessionmaker(bind=engine)
def create_session():
return Session()

app/database/models.py

@ -0,0 +1,14 @@
from sqlalchemy.orm import declarative_base
Base = declarative_base()
def to_dict(obj):
fields = obj.__table__.columns.keys()
return {
field: (val.to_dict() if hasattr(val, "to_dict") else val)
for field in fields
if (val := getattr(obj, field))
}
# TODO: Define models here
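
A hypothetical model makes the helper concrete; note the walrus filter means falsy column values (None, 0, "") are dropped from the result:
from sqlalchemy import Column, Integer, String

class Tag(Base):  # illustrative model, not part of this diff
    __tablename__ = "tags"
    id = Column(Integer, primary_key=True)
    name = Column(String)

# to_dict(Tag(id=1, name="style")) -> {"id": 1, "name": "style"}
# to_dict(Tag(id=2, name="")) -> {"id": 2}   # falsy values are filtered out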

app/frontend_management.py Normal file

@@ -0,0 +1,457 @@
from __future__ import annotations
import argparse
import logging
import os
import re
import sys
import tempfile
import zipfile
import importlib.resources
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Dict, TypedDict, Optional
from aiohttp import web
from importlib.metadata import version
import requests
from typing_extensions import NotRequired
from utils.install_util import get_missing_requirements_message, requirements_path
from comfy.cli_args import DEFAULT_VERSION_STRING
import app.logger
def frontend_install_warning_message():
return f"""
{get_missing_requirements_message()}
This error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.
""".strip()
def parse_version(version: str) -> tuple[int, int, int]:
return tuple(map(int, version.split(".")))
def is_valid_version(version: str) -> bool:
"""Validate if a string is a valid semantic version (X.Y.Z format)."""
pattern = r"^(\d+)\.(\d+)\.(\d+)$"
return bool(re.match(pattern, version))
def get_installed_frontend_version():
"""Get the currently installed frontend package version."""
frontend_version_str = version("comfyui-frontend-package")
return frontend_version_str
def get_required_frontend_version():
"""Get the required frontend version from requirements.txt."""
try:
with open(requirements_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line.startswith("comfyui-frontend-package=="):
version_str = line.split("==")[-1]
if not is_valid_version(version_str):
logging.error(f"Invalid version format in requirements.txt: {version_str}")
return None
return version_str
logging.error("comfyui-frontend-package not found in requirements.txt")
return None
except FileNotFoundError:
logging.error("requirements.txt not found. Cannot determine required frontend version.")
return None
except Exception as e:
logging.error(f"Error reading requirements.txt: {e}")
return None
def check_frontend_version():
"""Check if the frontend version is up to date."""
try:
frontend_version_str = get_installed_frontend_version()
frontend_version = parse_version(frontend_version_str)
required_frontend_str = get_required_frontend_version()
required_frontend = parse_version(required_frontend_str)
if frontend_version < required_frontend:
app.logger.log_startup_warning(
f"""
________________________________________________________________________
WARNING WARNING WARNING WARNING WARNING
Installed frontend version {".".join(map(str, frontend_version))} is lower than the recommended version {".".join(map(str, required_frontend))}.
{frontend_install_warning_message()}
________________________________________________________________________
""".strip()
)
else:
logging.info("ComfyUI frontend version: {}".format(frontend_version_str))
except Exception as e:
logging.error(f"Failed to check frontend version: {e}")
REQUEST_TIMEOUT = 10 # seconds
class Asset(TypedDict):
    name: str
    url: str
class Release(TypedDict):
id: int
tag_name: str
name: str
prerelease: bool
created_at: str
published_at: str
body: str
assets: NotRequired[list[Asset]]
@dataclass
class FrontEndProvider:
owner: str
repo: str
@property
def folder_name(self) -> str:
return f"{self.owner}_{self.repo}"
@property
def release_url(self) -> str:
return f"https://api.github.com/repos/{self.owner}/{self.repo}/releases"
@cached_property
def all_releases(self) -> list[Release]:
releases = []
api_url = self.release_url
while api_url:
response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
response.raise_for_status() # Raises an HTTPError if the response was an error
releases.extend(response.json())
# GitHub uses the Link header to provide pagination links. Check if it exists and update api_url accordingly.
if "next" in response.links:
api_url = response.links["next"]["url"]
else:
api_url = None
return releases
@cached_property
def latest_release(self) -> Release:
latest_release_url = f"{self.release_url}/latest"
response = requests.get(latest_release_url, timeout=REQUEST_TIMEOUT)
response.raise_for_status() # Raises an HTTPError if the response was an error
return response.json()
@cached_property
def latest_prerelease(self) -> Release:
"""Get the latest pre-release version - even if it's older than the latest release"""
release = [release for release in self.all_releases if release["prerelease"]]
if not release:
raise ValueError("No pre-releases found")
# GitHub returns releases in reverse chronological order, so first is latest
return release[0]
def get_release(self, version: str) -> Release:
if version == "latest":
return self.latest_release
elif version == "prerelease":
return self.latest_prerelease
else:
for release in self.all_releases:
if release["tag_name"] in [version, f"v{version}"]:
return release
raise ValueError(f"Version {version} not found in releases")
def download_release_asset_zip(release: Release, destination_path: str) -> None:
"""Download dist.zip from github release."""
asset_url = None
for asset in release.get("assets", []):
if asset["name"] == "dist.zip":
asset_url = asset["url"]
break
if not asset_url:
raise ValueError("dist.zip not found in the release assets")
# Use a temporary file to download the zip content
with tempfile.TemporaryFile() as tmp_file:
headers = {"Accept": "application/octet-stream"}
response = requests.get(
asset_url, headers=headers, allow_redirects=True, timeout=REQUEST_TIMEOUT
)
response.raise_for_status() # Ensure we got a successful response
# Write the content to the temporary file
tmp_file.write(response.content)
# Go back to the beginning of the temporary file
tmp_file.seek(0)
# Extract the zip file content to the destination path
with zipfile.ZipFile(tmp_file, "r") as zip_ref:
zip_ref.extractall(destination_path)
class FrontendManager:
CUSTOM_FRONTENDS_ROOT = str(Path(__file__).parents[1] / "web_custom_versions")
@classmethod
    def get_required_frontend_version(cls) -> Optional[str]:
"""Get the required frontend package version."""
return get_required_frontend_version()
@classmethod
    def get_installed_templates_version(cls) -> Optional[str]:
"""Get the currently installed workflow templates package version."""
try:
templates_version_str = version("comfyui-workflow-templates")
return templates_version_str
except Exception:
return None
@classmethod
    def get_required_templates_version(cls) -> Optional[str]:
"""Get the required workflow templates version from requirements.txt."""
try:
with open(requirements_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line.startswith("comfyui-workflow-templates=="):
version_str = line.split("==")[-1]
if not is_valid_version(version_str):
logging.error(f"Invalid templates version format in requirements.txt: {version_str}")
return None
return version_str
logging.error("comfyui-workflow-templates not found in requirements.txt")
return None
except FileNotFoundError:
logging.error("requirements.txt not found. Cannot determine required templates version.")
return None
except Exception as e:
logging.error(f"Error reading requirements.txt: {e}")
return None
@classmethod
def default_frontend_path(cls) -> str:
try:
import comfyui_frontend_package
return str(importlib.resources.files(comfyui_frontend_package) / "static")
except ImportError:
logging.error(
f"""
********** ERROR ***********
comfyui-frontend-package is not installed.
{frontend_install_warning_message()}
********** ERROR ***********
""".strip()
)
sys.exit(-1)
@classmethod
def template_asset_map(cls) -> Optional[Dict[str, str]]:
"""Return a mapping of template asset names to their absolute paths."""
try:
from comfyui_workflow_templates import (
get_asset_path,
iter_templates,
)
except ImportError:
logging.error(
f"""
********** ERROR ***********
comfyui-workflow-templates is not installed.
{frontend_install_warning_message()}
********** ERROR ***********
""".strip()
)
return None
try:
template_entries = list(iter_templates())
except Exception as exc:
logging.error(f"Failed to enumerate workflow templates: {exc}")
return None
asset_map: Dict[str, str] = {}
try:
for entry in template_entries:
for asset in entry.assets:
asset_map[asset.filename] = get_asset_path(
entry.template_id, asset.filename
)
except Exception as exc:
logging.error(f"Failed to resolve template asset paths: {exc}")
return None
if not asset_map:
logging.error("No workflow template assets found. Did the packages install correctly?")
return None
return asset_map
@classmethod
def legacy_templates_path(cls) -> Optional[str]:
"""Return the legacy templates directory shipped inside the meta package."""
try:
import comfyui_workflow_templates
return str(
importlib.resources.files(comfyui_workflow_templates) / "templates"
)
except ImportError:
logging.error(
f"""
********** ERROR ***********
comfyui-workflow-templates is not installed.
{frontend_install_warning_message()}
********** ERROR ***********
""".strip()
)
return None
@classmethod
    def embedded_docs_path(cls) -> Optional[str]:
"""Get the path to embedded documentation"""
try:
import comfyui_embedded_docs
return str(
importlib.resources.files(comfyui_embedded_docs) / "docs"
)
except ImportError:
logging.info("comfyui-embedded-docs package not found")
return None
@classmethod
def parse_version_string(cls, value: str) -> tuple[str, str, str]:
"""
Args:
value (str): The version string to parse.
Returns:
            tuple[str, str, str]: A tuple containing the repo owner, repo name, and version.
Raises:
argparse.ArgumentTypeError: If the version string is invalid.
"""
VERSION_PATTERN = r"^([a-zA-Z0-9][a-zA-Z0-9-]{0,38})/([a-zA-Z0-9_.-]+)@(v?\d+\.\d+\.\d+[-._a-zA-Z0-9]*|latest|prerelease)$"
match_result = re.match(VERSION_PATTERN, value)
if match_result is None:
raise argparse.ArgumentTypeError(f"Invalid version string: {value}")
return match_result.group(1), match_result.group(2), match_result.group(3)
@classmethod
def init_frontend_unsafe(
cls, version_string: str, provider: Optional[FrontEndProvider] = None
) -> str:
"""
Initializes the frontend for the specified version.
Args:
version_string (str): The version string.
provider (FrontEndProvider, optional): The provider to use. Defaults to None.
Returns:
str: The path to the initialized frontend.
Raises:
            Exception: If there is an error during the initialization process;
                the main error sources are request timeouts and invalid URLs.
"""
if version_string == DEFAULT_VERSION_STRING:
check_frontend_version()
return cls.default_frontend_path()
repo_owner, repo_name, version = cls.parse_version_string(version_string)
if version.startswith("v"):
expected_path = str(
Path(cls.CUSTOM_FRONTENDS_ROOT)
/ f"{repo_owner}_{repo_name}"
/ version.lstrip("v")
)
if os.path.exists(expected_path):
logging.info(
f"Using existing copy of specific frontend version tag: {repo_owner}/{repo_name}@{version}"
)
return expected_path
logging.info(
f"Initializing frontend: {repo_owner}/{repo_name}@{version}, requesting version details from GitHub..."
)
provider = provider or FrontEndProvider(repo_owner, repo_name)
release = provider.get_release(version)
semantic_version = release["tag_name"].lstrip("v")
web_root = str(
Path(cls.CUSTOM_FRONTENDS_ROOT) / provider.folder_name / semantic_version
)
if not os.path.exists(web_root):
try:
os.makedirs(web_root, exist_ok=True)
logging.info(
"Downloading frontend(%s) version(%s) to (%s)",
provider.folder_name,
semantic_version,
web_root,
)
logging.debug(release)
download_release_asset_zip(release, destination_path=web_root)
finally:
# Clean up the directory if it is empty, i.e. the download failed
if not os.listdir(web_root):
os.rmdir(web_root)
return web_root
@classmethod
def init_frontend(cls, version_string: str) -> str:
"""
Initializes the frontend with the specified version string.
Args:
version_string (str): The version string to initialize the frontend with.
Returns:
str: The path of the initialized frontend.
"""
try:
return cls.init_frontend_unsafe(version_string)
except Exception as e:
logging.error("Failed to initialize frontend: %s", e)
logging.info("Falling back to the default frontend.")
check_frontend_version()
return cls.default_frontend_path()
@classmethod
def template_asset_handler(cls):
assets = cls.template_asset_map()
if not assets:
return None
async def serve_template(request: web.Request) -> web.StreamResponse:
rel_path = request.match_info.get("path", "")
target = assets.get(rel_path)
if target is None:
raise web.HTTPNotFound()
return web.FileResponse(target)
return serve_template
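
To make the accepted version strings concrete, a few illustrative parse_version_string calls (the repo and versions below are examples only):
# Pattern: owner/repo@vX.Y.Z, owner/repo@latest, or owner/repo@prerelease
FrontendManager.parse_version_string("Comfy-Org/ComfyUI_frontend@v1.32.10")
# -> ("Comfy-Org", "ComfyUI_frontend", "v1.32.10")
FrontendManager.parse_version_string("Comfy-Org/ComfyUI_frontend@latest")
# -> ("Comfy-Org", "ComfyUI_frontend", "latest")
FrontendManager.parse_version_string("not-a-version")  # raises argparse.ArgumentTypeError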

app/logger.py Normal file

@@ -0,0 +1,98 @@
from collections import deque
from datetime import datetime
import io
import logging
import sys
import threading
logs = None
stdout_interceptor = None
stderr_interceptor = None
class LogInterceptor(io.TextIOWrapper):
def __init__(self, stream, *args, **kwargs):
buffer = stream.buffer
encoding = stream.encoding
super().__init__(buffer, *args, **kwargs, encoding=encoding, line_buffering=stream.line_buffering)
self._lock = threading.Lock()
self._flush_callbacks = []
self._logs_since_flush = []
def write(self, data):
entry = {"t": datetime.now().isoformat(), "m": data}
with self._lock:
self._logs_since_flush.append(entry)
            # Simple handling for CR so an in-progress line is overwritten rather than
            # appended, else the logs fill up with progress messages
            if isinstance(data, str) and data.startswith("\r") and len(logs) > 0 and not logs[-1]["m"].endswith("\n"):
logs.pop()
logs.append(entry)
super().write(data)
def flush(self):
super().flush()
for cb in self._flush_callbacks:
cb(self._logs_since_flush)
self._logs_since_flush = []
def on_flush(self, callback):
self._flush_callbacks.append(callback)
def get_logs():
return logs
def on_flush(callback):
if stdout_interceptor is not None:
stdout_interceptor.on_flush(callback)
if stderr_interceptor is not None:
stderr_interceptor.on_flush(callback)
def setup_logger(log_level: str = 'INFO', capacity: int = 300, use_stdout: bool = False):
global logs
if logs:
return
# Override output streams and log to buffer
logs = deque(maxlen=capacity)
global stdout_interceptor
global stderr_interceptor
stdout_interceptor = sys.stdout = LogInterceptor(sys.stdout)
stderr_interceptor = sys.stderr = LogInterceptor(sys.stderr)
# Setup default global logger
logger = logging.getLogger()
logger.setLevel(log_level)
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(logging.Formatter("%(message)s"))
if use_stdout:
# Only errors and critical to stderr
        stream_handler.addFilter(lambda record: record.levelno >= logging.ERROR)
# Lesser to stdout
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logging.Formatter("%(message)s"))
stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR)
logger.addHandler(stdout_handler)
logger.addHandler(stream_handler)
STARTUP_WARNINGS = []
def log_startup_warning(msg):
logging.warning(msg)
STARTUP_WARNINGS.append(msg)
def print_startup_warnings():
for s in STARTUP_WARNINGS:
logging.warning(s)
STARTUP_WARNINGS.clear()
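
A hedged sketch of wiring a consumer to the interceptors; the callback receives the entries accumulated since the previous flush, each shaped like {"t": iso_timestamp, "m": message}:
import logging
from app.logger import setup_logger, on_flush, get_logs

setup_logger(log_level="INFO", capacity=300)  # replaces sys.stdout/stderr with LogInterceptor

def forward_entries(entries):  # illustrative consumer, e.g. pushing logs over a websocket
    for e in entries:
        _ = (e["t"], e["m"])

on_flush(forward_entries)
logging.info("hello")  # also lands in the ring buffer returned by get_logs()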

app/model_manager.py Normal file

@@ -0,0 +1,195 @@
from __future__ import annotations
import os
import base64
import json
import time
import logging
import folder_paths
import glob
import comfy.utils
from aiohttp import web
from PIL import Image
from io import BytesIO
from folder_paths import map_legacy, filter_files_extensions, filter_files_content_types
class ModelFileManager:
def __init__(self) -> None:
self.cache: dict[str, tuple[list[dict], dict[str, float], float]] = {}
def get_cache(self, key: str, default=None) -> tuple[list[dict], dict[str, float], float] | None:
return self.cache.get(key, default)
def set_cache(self, key: str, value: tuple[list[dict], dict[str, float], float]):
self.cache[key] = value
def clear_cache(self):
self.cache.clear()
def add_routes(self, routes):
# NOTE: This is an experiment to replace `/models`
@routes.get("/experiment/models")
async def get_model_folders(request):
model_types = list(folder_paths.folder_names_and_paths.keys())
folder_black_list = ["configs", "custom_nodes"]
output_folders: list[dict] = []
for folder in model_types:
if folder in folder_black_list:
continue
output_folders.append({"name": folder, "folders": folder_paths.get_folder_paths(folder)})
return web.json_response(output_folders)
# NOTE: This is an experiment to replace `/models/{folder}`
@routes.get("/experiment/models/{folder}")
async def get_all_models(request):
folder = request.match_info.get("folder", None)
            if folder not in folder_paths.folder_names_and_paths:
return web.Response(status=404)
files = self.get_model_file_list(folder)
return web.json_response(files)
@routes.get("/experiment/models/preview/{folder}/{path_index}/{filename:.*}")
async def get_model_preview(request):
folder_name = request.match_info.get("folder", None)
path_index = int(request.match_info.get("path_index", None))
filename = request.match_info.get("filename", None)
            if folder_name not in folder_paths.folder_names_and_paths:
return web.Response(status=404)
folders = folder_paths.folder_names_and_paths[folder_name]
folder = folders[0][path_index]
full_filename = os.path.join(folder, filename)
previews = self.get_model_previews(full_filename)
default_preview = previews[0] if len(previews) > 0 else None
if default_preview is None or (isinstance(default_preview, str) and not os.path.isfile(default_preview)):
return web.Response(status=404)
try:
with Image.open(default_preview) as img:
img_bytes = BytesIO()
img.save(img_bytes, format="WEBP")
img_bytes.seek(0)
return web.Response(body=img_bytes.getvalue(), content_type="image/webp")
            except Exception:
return web.Response(status=404)
def get_model_file_list(self, folder_name: str):
folder_name = map_legacy(folder_name)
folders = folder_paths.folder_names_and_paths[folder_name]
output_list: list[dict] = []
for index, folder in enumerate(folders[0]):
if not os.path.isdir(folder):
continue
out = self.cache_model_file_list_(folder)
if out is None:
out = self.recursive_search_models_(folder, index)
self.set_cache(folder, out)
output_list.extend(out[0])
return output_list
def cache_model_file_list_(self, folder: str):
model_file_list_cache = self.get_cache(folder)
if model_file_list_cache is None:
return None
if not os.path.isdir(folder):
return None
if os.path.getmtime(folder) != model_file_list_cache[1]:
return None
for x in model_file_list_cache[1]:
time_modified = model_file_list_cache[1][x]
folder = x
if os.path.getmtime(folder) != time_modified:
return None
return model_file_list_cache
    def recursive_search_models_(self, directory: str, pathIndex: int) -> tuple[list[dict], dict[str, float], float]:
if not os.path.isdir(directory):
return [], {}, time.perf_counter()
excluded_dir_names = [".git"]
# TODO use settings
include_hidden_files = False
        result: list[dict] = []
dirs: dict[str, float] = {}
for dirpath, subdirs, filenames in os.walk(directory, followlinks=True, topdown=True):
subdirs[:] = [d for d in subdirs if d not in excluded_dir_names]
if not include_hidden_files:
subdirs[:] = [d for d in subdirs if not d.startswith(".")]
filenames = [f for f in filenames if not f.startswith(".")]
filenames = filter_files_extensions(filenames, folder_paths.supported_pt_extensions)
for file_name in filenames:
try:
full_path = os.path.join(dirpath, file_name)
relative_path = os.path.relpath(full_path, directory)
# Get file metadata
file_info = {
"name": relative_path,
"pathIndex": pathIndex,
"modified": os.path.getmtime(full_path), # Add modification time
"created": os.path.getctime(full_path), # Add creation time
"size": os.path.getsize(full_path) # Add file size
}
result.append(file_info)
except Exception as e:
logging.warning(f"Warning: Unable to access {file_name}. Error: {e}. Skipping this file.")
continue
for d in subdirs:
path: str = os.path.join(dirpath, d)
try:
dirs[path] = os.path.getmtime(path)
except FileNotFoundError:
logging.warning(f"Warning: Unable to access {path}. Skipping this path.")
continue
return result, dirs, time.perf_counter()
def get_model_previews(self, filepath: str) -> list[str | BytesIO]:
dirname = os.path.dirname(filepath)
if not os.path.exists(dirname):
return []
basename = os.path.splitext(filepath)[0]
match_files = glob.glob(f"{basename}.*", recursive=False)
image_files = filter_files_content_types(match_files, "image")
safetensors_file = next(filter(lambda x: x.endswith(".safetensors"), match_files), None)
safetensors_metadata = {}
result: list[str | BytesIO] = []
for filename in image_files:
_basename = os.path.splitext(filename)[0]
if _basename == basename:
result.append(filename)
if _basename == f"{basename}.preview":
result.append(filename)
if safetensors_file:
safetensors_filepath = os.path.join(dirname, safetensors_file)
header = comfy.utils.safetensors_header(safetensors_filepath, max_size=8*1024*1024)
if header:
safetensors_metadata = json.loads(header)
safetensors_images = safetensors_metadata.get("__metadata__", {}).get("ssmd_cover_images", None)
if safetensors_images:
safetensors_images = json.loads(safetensors_images)
for image in safetensors_images:
result.append(BytesIO(base64.b64decode(image)))
return result
def __exit__(self, exc_type, exc_value, traceback):
self.clear_cache()
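
For orientation, a client-side sketch of the experimental endpoints, assuming a local server on the default port (8188 here); response shapes follow the handlers above:
import requests  # illustrative client

base = "http://127.0.0.1:8188"
folders = requests.get(f"{base}/experiment/models").json()
# -> [{"name": "checkpoints", "folders": [...]}, ...] (configs/custom_nodes are excluded)
files = requests.get(f"{base}/experiment/models/checkpoints").json()
# -> [{"name": "...", "pathIndex": 0, "modified": ..., "created": ..., "size": ...}, ...]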

app/subgraph_manager.py Normal file

@@ -0,0 +1,112 @@
from __future__ import annotations
from typing import TypedDict
import os
import folder_paths
import glob
from aiohttp import web
import hashlib
class Source:
custom_node = "custom_node"
class SubgraphEntry(TypedDict):
source: str
"""
Source of subgraph - custom_nodes vs templates.
"""
path: str
"""
Relative path of the subgraph file.
For custom nodes, will be the relative directory like <custom_node_dir>/subgraphs/<name>.json
"""
name: str
"""
Name of subgraph file.
"""
info: CustomNodeSubgraphEntryInfo
"""
Additional info about subgraph; in the case of custom_nodes, will contain nodepack name
"""
data: str
class CustomNodeSubgraphEntryInfo(TypedDict):
node_pack: str
"""Node pack name."""
class SubgraphManager:
def __init__(self):
        self.cached_custom_node_subgraphs: dict[str, SubgraphEntry] | None = None
async def load_entry_data(self, entry: SubgraphEntry):
with open(entry['path'], 'r') as f:
entry['data'] = f.read()
return entry
async def sanitize_entry(self, entry: SubgraphEntry | None, remove_data=False) -> SubgraphEntry | None:
if entry is None:
return None
entry = entry.copy()
entry.pop('path', None)
if remove_data:
entry.pop('data', None)
return entry
async def sanitize_entries(self, entries: dict[str, SubgraphEntry], remove_data=False) -> dict[str, SubgraphEntry]:
entries = entries.copy()
for key in list(entries.keys()):
entries[key] = await self.sanitize_entry(entries[key], remove_data)
return entries
async def get_custom_node_subgraphs(self, loadedModules, force_reload=False):
# if not forced to reload and cached, return cache
if not force_reload and self.cached_custom_node_subgraphs is not None:
return self.cached_custom_node_subgraphs
# Load subgraphs from custom nodes
subfolder = "subgraphs"
        subgraphs_dict: dict[str, SubgraphEntry] = {}
for folder in folder_paths.get_folder_paths("custom_nodes"):
pattern = os.path.join(folder, f"*/{subfolder}/*.json")
matched_files = glob.glob(pattern)
for file in matched_files:
# replace backslashes with forward slashes
file = file.replace('\\', '/')
info: CustomNodeSubgraphEntryInfo = {
"node_pack": "custom_nodes." + file.split('/')[-3]
}
source = Source.custom_node
# hash source + path to make sure id will be as unique as possible, but
# reproducible across backend reloads
id = hashlib.sha256(f"{source}{file}".encode()).hexdigest()
entry: SubgraphEntry = {
"source": Source.custom_node,
"name": os.path.splitext(os.path.basename(file))[0],
"path": file,
"info": info,
}
subgraphs_dict[id] = entry
self.cached_custom_node_subgraphs = subgraphs_dict
return subgraphs_dict
async def get_custom_node_subgraph(self, id: str, loadedModules):
subgraphs = await self.get_custom_node_subgraphs(loadedModules)
entry: SubgraphEntry = subgraphs.get(id, None)
if entry is not None and entry.get('data', None) is None:
await self.load_entry_data(entry)
return entry
def add_routes(self, routes, loadedModules):
@routes.get("/global_subgraphs")
async def get_global_subgraphs(request):
subgraphs_dict = await self.get_custom_node_subgraphs(loadedModules)
# NOTE: we may want to include other sources of global subgraphs such as templates in the future;
# that's the reasoning for the current implementation
return web.json_response(await self.sanitize_entries(subgraphs_dict, remove_data=True))
@routes.get("/global_subgraphs/{id}")
async def get_global_subgraph(request):
id = request.match_info.get("id", None)
subgraph = await self.get_custom_node_subgraph(id, loadedModules)
return web.json_response(await self.sanitize_entry(subgraph))
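
Since entry ids are sha256(source + path), a client can reproduce them; the path below is hypothetical:
import hashlib

source = "custom_node"
file = "custom_nodes/my_pack/subgraphs/upscale.json"  # hypothetical forward-slash path
entry_id = hashlib.sha256(f"{source}{file}".encode()).hexdigest()
# GET /global_subgraphs            -> {entry_id: entry} with 'path' and 'data' stripped
# GET /global_subgraphs/{entry_id} -> the entry including its JSON 'data'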

app/user_manager.py Normal file

@@ -0,0 +1,456 @@
from __future__ import annotations
import json
import os
import re
import uuid
import glob
import shutil
import logging
from aiohttp import web
from urllib import parse
from comfy.cli_args import args
import folder_paths
from .app_settings import AppSettings
from typing import TypedDict
default_user = "default"
class FileInfo(TypedDict):
path: str
size: int
modified: int
created: int
def get_file_info(path: str, relative_to: str) -> FileInfo:
return {
"path": os.path.relpath(path, relative_to).replace(os.sep, '/'),
"size": os.path.getsize(path),
"modified": os.path.getmtime(path),
"created": os.path.getctime(path)
}
class UserManager():
def __init__(self):
user_directory = folder_paths.get_user_directory()
self.settings = AppSettings(self)
if not os.path.exists(user_directory):
os.makedirs(user_directory, exist_ok=True)
if not args.multi_user:
logging.warning("****** User settings have been changed to be stored on the server instead of browser storage. ******")
logging.warning("****** For multi-user setups add the --multi-user CLI argument to enable multiple user profiles. ******")
if args.multi_user:
if os.path.isfile(self.get_users_file()):
with open(self.get_users_file()) as f:
self.users = json.load(f)
else:
self.users = {}
else:
self.users = {"default": "default"}
def get_users_file(self):
return os.path.join(folder_paths.get_user_directory(), "users.json")
def get_request_user_id(self, request):
user = "default"
if args.multi_user and "comfy-user" in request.headers:
user = request.headers["comfy-user"]
# Block System Users (use same error message to prevent probing)
if user.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise KeyError("Unknown user: " + user)
if user not in self.users:
raise KeyError("Unknown user: " + user)
return user
def get_request_user_filepath(self, request, file, type="userdata", create_dir=True):
if type == "userdata":
root_dir = folder_paths.get_user_directory()
else:
raise KeyError("Unknown filepath type:" + type)
user = self.get_request_user_id(request)
user_root = folder_paths.get_public_user_directory(user)
if user_root is None:
return None
path = user_root
# prevent leaving /{type}
if os.path.commonpath((root_dir, user_root)) != root_dir:
return None
if file is not None:
# Check if filename is url encoded
if "%" in file:
file = parse.unquote(file)
# prevent leaving /{type}/{user}
path = os.path.abspath(os.path.join(user_root, file))
if os.path.commonpath((user_root, path)) != user_root:
return None
parent = os.path.split(path)[0]
if create_dir and not os.path.exists(parent):
os.makedirs(parent, exist_ok=True)
return path
def add_user(self, name):
name = name.strip()
if not name:
raise ValueError("username not provided")
if name.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise ValueError("System User prefix not allowed")
user_id = re.sub("[^a-zA-Z0-9-_]+", '-', name)
if user_id.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise ValueError("System User prefix not allowed")
user_id = user_id + "_" + str(uuid.uuid4())
self.users[user_id] = name
with open(self.get_users_file(), "w") as f:
json.dump(self.users, f)
return user_id
def add_routes(self, routes):
self.settings.add_routes(routes)
@routes.get("/users")
async def get_users(request):
if args.multi_user:
return web.json_response({"storage": "server", "users": self.users})
else:
user_dir = self.get_request_user_filepath(request, None, create_dir=False)
return web.json_response({
"storage": "server",
"migrated": os.path.exists(user_dir)
})
@routes.post("/users")
async def post_users(request):
body = await request.json()
username = body["username"]
if username in self.users.values():
return web.json_response({"error": "Duplicate username."}, status=400)
try:
user_id = self.add_user(username)
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
return web.json_response(user_id)
@routes.get("/userdata")
async def listuserdata(request):
"""
List user data files in a specified directory.
This endpoint allows listing files in a user's data directory, with options for recursion,
full file information, and path splitting.
Query Parameters:
- dir (required): The directory to list files from.
- recurse (optional): If "true", recursively list files in subdirectories.
- full_info (optional): If "true", return detailed file information (path, size, modified time).
- split (optional): If "true", split file paths into components (only applies when full_info is false).
Returns:
- 400: If 'dir' parameter is missing.
- 403: If the requested path is not allowed.
- 404: If the requested directory does not exist.
- 200: JSON response with the list of files or file information.
The response format depends on the query parameters:
- Default: List of relative file paths.
- full_info=true: List of dictionaries with file details.
- split=true (and full_info=false): List of lists, each containing path components.
"""
directory = request.rel_url.query.get('dir', '')
if not directory:
return web.Response(status=400, text="Directory not provided")
path = self.get_request_user_filepath(request, directory)
if not path:
return web.Response(status=403, text="Invalid directory")
if not os.path.exists(path):
return web.Response(status=404, text="Directory not found")
recurse = request.rel_url.query.get('recurse', '').lower() == "true"
full_info = request.rel_url.query.get('full_info', '').lower() == "true"
split_path = request.rel_url.query.get('split', '').lower() == "true"
# Use different patterns based on whether we're recursing or not
if recurse:
pattern = os.path.join(glob.escape(path), '**', '*')
else:
pattern = os.path.join(glob.escape(path), '*')
def process_full_path(full_path: str) -> FileInfo | str | list[str]:
if full_info:
return get_file_info(full_path, path)
rel_path = os.path.relpath(full_path, path).replace(os.sep, '/')
if split_path:
return [rel_path] + rel_path.split('/')
return rel_path
results = [
process_full_path(full_path)
for full_path in glob.glob(pattern, recursive=recurse)
if os.path.isfile(full_path)
]
return web.json_response(results)
@routes.get("/v2/userdata")
async def list_userdata_v2(request):
"""
List files and directories in a user's data directory.
This endpoint provides a structured listing of contents within a specified
subdirectory of the user's data storage.
Query Parameters:
- path (optional): The relative path within the user's data directory
to list. Defaults to the root ('').
Returns:
- 400: If the requested path is invalid, outside the user's data directory, or is not a directory.
- 404: If the requested path does not exist.
- 403: If the user is invalid.
- 500: If there is an error reading the directory contents.
- 200: JSON response containing a list of file and directory objects.
Each object includes:
- name: The name of the file or directory.
- type: 'file' or 'directory'.
- path: The relative path from the user's data root.
- size (for files): The size in bytes.
- modified (for files): The last modified timestamp (Unix epoch).
"""
requested_rel_path = request.rel_url.query.get('path', '')
# URL-decode the path parameter
try:
requested_rel_path = parse.unquote(requested_rel_path)
except Exception as e:
logging.warning(f"Failed to decode path parameter: {requested_rel_path}, Error: {e}")
return web.Response(status=400, text="Invalid characters in path parameter")
# Check user validity and get the absolute path for the requested directory
try:
base_user_path = self.get_request_user_filepath(request, None, create_dir=False)
if requested_rel_path:
target_abs_path = self.get_request_user_filepath(request, requested_rel_path, create_dir=False)
else:
target_abs_path = base_user_path
except KeyError as e:
# Invalid user detected by get_request_user_id inside get_request_user_filepath
logging.warning(f"Access denied for user: {e}")
return web.Response(status=403, text="Invalid user specified in request")
if not target_abs_path:
# Path traversal or other issue detected by get_request_user_filepath
return web.Response(status=400, text="Invalid path requested")
# Handle cases where the user directory or target path doesn't exist
if not os.path.exists(target_abs_path):
# Check if it's the base user directory that's missing (new user case)
if target_abs_path == base_user_path:
# It's okay if the base user directory doesn't exist yet, return empty list
return web.json_response([])
else:
# A specific subdirectory was requested but doesn't exist
return web.Response(status=404, text="Requested path not found")
if not os.path.isdir(target_abs_path):
return web.Response(status=400, text="Requested path is not a directory")
results = []
try:
for root, dirs, files in os.walk(target_abs_path, topdown=True):
# Process directories
for dir_name in dirs:
dir_path = os.path.join(root, dir_name)
rel_path = os.path.relpath(dir_path, base_user_path).replace(os.sep, '/')
results.append({
"name": dir_name,
"path": rel_path,
"type": "directory"
})
# Process files
for file_name in files:
file_path = os.path.join(root, file_name)
rel_path = os.path.relpath(file_path, base_user_path).replace(os.sep, '/')
entry_info = {
"name": file_name,
"path": rel_path,
"type": "file"
}
try:
stats = os.stat(file_path) # Use os.stat for potentially better performance with os.walk
entry_info["size"] = stats.st_size
entry_info["modified"] = stats.st_mtime
except OSError as stat_error:
logging.warning(f"Could not stat file {file_path}: {stat_error}")
pass # Include file with available info
results.append(entry_info)
except OSError as e:
logging.error(f"Error listing directory {target_abs_path}: {e}")
return web.Response(status=500, text="Error reading directory contents")
# Sort results alphabetically, directories first then files
results.sort(key=lambda x: (x['type'] != 'directory', x['name'].lower()))
return web.json_response(results)
        def get_user_data_path(request, check_exists=False, param="file"):
file = request.match_info.get(param, None)
if not file:
return web.Response(status=400)
path = self.get_request_user_filepath(request, file)
if not path:
return web.Response(status=403)
if check_exists and not os.path.exists(path):
return web.Response(status=404)
return path
@routes.get("/userdata/{file}")
async def getuserdata(request):
path = get_user_data_path(request, check_exists=True)
if not isinstance(path, str):
return path
return web.FileResponse(path)
@routes.post("/userdata/{file}")
async def post_userdata(request):
"""
Upload or update a user data file.
This endpoint handles file uploads to a user's data directory, with options for
controlling overwrite behavior and response format.
Query Parameters:
- overwrite (optional): If "false", prevents overwriting existing files. Defaults to "true".
- full_info (optional): If "true", returns detailed file information (path, size, modified time).
If "false", returns only the relative file path.
Path Parameters:
- file: The target file path (URL encoded if necessary).
Returns:
- 400: If 'file' parameter is missing.
- 403: If the requested path is not allowed.
- 409: If overwrite=false and the file already exists.
- 200: JSON response with either:
- Full file information (if full_info=true)
- Relative file path (if full_info=false)
The request body should contain the raw file content to be written.
"""
path = get_user_data_path(request)
if not isinstance(path, str):
return path
overwrite = request.query.get("overwrite", 'true') != "false"
full_info = request.query.get('full_info', 'false').lower() == "true"
if not overwrite and os.path.exists(path):
return web.Response(status=409, text="File already exists")
try:
body = await request.read()
with open(path, "wb") as f:
f.write(body)
except OSError as e:
logging.warning(f"Error saving file '{path}': {e}")
return web.Response(
status=400,
reason="Invalid filename. Please avoid special characters like :\\/*?\"<>|"
)
user_path = self.get_request_user_filepath(request, None)
if full_info:
resp = get_file_info(path, user_path)
else:
resp = os.path.relpath(path, user_path)
return web.json_response(resp)
@routes.delete("/userdata/{file}")
async def delete_userdata(request):
path = get_user_data_path(request, check_exists=True)
if not isinstance(path, str):
return path
os.remove(path)
return web.Response(status=204)
@routes.post("/userdata/{file}/move/{dest}")
async def move_userdata(request):
"""
Move or rename a user data file.
This endpoint handles moving or renaming files within a user's data directory, with options for
controlling overwrite behavior and response format.
Path Parameters:
- file: The source file path (URL encoded if necessary)
- dest: The destination file path (URL encoded if necessary)
Query Parameters:
- overwrite (optional): If "false", prevents overwriting existing files. Defaults to "true".
- full_info (optional): If "true", returns detailed file information (path, size, modified time).
If "false", returns only the relative file path.
Returns:
- 400: If either 'file' or 'dest' parameter is missing
- 403: If either requested path is not allowed
- 404: If the source file does not exist
- 409: If overwrite=false and the destination file already exists
- 200: JSON response with either:
- Full file information (if full_info=true)
- Relative file path (if full_info=false)
"""
source = get_user_data_path(request, check_exists=True)
if not isinstance(source, str):
return source
dest = get_user_data_path(request, check_exists=False, param="dest")
if not isinstance(dest, str):
return dest
overwrite = request.query.get("overwrite", 'true') != "false"
full_info = request.query.get('full_info', 'false').lower() == "true"
if not overwrite and os.path.exists(dest):
return web.Response(status=409, text="File already exists")
logging.info(f"moving '{source}' -> '{dest}'")
shutil.move(source, dest)
user_path = self.get_request_user_filepath(request, None)
if full_info:
resp = get_file_info(dest, user_path)
else:
resp = os.path.relpath(dest, user_path)
return web.json_response(resp)
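
A short client-side sketch of the userdata round trip documented above; server address and filenames are illustrative:
import requests

base = "http://127.0.0.1:8188"
# Upload (the file path segment may be URL encoded); overwrite defaults to true:
requests.post(f"{base}/userdata/workflows%2Fdemo.json", data=b"{}")
# List recursively with full file info:
requests.get(f"{base}/userdata", params={"dir": "workflows", "recurse": "true", "full_info": "true"}).json()
# Move, refusing to clobber an existing destination (409 on conflict):
requests.post(f"{base}/userdata/workflows%2Fdemo.json/move/workflows%2Fold.json", params={"overwrite": "false"})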


@@ -0,0 +1,91 @@
from .wav2vec2 import Wav2Vec2Model
from .whisper import WhisperLargeV3
import comfy.model_management
import comfy.model_patcher
import comfy.ops
import comfy.utils
import logging
import torchaudio
class AudioEncoderModel():
def __init__(self, config):
self.load_device = comfy.model_management.text_encoder_device()
offload_device = comfy.model_management.text_encoder_offload_device()
self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
model_type = config.pop("model_type")
model_config = dict(config)
model_config.update({
"dtype": self.dtype,
"device": offload_device,
"operations": comfy.ops.manual_cast
})
if model_type == "wav2vec2":
self.model = Wav2Vec2Model(**model_config)
        elif model_type == "whisper3":
            self.model = WhisperLargeV3(**model_config)
        else:
            raise ValueError("Unknown audio encoder model_type: {}".format(model_type))
        self.model.eval()
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.model_sample_rate = 16000
def load_sd(self, sd):
return self.model.load_state_dict(sd, strict=False)
def get_sd(self):
return self.model.state_dict()
def encode_audio(self, audio, sample_rate):
comfy.model_management.load_model_gpu(self.patcher)
audio = torchaudio.functional.resample(audio, sample_rate, self.model_sample_rate)
out, all_layers = self.model(audio.to(self.load_device))
outputs = {}
outputs["encoded_audio"] = out
outputs["encoded_audio_all_layers"] = all_layers
outputs["audio_samples"] = audio.shape[2]
return outputs
def load_audio_encoder_from_sd(sd, prefix=""):
sd = comfy.utils.state_dict_prefix_replace(sd, {"wav2vec2.": ""})
if "encoder.layer_norm.bias" in sd: #wav2vec2
embed_dim = sd["encoder.layer_norm.bias"].shape[0]
if embed_dim == 1024:# large
config = {
"model_type": "wav2vec2",
"embed_dim": 1024,
"num_heads": 16,
"num_layers": 24,
"conv_norm": True,
"conv_bias": True,
"do_normalize": True,
"do_stable_layer_norm": True
}
elif embed_dim == 768: # base
config = {
"model_type": "wav2vec2",
"embed_dim": 768,
"num_heads": 12,
"num_layers": 12,
"conv_norm": False,
"conv_bias": False,
"do_normalize": False, # chinese-wav2vec2-base has this False
"do_stable_layer_norm": False
}
else:
raise RuntimeError("ERROR: audio encoder file is invalid or unsupported embed_dim: {}".format(embed_dim))
elif "model.encoder.embed_positions.weight" in sd:
sd = comfy.utils.state_dict_prefix_replace(sd, {"model.": ""})
config = {
"model_type": "whisper3",
}
else:
raise RuntimeError("ERROR: audio encoder not supported.")
audio_encoder = AudioEncoderModel(config)
m, u = audio_encoder.load_sd(sd)
if len(m) > 0:
logging.warning("missing audio encoder: {}".format(m))
if len(u) > 0:
logging.warning("unexpected audio encoder: {}".format(u))
return audio_encoder
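
A hedged sketch of the intended call path, assuming a wav2vec2 checkpoint already read with comfy.utils.load_torch_file (the file path is an assumption):
import comfy.utils

sd = comfy.utils.load_torch_file("models/audio_encoders/wav2vec2_large.safetensors")  # hypothetical path
encoder = load_audio_encoder_from_sd(sd)
out = encoder.encode_audio(waveform, sample_rate=44100)  # waveform: [batch, channels, samples] tensor
# out["encoded_audio"]: final hidden states
# out["encoded_audio_all_layers"]: tuple of per-layer hidden states
# out["audio_samples"]: sample count after resampling to 16 kHz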


@@ -0,0 +1,252 @@
import torch
import torch.nn as nn
from comfy.ldm.modules.attention import optimized_attention_masked
class LayerNormConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, bias=False, dtype=None, device=None, operations=None):
super().__init__()
self.conv = operations.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=bias, device=device, dtype=dtype)
self.layer_norm = operations.LayerNorm(out_channels, elementwise_affine=True, device=device, dtype=dtype)
def forward(self, x):
x = self.conv(x)
return torch.nn.functional.gelu(self.layer_norm(x.transpose(-2, -1)).transpose(-2, -1))
class LayerGroupNormConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, bias=False, dtype=None, device=None, operations=None):
super().__init__()
self.conv = operations.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=bias, device=device, dtype=dtype)
self.layer_norm = operations.GroupNorm(num_groups=out_channels, num_channels=out_channels, affine=True, device=device, dtype=dtype)
def forward(self, x):
x = self.conv(x)
return torch.nn.functional.gelu(self.layer_norm(x))
class ConvNoNorm(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, bias=False, dtype=None, device=None, operations=None):
super().__init__()
self.conv = operations.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=bias, device=device, dtype=dtype)
def forward(self, x):
x = self.conv(x)
return torch.nn.functional.gelu(x)
class ConvFeatureEncoder(nn.Module):
def __init__(self, conv_dim, conv_bias=False, conv_norm=True, dtype=None, device=None, operations=None):
super().__init__()
if conv_norm:
self.conv_layers = nn.ModuleList([
LayerNormConv(1, conv_dim, kernel_size=10, stride=5, bias=True, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=2, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
LayerNormConv(conv_dim, conv_dim, kernel_size=2, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
])
else:
self.conv_layers = nn.ModuleList([
LayerGroupNormConv(1, conv_dim, kernel_size=10, stride=5, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=3, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=2, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
ConvNoNorm(conv_dim, conv_dim, kernel_size=2, stride=2, bias=conv_bias, device=device, dtype=dtype, operations=operations),
])
def forward(self, x):
x = x.unsqueeze(1)
for conv in self.conv_layers:
x = conv(x)
return x.transpose(1, 2)
class FeatureProjection(nn.Module):
def __init__(self, conv_dim, embed_dim, dtype=None, device=None, operations=None):
super().__init__()
self.layer_norm = operations.LayerNorm(conv_dim, eps=1e-05, device=device, dtype=dtype)
self.projection = operations.Linear(conv_dim, embed_dim, device=device, dtype=dtype)
def forward(self, x):
x = self.layer_norm(x)
x = self.projection(x)
return x
class PositionalConvEmbedding(nn.Module):
def __init__(self, embed_dim=768, kernel_size=128, groups=16):
super().__init__()
self.conv = nn.Conv1d(
embed_dim,
embed_dim,
kernel_size=kernel_size,
padding=kernel_size // 2,
groups=groups,
)
self.conv = torch.nn.utils.parametrizations.weight_norm(self.conv, name="weight", dim=2)
self.activation = nn.GELU()
def forward(self, x):
x = x.transpose(1, 2)
x = self.conv(x)[:, :, :-1]
x = self.activation(x)
x = x.transpose(1, 2)
return x
class TransformerEncoder(nn.Module):
def __init__(
self,
embed_dim=768,
num_heads=12,
num_layers=12,
mlp_ratio=4.0,
do_stable_layer_norm=True,
dtype=None, device=None, operations=None
):
super().__init__()
self.pos_conv_embed = PositionalConvEmbedding(embed_dim=embed_dim)
self.layers = nn.ModuleList([
TransformerEncoderLayer(
embed_dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
do_stable_layer_norm=do_stable_layer_norm,
device=device, dtype=dtype, operations=operations
)
for _ in range(num_layers)
])
self.layer_norm = operations.LayerNorm(embed_dim, eps=1e-05, device=device, dtype=dtype)
self.do_stable_layer_norm = do_stable_layer_norm
def forward(self, x, mask=None):
x = x + self.pos_conv_embed(x)
all_x = ()
if not self.do_stable_layer_norm:
x = self.layer_norm(x)
for layer in self.layers:
all_x += (x,)
x = layer(x, mask)
if self.do_stable_layer_norm:
x = self.layer_norm(x)
all_x += (x,)
return x, all_x
class Attention(nn.Module):
def __init__(self, embed_dim, num_heads, bias=True, dtype=None, device=None, operations=None):
super().__init__()
self.embed_dim = embed_dim
self.num_heads = num_heads
self.head_dim = embed_dim // num_heads
self.k_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
self.v_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
self.q_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
self.out_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
def forward(self, x, mask=None):
assert (mask is None) # TODO?
q = self.q_proj(x)
k = self.k_proj(x)
v = self.v_proj(x)
out = optimized_attention_masked(q, k, v, self.num_heads)
return self.out_proj(out)
class FeedForward(nn.Module):
def __init__(self, embed_dim, mlp_ratio, dtype=None, device=None, operations=None):
super().__init__()
self.intermediate_dense = operations.Linear(embed_dim, int(embed_dim * mlp_ratio), device=device, dtype=dtype)
self.output_dense = operations.Linear(int(embed_dim * mlp_ratio), embed_dim, device=device, dtype=dtype)
def forward(self, x):
x = self.intermediate_dense(x)
x = torch.nn.functional.gelu(x)
x = self.output_dense(x)
return x
class TransformerEncoderLayer(nn.Module):
def __init__(
self,
embed_dim=768,
num_heads=12,
mlp_ratio=4.0,
do_stable_layer_norm=True,
dtype=None, device=None, operations=None
):
super().__init__()
self.attention = Attention(embed_dim, num_heads, device=device, dtype=dtype, operations=operations)
self.layer_norm = operations.LayerNorm(embed_dim, device=device, dtype=dtype)
self.feed_forward = FeedForward(embed_dim, mlp_ratio, device=device, dtype=dtype, operations=operations)
self.final_layer_norm = operations.LayerNorm(embed_dim, device=device, dtype=dtype)
self.do_stable_layer_norm = do_stable_layer_norm
def forward(self, x, mask=None):
residual = x
if self.do_stable_layer_norm:
x = self.layer_norm(x)
x = self.attention(x, mask=mask)
x = residual + x
if not self.do_stable_layer_norm:
x = self.layer_norm(x)
return self.final_layer_norm(x + self.feed_forward(x))
else:
return x + self.feed_forward(self.final_layer_norm(x))
class Wav2Vec2Model(nn.Module):
"""Complete Wav2Vec 2.0 model."""
def __init__(
self,
embed_dim=1024,
final_dim=256,
num_heads=16,
num_layers=24,
conv_norm=True,
conv_bias=True,
do_normalize=True,
do_stable_layer_norm=True,
dtype=None, device=None, operations=None
):
super().__init__()
conv_dim = 512
self.feature_extractor = ConvFeatureEncoder(conv_dim, conv_norm=conv_norm, conv_bias=conv_bias, device=device, dtype=dtype, operations=operations)
self.feature_projection = FeatureProjection(conv_dim, embed_dim, device=device, dtype=dtype, operations=operations)
self.masked_spec_embed = nn.Parameter(torch.empty(embed_dim, device=device, dtype=dtype))
self.do_normalize = do_normalize
self.encoder = TransformerEncoder(
embed_dim=embed_dim,
num_heads=num_heads,
num_layers=num_layers,
do_stable_layer_norm=do_stable_layer_norm,
device=device, dtype=dtype, operations=operations
)
def forward(self, x, mask_time_indices=None, return_dict=False):
x = torch.mean(x, dim=1)
if self.do_normalize:
x = (x - x.mean()) / torch.sqrt(x.var() + 1e-7)
features = self.feature_extractor(x)
features = self.feature_projection(features)
batch_size, seq_len, _ = features.shape
x, all_x = self.encoder(features)
return x, all_x

comfy/audio_encoders/whisper.py Executable file

@@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from typing import Optional
from comfy.ldm.modules.attention import optimized_attention_masked
import comfy.ops
class WhisperFeatureExtractor(nn.Module):
def __init__(self, n_mels=128, device=None):
super().__init__()
self.sample_rate = 16000
self.n_fft = 400
self.hop_length = 160
self.n_mels = n_mels
self.chunk_length = 30
self.n_samples = 480000
self.mel_spectrogram = torchaudio.transforms.MelSpectrogram(
sample_rate=self.sample_rate,
n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels,
f_min=0,
f_max=8000,
norm="slaney",
mel_scale="slaney",
).to(device)
def __call__(self, audio):
audio = torch.mean(audio, dim=1)
batch_size = audio.shape[0]
processed_audio = []
for i in range(batch_size):
aud = audio[i]
if aud.shape[0] > self.n_samples:
aud = aud[:self.n_samples]
elif aud.shape[0] < self.n_samples:
aud = F.pad(aud, (0, self.n_samples - aud.shape[0]))
processed_audio.append(aud)
audio = torch.stack(processed_audio)
mel_spec = self.mel_spectrogram(audio.to(self.mel_spectrogram.spectrogram.window.device))[:, :, :-1].to(audio.device)
log_mel_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_mel_spec = torch.maximum(log_mel_spec, log_mel_spec.max() - 8.0)
log_mel_spec = (log_mel_spec + 4.0) / 4.0
return log_mel_spec
class MultiHeadAttention(nn.Module):
def __init__(self, d_model: int, n_heads: int, dtype=None, device=None, operations=None):
super().__init__()
assert d_model % n_heads == 0
self.d_model = d_model
self.n_heads = n_heads
self.d_k = d_model // n_heads
self.q_proj = operations.Linear(d_model, d_model, dtype=dtype, device=device)
self.k_proj = operations.Linear(d_model, d_model, bias=False, dtype=dtype, device=device)
self.v_proj = operations.Linear(d_model, d_model, dtype=dtype, device=device)
self.out_proj = operations.Linear(d_model, d_model, dtype=dtype, device=device)
def forward(
self,
query: torch.Tensor,
key: torch.Tensor,
value: torch.Tensor,
mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
batch_size, seq_len, _ = query.shape
q = self.q_proj(query)
k = self.k_proj(key)
v = self.v_proj(value)
attn_output = optimized_attention_masked(q, k, v, self.n_heads, mask)
attn_output = self.out_proj(attn_output)
return attn_output
class EncoderLayer(nn.Module):
def __init__(self, d_model: int, n_heads: int, d_ff: int, dtype=None, device=None, operations=None):
super().__init__()
self.self_attn = MultiHeadAttention(d_model, n_heads, dtype=dtype, device=device, operations=operations)
self.self_attn_layer_norm = operations.LayerNorm(d_model, dtype=dtype, device=device)
self.fc1 = operations.Linear(d_model, d_ff, dtype=dtype, device=device)
self.fc2 = operations.Linear(d_ff, d_model, dtype=dtype, device=device)
self.final_layer_norm = operations.LayerNorm(d_model, dtype=dtype, device=device)
def forward(
self,
x: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None
) -> torch.Tensor:
residual = x
x = self.self_attn_layer_norm(x)
x = self.self_attn(x, x, x, attention_mask)
x = residual + x
residual = x
x = self.final_layer_norm(x)
x = self.fc1(x)
x = F.gelu(x)
x = self.fc2(x)
x = residual + x
return x
class AudioEncoder(nn.Module):
def __init__(
self,
n_mels: int = 128,
n_ctx: int = 1500,
n_state: int = 1280,
n_head: int = 20,
n_layer: int = 32,
dtype=None,
device=None,
operations=None
):
super().__init__()
self.conv1 = operations.Conv1d(n_mels, n_state, kernel_size=3, padding=1, dtype=dtype, device=device)
self.conv2 = operations.Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1, dtype=dtype, device=device)
self.embed_positions = operations.Embedding(n_ctx, n_state, dtype=dtype, device=device)
self.layers = nn.ModuleList([
EncoderLayer(n_state, n_head, n_state * 4, dtype=dtype, device=device, operations=operations)
for _ in range(n_layer)
])
self.layer_norm = operations.LayerNorm(n_state, dtype=dtype, device=device)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = F.gelu(self.conv1(x))
x = F.gelu(self.conv2(x))
x = x.transpose(1, 2)
x = x + comfy.ops.cast_to_input(self.embed_positions.weight[:, :x.shape[1]], x)
all_x = ()
for layer in self.layers:
all_x += (x,)
x = layer(x)
x = self.layer_norm(x)
all_x += (x,)
return x, all_x
class WhisperLargeV3(nn.Module):
def __init__(
self,
n_mels: int = 128,
n_audio_ctx: int = 1500,
n_audio_state: int = 1280,
n_audio_head: int = 20,
n_audio_layer: int = 32,
dtype=None,
device=None,
operations=None
):
super().__init__()
self.feature_extractor = WhisperFeatureExtractor(n_mels=n_mels, device=device)
self.encoder = AudioEncoder(
n_mels, n_audio_ctx, n_audio_state, n_audio_head, n_audio_layer,
dtype=dtype, device=device, operations=operations
)
def forward(self, audio):
mel = self.feature_extractor(audio)
x, all_x = self.encoder(mel)
return x, all_x
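
The fixed shapes fall out of the constants above; a quick sanity check of the frame arithmetic:
n_samples = 480000               # 30 s of 16 kHz audio, padded/truncated to this length
mel_frames = n_samples // 160    # hop_length=160, last column dropped -> 3000 frames
enc_positions = mel_frames // 2  # conv2 has stride 2 -> 1500, matching n_ctx=1500
assert enc_positions == 1500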


@@ -0,0 +1,13 @@
import pickle
load = pickle.load
class Empty:
pass
class Unpickler(pickle.Unpickler):
def find_class(self, module, name):
#TODO: safe unpickle
if module.startswith("pytorch_lightning"):
return Empty
return super().find_class(module, name)
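
A small sketch of substituting this restricted unpickler when loading legacy checkpoints; the file name is illustrative:
# pytorch_lightning callables deserialize to the inert Empty class instead of importing the package
with open("legacy_checkpoint.ckpt", "rb") as f:  # hypothetical file
    data = Unpickler(f).load()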


@@ -2,21 +2,56 @@
#and modified
import torch
import torch as th
import torch.nn as nn
from ..ldm.modules.diffusionmodules.util import (
conv_nd,
linear,
zero_module,
timestep_embedding,
)
from ..ldm.modules.attention import SpatialTransformer
from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample, AttentionBlock
from ..ldm.models.diffusion.ddpm import LatentDiffusion
from ..ldm.util import log_txt_as_img, exists, instantiate_from_config
from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample
from ..ldm.util import exists
from .control_types import UNION_CONTROLNET_TYPES
from collections import OrderedDict
import comfy.ops
from comfy.ldm.modules.attention import optimized_attention
class OptimizedAttention(nn.Module):
def __init__(self, c, nhead, dropout=0.0, dtype=None, device=None, operations=None):
super().__init__()
self.heads = nhead
self.c = c
self.in_proj = operations.Linear(c, c * 3, bias=True, dtype=dtype, device=device)
self.out_proj = operations.Linear(c, c, bias=True, dtype=dtype, device=device)
def forward(self, x):
x = self.in_proj(x)
q, k, v = x.split(self.c, dim=2)
out = optimized_attention(q, k, v, self.heads)
return self.out_proj(out)
class QuickGELU(nn.Module):
def forward(self, x: torch.Tensor):
return x * torch.sigmoid(1.702 * x)
class ResBlockUnionControlnet(nn.Module):
def __init__(self, dim, nhead, dtype=None, device=None, operations=None):
super().__init__()
self.attn = OptimizedAttention(dim, nhead, dtype=dtype, device=device, operations=operations)
self.ln_1 = operations.LayerNorm(dim, dtype=dtype, device=device)
self.mlp = nn.Sequential(
OrderedDict([("c_fc", operations.Linear(dim, dim * 4, dtype=dtype, device=device)), ("gelu", QuickGELU()),
("c_proj", operations.Linear(dim * 4, dim, dtype=dtype, device=device))]))
self.ln_2 = operations.LayerNorm(dim, dtype=dtype, device=device)
def attention(self, x: torch.Tensor):
return self.attn(x)
def forward(self, x: torch.Tensor):
x = x + self.attention(self.ln_1(x))
x = x + self.mlp(self.ln_2(x))
return x
class ControlledUnetModel(UNetModel):
#implemented in the ldm unet
@ -30,13 +65,13 @@ class ControlNet(nn.Module):
model_channels,
hint_channels,
num_res_blocks,
attention_resolutions,
dropout=0,
channel_mult=(1, 2, 4, 8),
conv_resample=True,
dims=2,
num_classes=None,
use_checkpoint=False,
use_fp16=False,
dtype=torch.float32,
num_heads=-1,
num_head_channels=-1,
num_heads_upsample=-1,
@ -52,8 +87,17 @@ class ControlNet(nn.Module):
num_attention_blocks=None,
disable_middle_self_attn=False,
use_linear_in_transformer=False,
adm_in_channels=None,
transformer_depth_middle=None,
transformer_depth_output=None,
attn_precision=None,
union_controlnet_num_control_type=None,
device=None,
operations=comfy.ops.disable_weight_init,
**kwargs,
):
super().__init__()
assert use_spatial_transformer == True, "use_spatial_transformer has to be true"
if use_spatial_transformer:
assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...'
@ -76,6 +120,7 @@ class ControlNet(nn.Module):
self.image_size = image_size
self.in_channels = in_channels
self.model_channels = model_channels
if isinstance(num_res_blocks, int):
self.num_res_blocks = len(channel_mult) * [num_res_blocks]
else:
@ -83,23 +128,22 @@ class ControlNet(nn.Module):
raise ValueError("provide num_res_blocks either as an int (globally constant) or "
"as a list/tuple (per-level) with the same length as channel_mult")
self.num_res_blocks = num_res_blocks
if disable_self_attentions is not None:
# should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not
assert len(disable_self_attentions) == len(channel_mult)
if num_attention_blocks is not None:
assert len(num_attention_blocks) == len(self.num_res_blocks)
assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks))))
print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. "
f"This option has LESS priority than attention_resolutions {attention_resolutions}, "
f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, "
f"attention will still not be set.")
self.attention_resolutions = attention_resolutions
transformer_depth = transformer_depth[:]
self.dropout = dropout
self.channel_mult = channel_mult
self.conv_resample = conv_resample
self.num_classes = num_classes
self.use_checkpoint = use_checkpoint
self.dtype = th.float16 if use_fp16 else th.float32
self.dtype = dtype
self.num_heads = num_heads
self.num_head_channels = num_head_channels
self.num_heads_upsample = num_heads_upsample
@ -107,36 +151,53 @@ class ControlNet(nn.Module):
time_embed_dim = model_channels * 4
self.time_embed = nn.Sequential(
linear(model_channels, time_embed_dim),
operations.Linear(model_channels, time_embed_dim, dtype=self.dtype, device=device),
nn.SiLU(),
linear(time_embed_dim, time_embed_dim),
operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device),
)
if self.num_classes is not None:
if isinstance(self.num_classes, int):
self.label_emb = nn.Embedding(num_classes, time_embed_dim)
elif self.num_classes == "continuous":
self.label_emb = nn.Linear(1, time_embed_dim)
elif self.num_classes == "sequential":
assert adm_in_channels is not None
self.label_emb = nn.Sequential(
nn.Sequential(
operations.Linear(adm_in_channels, time_embed_dim, dtype=self.dtype, device=device),
nn.SiLU(),
operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device),
)
)
else:
raise ValueError()
self.input_blocks = nn.ModuleList(
[
TimestepEmbedSequential(
conv_nd(dims, in_channels, model_channels, 3, padding=1)
operations.conv_nd(dims, in_channels, model_channels, 3, padding=1, dtype=self.dtype, device=device)
)
]
)
self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)])
self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels, operations=operations, dtype=self.dtype, device=device)])
self.input_hint_block = TimestepEmbedSequential(
conv_nd(dims, hint_channels, 16, 3, padding=1),
operations.conv_nd(dims, hint_channels, 16, 3, padding=1, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 16, 16, 3, padding=1),
operations.conv_nd(dims, 16, 16, 3, padding=1, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 16, 32, 3, padding=1, stride=2),
operations.conv_nd(dims, 16, 32, 3, padding=1, stride=2, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 32, 32, 3, padding=1),
operations.conv_nd(dims, 32, 32, 3, padding=1, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 32, 96, 3, padding=1, stride=2),
operations.conv_nd(dims, 32, 96, 3, padding=1, stride=2, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 96, 96, 3, padding=1),
operations.conv_nd(dims, 96, 96, 3, padding=1, dtype=self.dtype, device=device),
nn.SiLU(),
conv_nd(dims, 96, 256, 3, padding=1, stride=2),
operations.conv_nd(dims, 96, 256, 3, padding=1, stride=2, dtype=self.dtype, device=device),
nn.SiLU(),
zero_module(conv_nd(dims, 256, model_channels, 3, padding=1))
operations.conv_nd(dims, 256, model_channels, 3, padding=1, dtype=self.dtype, device=device)
)
self._feature_size = model_channels
@ -154,10 +215,14 @@ class ControlNet(nn.Module):
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
dtype=self.dtype,
device=device,
operations=operations,
)
]
ch = mult * model_channels
if ds in attention_resolutions:
num_transformers = transformer_depth.pop(0)
if num_transformers > 0:
if num_head_channels == -1:
dim_head = ch // num_heads
else:
@ -173,20 +238,14 @@ class ControlNet(nn.Module):
if not exists(num_attention_blocks) or nr < num_attention_blocks[level]:
layers.append(
AttentionBlock(
ch,
use_checkpoint=use_checkpoint,
num_heads=num_heads,
num_head_channels=dim_head,
use_new_attention_order=use_new_attention_order,
) if not use_spatial_transformer else SpatialTransformer(
ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim,
SpatialTransformer(
ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim,
disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer,
use_checkpoint=use_checkpoint
use_checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=self.dtype, device=device, operations=operations
)
)
self.input_blocks.append(TimestepEmbedSequential(*layers))
self.zero_convs.append(self.make_zero_conv(ch))
self.zero_convs.append(self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device))
self._feature_size += ch
input_block_chans.append(ch)
if level != len(channel_mult) - 1:
@ -202,16 +261,19 @@ class ControlNet(nn.Module):
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
down=True,
dtype=self.dtype,
device=device,
operations=operations
)
if resblock_updown
else Downsample(
ch, conv_resample, dims=dims, out_channels=out_ch
ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations
)
)
)
ch = out_ch
input_block_chans.append(ch)
self.zero_convs.append(self.make_zero_conv(ch))
self.zero_convs.append(self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device))
ds *= 2
self._feature_size += ch
@ -223,7 +285,7 @@ class ControlNet(nn.Module):
if legacy:
#num_heads = 1
dim_head = ch // num_heads if use_spatial_transformer else num_head_channels
self.middle_block = TimestepEmbedSequential(
mid_block = [
ResBlock(
ch,
time_embed_dim,
@ -231,17 +293,15 @@ class ControlNet(nn.Module):
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
),
AttentionBlock(
ch,
use_checkpoint=use_checkpoint,
num_heads=num_heads,
num_head_channels=dim_head,
use_new_attention_order=use_new_attention_order,
) if not use_spatial_transformer else SpatialTransformer( # always uses a self-attn
ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim,
dtype=self.dtype,
device=device,
operations=operations
)]
if transformer_depth_middle >= 0:
mid_block += [SpatialTransformer( # always uses a self-attn
ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim,
disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer,
use_checkpoint=use_checkpoint
use_checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=self.dtype, device=device, operations=operations
),
ResBlock(
ch,
@ -250,23 +310,114 @@ class ControlNet(nn.Module):
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
),
)
self.middle_block_out = self.make_zero_conv(ch)
dtype=self.dtype,
device=device,
operations=operations
)]
self.middle_block = TimestepEmbedSequential(*mid_block)
self.middle_block_out = self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device)
self._feature_size += ch
def make_zero_conv(self, channels):
return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0)))
if union_controlnet_num_control_type is not None:
self.num_control_type = union_controlnet_num_control_type
num_trans_channel = 320
num_trans_head = 8
num_trans_layer = 1
num_proj_channel = 320
# task_scale_factor = num_trans_channel ** 0.5
self.task_embedding = nn.Parameter(torch.empty(self.num_control_type, num_trans_channel, dtype=self.dtype, device=device))
def forward(self, x, hint, timesteps, context, **kwargs):
t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False)
self.transformer_layes = nn.Sequential(*[ResBlockUnionControlnet(num_trans_channel, num_trans_head, dtype=self.dtype, device=device, operations=operations) for _ in range(num_trans_layer)])
self.spatial_ch_projs = operations.Linear(num_trans_channel, num_proj_channel, dtype=self.dtype, device=device)
#-----------------------------------------------------------------------------------------------------
control_add_embed_dim = 256
class ControlAddEmbedding(nn.Module):
def __init__(self, in_dim, out_dim, num_control_type, dtype=None, device=None, operations=None):
super().__init__()
self.num_control_type = num_control_type
self.in_dim = in_dim
self.linear_1 = operations.Linear(in_dim * num_control_type, out_dim, dtype=dtype, device=device)
self.linear_2 = operations.Linear(out_dim, out_dim, dtype=dtype, device=device)
def forward(self, control_type, dtype, device):
c_type = torch.zeros((self.num_control_type,), device=device)
c_type[control_type] = 1.0
c_type = timestep_embedding(c_type.flatten(), self.in_dim, repeat_only=False).to(dtype).reshape((-1, self.num_control_type * self.in_dim))
return self.linear_2(torch.nn.functional.silu(self.linear_1(c_type)))
self.control_add_embedding = ControlAddEmbedding(control_add_embed_dim, time_embed_dim, self.num_control_type, dtype=self.dtype, device=device, operations=operations)
else:
self.task_embedding = None
self.control_add_embedding = None
def union_controlnet_merge(self, hint, control_type, emb, context):
# Equivalent to: https://github.com/xinsir6/ControlNetPlus/tree/main
inputs = []
condition_list = []
for idx in range(min(1, len(control_type))):
controlnet_cond = self.input_hint_block(hint[idx], emb, context)
feat_seq = torch.mean(controlnet_cond, dim=(2, 3))
if idx < len(control_type):
feat_seq += self.task_embedding[control_type[idx]].to(dtype=feat_seq.dtype, device=feat_seq.device)
inputs.append(feat_seq.unsqueeze(1))
condition_list.append(controlnet_cond)
x = torch.cat(inputs, dim=1)
x = self.transformer_layes(x)
controlnet_cond_fuser = None
for idx in range(len(control_type)):
alpha = self.spatial_ch_projs(x[:, idx])
alpha = alpha.unsqueeze(-1).unsqueeze(-1)
o = condition_list[idx] + alpha
if controlnet_cond_fuser is None:
controlnet_cond_fuser = o
else:
controlnet_cond_fuser += o
return controlnet_cond_fuser
def make_zero_conv(self, channels, operations=None, dtype=None, device=None):
return TimestepEmbedSequential(operations.conv_nd(self.dims, channels, channels, 1, padding=0, dtype=dtype, device=device))
def forward(self, x, hint, timesteps, context, y=None, **kwargs):
t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype)
emb = self.time_embed(t_emb)
guided_hint = self.input_hint_block(hint, emb, context)
guided_hint = None
if self.control_add_embedding is not None: #Union Controlnet
control_type = kwargs.get("control_type", [])
outs = []
if any([c >= self.num_control_type for c in control_type]):
max_type = max(control_type)
max_type_name = {
v: k for k, v in UNION_CONTROLNET_TYPES.items()
}[max_type]
raise ValueError(
f"Control type {max_type_name}({max_type}) is out of range for the number of control types" +
f"({self.num_control_type}) supported.\n" +
"Please consider using the ProMax ControlNet Union model.\n" +
"https://huggingface.co/xinsir/controlnet-union-sdxl-1.0/tree/main"
)
h = x.type(self.dtype)
emb += self.control_add_embedding(control_type, emb.dtype, emb.device)
if len(control_type) > 0:
if len(hint.shape) < 5:
hint = hint.unsqueeze(dim=0)
guided_hint = self.union_controlnet_merge(hint, control_type, emb, context)
if guided_hint is None:
guided_hint = self.input_hint_block(hint, emb, context)
out_output = []
out_middle = []
if self.num_classes is not None:
if y is None:
raise ValueError("y is None, did you try using a controlnet for SDXL on SD1?")
emb = emb + self.label_emb(y)
h = x
for module, zero_conv in zip(self.input_blocks, self.zero_convs):
if guided_hint is not None:
h = module(h, emb, context)
@ -274,10 +425,10 @@ class ControlNet(nn.Module):
guided_hint = None
else:
h = module(h, emb, context)
outs.append(zero_conv(h, emb, context))
out_output.append(zero_conv(h, emb, context))
h = self.middle_block(h, emb, context)
outs.append(self.middle_block_out(h, emb, context))
out_middle.append(self.middle_block_out(h, emb, context))
return outs
return {"middle": out_middle, "output": out_output}


@ -0,0 +1,10 @@
UNION_CONTROLNET_TYPES = {
"openpose": 0,
"depth": 1,
"hed/pidi/scribble/ted": 2,
"canny/lineart/anime_lineart/mlsd": 3,
"normal": 4,
"segment": 5,
"tile": 6,
"repaint": 7,
}
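A small sketch of the two lookups performed against this table: name to index when building `control_type`, and the reverse map the ControlNet forward pass uses to produce a readable out-of-range error:
```python
idx = UNION_CONTROLNET_TYPES["depth"]                       # 1
name_by_index = {v: k for k, v in UNION_CONTROLNET_TYPES.items()}
print(name_by_index[idx])                                   # "depth"
```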

comfy/cldm/dit_embedder.py

@ -0,0 +1,120 @@
import math
from typing import List, Optional, Tuple
import torch
import torch.nn as nn
from torch import Tensor
from comfy.ldm.modules.diffusionmodules.mmdit import DismantledBlock, PatchEmbed, VectorEmbedder, TimestepEmbedder, get_2d_sincos_pos_embed_torch
class ControlNetEmbedder(nn.Module):
def __init__(
self,
img_size: int,
patch_size: int,
in_chans: int,
attention_head_dim: int,
num_attention_heads: int,
adm_in_channels: int,
num_layers: int,
main_model_double: int,
double_y_emb: bool,
device: torch.device,
dtype: torch.dtype,
pos_embed_max_size: Optional[int] = None,
operations = None,
):
super().__init__()
self.main_model_double = main_model_double
self.dtype = dtype
self.hidden_size = num_attention_heads * attention_head_dim
self.patch_size = patch_size
self.x_embedder = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=in_chans,
embed_dim=self.hidden_size,
strict_img_size=pos_embed_max_size is None,
device=device,
dtype=dtype,
operations=operations,
)
self.t_embedder = TimestepEmbedder(self.hidden_size, dtype=dtype, device=device, operations=operations)
self.double_y_emb = double_y_emb
if self.double_y_emb:
self.orig_y_embedder = VectorEmbedder(
adm_in_channels, self.hidden_size, dtype, device, operations=operations
)
self.y_embedder = VectorEmbedder(
self.hidden_size, self.hidden_size, dtype, device, operations=operations
)
else:
self.y_embedder = VectorEmbedder(
adm_in_channels, self.hidden_size, dtype, device, operations=operations
)
self.transformer_blocks = nn.ModuleList(
DismantledBlock(
hidden_size=self.hidden_size, num_heads=num_attention_heads, qkv_bias=True,
dtype=dtype, device=device, operations=operations
)
for _ in range(num_layers)
)
# self.use_y_embedder = pooled_projection_dim != self.time_text_embed.text_embedder.linear_1.in_features
# TODO double check this logic when 8b
self.use_y_embedder = True
self.controlnet_blocks = nn.ModuleList([])
for _ in range(len(self.transformer_blocks)):
controlnet_block = operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)
self.controlnet_blocks.append(controlnet_block)
self.pos_embed_input = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=in_chans,
embed_dim=self.hidden_size,
strict_img_size=False,
device=device,
dtype=dtype,
operations=operations,
)
def forward(
self,
x: torch.Tensor,
timesteps: torch.Tensor,
y: Optional[torch.Tensor] = None,
context: Optional[torch.Tensor] = None,
hint = None,
) -> Tuple[Tensor, List[Tensor]]:
x_shape = list(x.shape)
x = self.x_embedder(x)
if not self.double_y_emb:
h = (x_shape[-2] + 1) // self.patch_size
w = (x_shape[-1] + 1) // self.patch_size
x += get_2d_sincos_pos_embed_torch(self.hidden_size, w, h, device=x.device)
c = self.t_embedder(timesteps, dtype=x.dtype)
if y is not None and self.y_embedder is not None:
if self.double_y_emb:
y = self.orig_y_embedder(y)
y = self.y_embedder(y)
c = c + y
x = x + self.pos_embed_input(hint)
block_out = ()
repeat = math.ceil(self.main_model_double / len(self.transformer_blocks))
for i in range(len(self.transformer_blocks)):
out = self.transformer_blocks[i](x, c)
if not self.double_y_emb:
x = out
block_out += (self.controlnet_blocks[i](out),) * repeat
return {"output": block_out}

comfy/cldm/mmdit.py

@ -0,0 +1,81 @@
import torch
from typing import Optional
import comfy.ldm.modules.diffusionmodules.mmdit
class ControlNet(comfy.ldm.modules.diffusionmodules.mmdit.MMDiT):
def __init__(
self,
num_blocks = None,
control_latent_channels = None,
dtype = None,
device = None,
operations = None,
**kwargs,
):
super().__init__(dtype=dtype, device=device, operations=operations, final_layer=False, num_blocks=num_blocks, **kwargs)
# controlnet_blocks
self.controlnet_blocks = torch.nn.ModuleList([])
for _ in range(len(self.joint_blocks)):
self.controlnet_blocks.append(operations.Linear(self.hidden_size, self.hidden_size, device=device, dtype=dtype))
if control_latent_channels is None:
control_latent_channels = self.in_channels
self.pos_embed_input = comfy.ldm.modules.diffusionmodules.mmdit.PatchEmbed(
None,
self.patch_size,
control_latent_channels,
self.hidden_size,
bias=True,
strict_img_size=False,
dtype=dtype,
device=device,
operations=operations
)
def forward(
self,
x: torch.Tensor,
timesteps: torch.Tensor,
y: Optional[torch.Tensor] = None,
context: Optional[torch.Tensor] = None,
hint = None,
) -> torch.Tensor:
#weird sd3 controlnet specific stuff
y = torch.zeros_like(y)
if self.context_processor is not None:
context = self.context_processor(context)
hw = x.shape[-2:]
x = self.x_embedder(x) + self.cropped_pos_embed(hw, device=x.device).to(dtype=x.dtype, device=x.device)
x += self.pos_embed_input(hint)
c = self.t_embedder(timesteps, dtype=x.dtype)
if y is not None and self.y_embedder is not None:
y = self.y_embedder(y)
c = c + y
if context is not None:
context = self.context_embedder(context)
output = []
blocks = len(self.joint_blocks)
for i in range(blocks):
context, x = self.joint_blocks[i](
context,
x,
c=c,
use_checkpoint=self.use_checkpoint,
)
out = self.controlnet_blocks[i](x)
count = self.depth // blocks
if i == blocks - 1:
count -= 1
for j in range(count):
output.append(out)
return {"output": output}


@ -1,36 +1,244 @@
import argparse
import enum
import os
import comfy.options
class EnumAction(argparse.Action):
"""
Argparse action for handling Enums
"""
def __init__(self, **kwargs):
# Pop off the type value
enum_type = kwargs.pop("type", None)
# Ensure an Enum subclass is provided
if enum_type is None:
raise ValueError("type must be assigned an Enum when using EnumAction")
if not issubclass(enum_type, enum.Enum):
raise TypeError("type must be an Enum when using EnumAction")
# Generate choices from the Enum
choices = tuple(e.value for e in enum_type)
kwargs.setdefault("choices", choices)
kwargs.setdefault("metavar", f"[{','.join(list(choices))}]")
super(EnumAction, self).__init__(**kwargs)
self._enum = enum_type
def __call__(self, parser, namespace, values, option_string=None):
# Convert value back into an Enum
value = self._enum(values)
setattr(namespace, self.dest, value)
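`EnumAction` is exercised further down for `--preview-method`; a standalone sketch with a hypothetical enum shows the behavior: choices come from the enum values, and the parsed attribute is the enum member itself:
```python
import argparse
import enum

class Color(enum.Enum):   # hypothetical enum, purely for illustration
    RED = "red"
    BLUE = "blue"

p = argparse.ArgumentParser()
p.add_argument("--color", type=Color, default=Color.RED, action=EnumAction)
print(p.parse_args(["--color", "blue"]).color)   # Color.BLUE
```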
parser = argparse.ArgumentParser()
parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)")
parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0,::", help="Specify the IP address to listen on (default: 127.0.0.1). You can give a list of ip addresses by separating them with a comma like: 127.2.2.2,127.3.3.3 If --listen is provided without an argument, it defaults to 0.0.0.0,:: (listens on all ipv4 and ipv6)")
parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
parser.add_argument("--tls-keyfile", type=str, help="Path to TLS (SSL) key file. Enables TLS, makes app accessible at https://... requires --tls-certfile to function")
parser.add_argument("--tls-certfile", type=str, help="Path to TLS (SSL) certificate file. Enables TLS, makes app accessible at https://... requires --tls-keyfile to function")
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
parser.add_argument("--max-upload-size", type=float, default=100, help="Set the maximum upload size in MB.")
parser.add_argument("--base-directory", type=str, default=None, help="Set the ComfyUI base directory for models, custom_nodes, input, output, temp, and user directories.")
parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory. Overrides --base-directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory). Overrides --base-directory.")
parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory. Overrides --base-directory.")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use. All other devices will not be visible.")
parser.add_argument("--default-device", type=int, default=None, metavar="DEFAULT_DEVICE_ID", help="Set the id of the default device, all other devices will stay visible.")
cm_group = parser.add_mutually_exclusive_group()
cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).")
cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.")
fp_group = parser.add_mutually_exclusive_group()
fp_group.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
fp_group.add_argument("--force-fp16", action="store_true", help="Force fp16.")
fpunet_group = parser.add_mutually_exclusive_group()
fpunet_group.add_argument("--fp32-unet", action="store_true", help="Run the diffusion model in fp32.")
fpunet_group.add_argument("--fp64-unet", action="store_true", help="Run the diffusion model in fp64.")
fpunet_group.add_argument("--bf16-unet", action="store_true", help="Run the diffusion model in bf16.")
fpunet_group.add_argument("--fp16-unet", action="store_true", help="Run the diffusion model in fp16")
fpunet_group.add_argument("--fp8_e4m3fn-unet", action="store_true", help="Store unet weights in fp8_e4m3fn.")
fpunet_group.add_argument("--fp8_e5m2-unet", action="store_true", help="Store unet weights in fp8_e5m2.")
fpunet_group.add_argument("--fp8_e8m0fnu-unet", action="store_true", help="Store unet weights in fp8_e8m0fnu.")
fpvae_group = parser.add_mutually_exclusive_group()
fpvae_group.add_argument("--fp16-vae", action="store_true", help="Run the VAE in fp16, might cause black images.")
fpvae_group.add_argument("--fp32-vae", action="store_true", help="Run the VAE in full precision fp32.")
fpvae_group.add_argument("--bf16-vae", action="store_true", help="Run the VAE in bf16.")
parser.add_argument("--cpu-vae", action="store_true", help="Run the VAE on the CPU.")
fpte_group = parser.add_mutually_exclusive_group()
fpte_group.add_argument("--fp8_e4m3fn-text-enc", action="store_true", help="Store text encoder weights in fp8 (e4m3fn variant).")
fpte_group.add_argument("--fp8_e5m2-text-enc", action="store_true", help="Store text encoder weights in fp8 (e5m2 variant).")
fpte_group.add_argument("--fp16-text-enc", action="store_true", help="Store text encoder weights in fp16.")
fpte_group.add_argument("--fp32-text-enc", action="store_true", help="Store text encoder weights in fp32.")
fpte_group.add_argument("--bf16-text-enc", action="store_true", help="Store text encoder weights in bf16.")
parser.add_argument("--force-channels-last", action="store_true", help="Force channels last format when inferencing the models.")
parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")
parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
class LatentPreviewMethod(enum.Enum):
NoPreviews = "none"
Auto = "auto"
Latent2RGB = "latent2rgb"
TAESD = "taesd"
parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction)
parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
cache_group = parser.add_mutually_exclusive_group()
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threhold the cache remove large items to free RAM. Default 4GB")
attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.")
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.")
attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.")
attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.")
attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.")
parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.")
upcast = parser.add_mutually_exclusive_group()
upcast.add_argument("--force-upcast-attention", action="store_true", help="Force enable attention upcasting, please report if it fixes black images.")
upcast.add_argument("--dont-upcast-attention", action="store_true", help="Disable all upcasting of attention. Should be unnecessary except for debugging.")
vram_group = parser.add_mutually_exclusive_group()
vram_group.add_argument("--gpu-only", action="store_true", help="Store and run everything (text encoders/CLIP models, etc... on the GPU).")
vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.")
vram_group.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.")
vram_group.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.")
vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.")
vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.")
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--deterministic", action="store_true", help="Make pytorch use slower deterministic algorithms when it can. Note that this might not make images deterministic in all cases.")
class PerformanceFeature(enum.Enum):
Fp16Accumulation = "fp16_accumulation"
Fp8MatrixMultiplication = "fp8_matrix_mult"
CublasOps = "cublas_ops"
AutoTune = "autotune"
parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))
parser.add_argument("--disable-pinned-memory", action="store_true", help="Disable pinned memory use.")
parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.")
parser.add_argument("--disable-mmap", action="store_true", help="Don't use mmap when loading safetensors.")
parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).")
args = parser.parse_args()
parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
parser.add_argument("--disable-all-custom-nodes", action="store_true", help="Disable loading all custom nodes.")
parser.add_argument("--whitelist-custom-nodes", type=str, nargs='+', default=[], help="Specify custom node folders to load even when --disable-all-custom-nodes is enabled.")
parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes. Also prevents the frontend from communicating with the internet.")
parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.")
parser.add_argument("--verbose", default='INFO', const='DEBUG', nargs="?", choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Set the logging level')
parser.add_argument("--log-stdout", action="store_true", help="Send normal process output to stdout instead of stderr (default).")
# The default built-in provider hosted under web/
DEFAULT_VERSION_STRING = "comfyanonymous/ComfyUI@latest"
parser.add_argument(
"--front-end-version",
type=str,
default=DEFAULT_VERSION_STRING,
help="""
Specifies the version of the frontend to be used. This command needs internet connectivity to query and
download available frontend implementations from GitHub releases.
The version string should be in the format of:
[repoOwner]/[repoName]@[version]
where version is one of: "latest" or a valid version number (e.g. "1.0.0")
""",
)
def is_valid_directory(path: str) -> str:
"""Validate if the given path is a directory, and check permissions."""
if not os.path.exists(path):
raise argparse.ArgumentTypeError(f"The path '{path}' does not exist.")
if not os.path.isdir(path):
raise argparse.ArgumentTypeError(f"'{path}' is not a directory.")
if not os.access(path, os.R_OK):
raise argparse.ArgumentTypeError(f"You do not have read permissions for '{path}'.")
return path
parser.add_argument(
"--front-end-root",
type=is_valid_directory,
default=None,
help="The local filesystem path to the directory where the frontend is located. Overrides --front-end-version.",
)
parser.add_argument("--user-directory", type=is_valid_directory, default=None, help="Set the ComfyUI user directory with an absolute path. Overrides --base-directory.")
parser.add_argument("--enable-compress-response-body", action="store_true", help="Enable compressing response body.")
parser.add_argument(
"--comfy-api-base",
type=str,
default="https://api.comfy.org",
help="Set the base URL for the ComfyUI API. (default: https://api.comfy.org)",
)
database_default_path = os.path.abspath(
os.path.join(os.path.dirname(__file__), "..", "user", "comfyui.db")
)
parser.add_argument("--database-url", type=str, default=f"sqlite:///{database_default_path}", help="Specify the database URL, e.g. for an in-memory database you can use 'sqlite:///:memory:'.")
if comfy.options.args_parsing:
args = parser.parse_args()
else:
args = parser.parse_args([])
if args.windows_standalone_build:
args.auto_launch = True
if args.disable_auto_launch:
args.auto_launch = False
if args.force_fp16:
args.fp16_unet = True
# '--fast' is not provided, use an empty set
if args.fast is None:
args.fast = set()
# '--fast' is provided with an empty list, enable all optimizations
elif args.fast == []:
args.fast = set(PerformanceFeature)
# '--fast' is provided with a list of performance features, use that list
else:
args.fast = set(args.fast)
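A sketch of the three `--fast` cases the normalization above receives from argparse (re-parsing here is only for illustration):
```python
ns = parser.parse_args([])
assert ns.fast is None                 # flag omitted -> normalized to set()
ns = parser.parse_args(["--fast"])
assert ns.fast == []                   # bare flag -> all PerformanceFeature members
ns = parser.parse_args(["--fast", "fp16_accumulation"])
assert ns.fast == [PerformanceFeature.Fp16Accumulation]   # explicit subset
```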


@ -0,0 +1,23 @@
{
"architectures": [
"CLIPTextModel"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"dropout": 0.0,
"eos_token_id": 49407,
"hidden_act": "gelu",
"hidden_size": 1280,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 5120,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 77,
"model_type": "clip_text_model",
"num_attention_heads": 20,
"num_hidden_layers": 32,
"pad_token_id": 1,
"projection_dim": 1280,
"torch_dtype": "float32",
"vocab_size": 49408
}

comfy/clip_model.py

@ -0,0 +1,254 @@
import torch
from comfy.ldm.modules.attention import optimized_attention_for_device
import comfy.ops
class CLIPAttention(torch.nn.Module):
def __init__(self, embed_dim, heads, dtype, device, operations):
super().__init__()
self.heads = heads
self.q_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
self.k_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
self.v_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
self.out_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
def forward(self, x, mask=None, optimized_attention=None):
q = self.q_proj(x)
k = self.k_proj(x)
v = self.v_proj(x)
out = optimized_attention(q, k, v, self.heads, mask)
return self.out_proj(out)
ACTIVATIONS = {"quick_gelu": lambda a: a * torch.sigmoid(1.702 * a),
"gelu": torch.nn.functional.gelu,
"gelu_pytorch_tanh": lambda a: torch.nn.functional.gelu(a, approximate="tanh"),
}
class CLIPMLP(torch.nn.Module):
def __init__(self, embed_dim, intermediate_size, activation, dtype, device, operations):
super().__init__()
self.fc1 = operations.Linear(embed_dim, intermediate_size, bias=True, dtype=dtype, device=device)
self.activation = ACTIVATIONS[activation]
self.fc2 = operations.Linear(intermediate_size, embed_dim, bias=True, dtype=dtype, device=device)
def forward(self, x):
x = self.fc1(x)
x = self.activation(x)
x = self.fc2(x)
return x
class CLIPLayer(torch.nn.Module):
def __init__(self, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations):
super().__init__()
self.layer_norm1 = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
self.self_attn = CLIPAttention(embed_dim, heads, dtype, device, operations)
self.layer_norm2 = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
self.mlp = CLIPMLP(embed_dim, intermediate_size, intermediate_activation, dtype, device, operations)
def forward(self, x, mask=None, optimized_attention=None):
x += self.self_attn(self.layer_norm1(x), mask, optimized_attention)
x += self.mlp(self.layer_norm2(x))
return x
class CLIPEncoder(torch.nn.Module):
def __init__(self, num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations):
super().__init__()
self.layers = torch.nn.ModuleList([CLIPLayer(embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations) for i in range(num_layers)])
def forward(self, x, mask=None, intermediate_output=None):
optimized_attention = optimized_attention_for_device(x.device, mask=mask is not None, small_input=True)
all_intermediate = None
if intermediate_output is not None:
if intermediate_output == "all":
all_intermediate = []
intermediate_output = None
elif intermediate_output < 0:
intermediate_output = len(self.layers) + intermediate_output
intermediate = None
for i, l in enumerate(self.layers):
x = l(x, mask, optimized_attention)
if i == intermediate_output:
intermediate = x.clone()
if all_intermediate is not None:
all_intermediate.append(x.unsqueeze(1).clone())
if all_intermediate is not None:
intermediate = torch.cat(all_intermediate, dim=1)
return x, intermediate
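The `intermediate_output` plumbing: a negative index counts from the end (so -2 captures the penultimate layer), and `"all"` stacks every layer's output along dim 1. A toy replication of the capture logic:
```python
layers = [lambda t: t + 1] * 4          # stand-ins for CLIPLayer
intermediate_output = -2
if intermediate_output < 0:
    intermediate_output += len(layers)  # -2 -> index 2
x, intermediate = 0, None
for i, layer in enumerate(layers):
    x = layer(x)
    if i == intermediate_output:
        intermediate = x                # captured after the 3rd layer
print(x, intermediate)                  # 4 3
```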
class CLIPEmbeddings(torch.nn.Module):
def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None, operations=None):
super().__init__()
self.token_embedding = operations.Embedding(vocab_size, embed_dim, dtype=dtype, device=device)
self.position_embedding = operations.Embedding(num_positions, embed_dim, dtype=dtype, device=device)
def forward(self, input_tokens, dtype=torch.float32):
return self.token_embedding(input_tokens, out_dtype=dtype) + comfy.ops.cast_to(self.position_embedding.weight, dtype=dtype, device=input_tokens.device)
class CLIPTextModel_(torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
num_layers = config_dict["num_hidden_layers"]
embed_dim = config_dict["hidden_size"]
heads = config_dict["num_attention_heads"]
intermediate_size = config_dict["intermediate_size"]
intermediate_activation = config_dict["hidden_act"]
num_positions = config_dict["max_position_embeddings"]
self.eos_token_id = config_dict["eos_token_id"]
super().__init__()
self.embeddings = CLIPEmbeddings(embed_dim, num_positions=num_positions, dtype=dtype, device=device, operations=operations)
self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
self.final_layer_norm = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
def forward(self, input_tokens=None, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=torch.float32, embeds_info=[]):
if embeds is not None:
x = embeds + comfy.ops.cast_to(self.embeddings.position_embedding.weight, dtype=dtype, device=embeds.device)
else:
x = self.embeddings(input_tokens, dtype=dtype)
mask = None
if attention_mask is not None:
mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1])
mask = mask.masked_fill(mask.to(torch.bool), -torch.finfo(x.dtype).max)
causal_mask = torch.full((x.shape[1], x.shape[1]), -torch.finfo(x.dtype).max, dtype=x.dtype, device=x.device).triu_(1)
if mask is not None:
mask += causal_mask
else:
mask = causal_mask
x, i = self.encoder(x, mask=mask, intermediate_output=intermediate_output)
x = self.final_layer_norm(x)
if i is not None and final_layer_norm_intermediate:
i = self.final_layer_norm(i)
if num_tokens is not None:
pooled_output = x[list(range(x.shape[0])), list(map(lambda a: a - 1, num_tokens))]
else:
pooled_output = x[torch.arange(x.shape[0], device=x.device), (torch.round(input_tokens).to(dtype=torch.int, device=x.device) == self.eos_token_id).int().argmax(dim=-1),]
return x, i, pooled_output
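When `num_tokens` is absent, the pooled output is read at the first EOS position in each sequence (49407 for CLIP, matching the config above). A minimal sketch of that index computation; the token ids are illustrative:
```python
import torch

eos_token_id = 49407
tokens = torch.tensor([[49406, 320, 1125, 49407, 49407]])   # illustrative prompt
eos_pos = (tokens == eos_token_id).int().argmax(dim=-1)
print(eos_pos)   # tensor([3]): argmax returns the first occurrence on ties
```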
class CLIPTextModel(torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
self.num_layers = config_dict["num_hidden_layers"]
self.text_model = CLIPTextModel_(config_dict, dtype, device, operations)
embed_dim = config_dict["hidden_size"]
self.text_projection = operations.Linear(embed_dim, embed_dim, bias=False, dtype=dtype, device=device)
self.dtype = dtype
def get_input_embeddings(self):
return self.text_model.embeddings.token_embedding
def set_input_embeddings(self, embeddings):
self.text_model.embeddings.token_embedding = embeddings
def forward(self, *args, **kwargs):
x = self.text_model(*args, **kwargs)
out = self.text_projection(x[2])
return (x[0], x[1], out, x[2])
class CLIPVisionEmbeddings(torch.nn.Module):
def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", dtype=None, device=None, operations=None):
super().__init__()
num_patches = (image_size // patch_size) ** 2
if model_type == "siglip_vision_model":
self.class_embedding = None
patch_bias = True
else:
num_patches = num_patches + 1
self.class_embedding = torch.nn.Parameter(torch.empty(embed_dim, dtype=dtype, device=device))
patch_bias = False
self.patch_embedding = operations.Conv2d(
in_channels=num_channels,
out_channels=embed_dim,
kernel_size=patch_size,
stride=patch_size,
bias=patch_bias,
dtype=dtype,
device=device
)
self.position_embedding = operations.Embedding(num_patches, embed_dim, dtype=dtype, device=device)
def forward(self, pixel_values):
embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2)
if self.class_embedding is not None:
embeds = torch.cat([comfy.ops.cast_to_input(self.class_embedding, embeds).expand(pixel_values.shape[0], 1, -1), embeds], dim=1)
return embeds + comfy.ops.cast_to_input(self.position_embedding.weight, embeds)
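The position counts computed here are exactly what `load_clipvision_from_sd` keys on later: ViT-L/14 at 336px gives (336 // 14)² + 1 = 577 positions, SigLIP 384 (no class token) gives 27² = 729, and SigLIP 512 with patch 16 gives 32² = 1024. A sketch of the arithmetic:
```python
def num_positions(image_size, patch_size, class_token=True):
    return (image_size // patch_size) ** 2 + (1 if class_token else 0)

print(num_positions(336, 14))                     # 577 (CLIP ViT-L/14 @ 336)
print(num_positions(384, 14, class_token=False))  # 729 (SigLIP 384)
print(num_positions(512, 16, class_token=False))  # 1024 (SigLIP 512)
```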
class CLIPVision(torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
num_layers = config_dict["num_hidden_layers"]
embed_dim = config_dict["hidden_size"]
heads = config_dict["num_attention_heads"]
intermediate_size = config_dict["intermediate_size"]
intermediate_activation = config_dict["hidden_act"]
model_type = config_dict["model_type"]
self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations)
if model_type == "siglip_vision_model":
self.pre_layrnorm = lambda a: a
self.output_layernorm = True
else:
self.pre_layrnorm = operations.LayerNorm(embed_dim)
self.output_layernorm = False
self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
self.post_layernorm = operations.LayerNorm(embed_dim)
def forward(self, pixel_values, attention_mask=None, intermediate_output=None):
x = self.embeddings(pixel_values)
x = self.pre_layrnorm(x)
#TODO: attention_mask?
x, i = self.encoder(x, mask=None, intermediate_output=intermediate_output)
if self.output_layernorm:
x = self.post_layernorm(x)
pooled_output = x
else:
pooled_output = self.post_layernorm(x[:, 0, :])
return x, i, pooled_output
class LlavaProjector(torch.nn.Module):
def __init__(self, in_dim, out_dim, dtype, device, operations):
super().__init__()
self.linear_1 = operations.Linear(in_dim, out_dim, bias=True, device=device, dtype=dtype)
self.linear_2 = operations.Linear(out_dim, out_dim, bias=True, device=device, dtype=dtype)
def forward(self, x):
return self.linear_2(torch.nn.functional.gelu(self.linear_1(x[:, 1:])))
class CLIPVisionModelProjection(torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
self.vision_model = CLIPVision(config_dict, dtype, device, operations)
if "projection_dim" in config_dict:
self.visual_projection = operations.Linear(config_dict["hidden_size"], config_dict["projection_dim"], bias=False)
else:
self.visual_projection = lambda a: a
if "llava3" == config_dict.get("projector_type", None):
self.multi_modal_projector = LlavaProjector(config_dict["hidden_size"], 4096, dtype, device, operations)
else:
self.multi_modal_projector = None
def forward(self, *args, **kwargs):
x = self.vision_model(*args, **kwargs)
out = self.visual_projection(x[2])
projected = None
if self.multi_modal_projector is not None:
projected = self.multi_modal_projector(x[1])
return (x[0], x[1], out, projected)


@ -1,64 +1,164 @@
from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor
from .utils import load_torch_file, transformers_convert
from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
import os
import torch
import json
import logging
import comfy.ops
import comfy.model_patcher
import comfy.model_management
import comfy.utils
import comfy.clip_model
import comfy.image_encoders.dino2
class Output:
def __getitem__(self, key):
return getattr(self, key)
def __setitem__(self, key, item):
setattr(self, key, item)
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True):
image = image[:, :, :, :3] if image.shape[3] > 3 else image
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
image = image.movedim(-1, 1)
if not (image.shape[2] == size and image.shape[3] == size):
if crop:
scale = (size / min(image.shape[2], image.shape[3]))
scale_size = (round(scale * image.shape[2]), round(scale * image.shape[3]))
else:
scale_size = (size, size)
image = torch.nn.functional.interpolate(image, size=scale_size, mode="bicubic", antialias=True)
h = (image.shape[2] - size)//2
w = (image.shape[3] - size)//2
image = image[:,:,h:h+size,w:w+size]
image = torch.clip((255. * image), 0, 255).round() / 255.0
return (image - mean.view([3,1,1])) / std.view([3,1,1])
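A hedged usage sketch of the preprocessor: ComfyUI images are (B, H, W, C) floats in [0, 1], so a non-square batch gets scaled so its short side matches `size`, then center-cropped:
```python
import torch

image = torch.rand(1, 512, 768, 3)               # (B, H, W, C), dummy data
pixel_values = clip_preprocess(image, size=224)  # scale short side, center crop
print(pixel_values.shape)                        # torch.Size([1, 3, 224, 224])
```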
IMAGE_ENCODERS = {
"clip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
"siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
"dinov2": comfy.image_encoders.dino2.Dinov2Model,
}
class ClipVisionModel():
def __init__(self, json_config):
config = CLIPVisionConfig.from_json_file(json_config)
self.model = CLIPVisionModelWithProjection(config)
self.processor = CLIPImageProcessor(crop_size=224,
do_center_crop=True,
do_convert_rgb=True,
do_normalize=True,
do_resize=True,
image_mean=[ 0.48145466,0.4578275,0.40821073],
image_std=[0.26862954,0.26130258,0.27577711],
resample=3, #bicubic
size=224)
with open(json_config) as f:
config = json.load(f)
self.image_size = config.get("image_size", 224)
self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073])
self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711])
model_type = config.get("model_type", "clip_vision_model")
model_class = IMAGE_ENCODERS.get(model_type)
if model_type == "siglip_vision_model":
self.return_all_hidden_states = True
else:
self.return_all_hidden_states = False
self.load_device = comfy.model_management.text_encoder_device()
offload_device = comfy.model_management.text_encoder_offload_device()
self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
self.model.eval()
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
def load_sd(self, sd):
self.model.load_state_dict(sd, strict=False)
return self.model.load_state_dict(sd, strict=False)
def encode_image(self, image):
img = torch.clip((255. * image[0]), 0, 255).round().int()
inputs = self.processor(images=[img], return_tensors="pt")
outputs = self.model(**inputs)
def get_sd(self):
return self.model.state_dict()
def encode_image(self, image, crop=True):
comfy.model_management.load_model_gpu(self.patcher)
pixel_values = clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float()
out = self.model(pixel_values=pixel_values, intermediate_output='all' if self.return_all_hidden_states else -2)
outputs = Output()
outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device())
if self.return_all_hidden_states:
all_hs = out[1].to(comfy.model_management.intermediate_device())
outputs["penultimate_hidden_states"] = all_hs[:, -2]
outputs["all_hidden_states"] = all_hs
else:
outputs["penultimate_hidden_states"] = out[1].to(comfy.model_management.intermediate_device())
outputs["mm_projected"] = out[3]
return outputs
def convert_to_transformers(sd):
def convert_to_transformers(sd, prefix):
sd_k = sd.keys()
if "embedder.model.visual.transformer.resblocks.0.attn.in_proj_weight" in sd_k:
if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k:
keys_to_replace = {
"embedder.model.visual.class_embedding": "vision_model.embeddings.class_embedding",
"embedder.model.visual.conv1.weight": "vision_model.embeddings.patch_embedding.weight",
"embedder.model.visual.positional_embedding": "vision_model.embeddings.position_embedding.weight",
"embedder.model.visual.ln_post.bias": "vision_model.post_layernorm.bias",
"embedder.model.visual.ln_post.weight": "vision_model.post_layernorm.weight",
"embedder.model.visual.ln_pre.bias": "vision_model.pre_layrnorm.bias",
"embedder.model.visual.ln_pre.weight": "vision_model.pre_layrnorm.weight",
"{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding",
"{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight",
"{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight",
"{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias",
"{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight",
"{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias",
"{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight",
}
for x in keys_to_replace:
if x in sd_k:
sd[keys_to_replace[x]] = sd.pop(x)
if "embedder.model.visual.proj" in sd_k:
sd['visual_projection.weight'] = sd.pop("embedder.model.visual.proj").transpose(0, 1)
if "{}proj".format(prefix) in sd_k:
sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1)
sd = transformers_convert(sd, "embedder.model.visual", "vision_model", 32)
sd = transformers_convert(sd, prefix, "vision_model.", 48)
else:
replace_prefix = {prefix: ""}
sd = state_dict_prefix_replace(sd, replace_prefix)
return sd
def load_clipvision_from_sd(sd):
sd = convert_to_transformers(sd)
if "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
if convert_keys:
sd = convert_to_transformers(sd, prefix)
if "vision_model.encoder.layers.47.layer_norm1.weight" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json")
elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
embed_shape = sd["vision_model.embeddings.position_embedding.weight"].shape[0]
if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
if embed_shape == 729:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
elif embed_shape == 1024:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_512.json")
elif embed_shape == 577:
if "multi_modal_projector.linear_1.bias" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336_llava.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
# Dinov2
elif 'encoder.layer.39.layer_scale2.lambda1' in sd:
json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json")
elif 'encoder.layer.23.layer_scale2.lambda1' in sd:
json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_large.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
return None
clip = ClipVisionModel(json_config)
clip.load_sd(sd)
m, u = clip.load_sd(sd)
if len(m) > 0:
logging.warning("missing clip vision: {}".format(m))
u = set(u)
keys = list(sd.keys())
for k in keys:
if k not in u:
sd.pop(k)
return clip
def load(ckpt_path):
sd = load_torch_file(ckpt_path)
return load_clipvision_from_sd(sd)
if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd:
return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True)
else:
return load_clipvision_from_sd(sd)
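End to end, a hedged usage sketch (the checkpoint path is illustrative; `load` returns None when no known config matches the state dict):
```python
import torch

clip = load("models/clip_vision/clip_vit_large.safetensors")  # path illustrative
if clip is not None:
    out = clip.encode_image(torch.rand(1, 512, 512, 3))
    print(out["image_embeds"].shape, out["penultimate_hidden_states"].shape)
```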


@ -0,0 +1,18 @@
{
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "gelu",
"hidden_size": 1664,
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 8192,
"layer_norm_eps": 1e-05,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 48,
"patch_size": 14,
"projection_dim": 1280,
"torch_dtype": "float32"
}


@ -0,0 +1,18 @@
{
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"image_size": 336,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-5,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"patch_size": 14,
"projection_dim": 768,
"torch_dtype": "float32"
}

@@ -0,0 +1,19 @@
{
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"image_size": 336,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-5,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"patch_size": 14,
"projection_dim": 768,
"projector_type": "llava3",
"torch_dtype": "float32"
}

@@ -0,0 +1,13 @@
{
"num_channels": 3,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 384,
"intermediate_size": 4304,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"patch_size": 14,
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5]
}

@@ -0,0 +1,13 @@
{
"num_channels": 3,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 512,
"intermediate_size": 4304,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"patch_size": 16,
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5]
}

@@ -0,0 +1,43 @@
# Comfy Typing
## Type hinting for ComfyUI Node development
This module provides type hinting and concrete convenience types for node developers.
If cloned to the custom_nodes directory of ComfyUI, types can be imported using:
```python
from comfy.comfy_types import IO, ComfyNodeABC, CheckLazyMixin, InputTypeDict

class ExampleNode(ComfyNodeABC):
    @classmethod
    def INPUT_TYPES(s) -> InputTypeDict:
        return {"required": {}}
```
Full example is in [examples/example_nodes.py](examples/example_nodes.py).
# Types
A few primary types are documented below. More complete information is available via the docstrings on each type.
## `IO`
A string enum of built-in data types plus a few custom ones. Includes the following special types and their requisite plumbing (sketched in the example below):
- `ANY`: `"*"`
- `NUMBER`: `"FLOAT,INT"`
- `PRIMITIVE`: `"STRING,FLOAT,INT,BOOLEAN"`
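For example, an input declared with `IO.NUMBER` accepts either a float or an int. A minimal sketch, assuming the node and input names below (they are illustrative, not part of this module):
```python
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict

class AddOne(ComfyNodeABC):
    CATEGORY = "examples"

    @classmethod
    def INPUT_TYPES(s) -> InputTypeDict:
        # IO.NUMBER expands to "FLOAT,INT", so either numeric type may connect here.
        return {"required": {"value": (IO.NUMBER, {"default": 0})}}

    RETURN_TYPES = (IO.FLOAT,)
    FUNCTION = "execute"

    def execute(self, value):
        return (float(value) + 1.0,)
```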
## `ComfyNodeABC`
An abstract base class for nodes, offering type-hinting / autocomplete, and somewhat-alright docstrings.
### Type hinting for `INPUT_TYPES`
![INPUT_TYPES auto-completion in Visual Studio Code](examples/input_types.png)
### `INPUT_TYPES` return dict
![INPUT_TYPES return value type hinting in Visual Studio Code](examples/required_hint.png)
### Options for individual inputs
![INPUT_TYPES return value option auto-completion in Visual Studio Code](examples/input_options.png)
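Options for an individual input are passed as the second element of its tuple. A minimal sketch (the input name `steps` and its bounds are illustrative, not prescribed by this module):
```python
"steps": (IO.INT, {"default": 20, "min": 1, "max": 100, "step": 1, "tooltip": "How many steps to run"}),
```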

@@ -0,0 +1,46 @@
import torch
from typing import Callable, Protocol, TypedDict, Optional, List
from .node_typing import IO, InputTypeDict, ComfyNodeABC, CheckLazyMixin, FileLocator
class UnetApplyFunction(Protocol):
"""Function signature protocol on comfy.model_base.BaseModel.apply_model"""
def __call__(self, x: torch.Tensor, t: torch.Tensor, **kwargs) -> torch.Tensor:
pass
class UnetApplyConds(TypedDict):
"""Optional conditions for unet apply function."""
c_concat: Optional[torch.Tensor]
c_crossattn: Optional[torch.Tensor]
control: Optional[torch.Tensor]
transformer_options: Optional[dict]
class UnetParams(TypedDict):
# Tensor of shape [B, C, H, W]
input: torch.Tensor
# Tensor of shape [B]
timestep: torch.Tensor
c: UnetApplyConds
# List of [0, 1], [0], [1], ...
# 0 means conditional, 1 means unconditional
cond_or_uncond: List[int]
UnetWrapperFunction = Callable[[UnetApplyFunction, UnetParams], torch.Tensor]
__all__ = [
"UnetWrapperFunction",
UnetApplyConds.__name__,
UnetParams.__name__,
UnetApplyFunction.__name__,
IO.__name__,
InputTypeDict.__name__,
ComfyNodeABC.__name__,
CheckLazyMixin.__name__,
FileLocator.__name__,
]

@@ -0,0 +1,28 @@
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
from inspect import cleandoc
class ExampleNode(ComfyNodeABC):
"""An example node that just adds 1 to an input integer.
* Requires a modern IDE to provide any benefit (detail: an IDE configured with analysis paths etc).
* This node is intended as an example for developers only.
"""
DESCRIPTION = cleandoc(__doc__)
CATEGORY = "examples"
@classmethod
def INPUT_TYPES(s) -> InputTypeDict:
return {
"required": {
"input_int": (IO.INT, {"defaultInput": True}),
}
}
RETURN_TYPES = (IO.INT,)
RETURN_NAMES = ("input_plus_one",)
FUNCTION = "execute"
def execute(self, input_int: int):
return (input_int + 1,)

Binary file added (19 KiB)

Binary file added (16 KiB)

Binary file added (19 KiB)

@@ -0,0 +1,350 @@
"""Comfy-specific type hinting"""
from __future__ import annotations
from typing import Literal, TypedDict, Optional
from typing_extensions import NotRequired
from abc import ABC, abstractmethod
from enum import Enum
class StrEnum(str, Enum):
"""Base class for string enums. Python's StrEnum is not available until 3.11."""
def __str__(self) -> str:
return self.value
class IO(StrEnum):
"""Node input/output data types.
Includes functionality for ``"*"`` (`ANY`) and ``"MULTI,TYPES"``.
"""
STRING = "STRING"
IMAGE = "IMAGE"
MASK = "MASK"
LATENT = "LATENT"
BOOLEAN = "BOOLEAN"
INT = "INT"
FLOAT = "FLOAT"
COMBO = "COMBO"
CONDITIONING = "CONDITIONING"
SAMPLER = "SAMPLER"
SIGMAS = "SIGMAS"
GUIDER = "GUIDER"
NOISE = "NOISE"
CLIP = "CLIP"
CONTROL_NET = "CONTROL_NET"
VAE = "VAE"
MODEL = "MODEL"
LORA_MODEL = "LORA_MODEL"
LOSS_MAP = "LOSS_MAP"
CLIP_VISION = "CLIP_VISION"
CLIP_VISION_OUTPUT = "CLIP_VISION_OUTPUT"
STYLE_MODEL = "STYLE_MODEL"
GLIGEN = "GLIGEN"
UPSCALE_MODEL = "UPSCALE_MODEL"
AUDIO = "AUDIO"
WEBCAM = "WEBCAM"
POINT = "POINT"
FACE_ANALYSIS = "FACE_ANALYSIS"
BBOX = "BBOX"
SEGS = "SEGS"
VIDEO = "VIDEO"
ANY = "*"
"""Always matches any type, but at a price.
Causes some functionality issues (e.g. reroutes, link types), and should be avoided whenever possible.
"""
NUMBER = "FLOAT,INT"
"""A float or an int - could be either"""
PRIMITIVE = "STRING,FLOAT,INT,BOOLEAN"
"""Could be any of: string, float, int, or bool"""
def __ne__(self, value: object) -> bool:
if self == "*" or value == "*":
return False
if not isinstance(value, str):
return True
a = frozenset(self.split(","))
b = frozenset(value.split(","))
return not (b.issubset(a) or a.issubset(b))
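# Illustrative consequences of the subset rule above (not part of the original file):
# IO.NUMBER != "INT" -> False ("INT" is a subset of "FLOAT,INT")
# IO.NUMBER != "STRING" -> True (neither set contains the other)
# IO.ANY != IO.IMAGE -> False ("*" matches everything)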
class RemoteInputOptions(TypedDict):
route: str
"""The route to the remote source."""
refresh_button: bool
"""Specifies whether to show a refresh button in the UI below the widget."""
control_after_refresh: Literal["first", "last"]
"""Specifies the control after the refresh button is clicked. If "first", the first item will be automatically selected, and so on."""
timeout: int
"""The maximum amount of time to wait for a response from the remote source in milliseconds."""
max_retries: int
"""The maximum number of retries before aborting the request."""
refresh: int
"""The TTL of the remote input's value in milliseconds. Specifies the interval at which the remote input's value is refreshed."""
class MultiSelectOptions(TypedDict):
placeholder: NotRequired[str]
"""The placeholder text to display in the multi-select widget when no items are selected."""
chip: NotRequired[bool]
"""Specifies whether to use chips instead of comma separated values for the multi-select widget."""
class InputTypeOptions(TypedDict):
"""Provides type hinting for the return type of the INPUT_TYPES node function.
Due to IDE limitations with unions, for now all options are available for all types (e.g. `label_on` is hinted even when the type is not `IO.BOOLEAN`).
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/datatypes
"""
default: NotRequired[bool | str | float | int | list | tuple]
"""The default value of the widget"""
defaultInput: NotRequired[bool]
"""@deprecated in v1.16 frontend. v1.16 frontend allows input socket and widget to co-exist.
- defaultInput on required inputs should be dropped.
- defaultInput on optional inputs should be replaced with forceInput.
Ref: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3364
"""
forceInput: NotRequired[bool]
"""Forces the input to be an input slot rather than a widget even a widget is available for the input type."""
lazy: NotRequired[bool]
"""Declares that this input uses lazy evaluation"""
rawLink: NotRequired[bool]
"""When a link exists, rather than receiving the evaluated value, you will receive the link (i.e. `["nodeId", <outputIndex>]`). Designed for node expansion."""
tooltip: NotRequired[str]
"""Tooltip for the input (or widget), shown on pointer hover"""
socketless: NotRequired[bool]
"""All inputs (including widgets) have an input socket to connect links. When ``true``, if there is a widget for this input, no socket will be created.
Available from frontend v1.17.5
Ref: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3548
"""
widgetType: NotRequired[str]
"""Specifies a type to be used for widget initialization if different from the input type.
Available from frontend v1.18.0
https://github.com/Comfy-Org/ComfyUI_frontend/pull/3550"""
# class InputTypeNumber(InputTypeOptions):
# default: float | int
min: NotRequired[float]
"""The minimum value of a number (``FLOAT`` | ``INT``)"""
max: NotRequired[float]
"""The maximum value of a number (``FLOAT`` | ``INT``)"""
step: NotRequired[float]
"""The amount to increment or decrement a widget by when stepping up/down (``FLOAT`` | ``INT``)"""
round: NotRequired[float]
"""Floats are rounded by this value (``FLOAT``)"""
# class InputTypeBoolean(InputTypeOptions):
# default: bool
label_on: NotRequired[str]
"""The label to use in the UI when the bool is True (``BOOLEAN``)"""
label_off: NotRequired[str]
"""The label to use in the UI when the bool is False (``BOOLEAN``)"""
# class InputTypeString(InputTypeOptions):
# default: str
multiline: NotRequired[bool]
"""Use a multiline text box (``STRING``)"""
placeholder: NotRequired[str]
"""Placeholder text to display in the UI when empty (``STRING``)"""
# Deprecated:
# defaultVal: str
dynamicPrompts: NotRequired[bool]
"""Causes the front-end to evaluate dynamic prompts (``STRING``)"""
# class InputTypeCombo(InputTypeOptions):
image_upload: NotRequired[bool]
"""Specifies whether the input should have an image upload button and image preview attached to it. Requires that the input's name is `image`."""
image_folder: NotRequired[Literal["input", "output", "temp"]]
"""Specifies which folder to get preview images from if the input has the ``image_upload`` flag.
"""
remote: NotRequired[RemoteInputOptions]
"""Specifies the configuration for a remote input.
Available after ComfyUI frontend v1.9.7
https://github.com/Comfy-Org/ComfyUI_frontend/pull/2422"""
control_after_generate: NotRequired[bool]
"""Specifies whether a control widget should be added to the input, adding options to automatically change the value after each prompt is queued. Currently only used for INT and COMBO types."""
options: NotRequired[list[str | int | float]]
"""COMBO type only. Specifies the selectable options for the combo widget.
Prefer:
["COMBO", {"options": ["Option 1", "Option 2", "Option 3"]}]
Over:
[["Option 1", "Option 2", "Option 3"]]
"""
multi_select: NotRequired[MultiSelectOptions]
"""COMBO type only. Specifies the configuration for a multi-select widget.
Available after ComfyUI frontend v1.13.4
https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
class HiddenInputTypeDict(TypedDict):
"""Provides type hinting for the hidden entry of node INPUT_TYPES."""
node_id: NotRequired[Literal["UNIQUE_ID"]]
"""UNIQUE_ID is the unique identifier of the node, and matches the id property of the node on the client side. It is commonly used in client-server communications (see messages)."""
unique_id: NotRequired[Literal["UNIQUE_ID"]]
"""UNIQUE_ID is the unique identifier of the node, and matches the id property of the node on the client side. It is commonly used in client-server communications (see messages)."""
prompt: NotRequired[Literal["PROMPT"]]
"""PROMPT is the complete prompt sent by the client to the server. See the prompt object for a full description."""
extra_pnginfo: NotRequired[Literal["EXTRA_PNGINFO"]]
"""EXTRA_PNGINFO is a dictionary that will be copied into the metadata of any .png files saved. Custom nodes can store additional information in this dictionary for saving (or as a way to communicate with a downstream node)."""
dynprompt: NotRequired[Literal["DYNPROMPT"]]
"""DYNPROMPT is an instance of comfy_execution.graph.DynamicPrompt. It differs from PROMPT in that it may mutate during the course of execution in response to Node Expansion."""
class InputTypeDict(TypedDict):
"""Provides type hinting for node INPUT_TYPES.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/more_on_inputs
"""
required: NotRequired[dict[str, tuple[IO, InputTypeOptions]]]
"""Describes all inputs that must be connected for the node to execute."""
optional: NotRequired[dict[str, tuple[IO, InputTypeOptions]]]
"""Describes inputs which do not need to be connected."""
hidden: NotRequired[HiddenInputTypeDict]
"""Offers advanced functionality and server-client communication.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/more_on_inputs#hidden-inputs
"""
class ComfyNodeABC(ABC):
"""Abstract base class for Comfy nodes. Includes the names and expected types of attributes.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview
"""
DESCRIPTION: str
"""Node description, shown as a tooltip when hovering over the node.
Usage::
# Explicitly define the description
DESCRIPTION = "Example description here."
# Use the docstring of the node class.
DESCRIPTION = cleandoc(__doc__)
"""
CATEGORY: str
"""The category of the node, as per the "Add Node" menu.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#category
"""
EXPERIMENTAL: bool
"""Flags a node as experimental, informing users that it may change or not work as expected."""
DEPRECATED: bool
"""Flags a node as deprecated, indicating to users that they should find alternatives to this node."""
API_NODE: Optional[bool]
"""Flags a node as an API node. See: https://docs.comfy.org/tutorials/api-nodes/overview."""
@classmethod
@abstractmethod
def INPUT_TYPES(s) -> InputTypeDict:
"""Defines node inputs.
* Must include the ``required`` key, which describes all inputs that must be connected for the node to execute.
* The ``optional`` key can be added to describe inputs which do not need to be connected.
* The ``hidden`` key offers some advanced functionality. More info at: https://docs.comfy.org/custom-nodes/backend/more_on_inputs#hidden-inputs
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#input-types
"""
return {"required": {}}
OUTPUT_NODE: bool
"""Flags this node as an output node, causing any inputs it requires to be executed.
If a node is not connected to any output nodes, that node will not be executed. Usage::
OUTPUT_NODE = True
From the docs:
By default, a node is not considered an output. Set ``OUTPUT_NODE = True`` to specify that it is.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#output-node
"""
INPUT_IS_LIST: bool
"""A flag indicating if this node implements the additional code necessary to deal with OUTPUT_IS_LIST nodes.
All inputs of ``type`` will become ``list[type]``, regardless of how many items are passed in. This also affects ``check_lazy_status``.
From the docs:
A node can also override the default input behaviour and receive the whole list in a single call. This is done by setting a class attribute `INPUT_IS_LIST` to ``True``.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/lists#list-processing
"""
OUTPUT_IS_LIST: tuple[bool, ...]
"""A tuple indicating which node outputs are lists, but will be connected to nodes that expect individual items.
Connected nodes that do not implement `INPUT_IS_LIST` will be executed once for every item in the list.
A ``tuple[bool]``, where the items match those in `RETURN_TYPES`::
RETURN_TYPES = (IO.INT, IO.INT, IO.STRING)
OUTPUT_IS_LIST = (True, True, False) # The string output will be handled normally
From the docs:
In order to tell Comfy that the list being returned should not be wrapped, but treated as a series of data for sequential processing,
the node should provide a class attribute `OUTPUT_IS_LIST`, which is a ``tuple[bool]``, of the same length as `RETURN_TYPES`,
specifying which outputs should be so treated.
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/lists#list-processing
"""
RETURN_TYPES: tuple[IO, ...]
"""A tuple representing the outputs of this node.
Usage::
RETURN_TYPES = (IO.INT, "INT", "CUSTOM_TYPE")
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#return-types
"""
RETURN_NAMES: tuple[str, ...]
"""The output slot names for each item in `RETURN_TYPES`, e.g. ``RETURN_NAMES = ("count", "filter_string")``
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#return-names
"""
OUTPUT_TOOLTIPS: tuple[str, ...]
"""A tuple of strings to use as tooltips for node outputs, one for each item in `RETURN_TYPES`."""
FUNCTION: str
"""The name of the function to execute as a literal string, e.g. `FUNCTION = "execute"`
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#function
"""
class CheckLazyMixin:
"""Provides a basic check_lazy_status implementation and type hinting for nodes that use lazy inputs."""
def check_lazy_status(self, **kwargs) -> list[str]:
"""Returns a list of input names that should be evaluated.
This basic mixin implementation requires all inputs.
:kwargs: All node inputs will be included here. If an input is ``None``, it should be assumed that it has not yet been evaluated. \
When using ``INPUT_IS_LIST = True``, unevaluated inputs will instead be ``(None,)``.
Params should match the node's execution ``FUNCTION`` (self, and all inputs by name).
Will be executed repeatedly until it returns an empty list, or all requested items were already evaluated (and sent as params).
Comfy Docs: https://docs.comfy.org/custom-nodes/backend/lazy_evaluation#defining-check-lazy-status
"""
need = [name for name in kwargs if kwargs[name] is None]
return need
class FileLocator(TypedDict):
"""Provides type hinting for the file location"""
filename: str
"""The filename of the file."""
subfolder: str
"""The subfolder of the file."""
type: Literal["input", "output", "temp"]
"""The root folder of the file."""

comfy/conds.py Normal file (137 lines)

@@ -0,0 +1,137 @@
import torch
import math
import comfy.utils
import logging
class CONDRegular:
def __init__(self, cond):
self.cond = cond
def _copy_with(self, cond):
return self.__class__(cond)
def process_cond(self, batch_size, **kwargs):
return self._copy_with(comfy.utils.repeat_to_batch_size(self.cond, batch_size))
def can_concat(self, other):
if self.cond.shape != other.cond.shape:
return False
if self.cond.device != other.cond.device:
logging.warning("WARNING: conds not on same device, skipping concat.")
return False
return True
def concat(self, others):
conds = [self.cond]
for x in others:
conds.append(x.cond)
return torch.cat(conds)
def size(self):
return list(self.cond.size())
class CONDNoiseShape(CONDRegular):
def process_cond(self, batch_size, area, **kwargs):
data = self.cond
if area is not None:
dims = len(area) // 2
for i in range(dims):
data = data.narrow(i + 2, area[i + dims], area[i])
return self._copy_with(comfy.utils.repeat_to_batch_size(data, batch_size))
class CONDCrossAttn(CONDRegular):
def can_concat(self, other):
s1 = self.cond.shape
s2 = other.cond.shape
if s1 != s2:
if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen
return False
mult_min = math.lcm(s1[1], s2[1])
diff = mult_min // min(s1[1], s2[1])
if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much
return False
if self.cond.device != other.cond.device:
logging.warning("WARNING: conds not on same device: skipping concat.")
return False
return True
def concat(self, others):
conds = [self.cond]
crossattn_max_len = self.cond.shape[1]
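# e.g. token lengths 77 and 154 give lcm 154; the 77-token cond is tiled twice below (worked example, not an original comment)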
for x in others:
c = x.cond
crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1])
conds.append(c)
out = []
for c in conds:
if c.shape[1] < crossattn_max_len:
c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result
out.append(c)
return torch.cat(out)
class CONDConstant(CONDRegular):
def __init__(self, cond):
self.cond = cond
def process_cond(self, batch_size, **kwargs):
return self._copy_with(self.cond)
def can_concat(self, other):
if self.cond != other.cond:
return False
return True
def concat(self, others):
return self.cond
def size(self):
return [1]
class CONDList(CONDRegular):
def __init__(self, cond):
self.cond = cond
def process_cond(self, batch_size, **kwargs):
out = []
for c in self.cond:
out.append(comfy.utils.repeat_to_batch_size(c, batch_size))
return self._copy_with(out)
def can_concat(self, other):
if len(self.cond) != len(other.cond):
return False
for i in range(len(self.cond)):
if self.cond[i].shape != other.cond[i].shape:
return False
return True
def concat(self, others):
out = []
for i in range(len(self.cond)):
o = [self.cond[i]]
for x in others:
o.append(x.cond[i])
out.append(torch.cat(o))
return out
def size(self): # hackish implementation to make the mem estimation work
o = 0
c = 1
for c in self.cond:
size = c.size()
o += math.prod(size)
if len(size) > 1:
c = size[1]
return [1, c, o // c]

comfy/context_windows.py Normal file (540 lines)

@@ -0,0 +1,540 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Callable
import torch
import numpy as np
import collections
from dataclasses import dataclass
from abc import ABC, abstractmethod
import logging
import comfy.model_management
import comfy.patcher_extension
if TYPE_CHECKING:
from comfy.model_base import BaseModel
from comfy.model_patcher import ModelPatcher
from comfy.controlnet import ControlBase
class ContextWindowABC(ABC):
def __init__(self):
...
@abstractmethod
def get_tensor(self, full: torch.Tensor) -> torch.Tensor:
"""
Get torch.Tensor applicable to current window.
"""
raise NotImplementedError("Not implemented.")
@abstractmethod
def add_window(self, full: torch.Tensor, to_add: torch.Tensor) -> torch.Tensor:
"""
Apply torch.Tensor of window to the full tensor, in place. Returns reference to updated full tensor, not a copy.
"""
raise NotImplementedError("Not implemented.")
class ContextHandlerABC(ABC):
def __init__(self):
...
@abstractmethod
def should_use_context(self, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]) -> bool:
raise NotImplementedError("Not implemented.")
@abstractmethod
def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: ContextWindowABC, device=None) -> list:
raise NotImplementedError("Not implemented.")
@abstractmethod
def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
raise NotImplementedError("Not implemented.")
class IndexListContextWindow(ContextWindowABC):
def __init__(self, index_list: list[int], dim: int=0):
self.index_list = index_list
self.context_length = len(index_list)
self.dim = dim
def get_tensor(self, full: torch.Tensor, device=None, dim=None) -> torch.Tensor:
if dim is None:
dim = self.dim
if dim == 0 and full.shape[dim] == 1:
return full
idx = [slice(None)] * dim + [self.index_list]
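# e.g. dim=2 with index_list=[0, 1, 2, 3] selects full[:, :, [0, 1, 2, 3]] (illustrative, not an original comment)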
return full[idx].to(device)
def add_window(self, full: torch.Tensor, to_add: torch.Tensor, dim=None) -> torch.Tensor:
if dim is None:
dim = self.dim
idx = [slice(None)] * dim + [self.index_list]
full[idx] += to_add
return full
class IndexListCallbacks:
EVALUATE_CONTEXT_WINDOWS = "evaluate_context_windows"
COMBINE_CONTEXT_WINDOW_RESULTS = "combine_context_window_results"
EXECUTE_START = "execute_start"
EXECUTE_CLEANUP = "execute_cleanup"
def init_callbacks(self):
return {}
@dataclass
class ContextSchedule:
name: str
func: Callable
@dataclass
class ContextFuseMethod:
name: str
func: Callable
ContextResults = collections.namedtuple("ContextResults", ['window_idx', 'sub_conds_out', 'sub_conds', 'window'])
class IndexListContextHandler(ContextHandlerABC):
def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1, closed_loop=False, dim=0):
self.context_schedule = context_schedule
self.fuse_method = fuse_method
self.context_length = context_length
self.context_overlap = context_overlap
self.context_stride = context_stride
self.closed_loop = closed_loop
self.dim = dim
self._step = 0
self.callbacks = {}
def should_use_context(self, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]) -> bool:
# for now, assume first dim is batch - should have stored on BaseModel in actual implementation
if x_in.size(self.dim) > self.context_length:
logging.info(f"Using context windows {self.context_length} for {x_in.size(self.dim)} frames.")
return True
return False
def prepare_control_objects(self, control: ControlBase, device=None) -> ControlBase:
if control.previous_controlnet is not None:
self.prepare_control_objects(control.previous_controlnet, device)
return control
def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: IndexListContextWindow, device=None) -> list:
if cond_in is None:
return None
# reuse or resize cond items to match context requirements
resized_cond = []
# cond object is a list containing a dict - outer list is irrelevant, so just loop through it
for actual_cond in cond_in:
resized_actual_cond = actual_cond.copy()
# now we are in the inner dict - "pooled_output" is a tensor, "control" is a ControlBase object, "model_conds" is a dictionary
for key in actual_cond:
try:
cond_item = actual_cond[key]
if isinstance(cond_item, torch.Tensor):
# check that tensor is the expected length - x.size(0)
if self.dim < cond_item.ndim and cond_item.size(self.dim) == x_in.size(self.dim):
# if so, it's subsetting time - tell controls the expected indices so they can handle them
actual_cond_item = window.get_tensor(cond_item)
resized_actual_cond[key] = actual_cond_item.to(device)
else:
resized_actual_cond[key] = cond_item.to(device)
# look for control
elif key == "control":
resized_actual_cond[key] = self.prepare_control_objects(cond_item, device)
elif isinstance(cond_item, dict):
new_cond_item = cond_item.copy()
# when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
for cond_key, cond_value in new_cond_item.items():
if isinstance(cond_value, torch.Tensor):
if cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim):
new_cond_item[cond_key] = window.get_tensor(cond_value, device)
# if has cond that is a Tensor, check if needs to be subset
elif hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
if cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim):
new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device))
elif cond_key == "num_video_frames": # for SVD
new_cond_item[cond_key] = cond_value._copy_with(cond_value.cond)
new_cond_item[cond_key].cond = window.context_length
resized_actual_cond[key] = new_cond_item
else:
resized_actual_cond[key] = cond_item
finally:
del cond_item # just in case to prevent VRAM issues
resized_cond.append(resized_actual_cond)
return resized_cond
def set_step(self, timestep: torch.Tensor, model_options: dict[str]):
mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep, rtol=0.0001)
matches = torch.nonzero(mask)
if torch.numel(matches) == 0:
raise Exception("No sample_sigmas matched current timestep; something went wrong.")
self._step = int(matches[0].item())
def get_context_windows(self, model: BaseModel, x_in: torch.Tensor, model_options: dict[str]) -> list[IndexListContextWindow]:
full_length = x_in.size(self.dim) # TODO: choose dim based on model
context_windows = self.context_schedule.func(full_length, self, model_options)
context_windows = [IndexListContextWindow(window, dim=self.dim) for window in context_windows]
return context_windows
def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
self.set_step(timestep, model_options)
context_windows = self.get_context_windows(model, x_in, model_options)
enumerated_context_windows = list(enumerate(context_windows))
conds_final = [torch.zeros_like(x_in) for _ in conds]
if self.fuse_method.name == ContextFuseMethods.RELATIVE:
counts_final = [torch.ones(get_shape_for_dim(x_in, self.dim), device=x_in.device) for _ in conds]
else:
counts_final = [torch.zeros(get_shape_for_dim(x_in, self.dim), device=x_in.device) for _ in conds]
biases_final = [([0.0] * x_in.shape[self.dim]) for _ in conds]
for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EXECUTE_START, self.callbacks):
callback(self, model, x_in, conds, timestep, model_options)
for enum_window in enumerated_context_windows:
results = self.evaluate_context_windows(calc_cond_batch, model, x_in, conds, timestep, [enum_window], model_options)
for result in results:
self.combine_context_window_results(x_in, result.sub_conds_out, result.sub_conds, result.window, result.window_idx, len(enumerated_context_windows), timestep,
conds_final, counts_final, biases_final)
try:
# finalize conds
if self.fuse_method.name == ContextFuseMethods.RELATIVE:
# relative is already normalized, so return as is
del counts_final
return conds_final
else:
# normalize conds via division by context usage counts
for i in range(len(conds_final)):
conds_final[i] /= counts_final[i]
del counts_final
return conds_final
finally:
for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EXECUTE_CLEANUP, self.callbacks):
callback(self, model, x_in, conds, timestep, model_options)
def evaluate_context_windows(self, calc_cond_batch: Callable, model: BaseModel, x_in: torch.Tensor, conds, timestep: torch.Tensor, enumerated_context_windows: list[tuple[int, IndexListContextWindow]],
model_options, device=None, first_device=None):
results: list[ContextResults] = []
for window_idx, window in enumerated_context_windows:
# allow processing to end between context window executions for faster Cancel
comfy.model_management.throw_exception_if_processing_interrupted()
for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EVALUATE_CONTEXT_WINDOWS, self.callbacks):
callback(self, model, x_in, conds, timestep, model_options, window_idx, window, model_options, device, first_device)
# update exposed params
model_options["transformer_options"]["context_window"] = window
# get subsections of x, timestep, conds
sub_x = window.get_tensor(x_in, device)
sub_timestep = window.get_tensor(timestep, device, dim=0)
sub_conds = [self.get_resized_cond(cond, x_in, window, device) for cond in conds]
sub_conds_out = calc_cond_batch(model, sub_conds, sub_x, sub_timestep, model_options)
if device is not None:
for i in range(len(sub_conds_out)):
sub_conds_out[i] = sub_conds_out[i].to(x_in.device)
results.append(ContextResults(window_idx, sub_conds_out, sub_conds, window))
return results
def combine_context_window_results(self, x_in: torch.Tensor, sub_conds_out, sub_conds, window: IndexListContextWindow, window_idx: int, total_windows: int, timestep: torch.Tensor,
conds_final: list[torch.Tensor], counts_final: list[torch.Tensor], biases_final: list[torch.Tensor]):
if self.fuse_method.name == ContextFuseMethods.RELATIVE:
for pos, idx in enumerate(window.index_list):
# bias is the influence of a specific index in relation to the whole context window
bias = 1 - abs(idx - (window.index_list[0] + window.index_list[-1]) / 2) / ((window.index_list[-1] - window.index_list[0] + 1e-2) / 2)
bias = max(1e-2, bias)
# take weighted average relative to total bias of current idx
for i in range(len(sub_conds_out)):
bias_total = biases_final[i][idx]
prev_weight = (bias_total / (bias_total + bias))
new_weight = (bias / (bias_total + bias))
# account for dims of tensors
idx_window = [slice(None)] * self.dim + [idx]
pos_window = [slice(None)] * self.dim + [pos]
# apply new values
conds_final[i][idx_window] = conds_final[i][idx_window] * prev_weight + sub_conds_out[i][pos_window] * new_weight
biases_final[i][idx] = bias_total + bias
else:
# add conds and counts based on weights of fuse method
weights = get_context_weights(window.context_length, x_in.shape[self.dim], window.index_list, self, sigma=timestep)
weights_tensor = match_weights_to_dim(weights, x_in, self.dim, device=x_in.device)
for i in range(len(sub_conds_out)):
window.add_window(conds_final[i], sub_conds_out[i] * weights_tensor)
window.add_window(counts_final[i], weights_tensor)
for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.COMBINE_CONTEXT_WINDOW_RESULTS, self.callbacks):
callback(self, x_in, sub_conds_out, sub_conds, window, window_idx, total_windows, timestep, conds_final, counts_final, biases_final)
def _prepare_sampling_wrapper(executor, model, noise_shape: torch.Tensor, *args, **kwargs):
# limit noise_shape length to context_length for more accurate vram use estimation
model_options = kwargs.get("model_options", None)
if model_options is None:
raise Exception("model_options not found in prepare_sampling_wrapper; this should never happen, something went wrong.")
handler: IndexListContextHandler = model_options.get("context_handler", None)
if handler is not None:
noise_shape = list(noise_shape)
noise_shape[handler.dim] = min(noise_shape[handler.dim], handler.context_length)
return executor(model, noise_shape, *args, **kwargs)
def create_prepare_sampling_wrapper(model: ModelPatcher):
model.add_wrapper_with_key(
comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING,
"ContextWindows_prepare_sampling",
_prepare_sampling_wrapper
)
def match_weights_to_dim(weights: list[float], x_in: torch.Tensor, dim: int, device=None) -> torch.Tensor:
total_dims = len(x_in.shape)
weights_tensor = torch.Tensor(weights).to(device=device)
for _ in range(dim):
weights_tensor = weights_tensor.unsqueeze(0)
for _ in range(total_dims - dim - 1):
weights_tensor = weights_tensor.unsqueeze(-1)
return weights_tensor
def get_shape_for_dim(x_in: torch.Tensor, dim: int) -> list[int]:
total_dims = len(x_in.shape)
shape = []
for _ in range(dim):
shape.append(1)
shape.append(x_in.shape[dim])
for _ in range(total_dims - dim - 1):
shape.append(1)
return shape
class ContextSchedules:
UNIFORM_LOOPED = "looped_uniform"
UNIFORM_STANDARD = "standard_uniform"
STATIC_STANDARD = "standard_static"
BATCHED = "batched"
# from https://github.com/neggles/animatediff-cli/blob/main/src/animatediff/pipelines/context.py
def create_windows_uniform_looped(num_frames: int, handler: IndexListContextHandler, model_options: dict[str]):
windows = []
if num_frames < handler.context_length:
windows.append(list(range(num_frames)))
return windows
context_stride = min(handler.context_stride, int(np.ceil(np.log2(num_frames / handler.context_length))) + 1)
# obtain uniform windows as normal, looping and all
for context_step in 1 << np.arange(context_stride):
pad = int(round(num_frames * ordered_halving(handler._step)))
for j in range(
int(ordered_halving(handler._step) * context_step) + pad,
num_frames + pad + (0 if handler.closed_loop else -handler.context_overlap),
(handler.context_length * context_step - handler.context_overlap),
):
windows.append([e % num_frames for e in range(j, j + handler.context_length * context_step, context_step)])
return windows
def create_windows_uniform_standard(num_frames: int, handler: IndexListContextHandler, model_options: dict[str]):
# unlike looped, uniform_straight does NOT allow windows that loop back to the beginning;
# instead, they get shifted to the corresponding end of the frames.
# in the case that a window (shifted or not) is identical to the previous one, it gets skipped.
windows = []
if num_frames <= handler.context_length:
windows.append(list(range(num_frames)))
return windows
context_stride = min(handler.context_stride, int(np.ceil(np.log2(num_frames / handler.context_length))) + 1)
# first, obtain uniform windows as normal, looping and all
for context_step in 1 << np.arange(context_stride):
pad = int(round(num_frames * ordered_halving(handler._step)))
for j in range(
int(ordered_halving(handler._step) * context_step) + pad,
num_frames + pad + (-handler.context_overlap),
(handler.context_length * context_step - handler.context_overlap),
):
windows.append([e % num_frames for e in range(j, j + handler.context_length * context_step, context_step)])
# now that windows are created, shift any windows that loop, and delete duplicate windows
delete_idxs = []
win_i = 0
while win_i < len(windows):
# if window rolls over itself, it needs to be shifted
is_roll, roll_idx = does_window_roll_over(windows[win_i], num_frames)
if is_roll:
roll_val = windows[win_i][roll_idx] # roll_val might not be 0 for windows of higher strides
shift_window_to_end(windows[win_i], num_frames=num_frames)
# check if next window (cyclical) is missing roll_val
if roll_val not in windows[(win_i+1) % len(windows)]:
# need to insert new window here - just insert window starting at roll_val
windows.insert(win_i+1, list(range(roll_val, roll_val + handler.context_length)))
# delete window if it's not unique
for pre_i in range(0, win_i):
if windows[win_i] == windows[pre_i]:
delete_idxs.append(win_i)
break
win_i += 1
# reverse delete_idxs so that they will be deleted in an order that doesn't break idx correlation
delete_idxs.reverse()
for i in delete_idxs:
windows.pop(i)
return windows
def create_windows_static_standard(num_frames: int, handler: IndexListContextHandler, model_options: dict[str]):
windows = []
if num_frames <= handler.context_length:
windows.append(list(range(num_frames)))
return windows
# always return the same set of windows
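# e.g. num_frames=10, context_length=4, context_overlap=1 yields windows [0..3], [3..6], [6..9] (worked example, not an original comment)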
delta = handler.context_length - handler.context_overlap
for start_idx in range(0, num_frames, delta):
# if past the end of frames, move start_idx back to allow same context_length
ending = start_idx + handler.context_length
if ending >= num_frames:
final_delta = ending - num_frames
final_start_idx = start_idx - final_delta
windows.append(list(range(final_start_idx, final_start_idx + handler.context_length)))
break
windows.append(list(range(start_idx, start_idx + handler.context_length)))
return windows
def create_windows_batched(num_frames: int, handler: IndexListContextHandler, model_options: dict[str]):
windows = []
if num_frames <= handler.context_length:
windows.append(list(range(num_frames)))
return windows
# always return the same set of windows;
# no overlap, just cut up based on context_length;
# last window size will be different if num_frames % opts.context_length != 0
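# e.g. num_frames=10, context_length=4 yields [0..3], [4..7], [8..9] (worked example, not an original comment)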
for start_idx in range(0, num_frames, handler.context_length):
windows.append(list(range(start_idx, min(start_idx + handler.context_length, num_frames))))
return windows
def create_windows_default(num_frames: int, handler: IndexListContextHandler):
return [list(range(num_frames))]
CONTEXT_MAPPING = {
ContextSchedules.UNIFORM_LOOPED: create_windows_uniform_looped,
ContextSchedules.UNIFORM_STANDARD: create_windows_uniform_standard,
ContextSchedules.STATIC_STANDARD: create_windows_static_standard,
ContextSchedules.BATCHED: create_windows_batched,
}
def get_matching_context_schedule(context_schedule: str) -> ContextSchedule:
func = CONTEXT_MAPPING.get(context_schedule, None)
if func is None:
raise ValueError(f"Unknown context_schedule '{context_schedule}'.")
return ContextSchedule(context_schedule, func)
def get_context_weights(length: int, full_length: int, idxs: list[int], handler: IndexListContextHandler, sigma: torch.Tensor=None):
return handler.fuse_method.func(length, sigma=sigma, handler=handler, full_length=full_length, idxs=idxs)
def create_weights_flat(length: int, **kwargs) -> list[float]:
# weight is the same for all
return [1.0] * length
def create_weights_pyramid(length: int, **kwargs) -> list[float]:
# weight is based on the distance away from the edge of the context window;
# based on weighted average concept in FreeNoise paper
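# e.g. length=4 -> [1, 2, 2, 1]; length=5 -> [1, 2, 3, 2, 1] (worked examples, not an original comment)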
if length % 2 == 0:
max_weight = length // 2
weight_sequence = list(range(1, max_weight + 1, 1)) + list(range(max_weight, 0, -1))
else:
max_weight = (length + 1) // 2
weight_sequence = list(range(1, max_weight, 1)) + [max_weight] + list(range(max_weight - 1, 0, -1))
return weight_sequence
def create_weights_overlap_linear(length: int, full_length: int, idxs: list[int], handler: IndexListContextHandler, **kwargs):
# based on code in Kijai's WanVideoWrapper: https://github.com/kijai/ComfyUI-WanVideoWrapper/blob/dbb2523b37e4ccdf45127e5ae33e31362f755c8e/nodes.py#L1302
# only expected overlap is given different weights
weights_torch = torch.ones((length))
# blend left-side on all except first window
if min(idxs) > 0:
ramp_up = torch.linspace(1e-37, 1, handler.context_overlap)
weights_torch[:handler.context_overlap] = ramp_up
# blend right-side on all except last window
if max(idxs) < full_length-1:
ramp_down = torch.linspace(1, 1e-37, handler.context_overlap)
weights_torch[-handler.context_overlap:] = ramp_down
return weights_torch
class ContextFuseMethods:
FLAT = "flat"
PYRAMID = "pyramid"
RELATIVE = "relative"
OVERLAP_LINEAR = "overlap-linear"
LIST = [PYRAMID, FLAT, OVERLAP_LINEAR]
LIST_STATIC = [PYRAMID, RELATIVE, FLAT, OVERLAP_LINEAR]
FUSE_MAPPING = {
ContextFuseMethods.FLAT: create_weights_flat,
ContextFuseMethods.PYRAMID: create_weights_pyramid,
ContextFuseMethods.RELATIVE: create_weights_pyramid,
ContextFuseMethods.OVERLAP_LINEAR: create_weights_overlap_linear,
}
def get_matching_fuse_method(fuse_method: str) -> ContextFuseMethod:
func = FUSE_MAPPING.get(fuse_method, None)
if func is None:
raise ValueError(f"Unknown fuse_method '{fuse_method}'.")
return ContextFuseMethod(fuse_method, func)
# Returns fraction that has denominator that is a power of 2
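# e.g. ordered_halving(1) == 0.5, ordered_halving(2) == 0.25, ordered_halving(3) == 0.75;
# bit-reversing the step index yields a low-discrepancy sequence in [0, 1) (worked examples, not an original comment)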
def ordered_halving(val):
# get binary value, padded with 0s for 64 bits
bin_str = f"{val:064b}"
# flip binary value, padding included
bin_flip = bin_str[::-1]
# convert binary to int
as_int = int(bin_flip, 2)
# divide by 1 << 64, equivalent to 2**64, or 18446744073709551616,
# or b10000000000000000000000000000000000000000000000000000000000000000 (1 with 64 zeros)
return as_int / (1 << 64)
def get_missing_indexes(windows: list[list[int]], num_frames: int) -> list[int]:
all_indexes = list(range(num_frames))
for w in windows:
for val in w:
try:
all_indexes.remove(val)
except ValueError:
pass
return all_indexes
def does_window_roll_over(window: list[int], num_frames: int) -> tuple[bool, int]:
prev_val = -1
for i, val in enumerate(window):
val = val % num_frames
if val < prev_val:
return True, i
prev_val = val
return False, -1
def shift_window_to_start(window: list[int], num_frames: int):
start_val = window[0]
for i in range(len(window)):
# 1) subtract each element by start_val to move vals relative to the start of all frames
# 2) add num_frames and take modulus to get adjusted vals
window[i] = ((window[i] - start_val) + num_frames) % num_frames
def shift_window_to_end(window: list[int], num_frames: int):
# 1) shift window to start
shift_window_to_start(window, num_frames)
end_val = window[-1]
end_delta = num_frames - end_val - 1
for i in range(len(window)):
# 2) add end_delta to each val to slide windows to end
window[i] = window[i] + end_delta
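# e.g. window [8, 9, 0, 1] with num_frames=10: shift_window_to_start gives [0, 1, 2, 3], and shift_window_to_end then gives [6, 7, 8, 9] (worked example, not an original comment)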

comfy/controlnet.py Normal file (884 lines)

@@ -0,0 +1,884 @@
"""
This file is part of ComfyUI.
Copyright (C) 2024 Comfy
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import torch
from enum import Enum
import math
import os
import logging
import comfy.utils
import comfy.model_management
import comfy.model_detection
import comfy.model_patcher
import comfy.ops
import comfy.latent_formats
import comfy.model_base
import comfy.cldm.cldm
import comfy.t2i_adapter.adapter
import comfy.ldm.cascade.controlnet
import comfy.cldm.mmdit
import comfy.ldm.hydit.controlnet
import comfy.ldm.flux.controlnet
import comfy.ldm.qwen_image.controlnet
import comfy.cldm.dit_embedder
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from comfy.hooks import HookGroup
def broadcast_image_to(tensor, target_batch_size, batched_number):
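# e.g. a 2-image hint with target_batch_size=6 and batched_number=2: per-cond batch is 3, the hint becomes [img0, img1, img0], then is tiled once per batched cond (worked example, not an original comment)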
current_batch_size = tensor.shape[0]
if current_batch_size == 1:
return tensor
per_batch = target_batch_size // batched_number
tensor = tensor[:per_batch]
if per_batch > tensor.shape[0]:
tensor = torch.cat([tensor] * (per_batch // tensor.shape[0]) + [tensor[:(per_batch % tensor.shape[0])]], dim=0)
current_batch_size = tensor.shape[0]
if current_batch_size == target_batch_size:
return tensor
else:
return torch.cat([tensor] * batched_number, dim=0)
class StrengthType(Enum):
CONSTANT = 1
LINEAR_UP = 2
class ControlBase:
def __init__(self):
self.cond_hint_original = None
self.cond_hint = None
self.strength = 1.0
self.timestep_percent_range = (0.0, 1.0)
self.latent_format = None
self.vae = None
self.global_average_pooling = False
self.timestep_range = None
self.compression_ratio = 8
self.upscale_algorithm = 'nearest-exact'
self.extra_args = {}
self.previous_controlnet = None
self.extra_conds = []
self.strength_type = StrengthType.CONSTANT
self.concat_mask = False
self.extra_concat_orig = []
self.extra_concat = None
self.extra_hooks: HookGroup = None
self.preprocess_image = lambda a: a
def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0), vae=None, extra_concat=[]):
self.cond_hint_original = cond_hint
self.strength = strength
self.timestep_percent_range = timestep_percent_range
if self.latent_format is not None:
if vae is None:
logging.warning("WARNING: no VAE provided to the controlnet apply node when this controlnet requires one.")
self.vae = vae
self.extra_concat_orig = extra_concat.copy()
if self.concat_mask and len(self.extra_concat_orig) == 0:
self.extra_concat_orig.append(torch.tensor([[[[1.0]]]]))
return self
def pre_run(self, model, percent_to_timestep_function):
self.timestep_range = (percent_to_timestep_function(self.timestep_percent_range[0]), percent_to_timestep_function(self.timestep_percent_range[1]))
if self.previous_controlnet is not None:
self.previous_controlnet.pre_run(model, percent_to_timestep_function)
def set_previous_controlnet(self, controlnet):
self.previous_controlnet = controlnet
return self
def cleanup(self):
if self.previous_controlnet is not None:
self.previous_controlnet.cleanup()
self.cond_hint = None
self.extra_concat = None
self.timestep_range = None
def get_models(self):
out = []
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_models()
return out
def get_extra_hooks(self):
out = []
if self.extra_hooks is not None:
out.append(self.extra_hooks)
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_extra_hooks()
return out
def copy_to(self, c):
c.cond_hint_original = self.cond_hint_original
c.strength = self.strength
c.timestep_percent_range = self.timestep_percent_range
c.global_average_pooling = self.global_average_pooling
c.compression_ratio = self.compression_ratio
c.upscale_algorithm = self.upscale_algorithm
c.latent_format = self.latent_format
c.extra_args = self.extra_args.copy()
c.vae = self.vae
c.extra_conds = self.extra_conds.copy()
c.strength_type = self.strength_type
c.concat_mask = self.concat_mask
c.extra_concat_orig = self.extra_concat_orig.copy()
c.extra_hooks = self.extra_hooks.clone() if self.extra_hooks else None
c.preprocess_image = self.preprocess_image
def inference_memory_requirements(self, dtype):
if self.previous_controlnet is not None:
return self.previous_controlnet.inference_memory_requirements(dtype)
return 0
def control_merge(self, control, control_prev, output_dtype):
out = {'input':[], 'middle':[], 'output': []}
for key in control:
control_output = control[key]
applied_to = set()
for i in range(len(control_output)):
x = control_output[i]
if x is not None:
if self.global_average_pooling:
x = torch.mean(x, dim=(2, 3), keepdim=True).repeat(1, 1, x.shape[2], x.shape[3])
if x not in applied_to: #memory saving strategy, allow shared tensors and only apply strength to shared tensors once
applied_to.add(x)
if self.strength_type == StrengthType.CONSTANT:
x *= self.strength
elif self.strength_type == StrengthType.LINEAR_UP:
x *= (self.strength ** float(len(control_output) - i))
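# e.g. with 3 control outputs and strength 0.5, LINEAR_UP scales them by 0.125, 0.25, 0.5 from first to last (illustrative, not an original comment)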
if output_dtype is not None and x.dtype != output_dtype:
x = x.to(output_dtype)
out[key].append(x)
if control_prev is not None:
for x in ['input', 'middle', 'output']:
o = out[x]
for i in range(len(control_prev[x])):
prev_val = control_prev[x][i]
if i >= len(o):
o.append(prev_val)
elif prev_val is not None:
if o[i] is None:
o[i] = prev_val
else:
if o[i].shape[0] < prev_val.shape[0]:
o[i] = prev_val + o[i]
else:
o[i] = prev_val + o[i] #TODO: change back to inplace add if shared tensors stop being an issue
return out
def set_extra_arg(self, argument, value=None):
self.extra_args[argument] = value
class ControlNet(ControlBase):
def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT, concat_mask=False, preprocess_image=lambda a: a):
super().__init__()
self.control_model = control_model
self.load_device = load_device
if control_model is not None:
self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())
self.compression_ratio = compression_ratio
self.global_average_pooling = global_average_pooling
self.model_sampling_current = None
self.manual_cast_dtype = manual_cast_dtype
self.latent_format = latent_format
self.extra_conds += extra_conds
self.strength_type = strength_type
self.concat_mask = concat_mask
self.preprocess_image = preprocess_image
def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
control_prev = None
if self.previous_controlnet is not None:
control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number, transformer_options)
if self.timestep_range is not None:
if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]:
if control_prev is not None:
return control_prev
else:
return None
dtype = self.control_model.dtype
if self.manual_cast_dtype is not None:
dtype = self.manual_cast_dtype
if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.cond_hint = None
compression_ratio = self.compression_ratio
if self.vae is not None:
compression_ratio *= self.vae.spacial_compression_encode()
else:
if self.latent_format is not None:
raise ValueError("This Controlnet needs a VAE but none was provided, please use a ControlNetApply node with a VAE input and connect it.")
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, x_noisy.shape[-1] * compression_ratio, x_noisy.shape[-2] * compression_ratio, self.upscale_algorithm, "center")
self.cond_hint = self.preprocess_image(self.cond_hint)
if self.vae is not None:
loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
self.cond_hint = self.vae.encode(self.cond_hint.movedim(1, -1))
comfy.model_management.load_models_gpu(loaded_models)
if self.latent_format is not None:
self.cond_hint = self.latent_format.process_in(self.cond_hint)
if len(self.extra_concat_orig) > 0:
to_concat = []
for c in self.extra_concat_orig:
c = c.to(self.cond_hint.device)
c = comfy.utils.common_upscale(c, self.cond_hint.shape[-1], self.cond_hint.shape[-2], self.upscale_algorithm, "center")
if c.ndim < self.cond_hint.ndim:
c = c.unsqueeze(2)
c = comfy.utils.repeat_to_batch_size(c, self.cond_hint.shape[2], dim=2)
to_concat.append(comfy.utils.repeat_to_batch_size(c, self.cond_hint.shape[0]))
self.cond_hint = torch.cat([self.cond_hint] + to_concat, dim=1)
self.cond_hint = self.cond_hint.to(device=x_noisy.device, dtype=dtype)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
context = cond.get('crossattn_controlnet', cond['c_crossattn'])
extra = self.extra_args.copy()
for c in self.extra_conds:
temp = cond.get(c, None)
if temp is not None:
extra[c] = comfy.model_base.convert_tensor(temp, dtype, x_noisy.device)
timestep = self.model_sampling_current.timestep(t)
x_noisy = self.model_sampling_current.calculate_input(t, x_noisy)
control = self.control_model(x=x_noisy.to(dtype), hint=self.cond_hint, timesteps=timestep.to(dtype), context=comfy.model_management.cast_to_device(context, x_noisy.device, dtype), **extra)
return self.control_merge(control, control_prev, output_dtype=None)
def copy(self):
c = ControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
c.control_model = self.control_model
c.control_model_wrapped = self.control_model_wrapped
self.copy_to(c)
return c
def get_models(self):
out = super().get_models()
out.append(self.control_model_wrapped)
return out
def pre_run(self, model, percent_to_timestep_function):
super().pre_run(model, percent_to_timestep_function)
self.model_sampling_current = model.model_sampling
def cleanup(self):
self.model_sampling_current = None
super().cleanup()
class ControlLoraOps:
class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
def __init__(self, in_features: int, out_features: int, bias: bool = True,
device=None, dtype=None) -> None:
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = None
self.up = None
self.down = None
self.bias = None
def forward(self, input):
weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
if self.up is not None:
x = torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
else:
x = torch.nn.functional.linear(input, weight, bias)
comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
return x
class Conv2d(torch.nn.Module, comfy.ops.CastWeightBiasOp):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
padding_mode='zeros',
device=None,
dtype=None
):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.transposed = False
self.output_padding = 0
self.groups = groups
self.padding_mode = padding_mode
self.weight = None
self.bias = None
self.up = None
self.down = None
def forward(self, input):
weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
if self.up is not None:
x = torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
else:
x = torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
return x
class ControlLora(ControlNet):
def __init__(self, control_weights, global_average_pooling=False, model_options={}): #TODO? model_options
ControlBase.__init__(self)
self.control_weights = control_weights
self.global_average_pooling = global_average_pooling
self.extra_conds += ["y"]
def pre_run(self, model, percent_to_timestep_function):
super().pre_run(model, percent_to_timestep_function)
controlnet_config = model.model_config.unet_config.copy()
controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1]
self.manual_cast_dtype = model.manual_cast_dtype
dtype = model.get_dtype()
if self.manual_cast_dtype is None:
class control_lora_ops(ControlLoraOps, comfy.ops.disable_weight_init):
pass
else:
class control_lora_ops(ControlLoraOps, comfy.ops.manual_cast):
pass
dtype = self.manual_cast_dtype
controlnet_config["operations"] = control_lora_ops
controlnet_config["dtype"] = dtype
self.control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)
self.control_model.to(comfy.model_management.get_torch_device())
diffusion_model = model.diffusion_model
sd = diffusion_model.state_dict()
for k in sd:
weight = sd[k]
try:
comfy.utils.set_attr_param(self.control_model, k, weight)
except Exception:
pass  # not every diffusion model key has a counterpart in the controlnet
for k in self.control_weights:
if (k not in {"lora_controlnet"}):
if (k.endswith(".up") or k.endswith(".down") or k.endswith(".weight") or k.endswith(".bias")) and ("__" not in k):
comfy.utils.set_attr_param(self.control_model, k, self.control_weights[k].to(dtype).to(comfy.model_management.get_torch_device()))
def copy(self):
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
self.copy_to(c)
return c
def cleanup(self):
del self.control_model
self.control_model = None
super().cleanup()
def get_models(self):
out = ControlBase.get_models(self)
return out
def inference_memory_requirements(self, dtype):
return comfy.utils.calculate_parameters(self.control_weights) * comfy.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype)
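# inference_memory_requirements above prices the weights as parameter count
# times bytes per element. A rough standalone sketch of the same estimate
# (helper name hypothetical):
def _estimate_weight_bytes(state_dict, dtype_bytes=2):
    # total elements across all tensors, times element size (2 bytes for fp16/bf16)
    return sum(t.numel() for t in state_dict.values()) * dtype_bytes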
def controlnet_config(sd, model_options={}):
model_config = comfy.model_detection.model_config_from_unet(sd, "", True)
unet_dtype = model_options.get("dtype", None)
if unet_dtype is None:
weight_dtype = comfy.utils.weight_dtype(sd)
supported_inference_dtypes = list(model_config.supported_inference_dtypes)
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes, weight_dtype=weight_dtype)
load_device = comfy.model_management.get_torch_device()
manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
operations = model_options.get("custom_operations", None)
if operations is None:
operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
offload_device = comfy.model_management.unet_offload_device()
return model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device
def controlnet_load_state_dict(control_model, sd):
missing, unexpected = control_model.load_state_dict(sd, strict=False)
if len(missing) > 0:
logging.warning("missing controlnet keys: {}".format(missing))
if len(unexpected) > 0:
logging.debug("unexpected controlnet keys: {}".format(unexpected))
return control_model
def load_controlnet_mmdit(sd, model_options={}):
new_sd = comfy.model_detection.convert_diffusers_mmdit(sd, "")
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(new_sd, model_options=model_options)
num_blocks = comfy.model_detection.count_blocks(new_sd, 'joint_blocks.{}.')
for k in sd:
new_sd[k] = sd[k]
concat_mask = False
control_latent_channels = new_sd.get("pos_embed_input.proj.weight").shape[1]
if control_latent_channels == 17: #inpaint controlnet
concat_mask = True
control_model = comfy.cldm.mmdit.ControlNet(num_blocks=num_blocks, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
control_model = controlnet_load_state_dict(control_model, new_sd)
latent_format = comfy.latent_formats.SD3()
latent_format.shift_factor = 0 #SD3 controlnet weirdness
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
return control
class ControlNetSD35(ControlNet):
def pre_run(self, model, percent_to_timestep_function):
if self.control_model.double_y_emb:
missing, unexpected = self.control_model.orig_y_embedder.load_state_dict(model.diffusion_model.y_embedder.state_dict(), strict=False)
else:
missing, unexpected = self.control_model.x_embedder.load_state_dict(model.diffusion_model.x_embedder.state_dict(), strict=False)
super().pre_run(model, percent_to_timestep_function)
def copy(self):
c = ControlNetSD35(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
c.control_model = self.control_model
c.control_model_wrapped = self.control_model_wrapped
self.copy_to(c)
return c
def load_controlnet_sd35(sd, model_options={}):
control_type = -1
if "control_type" in sd:
control_type = round(sd.pop("control_type").item())
# blur_cnet = control_type == 0
canny_cnet = control_type == 1
depth_cnet = control_type == 2
new_sd = {}
for k in comfy.utils.MMDIT_MAP_BASIC:
if k[1] in sd:
new_sd[k[0]] = sd.pop(k[1])
for k in sd:
new_sd[k] = sd[k]
sd = new_sd
y_emb_shape = sd["y_embedder.mlp.0.weight"].shape
depth = y_emb_shape[0] // 64
hidden_size = 64 * depth
num_heads = depth
head_dim = hidden_size // num_heads
num_blocks = comfy.model_detection.count_blocks(new_sd, 'transformer_blocks.{}.')
load_device = comfy.model_management.get_torch_device()
offload_device = comfy.model_management.unet_offload_device()
unet_dtype = comfy.model_management.unet_dtype(model_params=-1)
manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
operations = model_options.get("custom_operations", None)
if operations is None:
operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
control_model = comfy.cldm.dit_embedder.ControlNetEmbedder(img_size=None,
patch_size=2,
in_chans=16,
num_layers=num_blocks,
main_model_double=depth,
double_y_emb=y_emb_shape[0] == y_emb_shape[1],
attention_head_dim=head_dim,
num_attention_heads=num_heads,
adm_in_channels=2048,
device=offload_device,
dtype=unet_dtype,
operations=operations)
control_model = controlnet_load_state_dict(control_model, sd)
latent_format = comfy.latent_formats.SD3()
preprocess_image = lambda a: a
if canny_cnet:
preprocess_image = lambda a: (a * 255 * 0.5 + 0.5)
elif depth_cnet:
preprocess_image = lambda a: 1.0 - a
control = ControlNetSD35(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, preprocess_image=preprocess_image)
return control
def load_controlnet_hunyuandit(controlnet_data, model_options={}):
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(controlnet_data, model_options=model_options)
control_model = comfy.ldm.hydit.controlnet.HunYuanControlNet(operations=operations, device=offload_device, dtype=unet_dtype)
control_model = controlnet_load_state_dict(control_model, controlnet_data)
latent_format = comfy.latent_formats.SDXL()
extra_conds = ['text_embedding_mask', 'encoder_hidden_states_t5', 'text_embedding_mask_t5', 'image_meta_size', 'style', 'cos_cis_img', 'sin_cis_img']
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds, strength_type=StrengthType.CONSTANT)
return control
def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
control_model = controlnet_load_state_dict(control_model, sd)
extra_conds = ['y', 'guidance']
control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
return control
def load_controlnet_flux_instantx(sd, model_options={}):
new_sd = comfy.model_detection.convert_diffusers_mmdit(sd, "")
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(new_sd, model_options=model_options)
for k in sd:
new_sd[k] = sd[k]
num_union_modes = 0
union_cnet = "controlnet_mode_embedder.weight"
if union_cnet in new_sd:
num_union_modes = new_sd[union_cnet].shape[0]
control_latent_channels = new_sd.get("pos_embed_input.weight").shape[1] // 4
concat_mask = False
if control_latent_channels == 17:
concat_mask = True
control_model = comfy.ldm.flux.controlnet.ControlNetFlux(latent_input=True, num_union_modes=num_union_modes, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
control_model = controlnet_load_state_dict(control_model, new_sd)
latent_format = comfy.latent_formats.Flux()
extra_conds = ['y', 'guidance']
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
return control
def load_controlnet_qwen_instantx(sd, model_options={}):
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
control_latent_channels = sd.get("controlnet_x_embedder.weight").shape[1]
extra_condition_channels = 0
concat_mask = False
if control_latent_channels == 68: #inpaint controlnet
extra_condition_channels = control_latent_channels - 64
concat_mask = True
control_model = comfy.ldm.qwen_image.controlnet.QwenImageControlNetModel(extra_condition_channels=extra_condition_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
control_model = controlnet_load_state_dict(control_model, sd)
latent_format = comfy.latent_formats.Wan21()
extra_conds = []
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
return control
def convert_mistoline(sd):
return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})
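# convert_mistoline only remaps a key prefix. A minimal sketch of the
# state_dict_prefix_replace semantics on a toy dict (helper name hypothetical):
def _prefix_replace(sd, old, new):
    return {(new + k[len(old):]) if k.startswith(old) else k: v for k, v in sd.items()}
# _prefix_replace({"single_controlnet_blocks.0.w": 0}, "single_controlnet_blocks.", "controlnet_single_blocks.")
# -> {"controlnet_single_blocks.0.w": 0}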
def load_controlnet_state_dict(state_dict, model=None, model_options={}):
controlnet_data = state_dict
if 'after_proj_list.18.bias' in controlnet_data.keys(): #Hunyuan DiT
return load_controlnet_hunyuandit(controlnet_data, model_options=model_options)
if "lora_controlnet" in controlnet_data:
return ControlLora(controlnet_data, model_options=model_options)
controlnet_config = None
supported_inference_dtypes = None
if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format
controlnet_config = comfy.model_detection.unet_config_from_diffusers_unet(controlnet_data)
diffusers_keys = comfy.utils.unet_to_diffusers(controlnet_config)
diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight"
diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias"
count = 0
loop = True
while loop:
suffix = [".weight", ".bias"]
for s in suffix:
k_in = "controlnet_down_blocks.{}{}".format(count, s)
k_out = "zero_convs.{}.0{}".format(count, s)
if k_in not in controlnet_data:
loop = False
break
diffusers_keys[k_in] = k_out
count += 1
count = 0
loop = True
while loop:
suffix = [".weight", ".bias"]
for s in suffix:
if count == 0:
k_in = "controlnet_cond_embedding.conv_in{}".format(s)
else:
k_in = "controlnet_cond_embedding.blocks.{}{}".format(count - 1, s)
k_out = "input_hint_block.{}{}".format(count * 2, s)
if k_in not in controlnet_data:
k_in = "controlnet_cond_embedding.conv_out{}".format(s)
loop = False
diffusers_keys[k_in] = k_out
count += 1
new_sd = {}
for k in diffusers_keys:
if k in controlnet_data:
new_sd[diffusers_keys[k]] = controlnet_data.pop(k)
if "control_add_embedding.linear_1.bias" in controlnet_data: #Union Controlnet
controlnet_config["union_controlnet_num_control_type"] = controlnet_data["task_embedding"].shape[0]
for k in list(controlnet_data.keys()):
new_k = k.replace('.attn.in_proj_', '.attn.in_proj.')
new_sd[new_k] = controlnet_data.pop(k)
leftover_keys = controlnet_data.keys()
if len(leftover_keys) > 0:
logging.warning("leftover keys: {}".format(leftover_keys))
controlnet_data = new_sd
elif "controlnet_blocks.0.weight" in controlnet_data:
if "double_blocks.0.img_attn.norm.key_norm.scale" in controlnet_data:
return load_controlnet_flux_xlabs_mistoline(controlnet_data, model_options=model_options)
elif "pos_embed_input.proj.weight" in controlnet_data:
if "transformer_blocks.0.adaLN_modulation.1.bias" in controlnet_data:
return load_controlnet_sd35(controlnet_data, model_options=model_options) #Stability sd3.5 format
else:
return load_controlnet_mmdit(controlnet_data, model_options=model_options) #SD3 diffusers controlnet
elif "transformer_blocks.0.img_mlp.net.0.proj.weight" in controlnet_data:
return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
elif "controlnet_x_embedder.weight" in controlnet_data:
return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)
pth_key = 'control_model.zero_convs.0.0.weight'
pth = False
key = 'zero_convs.0.0.weight'
if pth_key in controlnet_data:
pth = True
key = pth_key
prefix = "control_model."
elif key in controlnet_data:
prefix = ""
else:
net = load_t2i_adapter(controlnet_data, model_options=model_options)
if net is None:
logging.error("error could not detect control model type.")
return net
if controlnet_config is None:
model_config = comfy.model_detection.model_config_from_unet(controlnet_data, prefix, True)
supported_inference_dtypes = list(model_config.supported_inference_dtypes)
controlnet_config = model_config.unet_config
unet_dtype = model_options.get("dtype", None)
if unet_dtype is None:
weight_dtype = comfy.utils.weight_dtype(controlnet_data)
if supported_inference_dtypes is None:
supported_inference_dtypes = [comfy.model_management.unet_dtype()]
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes, weight_dtype=weight_dtype)
load_device = comfy.model_management.get_torch_device()
manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
operations = model_options.get("custom_operations", None)
if operations is None:
operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype)
controlnet_config["operations"] = operations
controlnet_config["dtype"] = unet_dtype
controlnet_config["device"] = comfy.model_management.unet_offload_device()
controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1]
control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)
if pth:
if 'difference' in controlnet_data:
if model is not None:
comfy.model_management.load_models_gpu([model])
model_sd = model.model_state_dict()
for x in controlnet_data:
c_m = "control_model."
if x.startswith(c_m):
sd_key = "diffusion_model.{}".format(x[len(c_m):])
if sd_key in model_sd:
cd = controlnet_data[x]
cd += model_sd[sd_key].type(cd.dtype).to(cd.device)
else:
logging.warning("WARNING: Loaded a diff controlnet without a model. It will very likely not work.")
class WeightsLoader(torch.nn.Module):
pass
w = WeightsLoader()
w.control_model = control_model
missing, unexpected = w.load_state_dict(controlnet_data, strict=False)
else:
missing, unexpected = control_model.load_state_dict(controlnet_data, strict=False)
if len(missing) > 0:
logging.warning("missing controlnet keys: {}".format(missing))
if len(unexpected) > 0:
logging.debug("unexpected controlnet keys: {}".format(unexpected))
global_average_pooling = model_options.get("global_average_pooling", False)
control = ControlNet(control_model, global_average_pooling=global_average_pooling, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
return control
def load_controlnet(ckpt_path, model=None, model_options={}):
model_options = model_options.copy()
if "global_average_pooling" not in model_options:
filename = os.path.splitext(ckpt_path)[0]
if filename.endswith("_shuffle") or filename.endswith("_shuffle_fp16"): #TODO: smarter way of enabling global_average_pooling
model_options["global_average_pooling"] = True
cnet = load_controlnet_state_dict(comfy.utils.load_torch_file(ckpt_path, safe_load=True), model=model, model_options=model_options)
if cnet is None:
logging.error("error checkpoint does not contain controlnet or t2i adapter data {}".format(ckpt_path))
return cnet
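# Sketch of the typical call path (path hypothetical): load_controlnet returns a
# ControlNet, ControlLora or T2IAdapter instance, or None if detection failed.
# cnet = load_controlnet("models/controlnet/control_canny.safetensors")
# if cnet is None: raise RuntimeError("unsupported controlnet checkpoint")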
class T2IAdapter(ControlBase):
def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
super().__init__()
self.t2i_model = t2i_model
self.channels_in = channels_in
self.control_input = None
self.compression_ratio = compression_ratio
self.upscale_algorithm = upscale_algorithm
if device is None:
device = comfy.model_management.get_torch_device()
self.device = device
def scale_image_to(self, width, height):
unshuffle_amount = self.t2i_model.unshuffle_amount
width = math.ceil(width / unshuffle_amount) * unshuffle_amount
height = math.ceil(height / unshuffle_amount) * unshuffle_amount
return width, height
def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
control_prev = None
if self.previous_controlnet is not None:
control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number, transformer_options)
if self.timestep_range is not None:
if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]:
if control_prev is not None:
return control_prev
else:
return None
if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.control_input = None
self.cond_hint = None
width, height = self.scale_image_to(x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio)
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, width, height, self.upscale_algorithm, "center").float().to(self.device)
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
if self.control_input is None:
self.t2i_model.to(x_noisy.dtype)
self.t2i_model.to(self.device)
self.control_input = self.t2i_model(self.cond_hint.to(x_noisy.dtype))
self.t2i_model.cpu()
control_input = {}
for k in self.control_input:
control_input[k] = list(map(lambda a: None if a is None else a.clone(), self.control_input[k]))
return self.control_merge(control_input, control_prev, x_noisy.dtype)
def copy(self):
c = T2IAdapter(self.t2i_model, self.channels_in, self.compression_ratio, self.upscale_algorithm)
self.copy_to(c)
return c
def load_t2i_adapter(t2i_data, model_options={}): #TODO: model_options
compression_ratio = 8
upscale_algorithm = 'nearest-exact'
if 'adapter' in t2i_data:
t2i_data = t2i_data['adapter']
if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
prefix_replace = {}
for i in range(4):
for j in range(2):
prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j)
prefix_replace["adapter.body.{}.".format(i, )] = "body.{}.".format(i * 2)
prefix_replace["adapter."] = ""
t2i_data = comfy.utils.state_dict_prefix_replace(t2i_data, prefix_replace)
keys = t2i_data.keys()
if "body.0.in_conv.weight" in keys:
cin = t2i_data['body.0.in_conv.weight'].shape[1]
model_ad = comfy.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
elif 'conv_in.weight' in keys:
cin = t2i_data['conv_in.weight'].shape[1]
channel = t2i_data['conv_in.weight'].shape[0]
ksize = t2i_data['body.0.block2.weight'].shape[2]
use_conv = False
down_opts = list(filter(lambda a: a.endswith("down_opt.op.weight"), keys))
if len(down_opts) > 0:
use_conv = True
xl = False
if cin == 256 or cin == 768:
xl = True
model_ad = comfy.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
elif "backbone.0.0.weight" in keys:
model_ad = comfy.ldm.cascade.controlnet.ControlNet(c_in=t2i_data['backbone.0.0.weight'].shape[1], proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
compression_ratio = 32
upscale_algorithm = 'bilinear'
elif "backbone.10.blocks.0.weight" in keys:
model_ad = comfy.ldm.cascade.controlnet.ControlNet(c_in=t2i_data['backbone.0.weight'].shape[1], bottleneck_mode="large", proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
compression_ratio = 1
upscale_algorithm = 'nearest-exact'
else:
return None
missing, unexpected = model_ad.load_state_dict(t2i_data)
if len(missing) > 0:
logging.warning("t2i missing {}".format(missing))
if len(unexpected) > 0:
logging.debug("t2i unexpected {}".format(unexpected))
return T2IAdapter(model_ad, model_ad.input_channels, compression_ratio, upscale_algorithm)
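# All of the loaders above share one pattern: probe the state dict for a
# signature key, then build the matching model class. A condensed sketch
# (function name hypothetical, keys taken from the checks above):
def _detect_control_family(sd):
    if "lora_controlnet" in sd:
        return "controllora"
    if "adapter.body.0.resnets.0.block1.weight" in sd or "body.0.in_conv.weight" in sd:
        return "t2i_adapter"
    if "controlnet_cond_embedding.conv_in.weight" in sd:
        return "diffusers_controlnet"
    return "unknown"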

comfy/diffusers_convert.py
@ -1,116 +1,9 @@
-import json
-import os
-import yaml
-import folder_paths
-from comfy.ldm.util import instantiate_from_config
-from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE
-import os.path as osp
import re
import torch
-from safetensors.torch import load_file, save_file
+import logging
# conversion code from https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py
# =================#
# UNet Conversion #
# =================#
unet_conversion_map = [
# (stable-diffusion, HF Diffusers)
("time_embed.0.weight", "time_embedding.linear_1.weight"),
("time_embed.0.bias", "time_embedding.linear_1.bias"),
("time_embed.2.weight", "time_embedding.linear_2.weight"),
("time_embed.2.bias", "time_embedding.linear_2.bias"),
("input_blocks.0.0.weight", "conv_in.weight"),
("input_blocks.0.0.bias", "conv_in.bias"),
("out.0.weight", "conv_norm_out.weight"),
("out.0.bias", "conv_norm_out.bias"),
("out.2.weight", "conv_out.weight"),
("out.2.bias", "conv_out.bias"),
]
unet_conversion_map_resnet = [
# (stable-diffusion, HF Diffusers)
("in_layers.0", "norm1"),
("in_layers.2", "conv1"),
("out_layers.0", "norm2"),
("out_layers.3", "conv2"),
("emb_layers.1", "time_emb_proj"),
("skip_connection", "conv_shortcut"),
]
unet_conversion_map_layer = []
# hardcoded number of downblocks and resnets/attentions...
# would need smarter logic for other networks.
for i in range(4):
# loop over downblocks/upblocks
for j in range(2):
# loop over resnets/attentions for downblocks
hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0."
unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
if i < 3:
# no attention layers in down_blocks.3
hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1."
unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
for j in range(3):
# loop over resnets/attentions for upblocks
hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
sd_up_res_prefix = f"output_blocks.{3 * i + j}.0."
unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
if i > 0:
# no attention layers in up_blocks.0
hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1."
unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
if i < 3:
# no downsample in down_blocks.3
hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op."
unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
# no upsample in up_blocks.3
hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{1 if i == 0 else 2}."
unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
hf_mid_atn_prefix = "mid_block.attentions.0."
sd_mid_atn_prefix = "middle_block.1."
unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
for j in range(2):
hf_mid_res_prefix = f"mid_block.resnets.{j}."
sd_mid_res_prefix = f"middle_block.{2 * j}."
unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
def convert_unet_state_dict(unet_state_dict):
# buyer beware: this is a *brittle* function,
# and correct output requires that all of these pieces interact in
# the exact order in which I have arranged them.
mapping = {k: k for k in unet_state_dict.keys()}
for sd_name, hf_name in unet_conversion_map:
mapping[hf_name] = sd_name
for k, v in mapping.items():
if "resnets" in k:
for sd_part, hf_part in unet_conversion_map_resnet:
v = v.replace(hf_part, sd_part)
mapping[k] = v
for k, v in mapping.items():
for sd_part, hf_part in unet_conversion_map_layer:
v = v.replace(hf_part, sd_part)
mapping[k] = v
new_state_dict = {v: unet_state_dict[k] for k, v in mapping.items()}
return new_state_dict
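# Worked toy example of the rename step above (standalone; not the real maps).
# Note convert_unet_state_dict expects a complete UNet state dict: the mapping
# gains an entry for every pair in unet_conversion_map, so a partial dict would
# raise KeyError in the final comprehension.
def _demo_key_rename():
    conversion = {"time_embedding.linear_1.weight": "time_embed.0.weight"}  # HF -> SD
    hf_sd = {"time_embedding.linear_1.weight": 0}
    sd_sd = {conversion.get(k, k): v for k, v in hf_sd.items()}
    assert "time_embed.0.weight" in sd_sd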
# ================#
# VAE Conversion #
# ================#
@ -157,20 +50,31 @@ vae_conversion_map_attn = [
("q.", "query."),
("k.", "key."),
("v.", "value."),
("q.", "to_q."),
("k.", "to_k."),
("v.", "to_v."),
("proj_out.", "to_out.0."),
("proj_out.", "proj_attn."),
]
-def reshape_weight_for_sd(w):
+def reshape_weight_for_sd(w, conv3d=False):
# convert HF linear weights to SD conv2d weights
-return w.reshape(*w.shape, 1, 1)
+if conv3d:
+return w.reshape(*w.shape, 1, 1, 1)
+else:
+return w.reshape(*w.shape, 1, 1)
def convert_vae_state_dict(vae_state_dict):
mapping = {k: k for k in vae_state_dict.keys()}
+conv3d = False
for k, v in mapping.items():
for sd_part, hf_part in vae_conversion_map:
v = v.replace(hf_part, sd_part)
+if v.endswith(".conv.weight"):
+if not conv3d and vae_state_dict[k].ndim == 5:
+conv3d = True
mapping[k] = v
for k, v in mapping.items():
if "attentions" in k:
@ -182,8 +86,8 @@ def convert_vae_state_dict(vae_state_dict):
for k, v in new_state_dict.items():
for weight_name in weights_to_convert:
if f"mid.attn_1.{weight_name}.weight" in k:
print(f"Reshaping {k} for SD format")
new_state_dict[k] = reshape_weight_for_sd(v)
logging.debug(f"Reshaping {k} for SD format")
new_state_dict[k] = reshape_weight_for_sd(v, conv3d=conv3d)
return new_state_dict
@ -211,11 +115,30 @@ textenc_pattern = re.compile("|".join(protected.keys()))
code2idx = {"q": 0, "k": 1, "v": 2}
-def convert_text_enc_state_dict_v20(text_enc_dict):
+# This function exists because at the time of writing torch.cat can't do fp8 with cuda
+def cat_tensors(tensors):
+x = 0
+for t in tensors:
+x += t.shape[0]
+shape = [x] + list(tensors[0].shape)[1:]
+out = torch.empty(shape, device=tensors[0].device, dtype=tensors[0].dtype)
+x = 0
+for t in tensors:
+out[x:x + t.shape[0]] = t
+x += t.shape[0]
+return out
+def convert_text_enc_state_dict_v20(text_enc_dict, prefix=""):
new_state_dict = {}
capture_qkv_weight = {}
capture_qkv_bias = {}
for k, v in text_enc_dict.items():
+if not k.startswith(prefix):
+continue
if (
k.endswith(".self_attn.q_proj.weight")
or k.endswith(".self_attn.k_proj.weight")
@ -240,123 +163,27 @@ def convert_text_enc_state_dict_v20(text_enc_dict):
capture_qkv_bias[k_pre][code2idx[k_code]] = v
continue
-relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k)
-new_state_dict[relabelled_key] = v
+text_proj = "transformer.text_projection.weight"
+if k.endswith(text_proj):
+new_state_dict[k.replace(text_proj, "text_projection")] = v.transpose(0, 1).contiguous()
+else:
+relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k)
+new_state_dict[relabelled_key] = v
for k_pre, tensors in capture_qkv_weight.items():
if None in tensors:
raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing")
relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre)
new_state_dict[relabelled_key + ".in_proj_weight"] = torch.cat(tensors)
new_state_dict[relabelled_key + ".in_proj_weight"] = cat_tensors(tensors)
for k_pre, tensors in capture_qkv_bias.items():
if None in tensors:
raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing")
relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre)
new_state_dict[relabelled_key + ".in_proj_bias"] = torch.cat(tensors)
new_state_dict[relabelled_key + ".in_proj_bias"] = cat_tensors(tensors)
return new_state_dict
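# Shape-level sketch of the q/k/v fusion performed above: three (d, d)
# projections stack into one (3d, d) in_proj tensor (toy dimensions):
def _demo_qkv_fuse():
    import torch
    d = 4
    q, k, v = torch.randn(d, d), torch.randn(d, d), torch.randn(d, d)
    in_proj = torch.cat([q, k, v])  # what in_proj_weight ends up holding
    assert in_proj.shape == (3 * d, d)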
def convert_text_enc_state_dict(text_enc_dict):
return text_enc_dict
def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, embedding_directory=None):
diffusers_unet_conf = json.load(open(osp.join(model_path, "unet/config.json")))
diffusers_scheduler_conf = json.load(open(osp.join(model_path, "scheduler/scheduler_config.json")))
# heuristic: SD2.x UNets are configured with sample_size == 96 (768 / 8)
v2 = diffusers_unet_conf["sample_size"] == 96
if 'prediction_type' in diffusers_scheduler_conf:
v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction'
if v2:
if v_pred:
config_path = folder_paths.get_full_path("configs", 'v2-inference-v.yaml')
else:
config_path = folder_paths.get_full_path("configs", 'v2-inference.yaml')
else:
config_path = folder_paths.get_full_path("configs", 'v1-inference.yaml')
with open(config_path, 'r') as stream:
config = yaml.safe_load(stream)
model_config_params = config['model']['params']
clip_config = model_config_params['cond_stage_config']
scale_factor = model_config_params['scale_factor']
vae_config = model_config_params['first_stage_config']
vae_config['scale_factor'] = scale_factor
model_config_params["unet_config"]["params"]["use_fp16"] = fp16
unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors")
vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors")
text_enc_path = osp.join(model_path, "text_encoder", "model.safetensors")
# Load each component from safetensors if present, otherwise fall back to the PyTorch .bin files
if osp.exists(unet_path):
unet_state_dict = load_file(unet_path, device="cpu")
else:
unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.bin")
unet_state_dict = torch.load(unet_path, map_location="cpu")
if osp.exists(vae_path):
vae_state_dict = load_file(vae_path, device="cpu")
else:
vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.bin")
vae_state_dict = torch.load(vae_path, map_location="cpu")
if osp.exists(text_enc_path):
text_enc_dict = load_file(text_enc_path, device="cpu")
else:
text_enc_path = osp.join(model_path, "text_encoder", "pytorch_model.bin")
text_enc_dict = torch.load(text_enc_path, map_location="cpu")
# Convert the UNet model
unet_state_dict = convert_unet_state_dict(unet_state_dict)
unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()}
# Convert the VAE model
vae_state_dict = convert_vae_state_dict(vae_state_dict)
vae_state_dict = {"first_stage_model." + k: v for k, v in vae_state_dict.items()}
# Easiest way to identify v2.0 model seems to be that the text encoder (OpenCLIP) is deeper
is_v20_model = "text_model.encoder.layers.22.layer_norm2.bias" in text_enc_dict
if is_v20_model:
# Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm
text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()}
text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict)
text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()}
else:
text_enc_dict = convert_text_enc_state_dict(text_enc_dict)
text_enc_dict = {"cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()}
# Put together new checkpoint
sd = {**unet_state_dict, **vae_state_dict, **text_enc_dict}
clip = None
vae = None
class WeightsLoader(torch.nn.Module):
pass
w = WeightsLoader()
load_state_dict_to = []
if output_vae:
vae = VAE(scale_factor=scale_factor, config=vae_config)
w.first_stage_model = vae.first_stage_model
load_state_dict_to = [w]
if output_clip:
clip = CLIP(config=clip_config, embedding_directory=embedding_directory)
w.cond_stage_model = clip.cond_stage_model
load_state_dict_to = [w]
model = instantiate_from_config(config["model"])
model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to)
if fp16:
model = model.half()
return ModelPatcher(model), clip, vae

comfy/diffusers_load.py Normal file

@ -0,0 +1,36 @@
import os
import comfy.sd
def first_file(path, filenames):
for f in filenames:
p = os.path.join(path, f)
if os.path.exists(p):
return p
return None
def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None):
diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"]
unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names)
vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names)
text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"]
text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names)
text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names)
text_encoder_paths = [text_encoder1_path]
if text_encoder2_path is not None:
text_encoder_paths.append(text_encoder2_path)
unet = comfy.sd.load_diffusion_model(unet_path)
clip = None
if output_clip:
clip = comfy.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)
vae = None
if output_vae:
sd = comfy.utils.load_torch_file(vae_path)
vae = comfy.sd.VAE(sd=sd)
return (unet, clip, vae)
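# Sketch of use, assuming a standard diffusers directory layout (path and
# embedding directory hypothetical):
# unet, clip, vae = load_diffusers("checkpoints/my-diffusers-model", embedding_directory="embeddings")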

comfy/extra_samplers/uni_pc.py

@ -1,10 +1,10 @@
#code taken from: https://github.com/wl-zhao/UniPC and modified
import torch
import torch.nn.functional as F
import math
+import logging
-from tqdm.auto import trange, tqdm
+from tqdm.auto import trange
class NoiseScheduleVP:
@ -16,7 +16,7 @@ class NoiseScheduleVP:
continuous_beta_0=0.1,
continuous_beta_1=20.,
):
"""Create a wrapper class for the forward SDE (VP type).
r"""Create a wrapper class for the forward SDE (VP type).
***
Update: We support discrete-time diffusion models by implementing a piecewise linear interpolation for log_alpha_t.
@ -80,7 +80,7 @@ class NoiseScheduleVP:
'linear' or 'cosine' for continuous-time DPMs.
Returns:
A wrapper object of the forward SDE (VP type).
===============================================================
Example:
@ -180,7 +180,6 @@ class NoiseScheduleVP:
def model_wrapper(
model,
-sampling_function,
noise_schedule,
model_type="noise",
model_kwargs={},
@ -209,7 +208,7 @@ def model_wrapper(
arXiv preprint arXiv:2202.00512 (2022).
[2] Ho, Jonathan, et al. "Imagen Video: High Definition Video Generation with Diffusion Models."
arXiv preprint arXiv:2210.02303 (2022).
4. "score": marginal score function. (Trained by denoising score matching).
Note that the score function and the noise prediction model follow a simple relationship:
```
@ -227,7 +226,7 @@ def model_wrapper(
The input `model` has the following format:
``
model(x, t_input, **model_kwargs) -> noise | x_start | v | score
``
The input `classifier_fn` has the following format:
``
@ -241,12 +240,12 @@ def model_wrapper(
The input `model` has the following format:
``
model(x, t_input, cond, **model_kwargs) -> noise | x_start | v | score
``
And if cond == `unconditional_condition`, the model output is the unconditional DPM output.
[4] Ho, Jonathan, and Tim Salimans. "Classifier-free diffusion guidance."
arXiv preprint arXiv:2207.12598 (2022).
The `t_input` is the time label of the model, which may be discrete-time labels (i.e. 0 to 999)
or continuous-time labels (i.e. epsilon to T).
@ -255,7 +254,7 @@ def model_wrapper(
``
def model_fn(x, t_continuous) -> noise:
t_input = get_model_input_time(t_continuous)
return noise_pred(model, x, t_input, **model_kwargs)
``
where `t_continuous` is the continuous time labels (i.e. epsilon to T). And we use `model_fn` for DPM-Solver.
@ -295,7 +294,7 @@ def model_wrapper(
if t_continuous.reshape((-1,)).shape[0] == 1:
t_continuous = t_continuous.expand((x.shape[0]))
t_input = get_model_input_time(t_continuous)
-output = sampling_function(model, x, t_input, **model_kwargs)
+output = model(x, t_input, **model_kwargs)
if model_type == "noise":
return output
elif model_type == "x_start":
@ -359,11 +358,8 @@ class UniPC:
thresholding=False,
max_val=1.,
variant='bh1',
-noise_mask=None,
-masked_image=None,
-noise=None,
):
"""Construct a UniPC.
"""Construct a UniPC.
We support both data_prediction and noise_prediction.
"""
@ -373,13 +369,10 @@ class UniPC:
self.predict_x0 = predict_x0
self.thresholding = thresholding
self.max_val = max_val
-self.noise_mask = noise_mask
-self.masked_image = masked_image
-self.noise = noise
def dynamic_thresholding_fn(self, x0, t=None):
"""
The dynamic thresholding method.
"""
dims = x0.dim()
p = self.dynamic_thresholding_ratio
@ -392,10 +385,7 @@ class UniPC:
"""
Return the noise prediction model.
"""
-if self.noise_mask is not None:
-return self.model(x, t) * self.noise_mask
-else:
-return self.model(x, t)
+return self.model(x, t)
def data_prediction_fn(self, x, t):
"""
@ -410,13 +400,11 @@ class UniPC:
s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1)
s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims)
x0 = torch.clamp(x0, -s, s) / s
-if self.noise_mask is not None:
-x0 = x0 * self.noise_mask + (1. - self.noise_mask) * self.masked_image
return x0
def model_fn(self, x, t):
"""
Convert the model to the noise prediction model or the data prediction model.
"""
if self.predict_x0:
return self.data_prediction_fn(x, t)
@ -473,7 +461,7 @@ class UniPC:
def denoise_to_zero_fn(self, x, s):
"""
Denoise at the final step, which is equivalent to solving the ODE from lambda_s to infty by first-order discretization.
"""
return self.data_prediction_fn(x, s)
@ -487,7 +475,7 @@ class UniPC:
return self.multistep_uni_pc_vary_update(x, model_prev_list, t_prev_list, t, order, **kwargs)
def multistep_uni_pc_vary_update(self, x, model_prev_list, t_prev_list, t, order, use_corrector=True):
-print(f'using unified predictor-corrector with order {order} (solver type: vary coeff)')
+logging.info(f'using unified predictor-corrector with order {order} (solver type: vary coeff)')
ns = self.noise_schedule
assert order <= len(model_prev_list)
@ -522,7 +510,7 @@ class UniPC:
col = torch.ones_like(rks)
for k in range(1, K + 1):
C.append(col)
col = col * rks / (k + 1)
C = torch.stack(C, dim=1)
if len(D1s) > 0:
@ -531,7 +519,6 @@ class UniPC:
A_p = C_inv_p
if use_corrector:
-print('using corrector')
C_inv = torch.linalg.inv(C)
A_c = C_inv
@ -634,12 +621,12 @@ class UniPC:
B_h = torch.expm1(hh)
else:
raise NotImplementedError()
for i in range(1, order + 1):
R.append(torch.pow(rks, i - 1))
b.append(h_phi_k * factorial_i / B_h)
factorial_i *= (i + 1)
h_phi_k = h_phi_k / hh - 1 / factorial_i
R = torch.stack(R)
b = torch.tensor(b, device=x.device)
@ -674,7 +661,7 @@ class UniPC:
if x_t is None:
if use_predictor:
-pred_res = torch.einsum('k,bkchw->bchw', rhos_p, D1s)
+pred_res = torch.tensordot(D1s, rhos_p, dims=([1], [0])) # torch.einsum('k,bkchw->bchw', rhos_p, D1s)
else:
pred_res = 0
x_t = x_t_ - expand_dims(alpha_t * B_h, dims) * pred_res
@ -682,14 +669,14 @@ class UniPC:
if use_corrector:
model_t = self.model_fn(x_t, t)
if D1s is not None:
-corr_res = torch.einsum('k,bkchw->bchw', rhos_c[:-1], D1s)
+corr_res = torch.tensordot(D1s, rhos_c[:-1], dims=([1], [0])) # torch.einsum('k,bkchw->bchw', rhos_c[:-1], D1s)
else:
corr_res = 0
D1_t = (model_t - model_prev_0)
x_t = x_t_ - expand_dims(alpha_t * B_h, dims) * (corr_res + rhos_c[-1] * D1_t)
else:
x_t_ = (
-expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dimss) * x
+expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x
- expand_dims(sigma_t * h_phi_1, dims) * model_prev_0
)
if x_t is None:
@ -714,9 +701,8 @@ class UniPC:
method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver',
atol=0.0078, rtol=0.05, corrector=False, callback=None, disable_pbar=False
):
-t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end
-t_T = self.noise_schedule.T if t_start is None else t_start
device = x.device
+# t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end
+# t_T = self.noise_schedule.T if t_start is None else t_start
steps = len(timesteps) - 1
if method == 'multistep':
assert steps >= order
@ -724,8 +710,6 @@ class UniPC:
assert timesteps.shape[0] - 1 == steps
# with torch.no_grad():
for step_index in trange(steps, disable=disable_pbar):
-if self.noise_mask is not None:
-x = x * self.noise_mask + (1. - self.noise_mask) * (self.masked_image * self.noise_schedule.marginal_alpha(timesteps[step_index]) + self.noise * self.noise_schedule.marginal_std(timesteps[step_index]))
if step_index == 0:
vec_t = timesteps[0].expand((x.shape[0]))
model_prev_list = [self.model_fn(x, vec_t)]
@ -767,11 +751,11 @@ class UniPC:
model_x = self.model_fn(x, vec_t)
model_prev_list[-1] = model_x
if callback is not None:
-callback(step_index, model_prev_list[-1], x, steps)
+callback({'x': x, 'i': step_index, 'denoised': model_prev_list[-1]})
else:
raise NotImplementedError()
-if denoise_to_zero:
-x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0)
+# if denoise_to_zero:
+# x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0)
return x
@ -834,52 +818,56 @@ def expand_dims(v, dims):
return v[(...,) + (None,)*(dims - 1)]
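# expand_dims(v, dims) appends singleton axes so v broadcasts against a
# dims-dimensional tensor. Quick illustrative check (assumes expand_dims above):
def _demo_expand_dims():
    import torch
    v = torch.tensor([1.0, 2.0])
    assert expand_dims(v, 4).shape == (2, 1, 1, 1)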
+class SigmaConvert:
+schedule = ""
+def marginal_log_mean_coeff(self, sigma):
+return 0.5 * torch.log(1 / ((sigma * sigma) + 1))
-def sample_unipc(model, noise, image, sigmas, sampling_function, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'):
-to_zero = False
+def marginal_alpha(self, t):
+return torch.exp(self.marginal_log_mean_coeff(t))
+def marginal_std(self, t):
+return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t)))
+def marginal_lambda(self, t):
+"""
+Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T].
+"""
+log_mean_coeff = self.marginal_log_mean_coeff(t)
+log_std = 0.5 * torch.log(1. - torch.exp(2. * log_mean_coeff))
+return log_mean_coeff - log_std
+def predict_eps_sigma(model, input, sigma_in, **kwargs):
+sigma = sigma_in.view(sigma_in.shape[:1] + (1,) * (input.ndim - 1))
+input = input * ((sigma ** 2 + 1.0) ** 0.5)
+return (input - model(input, sigma_in, **kwargs)) / sigma
+def sample_unipc(model, noise, sigmas, extra_args=None, callback=None, disable=False, variant='bh1'):
+timesteps = sigmas.clone()
if sigmas[-1] == 0:
-timesteps = torch.nn.functional.interpolate(sigmas[None,None,:-1], size=(len(sigmas),), mode='linear')[0][0]
-to_zero = True
+timesteps = sigmas[:]
+timesteps[-1] = 0.001
else:
timesteps = sigmas.clone()
+ns = SigmaConvert()
-for s in range(timesteps.shape[0]):
-timesteps[s] = (model.sigma_to_t(timesteps[s]) / 1000) + (1 / len(model.sigmas))
-ns = NoiseScheduleVP('discrete', alphas_cumprod=model.inner_model.alphas_cumprod)
-if image is not None:
-img = image * ns.marginal_alpha(timesteps[0])
-if max_denoise:
-noise_mult = 1.0
-else:
-noise_mult = ns.marginal_std(timesteps[0])
-img += noise * noise_mult
-else:
-img = noise
-if to_zero:
-timesteps[-1] = (1 / len(model.sigmas))
-device = noise.device
-if model.parameterization == "v":
-model_type = "v"
-else:
-model_type = "noise"
+noise = noise / torch.sqrt(1.0 + timesteps[0] ** 2.0)
+model_type = "noise"
model_fn = model_wrapper(
-model.inner_model.inner_model.apply_model,
-sampling_function,
+lambda input, sigma, **kwargs: predict_eps_sigma(model, input, sigma, **kwargs),
ns,
model_type=model_type,
guidance_type="uncond",
model_kwargs=extra_args,
)
-order = min(3, len(timesteps) - 1)
-uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, noise_mask=noise_mask, masked_image=image, noise=noise, variant=variant)
-x = uni_pc.sample(img, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable)
-if not to_zero:
-x /= ns.marginal_alpha(timesteps[-1])
+order = min(3, len(timesteps) - 2)
+uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, variant=variant)
+x = uni_pc.sample(noise, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable)
+x /= ns.marginal_alpha(timesteps[-1])
return x
+def sample_unipc_bh2(model, noise, sigmas, extra_args=None, callback=None, disable=False):
+return sample_unipc(model, noise, sigmas, extra_args, callback, disable, variant='bh2')

Some files were not shown because too many files have changed in this diff.