sd_models_config.py

import os

import torch

from modules import shared, paths, sd_disable_initialization, devices

sd_configs_path = shared.sd_configs_path
sd_repo_configs_path = os.path.join(paths.paths['Stable Diffusion'], "configs", "stable-diffusion")
sd_xl_repo_configs_path = os.path.join(paths.paths['Stable Diffusion XL'], "configs", "inference")

config_default = shared.sd_default_config
config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml")
config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml")
config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml")
config_sdxlv = os.path.join(sd_configs_path, "sd_xl_v.yaml")
config_sdxl_refiner = os.path.join(sd_xl_repo_configs_path, "sd_xl_refiner.yaml")
config_sdxl_inpainting = os.path.join(sd_configs_path, "sd_xl_inpaint.yaml")
config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml")
config_unclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-l-inference.yaml")
config_unopenclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-h-inference.yaml")
config_inpainting = os.path.join(sd_configs_path, "v1-inpainting-inference.yaml")
config_instruct_pix2pix = os.path.join(sd_configs_path, "instruct-pix2pix.yaml")
config_alt_diffusion = os.path.join(sd_configs_path, "alt-diffusion-inference.yaml")
config_alt_diffusion_m18 = os.path.join(sd_configs_path, "alt-diffusion-m18-inference.yaml")
config_sd3 = os.path.join(sd_configs_path, "sd3-inference.yaml")


def is_using_v_parameterization_for_sd2(state_dict):
    """
    Detects whether the UNet in state_dict is using v-parameterization. Returns True if it is.
    """

    import ldm.modules.diffusionmodules.openaimodel

    device = devices.device

    with sd_disable_initialization.DisableInitialization():
        # build a skeleton SD2 UNet (weights are not initialized here; they are loaded below)
        unet = ldm.modules.diffusionmodules.openaimodel.UNetModel(
            use_checkpoint=False,
            use_fp16=False,
            image_size=32,
            in_channels=4,
            out_channels=4,
            model_channels=320,
            attention_resolutions=[4, 2, 1],
            num_res_blocks=2,
            channel_mult=[1, 2, 4, 4],
            num_head_channels=64,
            use_spatial_transformer=True,
            use_linear_in_transformer=True,
            transformer_depth=1,
            context_dim=1024,
            legacy=False
        )
        unet.eval()

    with torch.no_grad():
        unet_sd = {k.replace("model.diffusion_model.", ""): v for k, v in state_dict.items() if "model.diffusion_model." in k}
        unet.load_state_dict(unet_sd, strict=True)
        unet.to(device=device, dtype=devices.dtype_unet)

        # probe the UNet with a constant input at the last timestep
        test_cond = torch.ones((1, 2, 1024), device=device) * 0.5
        x_test = torch.ones((1, 4, 8, 8), device=device) * 0.5

        with devices.autocast():
            out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().cpu().item()

    return out < -1
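
# Note on is_using_v_parameterization_for_sd2 (rough intuition, not a formal
# guarantee): a v-parameterization model predicts v = alpha_t * eps - sigma_t * x.
# At the last timestep (999), alpha_t is close to 0 and sigma_t close to 1, so the
# prediction is approximately -x; with the constant 0.5 test input,
# mean(unet(x) - x) then comes out around -1 or lower. An epsilon-prediction
# model predicts the added noise instead, so the same statistic stays well above
# the -1 threshold.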


def guess_model_config_from_state_dict(sd, filename):
    sd2_cond_proj_weight = sd.get('cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight', None)
    diffusion_model_input = sd.get('model.diffusion_model.input_blocks.0.0.weight', None)
    sd2_variations_weight = sd.get('embedder.model.ln_final.weight', None)

    # SD3 uses a patch embedder instead of the usual UNet input conv
    if "model.diffusion_model.x_embedder.proj.weight" in sd:
        return config_sd3

    # SDXL base / inpainting: two text-encoder embedders in the conditioner
    if sd.get('conditioner.embedders.1.model.ln_final.weight', None) is not None:
        if diffusion_model_input.shape[1] == 9:
            return config_sdxl_inpainting
        else:
            if 'v_pred' in sd:
                del sd['v_pred']
                return config_sdxlv
            return config_sdxl

    # SDXL refiner has only a single embedder
    if sd.get('conditioner.embedders.0.model.ln_final.weight', None) is not None:
        return config_sdxl_refiner
    elif sd.get('depth_model.model.pretrained.act_postprocess3.0.project.0.bias', None) is not None:
        return config_depth_model
    elif sd2_variations_weight is not None and sd2_variations_weight.shape[0] == 768:
        return config_unclip
    elif sd2_variations_weight is not None and sd2_variations_weight.shape[0] == 1024:
        return config_unopenclip

    # SD 2.x: 1024-wide text projection; 9 UNet input channels means inpainting
    if sd2_cond_proj_weight is not None and sd2_cond_proj_weight.shape[1] == 1024:
        if diffusion_model_input.shape[1] == 9:
            return config_sd2_inpainting
        elif is_using_v_parameterization_for_sd2(sd):
            return config_sd2v
        else:
            return config_sd2

    # SD 1.x variants distinguished by UNet input channels
    if diffusion_model_input is not None:
        if diffusion_model_input.shape[1] == 9:
            return config_inpainting
        if diffusion_model_input.shape[1] == 8:
            return config_instruct_pix2pix

    # Alt-Diffusion uses a RoBERTa-style text encoder
    if sd.get('cond_stage_model.roberta.embeddings.word_embeddings.weight', None) is not None:
        if sd.get('cond_stage_model.transformation.weight').size()[0] == 1024:
            return config_alt_diffusion_m18
        return config_alt_diffusion

    return config_default
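
# For illustration, the routing above can be exercised with a minimal fake state
# dict; the shapes below match the real SD2 text-encoder projection and a
# 9-channel inpainting UNet input conv, with every other key omitted:
#
#     sd = {
#         'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight': torch.zeros(3072, 1024),
#         'model.diffusion_model.input_blocks.0.0.weight': torch.zeros(320, 9, 3, 3),
#     }
#     guess_model_config_from_state_dict(sd, "fake.ckpt")  # -> config_sd2_inpainting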


def find_checkpoint_config(state_dict, info):
    if info is None:
        return guess_model_config_from_state_dict(state_dict, "")

    config = find_checkpoint_config_near_filename(info)
    if config is not None:
        return config

    return guess_model_config_from_state_dict(state_dict, info.filename)


def find_checkpoint_config_near_filename(info):
    if info is None:
        return None

    # a .yaml with the same base name next to the checkpoint overrides guessing
    config = f"{os.path.splitext(info.filename)[0]}.yaml"
    if os.path.exists(config):
        return config

    return None
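
# Usage sketch, assuming the webui environment (modules.shared, modules.paths,
# modules.devices) has been initialized and the checkpoint is a .safetensors
# file. FakeInfo is a hypothetical stand-in for webui's CheckpointInfo; only its
# .filename attribute is read by find_checkpoint_config_near_filename.
#
#     import safetensors.torch
#
#     class FakeInfo:
#         def __init__(self, filename):
#             self.filename = filename
#
#     info = FakeInfo("/path/to/model.safetensors")
#     state_dict = safetensors.torch.load_file(info.filename)
#
#     # a sibling "/path/to/model.yaml" takes priority; otherwise the config is
#     # guessed from the weights
#     print(find_checkpoint_config(state_dict, info))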