# sd_hijack_unet.py
  1. import torch
  2. from packaging import version
  3. from modules import devices
  4. from modules.sd_hijack_utils import CondFunc
  5. class TorchHijackForUnet:
  6. """
  7. This is torch, but with cat that resizes tensors to appropriate dimensions if they do not match;
  8. this makes it possible to create pictures with dimensions that are multiples of 8 rather than 64
  9. """
  10. def __getattr__(self, item):
  11. if item == 'cat':
  12. return self.cat
  13. if hasattr(torch, item):
  14. return getattr(torch, item)
  15. raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
  16. def cat(self, tensors, *args, **kwargs):
  17. if len(tensors) == 2:
  18. a, b = tensors
  19. if a.shape[-2:] != b.shape[-2:]:
  20. a = torch.nn.functional.interpolate(a, b.shape[-2:], mode="nearest")
  21. tensors = (a, b)
  22. return torch.cat(tensors, *args, **kwargs)
  23. th = TorchHijackForUnet()
  24. # Below are monkey patches to enable upcasting a float16 UNet for float32 sampling
  25. def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
  26. """Always make sure inputs to unet are in correct dtype."""
  27. if isinstance(cond, dict):
  28. for y in cond.keys():
  29. if isinstance(cond[y], list):
  30. cond[y] = [x.to(devices.dtype_unet) if isinstance(x, torch.Tensor) else x for x in cond[y]]
  31. else:
  32. cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y]
  33. with devices.autocast():
  34. result = orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs)
  35. if devices.unet_needs_upcast:
  36. return result.float()
  37. else:
  38. return result
  39. class GELUHijack(torch.nn.GELU, torch.nn.Module):
  40. def __init__(self, *args, **kwargs):
  41. torch.nn.GELU.__init__(self, *args, **kwargs)
  42. def forward(self, x):
  43. if devices.unet_needs_upcast:
  44. return torch.nn.GELU.forward(self.float(), x.float()).to(devices.dtype_unet)
  45. else:
  46. return torch.nn.GELU.forward(self, x)
  47. ddpm_edit_hijack = None
  48. def hijack_ddpm_edit():
  49. global ddpm_edit_hijack
  50. if not ddpm_edit_hijack:
  51. CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond)
  52. CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
  53. ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model)
  54. unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
  55. if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
  56. CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)
  57. CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast)
  58. CondFunc('open_clip.transformer.ResidualAttentionBlock.__init__', lambda orig_func, *args, **kwargs: kwargs.update({'act_layer': GELUHijack}) and False or orig_func(*args, **kwargs), lambda _, *args, **kwargs: kwargs.get('act_layer') is None or kwargs['act_layer'] == torch.nn.GELU)
  59. first_stage_cond = lambda _, self, *args, **kwargs: devices.unet_needs_upcast and self.model.diffusion_model.dtype == torch.float16
  60. first_stage_sub = lambda orig_func, self, x, **kwargs: orig_func(self, x.to(devices.dtype_vae), **kwargs)
  61. CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond)
  62. CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
  63. CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond)
  64. CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model)
  65. CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model)
  66. def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs):
  67. if devices.unet_needs_upcast and timesteps.dtype == torch.int64:
  68. dtype = torch.float32
  69. else:
  70. dtype = devices.dtype_unet
  71. return orig_func(timesteps, *args, **kwargs).to(dtype=dtype)
  72. CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)
  73. CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)