|
@@ -36,7 +36,7 @@ th = TorchHijackForUnet()
|
|
|
|
|
|
# Below are monkey patches to enable upcasting a float16 UNet for float32 sampling
|
|
# Below are monkey patches to enable upcasting a float16 UNet for float32 sampling
|
|
def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
|
|
def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
|
|
-
|
|
|
|
|
|
+ """Always make sure inputs to unet are in correct dtype."""
|
|
if isinstance(cond, dict):
|
|
if isinstance(cond, dict):
|
|
for y in cond.keys():
|
|
for y in cond.keys():
|
|
if isinstance(cond[y], list):
|
|
if isinstance(cond[y], list):
|
|
@@ -45,7 +45,11 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
|
|
cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y]
|
|
cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y]
|
|
|
|
|
|
with devices.autocast():
|
|
with devices.autocast():
|
|
- return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float()
|
|
|
|
|
|
+ result = orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs)
|
|
|
|
+ if devices.unet_needs_upcast:
|
|
|
|
+ return result.float()
|
|
|
|
+ else:
|
|
|
|
+ return result
|
|
|
|
|
|
|
|
|
|
class GELUHijack(torch.nn.GELU, torch.nn.Module):
|
|
class GELUHijack(torch.nn.GELU, torch.nn.Module):
|
|
@@ -64,12 +68,11 @@ def hijack_ddpm_edit():
|
|
if not ddpm_edit_hijack:
|
|
if not ddpm_edit_hijack:
|
|
CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond)
|
|
CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond)
|
|
CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
|
|
CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
|
|
- ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
|
|
|
|
|
|
+ ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model)
|
|
|
|
|
|
|
|
|
|
unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
|
|
unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
|
|
-CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
|
|
|
|
-CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
|
|
|
|
|
|
+
|
|
if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
|
|
if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
|
|
CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)
|
|
CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)
|
|
CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast)
|
|
CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast)
|
|
@@ -81,5 +84,17 @@ CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.decode_first_stage', first_s
|
|
CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
|
|
CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
|
|
CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond)
|
|
CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond)
|
|
|
|
|
|
-CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model, unet_needs_upcast)
|
|
|
|
-CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
|
|
|
|
|
|
+CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model)
|
|
|
|
+CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs):
|
|
|
|
+ if devices.unet_needs_upcast and timesteps.dtype == torch.int64:
|
|
|
|
+ dtype = torch.float32
|
|
|
|
+ else:
|
|
|
|
+ dtype = devices.dtype_unet
|
|
|
|
+ return orig_func(timesteps, *args, **kwargs).to(dtype=dtype)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)
|
|
|
|
+CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)
|