diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py index 9921e3932465..35caa4641884 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py @@ -550,7 +550,7 @@ class CogVideoXUpBlock3D(nn.Module): spatial_norm_dim (`int`, defaults to `16`): The dimension to use for spatial norm if it is to be used instead of group norm. add_upsample (`bool`, defaults to `True`): - Whether or not to use a upsampling layer. If not used, output dimension would be same as input dimension. + Whether or not to use an upsampling layer. If not used, output dimension would be same as input dimension. compress_time (`bool`, defaults to `False`): Whether or not to downsample across temporal dimension. pad_mode (str, defaults to `"first"`): diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index e018381ba859..eddaae8d21f1 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -172,7 +172,7 @@ def __call__(self, hooks, model_id, model, execution_device): try: mem_on_device = device_module.mem_get_info(execution_device.index)[0] except AttributeError: - raise AttributeError(f"Do not know how to obtain obtain memory info for {str(device_module)}.") + raise AttributeError(f"Do not know how to obtain memory info for {str(device_module)}.") mem_on_device = mem_on_device - self.memory_reserve_margin if current_module_size < mem_on_device: diff --git a/src/diffusers/pipelines/auto_pipeline.py b/src/diffusers/pipelines/auto_pipeline.py index 2876798e14bd..6c50dd059028 100644 --- a/src/diffusers/pipelines/auto_pipeline.py +++ b/src/diffusers/pipelines/auto_pipeline.py @@ -659,7 +659,7 @@ def __init__(self, *args, **kwargs): @validate_hf_hub_args def from_pretrained(cls, pretrained_model_or_path, **kwargs): r""" - Instantiates a image-to-image Pytorch diffusion pipeline from pretrained pipeline weight. + Instantiates an image-to-image Pytorch diffusion pipeline from pretrained pipeline weight. The from_pretrained() method takes care of returning the correct pipeline class instance by: 1. Detect the pipeline class of the pretrained_model_or_path based on the _class_name property of its @@ -817,7 +817,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): @classmethod def from_pipe(cls, pipeline, **kwargs): r""" - Instantiates a image-to-image Pytorch diffusion pipeline from another instantiated diffusion pipeline class. + Instantiates an image-to-image Pytorch diffusion pipeline from another instantiated diffusion pipeline class. The from_pipe() method takes care of returning the correct pipeline class instance by finding the image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class name. diff --git a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py index a068f99c6368..e57f2abef9e8 100644 --- a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py +++ b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py @@ -11,7 +11,7 @@ # Modified from ClipCaptionModel in https://github.com/thu-ml/unidiffuser/blob/main/libs/caption_decoder.py class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin): """ - Text decoder model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to + Text decoder model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to generate text from the UniDiffuser image-text embedding. Parameters: diff --git a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py index 6fd4ff50285f..49db56ec4ad6 100644 --- a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py +++ b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py @@ -831,7 +831,7 @@ def forward( class UniDiffuserModel(ModelMixin, ConfigMixin): """ - Transformer model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a + Transformer model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a modification of [`UTransformer2DModel`] with input and output heads for the VAE-embedded latent image, the CLIP-embedded image, and the CLIP-embedded prompt (see paper for more details). diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 484f2ca58e1b..0fc80670412c 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -368,7 +368,7 @@ def set_timesteps( based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas` must be `None`, and `timestep_spacing` attribute will be ignored. sigmas (`list[float]`, *optional*): - Custom sigmas used to support arbitrary timesteps schedule schedule. If `None`, timesteps and sigmas + Custom sigmas used to support arbitrary timesteps schedule. If `None`, timesteps and sigmas will be generated based on the relevant scheduler attributes. If `sigmas` is passed, `num_inference_steps` and `timesteps` must be `None`, and the timesteps will be generated based on the custom sigmas schedule.