diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py
index 9921e3932465..35caa4641884 100644
--- a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py
@@ -550,7 +550,7 @@ class CogVideoXUpBlock3D(nn.Module):
         spatial_norm_dim (`int`, defaults to `16`):
             The dimension to use for spatial norm if it is to be used instead of group norm.
         add_upsample (`bool`, defaults to `True`):
-            Whether or not to use a upsampling layer. If not used, output dimension would be same as input dimension.
+            Whether or not to use an upsampling layer. If not used, output dimension would be same as input dimension.
         compress_time (`bool`, defaults to `False`):
             Whether or not to downsample across temporal dimension.
         pad_mode (str, defaults to `"first"`):
diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py
index e018381ba859..eddaae8d21f1 100644
--- a/src/diffusers/modular_pipelines/components_manager.py
+++ b/src/diffusers/modular_pipelines/components_manager.py
@@ -172,7 +172,7 @@ def __call__(self, hooks, model_id, model, execution_device):
         try:
             mem_on_device = device_module.mem_get_info(execution_device.index)[0]
         except AttributeError:
-            raise AttributeError(f"Do not know how to obtain obtain memory info for {str(device_module)}.")
+            raise AttributeError(f"Do not know how to obtain memory info for {str(device_module)}.")
 
         mem_on_device = mem_on_device - self.memory_reserve_margin
         if current_module_size < mem_on_device:
diff --git a/src/diffusers/pipelines/auto_pipeline.py b/src/diffusers/pipelines/auto_pipeline.py
index 2876798e14bd..6c50dd059028 100644
--- a/src/diffusers/pipelines/auto_pipeline.py
+++ b/src/diffusers/pipelines/auto_pipeline.py
@@ -659,7 +659,7 @@ def __init__(self, *args, **kwargs):
     @validate_hf_hub_args
     def from_pretrained(cls, pretrained_model_or_path, **kwargs):
         r"""
-        Instantiates a image-to-image Pytorch diffusion pipeline from pretrained pipeline weight.
+        Instantiates an image-to-image PyTorch diffusion pipeline from pretrained pipeline weights.
 
         The from_pretrained() method takes care of returning the correct pipeline class instance by:
             1. Detect the pipeline class of the pretrained_model_or_path based on the _class_name property of its
@@ -817,7 +817,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs):
     @classmethod
     def from_pipe(cls, pipeline, **kwargs):
         r"""
-        Instantiates a image-to-image Pytorch diffusion pipeline from another instantiated diffusion pipeline class.
+        Instantiates an image-to-image PyTorch diffusion pipeline from another instantiated diffusion pipeline class.
 
         The from_pipe() method takes care of returning the correct pipeline class instance by finding the
         image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class name.
diff --git a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py
index a068f99c6368..e57f2abef9e8 100644
--- a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py
+++ b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_text_decoder.py
@@ -11,7 +11,7 @@
 # Modified from ClipCaptionModel in https://github.com/thu-ml/unidiffuser/blob/main/libs/caption_decoder.py
 class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
     """
-    Text decoder model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to
+    Text decoder model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to
     generate text from the UniDiffuser image-text embedding.
 
     Parameters:
diff --git a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py
index 6fd4ff50285f..49db56ec4ad6 100644
--- a/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py
+++ b/src/diffusers/pipelines/deprecated/unidiffuser/modeling_uvit.py
@@ -831,7 +831,7 @@ def forward(
 
 class UniDiffuserModel(ModelMixin, ConfigMixin):
     """
-    Transformer model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a
+    Transformer model for an image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a
     modification of [`UTransformer2DModel`] with input and output heads for the VAE-embedded latent image, the
     CLIP-embedded image, and the CLIP-embedded prompt (see paper for more details).
 
diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py
index 484f2ca58e1b..0fc80670412c 100644
--- a/src/diffusers/schedulers/scheduling_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_euler_discrete.py
@@ -368,7 +368,7 @@ def set_timesteps(
                 based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas`
                 must be `None`, and `timestep_spacing` attribute will be ignored.
             sigmas (`list[float]`, *optional*):
-                Custom sigmas used to support arbitrary timesteps schedule schedule. If `None`, timesteps and sigmas
+                Custom sigmas used to support arbitrary timesteps schedule. If `None`, timesteps and sigmas
                 will be generated based on the relevant scheduler attributes. If `sigmas` is passed,
                 `num_inference_steps` and `timesteps` must be `None`, and the timesteps will be generated based on the
                 custom sigmas schedule.