diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 053fc7ac..da56a0a9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,20 +59,20 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 + - name: Set up Python 3.10.12 uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.10.12" - name: Install dependencies run: | python -m pip install --upgrade pip + pip install nemo-toolkit[asr,nlp]==2.7.2 pip install -r requirements/main.txt pip install -r requirements/tests.txt sudo apt-get update sudo apt-get install -y libsndfile1 ffmpeg sox libsox-fmt-mp3 pip install pytorch_lightning pip install Cython wheel # need to pre-install to avoid error in nemo installation - pip install nemo-toolkit[asr,nlp]==2.2.1 pip install nemo_text_processing pip install -r requirements/huggingface.txt pip install pymarian diff --git a/dataset_configs/multilingual/granary/README.md b/dataset_configs/multilingual/granary/README.md index b3c0c474..f63b767b 100644 --- a/dataset_configs/multilingual/granary/README.md +++ b/dataset_configs/multilingual/granary/README.md @@ -63,7 +63,7 @@ pip install fasttext - `ConvertToTarredAudioDataset` (optional, only if tar-sharding is enabled) ```bash -pip install lhotse "nemo-toolkit[common]==2.2.1" +pip install lhotse "nemo-toolkit[common]==2.7.2" ``` ### Quick start diff --git a/dataset_configs/multilingual/granary/config.yaml b/dataset_configs/multilingual/granary/config.yaml index 78e778b0..140c0df6 100644 --- a/dataset_configs/multilingual/granary/config.yaml +++ b/dataset_configs/multilingual/granary/config.yaml @@ -71,7 +71,7 @@ documentation: | ``ConvertToTarredAudioDataset`` *(optional, only if tar-sharding is enabled)*:: - pip install lhotse "nemo-toolkit[common]==2.2.1" + pip install lhotse "nemo-toolkit[common]==2.7.2" Quick start ----------- diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp index f174c7b1..0cb1ddb3 100644 --- 
a/docker/Dockerfile.tts_sdp +++ b/docker/Dockerfile.tts_sdp @@ -38,9 +38,17 @@ RUN rm -rf /src/NeMo-speech-data-processor/.git WORKDIR /src/NeMo-speech-data-processor RUN pip install -r requirements/main.txt RUN pip install -r requirements/tts.txt +RUN pip install pytest pytest-cov boto3 RUN pip install flash-attn --no-build-isolation RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip -RUN pip install megatron-core transformer_engine[pytorch]==2.4.0 -RUN pip install nemo_toolkit['all']==2.1.0 + +# newer versions of nemo do not have PunctuationCapitalizationModels +RUN pip install nemo_toolkit['all']==2.3.2 + +# nemo updates torch version, so we need to install the same version, for properly working tts +RUN python -m pip install --force-reinstall \ + torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 \ + --index-url https://download.pytorch.org/whl/cu121 +RUN python -m pip install "numpy<2.0.0" WORKDIR /src/NeMo-speech-data-processor \ No newline at end of file diff --git a/requirements/main.txt b/requirements/main.txt index 31a4a87d..ebc9f865 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -34,4 +34,4 @@ datasets>=2.14.0,<3.0.0 # for vLLMInference processor is required: pip install "optree>=0.13.0" vllm # for CometoidWMTQualityEstimation processor is required: pip install pymarian # for FastTextLangIdClassifier processor is required: pip install fasttext -# for ConvertToTarredAudioDatasetConfig processor can be additionally required: pip install lhotse "nemo-toolkit[common]==2.2.1" \ No newline at end of file +# for ConvertToTarredAudioDatasetConfig processor can be additionally required: pip install lhotse "nemo-toolkit[common]==2.7.2" \ No newline at end of file diff --git a/requirements/tests.txt b/requirements/tests.txt index 0f8b8675..b877e01e 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -4,5 +4,6 @@ pytest pytest-cov # lhotse requires torch and torchaudio to be present lhotse -torch -torchaudio 
\ No newline at end of file +torchaudio +torchcodec +fasttext \ No newline at end of file diff --git a/sdp/processors/inference/asr/faster_whisper/faster_whisper_inference.py b/sdp/processors/inference/asr/faster_whisper/faster_whisper_inference.py index b8e419d9..ad32d40c 100644 --- a/sdp/processors/inference/asr/faster_whisper/faster_whisper_inference.py +++ b/sdp/processors/inference/asr/faster_whisper/faster_whisper_inference.py @@ -388,7 +388,7 @@ def _write_words(words: List[Dict]): output_words_filepath = None if self.config.inference.word_timestamps: - output_words_filepath = _write_words(output_words_filepath, sample_words) + output_words_filepath = _write_words(sample_words) return dict(segments = output_segments_filepath, words = output_words_filepath) diff --git a/sdp/processors/inference/asr/nemo/utils/speech_to_text_with_vad.py b/sdp/processors/inference/asr/nemo/utils/speech_to_text_with_vad.py index 2c734754..7988975c 100644 --- a/sdp/processors/inference/asr/nemo/utils/speech_to_text_with_vad.py +++ b/sdp/processors/inference/asr/nemo/utils/speech_to_text_with_vad.py @@ -71,7 +71,7 @@ from nemo.collections.asr.data import feature_to_text_dataset from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.asr.models import ASRModel, EncDecClassificationModel +from nemo.collections.asr.models import ASRModel, EncDecClassificationModel, EncDecFrameClassificationModel from nemo.collections.asr.parts.submodules.ctc_decoding import CTCDecodingConfig from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTDecodingConfig from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest @@ -79,8 +79,6 @@ generate_overlap_vad_seq, generate_vad_segment_table, get_vad_stream_status, - init_frame_vad_model, - init_vad_model, ) from nemo.core.config import hydra_runner from nemo.utils import logging @@ -246,9 +244,9 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C 
out_dir.mkdir(parents=True, exist_ok=True) torch.set_grad_enabled(False) if cfg.vad_model: - vad_model = init_frame_vad_model(cfg.vad_model) + vad_model = init_frame_vad_model(cfg.vad_model, strict=False) else: - vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet") + vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet", strict=False) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") vad_model = vad_model.to(device) vad_model.eval() @@ -477,15 +475,43 @@ def generate_vad_frame_pred( return out_dir -def init_asr_model(model_path: str) -> ASRModel: +def init_frame_vad_model(model_path: str, strict: bool = False) -> EncDecFrameClassificationModel: + """ + Initiate VAD model with model path + """ + if model_path.endswith('.nemo'): + logging.info(f"Using local VAD model from {model_path}") + vad_model = EncDecFrameClassificationModel.restore_from(restore_path=model_path, strict=strict) + elif model_path.endswith('.ckpt'): + vad_model = EncDecFrameClassificationModel.load_from_checkpoint(checkpoint_path=model_path, strict=strict) + else: + logging.info(f"Using NGC cloud VAD model {model_path}") + vad_model = EncDecFrameClassificationModel.from_pretrained(model_name=model_path, strict=strict) + return vad_model + +def init_vad_model(model_path: str, strict: bool = False) -> EncDecClassificationModel: + """ + Initiate VAD model with model path + """ + if model_path.endswith('.nemo'): + logging.info(f"Using local VAD model from {model_path}") + vad_model = EncDecClassificationModel.restore_from(restore_path=model_path, strict=strict) + elif model_path.endswith('.ckpt'): + vad_model = EncDecClassificationModel.load_from_checkpoint(checkpoint_path=model_path, strict=strict) + else: + logging.info(f"Using NGC cloud VAD model {model_path}") + vad_model = EncDecClassificationModel.from_pretrained(model_name=model_path, strict=strict) + return vad_model + +def init_asr_model(model_path: str, strict: 
bool = True) -> ASRModel: if model_path.endswith('.nemo'): logging.info(f"Using local ASR model from {model_path}") - asr_model = ASRModel.restore_from(restore_path=model_path) + asr_model = ASRModel.restore_from(restore_path=model_path, strict=strict) elif model_path.endswith('.ckpt'): - asr_model = ASRModel.load_from_checkpoint(checkpoint_path=model_path) + asr_model = ASRModel.load_from_checkpoint(checkpoint_path=model_path, strict=strict) else: logging.info(f"Using NGC ASR model {model_path}") - asr_model = ASRModel.from_pretrained(model_name=model_path) + asr_model = ASRModel.from_pretrained(model_name=model_path, strict=strict) return asr_model diff --git a/sdp/processors/inference/llm/vllm/vllm.py index 9ef9e89c..041590ae 100644 --- a/sdp/processors/inference/llm/vllm/vllm.py +++ b/sdp/processors/inference/llm/vllm/vllm.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import yaml import json + +import yaml from tqdm import tqdm from sdp.processors.base_processor import BaseProcessor @@ -53,7 +54,7 @@ class vLLMInference(BaseProcessor): - model: https://docs.vllm.ai/en/latest/api/vllm/index.html#vllm.LLM - inference: https://docs.vllm.ai/en/v0.6.4/dev/sampling_params.html - - apply_chat_template: https://huggingface.co/docs/transformers/main/en/chat_templating#applychattemplate + - apply_chat_template: https://huggingface.co/docs/transformers/main/en/chat_templating Make sure to install `optree>=0.13.0` and `vllm` before using this processor: pip install "optree>=0.13.0" vllm @@ -70,8 +71,8 @@ def __init__(self, apply_chat_template: dict = {}, **kwargs): - from vllm import SamplingParams from transformers import AutoTokenizer + from vllm import SamplingParams super().__init__(**kwargs) diff --git a/sdp/processors/toloka/accept_if.py b/sdp/processors/toloka/accept_if.py index 8472f601..4f092f51 100644 --- a/sdp/processors/toloka/accept_if.py +++ b/sdp/processors/toloka/accept_if.py @@ -14,6 +14,7 @@ import json import os +import warnings from collections import defaultdict from typing import Optional @@ -73,6 +74,15 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. 
" + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.input_data_file = input_data_file self.input_pool_file = input_pool_file self.threshold = threshold diff --git a/sdp/processors/toloka/create_pool.py b/sdp/processors/toloka/create_pool.py index 9948cef4..75daed4e 100644 --- a/sdp/processors/toloka/create_pool.py +++ b/sdp/processors/toloka/create_pool.py @@ -15,6 +15,7 @@ import datetime import json import os +import warnings from sdp.logging import logger from sdp.processors.base_processor import BaseParallelProcessor @@ -55,7 +56,17 @@ def __init__( lang : str, optional The language filter for the pool. Defaults to 'HY'. """ + super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. " + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.API_KEY = os.getenv('TOLOKA_API_KEY') if not self.API_KEY: raise ValueError("TOLOKA_API_KEY environment variable is not set") diff --git a/sdp/processors/toloka/create_project.py b/sdp/processors/toloka/create_project.py index bf8ece19..a9d01102 100644 --- a/sdp/processors/toloka/create_project.py +++ b/sdp/processors/toloka/create_project.py @@ -14,6 +14,7 @@ import json import os +import warnings from sdp.logging import logger from sdp.processors.base_processor import BaseParallelProcessor, DataEntry @@ -52,6 +53,15 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. 
" + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.API_KEY = os.getenv('TOLOKA_API_KEY') if not self.API_KEY: raise ValueError("TOLOKA_API_KEY environment variable is not set") diff --git a/sdp/processors/toloka/create_sentence_set.py b/sdp/processors/toloka/create_sentence_set.py index 8a86afb6..8141f92d 100644 --- a/sdp/processors/toloka/create_sentence_set.py +++ b/sdp/processors/toloka/create_sentence_set.py @@ -14,6 +14,7 @@ import json import os +import warnings from docx import Document @@ -34,6 +35,14 @@ class CreateSentenceSet(BaseParallelProcessor): """ def __init__(self, **kwargs): super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. " + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) def parse_docx(self, file_path): doc = Document(file_path) diff --git a/sdp/processors/toloka/create_task_set.py b/sdp/processors/toloka/create_task_set.py index 3957091f..8e58ca5a 100644 --- a/sdp/processors/toloka/create_task_set.py +++ b/sdp/processors/toloka/create_task_set.py @@ -14,6 +14,7 @@ import json import os +import warnings from typing import List, Optional from sdp.logging import logger @@ -53,6 +54,15 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. 
" + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.input_data_file = input_data_file self.input_pool_file = input_pool_file self.limit = limit diff --git a/sdp/processors/toloka/download_responses.py b/sdp/processors/toloka/download_responses.py index aa2563cf..2fcfe48d 100644 --- a/sdp/processors/toloka/download_responses.py +++ b/sdp/processors/toloka/download_responses.py @@ -14,6 +14,7 @@ import json import os +import warnings from sdp.logging import logger from sdp.processors.base_processor import BaseParallelProcessor, DataEntry @@ -82,6 +83,15 @@ def __init__( The ID of the pool from which results will be retrieved. Defaults to None. """ super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. " + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.input_data_file = input_data_file self.input_pool_file = input_pool_file self.output_dir = output_dir diff --git a/sdp/processors/toloka/reject_if.py b/sdp/processors/toloka/reject_if.py index 182c3e86..35057942 100644 --- a/sdp/processors/toloka/reject_if.py +++ b/sdp/processors/toloka/reject_if.py @@ -14,6 +14,7 @@ import json import os +import warnings from sdp.logging import logger from sdp.processors.base_processor import BaseParallelProcessor, DataEntry @@ -77,6 +78,15 @@ def __init__( The ID of the pool from which assignments will be retrieved. Defaults to None. """ super().__init__(**kwargs) + + # Deprecation warning + warnings.warn( + "Toloka processors are deprecated and will be removed in a future version. 
" + "Please migrate to alternative solutions for crowdsourcing tasks.", + DeprecationWarning, + stacklevel=2 + ) + self.input_data_file = input_data_file self.input_pool_file = input_pool_file self.config_file = config_file diff --git a/sdp/processors/tts/nemo_asr_align.py b/sdp/processors/tts/nemo_asr_align.py index 9a71c476..19f426db 100644 --- a/sdp/processors/tts/nemo_asr_align.py +++ b/sdp/processors/tts/nemo_asr_align.py @@ -119,7 +119,7 @@ def get_alignments_text(self, hypotheses): - list: List of dictionaries with word alignments (word, start, end) - str: The transcribed text """ - timestamp_dict = hypotheses.timestep # extract timesteps from hypothesis of first (and only) audio file + timestamp_dict = hypotheses.timestamp # extract timesteps from hypothesis of first (and only) audio file # For a FastConformer model, you can display the word timestamps as follows: # 80ms is duration of a timestep at output of the Conformer diff --git a/sdp/processors/tts/text.py b/sdp/processors/tts/text.py index 37dbb862..b259181f 100644 --- a/sdp/processors/tts/text.py +++ b/sdp/processors/tts/text.py @@ -15,7 +15,6 @@ import json from sdp.processors.base_processor import BaseProcessor, BaseParallelProcessor, DataEntry from sdp.utils.common import load_manifest, save_manifest -from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer from nemo.collections.nlp.models import PunctuationCapitalizationModel class InverseTextNormalizationProcessor(BaseParallelProcessor): @@ -42,6 +41,7 @@ class InverseTextNormalizationProcessor(BaseParallelProcessor): def __init__(self, language="en", **kwargs): + from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer super().__init__(**kwargs) self.normalizer = InverseNormalizer(lang=language) diff --git a/tests/test_cfg_end_to_end_tests.py b/tests/test_cfg_end_to_end_tests.py index aa0fde74..c7b49fa0 100644 --- a/tests/test_cfg_end_to_end_tests.py +++ 
b/tests/test_cfg_end_to_end_tests.py @@ -288,20 +288,20 @@ def get_test_cases() -> List[Tuple[str, Callable]]: config_path=f"{DATASET_CONFIGS_ROOT}/arabic/everyayah/config.yaml", data_check_fn=partial(data_check_fn_generic, file_name="everyayah.hf") ), - TestCase( - config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_start.yaml", - data_check_fn=data_check_fn_armenian_toloka_pipeline_start, - fields_to_ignore=['source_filepath'], - processors_to_run="2:14", - reference_manifest_filename="pipeline_start/test_data_reference.json" - ), - TestCase( - config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_get_final_res.yaml", - data_check_fn=data_check_fn_armenian_toloka_pipeline_get_final_res, - reference_manifest_filename="pipeline_get_final_res/test_data_reference.json", - fields_to_ignore=['audio_filepath', 'duration'], - processors_to_run="1:6" - ), + # TestCase( + # config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_start.yaml", + # data_check_fn=data_check_fn_armenian_toloka_pipeline_start, + # fields_to_ignore=['source_filepath'], + # processors_to_run="2:14", + # reference_manifest_filename="pipeline_start/test_data_reference.json" + # ), + # TestCase( + # config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_get_final_res.yaml", + # data_check_fn=data_check_fn_armenian_toloka_pipeline_get_final_res, + # reference_manifest_filename="pipeline_get_final_res/test_data_reference.json", + # fields_to_ignore=['audio_filepath', 'duration'], + # processors_to_run="1:6" + # ), TestCase( config_path=f"{DATASET_CONFIGS_ROOT}/portuguese/unlabeled/config.yaml", data_check_fn=partial(data_check_fn_unlabeled), @@ -498,4 +498,4 @@ def test_configs(setup_data, tmp_path): shutil.rmtree(tmp_path) if __name__ == "__main__": - pytest.main([__file__, "-v", "--durations=0"]) + pytest.main([__file__, "-v", "--durations=0"]) \ No newline at end of file