pyproject.toml (2 additions, 1 deletion)
@@ -121,6 +121,7 @@ test = [
     "pytest-asyncio",
     "pytest-cov>=6.0.0",
     "pytest-timeout", # For test suite timeout configuration
+    "pytest-recording", # For blocking network access and using canned server responses
     "pytest-xdist>=3.8.0", # For concurrent test execution (uv run pytest -n auto test/)
     "psutil", # For test infrastructure: RAM detection in conftest.py
     "nbmake>=1.5.5", # Notebook testing
@@ -262,7 +263,7 @@ ignore-words-list = 'mellea,hashi,noo,Asai,asai,nd,mot,rouge,Rouge,Strat,Wight'
 check-filenames = true
 check-hidden = false
 regex = "(?<![a-z])[a-z'`]+|[A-Z][a-z'`]*|[a-z]+'[a-z]*|[a-z]+(?=[_-])|[a-z]+(?=[A-Z])|\\d+"
-skip = 'requirements.txt,uv.lock,CHANGELOG.md'
+skip = 'requirements.txt,uv.lock,CHANGELOG.md,test/formatters/granite/testdata/test_canned_output/model_output/*.json'
 
 # -----------------------------
 # Pytest - Testing Framework
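For context on the new test dependency: pytest-recording wraps VCR.py behind a pytest marker, recording real HTTP traffic to a "cassette" file on the first run and replaying the canned responses on later runs. A minimal sketch of typical usage, assuming the default cassette location (a cassettes/ directory next to the test module); the endpoint URL is illustrative, not from this PR:

    import pytest
    import requests


    # pytest-recording's vcr marker: the first run records HTTP traffic to
    # cassettes/<test name>.yaml next to this module; later runs replay the
    # canned response, so the test passes with no network access at all.
    @pytest.mark.vcr
    def test_health_endpoint():
        resp = requests.get("https://api.example.com/health")  # hypothetical URL
        assert resp.status_code == 200

Recording behavior is controlled from the CLI: pytest --record-mode=once writes a cassette if one is missing, and pytest --block-network fails any test that attempts to reach the real network.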
test/backends/test_ollama.py (10 additions, 2 deletions)
@@ -104,12 +104,20 @@ class Email(pydantic.BaseModel):
 @pytest.mark.qualitative
 @pytest.mark.timeout(150)
 async def test_generate_from_raw(session) -> None:
-    prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
+    # Note the capital letter "W" at the beginning of each prompt. This
+    # capitalization is important to the Ollama version of Granite 4.0 Micro,
+    # the current default model for Mellea.
+    prompts = ["What is 1+1?", "What is 2+2?", "What is 3+3?", "What is 4+4?"]
 
     results = await session.backend.generate_from_raw(
         actions=[CBlock(value=prompt) for prompt in prompts],
         ctx=session.ctx,
-        model_options={ModelOption.CONTEXT_WINDOW: 2048},
+        model_options={
+            ModelOption.CONTEXT_WINDOW: 2048,
+            # With raw prompts and a high temperature, a response of arbitrary
+            # length is normal operation, so cap generation explicitly.
+            ModelOption.MAX_NEW_TOKENS: 100,
+        },
     )
 
     assert len(results) == len(prompts)
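The MAX_NEW_TOKENS cap bounds each completion at 100 tokens, presumably keeping the four raw-prompt generations inside the 150-second test timeout even when the model rambles. If stricter isolation were wanted, pytest-recording's block_network marker could pin a test like this to the local Ollama server only. A sketch under that assumption; combining the marker with this particular test is illustrative, not something this PR does:

    import pytest


    # Assumption: block all real network access except the local Ollama server,
    # so an accidental call to a remote endpoint fails fast instead of hanging.
    @pytest.mark.block_network(allowed_hosts=["localhost", "127.0.0.1"])
    @pytest.mark.qualitative
    async def test_generate_from_raw(session) -> None:
        ...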