pmclSF · pmclSF · Feb 28, 2026 · Feb 16, 2026 · Feb 16, 2026 · Feb 28, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -41,19 +41,7 @@ jobs:
 
     - name: Run tests
       run: |
-        pytest \
-          tests/test_entropy_parameters.py \
-          tests/test_context_model.py \
-          tests/test_channel_context.py \
-          tests/test_attention_context.py \
-          tests/test_model_transforms.py \
-          tests/test_integration.py \
-          tests/test_performance.py \
-          tests/test_parallel_process.py \
-          tests/test_colorbar.py \
-          tests/test_entropy_model.py \
-          tests/test_octree_coding.py \
-          -v --cov=src --cov-report=xml -m "not gpu and not slow"
+        pytest tests/ -v --cov=src --cov-report=xml -m "not gpu and not slow"
 
     - name: Upload coverage
       uses: codecov/codecov-action@v4

diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.8.16
+3.10
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -68,7 +68,7 @@ else:
 
 **Rules:**
 - Model-level `call()` methods and any layer that branches on training mode **must** accept `training=None` and pass it through to sub-layers that need it.
-- Leaf layers that do not use `training` internally (e.g., `CENICGDN`, `SpatialSeparableConv`, `MaskedConv3D`, `SliceTransform`) currently omit it from their signatures. This is the established convention — do not add `training` to these unless they gain training-dependent behavior.
+- Leaf layers that do not use `training` internally (e.g., `GDN`, `SpatialSeparableConv`, `MaskedConv3D`, `SliceTransform`) currently omit it from their signatures. This is the established convention — do not add `training` to these unless they gain training-dependent behavior.
 - **Never remove the training conditional** from methods that have it. Never replace noise injection with unconditional `tf.round()`.
 - When adding new layers: include `training=None` if the layer has any training-dependent behavior. Omit it for pure computation layers.
 
@@ -77,7 +77,7 @@ else:
 All model tensors are 5D: `(batch, depth, height, width, channels)` — channels-last.
 
 - Convolutions are `Conv3D`, never `Conv2D`. Kernels are 3-tuples: `(3, 3, 3)`.
-- Channel axis is axis 4 (see `CENICGDN.call()` which does `tf.tensordot(norm, self.gamma, [[4], [0]])`).
+- Channel axis is axis 4, i.e. the last axis (see `GDN.call()`, which contracts over it with `tf.einsum('...c,cd->...d', ...)`).
 - Input voxel grids have 1 channel: shape `(B, D, H, W, 1)`.
 - Do not flatten spatial dimensions to use 2D ops. The 3D structure is load-bearing.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.ruff]
 line-length = 120
-target-version = "py38"
+target-version = "py310"
 
 [tool.ruff.lint]
 select = ["F", "I", "E", "W"]
@@ -23,5 +23,5 @@ known-first-party = [
     "ev_compare", "ev_run_render", "mp_report", "mp_run",
     "quick_benchmark", "benchmarks", "parallel_process",
     "point_cloud_metrics", "map_color", "colorbar",
-    "cli_train", "test_utils",
+    "cli_train", "file_io", "test_utils",
 ]
diff --git a/setup.py b/setup.py
@@ -2,12 +2,20 @@
 
 setup(
     name="deepcompress",
-    version="0.1",
+    version="2.0.0",
     package_dir={"": "src"},
     packages=find_namespace_packages(include=["*"], where="src"),
+    python_requires=">=3.10",
     install_requires=[
         'numpy',
-        'pytest',
-        'numba'
+        'tensorflow>=2.11',
+        'tensorflow-probability~=0.19',
+        'matplotlib',
+        'pandas',
+        'tqdm',
+        'pyyaml',
+        'scipy',
+        'numba',
+        'keras-tuner',
     ],
-)
+)
diff --git a/src/__init__.py b/src/__init__.py
@@ -0,0 +1 @@
+__version__ = "2.0.0"
diff --git a/src/__pycache__/__init__.cpython-38.pyc b/src/__pycache__/__init__.cpython-38.pyc
diff --git a/src/__pycache__/colorbar.cpython-38.pyc b/src/__pycache__/colorbar.cpython-38.pyc
diff --git a/src/__pycache__/compress_octree.cpython-38.pyc b/src/__pycache__/compress_octree.cpython-38.pyc
diff --git a/src/__pycache__/ds_mesh_to_pc.cpython-38.pyc b/src/__pycache__/ds_mesh_to_pc.cpython-38.pyc
diff --git a/src/__pycache__/ds_pc_octree_blocks.cpython-38.pyc b/src/__pycache__/ds_pc_octree_blocks.cpython-38.pyc
diff --git a/src/__pycache__/ev_run_experiment.cpython-38.pyc b/src/__pycache__/ev_run_experiment.cpython-38.pyc
diff --git a/src/__pycache__/ev_run_render.cpython-38.pyc b/src/__pycache__/ev_run_render.cpython-38.pyc
diff --git a/src/__pycache__/experiment.cpython-38.pyc b/src/__pycache__/experiment.cpython-38.pyc
diff --git a/src/__pycache__/map_color.cpython-38.pyc b/src/__pycache__/map_color.cpython-38.pyc
diff --git a/src/__pycache__/model_opt.cpython-38.pyc b/src/__pycache__/model_opt.cpython-38.pyc
diff --git a/src/__pycache__/model_transforms.cpython-38.pyc b/src/__pycache__/model_transforms.cpython-38.pyc
diff --git a/src/__pycache__/octree_coding.cpython-38.pyc b/src/__pycache__/octree_coding.cpython-38.pyc
diff --git a/src/__pycache__/parallel_process.cpython-38.pyc b/src/__pycache__/parallel_process.cpython-38.pyc
diff --git a/src/__pycache__/patch_gaussian_conditional.cpython-38.pyc b/src/__pycache__/patch_gaussian_conditional.cpython-38.pyc
diff --git a/src/__pycache__/pc_metric.cpython-38.pyc b/src/__pycache__/pc_metric.cpython-38.pyc
diff --git a/src/attention_context.py b/src/attention_context.py
@@ -15,7 +15,7 @@
 
 import tensorflow as tf
 
-from constants import LOG_2_RECIPROCAL
+from .constants import LOG_2_RECIPROCAL
 
 
 class WindowedAttention3D(tf.keras.layers.Layer):
@@ -669,8 +669,8 @@ def __init__(self,
         self.num_attention_layers = num_attention_layers
 
         # Import here to avoid circular dependency
-        from entropy_model import ConditionalGaussian
-        from entropy_parameters import EntropyParameters
+        from .entropy_model import ConditionalGaussian, PatchedGaussianConditional
+        from .entropy_parameters import EntropyParameters
 
         # Hyperprior-based parameter prediction
         self.entropy_parameters = EntropyParameters(
@@ -714,6 +714,9 @@ def __init__(self,
         # Conditional Gaussian for entropy coding
         self.conditional = ConditionalGaussian()
 
+        # Hyperprior entropy model (for z)
+        self.hyper_entropy = PatchedGaussianConditional()
+
         self.scale_min = 0.01
 
     def _split_params(self, params: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
@@ -723,13 +726,15 @@ def _split_params(self, params: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         return mean, scale
 
     def call(self, y: tf.Tensor, z_hat: tf.Tensor,
+             z: Optional[tf.Tensor] = None,
              training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         """
         Process latent y using hyperprior and attention context.
 
         Args:
             y: Main latent representation.
             z_hat: Decoded hyperprior.
+            z: Optional quantized/noised hyper-latent; when given, its rate is added to the returned total bits.
             training: Whether in training mode.
 
         Returns:
@@ -758,10 +763,18 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
         # Process through conditional Gaussian
         y_hat, y_likelihood = self.conditional(y, scale, mean, training=training)
 
-        # Compute total bits
-        # Using pre-computed reciprocal: multiplication is faster than division
-        bits_per_element = -y_likelihood * LOG_2_RECIPROCAL
-        total_bits = tf.reduce_sum(bits_per_element)
+        # Compute y bits from discretized likelihood
+        y_bits = tf.reduce_sum(-tf.math.log(y_likelihood) * LOG_2_RECIPROCAL)
+
+        # Compute z bits if z is provided
+        z_bits = tf.constant(0.0)
+        if z is not None:
+            if not self.hyper_entropy.built:
+                self.hyper_entropy.build(z.shape)
+            z_likelihood = self.hyper_entropy.likelihood(z)
+            z_bits = tf.reduce_sum(-tf.math.log(z_likelihood) * LOG_2_RECIPROCAL)
+
+        total_bits = y_bits + z_bits
 
         return y_hat, y_likelihood, total_bits
 
@@ -804,9 +817,9 @@ def __init__(self,
         self.num_channel_groups = num_channel_groups
         self.num_attention_layers = num_attention_layers
 
-        from channel_context import ChannelContext
-        from entropy_model import ConditionalGaussian
-        from entropy_parameters import EntropyParameters
+        from .channel_context import ChannelContext
+        from .entropy_model import ConditionalGaussian, PatchedGaussianConditional
+        from .entropy_parameters import EntropyParameters
 
         # Hyperprior parameters
         self.entropy_parameters = EntropyParameters(
@@ -819,6 +832,8 @@ def __init__(self,
             num_groups=num_channel_groups
         )
 
+        self.channels_per_group = latent_channels // num_channel_groups
+
         # Attention context (applied per channel group)
         self.attention_contexts = [
             BidirectionalMaskTransformer(
@@ -830,6 +845,17 @@ def __init__(self,
             for i in range(num_channel_groups)
         ]
 
+        # Per-group 1x1x1 conv mapping attention features to mean/scale (replaces the feature-duplicating concat)
+        self.attention_to_params = [
+            tf.keras.layers.Conv3D(
+                filters=self.channels_per_group * 2,  # mean and scale
+                kernel_size=1,
+                padding='same',
+                name=f'attn_to_params_{i}'
+            )
+            for i in range(num_channel_groups)
+        ]
+
         # Parameter fusion per group
         self.param_fusions = [
             tf.keras.layers.Conv3D(
@@ -847,7 +873,9 @@ def __init__(self,
             for i in range(num_channel_groups)
         ]
 
-        self.channels_per_group = latent_channels // num_channel_groups
+        # Hyperprior entropy model (for z)
+        self.hyper_entropy = PatchedGaussianConditional()
+
         self.scale_min = 0.01
 
     def _split_params(self, params: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
@@ -856,6 +884,7 @@ def _split_params(self, params: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         return mean, scale
 
     def call(self, y: tf.Tensor, z_hat: tf.Tensor,
+             z: Optional[tf.Tensor] = None,
              training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         """Process with all context types combined."""
         # Get hyperprior parameters
@@ -883,9 +912,9 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
             combined_mean = hyper_mean_slice + context_mean
             combined_scale = hyper_scale_slice * (1.0 + context_scale)
 
-            # Add attention refinement
+            # Project attention features to mean/scale parameters
             hyper_params = tf.concat([combined_mean, combined_scale], axis=-1)
-            attn_params = tf.concat([attn_features, attn_features], axis=-1)  # Use features for both
+            attn_params = self.attention_to_params[i](attn_features)
             combined = tf.concat([hyper_params, attn_params], axis=-1)
             fused_params = self.param_fusions[i](combined)
 
@@ -902,9 +931,18 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
         y_hat = tf.concat(y_hat_parts, axis=-1)
         y_likelihood = tf.concat(likelihood_parts, axis=-1)
 
-        # Using pre-computed reciprocal: multiplication is faster than division
-        bits_per_element = -y_likelihood * LOG_2_RECIPROCAL
-        total_bits = tf.reduce_sum(bits_per_element)
+        # Compute y bits from discretized likelihood
+        y_bits = tf.reduce_sum(-tf.math.log(y_likelihood) * LOG_2_RECIPROCAL)
+
+        # Compute z bits if z is provided
+        z_bits = tf.constant(0.0)
+        if z is not None:
+            if not self.hyper_entropy.built:
+                self.hyper_entropy.build(z.shape)
+            z_likelihood = self.hyper_entropy.likelihood(z)
+            z_bits = tf.reduce_sum(-tf.math.log(z_likelihood) * LOG_2_RECIPROCAL)
+
+        total_bits = y_bits + z_bits
 
         return y_hat, y_likelihood, total_bits
 

diff --git a/src/benchmarks.py b/src/benchmarks.py
@@ -391,7 +391,7 @@ def create_mask_vectorized(kernel_size, mask_type, in_channels, filters):
 
 def benchmark_attention():
     """Benchmark attention implementations."""
-    from attention_context import SparseAttention3D, WindowedAttention3D
+    from .attention_context import SparseAttention3D, WindowedAttention3D
 
     dim = 64
     input_shape = (1, 16, 16, 16, dim)  # Smaller for testing

diff --git a/src/channel_context.py b/src/channel_context.py
@@ -11,7 +11,7 @@
 
 import tensorflow as tf
 
-from constants import LOG_2_RECIPROCAL
+from .constants import LOG_2_RECIPROCAL
 
 
 class SliceTransform(tf.keras.layers.Layer):
@@ -231,8 +231,8 @@ def __init__(self,
         self.channels_per_group = latent_channels // num_groups
 
         # Import here to avoid circular dependency
-        from entropy_model import ConditionalGaussian
-        from entropy_parameters import EntropyParameters
+        from .entropy_model import ConditionalGaussian, PatchedGaussianConditional
+        from .entropy_parameters import EntropyParameters
 
         # Hyperprior-based parameter prediction
         self.entropy_parameters = EntropyParameters(
@@ -251,6 +251,9 @@ def __init__(self,
             for i in range(num_groups)
         ]
 
+        # Hyperprior entropy model (for z)
+        self.hyper_entropy = PatchedGaussianConditional()
+
         self.scale_min = 0.01
 
     def _fuse_params(self,
@@ -269,6 +272,7 @@ def _fuse_params(self,
         return mean, scale
 
     def call(self, y: tf.Tensor, z_hat: tf.Tensor,
+             z: Optional[tf.Tensor] = None,
              training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         """
         Process latent y using hyperprior and channel-wise context.
@@ -279,6 +283,7 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
         Args:
             y: Main latent representation.
             z_hat: Decoded hyperprior.
+            z: Optional quantized/noised hyper-latent; when given, its rate is added to the returned total bits.
             training: Whether in training mode.
 
         Returns:
@@ -305,22 +310,17 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
 
             # Get context params (using y for training, y_hat for inference)
             # Note: Use .call() to pass non-tensor group_idx as keyword argument
-            if training:
+            if i == 0:
+                # First group: no context available, channel_context returns zeros
+                context_mean, context_scale = self.channel_context.call(y, group_idx=0)
+            elif training:
                 # Training: use ground truth y for context (teacher forcing)
                 context_mean, context_scale = self.channel_context.call(y, group_idx=i)
             else:
-                # Inference: use only already decoded groups (no padding needed!)
-                # The channel_context only uses channels 0..group_idx-1, so we
-                # only need to concatenate the decoded parts without padding.
-                # This optimization reduces memory allocations by ~25%.
-                if i == 0:
-                    # First group has no context - channel_context handles this
-                    y_hat_partial = y_hat_parts[0] if y_hat_parts else None
-                else:
-                    # Concatenate only the decoded parts (no zero padding)
-                    y_hat_partial = tf.concat(y_hat_parts, axis=-1)
+                # Inference: use already decoded groups for context
+                y_hat_partial = tf.concat(y_hat_parts, axis=-1)
                 context_mean, context_scale = self.channel_context.call(
-                    y_hat_partial if y_hat_partial is not None else y, group_idx=i
+                    y_hat_partial, group_idx=i
                 )
 
             # Fuse parameters
@@ -341,10 +341,18 @@ def call(self, y: tf.Tensor, z_hat: tf.Tensor,
         y_hat = tf.concat(y_hat_parts, axis=-1)
         y_likelihood = tf.concat(likelihood_parts, axis=-1)
 
-        # Compute total bits
-        # Using pre-computed reciprocal: multiplication is faster than division
-        bits_per_element = -y_likelihood * LOG_2_RECIPROCAL
-        total_bits = tf.reduce_sum(bits_per_element)
+        # Compute y bits from discretized likelihood
+        y_bits = tf.reduce_sum(-tf.math.log(y_likelihood) * LOG_2_RECIPROCAL)
+
+        # Compute z bits if z is provided
+        z_bits = tf.constant(0.0)
+        if z is not None:
+            if not self.hyper_entropy.built:
+                self.hyper_entropy.build(z.shape)
+            z_likelihood = self.hyper_entropy.likelihood(z)
+            z_bits = tf.reduce_sum(-tf.math.log(z_likelihood) * LOG_2_RECIPROCAL)
+
+        total_bits = y_bits + z_bits
 
         return y_hat, y_likelihood, total_bits
 

diff --git a/src/cli_train.py b/src/cli_train.py
@@ -5,7 +5,7 @@
 import keras_tuner as kt
 import tensorflow as tf
 
-from ds_mesh_to_pc import read_off
+from .file_io import read_point_cloud
 
 
 def create_model(hp):
@@ -32,8 +32,10 @@ def load_and_preprocess_data(input_dir, batch_size):
     file_paths = glob.glob(os.path.join(input_dir, "*.ply"))
 
     def parse_ply_file(file_path):
-        mesh_data = read_off(file_path)
-        return mesh_data.vertices
+        vertices = read_point_cloud(file_path)
+        if vertices is None:
+            raise ValueError(f"Failed to read point cloud: {file_path}")
+        return vertices
 
     def data_generator():
         for file_path in file_paths:
@@ -70,7 +72,7 @@ def tune_hyperparameters(input_dir, output_dir, num_epochs=10):
     best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
 
     print("Best Hyperparameters:", best_hps.values)
-    best_model.save(os.path.join(output_dir, 'best_model'))
+    best_model.save_weights(os.path.join(output_dir, 'best_model.weights.h5'))
 
 def main():
     parser = argparse.ArgumentParser(description="Train a point cloud compression model with hyperparameter tuning.")
@@ -94,7 +96,7 @@ def main():
         model.compile(optimizer='adam', loss='mean_squared_error')
         dataset = load_and_preprocess_data(args.input_dir, args.batch_size)
         model.fit(dataset, epochs=args.num_epochs)
-        model.save(os.path.join(args.output_dir, 'trained_model'))
+        model.save_weights(os.path.join(args.output_dir, 'trained_model.weights.h5'))
 
 if __name__ == "__main__":
     main()