From 46b0eaab396bec47019bc07535c53c67ecc2d9fb Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 08:14:53 +0000 Subject: [PATCH 1/6] Allow contexts to share data tables This PR changes astcenc_context_alloc() to accept a parent context from which data tables can be inherited. Descendent contexts must use the same astcenc_config settings as their parent, otherwise the data tables will mismatch. Parent contexts must not be freed by the caller until all descendent contexts have been freed. --- Docs/ChangeLog-5x.md | 12 ++++++-- Source/UnitTest/test_decode.cpp | 2 +- Source/astcenc.h | 18 ++++++++--- Source/astcenc_entry.cpp | 49 +++++++++++++++++++++--------- Source/astcenc_internal.h | 9 ++++-- Source/astcenccli_toplevel.cpp | 2 +- Utils/Example/astc_api_example.cpp | 2 +- 7 files changed, 66 insertions(+), 28 deletions(-) diff --git a/Docs/ChangeLog-5x.md b/Docs/ChangeLog-5x.md index 8e3412ef..ce7fdb2c 100644 --- a/Docs/ChangeLog-5x.md +++ b/Docs/ChangeLog-5x.md @@ -7,13 +7,21 @@ All performance data on this page is measured on an Intel Core i5-9600K clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads. -## 5.5.0 +## 5.4.0 **Status:** In development. -The 5.4.0 release is a minor maintenance release. +The 5.4.0 release is a minor feature release. + +This release includes changes to the public interface in the `astcenc.h` +header. We always recommend rebuilding your client-side code using the +header from the same release to avoid compatibility issues. * **General:** + * **Improvement:** Contexts using the same configuration can now share + read-only data tables. This can significantly reduce the amount of memory + needed for applications that parallelize by processing multiple images + in parallel instead of slicing a single image in parallel. * **Improvement:** Decompressor (`astcdec`) builds, which lack compression support, now use a smaller `block_size_descriptor` by omitting fields that are only needed for compression. 
This reduces the size of a decompressor diff --git a/Source/UnitTest/test_decode.cpp b/Source/UnitTest/test_decode.cpp index 923c412b..1027cff1 100644 --- a/Source/UnitTest/test_decode.cpp +++ b/Source/UnitTest/test_decode.cpp @@ -52,7 +52,7 @@ TEST(decode, decode12x12) uint8_t output[12*12*4]; astcenc_config_init(ASTCENC_PRF_LDR, 12, 12, 1, ASTCENC_PRE_MEDIUM, 0, &config); - status = astcenc_context_alloc(&config, 1, &context); + status = astcenc_context_alloc(&config, 1, &context, nullptr); EXPECT_EQ(status, ASTCENC_SUCCESS); astcenc_image image; diff --git a/Source/astcenc.h b/Source/astcenc.h index 8ecdc16f..12ec8197 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -64,7 +64,7 @@ * * // Allocate working state given config and thread_count * astcenc_context* my_context; - * astcenc_context_alloc(&my_config, thread_count, &my_context); + * astcenc_context_alloc(&my_config, thread_count, &my_context, nullptr); * * // Compress each image using these config settings * foreach image: @@ -726,21 +726,29 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( * slow, so it is recommended that contexts are reused to serially compress or decompress multiple * images to amortize setup cost. * + * A full standalone context can be created by passing @c nullptr for @c parent_context. A child + * context is created by passing another context created with the same @c config into + * @c parent_context. A child will share read-only data tables with the root ancestor full context, + * rather than creating its own, which saves a considerable amount of memory per context. You must + * only free the root @c parent_context once all descendent contexts have been freed. + * * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY * flag when creating the configuration. The compression functions will fail if invoked. For a * decompress-only library build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating * any context. 
* - * @param[in] config Codec config. - * @param thread_count Thread count to configure for. - * @param[out] context Location to store an opaque context pointer. + * @param[in] config Codec config. + * @param thread_count Thread count to configure for. + * @param[out] context Location to store an opaque context pointer. + * @param[in] parent_context Optional parent context from which to inherit read-only data tables. * * @return @c ASTCENC_SUCCESS on success, or an error if context creation failed. */ ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( const astcenc_config* config, unsigned int thread_count, - astcenc_context** context); + astcenc_context** context, + const astcenc_context* parent_context); /** * @brief Compress an image. diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index a54b2cb6..b7a4f795 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -657,7 +657,8 @@ astcenc_error astcenc_config_init( astcenc_error astcenc_context_alloc( const astcenc_config* configp, unsigned int thread_count, - astcenc_context** context + astcenc_context** context, + const astcenc_context* parent_context ) { astcenc_error status; const astcenc_config& config = *configp; @@ -698,19 +699,31 @@ astcenc_error astcenc_context_alloc( return status; } - ctx->bsd = aligned_malloc(sizeof(block_size_descriptor), ASTCENC_VECALIGN); - if (!ctx->bsd) + if (!parent_context) { - delete ctxo; - return ASTCENC_ERR_OUT_OF_MEM; - } + block_size_descriptor* bsd = aligned_malloc(sizeof(block_size_descriptor), ASTCENC_VECALIGN); + if (!bsd) + { + delete ctxo; + return ASTCENC_ERR_OUT_OF_MEM; + } - bool can_omit_modes = static_cast(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); - init_block_size_descriptor(config.block_x, config.block_y, config.block_z, - can_omit_modes, - config.tune_partition_count_limit, - static_cast(config.tune_block_mode_limit) / 100.0f, - *ctx->bsd); + bool can_omit_modes = static_cast(config.flags & 
ASTCENC_FLG_SELF_DECOMPRESS_ONLY); + init_block_size_descriptor(config.block_x, config.block_y, config.block_z, + can_omit_modes, + config.tune_partition_count_limit, + static_cast(config.tune_block_mode_limit) / 100.0f, + *bsd); + + ctx->owns_bsd = true; + ctx->bsd = bsd; + + } + else + { + ctx->owns_bsd = false; + ctx->bsd = parent_context->context.bsd; + } #if !defined(ASTCENC_DECOMPRESS_ONLY) // Do setup only needed by compression @@ -732,7 +745,10 @@ astcenc_error astcenc_context_alloc( "compression_working_buffers size must be multiple of vector alignment"); if (!ctx->working_buffers) { - aligned_free(ctx->bsd); + if (ctx->owns_bsd) + { + aligned_free(ctx->bsd); + } delete ctxo; *context = nullptr; return ASTCENC_ERR_OUT_OF_MEM; @@ -769,7 +785,10 @@ void astcenc_context_free( { astcenc_contexti* ctx = &ctxo->context; aligned_free(ctx->working_buffers); - aligned_free(ctx->bsd); + if (ctx->owns_bsd) + { + aligned_free(ctx->bsd); + } #if defined(ASTCENC_DIAGNOSTICS) delete ctx->trace_log; #endif @@ -1271,7 +1290,7 @@ astcenc_error astcenc_get_block_info( physical_to_symbolic(*ctx->bsd, data, scb); // Fetch the appropriate partition and decimation tables - block_size_descriptor& bsd = *ctx->bsd; + const block_size_descriptor& bsd = *ctx->bsd; // Start from a clean slate memset(info, 0, sizeof(*info)); diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index eae7a33e..38551cc1 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -1217,8 +1217,11 @@ struct astcenc_contexti /** @brief The thread count supported by this context. */ unsigned int thread_count; + /** @brief Is this context the owner of @c bsd, or is it a child inheriting it. */ + bool owns_bsd; + /** @brief The block size descriptor this context was created with. 
*/ - block_size_descriptor* bsd; + const block_size_descriptor* bsd; /* * Fields below here are not needed in a decompress-only build, but some remain as they are @@ -2216,9 +2219,9 @@ template void aligned_free(T* ptr) { #if defined(_WIN32) - _aligned_free(ptr); + _aligned_free(const_cast::type *>(ptr)); #else - free(ptr); + free(const_cast::type *>(ptr)); #endif } diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index cf1fae1b..89a9ea18 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -2068,7 +2068,7 @@ int astcenc_main( } } - codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context); + codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context, nullptr); if (codec_status != ASTCENC_SUCCESS) { print_error("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status)); diff --git a/Utils/Example/astc_api_example.cpp b/Utils/Example/astc_api_example.cpp index 438915b7..0075c102 100644 --- a/Utils/Example/astc_api_example.cpp +++ b/Utils/Example/astc_api_example.cpp @@ -93,7 +93,7 @@ int main(int argc, char **argv) // ------------------------------------------------------------------------ // Create a context based on the configuration astcenc_context* context; - status = astcenc_context_alloc(&config, thread_count, &context); + status = astcenc_context_alloc(&config, thread_count, &context, nullptr); if (status != ASTCENC_SUCCESS) { printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(status)); From dd08f58340a5e654bfd1c0ac01fb775ef9f022d1 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 08:21:28 +0000 Subject: [PATCH 2/6] Minor cleanups --- Docs/ChangeLog-5x.md | 2 +- Source/UnitTest/test_decode.cpp | 2 +- Source/astcenc.h | 6 ++++-- Source/astcenc_entry.cpp | 2 +- Source/astcenccli_toplevel.cpp | 2 +- Utils/Example/astc_api_example.cpp | 2 +- 6 files changed, 9 insertions(+), 7 
deletions(-) diff --git a/Docs/ChangeLog-5x.md b/Docs/ChangeLog-5x.md index ce7fdb2c..bc9c78a5 100644 --- a/Docs/ChangeLog-5x.md +++ b/Docs/ChangeLog-5x.md @@ -123,4 +123,4 @@ set. - - - -_Copyright © 2022-2025, Arm Limited and contributors. All rights reserved._ +_Copyright © 2022-2026, Arm Limited and contributors. All rights reserved._ diff --git a/Source/UnitTest/test_decode.cpp b/Source/UnitTest/test_decode.cpp index 1027cff1..f415d9d8 100644 --- a/Source/UnitTest/test_decode.cpp +++ b/Source/UnitTest/test_decode.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2023 Arm Limited +// Copyright 2023-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy diff --git a/Source/astcenc.h b/Source/astcenc.h index 12ec8197..3c61c836 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020-2025 Arm Limited +// Copyright 2020-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,7 +37,9 @@ * Multi-threading can be used two ways. * * * An application wishing to process multiple images in parallel can allocate multiple - * contexts and assign each context to a thread. + * contexts and assign each context to a thread. Parallel contexts that use the same + * compressor configuration can share read-only data tables by inheriting them from a + * parent context. * * An application wishing to process a single image in using multiple threads can configure * contexts for multi-threaded use, and invoke astcenc_compress/decompress() once per thread * for faster processing. 
The caller is responsible for creating the worker threads, and diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index b7a4f795..96bb46f0 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2025 Arm Limited +// Copyright 2011-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index 89a9ea18..51df36aa 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2024 Arm Limited +// Copyright 2011-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy diff --git a/Utils/Example/astc_api_example.cpp b/Utils/Example/astc_api_example.cpp index 0075c102..83a1f78e 100644 --- a/Utils/Example/astc_api_example.cpp +++ b/Utils/Example/astc_api_example.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2021-2024 Arm Limited +// Copyright 2021-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy From e111a462031d4262a581197bd6bd9ff07c99fe4c Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 08:24:01 +0000 Subject: [PATCH 3/6] Whitespace --- Source/astcenc_entry.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 96bb46f0..2636e8d7 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -717,7 +717,6 @@ astcenc_error astcenc_context_alloc( ctx->owns_bsd = true; ctx->bsd = bsd; - } else { From a4d16ec3d420f65315583047d93755f420aac483 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 09:53:24 +0000 Subject: [PATCH 4/6] Add error checking --- Source/astcenc.h | 14 ++++++++------ Source/astcenc_entry.cpp | 19 ++++++++++++++++--- Source/astcenc_internal.h | 1 + 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/Source/astcenc.h b/Source/astcenc.h index 3c61c836..be3ee72a 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -728,18 +728,20 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( * slow, so it is recommended that contexts are reused to serially compress or decompress multiple * images to amortize setup cost. * - * A full standalone context can be created by passing @c nullptr for @c parent_context. A child - * context is created by passing another context created with the same @c config into - * @c parent_context. A child will share read-only data tables with the root ancestor full context, - * rather than creating its own, which saves a considerable amount of memory per context. You must - * only free the root @c parent_context once all descendent contexts have been freed. + * A standalone "root" context can be created by passing @c nullptr for @c parent_context. + * Alternatively, a child context, that shares resources with a root context, is created by passing + * another context using the same target configuration into @c parent_context. 
A child will use the + * read-only data tables it needs from the ancestor "root" context, rather than creating its own, + * which saves a considerable amount of memory per child. You must only free the root context once + * all descendent contexts have been freed. When you pass a @c parent_context the config is taken + * from the parent, and @c context must be @c nullptr. * * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY * flag when creating the configuration. The compression functions will fail if invoked. For a * decompress-only library build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating * any context. * - * @param[in] config Codec config. + * @param[in] config Codec config, must be @c nullptr if a @c parent_context is passed. * @param thread_count Thread count to configure for. * @param[out] context Location to store an opaque context pointer. * @param[in] parent_context Optional parent context from which to inherit read-only data tables. diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 2636e8d7..8c015bd5 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -661,7 +661,6 @@ astcenc_error astcenc_context_alloc( const astcenc_context* parent_context ) { astcenc_error status; - const astcenc_config& config = *configp; status = validate_cpu_float(); if (status != ASTCENC_SUCCESS) @@ -675,17 +674,31 @@ astcenc_error astcenc_context_alloc( } #if defined(ASTCENC_DIAGNOSTICS) - // Force single threaded compressor use in diagnostic mode. 
+ // Force single threaded compressor use in diagnostic mode if (thread_count != 1) { return ASTCENC_ERR_BAD_PARAM; } #endif + // Exactly one of config or parent_context must be set + bool has_config = configp != nullptr; + bool has_parent = parent_context != nullptr; + if (!(has_config ^ has_parent)) + { + return ASTCENC_ERR_BAD_PARAM; + } + + if (has_parent) + { + configp = &parent_context->context.config; + } + + const astcenc_config& config = *configp; astcenc_context* ctxo = new astcenc_context; astcenc_contexti* ctx = &ctxo->context; ctx->thread_count = thread_count; - ctx->config = config; + ctx->config = *configp; ctx->working_buffers = nullptr; // These are allocated per-compress, as they depend on image size diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index 38551cc1..d2d75a33 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -30,6 +30,7 @@ #endif #include #include +#include #include "astcenc.h" #include "astcenc_mathlib.h" From 64c827e5dd5de87fd78d520c73bf75d8033fac4b Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 10:04:02 +0000 Subject: [PATCH 5/6] Add unit test --- Source/UnitTest/test_decode.cpp | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Source/UnitTest/test_decode.cpp b/Source/UnitTest/test_decode.cpp index f415d9d8..d6c0b32a 100644 --- a/Source/UnitTest/test_decode.cpp +++ b/Source/UnitTest/test_decode.cpp @@ -75,6 +75,58 @@ TEST(decode, decode12x12) } } #endif + + astcenc_context_free(context); +} + +/** @brief Test harness for context inheritance. 
*/ +TEST(decode, context_inherit) +{ + astcenc_error status; + astcenc_config config; + astcenc_context* parent_context; + astcenc_context* context; + astcenc_context* error_context; + + static const astcenc_swizzle swizzle { + ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A + }; + + uint8_t data[16] { +#if 0 + 0x84,0x00,0x38,0xC8,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0xB3,0x4D,0x78 +#else + 0x29,0x00,0x1A,0x97,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0xCF,0x97,0x86 +#endif + }; + + uint8_t output[12*12*4]; + astcenc_config_init(ASTCENC_PRF_LDR, 12, 12, 1, ASTCENC_PRE_MEDIUM, 0, &config); + + status = astcenc_context_alloc(&config, 1, &parent_context, nullptr); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + status = astcenc_context_alloc(nullptr, 1, &context, parent_context); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + status = astcenc_context_alloc(&config, 1, &error_context, parent_context); + EXPECT_EQ(status, ASTCENC_ERR_BAD_PARAM); + + astcenc_image image; + image.dim_x = 12; + image.dim_y = 12; + image.dim_z = 1; + image.data_type = ASTCENC_TYPE_U8; + uint8_t* slices = output; + image.data = reinterpret_cast(&slices); + + status = astcenc_decompress_image(context, data, 16, &image, &swizzle, 0); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + astcenc_context_free(context); + astcenc_context_free(parent_context); } } From 1011acec9a4b1fa2eb394410ceb4980c5b2eaf91 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 26 Mar 2026 10:10:03 +0000 Subject: [PATCH 6/6] Wording nit --- Source/astcenc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/astcenc.h b/Source/astcenc.h index be3ee72a..f2adf8e7 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -734,7 +734,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( * read-only data tables it needs from the ancestor "root" context, rather than creating its own, * which saves a considerable amount of memory per child. 
You must only free the root context once * all descendent contexts have been freed. When you pass a @c parent_context the config is taken - * from the parent, and @c context must be @c nullptr. + * from the parent, and so @c config must be @c nullptr. * * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY * flag when creating the configuration. The compression functions will fail if invoked. For a