diff --git a/Docs/ChangeLog-5x.md b/Docs/ChangeLog-5x.md index 8e3412ef..bc9c78a5 100644 --- a/Docs/ChangeLog-5x.md +++ b/Docs/ChangeLog-5x.md @@ -7,13 +7,21 @@ All performance data on this page is measured on an Intel Core i5-9600K clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads. -## 5.5.0 +## 5.4.0 **Status:** In development. -The 5.4.0 release is a minor maintenance release. +The 5.4.0 release is a minor feature release. + +This release includes changes to the public interface in the `astcenc.h` +header. We always recommend rebuilding your client-side code using the +header from the same release to avoid compatibility issues. * **General:** + * **Improvement:** Contexts using the same configuration can now share + read-only data tables. This can significantly reduce the amount of memory + needed for applications that parallelize by processing multiple images + in parallel instead of slicing a single image in parallel. * **Improvement:** Decompressor (`astcdec`) builds, which lack compression support, now use a smaller `block_size_descriptor` by omitting fields that are only needed for compression. This reduces the size of a decompressor @@ -115,4 +123,4 @@ set. - - - -_Copyright © 2022-2025, Arm Limited and contributors. All rights reserved._ +_Copyright © 2022-2026, Arm Limited and contributors. All rights reserved._ diff --git a/Source/UnitTest/test_decode.cpp b/Source/UnitTest/test_decode.cpp index 923c412b..d6c0b32a 100644 --- a/Source/UnitTest/test_decode.cpp +++ b/Source/UnitTest/test_decode.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2023 Arm Limited +// Copyright 2023-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -52,7 +52,7 @@ TEST(decode, decode12x12) uint8_t output[12*12*4]; astcenc_config_init(ASTCENC_PRF_LDR, 12, 12, 1, ASTCENC_PRE_MEDIUM, 0, &config); - status = astcenc_context_alloc(&config, 1, &context); + status = astcenc_context_alloc(&config, 1, &context, nullptr); EXPECT_EQ(status, ASTCENC_SUCCESS); astcenc_image image; @@ -75,6 +75,58 @@ TEST(decode, decode12x12) } } #endif + + astcenc_context_free(context); +} + +/** @brief Test harness for context inheritance. */ +TEST(decode, context_inherit) +{ + astcenc_error status; + astcenc_config config; + astcenc_context* parent_context; + astcenc_context* context; + astcenc_context* error_context; + + static const astcenc_swizzle swizzle { + ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A + }; + + uint8_t data[16] { +#if 0 + 0x84,0x00,0x38,0xC8,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0xB3,0x4D,0x78 +#else + 0x29,0x00,0x1A,0x97,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0xCF,0x97,0x86 +#endif + }; + + uint8_t output[12*12*4]; + astcenc_config_init(ASTCENC_PRF_LDR, 12, 12, 1, ASTCENC_PRE_MEDIUM, 0, &config); + + status = astcenc_context_alloc(&config, 1, &parent_context, nullptr); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + status = astcenc_context_alloc(nullptr, 1, &context, parent_context); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + status = astcenc_context_alloc(&config, 1, &error_context, parent_context); + EXPECT_EQ(status, ASTCENC_ERR_BAD_PARAM); + + astcenc_image image; + image.dim_x = 12; + image.dim_y = 12; + image.dim_z = 1; + image.data_type = ASTCENC_TYPE_U8; + uint8_t* slices = output; + image.data = reinterpret_cast(&slices); + + status = astcenc_decompress_image(context, data, 16, &image, &swizzle, 0); + EXPECT_EQ(status, ASTCENC_SUCCESS); + + astcenc_context_free(context); + astcenc_context_free(parent_context); } } diff --git a/Source/astcenc.h b/Source/astcenc.h index 8ecdc16f..f2adf8e7 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -1,6 +1,6 @@ 
// SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020-2025 Arm Limited +// Copyright 2020-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,7 +37,9 @@ * Multi-threading can be used two ways. * * * An application wishing to process multiple images in parallel can allocate multiple - * contexts and assign each context to a thread. + * contexts and assign each context to a thread. Parallel contexts that use the same + * compressor configuration can share read-only data tables by inheriting them from a + * parent context. * * An application wishing to process a single image in using multiple threads can configure * contexts for multi-threaded use, and invoke astcenc_compress/decompress() once per thread * for faster processing. The caller is responsible for creating the worker threads, and @@ -64,7 +66,7 @@ * * // Allocate working state given config and thread_count * astcenc_context* my_context; - * astcenc_context_alloc(&my_config, thread_count, &my_context); + * astcenc_context_alloc(&my_config, thread_count, &my_context, nullptr); * * // Compress each image using these config settings * foreach image: @@ -726,21 +728,31 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( * slow, so it is recommended that contexts are reused to serially compress or decompress multiple * images to amortize setup cost. * + * A standalone "root" context can be created by passing @c nullptr for @c parent_context. + * Alternatively, a child context, that shares resources with a root context, is created by passing + * another context using the same target configuration into @c parent_context. A child will use the + * read-only data tables it needs from the ancestor "root" context, rather than creating its own, + * which saves a considerable amount of memory per child. 
You must only free the root context once + * all descendent contexts have been freed. When you pass a @c parent_context the config is taken + * from the parent, and so @c config must be @c nullptr. + * * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY * flag when creating the configuration. The compression functions will fail if invoked. For a * decompress-only library build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating * any context. * - * @param[in] config Codec config. - * @param thread_count Thread count to configure for. - * @param[out] context Location to store an opaque context pointer. + * @param[in] config Codec config, must be @c nullptr if a @c parent_context is passed. + * @param thread_count Thread count to configure for. + * @param[out] context Location to store an opaque context pointer. + * @param[in] parent_context Optional parent context from which to inherit read-only data tables. * * @return @c ASTCENC_SUCCESS on success, or an error if context creation failed. */ ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( const astcenc_config* config, unsigned int thread_count, - astcenc_context** context); + astcenc_context** context, + const astcenc_context* parent_context); /** * @brief Compress an image. diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index a54b2cb6..8c015bd5 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2025 Arm Limited +// Copyright 2011-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -657,10 +657,10 @@ astcenc_error astcenc_config_init( astcenc_error astcenc_context_alloc( const astcenc_config* configp, unsigned int thread_count, - astcenc_context** context + astcenc_context** context, + const astcenc_context* parent_context ) { astcenc_error status; - const astcenc_config& config = *configp; status = validate_cpu_float(); if (status != ASTCENC_SUCCESS) @@ -674,17 +674,31 @@ astcenc_error astcenc_context_alloc( } #if defined(ASTCENC_DIAGNOSTICS) - // Force single threaded compressor use in diagnostic mode. + // Force single threaded compressor use in diagnostic mode if (thread_count != 1) { return ASTCENC_ERR_BAD_PARAM; } #endif + // Exactly one of config or parent_context must be set + bool has_config = configp != nullptr; + bool has_parent = parent_context != nullptr; + if (!(has_config ^ has_parent)) + { + return ASTCENC_ERR_BAD_PARAM; + } + + if (has_parent) + { + configp = &parent_context->context.config; + } + + const astcenc_config& config = *configp; astcenc_context* ctxo = new astcenc_context; astcenc_contexti* ctx = &ctxo->context; ctx->thread_count = thread_count; - ctx->config = config; + ctx->config = *configp; ctx->working_buffers = nullptr; // These are allocated per-compress, as they depend on image size @@ -698,19 +712,30 @@ astcenc_error astcenc_context_alloc( return status; } - ctx->bsd = aligned_malloc(sizeof(block_size_descriptor), ASTCENC_VECALIGN); - if (!ctx->bsd) + if (!parent_context) { - delete ctxo; - return ASTCENC_ERR_OUT_OF_MEM; - } + block_size_descriptor* bsd = aligned_malloc(sizeof(block_size_descriptor), ASTCENC_VECALIGN); + if (!bsd) + { + delete ctxo; + return ASTCENC_ERR_OUT_OF_MEM; + } - bool can_omit_modes = static_cast(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); - init_block_size_descriptor(config.block_x, config.block_y, config.block_z, - can_omit_modes, - config.tune_partition_count_limit, - static_cast(config.tune_block_mode_limit) / 100.0f, - *ctx->bsd); + bool 
can_omit_modes = static_cast(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); + init_block_size_descriptor(config.block_x, config.block_y, config.block_z, + can_omit_modes, + config.tune_partition_count_limit, + static_cast(config.tune_block_mode_limit) / 100.0f, + *bsd); + + ctx->owns_bsd = true; + ctx->bsd = bsd; + } + else + { + ctx->owns_bsd = false; + ctx->bsd = parent_context->context.bsd; + } #if !defined(ASTCENC_DECOMPRESS_ONLY) // Do setup only needed by compression @@ -732,7 +757,10 @@ astcenc_error astcenc_context_alloc( "compression_working_buffers size must be multiple of vector alignment"); if (!ctx->working_buffers) { - aligned_free(ctx->bsd); + if (ctx->owns_bsd) + { + aligned_free(ctx->bsd); + } delete ctxo; *context = nullptr; return ASTCENC_ERR_OUT_OF_MEM; @@ -769,7 +797,10 @@ void astcenc_context_free( { astcenc_contexti* ctx = &ctxo->context; aligned_free(ctx->working_buffers); - aligned_free(ctx->bsd); + if (ctx->owns_bsd) + { + aligned_free(ctx->bsd); + } #if defined(ASTCENC_DIAGNOSTICS) delete ctx->trace_log; #endif @@ -1271,7 +1302,7 @@ astcenc_error astcenc_get_block_info( physical_to_symbolic(*ctx->bsd, data, scb); // Fetch the appropriate partition and decimation tables - block_size_descriptor& bsd = *ctx->bsd; + const block_size_descriptor& bsd = *ctx->bsd; // Start from a clean slate memset(info, 0, sizeof(*info)); diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index eae7a33e..d2d75a33 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -30,6 +30,7 @@ #endif #include #include +#include #include "astcenc.h" #include "astcenc_mathlib.h" @@ -1217,8 +1218,11 @@ struct astcenc_contexti /** @brief The thread count supported by this context. */ unsigned int thread_count; + /** @brief Is this context the owner of @c bsd, or is it a child inheriting it. */ + bool owns_bsd; + /** @brief The block size descriptor this context was created with. 
*/ - block_size_descriptor* bsd; + const block_size_descriptor* bsd; /* * Fields below here are not needed in a decompress-only build, but some remain as they are @@ -2216,9 +2220,9 @@ template void aligned_free(T* ptr) { #if defined(_WIN32) - _aligned_free(ptr); + _aligned_free(const_cast::type *>(ptr)); #else - free(ptr); + free(const_cast::type *>(ptr)); #endif } diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index cf1fae1b..51df36aa 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2024 Arm Limited +// Copyright 2011-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -2068,7 +2068,7 @@ int astcenc_main( } } - codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context); + codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context, nullptr); if (codec_status != ASTCENC_SUCCESS) { print_error("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status)); diff --git a/Utils/Example/astc_api_example.cpp b/Utils/Example/astc_api_example.cpp index 438915b7..83a1f78e 100644 --- a/Utils/Example/astc_api_example.cpp +++ b/Utils/Example/astc_api_example.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2021-2024 Arm Limited +// Copyright 2021-2026 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -93,7 +93,7 @@ int main(int argc, char **argv) // ------------------------------------------------------------------------ // Create a context based on the configuration astcenc_context* context; - status = astcenc_context_alloc(&config, thread_count, &context); + status = astcenc_context_alloc(&config, thread_count, &context, nullptr); if (status != ASTCENC_SUCCESS) { printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(status));