Autoexposure example restoration#728
Autoexposure example restoration#728devshgraphicsprogramming wants to merge 72 commits intomasterfrom
Conversation
…into autoexposue_ex
…into autoexposue_ex
…into autoexposue_ex
…into autoexposue_ex
…into autoexposue_ex
| template <typename T> | ||
| T morton2d_mask(uint16_t _n) | ||
| { | ||
| const static uint64_t mask[5] = | ||
| { | ||
| 0x5555555555555555ull, | ||
| 0x3333333333333333ull, | ||
| 0x0F0F0F0F0F0F0F0Full, | ||
| 0x00FF00FF00FF00FFull, | ||
| 0x0000FFFF0000FFFFull | ||
| }; | ||
| return (T)mask[_n]; | ||
| } | ||
|
|
||
| template <typename T> | ||
| T morton3d_mask(uint16_t _n) | ||
| { | ||
| const static uint64_t mask[5] = | ||
| { | ||
| 0x1249249249249249ull, | ||
| 0x10C30C30C30C30C3ull, | ||
| 0x010F00F00F00F00Full, | ||
| 0x001F0000FF0000FFull, | ||
| 0x001F00000000FFFFull | ||
| }; | ||
| return (T)mask[_n]; | ||
| } | ||
| template <typename T> | ||
| T morton4d_mask(uint16_t _n) | ||
| { | ||
| const static uint64_t mask[4] = | ||
| { | ||
| 0x1111111111111111ull, | ||
| 0x0303030303030303ull, | ||
| 0x000F000F000F000Full, | ||
| 0x000000FF000000FFull | ||
| }; | ||
| return (T)mask[_n]; | ||
| } |
There was a problem hiding this comment.
couldn't you make them into
namespace morton
{
namespace impl
{
template<uint16_t Dims, typename T>
struct mask;
// now the partial specializations
}
}with the masks as NBL_CONSTEXPR member variables
This way you can have
template<uint16_t Dims, typename T, uint16_t BitDepth>
enable_if_t<is_scalar_v<T>&&is_integral_v<T>,T> decode(T x)
{
static_assert(BitDepth <= sizeof(T)*8);
x = x & mask<Dims,T>::value[0];
.. rest of if-else statements
}| template<typename T, uint32_t bitDepth = sizeof(T) * 8u> | ||
| T morton2d_decode_x(T _morton) { return impl::morton2d_decode<T, bitDepth>(_morton); } | ||
| template<typename T, uint32_t bitDepth = sizeof(T) * 8u> | ||
| T morton2d_decode_y(T _morton) { return impl::morton2d_decode<T, bitDepth>(_morton >> 1); } | ||
|
|
||
| template<typename T, uint32_t bitDepth = sizeof(T) * 8u> | ||
| T morton2d_encode(T x, T y) { return impl::separate_bits_2d<T, bitDepth>(x) | (impl::separate_bits_2d<T, bitDepth>(y) << 1); } | ||
| template<typename T, uint32_t bitDepth = sizeof(T) * 8u> | ||
| T morton3d_encode(T x, T y, T z) { return impl::separate_bits_3d<T, bitDepth>(x) | (impl::separate_bits_3d<T, bitDepth>(y) << 1) | (impl::separate_bits_3d<T, bitDepth>(z) << 2); } | ||
| template<typename T, uint32_t bitDepth = sizeof(T) * 8u> | ||
| T morton4d_encode(T x, T y, T z, T w) { return impl::separate_bits_4d<T, bitDepth>(x) | (impl::separate_bits_4d<T, bitDepth>(y) << 1) | (impl::separate_bits_4d<T, bitDepth>(z) << 2) | (impl::separate_bits_4d<T, bitDepth>(w) << 3); } |
There was a problem hiding this comment.
imho these should be template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8>
because that way you only need to write the encode and decode once
template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8>
vector<T,Dims> decode(const T _morton)
{
vector<T,Dims> retval;
for (uint16_t i=0; i<Dims; i++)
retval[i] = impl::decode<Dims,T,BitDepth>(_morton>>i);
return retval;
}
template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8>
T encode(const vector<T,Dims> coord)
{
T retval = impl::separate_bits<Dims,T,BitDepth>(coord[0]);
for (uint16_t i=1; i<Dims; i++)
retval |= impl::separate_bits<Dims,T,BitDepth>(coord[1])<<i;
return retval;
}| retval.minLuma = lumaMinimum; | ||
| retval.maxLuma = lumaMaximum; |
There was a problem hiding this comment.
typo, you've set the min and max equal to each other
There was a problem hiding this comment.
P.S. its also more useful to take a precomputed minLumaLog2 and lumaLog2Range (diff between log of max and log of min)
There was a problem hiding this comment.
still outstanding for the geom meter
|
|
||
| uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); | ||
|
|
||
| val_accessor.atomicAdd(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); |
There was a problem hiding this comment.
no, always the same address should be added to, if you wanted to stagger, then you should stagger based on modulo of the workgroup index
| float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); | ||
| return luma / rangeLog2 + minLog2; |
There was a problem hiding this comment.
again, you're getting random floats based on workgroup index which thankfully was always the same (rare case of two wrongs making a right)
Again if you wanted to stagger, you should use entire subgroup to load the values, then subgroup reduce them
just converting to float_t is not the correct way to decode, you should divide by the maxIncrement
…stead of one value
| int_t lower, upper; | ||
| if (tid == 0) | ||
| { | ||
| const uint32_t lowerPercentile = uint32_t(BinCount * lowerBoundPercentile); |
There was a problem hiding this comment.
use totalSamples * percentile and totalSamples can be histo[BinCount-1]
lowerPercentile is in bin units while histo prefix sums are in samples, you compare different units hence the threshold is tiny and the median collapses to the first bins, your EV moves toward log2(lumaMin) and tonemapping is biased dark
counterexample https://godbolt.org/z/49WjcazMr
Description
Testing
TODO list: