Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions sei-db/common/metrics/buckets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package metrics

import "github.com/sei-protocol/sei-chain/sei-db/common/unit"

// Shared histogram bucket boundaries for use across the codebase.
// The OTel defaults are too coarse for meaningful percentile queries in Grafana.

// LatencyBuckets covers 10μs to 5 minutes — wide enough for both fast key
// lookups and slow compactions/flushes without needing per-metric tuning.
var LatencyBuckets = []float64{
	// 10μs–500μs: fast in-memory key lookups.
	0.00001, 0.000025, 0.00005, 0.0001, 0.00025, 0.0005,
	// 1ms–50ms: typical small I/O operations.
	0.001, 0.0025, 0.005, 0.01, 0.025, 0.05,
	// 100ms–5min: slow compactions, flushes, and other background work.
	0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 300,
}

// ByteSizeBuckets covers 256B to 1GB for data size histograms.
var ByteSizeBuckets = []float64{
	256,
	unit.KB,
	4 * unit.KB,
	16 * unit.KB,
	64 * unit.KB,
	256 * unit.KB,
	unit.MB,
	4 * unit.MB,
	16 * unit.MB,
	64 * unit.MB,
	256 * unit.MB,
	unit.GB,
}

// CountBuckets covers 1 to 1M for per-operation step/iteration counts.
var CountBuckets = []float64{
	1, 5, 10,
	50, 100, 500,
	1000, 5000, 10000,
	100000, 1000000,
}
7 changes: 7 additions & 0 deletions sei-db/common/threading/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,12 @@ type Pool interface {
// If Submit is called concurrently with or after shutdown (i.e. when ctx is done/cancelled), the task may
// be silently dropped. Callers that need a guarantee of execution must
// ensure Submit happens-before shutdown.
//
// This method is permitted to return an error only under the following conditions:
// - the pool is shutting down (i.e. its context is done/cancelled)
// - the provided ctx parameter is done/cancelled before this method returns
// - invalid input (e.g. the task is nil)
//
// If this method returns an error, the task may or may not have been executed.
Submit(ctx context.Context, task func()) error
}
48 changes: 48 additions & 0 deletions sei-db/db_engine/dbcache/cache.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package dbcache

import (
"context"
"fmt"
"time"

"github.com/sei-protocol/sei-chain/sei-db/common/threading"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

Expand All @@ -22,6 +27,9 @@ type Reader func(key []byte) (value []byte, found bool, err error)
// - the Reader method returns an error (for methods that accept a Reader)
// - the cache is shutting down
// - the cache's work pools are shutting down
//
// Cache errors are generally not recoverable, and it should be assumed that a cache that has returned an error
// is in a corrupted state, and should be discarded.
type Cache interface {

// Get returns the value for the given key, or (nil, false, nil) if not found.
Expand Down Expand Up @@ -64,6 +72,14 @@ type Cache interface {
BatchSet(updates []CacheUpdate) error
}

// DefaultEstimatedOverheadPerEntry is a rough per-entry fixed heap-overhead estimate
// (in bytes) for a cache entry on 64-bit platforms (amd64/arm64). The figure sums the
// shardEntry struct (48 B), a list.Element (48 B), an lruQueueEntry (32 B), two
// map-entry costs (~64 B), string allocation rounding (~16 B), plus a margin for the
// duplicate key copy held by the LRU. It was derived from static analysis of Go size
// classes and map bucket layout; validate experimentally on your target platform.
const DefaultEstimatedOverheadPerEntry uint64 = 250

// CacheUpdate describes a single key-value mutation to apply to the cache.
type CacheUpdate struct {
// The key to update.
Expand All @@ -76,3 +92,35 @@ type CacheUpdate struct {
// IsDelete reports whether this update removes the key rather than storing a
// value: a nil Value marks the update as a delete.
func (u *CacheUpdate) IsDelete() bool {
	isDelete := u.Value == nil
	return isDelete
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do we manage the cache context. like do we need a Close()/Shutdown() to the cache interface for collectLoop goroutine?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not in this PR (since integration will happen in the next PR), but the context is managed by FlatKV, and is cancelled when FlatKV is shut down.

// Close closes all database instances, cancels the store's context to
// stop background goroutines (pools, caches, metrics), and releases the
// file lock.
func (s *CommitStore) Close() error {
	err := s.closeDBsOnly()
	s.cancel()

        // ...

The cancel method above will cause the ctx used by the worker pools and by the DB caches to have their contexts cancelled, thus freeing up all threading resources in these utilities.

// BuildCache creates a new Cache. A maxSize of zero produces a no-op cache;
// otherwise a standard sharded cache is constructed with the given settings.
func BuildCache(
	ctx context.Context,
	shardCount uint64,
	maxSize uint64,
	readPool threading.Pool,
	miscPool threading.Pool,
	estimatedOverheadPerEntry uint64,
	cacheName string,
	metricsScrapeInterval time.Duration,
) (Cache, error) {
	// A zero byte budget means caching is disabled entirely.
	if maxSize == 0 {
		return NewNoOpCache(), nil
	}

	c, err := NewStandardCache(
		ctx,
		shardCount,
		maxSize,
		readPool,
		miscPool,
		estimatedOverheadPerEntry,
		cacheName,
		metricsScrapeInterval,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create cache: %w", err)
	}
	return c, nil
}
194 changes: 194 additions & 0 deletions sei-db/db_engine/dbcache/cache_impl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package dbcache

import (
"context"
"fmt"
"sync"
"time"

"github.com/sei-protocol/sei-chain/sei-db/common/threading"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// Compile-time assertion that *cache satisfies the Cache interface.
var _ Cache = (*cache)(nil)

// A standard implementation of a flatcache. Keys are mapped to shards by
// shardManager, and batch operations fan work out per shard so that each
// shard's lock is taken at most once per batch (see BatchSet/BatchGet).
type cache struct {
	// ctx governs the lifetime of background work; when cancelled, the work
	// pools stop accepting tasks and cache operations begin returning errors.
	ctx context.Context

	// A utility for assigning keys to shard indices.
	shardManager *shardManager

	// The shards in the cache.
	shards []*shard

	// A pool for asynchronous reads.
	readPool threading.Pool

	// A pool for miscellaneous operations that are neither computationally intensive nor IO bound.
	miscPool threading.Pool
}

// Creates a new Cache. If cacheName is non-empty, OTel metrics are enabled and the
// background size scrape runs every metricsScrapeInterval.
func NewStandardCache(
ctx context.Context,
// The number of shards in the cache. Must be a power of two and greater than 0.
shardCount uint64,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this fixed size? how does user choose the shardCount

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the PR that integrates this with FlatKV, I introduce configuration for FlatKV that allows for each of the 5 DBs to be individually configured, meaning that we'll be able to tune the configuration of each DB's cache for that DB's workload. That PR will follow this one.

// The maximum size of the cache, in bytes.
maxSize uint64,
// A work pool for reading from the DB.
readPool threading.Pool,
// A work pool for miscellaneous operations that are neither computationally intensive nor IO bound.
miscPool threading.Pool,
// The estimated overhead per entry, in bytes. This is used to calculate the maximum size of the cache.
// This value should be derived experimentally, and may differ between different builds and architectures.
estimatedOverheadPerEntry uint64,
// Name used as the "cache" attribute on metrics. Empty string disables metrics.
cacheName string,
// How often to scrape cache size for metrics. Ignored if cacheName is empty.
metricsScrapeInterval time.Duration,
) (Cache, error) {
if shardCount == 0 || (shardCount&(shardCount-1)) != 0 {
return nil, ErrNumShardsNotPowerOfTwo
}
if maxSize == 0 {
return nil, fmt.Errorf("maxSize must be greater than 0")
}

shardManager, err := newShardManager(shardCount)
if err != nil {
return nil, fmt.Errorf("failed to create shard manager: %w", err)
}
sizePerShard := maxSize / shardCount
if sizePerShard == 0 {
return nil, fmt.Errorf("maxSize must be greater than shardCount")
}

shards := make([]*shard, shardCount)
for i := uint64(0); i < shardCount; i++ {
shards[i], err = NewShard(ctx, readPool, sizePerShard, estimatedOverheadPerEntry)
if err != nil {
return nil, fmt.Errorf("failed to create shard: %w", err)
}
}

c := &cache{
ctx: ctx,
shardManager: shardManager,
shards: shards,
readPool: readPool,
miscPool: miscPool,
}

if cacheName != "" {
metrics := newCacheMetrics(ctx, cacheName, metricsScrapeInterval, c.getCacheSizeInfo)
for _, s := range c.shards {
s.metrics = metrics
}
}

return c, nil
}

// getCacheSizeInfo reports the aggregate byte size and entry count summed
// across all shards; used as the scrape callback for cache metrics.
func (c *cache) getCacheSizeInfo() (bytes uint64, entries uint64) {
	for i := range c.shards {
		shardBytes, shardEntries := c.shards[i].getSizeInfo()
		bytes += shardBytes
		entries += shardEntries
	}
	return bytes, entries
}

func (c *cache) BatchSet(updates []CacheUpdate) error {
// Sort entries by shard index so each shard is locked only once.
shardMap := make(map[uint64][]CacheUpdate)
for i := range updates {
idx := c.shardManager.Shard(updates[i].Key)
shardMap[idx] = append(shardMap[idx], updates[i])
}

var wg sync.WaitGroup
for shardIndex, shardEntries := range shardMap {
wg.Add(1)
err := c.miscPool.Submit(c.ctx, func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The closure passed to miscPool.Submit calls wg.Done() directly after c.shards[shardIndex].BatchSet(shardEntries). If the shard's BatchSet panics, wg.Done() is never called, and wg.Wait() hangs forever, deadlocking the calling goroutine?

Copy link
Contributor Author

@cody-littley cody-littley Mar 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My general ethos is to treat panic as an unrecoverable error, and to not worry too much about things like deadlocked goroutines and to just let the entire process tear itself down, since this sort of blocked goroutine doesn't stop the process from terminating. Sometimes it makes sense to catch a panic in order to do final logging/cleanup, but I think attempting to recover the go process after a panic is almost always a yucky pattern.

https://gobyexample.com/range-over-iterators

That being said, deferring release of the waitgroup isn't expensive, so no harm doing it. Change made.

defer wg.Done()
c.shards[shardIndex].BatchSet(shardEntries)
})
if err != nil {
return fmt.Errorf("failed to submit batch set: %w", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we handle partial failures here, if some shards succeed, some shards fail, what should the user do?

Copy link
Contributor Author

@cody-littley cody-littley Mar 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any failure from this method is unlikely to be recoverable. If paired with a DB, failure to update the cache can result in "read your writes" consistency breaking down.

From the documentation on the cache interface, the conditions under which the cache is allowed to return an error is limited:

// Although several methods on this interface return errors, the conditions when a cache
// is permitted to actually return an error is limited at the API level. A cache method
// may return an error under the following conditions:
// - malformed input (e.g. a nil key)
// - the Reader method returns an error (for methods that accpet a Reader)
// - the cache is shutting down
// - the cache's work pools are shutting down

The actual place that might fail is submission to the work pool: err := c.miscPool.Submit(c.ctx, ...). Pools have similar semantics, and can only return errors under very specific circumstances. This wasn't documented, so I added the following to the Pool.Submit() godoc:

	// ...
	//
	// This method is permitted to return an error only under the following conditions:
	// - the pool is shutting down (i.e. its context is done/cancelled)
	// - the provided ctx parameter is done/cancelled before this method returns
	// - invalid input (e.g. the task is nil)
	//
	// If this method returns an error, the task may or may not have been executed.
	Submit(ctx context.Context, task func()) error

I also added the following to the Cache doucmentation:

// ...
//
// Cache errors are are generally not recoverable, and it should be assumed that a cache that has returned an error
// is in a corrupted state, and should be discarded.
type Cache interface {

}
}
wg.Wait()

return nil
}

func (c *cache) BatchGet(read Reader, keys map[string]types.BatchGetResult) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

semantic inconsistency between Get and BatchGet for nil-value entries

Set(key, nil) stores the entry with statusAvailable and a nil value (TestCacheSetNilValue).

  • Get returns (nil, true, nil) — key found, value is nil.
  • BatchGet stores BatchGetResult{Value: nil}, and IsFound() returns false — key not found.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Calling Set(key, nil) is now equivalent to calling Delete(key).

work := make(map[uint64]map[string]types.BatchGetResult)
for key := range keys {
idx := c.shardManager.Shard([]byte(key))
if work[idx] == nil {
work[idx] = make(map[string]types.BatchGetResult)
}
work[idx][key] = types.BatchGetResult{}
}

var wg sync.WaitGroup
for shardIndex, subMap := range work {
wg.Add(1)

err := c.miscPool.Submit(c.ctx, func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be safe, each closure needs to capture the variables explicitly via local copies. Prior to Go 1.22, range variables are reused across iterations, so all closures could end up operating on the same (final) shard, which could be a bug.

Same for batchSet

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've got painful memories of that footgun. 😅

The current codebase is unable to compile with any go version before 1.22. In this code, we utilize the new iterator pattern that was added as an experimental feature in 1.22 (requiring a special experimental flag to be set at compile time), and added as a standard feature in 1.23. Meaning that it should be impossible to compile this in a way that ends up using the wrong variable capture semantics.

With this in mind, would you still like me to make the change? Willing to make the change if you feel strongly on the topic.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, if we will hit compile errors, probably OK to just keep it today

defer wg.Done()
err := c.shards[shardIndex].BatchGet(read, subMap)
if err != nil {
for key := range subMap {
subMap[key] = types.BatchGetResult{Error: err}
}
}
})
if err != nil {
return fmt.Errorf("failed to submit batch get: %w", err)
}
}
wg.Wait()

for _, subMap := range work {
for key, result := range subMap {
keys[key] = result
}
}

return nil
}

// Delete removes the given key from the shard that owns it.
func (c *cache) Delete(key []byte) {
	c.shards[c.shardManager.Shard(key)].Delete(key)
}

// Get returns the value for the given key, or (nil, false, nil) if not found,
// delegating to the shard that owns the key. updateLru controls whether the
// lookup refreshes the key's LRU position.
func (c *cache) Get(read Reader, key []byte, updateLru bool) ([]byte, bool, error) {
	owner := c.shards[c.shardManager.Shard(key)]

	value, found, err := owner.Get(read, key, updateLru)
	switch {
	case err != nil:
		return nil, false, fmt.Errorf("failed to get value from shard: %w", err)
	case !found:
		return nil, false, nil
	default:
		return value, true, nil
	}
}

// Set stores value under key in the owning shard. A nil value is treated as a
// delete, so Set(key, nil) is equivalent to Delete(key).
func (c *cache) Set(key []byte, value []byte) {
	owner := c.shards[c.shardManager.Shard(key)]

	if value == nil {
		owner.Delete(key)
		return
	}
	owner.Set(key, value)
}
Loading
Loading