From 024b1ca039db47c9c82264d7f095824ade472858 Mon Sep 17 00:00:00 2001 From: Mikhail Sokolov Date: Wed, 4 Mar 2026 14:37:10 +0200 Subject: [PATCH] [K8sDnsNameResolver] Fix Netty panic logging on JVM shutdown Removes this erroneous log message on JVM shutdown: io.grpc.StatusRuntimeException: INTERNAL: Panic! This is a bug! Why it was logged: - it seems to happen when LookupSession.lookupAsync tries to establish a new DNS server connection after the JVM shutdown has already started (it could happen because JVM shutdown hooks are executed in unpredictable order) - under the hood that function tries to register a JVM shutdown hook to clean up NIO resources registering a shutdown hook during the shutdown sequence results in java.lang.IllegalStateException: Shutdown in progress - this exception is not reported as a failure case in LookupSession.lookupAsync result completion stage but is thrown directly on the method call - the code inside the resolver doesn't handle such direct exception gracefully, which causes it to propagate to Netty internals, resulting in a scary "Panic! This is a bug!" log message written to JUL by Netty internals The fix: - catch and handle the "error on JVM shutdown" case - reduce the log level for such errors below INFO - additionally, the resolver shutdown logic was hardened for all the cases where currently running resolution is in a race with the shutdown --- .../tools/k8sdns/K8sDnsNameResolver.java | 29 ++++- .../jgrpc/tools/k8sdns/NameLookupState.java | 120 ++++++++++++++++-- 2 files changed, 136 insertions(+), 13 deletions(-) diff --git a/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/K8sDnsNameResolver.java b/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/K8sDnsNameResolver.java index 271a384..3586241 100644 --- a/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/K8sDnsNameResolver.java +++ b/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/K8sDnsNameResolver.java @@ -34,6 +34,7 @@ @Nullable private ScheduledHandle scheduledRefreshTask = null; @Nullable private SuccessResult lastSuccessfulResult = null; private boolean refreshing = false; + private boolean isShuttingDown = false; private record SuccessResult(List addresses, Instant receiveTime) {} @@ -56,6 +57,7 @@ public String getServiceAuthority() { @Override public void shutdown() { + this.isShuttingDown = true; if (this.scheduledRefreshTask != null) { this.scheduledRefreshTask.cancel(); } @@ -95,12 +97,15 @@ private void startScheduledRefreshTask(long initialDelayMs) { } private void refreshInner() { - if (!this.refreshing) { + if (!this.refreshing && !this.isShuttingDown) { this.refreshing = true; resolveAllAsync( (addresses, err) -> { try { - if (err != null) { + if (this.isShuttingDown + || err instanceof NameLookupState.LookupFailedJvmShutdownException) { + handleResolutionResultDuringShutdown(); + } else if (err != null) { handleResolutionFailure(err); } else if (addresses != null && !addresses.isEmpty()) { handleResolutionSuccess(addresses); @@ -116,6 +121,16 @@ private void refreshInner() { } } + private void handleResolutionResultDuringShutdown() { + logger.info( + "Received resolution result during shutdown, " + + "ignoring and cancelling future refresh attempts"); + if (this.scheduledRefreshTask != null) { + this.scheduledRefreshTask.cancel(); + this.scheduledRefreshTask = null; + } + } + private void handleResolutionFailure(Throwable err) { /* NameResolver contract specifies that the client handles retries and their frequency. @@ -159,7 +174,15 @@ private void resolveAllAsync( this.syncCtx.execute( () -> { if (err != null) { - logger.error("DNS lookup failed", err); + if (err instanceof NameLookupState.LookupFailedJvmShutdownException) { + // lower the level below INFO if it's caused by JVM shutdown + // to keep the shutdown logs clean; + // the error log statement is not actionable in this case + logger.debug( + "DNS lookup failed due to JVM shutdown, nothing to do here", err); + } else { + logger.error("DNS lookup failed", err); + } cb.accept(null, err); } else { this.nameLookupState = nameLookupState; diff --git a/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/NameLookupState.java b/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/NameLookupState.java index 7a4cd62..ea3e058 100644 --- a/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/NameLookupState.java +++ b/k8s-dns-name-resolver/src/main/java/com/evolution/jgrpc/tools/k8sdns/NameLookupState.java @@ -1,12 +1,14 @@ package com.evolution.jgrpc.tools.k8sdns; import com.google.common.net.InetAddresses; +import java.io.Serial; import java.net.InetAddress; import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionStage; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; import org.jspecify.annotations.Nullable; import org.xbill.DNS.Name; @@ -155,16 +157,34 @@ private CompletionStage lookupAlternativeAbsoluteNames( } private CompletionStage> lookupByAbsoluteName(Name name) { - return session - .lookupAsync(name, Type.A) - .thenApply( - (res) -> - Optional.ofNullable(res).map(LookupResult::getRecords).orElse(List.of()).stream() - .map(Record::rdataToString) - .distinct() - .sorted() // make sure that result comparison does not depend on order - .map(InetAddresses::forString) - .toList()); + CompletionStage lookupAsyncResult; + try { + lookupAsyncResult = session.lookupAsync(name, Type.A); + } catch (Exception e) { + if (messageContainsIgnoreCase(e, "Shutdown in progress") + // double-checking that we are in the middle of JVM shutdown process; + // check LookupFailedJvmShutdownException description for details + && JvmShutdownDetector.isJvmShutdownDetected()) { + lookupAsyncResult = CompletableFuture.failedFuture(new LookupFailedJvmShutdownException()); + } else { + lookupAsyncResult = CompletableFuture.failedFuture(e); + } + } + return lookupAsyncResult.thenApply( + (res) -> + Optional.ofNullable(res).map(LookupResult::getRecords).orElse(List.of()).stream() + .map(Record::rdataToString) + .distinct() + .sorted() // make sure that result comparison does not depend on order + .map(InetAddresses::forString) + .toList()); + } + + private static boolean messageContainsIgnoreCase(Throwable t, String str) { + if (t.getMessage() == null) { + return false; + } + return t.getMessage().toLowerCase().contains(str.toLowerCase()); } // org.xbill.DNS.lookup.LookupSession.safeConcat @@ -183,4 +203,84 @@ private static Name concatFailIfInvalid(Name name, Name suffix) { throw new RuntimeException(e); } } + + /** + * Indicates that a DNS lookup failed due to JVM being in the process of shutdown. + * + *

This is a separate exception so the client code can lower the log level for this kind of + * errors to keep the app shutdown logs clean. + * + *

Why such error could happen in {@link K8sDnsNameResolver}: + * + *

    + *
  • When dnsjava's LookupSession.lookupAsync tries to establish a new DNS server + * connection after the JVM shutdown has already started. It could happen because JVM + * shutdown hooks are executed in unpredictable order. + *
  • Under the hood, that function tries to register a JVM shutdown hook to clean up NIO + * resources. + *
  • Registering a shutdown hook during the shutdown sequence results in + * java.lang.IllegalStateException: Shutdown in progress + *
  • This exception is not reported as a failure case in the LookupSession.lookupAsync + * result completion stage but is thrown directly on the method call + *
  • If not caught, this error propagates to Netty internals , resulting in a scary "Panic! + * This is a bug!" log message written to JUL by Netty internals + *
+ */ + /* package */ static final class LookupFailedJvmShutdownException extends RuntimeException { + + @Serial private static final long serialVersionUID = 1L; + } + + /** + * A hack to detect if we are in the middle of JVM shutdown. + * + *

Shouldn't be called on a hot code path. + * + *

Check out {@link LookupFailedJvmShutdownException} for explanation why it's needed. + */ + private static final class JvmShutdownDetector { + + private static volatile boolean JVM_SHUTDOWN_DETECTED = false; + private static final ReentrantLock RUNTIME_POKE_LOCK = new ReentrantLock(); + + public static boolean isJvmShutdownDetected() { + // serializing access to the actual detection method (pokeRuntimeIsInShutdown) + // and caching the positive result to ensure we do not overload the JVM internal state + if (JVM_SHUTDOWN_DETECTED) { + return true; + } else { + RUNTIME_POKE_LOCK.lock(); + try { + if (JVM_SHUTDOWN_DETECTED) { + return true; + } + JVM_SHUTDOWN_DETECTED = pokeRuntimeIsInShutdown(); + return JVM_SHUTDOWN_DETECTED; + } finally { + RUNTIME_POKE_LOCK.unlock(); + } + } + } + + private static boolean pokeRuntimeIsInShutdown() { + var hook = new Thread(() -> {}); + try { + Runtime.getRuntime().addShutdownHook(hook); + Runtime.getRuntime().removeShutdownHook(hook); + } catch (IllegalStateException e) { + // java.lang.Runtime.addShutdownHook and java.lang.Runtime.removeShutdownHook + // methods Javadoc specifies that IllegalStateException is thrown, + // if called after JVM shutdown started + return true; + } catch (Error e) { + // handling Error separately from other Throwable - Error should be propagated + throw e; + } catch (Throwable e) { + // handling other remaining Throwable, just in case + // we don't know the shutdown state here so returning false + return false; + } + return false; + } + } }