Skip to content

Commit 8c8fe62

Browse files
aadityaraj7769 and adiraj_linkedin authored
Add OpenTelemetry metrics support for XDS Client (#1127)
* Added the OTel changes.
* Added more logic for the XdsClient OTel metrics.
* Use NoOpXdsClientOtelMetricsProvider as default to eliminate null handling in XdsClientImpl.
* Update comments in XdsClientOtelMetricsProvider.java.
* Added tests for XdsClientOtelMetricsProvider.
* Resolved PR comments.
* Resolved PR comments.
* Revert d2jmxconstant changes.
* Resolved PR comments.
* Update changelog for 29.82.1.
* Minor changes in changelog.
* Created separate tests and used them in TestXdsLoadBalancerWithFacilitiesFactory and XdsClientOtelMetricsProviderTest.java.
* Removed unnecessary comments.
* Re-push for PR check failures.
* Resolve PR comments: make clientName static.

---------

Co-authored-by: adiraj_linkedin <[email protected]>
1 parent f6eb99f commit 8c8fe62

14 files changed

+855
-38
lines changed

CHANGELOG.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ and what APIs have changed, if applicable.
1414

1515
## [Unreleased]
1616

17+
## [29.83.1] - 2026-01-19
18+
- Add OpenTelemetry support for the XDS client sensor.
19+
1720
## [29.83.0] - 2026-01-16
1821
- Add stop method to RateLimiterExecutionTracker
1922

@@ -5942,7 +5945,8 @@ patch operations can re-use these classes for generating patch messages.
59425945

59435946
## [0.14.1]
59445947

5945-
[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.83.0...master
5948+
[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.83.1...master
5949+
[29.83.1]: https://github.com/linkedin/rest.li/compare/v29.83.0...v29.83.1
59465950
[29.83.0]: https://github.com/linkedin/rest.li/compare/v29.82.0...v29.83.0
59475951
[29.82.0]: https://github.com/linkedin/rest.li/compare/v29.81.2...v29.82.0
59485952
[29.81.2]: https://github.com/linkedin/rest.li/compare/v29.81.1...v29.81.2
@@ -7191,4 +7195,4 @@ patch operations can re-use these classes for generating patch messages.
71917195
[0.14.4]: https://github.com/linkedin/rest.li/compare/v0.14.3...v0.14.4
71927196
[0.14.3]: https://github.com/linkedin/rest.li/compare/v0.14.2...v0.14.3
71937197
[0.14.2]: https://github.com/linkedin/rest.li/compare/v0.14.1...v0.14.2
7194-
[0.14.1]: https://github.com/linkedin/rest.li/tree/v0.14.1
7198+
[0.14.1]: https://github.com/linkedin/rest.li/tree/v0.14.1

d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import com.linkedin.d2.discovery.stores.zk.ZKPersistentConnection;
5353
import com.linkedin.d2.discovery.stores.zk.ZooKeeper;
5454
import com.linkedin.d2.jmx.XdsServerMetricsProvider;
55+
import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider;
5556
import com.linkedin.d2.jmx.JmxManager;
5657
import com.linkedin.d2.xds.XdsClientValidator;
5758
import com.linkedin.d2.jmx.NoOpJmxManager;
@@ -247,7 +248,8 @@ public D2Client build()
247248
_config.actionOnPrecheckFailure,
248249
_config.d2CalleeInfoRecorder,
249250
_config.enableIndisDownstreamServicesFetcher,
250-
_config.indisDownstreamServicesFetchTimeout
251+
_config.indisDownstreamServicesFetchTimeout,
252+
_config.xdsClientOtelMetricsProvider
251253
);
252254

253255
final LoadBalancerWithFacilitiesFactory loadBalancerFactory = (_config.lbWithFacilitiesFactory == null) ?
@@ -856,6 +858,11 @@ public D2ClientBuilder setXdsServerMetricsProvider(XdsServerMetricsProvider xdsS
856858
return this;
857859
}
858860

861+
public D2ClientBuilder setXdsClientOtelMetricsProvider(XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider) {
862+
_config.xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider;
863+
return this;
864+
}
865+
859866
public D2ClientBuilder setLoadBalanceStreamException(boolean loadBalanceStreamException) {
860867
_config.loadBalanceStreamException = loadBalanceStreamException;
861868
return this;

d2/src/main/java/com/linkedin/d2/balancer/D2ClientConfig.java

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
import com.linkedin.d2.jmx.JmxManager;
4545
import com.linkedin.d2.jmx.NoOpXdsServerMetricsProvider;
4646
import com.linkedin.d2.jmx.NoOpJmxManager;
47+
import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider;
48+
import com.linkedin.d2.jmx.NoOpXdsClientOtelMetricsProvider;
4749
import com.linkedin.r2.transport.common.TransportClientFactory;
4850
import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext;
4951
import java.time.Duration;
@@ -181,6 +183,12 @@ public class D2ClientConfig
181183

182184
public boolean subscribeToUriGlobCollection = false;
183185
public XdsServerMetricsProvider _xdsServerMetricsProvider = new NoOpXdsServerMetricsProvider();
186+
187+
/**
188+
* Provider for OpenTelemetry metrics collection for XDS client operations.
189+
* Defaults to no-op implementation; can be overridden to enable metric tracking.
190+
*/
191+
public XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider = new NoOpXdsClientOtelMetricsProvider();
184192
public boolean loadBalanceStreamException = false;
185193
public boolean xdsInitialResourceVersionsEnabled = false;
186194
public Integer xdsStreamMaxRetryBackoffSeconds = null;
@@ -293,6 +301,145 @@ public D2ClientConfig()
293301
D2CalleeInfoRecorder d2CalleeInfoRecorder,
294302
Boolean enableIndisDownstreamServicesFetcher,
295303
Duration indisDownstreamServicesFetchTimeout)
304+
{
305+
this(zkHosts, xdsServer, hostName, zkSessionTimeoutInMs, zkStartupTimeoutInMs, lbWaitTimeout, lbWaitUnit,
306+
flagFile, basePath, fsBasePath, indisFsBasePath, componentFactory, clientFactories, lbWithFacilitiesFactory,
307+
sslContext, grpcSslContext, sslParameters, isSSLEnabled, shutdownAsynchronously, isSymlinkAware,
308+
clientServicesConfig, d2ServicePath, useNewEphemeralStoreWatcher, healthCheckOperations, executorService,
309+
retry, restRetryEnabled, streamRetryEnabled, retryLimit, retryUpdateIntervalMs, retryAggregatedIntervalNum,
310+
warmUp, warmUpTimeoutSeconds, indisWarmUpTimeoutSeconds, warmUpConcurrentRequests,
311+
indisWarmUpConcurrentRequests, downstreamServicesFetcher, indisDownstreamServicesFetcher,
312+
backupRequestsEnabled, backupRequestsStrategyStatsConsumer,
313+
backupRequestsLatencyNotificationInterval,
314+
backupRequestsLatencyNotificationIntervalUnit,
315+
enableBackupRequestsClientAsync,
316+
backupRequestsExecutorService,
317+
emitter,
318+
partitionAccessorRegistry,
319+
zooKeeperDecorator,
320+
enableSaveUriDataOnDisk,
321+
loadBalancerStrategyFactories,
322+
requestTimeoutHandlerEnabled,
323+
sslSessionValidatorFactory,
324+
zkConnection,
325+
startUpExecutorService,
326+
indisStartUpExecutorService,
327+
jmxManager,
328+
d2JmxManagerPrefix,
329+
zookeeperReadWindowMs,
330+
enableRelativeLoadBalancer,
331+
deterministicSubsettingMetadataProvider,
332+
canaryDistributionProvider,
333+
enableClusterFailout,
334+
failoutConfigProviderFactory,
335+
failoutRedirectStrategy,
336+
serviceDiscoveryEventEmitter,
337+
dualReadStateManager,
338+
xdsExecutorService,
339+
xdsStreamReadyTimeout,
340+
dualReadNewLbExecutor,
341+
xdsChannelLoadBalancingPolicy,
342+
xdsChannelLoadBalancingPolicyConfig,
343+
subscribeToUriGlobCollection,
344+
xdsServerMetricsProvider,
345+
loadBalanceStreamException,
346+
xdsInitialResourceVersionsEnabled,
347+
disableDetectLiRawD2Client,
348+
isLiRawD2Client,
349+
xdsStreamMaxRetryBackoffSeconds,
350+
xdsChannelKeepAliveTimeMins,
351+
xdsMinimumJavaVersion,
352+
actionOnPrecheckFailure,
353+
d2CalleeInfoRecorder,
354+
enableIndisDownstreamServicesFetcher,
355+
indisDownstreamServicesFetchTimeout,
356+
new NoOpXdsClientOtelMetricsProvider());
357+
}
358+
359+
D2ClientConfig(String zkHosts,
360+
String xdsServer,
361+
String hostName,
362+
long zkSessionTimeoutInMs,
363+
long zkStartupTimeoutInMs,
364+
long lbWaitTimeout,
365+
TimeUnit lbWaitUnit,
366+
String flagFile,
367+
String basePath,
368+
String fsBasePath,
369+
String indisFsBasePath,
370+
ComponentFactory componentFactory,
371+
Map<String, TransportClientFactory> clientFactories,
372+
LoadBalancerWithFacilitiesFactory lbWithFacilitiesFactory,
373+
SSLContext sslContext,
374+
SslContext grpcSslContext,
375+
SSLParameters sslParameters,
376+
boolean isSSLEnabled,
377+
boolean shutdownAsynchronously,
378+
boolean isSymlinkAware,
379+
Map<String, Map<String, Object>> clientServicesConfig,
380+
String d2ServicePath,
381+
boolean useNewEphemeralStoreWatcher,
382+
HealthCheckOperations healthCheckOperations,
383+
ScheduledExecutorService executorService,
384+
boolean retry,
385+
boolean restRetryEnabled,
386+
boolean streamRetryEnabled,
387+
int retryLimit,
388+
long retryUpdateIntervalMs,
389+
int retryAggregatedIntervalNum,
390+
boolean warmUp,
391+
int warmUpTimeoutSeconds,
392+
int indisWarmUpTimeoutSeconds,
393+
int warmUpConcurrentRequests,
394+
int indisWarmUpConcurrentRequests,
395+
DownstreamServicesFetcher downstreamServicesFetcher,
396+
DownstreamServicesFetcher indisDownstreamServicesFetcher,
397+
boolean backupRequestsEnabled,
398+
BackupRequestsStrategyStatsConsumer backupRequestsStrategyStatsConsumer,
399+
long backupRequestsLatencyNotificationInterval,
400+
TimeUnit backupRequestsLatencyNotificationIntervalUnit,
401+
boolean enableBackupRequestsClientAsync,
402+
ScheduledExecutorService backupRequestsExecutorService,
403+
EventEmitter emitter,
404+
PartitionAccessorRegistry partitionAccessorRegistry,
405+
Function<ZooKeeper, ZooKeeper> zooKeeperDecorator,
406+
boolean enableSaveUriDataOnDisk,
407+
Map<String, LoadBalancerStrategyFactory<? extends LoadBalancerStrategy>> loadBalancerStrategyFactories,
408+
boolean requestTimeoutHandlerEnabled,
409+
SslSessionValidatorFactory sslSessionValidatorFactory,
410+
ZKPersistentConnection zkConnection,
411+
ScheduledExecutorService startUpExecutorService,
412+
ScheduledExecutorService indisStartUpExecutorService,
413+
JmxManager jmxManager,
414+
String d2JmxManagerPrefix,
415+
int zookeeperReadWindowMs,
416+
boolean enableRelativeLoadBalancer,
417+
DeterministicSubsettingMetadataProvider deterministicSubsettingMetadataProvider,
418+
CanaryDistributionProvider canaryDistributionProvider,
419+
boolean enableClusterFailout,
420+
FailoutConfigProviderFactory failoutConfigProviderFactory,
421+
FailoutRedirectStrategy failoutRedirectStrategy,
422+
ServiceDiscoveryEventEmitter serviceDiscoveryEventEmitter,
423+
DualReadStateManager dualReadStateManager,
424+
ScheduledExecutorService xdsExecutorService,
425+
Long xdsStreamReadyTimeout,
426+
ExecutorService dualReadNewLbExecutor,
427+
String xdsChannelLoadBalancingPolicy,
428+
Map<String, ?> xdsChannelLoadBalancingPolicyConfig,
429+
boolean subscribeToUriGlobCollection,
430+
XdsServerMetricsProvider xdsServerMetricsProvider,
431+
boolean loadBalanceStreamException,
432+
boolean xdsInitialResourceVersionsEnabled,
433+
boolean disableDetectLiRawD2Client,
434+
boolean isLiRawD2Client,
435+
Integer xdsStreamMaxRetryBackoffSeconds,
436+
Long xdsChannelKeepAliveTimeMins,
437+
String xdsMinimumJavaVersion,
438+
XdsClientValidator.ActionOnPrecheckFailure actionOnPrecheckFailure,
439+
D2CalleeInfoRecorder d2CalleeInfoRecorder,
440+
Boolean enableIndisDownstreamServicesFetcher,
441+
Duration indisDownstreamServicesFetchTimeout,
442+
XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider)
296443
{
297444
this.zkHosts = zkHosts;
298445
this.xdsServer = xdsServer;
@@ -377,5 +524,6 @@ public D2ClientConfig()
377524
this.d2CalleeInfoRecorder = d2CalleeInfoRecorder;
378525
this.indisDownstreamServicesFetchTimeout = indisDownstreamServicesFetchTimeout;
379526
this.enableIndisDownstreamServicesFetcher = enableIndisDownstreamServicesFetcher;
527+
this.xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider;
380528
}
381529
}

d2/src/main/java/com/linkedin/d2/jmx/D2ClientJmxManager.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,17 @@ public void registerXdsClientJmx(XdsClientJmx xdsClientJmx)
305305
{
306306
_log.warn("Setting XdsClientJmx for Non-XDS source type: {}", _discoverySourceType);
307307
}
308-
final String jmxName = String.format("%s-XdsClientJmx", getGlobalPrefix(null));
308+
// Get the client name from global prefix
309+
String clientName = getGlobalPrefix(null);
310+
if(clientName != null && !clientName.isEmpty())
311+
{
312+
xdsClientJmx.setClientName(clientName);
313+
}
314+
else
315+
{
316+
_log.warn("Client name is empty, unable to set client name for XdsClientJmx");
317+
}
318+
final String jmxName = String.format("%s-XdsClientJmx", clientName);
309319
_jmxManager.registerXdsClientJmxBean(jmxName, xdsClientJmx);
310320
}
311321

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package com.linkedin.d2.jmx;
2+
3+
/**
4+
* No-Op implementation of {@link XdsClientOtelMetricsProvider}.
5+
* Used when OpenTelemetry metrics are disabled.
6+
*/
7+
public class NoOpXdsClientOtelMetricsProvider implements XdsClientOtelMetricsProvider {
8+
9+
/**
10+
* {@inheritDoc}
11+
*/
12+
@Override
13+
public void recordConnectionLost(String clientName) {
14+
// No-op
15+
}
16+
17+
/**
18+
* {@inheritDoc}
19+
*/
20+
@Override
21+
public void recordConnectionClosed(String clientName) {
22+
// No-op
23+
}
24+
25+
/**
26+
* {@inheritDoc}
27+
*/
28+
@Override
29+
public void recordReconnection(String clientName) {
30+
// No-op
31+
}
32+
33+
/**
34+
* {@inheritDoc}
35+
*/
36+
@Override
37+
public void recordRequestSent(String clientName) {
38+
// No-op
39+
}
40+
41+
/**
42+
* {@inheritDoc}
43+
*/
44+
@Override
45+
public void recordResponseReceived(String clientName) {
46+
// No-op
47+
}
48+
49+
/**
50+
* {@inheritDoc}
51+
*/
52+
@Override
53+
public void recordInitialResourceVersionSent(String clientName, int count) {
54+
// No-op
55+
}
56+
57+
/**
58+
* {@inheritDoc}
59+
*/
60+
@Override
61+
public void recordResourceNotFound(String clientName) {
62+
// No-op
63+
}
64+
65+
/**
66+
* {@inheritDoc}
67+
*/
68+
@Override
69+
public void recordResourceInvalid(String clientName) {
70+
// No-op
71+
}
72+
73+
/**
74+
* {@inheritDoc}
75+
*/
76+
@Override
77+
public void recordServerLatency(String clientName, long latencyMs) {
78+
// No-op
79+
}
80+
81+
/**
82+
* {@inheritDoc}
83+
*/
84+
@Override
85+
public void updateConnectionState(String clientName, boolean isConnected) {
86+
// No-op
87+
}
88+
89+
/**
90+
* {@inheritDoc}
91+
*/
92+
@Override
93+
public void updateActiveInitialWaitTime(String clientName, long waitTimeMs) {
94+
// No-op
95+
}
96+
}

0 commit comments

Comments
 (0)