opentelemetry_sdk/trace/
sampler.rs

1use opentelemetry::{
2    trace::{
3        Link, SamplingDecision, SamplingResult, SpanKind, TraceContextExt, TraceId, TraceState,
4    },
5    Context, KeyValue,
6};
7
8#[cfg(feature = "jaeger_remote_sampler")]
9mod jaeger_remote;
10
11#[cfg(feature = "jaeger_remote_sampler")]
12pub use jaeger_remote::{JaegerRemoteSampler, JaegerRemoteSamplerBuilder};
13#[cfg(feature = "jaeger_remote_sampler")]
14use opentelemetry_http::HttpClient;
15
16/// The [`ShouldSample`] interface allows implementations to provide samplers
17/// which will return a sampling [`SamplingResult`] based on information that
18/// is typically available just before the [`Span`] was created.
19///
20/// # Sampling
21///
22/// Sampling is a mechanism to control the noise and overhead introduced by
23/// OpenTelemetry by reducing the number of samples of traces collected and
24/// sent to the backend.
25///
26/// Sampling may be implemented on different stages of a trace collection.
27/// [OpenTelemetry SDK] defines a [`ShouldSample`] interface that can be used at
28/// instrumentation points by libraries to check the sampling [`SamplingDecision`]
29/// early and optimize the amount of telemetry that needs to be collected.
30///
31/// All other sampling algorithms may be implemented on SDK layer in exporters,
32/// or even out of process in Agent or Collector.
33///
34/// The OpenTelemetry API has two properties responsible for the data collection:
35///
36/// * [`Span::is_recording()`]. If `true` the current [`Span`] records
37///   tracing events (attributes, events, status, etc.), otherwise all tracing
38///   events are dropped. Users can use this property to determine if expensive
39///   trace events can be avoided. [`SpanProcessor`]s will receive
40///   all spans with this flag set. However, [`SpanExporter`]s will
41///   not receive them unless the `Sampled` flag was set.
42/// * `Sampled` flag in [`SpanContext::trace_flags()`]. This flag is propagated
43///   via the [`SpanContext`] to child Spans. For more details see the [W3C
44///   specification](https://w3c.github.io/trace-context/). This flag indicates
45///   that the [`Span`] has been `sampled` and will be exported. [`SpanProcessor`]s
46///   and [`SpanExporter`]s will receive spans with the `Sampled` flag set for
47///   processing.
48///
49/// The flag combination `Sampled == false` and `is_recording == true` means
50/// that the current `Span` does record information, but most likely the child
51/// `Span` will not.
52///
53/// The flag combination `Sampled == true` and `is_recording == false` could
54/// cause gaps in the distributed trace, and because of this OpenTelemetry API
55/// MUST NOT allow this combination.
56///
57/// [OpenTelemetry SDK]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#sampling
58/// [`SpanContext`]: opentelemetry::trace::SpanContext
59/// [`SpanContext::trace_flags()`]: opentelemetry::trace::SpanContext#method.trace_flags
60/// [`SpanExporter`]: crate::trace::SpanExporter
61/// [`SpanProcessor`]: crate::trace::SpanProcessor
62/// [`Span`]: opentelemetry::trace::Span
63/// [`Span::is_recording()`]: opentelemetry::trace::Span#tymethod.is_recording
64pub trait ShouldSample: CloneShouldSample + Send + Sync + std::fmt::Debug {
65    /// Returns the [`SamplingDecision`] for a [`Span`] to be created.
66    ///
67    /// The [`should_sample`] function can use any of the information provided to it in order to
68    /// make a decision about whether or not a [`Span`] should or should not be sampled. However,
69    /// there are performance implications on the creation of a span
70    ///
71    /// [`Span`]: opentelemetry::trace::Span
72    /// [`should_sample`]: ShouldSample::should_sample
73    #[allow(clippy::too_many_arguments)]
74    fn should_sample(
75        &self,
76        parent_context: Option<&Context>,
77        trace_id: TraceId,
78        name: &str,
79        span_kind: &SpanKind,
80        attributes: &[KeyValue],
81        links: &[Link],
82    ) -> SamplingResult;
83}
84
85/// This trait should not be used directly instead users should use [`ShouldSample`].
86pub trait CloneShouldSample {
87    fn box_clone(&self) -> Box<dyn ShouldSample>;
88}
89
90impl<T> CloneShouldSample for T
91where
92    T: ShouldSample + Clone + 'static,
93{
94    fn box_clone(&self) -> Box<dyn ShouldSample> {
95        Box::new(self.clone())
96    }
97}
98
99impl Clone for Box<dyn ShouldSample> {
100    fn clone(&self) -> Self {
101        self.box_clone()
102    }
103}
104
105/// Default Sampling options
106///
107/// The [built-in samplers] allow for simple decisions. For more complex scenarios consider
108/// implementing your own sampler using [`ShouldSample`] trait.
109///
110/// [built-in samplers]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#built-in-samplers
111#[derive(Clone, Debug)]
112#[non_exhaustive]
113pub enum Sampler {
114    /// Always sample the trace
115    AlwaysOn,
116    /// Never sample the trace
117    AlwaysOff,
118    /// Respects the parent span's sampling decision or delegates a delegate sampler for root spans.
119    ParentBased(Box<dyn ShouldSample>),
120    /// Sample a given fraction of traces. Fractions >= 1 will always sample. If the parent span is
121    /// sampled, then it's child spans will automatically be sampled. Fractions < 0 are treated as
122    /// zero, but spans may still be sampled if their parent is.
123    /// *Note:* If this is used then all Spans in a trace will become sampled assuming that the
124    /// first span is sampled as it is based on the `trace_id` not the `span_id`
125    TraceIdRatioBased(f64),
126    /// Jaeger remote sampler supports any remote service that implemented the jaeger remote sampler protocol.
127    /// The proto definition can be found [here](https://github.com/jaegertracing/jaeger-idl/blob/main/proto/api_v2/sampling.proto)
128    ///
129    /// Jaeger remote sampler allows remotely controlling the sampling configuration for the SDKs.
130    /// The sampling is typically configured at the collector and the SDKs actively poll for changes.
131    /// The sampler uses TraceIdRatioBased or rate-limited sampler under the hood.
132    /// These samplers can be configured per whole service (a.k.a default), or per span name in a
133    /// given service (a.k.a per operation).
134    #[cfg(feature = "jaeger_remote_sampler")]
135    JaegerRemote(JaegerRemoteSampler),
136}
137
138impl Sampler {
139    /// Create a jaeger remote sampler builder.
140    ///
141    /// ### Arguments
142    /// * `runtime` - A runtime to run the HTTP client.
143    /// * `http_client` - An HTTP client to query the sampling endpoint.
144    /// * `default_sampler` - A default sampler to make a sampling decision when the remote is unavailable or before the SDK receives the first response from remote.
145    /// * `service_name` - The name of the service. This is a required parameter to query the sampling endpoint.
146    ///
147    /// See [here](https://github.com/open-telemetry/opentelemetry-rust/blob/main/examples/jaeger-remote-sampler/src/main.rs) for an example.
148    #[cfg(feature = "jaeger_remote_sampler")]
149    pub fn jaeger_remote<C, Sampler, R, Svc>(
150        runtime: R,
151        http_client: C,
152        default_sampler: Sampler,
153        service_name: Svc,
154    ) -> JaegerRemoteSamplerBuilder<C, Sampler, R>
155    where
156        C: HttpClient + 'static,
157        Sampler: ShouldSample,
158        R: crate::runtime::RuntimeChannel,
159        Svc: Into<String>,
160    {
161        JaegerRemoteSamplerBuilder::new(runtime, http_client, default_sampler, service_name)
162    }
163}
164
165impl ShouldSample for Sampler {
166    fn should_sample(
167        &self,
168        parent_context: Option<&Context>,
169        trace_id: TraceId,
170        name: &str,
171        span_kind: &SpanKind,
172        attributes: &[KeyValue],
173        links: &[Link],
174    ) -> SamplingResult {
175        let decision = match self {
176            // Always sample the trace
177            Sampler::AlwaysOn => SamplingDecision::RecordAndSample,
178            // Never sample the trace
179            Sampler::AlwaysOff => SamplingDecision::Drop,
180            // The parent decision if sampled; otherwise the decision of delegate_sampler
181            Sampler::ParentBased(delegate_sampler) => parent_context
182                .filter(|cx| cx.has_active_span())
183                .map_or_else(
184                    || {
185                        delegate_sampler
186                            .should_sample(
187                                parent_context,
188                                trace_id,
189                                name,
190                                span_kind,
191                                attributes,
192                                links,
193                            )
194                            .decision
195                    },
196                    |ctx| {
197                        let span = ctx.span();
198                        let parent_span_context = span.span_context();
199                        if parent_span_context.is_sampled() {
200                            SamplingDecision::RecordAndSample
201                        } else {
202                            SamplingDecision::Drop
203                        }
204                    },
205                ),
206            // Probabilistically sample the trace.
207            Sampler::TraceIdRatioBased(prob) => sample_based_on_probability(prob, trace_id),
208            #[cfg(feature = "jaeger_remote_sampler")]
209            Sampler::JaegerRemote(remote_sampler) => {
210                remote_sampler
211                    .should_sample(parent_context, trace_id, name, span_kind, attributes, links)
212                    .decision
213            }
214        };
215        SamplingResult {
216            decision,
217            // No extra attributes ever set by the SDK samplers.
218            attributes: Vec::new(),
219            // all sampler in SDK will not modify trace state.
220            trace_state: match parent_context {
221                Some(ctx) => ctx.span().span_context().trace_state().clone(),
222                None => TraceState::default(),
223            },
224        }
225    }
226}
227
228pub(crate) fn sample_based_on_probability(prob: &f64, trace_id: TraceId) -> SamplingDecision {
229    if *prob >= 1.0 {
230        SamplingDecision::RecordAndSample
231    } else {
232        let prob_upper_bound = (prob.max(0.0) * (1u64 << 63) as f64) as u64;
233        // TODO: update behavior when the spec definition resolves
234        // https://github.com/open-telemetry/opentelemetry-specification/issues/1413
235        let bytes = trace_id.to_bytes();
236        let (_, low) = bytes.split_at(8);
237        let trace_id_low = u64::from_be_bytes(low.try_into().unwrap());
238        let rnd_from_trace_id = trace_id_low >> 1;
239
240        if rnd_from_trace_id < prob_upper_bound {
241            SamplingDecision::RecordAndSample
242        } else {
243            SamplingDecision::Drop
244        }
245    }
246}
247
248#[cfg(all(test, feature = "testing", feature = "trace"))]
249mod tests {
250    use super::*;
251    use crate::testing::trace::TestSpan;
252    use opentelemetry::trace::{SpanContext, SpanId, TraceFlags};
253    use rand::random;
254
255    #[rustfmt::skip]
256    fn sampler_data() -> Vec<(&'static str, Sampler, f64, bool, bool)> {
257        vec![
258            // Span w/o a parent
259            ("never_sample", Sampler::AlwaysOff, 0.0, false, false),
260            ("always_sample", Sampler::AlwaysOn, 1.0, false, false),
261            ("ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, false, false),
262            ("ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, false, false),
263            ("ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, false, false),
264            ("ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, false, false),
265            ("ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, false, false),
266
267            // Spans w/o a parent delegate
268            ("delegate_to_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, false, false),
269            ("delegate_to_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, false, false),
270            ("delegate_to_ratio_-1", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(-1.0))), 0.0, false, false),
271            ("delegate_to_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.25, false, false),
272            ("delegate_to_ratio_.50", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.50))), 0.50, false, false),
273            ("delegate_to_ratio_.75", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.75))), 0.75, false, false),
274            ("delegate_to_ratio_2.0", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(2.0))), 1.0, false, false),
275
276            // Spans with a parent that is *not* sampled act like spans w/o a parent
277            ("unsampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, false),
278            ("unsampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, false),
279            ("unsampled_parent_with_ratio_.50", Sampler::TraceIdRatioBased(0.50), 0.5, true, false),
280            ("unsampled_parent_with_ratio_.75", Sampler::TraceIdRatioBased(0.75), 0.75, true, false),
281            ("unsampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, false),
282            ("unsampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 0.0, true, false),
283            ("unsampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 0.0, true, false),
284            ("unsampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 0.0, true, false),
285
286            // A ratio sampler with a parent that is sampled will ignore the parent
287            ("sampled_parent_with_ratio_-1", Sampler::TraceIdRatioBased(-1.0), 0.0, true, true),
288            ("sampled_parent_with_ratio_.25", Sampler::TraceIdRatioBased(0.25), 0.25, true, true),
289            ("sampled_parent_with_ratio_2.0", Sampler::TraceIdRatioBased(2.0), 1.0, true, true),
290
291            // Spans with a parent that is sampled, will always sample, regardless of the delegate sampler
292            ("sampled_parent_or_else_with_always_on", Sampler::ParentBased(Box::new(Sampler::AlwaysOn)), 1.0, true, true),
293            ("sampled_parent_or_else_with_always_off", Sampler::ParentBased(Box::new(Sampler::AlwaysOff)), 1.0, true, true),
294            ("sampled_parent_or_else_with_ratio_.25", Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(0.25))), 1.0, true, true),
295
296            // Spans with a sampled parent, but when using the NeverSample Sampler, aren't sampled
297            ("sampled_parent_span_with_never_sample", Sampler::AlwaysOff, 0.0, true, true),
298        ]
299    }
300
301    #[test]
302    fn sampling() {
303        let total = 10_000;
304        for (name, sampler, expectation, parent, sample_parent) in sampler_data() {
305            let mut sampled = 0;
306            for _ in 0..total {
307                let parent_context = if parent {
308                    let trace_flags = if sample_parent {
309                        TraceFlags::SAMPLED
310                    } else {
311                        TraceFlags::default()
312                    };
313                    let span_context = SpanContext::new(
314                        TraceId::from(1),
315                        SpanId::from(1),
316                        trace_flags,
317                        false,
318                        TraceState::default(),
319                    );
320
321                    Some(Context::current_with_span(TestSpan(span_context)))
322                } else {
323                    None
324                };
325
326                let trace_id = TraceId::from(random::<u128>());
327                if sampler
328                    .should_sample(
329                        parent_context.as_ref(),
330                        trace_id,
331                        name,
332                        &SpanKind::Internal,
333                        &[],
334                        &[],
335                    )
336                    .decision
337                    == SamplingDecision::RecordAndSample
338                {
339                    sampled += 1;
340                }
341            }
342            let mut tolerance = 0.0;
343            let got = sampled as f64 / total as f64;
344
345            if expectation > 0.0 && expectation < 1.0 {
346                // See https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
347                let z = 4.75342; // This should succeed 99.9999% of the time
348                tolerance = z * (got * (1.0 - got) / total as f64).sqrt();
349            }
350
351            let diff = (got - expectation).abs();
352            assert!(
353                diff <= tolerance,
354                "{name} got {got:?} (diff: {diff}), expected {expectation} (w/tolerance: {tolerance})"
355            );
356        }
357    }
358
359    #[test]
360    fn clone_a_parent_sampler() {
361        let sampler = Sampler::ParentBased(Box::new(Sampler::AlwaysOn));
362        #[allow(clippy::redundant_clone)]
363        let cloned_sampler = sampler.clone();
364
365        let cx = Context::current_with_value("some_value");
366
367        let result = sampler.should_sample(
368            Some(&cx),
369            TraceId::from(1),
370            "should sample",
371            &SpanKind::Internal,
372            &[],
373            &[],
374        );
375
376        let cloned_result = cloned_sampler.should_sample(
377            Some(&cx),
378            TraceId::from(1),
379            "should sample",
380            &SpanKind::Internal,
381            &[],
382            &[],
383        );
384
385        assert_eq!(result, cloned_result);
386    }
387
388    #[test]
389    fn parent_sampler() {
390        // name, delegate, context(with or without parent), expected decision
391        let test_cases = vec![
392            (
393                "should using delegate sampler",
394                Sampler::AlwaysOn,
395                Context::new(),
396                SamplingDecision::RecordAndSample,
397            ),
398            (
399                "should use parent result, always off",
400                Sampler::AlwaysOn,
401                Context::current_with_span(TestSpan(SpanContext::new(
402                    TraceId::from(1),
403                    SpanId::from(1),
404                    TraceFlags::default(), // not sampling
405                    false,
406                    TraceState::default(),
407                ))),
408                SamplingDecision::Drop,
409            ),
410            (
411                "should use parent result, always on",
412                Sampler::AlwaysOff,
413                Context::current_with_span(TestSpan(SpanContext::new(
414                    TraceId::from(1),
415                    SpanId::from(1),
416                    TraceFlags::SAMPLED, // not sampling
417                    false,
418                    TraceState::default(),
419                ))),
420                SamplingDecision::RecordAndSample,
421            ),
422        ];
423
424        for (name, delegate, parent_cx, expected) in test_cases {
425            let sampler = Sampler::ParentBased(Box::new(delegate));
426            let result = sampler.should_sample(
427                Some(&parent_cx),
428                TraceId::from(1),
429                name,
430                &SpanKind::Internal,
431                &[],
432                &[],
433            );
434
435            assert_eq!(result.decision, expected);
436        }
437    }
438}