1"""Various analysis protocols and standards for recorded underwater noise from ships.
2
3.. currentmodule:: uwacan.analysis
4
5Core processing and analysis
6----------------------------
7.. autosummary::
8 :toctree: generated
9
10 Spectrogram
11 SpectralProbability
12 SpectralProbabilitySeries
13 ShipLevel
14
15Helper functions and conversions
16--------------------------------
17.. autosummary::
18 :toctree: generated
19
20 convert_to_radiated_noise
21 level_uncertainty
22 required_averaging
23
24"""
25
26import numpy as np
27from . import (
28 _core,
29 spectral,
30 propagation,
31 positional,
32)
33import xarray as xr
34
35
[docs]
36class ShipLevel(_core.DatasetWrap):
37 """Calculates and stores measured ship levels.
38
39 This class has all functionality to analyze ship transits and
40 post-process the resulting radiated noise levels.
41 The analysis methods are all implemented as classmethods with the
42 ``analyze_transits`` prefix.
43
44 Parameters
45 ----------
46 data : `xarray.Dataset`
47 The dataset with measurement results.
48 This dataset must have a "source_power" variable.
49 """
50
[docs]
51 @classmethod
52 def analyze_transits(
53 cls,
54 *transits,
55 filterbank=None,
56 propagation_model=None,
57 background_noise=None,
58 transit_min_angle=None,
59 transit_min_duration=None,
60 transit_min_length=None,
61 reference_position=None,
62 ):
63 """Analyze ship transits to estimate source power and related metrics.
64
65 Parameters
66 ----------
67 *transits : Transit objects
68 One or more `Transit` objects to be analyzed.
69 filterbank : callable, optional
70 A callable that applies a filterbank to the time data of the recording. If not provided, defaults to
71 `~spectral.Spectrogram` with 10 bands per decade between 20 Hz and 20 kHz, and a frame step of 1.
72 The callable should have the signature::
73
74 f(time_data: uwacan.TimeData) -> uwacan.TimeFrequencyData
75
76 propagation_model : callable or `~uwacan.propagation.PropagationModel`, optional
77 A callable that compensates for the propagation effects on the received power. If not provided, defaults
78 to a `~uwacan.propagation.MlogR` propagation model with ``m=20``.
79 The callable should have the signature::
80
81 propagation_model(
82 received_power: uwacan.FrequencyData,
83 receiver: uwacan.Position,
84 source: uwacan.Track
85 ) -> uwacan.FrequencyData
86
87 with the frequency data optionally also having a time dimension.
88 background_noise : callable, optional
89 A callable that models the background noise.
90 The callable should have the signature::
91
92 f(received_power: uwacan.FrequencyData) -> uwacan.FrequencyData
93
94 If not provided, defaults to a no-op function that returns the input ``received_power``.
95 A suitable callable can be created using the `uwacan.background.Background` class.
96 transit_min_angle : float, optional
97 Minimum angle for segment selection during transit analysis, in degrees.
98 The segment analyzed will cover at least this aspect angle on each side of the CPA.
99 E.g., ``transit_min_angle=30`` means the segment covers from -30° to +30°.
100 transit_min_duration : float, optional
101 Minimum duration for segment selection during transit analysis, in seconds.
102 The segment analyzed will cover at least this duration in total.
103 transit_min_length : float, optional
104 Minimum length for segment selection during transit analysis, in meters.
105 The segment analyzed will cover at least this length in total.
106
107 Returns
108 -------
109 ship_levels : `ShipLevels`
110 An instance of the class containing the analysis results for each transit, including source power,
111 latitude, longitude, transit time, and optionally signal-to-noise ratio (SNR).
112
113 Notes
114 -----
115 This method processes each transit individually by:
116 1. Determining the closest point of approach (CPA) time.
117 2. Optionally selecting a segment around CPA.
118 3. Applying the filterbank to the time data of the recording.
119 4. Compensating for background noise and propagation effects.
120
121 The method returns a concatenated dataset containing the results for all provided transits.
122 The core dimension for each transit is "segment", which indicates the time segments used
123 in the filterbank.
124 """
125 if filterbank is None:
126 filterbank = spectral.Filterbank(bands_per_decade=10, min_frequency=20, max_frequency=20_000, frame_step=1)
127
128 if background_noise is None:
129
130 def background_noise(received_power, **kwargs):
131 return received_power
132
133 if propagation_model is None:
134 propagation_model = propagation.MlogR(m=20)
135
136 if isinstance(propagation_model, propagation.PropagationModel):
137 propagation_model = propagation_model.compensate_propagation
138
139 if reference_position is None:
140 if isinstance(transit.recording.sensor, positional.Position):
141 reference_position = transit.recording.sensor
142 else:
143 raise TypeError("A reference position must be provided if the recording has more than one sensor position.")
144
145 results = []
146 for transit in transits:
147 if (transit_min_angle, transit_min_duration, transit_min_length) == (None, None, None):
148 cpa_time = transit.track.closest_point(transit.recording.sensor)["time"].data
149 else:
150 # If the recording is a spectrogram, we loose the sensor in the subwindow
151 sensor = transit.recording.sensor
152 segment = transit.track.aspect_segments(
153 reference=reference_position,
154 angles=0,
155 segment_min_duration=transit_min_duration,
156 segment_min_angle=None if not transit_min_angle else transit_min_angle * 2,
157 segment_min_length=transit_min_length,
158 )
159 cpa_time = segment.time.sel(edge="center").data
160 transit = transit.subwindow(segment)
161 # Reassign the sensor since we lost it in the subwindow
162 transit.recording.sensor = sensor
163
164 direction = transit.track.average_course("eight")
165 if hasattr(transit.recording, "time_data"):
166 time_data = transit.recording.time_data()
167 received_power = filterbank(time_data)
168 else:
169 received_power = transit.recording
170
171
172 received_power = background_noise(received_power)
173 track = transit.track.resample(received_power.time)
174 source_power = propagation_model(
175 received_power=received_power, receiver=transit.recording.sensor, source=track
176 )
177 transit_time = (received_power.data["time"] - cpa_time) / np.timedelta64(1, "s")
178 closest_to_cpa = np.abs(transit_time).argmin("time").item()
179 segment = xr.DataArray(
180 np.arange(transit_time.time.size) - closest_to_cpa, coords={"time": received_power.time}
181 )
182 transit_results = xr.Dataset(
183 data_vars=dict(
184 source_power=source_power.data,
185 latitude=track.latitude,
186 longitude=track.longitude,
187 transit_time=transit_time,
188 ),
189 coords=dict(
190 segment=segment,
191 direction=direction,
192 cpa_time=cpa_time,
193 ),
194 )
195 transit_results["received_power"] = received_power.data
196 if hasattr(received_power, "snr"):
197 transit_results["snr"] = received_power.snr
198 results.append(transit_results.swap_dims(time="segment"))
199 results = xr.concat(results, "transit")
200 results.coords["transit"] = np.arange(results.sizes["transit"]) + 1
201 return cls(results)
202
[docs]
203 @classmethod
204 def analyze_transits_in_angle_segments(
205 cls,
206 *transits,
207 filterbank=None,
208 propagation_model=None,
209 background_noise=None,
210 aspect_angles=tuple(range(-45, 46, 5)),
211 segment_min_length=100,
212 segment_min_angle=None,
213 segment_min_duration=None,
214 ):
215 """Analyze ship transits in constant angle segments to estimate source power and related metrics.
216
217 Parameters
218 ----------
219 *transits : Transit objects
220 One or more `Transit` objects to be analyzed.
221 filterbank : callable, optional
222 A callable that applies a filterbank to the time data of the recording. If not provided, defaults to
223 `NthDecadeSpectrogram` with 10 bands per decade between 20 Hz and 20 kHz, and a frame step of 1.
224 The callable should have the signature::
225
226 f(time_data: uwacan.TimeData) -> uwacan.TimeFrequencyData
227
228 propagation_model : callable or `~uwacan.propagation.PropagationModel`, optional
229 A callable that compensates for the propagation effects on the received power. If not provided, defaults
230 to a `~uwacan.propagation.MlogR` propagation model with ``m=20``.
231 The callable should have the signature::
232
233 propagation_model(
234 received_power: uwacan.FrequencyData,
235 receiver: uwacan.Position,
236 source: uwacan.Track
237 ) -> uwacan.FrequencyData
238
239 with the frequency data optionally also having a time dimension.
240 background_noise : callable, optional
241 A callable that models the background noise.
242 The callable should have the signature::
243
244 f(received_power: uwacan.FrequencyData) -> uwacan.FrequencyData
245
246 If not provided, defaults to a no-op function that returns the input ``received_power``.
247 A suitable callable can be created using the `uwacan.background.Background` class.
248 aspect_angles : array_like
249 The angles where to center each segment, in degrees.
250 Defaults to each 5° from -45° to 45°.
251 segment_min_angle : float, optional
252 Minimum angle width for the segments, in degrees.
253 segment_min_duration : float, optional
254 Minimum duration for the segments, in seconds.
255 segment_min_length : float, optional
256 Minimum length for the segments, in meters.
257
258 Returns
259 -------
260 ship_levels : `ShipLevels`
261 An instance of the class containing the analysis results for each transit, including source power,
262 latitude, longitude, transit time, and optionally signal-to-noise ratio (SNR).
263
264 Notes
265 -----
266 This method processes each transit individually by:
267
268 1. Determining the closest point of approach (CPA) time.
269 2. Finding the segments centered at each aspect_angle.
270 See `uwacan.Track.aspect_segments` for more details on how the segments are computed.
271 3. Applying the filterbank to the time data of the recording.
272 4. Averaging the received sound power within each segment.
273 5. Compensating for background noise and propagation effects in each segment.
274
275 The method returns a concatenated dataset containing the results for all provided transits.
276 The core dimension for each transit is "segment", which indicates the aspect angles specified.
277 """
278 if filterbank is None:
279 filterbank = spectral.Filterbank(bands_per_decade=10, min_frequency=20, max_frequency=20_000, frame_step=1)
280
281 transit_padding = 10
282
283 if background_noise is None:
284
285 def background_noise(received_power, **kwargs):
286 return received_power
287
288 if propagation_model is None:
289 propagation_model = propagation.MlogR(m=20)
290
291 if isinstance(propagation_model, propagation.PropagationModel):
292 propagation_model = propagation_model.compensate_propagation
293
294 results = []
295 for transit in transits:
296 segments = transit.track.aspect_segments(
297 reference=transit.recording.sensor,
298 angles=aspect_angles,
299 segment_min_length=segment_min_length,
300 segment_min_angle=segment_min_angle,
301 segment_min_duration=segment_min_duration,
302 )
303 transit = transit.subwindow(segments, extend=transit_padding)
304
305 if np.min(np.abs(segments.segment)) == 0:
306 cpa_time = segments.sel(segment=0, edge="center")["time"].data
307 else:
308 cpa_time = transit.track.closest_point(transit.recording.sensor)["time"].data
309
310 direction = transit.track.average_course("eight")
311 time_data = transit.recording.time_data()
312 received_power = filterbank(time_data)
313
314 segment_powers = []
315 for segment_angle, segment in segments.groupby("segment", squeeze=False):
316 segment_power = received_power.subwindow(segment).mean("time").data
317 segment_power.coords["segment"] = segment_angle
318 segment_powers.append(segment_power)
319 segment_powers = xr.concat(segment_powers, "segment")
320 segment_powers = _core.FrequencyData(segment_powers)
321
322 compensated_power = background_noise(segment_powers)
323 track = transit.track.resample(segments.sel(edge="center", drop=True).time)
324 source_power = propagation_model(
325 received_power=segment_powers, receiver=transit.recording.sensor, source=track
326 )
327 transit_time = (track._data["time"] - cpa_time) / np.timedelta64(1, "s")
328
329 transit_results = xr.Dataset(
330 data_vars=dict(
331 source_power=source_power.data,
332 received_power=compensated_power.data,
333 latitude=track.latitude,
334 longitude=track.longitude,
335 transit_time=transit_time,
336 ),
337 coords=dict(
338 direction=direction,
339 cpa_time=cpa_time,
340 ),
341 )
342 if hasattr(compensated_power, "snr"):
343 transit_results["snr"] = compensated_power.snr
344 results.append(transit_results)
345 results = xr.concat(results, "transit")
346 results.coords["transit"] = np.arange(results.sizes["transit"]) + 1
347 return cls(results)
348
349 @property
350 def source_power(self):
351 """The source power of the transits."""
352 return _core.FrequencyData(self._data["source_power"])
353
354 @property
355 def source_level(self):
356 """The source level of the transits."""
357 return _core.dB(self.source_power, power=True)
358
359 @property
360 def received_power(self):
361 """The received power during the transits."""
362 return _core.FrequencyData(self._data["received_power"])
363
364 @property
365 def received_level(self):
366 """The received level during the transits."""
367 return _core.dB(self.received_power, power=True)
368
[docs]
369 def power_average(self, dim=..., **kwargs):
370 """Power-wise average of data.
371
372 This calculates the power average of the ship levels
373 over some dimensions, and the linear average of non-power
374 quantities. SnR is always averaged on a level basis.
375
376 See `xarray.DataArray.mean` for more details.
377 """
378 return type(self)(self._data.mean(dim, **kwargs))
379
[docs]
380 def level_average(self, dims, **kwargs):
381 """Level-wise average of data.
382
383 This calculates the level average of the ship levels
384 over some dimensions, and the linear average of non-power
385 quantities. SnR is always averaged on a level basis.
386
387 See `xarray.DataArray.mean` for more details.
388 """
389 source_power = 10 ** (self.source_level.mean(dims, **kwargs) / 10)
390 received_power = 10 ** (self.received_level.mean(dims, **kwargs) / 10)
391
392 others = self._data.drop_vars(["source_power", "received_power"])
393 others = others.mean(dims, **kwargs)
394 data = others.merge(
395 {
396 "source_power": source_power.data,
397 "received_power": received_power.data,
398 }
399 )
400 return type(self)(data)
401
402 @property
403 def snr(self):
404 """The signal to noise ratio in the measurement, in dB."""
405 return _core.FrequencyData(self._data["snr"])
406
[docs]
407 def meets_snr_threshold(self, threshold):
408 """Check where the measurement meets a specific SnR threshold.
409
410 This thresholds the SnR to a specific level and returns 1 where
411 the threshold is met, 0 where the threshold is not met, and NaN
412 where there is no SnR information (typically segments that were
413 not measured).
414
415 Parameters
416 ----------
417 threshold : float
418 The threshold to compare against, in dB.
419
420 Returns
421 -------
422 meets_threshold : `~uwacan.FrequencyData`
423 Whether the SnR meets the threshold or not.
424
425 Notes
426 -----
427 This is useful to compute statistics of how often the measurement
428 meets a SnR threshold. By taking the average of the output from here,
429 we get a measure of how often we meet that threshold. By taking the
430 average before or after we compare to the threshold, we can control
431 on what granularity we measure. E.g., for a measurement with multiple
432 sensors, segments, and transits, we can get the finest granularity::
433
434 ship_levels.meets_snr_threshold(3).mean(["sensor", "segment", "transit"]) * 100
435
436 or we can choose to only look at how many of the transits meet the SnR
437 threshold after averaging over sensors and segments::
438
439 ship_levels.power_average(["sensor", "segment"]).meets_snr_threshold(3).mean("transit") * 100
440
441 We multiply both by 100 to get the value in percent.
442 """
443 snr = self._data["snr"]
444 meets_threshold = xr.where(
445 snr.isnull(),
446 np.nan,
447 snr > threshold,
448 )
449 return _core.FrequencyData(meets_threshold)
450
451
[docs]
452def convert_to_radiated_noise(source, source_depth, mode="iso", power=False):
453 r"""Convert a monopole source level to a radiated noise level.
454
455 Parameters
456 ----------
457 source : `~_core.FrequencyData`
458 The source level or source power.
459 source_depth : float
460 The source depth to use for the conversion.
461 mode : str, default="iso"
462 Which type of conversion to perform
463 power : bool, default=False
464 If the input and output are powers or levels.
465
466 Notes
467 -----
468 There are several conversion formulas implemented in this function.
469 They are described below with a conversion factor :math:`F(η)` such
470 as
471
472 .. math::
473
474 P_{RNL} = P_{MSL} F(η) \\
475 η = kd
476
477 with :math:`k` being the wavenumber and :math:`d` being the
478 source depth.
479
480 The most commonly used one is the "iso" mode:
481
482 .. math::
483
484 F = \frac{14 η^2 + 2 η^4}{14 + 2 η^2 + η^4}
485
486 This is designed to convert a monopole source level to radiated
487 noise levels measured at deep waters with hydrophone depression
488 angles of 15°, 30°, and 45°. This has a high-frequency compensation
489 of 2 (+3 dB) and a low-frequency compensation of η^2 (+20 dB/decade).
490
491 An alternative is the "average farfield" which averages all
492 depression angles
493
494 .. math::
495
496 F = 2 / (1 + 1 / η^2)
497
498 This has a high-frequency compensation of 2 (+3 dB) and a low frequency
499 compensation of 2η^2 (+3 dB + 20 dB/decade).
500
501 A third one is "isomatch", which averages up to a depression angle of θ=54.3°,
502 (measured in radians in the formulas below)
503
504 .. math::
505
506 F = 2 / (1 + 1 / G)\\
507 G = η^2 (θ - \sin(θ) \cos(θ)) / θ\\
508
509 This has the same asymptotical compensations as the "iso" method:
510 high-frequency of 2 (+3 dB) and low-frequency of η^2 (+20 dB/decade).
511
512 """
513 if mode is None or not mode:
514 return source
515 kd = 2 * np.pi * source.frequency / 1500 * source_depth
516 mode = mode.lower()
517 if mode == "iso":
518 compensation = (14 * kd**2 + 2 * kd**4) / (14 + 2 * kd**2 + kd**4)
519 elif mode == "average farfield":
520 compensation = 1 / (1 / 2 + 1 / (2 * kd**2))
521 elif mode == "isomatch":
522 truncation_angle = np.radians(54.3)
523 lf_comp = (
524 2 * kd**2 * (truncation_angle - np.sin(truncation_angle) * np.cos(truncation_angle)) / truncation_angle
525 )
526 compensation = 1 / (1 / 2 + 1 / lf_comp)
527 elif mode == "none":
528 compensation = 1
529 else:
530 raise ValueError(f"Unknown mode '{mode}'")
531 if power:
532 return source * compensation
533 else:
534 return source + 10 * np.log10(compensation)
535
536
537class Spectrum(_core.FrequencyData):
538 """Handling of spectrum data, both linear and banded.
539
540 This class is meant to handle power spectra and power spectral density.
541
542 Parameters
543 ----------
544 data : array_like
545 A `numpy.ndarray` or a `xarray.DataArray` with the frequency data.
546 frequency : array_like, optional
547 The frequencies corresponding to the data. Mandatory if ``data`` is a `numpy.ndarray`.
548 bandwidth : array_like, optional
549 The bandwidth of each data point. Can be an array with per-frequency
550 bandwidth or a single value valid for all frequencies.
551 dims : str or [str], default="frequency"
552 The dimensions of the data. Must have the same length as the number of dimensions in the data.
553 Only used for `numpy` inputs.
554 coords : `xarray.DataArray.coords`
555 Additional coordinates for this data.
556 attrs : dict, optional
557 Additional attributes to store with this data.
558 """
559
560 def _figure_template(self, **kwargs):
561 template = super()._figure_template(**kwargs)
562 template.layout.update(
563 yaxis=dict(
564 title="dB re 1μPa<sup>2</sup>/Hz"
565 )
566 )
567 return template
568
569 def plot(self, **kwargs): # noqa: D102
570 in_db = _core.dB(self)
571 return super(Spectrum, in_db).plot(**kwargs)
572
[docs]
573class Spectrogram(_core.TimeFrequencyData):
574 """Handling of spectrogram data, both linear and banded.
575
576 Parameters
577 ----------
578 data : array_like
579 A `numpy.ndarray` or a `xarray.DataArray` with the time-frequency data.
580 start_time : time_like, optional
581 The start time for the first sample in the signal.
582 This should ideally be a proper time type, but it will be parsed if it is a string.
583 Defaults to "now" if not given.
584 samplerate : float, optional
585 The samplerate for this data, in Hz. This is not the samplerate of the underlying time signal,
586 but time steps of the time axis on the time-frequency data.
587 If the `data` is a `numpy.ndarray`, this has to be given.
588 If the `data` is a `xarray.DataArray` which already has a time coordinate,
589 this can be omitted.
590 frequency : array_like, optional
591 The frequencies corresponding to the data. Mandatory if `data` is a `numpy.ndarray`.
592 bandwidth : array_like, optional
593 The bandwidth of each data point. Can be an array with per-frequency
594 bandwidth or a single value valid for all frequencies.
595 dims : str or [str], optional
596 The dimensions of the data. Must have the same length as the number of dimensions in the data.
597 Mandatory used for `numpy` inputs, not used for `xarray` inputs.
598 coords : `xarray.DataArray.coords`
599 Additional coordinates for this data.
600 attrs : dict, optional
601 Additional attributes to store with this data.
602 """
603
[docs]
604 @classmethod
605 def analyze_timedata(
606 cls,
607 time_data,
608 *,
609 bands_per_decade=None,
610 frame_step=None,
611 frame_duration=None,
612 frame_overlap=0.5,
613 min_frequency=None,
614 max_frequency=None,
615 hybrid_resolution=None,
616 fft_window="hann",
617 filepath=None,
618 batch_size=None,
619 status=None,
620 ):
621 """Compute a spectrogram from time data.
622
623 Parameters
624 ----------
625 time_data : `~uwacan.TimeData` or `~uwacan.recordings.AudioFileRecording`
626 The data to process.
627 bands_per_decade : float, optional
628 The number of frequency bands per decade for logarithmic scaling.
629 frame_step : float
630 The time step between stft frames, in seconds.
631 frame_duration : float
632 The duration of each stft frame, in seconds.
633 frame_overlap : float, default=0.5
634 The overlap factor between stft frames. A negative value leaves
635 gaps between frames.
636 min_frequency : float
637 The lowest frequency to include in the processing.
638 max_frequency : float
639 The highest frequency to include in the processing.
640 hybrid_resolution : float
641 A frequency resolution to aim for. Only used if ``frame_duration`` is not given
642 fft_window : str, default="hann"
643 The window function to apply to each rolling window before computing the FFT.
644 Can be a string specifying a window type (e.g., ``"hann"``, ``"kaiser"``, ``"blackman"``)
645 or an array-like sequence of window coefficients.
646 filepath : str, optional
647 If provided, save results to this file path. If None, return results in memory.
648 batch_size : int, optional
649 Number of frames to process before saving to file. If None and filepath is provided,
650 a suitable batch size will be computed based on available memory.
651 status : bool or callable, optional
652 Status reporting mechanism for the segments being processed. If ``True``, a default status message is
653 printed to the console showing the time window being processed. If a callable function
654 is provided, it will be called with the segment's ``time_window``.
655
656 Returns
657 -------
658 processed_data : `~uwacan.analysis.Spectrogram`
659 The processed spectrogram data.
660 """
661 if not status:
662 def status(time_window):
663 pass
664 elif status == True:
665 def status(time_window):
666 print(f"\rComputed segment {time_window.start.format_rfc3339()} to {time_window.stop.format_rfc3339()}", end="")
667
668 filterbank = spectral.Filterbank(
669 bands_per_decade=bands_per_decade,
670 frame_step=frame_step,
671 frame_duration=frame_duration,
672 frame_overlap=frame_overlap,
673 min_frequency=min_frequency,
674 max_frequency=max_frequency,
675 hybrid_resolution=hybrid_resolution,
676 fft_window=fft_window,
677 )
678
679 if filepath is None:
680 # Process all frames at once and return in memory
681 processed = filterbank(time_data)
682 return cls(processed)
683
684 roller = filterbank.rolling(time_data)
685 # If no batch size provided, compute a suitable one based on memory usage
686 if batch_size is None:
687 # Estimate memory needed per frame (in bytes)
688 memory_per_frame = 8 * np.prod(roller.shape)
689 target_memory = 128 * 1024 * 1024 # 128MB per batch as default
690 batch_size = max(1, int(target_memory / memory_per_frame))
691
692 # Process and save batches one at a time
693 for batch in roller.batches(batch_size):
694 # Report status after each batch is processed
695 status(batch.time_window)
696 batch = cls(batch) # Convert to Spectrogram to save with correct class name
697 batch.save(filepath, append_dim="time")
698
699 return cls.load(filepath)
700
701 def _figure_template(self, **kwargs):
702 template = super()._figure_template(**kwargs)
703 template.data.update(
704 heatmap=[
705 dict(
706 hovertemplate="%{x}<br>%{y:.5s}Hz<br>%{z}dB<extra></extra>",
707 colorbar_title_text="dB re 1μPa<sup>2</sup>/Hz",
708 )
709 ]
710 )
711 return template
712
[docs]
713 def plot(self, **kwargs): # noqa: D102
714 in_db = _core.dB(self)
715 return super(Spectrogram, in_db).plot(**kwargs)
716
[docs]
717 @classmethod
718 def from_dataset(cls, data, **kwargs): # noqa: D102
719 if "time" not in data.dims:
720 return Spectrum.from_dataset(data, **kwargs)
721 return super().from_dataset(data, **kwargs)
722
723
[docs]
724class SpectralProbability(_core.FrequencyData):
725 """Handles spectral probability data.
726
727 Parameters
728 ----------
729 data : array_like
730 A `numpy.ndarray` or a `xarray.DataArray` with the frequency data.
731 levels : array_like, optional
732 The dB level represented by the bins. Mandatory if ``data`` is a `numpy.ndarray`.
733 binwidth : float, optional
734 The width of the bins. Computed from the levels if not given.
735 frequency : array_like, optional
736 The frequencies corresponding to the data. Mandatory if ``data`` is a `numpy.ndarray`.
737 frequency_band_lower : array_like, optional
738 The lower edge of each frequency band. Must be provided together with
739 `frequency_band_upper`. This is the preferred method for specifying
740 frequency band information.
741 frequency_band_upper : array_like, optional
742 The upper edge of each frequency band. Must be provided together with
743 `frequency_band_lower`. This is the preferred method for specifying
744 frequency band information.
745 bandwidth : float, optional
746 A single bandwidth value valid for all frequencies. This is a convenience
747 method that assumes centered frequency bands. For per-frequency bandwidth,
748 use `frequency_band_lower`/`frequency_band_upper` instead.
749 scaling : str, default="density"
750 The scaling of the probabilities for the level bins in each frequency band.
751 Must be one of:
752
753 - ``"counts"``: the number of frames with that level;
754 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
755 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
756
757 Note that the sum of counts (at a given frequency) is the total number of frames,
758 the sum of probability is 1, and the integral of the density is 1.
759 num_frames : int
760 The number of spectra used to compute this spectral probability.
761 averaging_time : float
762 The duration from which each spectrum was averaged over, in seconds.
763 dims : str or [str], optional
764 The dimensions of the data. Must have the same length as the number of dimensions in the data.
765 Mandatory used for `numpy` inputs, not used for `xarray` inputs.
766 coords : `xarray.DataArray.coords`
767 Additional coordinates for this data.
768 attrs : dict, optional
769 Additional attributes to store with this data.
770
771 """
772
773 _coords_set_by_init = {"frequency", "levels"}
774
775 @staticmethod
776 def _compute_counts(
777 roller,
778 binwidth,
779 min_level,
780 max_level,
781 frames_to_average,
782 ):
783 max_level = int(np.ceil(max_level / binwidth))
784 min_level = int(np.floor(min_level / binwidth))
785 levels = np.arange(min_level, max_level + 1) * binwidth
786
787 edges = levels[:-1] + 0.5 * binwidth
788 edges = 10 ** (edges / 10)
789
790 counts = np.zeros(roller.shape + (levels.size,))
791 indices = np.indices(roller.shape)
792 frames = roller.numpy_frames()
793
794 for frame_idx in range(roller.num_frames // frames_to_average):
795 frame = next(frames)
796 # Running average
797 for n in range(1, frames_to_average):
798 frame += next(frames)
799 if frames_to_average > 1:
800 frame /= frames_to_average
801 bin_index = np.digitize(frame, edges)
802 counts[*indices, bin_index] += 1
803
804 return counts, levels, frame_idx + 1
805
[docs]
806 @classmethod
807 def analyze_timedata(
808 cls,
809 data,
810 *,
811 filterbank,
812 binwidth=1,
813 min_level=0,
814 max_level=200,
815 averaging_time=None,
816 scaling="density",
817 ):
818 """Compute spectral probability from time data.
819
820 Parameters
821 ----------
822 data : `~uwacan.TimeData` or `~uwacan.recordings.AudioFileRecording`
823 The data to process.
824 filterbank : `~uwacan.Filterbank`
825 A pre-created instance used to filter the time data. Includes specification
826 of the frequency bands to use.
827 binwidth : float, default=1
828 The width of each level bin, in dB.
829 min_level : float, default=0
830 The lowest level to include in the processing, in dB.
831 max_level : float, default=200
832 The highest level to include in the processing, in dB.
833 averaging_time : float or None
834 The duration over which to average psd frames.
835 This is used to average the output frames from the filterbank.
836 scaling : str, default="density"
837 The desired scaling of the probabilities for the level bins in each frequency band.
838 Must be one of:
839
840 - ``"counts"``: the number of frames with that level;
841 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
842 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
843
844 Note that the sum of counts (at a given frequency) is the total number of frames,
845 the sum of probability is 1, and the integral of the density is 1.
846
847 Notes
848 -----
849 To have representative values, each frequency bin needs sufficient averaging time.
850 A coarse recommendation can be computed from the bandwidth and a desired uncertainty,
851 see `required_averaging`. The uncertainty should ideally be smaller than the level binwidth.
852 For computational efficiency, it is often faster to use a filterbank which has much shorter
853 frames than this, even if frequency binning is used in the filterbank.
854 This is why there is an option to have additional averaging of the PSD while computing
855 the spectral probability, set using the ``averaging_time`` parameter.
856 """
857 roller = filterbank.rolling(data)
858 if averaging_time:
859 frames_to_average = int(np.ceil(averaging_time / roller.settings["step"]))
860 else:
861 frames_to_average = 1
862 averaging_time = frames_to_average * roller.settings["step"]
863
864 counts, levels, total_frames = cls._compute_counts(
865 roller,
866 binwidth,
867 min_level,
868 max_level,
869 frames_to_average,
870 )
871
872 new = cls(
873 counts,
874 levels=levels,
875 frequency=roller.frequency,
876 dims=roller.dims + ("levels",),
877 coords=roller.coords | {"time": _core.time_to_np(data.time_window.center)},
878 scaling="counts",
879 binwidth=binwidth,
880 num_frames=total_frames,
881 averaging_time=averaging_time,
882 )
883 return new.with_probability_scale(scaling)
884
[docs]
885 @classmethod
886 def analyze_spectrogram(
887 cls,
888 spectrogram,
889 *,
890 binwidth=1,
891 min_level=0,
892 max_level=200,
893 averaging_time=None,
894 scaling="density",
895 ):
896 """Compute spectral probability from a spectrogram.
897
898 Parameters
899 ----------
900 spectrogram : `~uwacan.Spectrogram`
901 The spectrogram to process.
902 binwidth : float, default=1
903 The width of each level bin, in dB.
904 min_level : float, default=0
905 The lowest level to include in the processing, in dB.
906 max_level : float, default=200
907 The highest level to include in the processing, in dB.
908 averaging_time : float or None
909 The duration over which to average psd frames.
910 This is used to average the output frames from the filterbank.
911 scaling : str, default="density"
912 The desired scaling of the probabilities for the level bins in each frequency band.
913 Must be one of:
914
915 - ``"counts"``: the number of frames with that level;
916 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
917 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
918
919 Note that the sum of counts (at a given frequency) is the total number of frames,
920 the sum of probability is 1, and the integral of the density is 1.
921
922 Notes
923 -----
924 To have representative values, each frequency bin needs sufficient averaging time.
925 A coarse recommendation can be computed from the bandwidth and a desired uncertainty,
926 see `required_averaging`. The uncertainty should ideally be smaller than the level binwidth.
927 For efficiency, the spectrogram is often computed with a much shorter frame duration.
928 This is why there is an option to have additional averaging of the PSD while computing
929 the spectral probability, set using the ``averaging_time`` parameter.
930 """
931 if averaging_time:
932 frames_to_average = int(np.ceil(averaging_time / spectrogram.attrs["frame_step"]))
933 else:
934 frames_to_average = 1
935 averaging_time = frames_to_average * spectrogram.attrs["frame_step"]
936
937 roller = spectrogram.rolling()
938
939 counts, levels, total_frames = cls._compute_counts(
940 roller,
941 binwidth,
942 min_level,
943 max_level,
944 frames_to_average,
945 )
946
947 new = cls(
948 counts,
949 levels=levels,
950 frequency=spectrogram.frequency,
951 dims=roller.dims + ("levels",),
952 coords=roller.coords | {"time": _core.time_to_np(spectrogram.time_window.center)},
953 scaling="counts",
954 binwidth=binwidth,
955 num_frames=total_frames,
956 averaging_time=averaging_time,
957 )
958 return new.with_probability_scale(scaling)
959
960 def __init__(
961 self,
962 data,
963 levels=None,
964 binwidth=None,
965 frequency=None,
966 frequency_band_lower=None,
967 frequency_band_upper=None,
968 bandwidth=None,
969 scaling=None,
970 num_frames=None,
971 averaging_time=None,
972 dims=None,
973 coords=None,
974 attrs=None,
975 **kwargs,
976 ):
977 super().__init__(
978 data, dims=dims, coords=coords, attrs=attrs,
979 frequency=frequency,
980 frequency_band_lower=frequency_band_lower, frequency_band_upper=frequency_band_upper,
981 bandwidth=bandwidth,
982 **kwargs
983 )
984 if levels is not None:
985 self.data.coords["levels"] = levels
986
987 if binwidth is not None:
988 self.data.attrs["binwidth"] = binwidth
989 elif "binwidth" not in self.data.attrs:
990 self.data.attrs["binwidth"] = np.mean(np.diff(self.levels))
991
992 if scaling is not None:
993 self.data.attrs["scaling"] = scaling
994
995 if num_frames is not None:
996 self.data.attrs["num_frames"] = num_frames
997
998 if averaging_time is not None:
999 self.data.attrs["averaging_time"] = averaging_time
1000
1001 @property
1002 def levels(self):
1003 """The dB levels the probabilities are for."""
1004 return self.data.levels
1005
[docs]
1006 def with_probability_scale(self, new_scale):
1007 """Rescale the probability data according to a new scaling method.
1008
1009 This method scales the data to the specified ``new_scale``.
1010 The method supports conversions between three scales:
1011
1012 - ``"counts"``: Represents raw event counts.
1013 - ``"probability"``: Represents probability, calculated as counts divided
1014 by the total number of frames.
1015 - ``"density"``: Represents density, calculated as probability divided
1016 by the bin width.
1017
1018 The rescaling is based on metadata such as the number of frames and bin width.
1019
1020 Parameters
1021 ----------
1022 new_scale : {"counts", "probability", "density"}
1023 The new scaling method to apply to the data.
1024
1025 Returns
1026 -------
1027 scaled : `SpectralProbability`
1028 Data with the new scaling.
1029
1030 """
1031 if new_scale not in {"counts", "probability", "density"}:
1032 raise ValueError(f"Unknown probability scaling '{new_scale}'")
1033
1034 current_scale = self.data.attrs["scaling"]
1035 if current_scale != new_scale:
1036 # We need to rescale
1037 # counts / num_frames = probability
1038 # probability / binwidth = density
1039
1040 if "counts" in (new_scale, current_scale):
1041 if "num_frames" not in self.data.attrs:
1042 raise ValueError(
1043 f"Cannot rescale from '{current_scale} to {new_scale} without knowing the number of frames analyzed."
1044 )
1045 num_frames = self.data.attrs["num_frames"]
1046
1047 if "density" in (new_scale, current_scale):
1048 if "binwidth" not in self.data.attrs:
1049 raise ValueError(
1050 f"Cannot rescale from '{current_scale} to {new_scale} without knowing the binwidth."
1051 )
1052 binwidth = self.data.attrs["binwidth"]
1053
1054 if current_scale == "counts":
1055 if new_scale == "probability":
1056 scale = 1 / num_frames
1057 elif new_scale == "density":
1058 scale = 1 / (num_frames * binwidth)
1059 elif current_scale == "probability":
1060 if new_scale == "counts":
1061 scale = num_frames
1062 elif new_scale == "density":
1063 scale = 1 / binwidth
1064 elif current_scale == "density":
1065 if new_scale == "counts":
1066 scale = num_frames * binwidth
1067 elif new_scale == "probability":
1068 scale = binwidth
1069
1070 new = self * scale
1071 new.data.attrs["scaling"] = new_scale
1072 return new
1073
1074 def _figure_template(self, **kwargs):
1075 template = super()._figure_template(**kwargs)
1076 template.layout.update(
1077 yaxis=dict(
1078 title_text="Level in dB. re 1μPa<sup>2</sup>/Hz",
1079 ),
1080 )
1081 template.data.update(
1082 heatmap=[
1083 dict(
1084 colorscale="viridis",
1085 colorbar_title_side="right",
1086 hovertemplate="%{x:.5s}Hz<br>%{y}dBHz<br>%{z}<extra></extra>",
1087 )
1088 ]
1089 )
1090 return template
1091
[docs]
1092 def plot(self, logarithmic_probabilities=True, **kwargs):
1093 """Make a heatmap trace of this data.
1094
1095 Parameters
1096 ----------
1097 logarithmic_probabilities : bool, default=True
1098 Toggles using a logarithmic colorscale for the probabilities.
1099 **kwargs : dict
1100 Keywords that will be passed to `~plotly.graph_objects.Heatmap`.
1101 Some useful keywords are:
1102
1103 - ``colorscale`` chooses the colorscale, e.g., ``"viridis"``, ``"delta"``, ``"twilight"``.
1104 - ``zmin`` and ``zmax`` sets the color range.
1105
1106 """
1107 import plotly.graph_objects as go
1108
1109 if set(self.dims) != {"levels", "frequency"}:
1110 raise ValueError(
1111 f"Cannot make heatmap of spectral probability data with dimensions '{self.dims}'. "
1112 "Use the `.groupby(dim)` method to loop over extra dimensions."
1113 )
1114
1115 data = self.data
1116 non_zero = (data != 0).any("frequency")
1117 min_level = data.levels[non_zero][0]
1118 max_level = data.levels[non_zero][-1]
1119 data = data.sel(levels=slice(min_level, max_level))
1120
1121 hovertemplate = "%{x:.5s}Hz<br>%{y}dB<br>"
1122 if data.attrs["scaling"] == "probability":
1123 data = data * 100
1124 colorbar_title = "Probability in %"
1125 hovertemplate += "%{customdata[0]:.5g}%"
1126 elif data.attrs["scaling"] == "density":
1127 data = data * 100
1128 colorbar_title = "Probability density in %/dB"
1129 hovertemplate += "%{customdata[0]:.5g}%/dB"
1130 elif data.attrs["scaling"] == "counts":
1131 data = data
1132 colorbar_title = "Total occurrences"
1133 hovertemplate += "#%{customdata[0]}"
1134 else:
1135 # This should never happen.
1136 raise ValueError(f"Unknown probability scaling '{data.attrs['scaling']}'")
1137
1138 data = data.transpose("levels", "frequency")
1139 customdata = data.data[..., None]
1140
1141 if "zmax" in kwargs:
1142 p_max = kwargs["zmax"]
1143 else:
1144 p_max = data.max().item()
1145
1146 if "zmin" in kwargs:
1147 p_min = kwargs["zmin"]
1148 if p_min == 0 and logarithmic_probabilities:
1149 # Cannot use a zero value to compute limits, since it maps to -inf
1150 p_min = data.where(data != 0).min().item()
1151 kwargs["zmin"] = p_min
1152 else:
1153 p_min = data.where(data != 0).min().item()
1154
1155 if logarithmic_probabilities:
1156 p_max = np.log10(p_max)
1157 p_min = np.log10(p_min)
1158
1159 if "zmax" in kwargs:
1160 kwargs["zmax"] = np.log10(kwargs["zmax"])
1161 if "zmin" in kwargs:
1162 kwargs["zmin"] = np.log10(kwargs["zmin"])
1163
1164 with np.errstate(divide="ignore"):
1165 data = np.log10(data)
1166
1167 # Making log-spaced ticks
1168 n_ticks = 5 # This is just a value to aim for. It usually works good to aim for 5 ticks.
1169 if np.ceil(p_max) - np.floor(p_min) + 1 >= n_ticks:
1170 # Ticks as 10^n, selecting every kth n as needed
1171 decimation = round((p_max - p_min + 1) / n_ticks)
1172 tick_max = int(np.ceil(p_max / decimation))
1173 tick_min = int(np.floor(p_min / decimation))
1174 tickvals = np.arange(tick_min, tick_max + 1) * decimation
1175 elif np.ceil(2 * (p_max - p_min)) + 1 >= n_ticks:
1176 # Ticks as [1, 3] * 10^n
1177 tick_max = int(np.ceil(p_max * 2))
1178 tick_min = int(np.floor(p_min * 2))
1179 tickvals = np.arange(tick_min, tick_max + 1) / 2
1180 # Round ticks so that 10**tick has one decimal
1181 tickvals = np.log10(np.round(10**tickvals / 10 ** np.floor(tickvals)) * 10 ** np.floor(tickvals))
1182 elif np.ceil(3 * (p_max - p_min)) + 1 >= n_ticks:
1183 # Ticks as [1, 2, 5] * 10^n
1184 tick_max = int(np.ceil(p_max * 3))
1185 tick_min = int(np.floor(p_min * 3))
1186 tickvals = np.arange(tick_min, tick_max + 1) / 3
1187 # Round ticks so that 10**tick has one decimal
1188 tickvals = np.log10(np.round(10**tickvals / 10 ** np.floor(tickvals)) * 10 ** np.floor(tickvals))
1189 else:
1190 # Linspaced ticks as [1, 2, 5] * n
1191 tick_min = 10**p_min
1192 tick_max = 10**p_max
1193 spacing = (tick_max - tick_min) / n_ticks
1194 # Round spacing to the nearest [1,2,5] * 10^n
1195 magnitude = np.floor(np.log10(spacing))
1196 mantissa = spacing / 10**magnitude
1197 if mantissa < 2:
1198 mantissa = 1
1199 elif mantissa < 5:
1200 mantissa = 2
1201 else:
1202 mantissa = 5
1203 spacing = mantissa * 10**magnitude
1204 tick_min = int(np.floor(tick_min / spacing))
1205 tick_max = int(np.ceil(tick_max / spacing))
1206 tickvals = np.arange(tick_min, tick_max + 1) * spacing
1207 tickvals = np.log10(tickvals)
1208
1209 ticktext = [f"{10.**tick:.3g}" for tick in tickvals]
1210 else:
1211 tickvals = ticktext = None
1212
1213 trace = go.Heatmap(
1214 x=data.frequency,
1215 y=data.levels,
1216 z=data,
1217 customdata=customdata,
1218 hovertemplate=hovertemplate,
1219 colorbar_tickvals=tickvals,
1220 colorbar_ticktext=ticktext,
1221 colorbar_title_text=colorbar_title,
1222 zmax=p_max + (p_max - p_min) * 0.05,
1223 zmin=p_min - (p_max - p_min) * 0.05,
1224 )
1225 return trace.update(**kwargs)
1226
[docs]
1227 def plot_percentile(self, percentile, **kwargs):
1228 import plotly.graph_objects as go
1229
1230 ecdf = self.data.cumsum("levels") * self.attrs["binwidth"]
1231 p = ecdf.where(ecdf > percentile).idxmin("levels")
1232 trace = go.Scatter(
1233 x=p.frequency,
1234 y=p,
1235 name=f"{percentile*100:.0f}th percentile",
1236 )
1237 return trace.update(**kwargs)
1238
1239
[docs]
1240class SpectralProbabilitySeries(SpectralProbability, _core.TimeData):
1241 """Handling of spectral probability series data.
1242
1243 The spectral probability series is a series of specral probabilites
1244 computed from shorter segments of time data, i.e., a time-series
1245 of spectral probabilites.
1246 As such, it describes a time-varying spectral probability, e.g.,
1247 the probability density function (histogram) at a certain frequency
1248 band as a function of sound pressure level, but varying over time.
1249
1250 Parameters
1251 ----------
1252 data : array_like
1253 A `numpy.ndarray` or a `xarray.DataArray` with the data.
1254 time : array_like, optional
1255 A `numpy.ndarray` with ``dtype=datetime64[ns]`` containing time stamps for each spectral probability.
1256 start_time : time_like, optional
1257 The start time for the first spectral probability.
1258 This should ideally be a proper time type, but it will be parsed if it is a string.
1259 Defaults to "now" if not given.
1260 samplerate : float, optional
1261 The samplerate for this data, in Hz. This refers to the rate of the spectral probabilities,
1262 not the time signal used to compute the spectra.
1263 If the ``data`` is a `numpy.ndarray`, this has to be given.
1264 If the ``data`` is a `xarray.DataArray` which already has a time coordinate,
1265 this can be omitted.
1266 levels : array_like, optional
1267 The dB level represented by the bins. Mandatory if ``data`` is a `numpy.ndarray`.
1268 binwidth : float, optional
1269 The width of the bins. Computed from the levels if not given.
1270 frequency : array_like, optional
1271 The frequencies corresponding to the data. Mandatory if ``data`` is a `numpy.ndarray`.
1272 frequency_band_lower : array_like, optional
1273 The lower edge of each frequency band. Must be provided together with
1274 `frequency_band_upper`. This is the preferred method for specifying
1275 frequency band information.
1276 frequency_band_upper : array_like, optional
1277 The upper edge of each frequency band. Must be provided together with
1278 `frequency_band_lower`. This is the preferred method for specifying
1279 frequency band information.
1280 bandwidth : float, optional
1281 A single bandwidth value valid for all frequencies. This is a convenience
1282 method that assumes centered frequency bands. For per-frequency bandwidth,
1283 use `frequency_band_lower`/`frequency_band_upper` instead.
1284 scaling : str, default="density"
1285 The scaling of the probabilities for the level bins in each frequency band.
1286 Must be one of:
1287
1288 - ``"counts"``: the number of frames with that level;
1289 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
1290 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
1291
1292 Note that the sum of counts (at a given frequency) is the total number of frames,
1293 the sum of probability is 1, and the integral of the density is 1.
1294 num_frames : int
1295 The number of spectra used to compute each spectral probability.
1296 averaging_time : float
1297 The duration from which each spectrum was averaged over. This is separate from the duration that each specral probability covers.
1298 dims : str or [str], optional
1299 The dimensions of the data. Must have the same length as the number of dimensions in the data.
1300 Mandatory used for `numpy` inputs, not used for `xarray` inputs.
1301 coords : `xarray.DataArray.coords`
1302 Additional coordinates for this data.
1303 attrs : dict, optional
1304 Additional attributes to store with this data
1305 """
1306
1307 _coords_set_by_init = {"frequency", "levels", "time"}
1308
1309 @classmethod
1310 def _analyze_segments(
1311 cls,
1312 data,
1313 *,
1314 analyzer,
1315 binwidth=1,
1316 min_level=0,
1317 max_level=200,
1318 averaging_time=None,
1319 scaling="density",
1320 segment_duration=None,
1321 segment_overlap=None,
1322 segment_step=None,
1323 filepath=None,
1324 status=None,
1325 ):
1326 """Analyse segments of data using the provided analyzer function.
1327
1328 This method is a helper method for the `analyze_timedata` and `analyze_spectrogram` methods.
1329
1330 Parameters
1331 ----------
1332 data : `~uwacan.TimeData` or `~uwacan.Spectrogram`
1333 The data to process.
1334 analyzer : callable
1335 The function to use for analyzing each segment.
1336 binwidth : float, default=1
1337 The width of each level bin, in dB.
1338 min_level : float, default=0
1339 The lowest level to include in the processing, in dB.
1340 max_level : float, default=200
1341 The highest level to include in the processing, in dB.
1342 averaging_time : float or None
1343 The duration over which to average psd frames.
1344 scaling : str, default="density"
1345 The desired scaling of the probabilities for the level bins in each frequency band.
1346 segment_duration : float
1347 The duration of each time segment used to compute one spectral probability.
1348 filepath : str, optional
1349 The file path where the results should be saved.
1350 status : bool or callable, optional
1351 Status reporting mechanism for the segments being processed.
1352
1353 Returns
1354 -------
1355 results : `SpectralProbabilitySeries`
1356 The concatenated results from all segments.
1357 """
1358 if not status:
1359 def status(time_window):
1360 pass
1361 elif status == True:
1362 def status(time_window):
1363 print(f"\rComputing segment {time_window.start.format_rfc3339()} to {time_window.stop.format_rfc3339()}", end="")
1364
1365 if filepath is None:
1366 results = []
1367
1368 for segment in data.rolling(duration=segment_duration, overlap=segment_overlap, step=segment_step):
1369 status(segment.time_window)
1370 segment = analyzer(
1371 segment,
1372 binwidth=binwidth,
1373 min_level=min_level,
1374 max_level=max_level,
1375 averaging_time=averaging_time,
1376 scaling=scaling,
1377 )
1378 if filepath is None:
1379 results.append(segment)
1380 else:
1381 segment.save(filepath, append_dim="time")
1382
1383 if filepath is None:
1384 return _core.concatenate(results, dim="time", cls=cls)
1385 else:
1386 return cls.load(filepath)
1387
[docs]
1388 @classmethod
1389 def analyze_timedata(
1390 cls,
1391 data,
1392 *,
1393 filterbank,
1394 binwidth=1,
1395 min_level=0,
1396 max_level=200,
1397 averaging_time=None,
1398 scaling="density",
1399 segment_duration=None,
1400 segment_overlap=None,
1401 segment_step=None,
1402 filepath=None,
1403 status=None,
1404 ):
1405 """Compute spectral probability segments in a recording.
1406
1407 Parameters
1408 ----------
1409 data : `~uwacan.TimeData` or `recordings.AudioFileRecording`
1410 The recording to process.
1411 filterbank : `~uwacan.Filterbank`
1412 A pre-created instance used to filter the time data. Includes specification
1413 of the frequency bands to use
1414 binwidth : float, default=1
1415 The width of each level bin, in dB.
1416 min_level : float, default=0
1417 The lowest level to include in the processing, in dB.
1418 max_level : float, default=200
1419 The highest level to include in the processing, in dB.
1420 averaging_time : float or None
1421 The duration over which to average psd frames.
1422 This is used to average the output frames from the filterbank.
1423 scaling : str, default="density"
1424 The desired scaling of the probabilities for the level bins in each frequency band.
1425 Must be one of:
1426
1427 - ``"counts"``: the number of frames with that level;
1428 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
1429 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
1430
1431 Note that the sum of counts (at a given frequency) is the total number of frames,
1432 the sum of probability is 1, and the integral of the density is 1.
1433 segment_duration : float
1434 The duration of each time segment used to compute one spectral probability.
1435 filepath : str, optional
1436 The file path where the results should be saved, if desired. If ``None`` (default), the results are
1437 concatenated in memory and returned. If provided, each segment result is saved to this file, and the
1438 concateneted results on disk are returned.
1439 status : bool or callable, optional
1440 Status reporting mechanism for the segments being processed. If ``True``, a default status message is
1441 printed to the console showing the time window being processed. If a callable function
1442 is provided, it will be called with the segment's ``time_window``.
1443
1444 Notes
1445 -----
1446 To have representative values, each frequency bin needs sufficient averaging time.
1447 A coarse recommendation can be computed from the bandwidth and a desired uncertainty,
1448 see `required_averaging`. The uncertainty should ideally be smaller than the level binwidth.
1449 For computational efficiency, it is often faster to use a filterbank which has much shorter
1450 frames than this, even if frequency binning is used in the filterbank.
1451 This is why there is an option to have additional averaging of the PSD while computing
1452 the spectral probability, set using the ``averaging_time`` parameter.
1453 """
1454 def analyzer(segment, **kwargs):
1455 return SpectralProbability.analyze_timedata(segment, filterbank=filterbank, **kwargs)
1456
1457 return cls._analyze_segments(
1458 data,
1459 analyzer=analyzer,
1460 binwidth=binwidth,
1461 min_level=min_level,
1462 max_level=max_level,
1463 averaging_time=averaging_time,
1464 scaling=scaling,
1465 segment_duration=segment_duration,
1466 segment_overlap=segment_overlap,
1467 segment_step=segment_step,
1468 filepath=filepath,
1469 status=status,
1470 )
1471
[docs]
1472 @classmethod
1473 def analyze_spectrogram(
1474 cls,
1475 data,
1476 *,
1477 binwidth=1,
1478 min_level=0,
1479 max_level=200,
1480 averaging_time=None,
1481 scaling="density",
1482 segment_duration=None,
1483 segment_overlap=None,
1484 segment_step=None,
1485 filepath=None,
1486 status=None,
1487 ):
1488 """Compute spectral probability segments from a spectrogram.
1489
1490 Parameters
1491 ----------
1492 data : `~uwacan.Spectrogram`
1493 The spectrogram to process.
1494 binwidth : float, default=1
1495 The width of each level bin, in dB.
1496 min_level : float, default=0
1497 The lowest level to include in the processing, in dB.
1498 max_level : float, default=200
1499 The highest level to include in the processing, in dB.
1500 averaging_time : float or None
1501 The duration over which to average psd frames.
1502 This is used to average the output frames from the filterbank.
1503 scaling : str, default="density"
1504 The desired scaling of the probabilities for the level bins in each frequency band.
1505 Must be one of:
1506
1507 - ``"counts"``: the number of frames with that level;
1508 - ``"probability"``: how often a certain level occurred, i.e., ``counts / num_frames``;
1509 - ``"density"``: the probability density at a certain level, i.e., ``probability / binwidth``.
1510
1511 Note that the sum of counts (at a given frequency) is the total number of frames,
1512 the sum of probability is 1, and the integral of the density is 1.
1513 segment_duration : float
1514 The duration of each time segment used to compute one spectral probability.
1515 filepath : str, optional
1516 The file path where the results should be saved, if desired. If ``None`` (default), the results are
1517 concatenated in memory and returned. If provided, each segment result is saved to this file, and the
1518 concateneted results on disk are returned.
1519 status : bool or callable, optional
1520 Status reporting mechanism for the segments being processed. If ``True``, a default status message is
1521 printed to the console showing the time window being processed. If a callable function
1522 is provided, it will be called with the segment's ``time_window``.
1523
1524 Notes
1525 -----
1526 To have representative values, each frequency bin needs sufficient averaging time.
1527 A coarse recommendation can be computed from the bandwidth and a desired uncertainty,
1528 see `required_averaging`. The uncertainty should ideally be smaller than the level binwidth.
1529 For computational efficiency, it is often faster to use a filterbank which has much shorter
1530 frames than this, even if frequency binning is used in the filterbank.
1531 This is why there is an option to have additional averaging of the PSD while computing
1532 the spectral probability, set using the ``averaging_time`` parameter.
1533 """
1534 return cls._analyze_segments(
1535 data,
1536 analyzer=SpectralProbability.analyze_spectrogram,
1537 binwidth=binwidth,
1538 min_level=min_level,
1539 max_level=max_level,
1540 averaging_time=averaging_time,
1541 scaling=scaling,
1542 segment_duration=segment_duration,
1543 segment_overlap=segment_overlap,
1544 segment_step=segment_step,
1545 filepath=filepath,
1546 status=status,
1547 )
1548
1549 def __init__(
1550 self,
1551 data,
1552 levels=None,
1553 binwidth=None,
1554 frequency=None,
1555 frequency_band_lower=None,
1556 frequency_band_upper=None,
1557 bandwidth=None,
1558 scaling=None,
1559 num_frames=None,
1560 averaging_time=None,
1561 time=None,
1562 start_time=None,
1563 samplerate=None,
1564 **kwargs,
1565 ):
1566 super().__init__(
1567 data, dims=dims, coords=coords, attrs=attrs,
1568 frequency=frequency,
1569 frequency_band_lower=frequency_band_lower, frequency_band_upper=frequency_band_upper,
1570 bandwidth=bandwidth,
1571 time=time, start_time=start_time, samplerate=samplerate,
1572 levels=levels, binwidth=binwidth, num_frames=num_frames, scaling=scaling, averaging_time=averaging_time,
1573 **kwargs
1574 )
1575
[docs]
1576 def plot(self, **kwargs):
1577 raise ValueError(f"Cannot directly plot SpectralProbabilitySeries! You likely want to first average over time.")
1578
[docs]
1579 @classmethod
1580 def from_dataset(cls, data, **kwargs):
1581 if "time" not in data.dims:
1582 return SpectralProbability.from_dataset(data, **kwargs)
1583 return super().from_dataset(data, **kwargs)
1584
1585
[docs]
1586def level_uncertainty(averaging_time, bandwidth):
1587 r"""Compute the level uncertainty for a specific averaging time and frequency bandwidth.
1588
1589 The level uncertainty here is derived from the mean and standard deviation of the power
1590 of sampled white gaussian noise. The uncertainty is the decibel difference between
1591 one half standard deviation above the mean and one half standard deviation below the mean.
1592 This is very similar to taking the standard deviation of the levels instead of the powers.
1593
1594 Parameters
1595 ----------
1596 averaging_time : float
1597 The averaging time in seconds.
1598 bandwidth : float
1599 The observed bandwidth, in Hz.
1600 For "full-band" sampled signals, this is half of the samplerate.
1601
1602 Returns
1603 -------
1604 uncertainty : float
1605 Equals ``10 * log10((2 * mu ** 0.5 + 1) / (2 * mu ** 0.5 - 1))``
1606 for ``mu = averaging_time * bandwidth``.
1607
1608 See Also
1609 --------
1610 required_averaging: Implements the opposite computation.
1611
1612 Notes
1613 -----
1614 Start with gaussian white noise in the time domain,
1615
1616 .. math:: x[n] \sim \mathcal{N}(0, \sigma^2).
1617
1618 The DFT is computed
1619
1620 .. math:: X[k] = \sum_{n=0}^{N-1} x[n] \exp(-2\pi i n k / N)
1621
1622 using :math:`N` samples in the input signal.
1623
1624 The trick is to write the DFT bins as real and complex, then they will be
1625
1626 .. math:: X[k] \sim \mathcal{N}(0, N \sigma^2 / 2) + i \mathcal{N}(0, N \sigma^2 / 2)
1627
1628 and rescale this to two standard normal distributions,
1629
1630 .. math:: X[k] = Z_r[k] \sqrt{N/2} \sigma + i Z_i[k] \sqrt{N/2} \sigma = (Z_r[k] + i Z_i[k]) \sqrt{N/2} \sigma
1631
1632 which then have
1633
1634 .. math::
1635 Z_r[k] \sim \mathcal{N}(0, 1) \qquad Z_i[k] \sim \mathcal{N}(0, 1)
1636
1637 Z_r^2[k] \sim \chi^2(1) \qquad Z_i^2[k] \sim \chi^2(1).
1638
1639 We also need the chi-squared and Gamma relations (using shape :math:`k` and scale :math:`\theta` for Gamma distributions)
1640
1641 .. math::
1642 \sum_l \chi^2(\nu_l) = \chi^2\left(\sum_l \nu_l\right)
1643
1644 \chi^2(\nu) = \Gamma(\nu/2, 2)
1645
1646 c\Gamma(k, \theta) = \Gamma(k, c\theta)
1647
1648 \sum_l \Gamma(k_l, \theta) = \Gamma\left(\sum_l k_l, \theta\right)
1649
1650 which directly lead to
1651
1652 .. math::
1653 \sum_{l=1}^{L} c \chi^2(1) = \Gamma(L/2, 2c)
1654
1655 \sum_{l=1}^{L} c \Gamma(k, \theta) = \Gamma(kL, c\theta)
1656
1657 We have the PSD in each bin computed as
1658
1659 .. math::
1660 PSD[k] &= (|X[k]|^2 + |X[-k]|^2) \frac{1}{N f_s} \\
1661 &= (\Re\{X[k]\}^2 + \Im\{X[k]\}^2 + \Re\{X[-k]\}^2 + \Im\{X[-k]\}^2) \frac{1}{N f_s} \\
1662 &= (Z_r^2[k] + Z_i^2[k] + Z_r^2[-k] + Z_i^2[-k]) \frac{N/2 \sigma^2}{N f_s} \\
1663 &= (Z_r^2[k] + Z_i^2[k] + Z_r^2[-k] + Z_i^2[-k]) \frac{\sigma^2}{2f_s} \\
1664 &= (Z_r^2[k] + Z_i^2[k]) \frac{\sigma^2}{f_s}
1665
1666 where :math:`Z_r[k] = Z_r[-k]` and :math:`Z_i[k] = - Z_i[-k]` have been used in the last step.
1667
1668 If we look at normalized PSD, defined as
1669
1670 .. math:: NPSD[k] = PSD[k] \cdot \frac{f_s}{\sigma^2} = Z_r^2[k] + Z_i^2[k],
1671
1672 it will have a distribution as
1673
1674 .. math:: NPSD[k] \sim \chi^2(2) = \Gamma(1, 2).
1675
1676 This then gives the distribution of the PSD as
1677
1678 .. math:: PSD[k] \sim \Gamma(1, 2\sigma^2/f_s).
1679
1680
1681 When we compute the average PSD in a frequency band, we take the mean of a number of individual PSD bins.
1682 They are statistically independent samples of the same Gamma distribution (since we have white noise).
1683 The band level :math:`B[k_l, k_u]` is calculated as
1684
1685 .. math:: B[k_l, k_u] = \frac{1}{k_u - k_l} \sum_{k=k_l}^{k_u - 1} PSD[k]
1686
1687 with the distribution
1688
1689 .. math::
1690 B[k_l, k_u] &\sim \frac{1}{k_u - k_l} \sum_{k=k_l}^{k_u - 1} \Gamma(1, 2\sigma^2/f_s)\\
1691 &= \Gamma\left(k_u - k_l, \frac{2\sigma^2}{f_s (k_u - k_l)}\right).
1692
1693 Finally, taking :math:`L` averages of :math:`B[k_l, k_u]` gives us
1694
1695 .. math::
1696 \tilde B[k_l, k_u] &\sim \frac{1}{L} \sum_{l=1}^{L} \Gamma\left(k_u - k_l, \frac{2\sigma^2}{f_s (k_u - k_l)}\right) \\
1697 &= \Gamma\left(L(k_u - k_l), \frac{2\sigma^2}{L f_s (k_u - k_l)}\right) \\
1698 &= \Gamma\left( \mu, \frac{2\sigma^2}{\mu f_s}\right)
1699
1700 where we have defined the number of averaged values :math:`\mu = L(k_u - k_l)`, i.e., the number of time windows times the number of frequencies in a bin.
1701 Changing the number of time windows by a factor :math:`F` will change the number of frequency bins in a certain band by :math:`1/F`, so the number of averaged values remain constant.
1702 Taking the frequency band to be the entire spectrum gives us :math:`\mu = T f_s/2` values, where :math:`T` is the total sampling time.
1703 Looking back to the relation of summed and scaled chi-squared variables, we see that the first argument to the Gamma distribution is half of the number of independent chi-squared variables that are summed.
1704 This means that :math:`\mu = T f_s / 2` is consistent with that we have :math:`T f_s` independent samples.
1705
1706 In the end, we want to know the mean and variance of this value, which we get from properties of the Gamma distribution
1707
1708 .. math::
1709 E\left\{\Gamma(k, \theta)\right\} = k\theta
1710
1711 \text{Var}\left\{\Gamma(k, \theta)\right\} = k \theta^2
1712
1713 so we get the average band power density :math:`P=2\sigma^2/f_s` as expected (:math:`\sigma^2` power over :math:`f_s/2` bandwidth)
1714 and standard deviation :math:`\Delta P = \frac{2\sigma^2}{f_s\sqrt{\mu}} = P / \sqrt{\mu}`.
1715
1716 For a frequency band covering :math:`[f_l, f_u]` we need to know how many bins fall in this band.
1717 For a time window of length :math:`T`, we have :math:`T f_s / 2` bins, so :math:`f[k] = k/T`, with :math:`k=0\ldots T f_s / 2`.
1718 Then
1719
1720 .. math::
1721 k_l = f_l T \qquad k_u = f_u T \\
1722
1723 \Rightarrow k_u - k_l = (f_u - f_l) T.
1724
1725 Since the number of averaged values :math:`\mu` for multiple realizations of the same band average is independent of the number of bands used,
1726 we can always compute the standard deviation using the full length of the signal.
1727
1728 Since the standard deviation is the mean times another value, the corresponding logarithmic standard deviation is independent of the logarithmic mean.
1729 Writing the power as :math:`P\pm\Delta P/2 = P(1 \pm \frac{1}{2\sqrt{\mu}})` (remembering :math:`\mu = T(f_u - f_l)`), we can compute the logarithmic spread as
1730
1731 .. math::
1732 \Delta L &= 10\log(P + \Delta P/2) - 10\log(P - \Delta P/2) \\
1733 &= 10\log\left(P \left( 1 + \frac{1}{2\sqrt{\mu}}\right)\right) - 10\log\left(P \left( 1 - \frac{1}{2\sqrt{\mu}}\right)\right) \\
1734 &= 10\log\left(P \frac{2\sqrt{\mu} + 1}{2\sqrt{\mu}}\right) - 10\log\left(P \frac{2\sqrt{\mu} - 1}{2\sqrt{\mu}}\right) \\
1735 &= 10\log\left(\frac{2\sqrt{\mu} + 1}{2\sqrt{\mu} - 1}\right).
1736
1737 For a spread of less than :math:`\Delta` dB, we get
1738
1739 .. math::
1740 \Delta &\geq 10\log\left(\frac{2\sqrt{\mu} + 1}{2\sqrt{\mu} - 1}\right)
1741
1742 \Rightarrow
1743 \mu &\geq \left(\frac{10^{\Delta/10} + 1}{10^{\Delta/10} - 1}\right)^2 / 4.
1744
1745 """
1746 mu = averaging_time * bandwidth
1747 return 10 * np.log10((2 * mu**0.5 + 1) / (2 * mu**0.5 - 1))
1748
1749
[docs]
1750def required_averaging(level_uncertainty, bandwidth):
1751 """Compute the required averaging time to obtain a certain uncertainty in levels.
1752
1753 The level uncertainty here is derived from the mean and standard deviation of the power
1754 of sampled white gaussian noise. The uncertainty is the decibel difference between
1755 one half standard deviation above the mean and one half standard deviation below the mean.
1756 This is almost the same as the standard deviation of the decibel levels.
1757
1758 Parameters
1759 ----------
1760 level_uncertainty : float
1761 The desired maximum uncertainty.
1762 bandwidth : float
1763 The observed bandwidth, in Hz.
1764 For "full-band" sampled signals, this is half of the samplerate.
1765
1766 Returns
1767 -------
1768 averaging_time : float
1769 The minimum time to average.
1770
1771 See Also
1772 --------
1773 level_uncertainty: Implements the opposite computation, has documentation of formulas and full derivation.
1774
1775 """
1776 p = 10 ** (level_uncertainty / 10)
1777 mu = 0.25 * ((p + 1) / (p - 1)) ** 2
1778 return mu / bandwidth