1"""Some shared core functionality in the package.
2
3This contains mostly wrappers around `xarray` objects, and some very basic functions.
4A few of these are publicly available in the main package namespace. They should be
5accessed from there if used externally, but from here if used internally.
6
7
8Classes and functions only exposed here
9---------------------------------------
10.. autosummary::
11 :toctree: generated
12
13 time_to_np
14 time_to_datetime
15 time_frame_settings
16 TimeWindow
17 xrwrap
18 DataArrayWrap
19 DatasetWrap
20 Roller
21 TimeDataRoller
22
23Classes and functions exposed in the main package namespace
24-----------------------------------------------------------
25.. autosummary::
26
27 uwacan.TimeData
28 uwacan.FrequencyData
29 uwacan.TimeFrequencyData
30 uwacan.Transit
31 uwacan.dB
32
33"""
34
35import numpy as np
36import xarray as xr
37import whenever
38from datetime import datetime as _py_datetime
39import functools
40
41__all__ = [
42 "TimeWindow",
43 "dB",
44 "TimeData",
45 "FrequencyData",
46 "TimeFrequencyData",
47 "Transit",
48]
49
50
[docs]
51def time_to_np(input, **kwargs):
52 """Convert a time to `numpy.datetime64`."""
53 if isinstance(input, xr.DataArray):
54 input = input.data
55 if isinstance(input, np.datetime64) or (
56 isinstance(input, np.ndarray) and np.issubdtype(input.dtype, np.datetime64)
57 ):
58 return input.astype("datetime64[ns]")
59 if not isinstance(input, whenever.Instant):
60 input = time_to_datetime(input, **kwargs)
61 return np.datetime64(input.timestamp_nanos(), "ns")
62
63
[docs]
64def time_to_datetime(input, fmt="RFC 3339", tz="UTC"):
65 """Convert datetimes to the same internal format.
66
67 This function takes a few types of input and tries to convert
68 the input to a `whenever.Instant`.
69 - Any datetime-like input will be converted directly.
70 - np.datetime64 and Unix timestamps are treated similarly.
71 - Strings are parsed with ``fmt`` if given, otherwise a few different common formats are tried.
72
73 Parameters
74 ----------
75 input : datetime-like, string, or numeric.
76 The input data specifying the time.
77 fmt : string, optional
78 This can be one of the standard formats ``"RFC 3339"``, ``"RFC 2822"``, or ``"ISO 8601"``, handled by `whenever <https://whenever.readthedocs.io/en/latest/overview.html#formatting-and-parsing>`_.
79 Alternatively, a custom parsing specifier can be supplied, using `strptime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_.
80 tz : string, default "UTC"
81 The assumed timezone for the input, if it does not contain one.
82 Unix timestamps have no timezone, and np.datetime64 only supports UTC.
83
84 Returns
85 -------
86 time : whenever.Instant
87 The converted time.
88 """
89 if isinstance(input, str):
90 if input == "now":
91 return whenever.Instant.now()
92 if fmt == "RFC 3339":
93 return whenever.OffsetDateTime.parse_rfc3339(input).instant()
94 if fmt == "RFC 2822":
95 return whenever.OffsetDateTime.parse_rfc2822(input).instant()
96 if "ISO" in fmt:
97 return whenever.OffsetDateTime.parse_common_iso(input).instant()
98 if "z" in fmt:
99 return whenever.OffsetDateTime.strptime(input, fmt).instant()
100 if not isinstance(tz, str):
101 raise TypeError(f"Cannot handle time zone info `{tz}`")
102 if tz == "UTC":
103 return whenever.LocalDateTime.strptime(input, fmt).assume_utc()
104 if "/" in tz:
105 return whenever.LocalDateTime.strptime(input, fmt).assume_tz(tz).instant()
106 if ":" in tz:
107 if tz[0] == "+":
108 sign = 1
109 tz = tz[1:]
110 elif tz[0] == "-":
111 sign = -1
112 tz = tz[1:]
113 else:
114 sign = 1
115 hours, minutes = tz.split(":")
116 tz = sign * whenever.TimeDelta(hours=int(hours), minutes=int(minutes))
117 return whenever.LocalDateTime.strptime(input, fmt).assume_fixed_offset(tz).instant()
118
119 if isinstance(input, whenever.Instant):
120 return input
121 if isinstance(input, (whenever.OffsetDateTime, whenever.ZonedDateTime)):
122 return input.instant()
123 if isinstance(input, whenever.LocalDateTime):
124 return input.assume_utc()
125
126 if isinstance(input, _py_datetime):
127 if input.tzinfo is not None:
128 # The datetime has a timezone - the conversion will work
129 return whenever.Instant.from_py_datetime(input)
130 # Without a timezone in the datetime we hope that the user supplied the timezone via tz.
131 # The easiest way to convert is to go via a string...
132 fmt = "%Y%m%dT%H%M%S.%f"
133 return time_to_datetime(input.strftime(fmt), fmt=fmt, tz=tz)
134
135 if isinstance(input, xr.DataArray):
136 input = input.data
137 if isinstance(input, np.datetime64) or (
138 isinstance(input, np.ndarray) and np.issubdtype(input.dtype, np.datetime64)
139 ):
140 input = int(input.astype("timedelta64") // np.timedelta64(1, "ns"))
141 return whenever.Instant.from_timestamp_nanos(input)
142
143 if hasattr(input, "timestamp"):
144 if callable(input.timestamp):
145 input = input.timestamp()
146 else:
147 input = input.timestamp
148 return whenever.Instant.from_timestamp(input)
149
150 raise TypeError(f"Cannot find time conversion for `{type(input).__name__}` object")
151
152
[docs]
153class TimeWindow:
154 """Describes a start and stop point in time.
155
156 Give two and only two of the four basic parameters.
157 Less than two will not fully define a window, while
158 more than two will overdetermine the window.
159
160 Parameters
161 ----------
162 start : time_like
163 A window that starts at this time
164 stop : time_like
165 A window stat stops at this time
166 center : time_like
167 A window centered at this time
168 duration : float
169 A window with this duration, in seconds
170 extend : float
171 Extend the window defined by two of the four above
172 with this much at each end, in seconds.
173 """
174
175 def __init__(self, start=None, stop=None, center=None, duration=None, extend=None):
176 if start is not None:
177 start = time_to_datetime(start)
178 if stop is not None:
179 stop = time_to_datetime(stop)
180 if center is not None:
181 center = time_to_datetime(center)
182
183 if None not in (start, stop):
184 _start = start
185 _stop = stop
186 start = stop = None
187 elif None not in (center, duration):
188 _start = center.subtract(seconds=duration / 2)
189 _stop = center.add(seconds=duration / 2)
190 center = duration = None
191 elif None not in (start, duration):
192 _start = start
193 _stop = start.add(seconds=duration)
194 start = duration = None
195 elif None not in (stop, duration):
196 _stop = stop
197 _start = stop.subtract(seconds=duration)
198 stop = duration = None
199 elif None not in (start, center):
200 _start = start
201 _stop = start + (center - start) * 2
202 start = center = None
203 elif None not in (stop, center):
204 _stop = stop
205 _start = stop - (stop - center) * 2
206 stop = center = None
207 else:
208 raise TypeError("Needs two of the input arguments to determine time window.")
209
210 if (start, stop, center, duration) != (None, None, None, None):
211 raise TypeError("Cannot input more than two input arguments to a time window!")
212
213 if extend is not None:
214 _start = _start.subtract(seconds=extend)
215 _stop = _stop.add(seconds=extend)
216
217 self._start = _start
218 self._stop = _stop
219
[docs]
220 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
221 """Select a smaller window of time.
222
223 Parameters
224 ----------
225 time : time_window_like
226 An object that will be used to extract start and stop times.
227 start : time_like
228 A new window that starts at this time.
229 Give ``True`` to use the start of the existing window.
230 stop : time_like
231 A new window stat stops at this time
232 Give ``True`` to use the stop of the existing window.
233 center : time_like
234 A new window centered at this time
235 Give ``True`` to use the center of the existing window.
236 duration : float
237 A new window with this duration, in seconds
238 extend : float
239 Extend the new window defined by two of the four above
240 with this much at each end, in seconds.
241
242 Notes
243 -----
244 This takes the same basic inputs as `TimeWindow`, defining a window
245 with two out of four of ``start``, ``stop``, ``center``, and ``duration``.
246 Additionally, one of ``start``, ``stop``, ``center`` can be given as ``True``
247 instead of an actual time to use the times of the existing window.
248 If only one of ``start`` and ``stop`` is given, the other one is filled from
249 the existing window.
250
251 If a single positional argument is given, it should be time_window_like,
252 i.e., have a defined start and stop time, which will then be used.
253 This can be one of `TimeWindow`, and `xarray.Dataset`.
254 If it is a dataset, it must have a time attribute, and its minimum and maximum
255 will be used as the start and stop for the new window.
256 """
257 if time is None:
258 # Period specified with keyword arguments, convert to period.
259 if (start, stop, center, duration).count(None) == 3:
260 # Only one argument which has to be start or stop, fill the other from self.
261 if start is not None:
262 window = type(self)(start=start, stop=self.stop, extend=extend)
263 elif stop is not None:
264 window = type(self)(start=self.start, stop=stop, extend=extend)
265 else:
266 raise TypeError("Cannot create subwindow from arguments")
267 elif duration is not None and True in (start, stop, center):
268 if start is True:
269 window = type(self)(start=self.start, duration=duration, extend=extend)
270 elif stop is True:
271 window = type(self)(stop=self.stop, duration=duration, extend=extend)
272 elif center is True:
273 window = type(self)(center=self.center, duration=duration, extend=extend)
274 else:
275 raise TypeError("Cannot create subwindow from arguments")
276 else:
277 # The same types explicit arguments as the normal constructor
278 window = type(self)(start=start, stop=stop, center=center, duration=duration, extend=extend)
279 elif isinstance(time, type(self)):
280 window = time
281 elif isinstance(time, xr.Dataset):
282 window = type(self)(start=time.time.min(), stop=time.time.max(), extend=extend)
283 else:
284 # It's not a period, so it should be a single datetime. Parse or convert, check validity.
285 time = time_to_datetime(time)
286 if time not in self:
287 raise ValueError("Received time outside of contained window")
288 return time
289
290 if window not in self:
291 raise ValueError("Requested subwindow is outside contained time window")
292 return window
293
294 def __repr__(self):
295 return f"TimeWindow(start={self.start.format_rfc3339()}, stop={self.stop.format_rfc3339()})"
296
297 @property
298 def start(self):
299 """The start of this window."""
300 return self._start
301
302 @property
303 def stop(self):
304 """The stop of this window."""
305 return self._stop
306
307 @property
308 def center(self):
309 """The center of this window."""
310 return self.start.add(seconds=self.duration / 2)
311
312 @property
313 def duration(self):
314 """The duration of this window, in seconds."""
315 return (self.stop - self.start).in_seconds()
316
317 def __contains__(self, other):
318 if isinstance(other, type(self)):
319 return other.start in self and other.stop in self
320 return self.start <= time_to_datetime(other) <= self.stop
321
322
323def xr_ufunc(n_outputs=1, **apply_ufunc_kwargs):
324 """Decorator that wraps a function with `xr.apply_ufunc`.
325
326 This allows a function written for plain numpy arrays or scalars to
327 transparently accept `xarray.DataArray` inputs, with xarray handling
328 alignment, broadcasting, and coordinate propagation in a single pass
329 rather than on every intermediate operation.
330
331 The decorated function retains its original signature: positional and
332 keyword arguments are forwarded to the wrapped function via
333 `apply_ufunc`'s ``kwargs`` parameter.
334
335 Parameters
336 ----------
337 n_outputs : int, default 1
338 Number of output arrays returned by the function.
339 Sets ``output_core_dims`` to a list of ``n_outputs`` empty tuples
340 unless explicitly provided in `apply_ufunc_kwargs`.
341 **apply_ufunc_kwargs
342 Additional keyword arguments passed directly to `xr.apply_ufunc`,
343 e.g. ``input_core_dims``, ``exclude_dims``, ``vectorize``, ``dask``.
344
345 Examples
346 --------
347 Single-output function::
348
349 @xr_ufunc()
350 def distance_to(lat_1, lon_1, lat_2, lon_2):
351 ...
352
353 Multi-output function::
354
355 @xr_ufunc(n_outputs=2)
356 def shift_position(lat, lon, distance, bearing):
357 ...
358 return new_lat, new_lon
359 """
360 apply_ufunc_kwargs.setdefault("output_core_dims", [[] for _ in range(n_outputs)])
361 def decorator(func):
362 @functools.wraps(func)
363 def wrapper(*args, **kwargs):
364 return xr.apply_ufunc(
365 func,
366 *args,
367 **apply_ufunc_kwargs,
368 kwargs=kwargs,
369 )
370 return wrapper
371 return decorator
372
373
[docs]
374class xrwrap:
375 """Wrapper around `xarray` objects.
376
377 This base class exists to delegate work to our internal
378 `xarray` objects.
379 """
380
[docs]
381 @classmethod
382 def from_dataset(cls, dataset, **kwargs):
383 """Instantiate the class from a dataset.
384
385 This classmethod is mainly used to choose the correct class
386 to instantiate, depending on the data.
387 """
388 return cls(dataset, **kwargs)
389
390 def __init__(self, data, attrs=None):
391 if isinstance(data, xrwrap):
392 data = data.data
393 self._data = data
394 if attrs:
395 self._data.attrs.update(attrs)
396
397 @property
398 def attrs(self):
399 """Attributes stored in the data."""
400 return self.data.attrs
401
402 @property
403 def data(self):
404 """The contained data."""
405 return self._data
406
407 def __array_wrap__(self, data, context=None, transfer_attributes=True):
408 """Wrap output data in in a new object.
409
410 This takes data from some processing and wraps it back into a
411 suitable class. If no suitable class was found, the data is
412 returned as is.
413 """
414 try:
415 data = self.from_dataset(data)
416 except NotImplementedError:
417 return data
418 if transfer_attributes:
419 data.attrs.update(self.attrs)
420 return data
421
[docs]
422 def sel(self, indexers=None, method=None, tolerance=None, drop=False, drop_allnan=True, **indexers_kwargs):
423 """Select a subset of the data from the coordinate labels.
424
425 The selection is easiest done with keywords, e.g. ``obj.sel(sensor="Colmar 1")``
426 to select a specific sensor. For numerical coordinates, ``method="nearest"`` can
427 be quite useful. Use a slice to select a range of values, e.g.,
428 ``obj.sel(frequency=slice(10, 100))``.
429
430 For more details, see `xarray.DataArray.sel` and `xarray.Dataset.sel`.
431 """
432 new = self.data.sel(indexers=indexers, method=method, tolerance=tolerance, drop=drop, **indexers_kwargs)
433 if drop_allnan:
434 new = new.where(~new.isnull(), drop=True)
435 return self.__array_wrap__(new)
436
[docs]
437 def isel(self, indexers=None, drop=False, missing_dims="raise", drop_allnan=True, **indexers_kwargs):
438 """Select a subset of the data from the coordinate indices.
439
440 The selection is easiest done with keywords, e.g. ``obj.sel(sensor=0)``
441 to select the zeroth sensor. Use a slice to select a range of values, e.g.,
442 ``obj.sel(frequency=slice(10, 100))``.
443
444 For more details, see `xarray.DataArray.isel` and `xarray.Dataset.isel`.
445 """
446 new = self.data.isel(indexers=indexers, drop=drop, missing_dims=missing_dims, **indexers_kwargs)
447 if drop_allnan:
448 new = new.where(~new.isnull(), drop=True)
449 return self.__array_wrap__(new)
450
[docs]
451 def where(self, cond, other=xr.core.dtypes.NA, drop=False):
452 """Filter elements from this object according to a condition.
453
454 This method returns elements where the `cond` is True,
455 otherwise filling with `other`.
456 See `xarray.Dataset.where` for more details.
457
458 Parameters
459 ----------
460 cond : DataArray, Dataset, or callable
461 Locations at which to preserve this object's values. dtype must be `bool`.
462 If a callable, the callable is passed this object, and the result is used as
463 the value for cond.
464 other : scalar, DataArray, Dataset, or callable, optional
465 Value to use for locations in this object where ``cond`` is False.
466 By default, these locations are filled with NA. If a callable, it must
467 expect this object as its only parameter.
468 drop : bool, default: False
469 If True, coordinate labels that only correspond to False values of
470 the condition are dropped from the result.
471
472 Returns
473 -------
474 type(self)
475 An object wrapped using the same wrapper as the called object.
476 """
477 return self.from_dataset(self.data.where(cond, other=other, drop=drop))
478
479 @property
480 def coords(self):
481 """The coordinate (dimension) arrays for this data.
482
483 Refer to `xarray.DataArray.coords` and `xarray.Dataset.coords`.
484 """
485 return self.data.coords
486
487 @property
488 def dims(self):
489 """The dimensions of this data.
490
491 Refer to `xarray.DataArray.dims` and `xarray.Dataset.dims`.
492 """
493 return self.data.dims
494
495 @property
496 def sizes(self):
497 """Mapping from dimension names to lengths."""
498 return self.data.sizes
499
[docs]
500 def groupby(self, group):
501 for label, group in self.data.groupby(group, squeeze=False):
502 yield label, self.__array_wrap__(group.squeeze())
503
504 def _figure_template(self, **kwargs):
505 """Create default figure layout for this data."""
506 import plotly.graph_objects as go
507 import plotly.io as pio
508
509 template = go.layout.Template()
510 template.update(pio.templates[pio.templates.default])
511 return template
512
526
[docs]
527 def save(self, path, append_dim=None, **kwargs):
528 """Save the data to a Zarr file at the specified path.
529
530 The class name is stored as an attribute, so it can be used later to reconstruct the object.
531
532 Parameters
533 ----------
534 path : str or pathlib.Path
535 The file path where the data will be saved. The directory will be
536 created if it doesn't exist.
537 append_dim : str, optional
538 A dimension which should be used to append to existing data in the path.
539 **kwargs : dict, optional
540 Additional keyword arguments passed to the `xarray.Dataset.to_zarr` method, which
541 is responsible for saving the data to the Zarr format.
542 """
543 from pathlib import Path
544
545 path = Path(path)
546 path.parent.mkdir(parents=True, exist_ok=True)
547
548 module = self.__class__.__module__
549 name = self.__class__.__qualname__
550 if module is not None and module != "__builtin__":
551 name = module + "." + name
552
553 data = self.data.assign_attrs(__uwacan_class__=name)
554
555 if append_dim:
556 if append_dim not in data.dims:
557 data = data.expand_dims(append_dim)
558 if not path.exists():
559 # Cannot append if there is no data to append to, so we just write as normal
560 append_dim = None
561 if isinstance(data, xr.Dataset):
562 for var in data.dara_vars.values():
563 if np.issubdtype(var.dtype, np.datetime64):
564 var.encoding = {"units": "nanoseconds since 1970-01-01"}
565 for coord in data.coords.values():
566 if np.issubdtype(coord.dtype, np.datetime64):
567 coord.encoding = {"units": "nanoseconds since 1970-01-01"}
568
569 kwargs.setdefault("consolidated", False)
570 data.to_zarr(path, append_dim=append_dim, **kwargs)
571
[docs]
572 @classmethod
573 def load(cls, path, lookup_class=True, **kwargs):
574 """Load data from a Zarr file and optionally restore the original class.
575
576 This method loads data from a Zarr file and attempts to reconstruct the
577 original class that was used to save the data. The class information is
578 stored in the `__uwacan_class__` attribute of the dataset. If the class
579 is found, the method dynamically loads and instantiates it.
580
581 Parameters
582 ----------
583 path : str or pathlib.Path
584 The file path from which to load the data.
585 lookup_class : bool, default=True
586 If True attempts to restore the original class from the
587 metadata stored in the Zarr file. If False, the called class is used
588 to load the data.
589
590 Returns
591 -------
592 obj : cls or ``__uwacan_class__``
593 An instance of the class used to save the data (if found in the Zarr
594 file's metadata), or an instance of the called class.
595
596 Notes
597 -----
598 - The Zarr file should have the `__uwacan_class__` attribute in the
599 dataset's metadata to allow class reconstruction.
600 - If the class cannot be found, or if there is an error during dynamic
601 import, the method falls back to using the current class `cls` to
602 instantiate the object.
603
604 """
605 kwargs.setdefault("consolidated", False)
606 data = xr.open_zarr(path, **kwargs)
607 if "__xarray_dataarray_variable__" in data:
608 data = data["__xarray_dataarray_variable__"]
609 data.name = None
610 if lookup_class and "__uwacan_class__" in data.attrs:
611 import importlib
612
613 module_name, class_name = data.attrs["__uwacan_class__"].rsplit(".", 1)
614 try:
615 module = importlib.import_module(module_name)
616 cls = getattr(module, class_name)
617 except:
618 pass
619 return cls(data)
620
621
[docs]
622class DataArrayWrap(xrwrap, np.lib.mixins.NDArrayOperatorsMixin):
623 """Wrapper around `xarray.DataArray`.
624
625 This base class exists to wrap functionality in `xarray.DataArray`,
626 and numerical operations from `numpy`.
627 """
628
629 _coords_set_by_init = set()
630
631 def __init__(self, data, dims=(), coords=None, **kwargs):
632 if isinstance(data, DataArrayWrap):
633 data = data.data
634 if not isinstance(data, xr.DataArray):
635 if isinstance(dims, str):
636 dims = [dims]
637 if dims is None:
638 dims = ()
639 if np.ndim(data) != np.size(dims):
640 raise ValueError(
641 f"Dimension names '{dims}' for {type(self).__name__} does not match data with {np.ndim(data)} dimensions"
642 )
643 data = xr.DataArray(data, dims=dims)
644 if coords is not None:
645 data = data.assign_coords(
646 **{name: coord for (name, coord) in coords.items() if name not in self._coords_set_by_init}
647 )
648 super().__init__(data, **kwargs)
649
650 def __array__(self, dtype=None):
651 """Casts this object into a `numpy.ndarray`."""
652 return self.data.__array__(dtype)
653
654 @staticmethod
655 def _implements_np_func(np_func):
656 """Tag implementations of `numpy` functions.
657
658 We use the ``__array_function__`` interface to implement many
659 `numpy` functions. This decorator will only tag an implementation
660 function with which `numpy` function it implements.
661 """
662
663 def decorator(func):
664 func._implements_np_func = np_func
665 return func
666
667 return decorator
668
669 def __init_subclass__(cls):
670 """Set up the `numpy` implementations for a class.
671
672 This will run when a subclass is defined, and
673 check if there are any methods in it that are tagged
674 with a numpy implementation. All those implementations
675 will be stored in a class-level dictionary.
676 """
677 implementations = {}
678 for name, value in cls.__dict__.items():
679 if callable(value) and hasattr(value, "_implements_np_func"):
680 implementations[value._implements_np_func] = value
681 cls._np_func_implementations = implementations
682
683 def __array_function__(self, func, types, args, kwargs):
684 """Interfaces with numpy functions.
685
686 This will run when general numpy functions are used on objects
687 of this class. We have stored tagged implementations in class
688 dictionaries, so we can check if there is an explicit implementation.
689 We have no actual method which does this, so we go through the ``mro``
690 manually.
691
692 If no explicit implementation is found, we try replacing all wrappers
693 with their `xarray.DataArray` objects, and call the function on them
694 instead.
695 """
696 for cls in self.__class__.mro():
697 if hasattr(cls, "_np_func_implementations"):
698 if func in cls._np_func_implementations:
699 func = cls._np_func_implementations[func]
700 break
701 else:
702 # We couldn't find an explicit implementation.
703 # Try replacing all _DataWrapper with their data and calling the function.
704 args = (arg.data if isinstance(arg, DataArrayWrap) else arg for arg in args)
705 out = func(*args, **kwargs)
706 if not isinstance(out, xr.DataArray):
707 try:
708 out = self.data.__array_wrap__(out)
709 except:
710 # We cannot wrap this in an xarray, then we cannot wrap in our own wrapper.
711 return out
712 out = self.__array_wrap__(out)
713 return out
714 return func(*args, **kwargs)
715
716 def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
717 """Interfaces with `numpy.ufunc`.
718
719 Many functions in numpy are ufuncs. `xarray.DataArray.__array_ufunc__` will
720 do the heavy lifting here.
721 """
722 inputs = (arg.data if isinstance(arg, DataArrayWrap) else arg for arg in inputs)
723 return self.__array_wrap__(self.data.__array_ufunc__(ufunc, method, *inputs, **kwargs))
724
[docs]
725 @_implements_np_func(np.mean)
726 def mean(self, dim=..., **kwargs):
727 """Average of this data, along some dimension.
728
729 See `xarray.DataArray.mean` for more details.
730 """
731 return self.__array_wrap__(self.data.mean(dim, **kwargs))
732
[docs]
733 @_implements_np_func(np.sum)
734 def sum(self, dim=..., **kwargs):
735 """Sum of this data, along some dimension.
736
737 See `xarray.DataArray.sum` for more details.
738 """
739 return self.__array_wrap__(self.data.sum(dim, **kwargs))
740
[docs]
741 @_implements_np_func(np.std)
742 def std(self, dim=..., **kwargs):
743 """Standard deviation of this data, along some dimension.
744
745 See `xarray.DataArray.std` for more details.
746 """ # noqa: D401
747 return self.__array_wrap__(self.data.std(dim, **kwargs))
748
[docs]
749 @_implements_np_func(np.max)
750 def max(self, dim=..., **kwargs):
751 """Maximum of this data, along some dimension.
752
753 See `xarray.DataArray.max` for more details.
754 """
755 return self.__array_wrap__(self.data.max(dim, **kwargs))
756
[docs]
757 @_implements_np_func(np.min)
758 def min(self, dim=..., **kwargs):
759 """Minimum of this data, along some dimension.
760
761 See `xarray.DataArray.min` for more details.
762 """
763 return self.__array_wrap__(self.data.min(dim, **kwargs))
764
[docs]
765 def apply(self, func, *args, **kwargs):
766 """Apply some function to the contained data.
767
768 This calls the supplied function with the `xarray.DataArray`
769 in this object, then wraps the output in a similar container again.
770 """
771 data = func(self.data, *args, **kwargs)
772 return self.__array_wrap__(data)
773
[docs]
774 def reduce(self, func, dim, **kwargs):
775 """Apply a reduction function along some dimension in this data.
776
777 See `xarray.DataArray.reduce` for more details.
778 """
779 data = self.data.reduce(func=func, dim=dim, **kwargs)
780 return self.__array_wrap__(data)
781
782
783DataArrayWrap.__init_subclass__()
784
785
[docs]
786class DatasetWrap(xrwrap):
787 """Wraps `xarray.Dataset` objects.
788
789 This wraps a dataset by passing indexing to the underlying dataset
790 indexing, and mimics the `xarray` attribute access by passing
791 attribute access to indexing if the attribute exists in the dataset.
792 Using a MutableMapping from collections enables lots of dict-style
793 iteration.
794 """
795
796 def __getitem__(self, key):
797 try:
798 return self._data[key]
799 except KeyError as e:
800 raise KeyError(*e.args) from None
801
802 def __setitem__(self, key, value):
803 self._data[key] = value
804
805 def __delitem__(self, key):
806 del self._data[key]
807
808 def __iter__(self):
809 return iter(self._data)
810
811 def __len__(self):
812 return len(self._data)
813
814 def __contains__(self, key):
815 return key in self.data
816
817 def keys(self):
818 return self.data.keys()
819
820 def items(self):
821 return self.data.items()
822
823 def values(self):
824 return self.data.values()
825
826 def get(self, key, default=None):
827 return self.data.get(key, default=default)
828
829 def __getattr__(self, key):
830 data = object.__getattribute__(self, "_data")
831 if key in data.variables:
832 return data[key]
833 raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") from None
834
835 def __dir__(self):
836 return sorted(set(super().__dir__()) | set(self._data.variables))
837
838
[docs]
839class Roller:
840 """Base class for rolling windows."""
841
842 @property
843 def num_frames(self):
844 """The number of frames in this rolling output."""
845 return self.settings["num_frames"]
846
847 @property
848 def shape(self):
849 """The shape of the output frames."""
850 ...
851
852 @property
853 def dims(self):
854 """The dimensions of the output frames."""
855 ...
856
857 @property
858 def coords(self):
859 """The coords of the output frames."""
860 ...
861
[docs]
862 def numpy_frames(self):
863 """Generate numpy frames with data."""
864 yield
865
866 def __iter__(self):
867 """Generate rolling frames of the contained data, of the same type."""
868 yield
869
870
[docs]
871class TimeData(DataArrayWrap):
872 """Handing data which varies over time.
873
874 This class is mainly used to wrap time-signals of sampled sounds.
875 As such, the time data is assumed to be sampled at a constant samplerate.
876
877 Parameters
878 ----------
879 data : array_like
880 A `numpy.ndarray` or a `xarray.DataArray` with the time data.
881 time : array_like, optional
882 A `numpy.ndarray` with ``dtype=datetime64[ns]`` containing time stamps for the samples.
883 start_time : time_like, optional
884 The start time for the first sample in the signal.
885 This should ideally be a proper time type, but it will be parsed if it is a string.
886 Defaults to "now" if not given.
887 samplerate : float, optional
888 The samplerate for this data, in Hz.
889 If the ``data`` is a `numpy.ndarray`, this has to be given.
890 If the ``data`` is a `xarray.DataArray` which already has a time coordinate,
891 this can be omitted.
892 dims : str or [str], default="time"
893 The dimensions of the data. Must have the same length as the number of dimensions in the data.
894 Only used for `numpy` inputs.
895 coords : `xarray.DataArray.coords`
896 Additional coordinates for this data.
897 attrs : dict, optional
898 Additional attributes to store with this data.
899 """
900
901 _coords_set_by_init = {"time"}
902
903 def __init__(
904 self, data, time=None, start_time=None, samplerate=None, dims="time", coords=None, attrs=None, **kwargs
905 ):
906 super().__init__(data, dims=dims, coords=coords, attrs=attrs, **kwargs)
907
908 if samplerate is None and time is not None:
909 samplerate = np.timedelta64(1, "s") / np.mean(np.diff(time[:1000]))
910 elif time is None and samplerate is not None:
911 if start_time is None:
912 if "time" in self.data.coords:
913 start_time = self.data.time[0].item()
914 else:
915 start_time = "now"
916 n_samples = self.data.sizes["time"]
917 start_time = time_to_np(start_time)
918 offsets = np.arange(n_samples) * 1e9 / samplerate
919 time = start_time + offsets.astype("timedelta64[ns]")
920
921 if time is not None:
922 if not isinstance(time, xr.DataArray):
923 time = xr.DataArray(time, dims="time")
924 if samplerate is not None:
925 time.attrs["rate"] = samplerate
926 self.data.coords["time"] = time
927 elif "rate" not in self.data.time.attrs:
928 if samplerate is None:
929 samplerate = np.timedelta64(1, "s") / np.mean(np.diff(self.data.time[:1000]))
930 self.data.time.attrs["rate"] = samplerate
931
[docs]
932 @classmethod
933 def from_dataset(cls, dataset):
934 if "time" in dataset.dims:
935 return super().from_dataset(dataset)
936 # This is not time data any more, let the caller catch the error.
937 raise NotImplementedError(f"Cannot instantiate '{cls.__name__}' from data lacking time dimension")
938
939 @property
940 def samplerate(self):
941 return self.data.time.rate
942
943 @property
944 def time(self):
945 """Time coordinates for this data."""
946 return self.data.time
947
948 @property
949 def time_window(self):
950 """A `TimeWindow` describing when the data start and stops."""
951 # Calculating duration from number and rate means the stop points to the sample after the last,
952 # which is more intuitive when considering signal durations etc.
953 return TimeWindow(
954 start=self.data.time.data[0],
955 duration=self.data.sizes["time"] / self.samplerate,
956 )
957
[docs]
958 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
959 """Select a subset of the data over time.
960
961 See `TimeWindow.subwindow` for details on the parameters.
962 """
963 original_window = self.time_window
964 new_window = original_window.subwindow(
965 time, start=start, stop=stop, center=center, duration=duration, extend=extend
966 )
967 if isinstance(new_window, TimeWindow):
968 start = time_to_np(new_window.start)
969 stop = time_to_np(new_window.stop)
970 selected_data = self.data.sel(time=slice(start, stop))
971 else:
972 time = time_to_np(new_window)
973 selected_data = self.data.sel(time=time, method="nearest")
974
975 new = type(self)(selected_data)
976 return new
977
[docs]
978 def listen(self, downsampling=1, upsampling=None, headroom=6, **kwargs):
979 """Play back this time data over speakers.
980
981 This tries to play the time data as audio using the `sounddevice` package.
982 The audio will be centered at 0 and normalized before playback.
983
984 Parameters
985 ----------
986 downsampling : float, optional
987 Artificially uses a lower samplerate in playback to slow
988 down the audio, lowering the pitch of the content.
989 upsampling : int, optional
990 Decimates the data by selecting every Nth sample, speeding
991 up the audio and raising the pitch of the content.
992 headroom : float, default 6
993 How much headroom to leave in the normalization, in dB.
994 **kwargs : dict, optional
995 Remaining keyword arguments are passed to `sounddevice.play`.
996 The most useful arguments are ``blocking=True``, and ``device``.
997 """
998 import sounddevice as sd
999
1000 sd.stop()
1001 data = self.data
1002 if upsampling:
1003 data = data[::upsampling]
1004 scaled = data - data.mean()
1005 scaled = scaled / np.max(np.abs(scaled)) * 10 ** (-headroom / 20)
1006 sd.play(scaled, samplerate=round(self.samplerate / downsampling), **kwargs)
1007
[docs]
1008 def rolling(self, duration=None, step=None, overlap=None, squeeze_time=True):
1009 """Generate rolling windows of this data.
1010
1011 Parameters
1012 ----------
1013 duration : float
1014 The duration of each frame, in seconds.
1015 step : float
1016 The step between consecutive frames, in seconds.
1017 overlap : float
1018 The overlap between consecutive frames, as a fraction of the duration.
1019 squeeze_time : bool, default `True`
1020 If this is set to `False`, rolling over windows with single time values will still
1021 give output with a time axis/dim.
1022
1023 Returns
1024 -------
1025 roller : `TimeDataRoller`
1026 A roller object to roll over the data.
1027 """
1028 return TimeDataRoller(self, duration=duration, step=step, overlap=overlap, squeeze_time=squeeze_time)
1029
1030
[docs]
1031class TimeDataRoller(Roller):
1032 """Rolling windows of time data.
1033
1034 Parameters
1035 ----------
1036 obj : TimeData
1037 The time data wrapper to roll over.
1038 duration : float
1039 The duration of each frame, in seconds.
1040 step : float
1041 The step between consecutive frames, in seconds.
1042 overlap : float
1043 The overlap between consecutive frames, as a fraction of the duration.
1044 squeeze_time : bool, default `True`
1045 If this is set to `False`, rolling over windows with single time values will still
1046 give output with a time axis/dim.
1047 """
1048
1049 def __init__(self, obj, duration=None, step=None, overlap=0, squeeze_time=True):
1050 self.obj = obj
1051 self.settings = time_frame_settings(
1052 duration=duration,
1053 step=step,
1054 overlap=overlap,
1055 signal_length=self.obj.time_window.duration,
1056 samplerate=self.obj.samplerate,
1057 )
1058
1059 if self.settings["samples_per_frame"] == 1 and squeeze_time:
1060 self._squeeze_time = True
1061 self._slices = list(range(self.settings["num_frames"]))
1062 else:
1063 self._squeeze_time = False
1064 self._slices = [
1065 slice(start_idx, start_idx + self.settings["samples_per_frame"])
1066 for start_idx in range(
1067 0, self.settings["num_frames"] * self.settings["sample_step"], self.settings["sample_step"]
1068 )
1069 ]
1070
1071 @property
1072 def coords(self):
1073 coords = dict(self.obj.coords)
1074 if not self._squeeze_time:
1075 coords["time"] = coords["time"][: self.settings["samples_per_frame"]]
1076 else:
1077 del coords["time"]
1078 return coords
1079
1080 @property
1081 def dims(self):
1082 dims = list(self.obj.dims)
1083 dims.remove("time")
1084 if not self._squeeze_time:
1085 dims = ["time"] + dims
1086 return tuple(dims)
1087
1088 @property
1089 def shape(self):
1090 shape = list(self.obj.data.shape)
1091 del shape[self.obj.dims.index("time")]
1092 if not self._squeeze_time:
1093 shape = [self.settings["samples_per_frame"]] + shape
1094 return tuple(shape)
1095
[docs]
1096 def numpy_frames(self):
1097 data = self.obj.data.transpose("time", ...)
1098 # For large datasets the data is likely disk-mapped. Load in chunks to avoid running out of memory.
1099 num_chunks = max(1, data.nbytes // (2**30)) # At least 1 chunk
1100 chunk_size = len(self._slices) // num_chunks # Number of frames per chunk
1101
1102 for i in range(0, len(self._slices), chunk_size):
1103 chunk_slices = self._slices[i:i + chunk_size]
1104 # Load the chunk of data that covers all slices in this chunk
1105 if isinstance(chunk_slices[0], slice):
1106 start_idx = chunk_slices[0].start
1107 end_idx = chunk_slices[-1].stop
1108 else:
1109 start_idx = chunk_slices[0]
1110 end_idx = chunk_slices[-1] + self.settings["samples_per_frame"]
1111 chunk_data = data.isel(time=slice(start_idx, end_idx)).data
1112 # Yield individual frames from the loaded chunk
1113 for s in chunk_slices:
1114 if isinstance(s, slice):
1115 yield chunk_data[s.start - start_idx:s.stop - start_idx]
1116 else:
1117 yield chunk_data[s - start_idx]
1118
[docs]
1119 def time_data(self):
1120 """Generate rolling frames of the contained data, as `TimeData`."""
1121 for s in self._slices:
1122 yield self.obj.isel(time=s)
1123
1124 def __iter__(self):
1125 yield from self.time_data()
1126
1127
[docs]
1128class FrequencyData(DataArrayWrap):
1129 """Handing data which varies over frequency.
1130
1131 This class is mainly used to wrap spectra of sampled sounds.
1132 Typically, this is represented as power spectral densities,
1133 but other data types are also possible.
1134
1135 Parameters
1136 ----------
1137 data : array_like
1138 A `numpy.ndarray` or a `xarray.DataArray` with the frequency data.
1139 frequency : array_like, optional
1140 The frequencies corresponding to the data. Mandatory if ``data`` is a `numpy.ndarray`.
1141 frequency_band_lower : array_like, optional
1142 The lower edge of each frequency band. Must be provided together with
1143 `frequency_band_upper`. This is the preferred method for specifying
1144 frequency band information.
1145 frequency_band_upper : array_like, optional
1146 The upper edge of each frequency band. Must be provided together with
1147 `frequency_band_lower`. This is the preferred method for specifying
1148 frequency band information.
1149 bandwidth : float, optional
1150 A single bandwidth value valid for all frequencies. This is a convenience
1151 method that assumes centered frequency bands. For per-frequency bandwidth,
1152 use `frequency_band_lower`/`frequency_band_upper` instead.
1153 dims : str or [str], default="frequency"
1154 The dimensions of the data. Must have the same length as the number of dimensions in the data.
1155 Only used for `numpy` inputs.
1156 coords : `xarray.DataArray.coords`
1157 Additional coordinates for this data.
1158 attrs : dict, optional
1159 Additional attributes to store with this data.
1160 """
1161
1162 _coords_set_by_init = {"frequency", "frequency_band_lower", "frequency_band_upper"}
1163
1164 def __init__(self, data, frequency=None, frequency_band_lower=None, frequency_band_upper=None, bandwidth=None, dims="frequency", coords=None, attrs=None, **kwargs):
1165 super().__init__(data, dims=dims, coords=coords, attrs=attrs, **kwargs)
1166 if frequency is not None:
1167 self.data.coords["frequency"] = frequency
1168
1169 # Handle band edges (preferred method)
1170 if frequency_band_lower is not None or frequency_band_upper is not None:
1171 if frequency_band_lower is None or frequency_band_upper is None:
1172 raise ValueError("Both frequency_band_lower and frequency_band_upper must be provided together")
1173 self.data.coords["frequency_band_lower"] = ("frequency", frequency_band_lower)
1174 self.data.coords["frequency_band_upper"] = ("frequency", frequency_band_upper)
1175
1176 # Handle single bandwidth value (convenience method)
1177 elif bandwidth is not None:
1178 if np.asarray(bandwidth).size > 1:
1179 raise ValueError("bandwidth must be a single value when provided. For per-frequency bandwidth, use frequency_band_lower/frequency_band_upper instead.")
1180 # Compute edges assuming centered bands
1181 frequency_band_lower = frequency - bandwidth / 2
1182 frequency_band_upper = frequency + bandwidth / 2
1183 self.data.coords["frequency_band_lower"] = ("frequency", frequency_band_lower)
1184 self.data.coords["frequency_band_upper"] = ("frequency", frequency_band_upper)
1185
1186 @property
1187 def bandwidth(self):
1188 """Compute bandwidth from frequency band edges.
1189
1190 Returns
1191 -------
1192 bandwidth : xarray.DataArray
1193 The bandwidth of each frequency band, computed as
1194 frequency_band_upper - frequency_band_lower.
1195 """
1196 return self.data.frequency_band_upper - self.data.frequency_band_lower
1197
[docs]
1198 @classmethod
1199 def from_dataset(cls, dataset):
1200 if "frequency" in dataset.dims:
1201 return super().from_dataset(dataset)
1202 # This is not frequency data any more, just return the plain xr.DataArray
1203 raise NotImplementedError(f"Cannot instantiate '{cls.__name__}' from data lacking frequency dimension")
1204
1205 @property
1206 def frequency(self):
1207 """The frequencies for the data."""
1208 return self.data.frequency
1209
[docs]
1210 def estimate_bandwidth(self):
1211 """Estimate the bandwidth of the frequency vector.
1212
1213 This tries to determine if the frequency vector is linearly
1214 or logarithmically spaced, then uses either linear or logarithmic
1215 bandwidth estimation.
1216
1217 Returns
1218 -------
1219 bandwidth : `xarray.DataArray`
1220 The estimated bandwidth.
1221 """
1222 frequency = np.asarray(self.frequency)
1223 # Check if the frequency array seems linearly or logarithmically spaced
1224 if frequency[0] == 0:
1225 diff = frequency[2:] - frequency[1:-1]
1226 frac = frequency[2:] / frequency[1:-1]
1227 else:
1228 diff = frequency[1:] - frequency[:-1]
1229 frac = frequency[1:] / frequency[:-1]
1230 diff_err = np.std(diff) / np.mean(diff)
1231 frac_err = np.std(frac) / np.mean(frac)
1232 # Note: if there are three values and the first is zero, the std is 0 for both.
1233 # The equals option makes us end up in the linear frequency case.
1234 if diff_err <= frac_err:
1235 # Central differences, with forwards and backwards at the ends
1236 central = (frequency[2:] - frequency[:-2]) / 2
1237 first = frequency[1] - frequency[0]
1238 last = frequency[-1] - frequency[-2]
1239 else:
1240 # upper edge is at sqrt(f_{l+1} * f_l), lower edge is at sqrt(f_{l-1} * f_l)
1241 # the difference simplifies as below.
1242 central = (frequency[2:] ** 0.5 - frequency[:-2] ** 0.5) * frequency[1:-1] ** 0.5
1243 # extrapolating to one bin below lowest and one above highest using constant ratio
1244 # the expression above then simplifies to the expressions below
1245 first = (frequency[1] - frequency[0]) * (frequency[0] / frequency[1]) ** 0.5
1246 last = (frequency[-1] - frequency[-2]) * (frequency[-1] / frequency[-2]) ** 0.5
1247 bandwidth = np.concatenate([[first], central, [last]])
1248 return xr.DataArray(bandwidth, coords={"frequency": self.frequency})
1249
1250 def _figure_template(self, **kwargs):
1251 template = super()._figure_template(**kwargs)
1252 template.layout.update(
1253 xaxis=dict(
1254 type="log",
1255 title="Frequency in Hz",
1256 )
1257 )
1258 return template
1259
[docs]
1260 def plot(self, **kwargs):
1261 """Make a scatter trace of this data.
1262
1263 Some useful keyword arguments:
1264
1265 - ``name`` sets the legendentry of this trace.
1266 - ``line_color`` and ``marker_color`` sets the line and marker colors.
1267 - ``mode`` sets the mode, typically one of ``"lines"``, ``"markers"``, or ``"markers+lines"``.
1268
1269 """
1270 import plotly.graph_objects as go
1271
1272 if self.dims != ("frequency",):
1273 raise ValueError(
1274 f"Cannot simply scatter frequency data with dimensions '{self.dims}'. "
1275 "Use the `.groupby(dim)` method to loop over non-frequency dimensions."
1276 )
1277
1278 return go.Scatter(
1279 x=self.frequency,
1280 y=self.data,
1281 **kwargs,
1282 )
1283
1284
[docs]
1285class TimeFrequencyData(TimeData, FrequencyData):
1286 """Handing data which varies over time and frequency.
1287
1288 This class is mainly used to wrap spectrogram-like data.
1289 There are no processing implemented in this class, but
1290 subclasses are free to add processing methods, custom
1291 initialization, or instantiation in class methods.
1292
1293 Parameters
1294 ----------
1295 data : array_like
1296 A `numpy.ndarray` or a `xarray.DataArray` with the time-frequency data.
1297 start_time : time_like, optional
1298 The start time for the first sample in the signal.
1299 This should ideally be a proper time type, but it will be parsed if it is a string.
1300 Defaults to "now" if not given.
1301 samplerate : float, optional
1302 The samplerate for this data, in Hz. This is not the samplerate of the underlying time signal,
1303 but time steps of the time axis on the time-frequency data.
1304 If the `data` is a `numpy.ndarray`, this has to be given.
1305 If the `data` is a `xarray.DataArray` which already has a time coordinate,
1306 this can be omitted.
1307 frequency : array_like, optional
1308 The frequencies corresponding to the data. Mandatory if `data` is a `numpy.ndarray`.
1309 frequency_band_lower : array_like, optional
1310 The lower edge of each frequency band. Must be provided together with
1311 `frequency_band_upper`. This is the preferred method for specifying
1312 frequency band information.
1313 frequency_band_upper : array_like, optional
1314 The upper edge of each frequency band. Must be provided together with
1315 `frequency_band_lower`. This is the preferred method for specifying
1316 frequency band information.
1317 bandwidth : float, optional
1318 A single bandwidth value valid for all frequencies. This is a convenience
1319 method that assumes centered frequency bands. For per-frequency bandwidth,
1320 use `frequency_band_lower`/`frequency_band_upper` instead.
1321 dims : str or [str], optional
1322 The dimensions of the data. Must have the same length as the number of dimensions in the data.
1323 Mandatory used for `numpy` inputs, not used for `xarray` inputs.
1324 coords : `xarray.DataArray.coords`
1325 Additional coordinates for this data.
1326 attrs : dict, optional
1327 Additional attributes to store with this data.
1328 """
1329
1330 _coords_set_by_init = {"time", "frequency", "frequency_band_lower", "frequency_band_upper"}
1331
1332 def __init__(
1333 self,
1334 data,
1335 time=None,
1336 start_time=None,
1337 samplerate=None,
1338 frequency=None,
1339 frequency_band_lower=None,
1340 frequency_band_upper=None,
1341 bandwidth=None,
1342 dims=None,
1343 coords=None,
1344 attrs=None,
1345 **kwargs,
1346 ):
1347 super().__init__(
1348 data,
1349 dims=dims,
1350 coords=coords,
1351 attrs=attrs,
1352 time=time,
1353 start_time=start_time,
1354 samplerate=samplerate,
1355 frequency=frequency,
1356 frequency_band_lower=frequency_band_lower,
1357 frequency_band_upper=frequency_band_upper,
1358 bandwidth=bandwidth,
1359 **kwargs,
1360 )
1361
[docs]
1362 @classmethod
1363 def from_dataset(cls, data, **kwargs):
1364 if "frequency" not in data.dims:
1365 # It's not frequency-data, but it might be time data
1366 return TimeData.from_dataset(data, **kwargs)
1367 if "time" not in data.dims:
1368 # It's not time-data, but it might be frequency data
1369 return FrequencyData.from_dataset(data, **kwargs)
1370 return super().from_dataset(data, **kwargs)
1371
[docs]
1372 def rolling(self, duration=None, step=None, overlap=None, squeeze_time=True):
1373 """Generate rolling windows of this data.
1374
1375 By default, a single time instance of the data will be
1376 generated in each frame.
1377
1378 Parameters
1379 ----------
1380 duration : float
1381 The duration of each frame, in seconds.
1382 step : float
1383 The step between consecutive frames, in seconds.
1384 overlap : float
1385 The overlap between consecutive frames, as a fraction of the duration.
1386 squeeze_time : bool, default `True`
1387 If this is set to `False`, rolling over windows with single time values will still
1388 give output with a time axis/dim.
1389
1390 Returns
1391 -------
1392 roller : `TimeDataRoller`
1393 A roller object to roll over the data.
1394 """
1395 if (duration, step) == (None, None):
1396 step = 1 / self.samplerate
1397 overlap = 0
1398 return TimeDataRoller(self, duration=duration, step=step, overlap=overlap, squeeze_time=squeeze_time)
1399
1400 def _figure_template(self, **kwargs):
1401 template = super()._figure_template(**kwargs)
1402 template.layout.update(
1403 xaxis=dict(
1404 title_text="Time",
1405 type=None, # Use default which is `linear`` for "normal" data and `date` for timestamp data
1406 ),
1407 yaxis=dict(
1408 title_text="Frequency in Hz",
1409 type="log",
1410 ),
1411 )
1412 template.data.update(
1413 heatmap=[
1414 dict(
1415 colorscale="viridis",
1416 colorbar_title_side="right",
1417 hovertemplate="%{x}<br>%{y:.5s}Hz<br>%{z}<extra></extra>",
1418 )
1419 ]
1420 )
1421 return template
1422
[docs]
1423 def plot(self, **kwargs):
1424 """Make a heatmap trace of this data.
1425
1426 Some useful keyword arguments:
1427
1428 - ``name`` sets the legendentry of this trace.
1429 - ``colorscale`` chooses the colorscale, e.g., ``"viridis"``, ``"delta"``, ``"twilight"``.
1430 - ``zmin`` and ``zmax`` sets the color range.
1431
1432 """
1433 import plotly.graph_objects as go
1434
1435 if set(self.dims) != {"frequency", "time"}:
1436 raise ValueError(
1437 f"Cannot make heatmap of time-frequency data with dimensions '{self.dims}'. "
1438 "Use the `.groupby(dim)` method to loop over extra dimensions."
1439 )
1440
1441 trace = go.Heatmap(
1442 x=self.time,
1443 y=self.frequency,
1444 z=self.data.transpose("frequency", "time"),
1445 )
1446 return trace.update(**kwargs)
1447
1448
[docs]
1449class Transit:
1450 """Container for recorded ship transits.
1451
1452 This class is responsible for bundling recordings and position tracks.
1453 Note that this class does not take `TimeData` as the input.
1454 The track and recording will be restricted to a time window which
1455 both of them covers.
1456
1457 Attributes
1458 ----------
1459 recording : `recordings.Recording`
1460 A recording of the ship transit.
1461 track : `positional.Track`
1462 The position track of the ship.
1463 """
1464
1465 def __init__(self, recording, track):
1466 start = max(recording.time_window.start, track.time_window.start)
1467 stop = min(recording.time_window.stop, track.time_window.stop)
1468
1469 self.recording = recording.subwindow(start=start, stop=stop)
1470 self.track = track.subwindow(start=start, stop=stop)
1471
1472 @property
1473 def time_window(self):
1474 """A `TimeWindow` describing when the data start and stops."""
1475 rec_window = self.recording.time_window
1476 track_window = self.track.time_window
1477 return TimeWindow(start=max(rec_window.start, track_window.start), stop=min(rec_window.stop, track_window.stop))
1478
[docs]
1479 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
1480 """Select a subset of the data over time.
1481
1482 See `TimeWindow.subwindow` for details on the parameters.
1483 """
1484 subwindow = self.time_window.subwindow(
1485 time, start=start, stop=stop, center=center, duration=duration, extend=extend
1486 )
1487 rec = self.recording.subwindow(subwindow)
1488 track = self.track.subwindow(subwindow)
1489 return type(self)(recording=rec, track=track)
1490
1491
[docs]
1492def dB(x, power=True, safe_zeros=True, ref=1):
1493 """Calculate the decibel of an input value.
1494
1495 Parameters
1496 ----------
1497 x : numeric
1498 The value to take the decibel of
1499 power : boolean, default True
1500 Specifies if the input is a power-scale quantity or a root-power quantity.
1501 For power-scale quantities, the output is 10 log(x), for root-power quantities the output is 20 log(x).
1502 If there are negative values in a power-scale input, the handling can be controlled as follows:
1503 - `power='imag'`: return imaginary values
1504 - `power='nan'`: return nan where power < 0
1505 - `power=True`: as `nan`, but raises a warning.
1506 safe_zeros : boolean, default True
1507 If this option is on, all zero values in the input will be replaced with the smallest non-zero value.
1508 ref : numeric
1509 The reference unit for the decibel. Note that this should be in the same unit as the `x` input,
1510 e.g., if `x` is a power, the `ref` value might need squaring.
1511 """
1512 if isinstance(x, DataArrayWrap):
1513 return x.apply(dB, power=power, safe_zeros=safe_zeros, ref=ref)
1514
1515 if safe_zeros and np.size(x) > 1:
1516 nonzero = x != 0
1517 min_value = np.nanmin(abs(xr.where(nonzero, x, np.nan, keep_attrs=True)))
1518 x = xr.where(nonzero, x, min_value, keep_attrs=True)
1519 if power:
1520 if np.any(x < 0):
1521 if power == "imag":
1522 return 10 * np.log10(x + 0j)
1523 if power == "nan":
1524 return 10 * np.log10(xr.where(x > 0, x, np.nan, keep_attrs=True))
1525 return 10 * np.log10(x / ref)
1526 else:
1527 return 20 * np.log10(np.abs(x) / ref)
1528
1529
[docs]
1530def time_frame_settings(
1531 duration=None,
1532 step=None,
1533 overlap=None,
1534 resolution=None,
1535 num_frames=None,
1536 signal_length=None,
1537 samplerate=None,
1538):
1539 """Calculate time frame overlap settings from various input parameters.
1540
1541 Parameters
1542 ----------
1543 duration : float
1544 How long each frame is, in seconds
1545 step : float
1546 The time between frame starts, in seconds
1547 overlap : float
1548 How much overlap there is between the frames, as a fraction of the duration.
1549 If this is negative the frames will have extra space between them.
1550 resolution : float
1551 Desired frequency resolution in Hz. Equals ``1/duration``
1552 num_frames : int
1553 The total number of frames in the signal
1554 signal_length : float, optional
1555 The total length of the signal, in seconds
1556 samplerate : float, optional
1557 The samplerate of the signal, in Hz.
1558 Only used to compute sample versions of the outputs.
1559
1560 Returns
1561 -------
1562 settings : dict
1563 The settings dict has keys:
1564 ``"duration"``, ``"step"``, ``"overlap"``, ``"resolution"``
1565 and if ``signal_length`` was given:
1566 ``"num_frames"``, ``"signal_length"``
1567 and if ``samplerate`` was given:
1568 ``"samples_per_frame"``, ``"sample_step"``, ``"sample_overlap"``
1569 and if both ``samplerate`` and ``signal_length`` was given:
1570 ``"sample_total"``
1571
1572 Raises
1573 ------
1574 ValueError
1575 If the inputs are not sufficient to determine the frame,
1576 or if priorities cannot be disambiguated.
1577
1578 Notes
1579 -----
1580 The parameters will be used in the following priority:
1581
1582 1. ``signal_length`` and ``num_frames``
1583 2. ``step`` and ``duration``
1584 3. ``resolution`` and ``overlap``
1585
1586 Each frame ``idx=[0, ..., num_frames - 1]`` has::
1587
1588 start = idx * step
1589 stop = idx * step + duration
1590
1591 The last frame thus ends at ``(num_frames - 1) step + duration``.
1592 The overlap relations are::
1593
1594 duration = step / (1 - overlap)
1595 step = duration (1 - overlap)
1596 overlap = 1 - step / duration
1597
1598 If both ``resolution`` and ``overlap`` are given, they will be treated as "minimum"
1599 parameters. This means that the output will have at least an overlap as specified
1600 (`overlap_out >= overlap_in`) and at least the specified spectral resolution
1601 (`resolution_out <= resolution_in`). If the other input parameters are sufficient
1602 to fully determine the frame settings, the output will be checked to meet the
1603 requested resolution and overlap criteria.
1604 The overlap will default to a minimum of 0 if not given, i.e. frames that are
1605 edge to edge (unless more overlap is required from other parameters).
1606
1607 This gives us the following total list of priorities:
1608
1609 1) ``signal_length`` and ``num_frames`` are given
1610 a) ``step`` or ``duration`` (not both!) given
1611 b) ``resolution`` is given
1612 c) ``overlap`` is given
1613 2) ``step`` and ``duration`` given
1614 3) ``step`` given
1615 a) ``resolution`` is given
1616 b) ``overlap`` is given
1617 4) ``duration`` given (``resolution`` is ignored, ``overlap`` is used)
1618 5) ``resolution`` given
1619
1620 For cases 2-5, ``num_frames`` is calculated if ``signal_length`` is given,
1621 and a new truncated ``signal_length`` is returned.
1622
1623 If a samplerate was given, the number of samples in an output is also computed.
1624 This is done with::
1625
1626 samples_per_frame = ceil(duration * samplerate)
1627 sample_step = floor(step * samplerate)
1628 sample_overlap = samples_per_frame - sample_step
1629
1630 We use ceil for the duration since it is often chosen to obtain a minimum frequency resolution.
1631 We use floor for the step to make sure we fit all the frames in the total length.
1632 Note that this means that the sample overlap might not equal
1633 ``round(duration * overlap * samplerate)``, due to how the rounding is done.
1634 It can at most be two samples larger than the rounded value.
1635 """
1636 if None not in (num_frames, signal_length):
1637 if None not in (duration, step):
1638 raise ValueError(
1639 "Overdetermined time frames: "
1640 "All four of `num_frames`, `signal_length`, `duration`, and `step` "
1641 "cannot be specified simultaneously."
1642 )
1643 elif step is not None:
1644 # We have the step, calculate the duration
1645 duration = signal_length - (num_frames - 1) * step
1646 elif duration is not None:
1647 # We have the duration, calculate the step
1648 step = (signal_length - duration) / (num_frames - 1)
1649 elif resolution is not None:
1650 duration = 1 / resolution
1651 step = (signal_length - duration) / (num_frames - 1)
1652 overlap = 1 - step / duration
1653 else:
1654 # This sets a duration to meet a minimum overlap, at least 0.
1655 duration = signal_length / (num_frames + (overlap or 0) - num_frames * (overlap or 0))
1656 if resolution is not None:
1657 # If a resolution requires longer duration, that's set here.
1658 duration = max(duration, 1 / resolution)
1659 step = (signal_length - duration) / (num_frames - 1)
1660 elif None in (step, duration):
1661 # Missing one of step and duration, infer it from overlap and resolution
1662 if duration is not None:
1663 step = duration * (1 - (overlap or 0))
1664 elif step is not None:
1665 duration = step / (1 - (overlap or 0))
1666 if resolution is not None:
1667 duration = max(duration, 1 / resolution)
1668 elif resolution is not None:
1669 duration = 1 / resolution
1670 step = duration * (1 - (overlap or 0))
1671 else:
1672 raise ValueError(
1673 "Must give at least one of (`step`, `duration`, `resolution`) or the pair of `signal_length` and `num_frames`."
1674 )
1675
1676 if overlap is not None:
1677 if (1 - step / duration < overlap) and not np.isclose(1 - step / duration, overlap):
1678 raise ValueError(f"Time frame step {step}s and duration {duration}s does not meet minimum overlap fraction {overlap}")
1679 if resolution is not None:
1680 if (1 / duration > resolution) and not np.isclose(1 / duration, resolution):
1681 raise ValueError(f"Duration {duration}s does not meet minimum spectral resolution {resolution}Hz.")
1682
1683 settings = {
1684 "duration": duration,
1685 "step": step,
1686 "overlap": 1 - step / duration,
1687 "resolution": 1 / duration,
1688 }
1689 if signal_length is not None:
1690 num_frames = num_frames or int(np.floor((signal_length - duration) / step + 1))
1691 settings["num_frames"] = num_frames
1692 settings["signal_length"] = (num_frames - 1) * step + duration
1693 if samplerate is not None:
1694 settings["samples_per_frame"] = int(np.ceil(samplerate * duration))
1695 settings["sample_step"] = int(np.floor(samplerate * step))
1696 settings["sample_overlap"] = settings["samples_per_frame"] - settings["sample_step"]
1697 if "num_frames" in settings:
1698 settings["sample_total"] = (settings["num_frames"] - 1) * settings["sample_step"] + settings["samples_per_frame"] # fmt: skip
1699 return settings
1700
1701
[docs]
1702def concatenate(items, dim=None, nan_between_items=False, sort=False, cls=None, **kwargs):
1703 """Concatenates a list of items along a specified dimension.
1704
1705 Parameters
1706 ----------
1707 items : list of `xarray.Dataset` or `xarray.DataArray` or `xrwrap`
1708 A list of items to be concatenated. The items can be `xr.Dataset`,
1709 `xr.DataArray`, or instances of a subclass of `xrwrap`.
1710 Most `uwacan` data wrappers are compatible.
1711 dim : str or None, optional
1712 The dimension along which to concatenate. If `None`, the function will
1713 attempt to infer the dimension from the first item. If the items have
1714 more than one dimension, a `ValueError` will be raised.
1715 The concatenation dimension can be an existing dim or a new dim.
1716 nan_between_items : bool, optional, default=False
1717 If `True`, inserts NaN values between concatenated items along the specified dimension.
1718 This is useful for visualization purposes, as it makes most plotting libraries split the lines.
1719 sort : bool, optional, default=False
1720 If `True`, sorts the concatenated items by the specified dimension.
1721 cls : class or callable, optional
1722 The class or callable to use for wrapping the concatenated result. If `None`,
1723 the function will try to infer the class from the first item. If `cls` is a subclass
1724 of `xrwrap` and has a `from_dataset` method, it will use that method to wrap
1725 the result.
1726 **kwargs : dict
1727 Additional keyword arguments passed to the class or callable specified by `cls` when
1728 wrapping the concatenated result.
1729
1730 Returns
1731 -------
1732 object
1733 The concatenated object, either as an `xr.Dataset`, `xr.DataArray`, or as an
1734 instance of the specified `cls` (or inferred class).
1735 """
1736 if dim is None:
1737 if len(items[0].dims) != 1:
1738 raise ValueError("Cannot guess concatenation dimensions for multi-dimensional items.")
1739 dim = next(iter(items[0].dims))
1740
1741 if cls is None:
1742 for item in items:
1743 if isinstance(item, xrwrap):
1744 cls = type(item)
1745 break
1746
1747 if isinstance(cls, type) and issubclass(cls, xrwrap) and hasattr(cls, "from_dataset"):
1748 cls = cls.from_dataset
1749
1750 items = [item.data if isinstance(item, xrwrap) else item for item in items]
1751 if nan_between_items:
1752 items = [x for item in items for x in [item, xr.full_like(item.isel({dim: -1}), np.nan).expand_dims(dim)]][:-1]
1753 items = xr.concat(items, dim=dim)
1754 if sort:
1755 items = items.sortby(dim)
1756 if cls:
1757 items = cls(items, **kwargs)
1758 return items