Source code for uwacan._core

   1"""Some shared core functionality in the package.
   2
   3This contains mostly wrappers around `xarray` objects, and some very basic functions.
   4A few of these are publicly available in the main package namespace. They should be
   5accessed from there if used externally, but from here if used internally.
   6
   7
   8Classes and functions only exposed here
   9---------------------------------------
  10.. autosummary::
  11    :toctree: generated
  12
  13    time_to_np
  14    time_to_datetime
  15    time_frame_settings
  16    TimeWindow
  17    xrwrap
  18    DataArrayWrap
  19    DatasetWrap
  20    Roller
  21    TimeDataRoller
  22
  23Classes and functions exposed in the main package namespace
  24-----------------------------------------------------------
  25.. autosummary::
  26
  27    uwacan.TimeData
  28    uwacan.FrequencyData
  29    uwacan.TimeFrequencyData
  30    uwacan.Transit
  31    uwacan.dB
  32
  33"""
  34
  35import numpy as np
  36import xarray as xr
  37import whenever
  38from datetime import datetime as _py_datetime
  39import functools
  40
  41__all__ = [
  42    "TimeWindow",
  43    "dB",
  44    "TimeData",
  45    "FrequencyData",
  46    "TimeFrequencyData",
  47    "Transit",
  48]
  49
  50
[docs] 51def time_to_np(input, **kwargs): 52 """Convert a time to `numpy.datetime64`.""" 53 if isinstance(input, xr.DataArray): 54 input = input.data 55 if isinstance(input, np.datetime64) or ( 56 isinstance(input, np.ndarray) and np.issubdtype(input.dtype, np.datetime64) 57 ): 58 return input.astype("datetime64[ns]") 59 if not isinstance(input, whenever.Instant): 60 input = time_to_datetime(input, **kwargs) 61 return np.datetime64(input.timestamp_nanos(), "ns")
62 63
[docs] 64def time_to_datetime(input, fmt="RFC 3339", tz="UTC"): 65 """Convert datetimes to the same internal format. 66 67 This function takes a few types of input and tries to convert 68 the input to a `whenever.Instant`. 69 - Any datetime-like input will be converted directly. 70 - np.datetime64 and Unix timestamps are treated similarly. 71 - Strings are parsed with ``fmt`` if given, otherwise a few different common formats are tried. 72 73 Parameters 74 ---------- 75 input : datetime-like, string, or numeric. 76 The input data specifying the time. 77 fmt : string, optional 78 This can be one of the standard formats ``"RFC 3339"``, ``"RFC 2822"``, or ``"ISO 8601"``, handled by `whenever <https://whenever.readthedocs.io/en/latest/overview.html#formatting-and-parsing>`_. 79 Alternatively, a custom parsing specifier can be supplied, using `strptime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_. 80 tz : string, default "UTC" 81 The assumed timezone for the input, if it does not contain one. 82 Unix timestamps have no timezone, and np.datetime64 only supports UTC. 83 84 Returns 85 ------- 86 time : whenever.Instant 87 The converted time. 88 """ 89 if isinstance(input, str): 90 if input == "now": 91 return whenever.Instant.now() 92 if fmt == "RFC 3339": 93 return whenever.OffsetDateTime.parse_rfc3339(input).instant() 94 if fmt == "RFC 2822": 95 return whenever.OffsetDateTime.parse_rfc2822(input).instant() 96 if "ISO" in fmt: 97 return whenever.OffsetDateTime.parse_common_iso(input).instant() 98 if "z" in fmt: 99 return whenever.OffsetDateTime.strptime(input, fmt).instant() 100 if not isinstance(tz, str): 101 raise TypeError(f"Cannot handle time zone info `{tz}`") 102 if tz == "UTC": 103 return whenever.LocalDateTime.strptime(input, fmt).assume_utc() 104 if "/" in tz: 105 return whenever.LocalDateTime.strptime(input, fmt).assume_tz(tz).instant() 106 if ":" in tz: 107 if tz[0] == "+": 108 sign = 1 109 tz = tz[1:] 110 elif tz[0] == "-": 111 sign = -1 112 tz = tz[1:] 113 else: 114 sign = 1 115 hours, minutes = tz.split(":") 116 tz = sign * whenever.TimeDelta(hours=int(hours), minutes=int(minutes)) 117 return whenever.LocalDateTime.strptime(input, fmt).assume_fixed_offset(tz).instant() 118 119 if isinstance(input, whenever.Instant): 120 return input 121 if isinstance(input, (whenever.OffsetDateTime, whenever.ZonedDateTime)): 122 return input.instant() 123 if isinstance(input, whenever.LocalDateTime): 124 return input.assume_utc() 125 126 if isinstance(input, _py_datetime): 127 if input.tzinfo is not None: 128 # The datetime has a timezone - the conversion will work 129 return whenever.Instant.from_py_datetime(input) 130 # Without a timezone in the datetime we hope that the user supplied the timezone via tz. 131 # The easiest way to convert is to go via a string... 132 fmt = "%Y%m%dT%H%M%S.%f" 133 return time_to_datetime(input.strftime(fmt), fmt=fmt, tz=tz) 134 135 if isinstance(input, xr.DataArray): 136 input = input.data 137 if isinstance(input, np.datetime64) or ( 138 isinstance(input, np.ndarray) and np.issubdtype(input.dtype, np.datetime64) 139 ): 140 input = int(input.astype("timedelta64") // np.timedelta64(1, "ns")) 141 return whenever.Instant.from_timestamp_nanos(input) 142 143 if hasattr(input, "timestamp"): 144 if callable(input.timestamp): 145 input = input.timestamp() 146 else: 147 input = input.timestamp 148 return whenever.Instant.from_timestamp(input) 149 150 raise TypeError(f"Cannot find time conversion for `{type(input).__name__}` object")
151 152
[docs] 153class TimeWindow: 154 """Describes a start and stop point in time. 155 156 Give two and only two of the four basic parameters. 157 Less than two will not fully define a window, while 158 more than two will overdetermine the window. 159 160 Parameters 161 ---------- 162 start : time_like 163 A window that starts at this time 164 stop : time_like 165 A window stat stops at this time 166 center : time_like 167 A window centered at this time 168 duration : float 169 A window with this duration, in seconds 170 extend : float 171 Extend the window defined by two of the four above 172 with this much at each end, in seconds. 173 """ 174 175 def __init__(self, start=None, stop=None, center=None, duration=None, extend=None): 176 if start is not None: 177 start = time_to_datetime(start) 178 if stop is not None: 179 stop = time_to_datetime(stop) 180 if center is not None: 181 center = time_to_datetime(center) 182 183 if None not in (start, stop): 184 _start = start 185 _stop = stop 186 start = stop = None 187 elif None not in (center, duration): 188 _start = center.subtract(seconds=duration / 2) 189 _stop = center.add(seconds=duration / 2) 190 center = duration = None 191 elif None not in (start, duration): 192 _start = start 193 _stop = start.add(seconds=duration) 194 start = duration = None 195 elif None not in (stop, duration): 196 _stop = stop 197 _start = stop.subtract(seconds=duration) 198 stop = duration = None 199 elif None not in (start, center): 200 _start = start 201 _stop = start + (center - start) * 2 202 start = center = None 203 elif None not in (stop, center): 204 _stop = stop 205 _start = stop - (stop - center) * 2 206 stop = center = None 207 else: 208 raise TypeError("Needs two of the input arguments to determine time window.") 209 210 if (start, stop, center, duration) != (None, None, None, None): 211 raise TypeError("Cannot input more than two input arguments to a time window!") 212 213 if extend is not None: 214 _start = _start.subtract(seconds=extend) 215 _stop = _stop.add(seconds=extend) 216 217 self._start = _start 218 self._stop = _stop 219
[docs] 220 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None): 221 """Select a smaller window of time. 222 223 Parameters 224 ---------- 225 time : time_window_like 226 An object that will be used to extract start and stop times. 227 start : time_like 228 A new window that starts at this time. 229 Give ``True`` to use the start of the existing window. 230 stop : time_like 231 A new window stat stops at this time 232 Give ``True`` to use the stop of the existing window. 233 center : time_like 234 A new window centered at this time 235 Give ``True`` to use the center of the existing window. 236 duration : float 237 A new window with this duration, in seconds 238 extend : float 239 Extend the new window defined by two of the four above 240 with this much at each end, in seconds. 241 242 Notes 243 ----- 244 This takes the same basic inputs as `TimeWindow`, defining a window 245 with two out of four of ``start``, ``stop``, ``center``, and ``duration``. 246 Additionally, one of ``start``, ``stop``, ``center`` can be given as ``True`` 247 instead of an actual time to use the times of the existing window. 248 If only one of ``start`` and ``stop`` is given, the other one is filled from 249 the existing window. 250 251 If a single positional argument is given, it should be time_window_like, 252 i.e., have a defined start and stop time, which will then be used. 253 This can be one of `TimeWindow`, and `xarray.Dataset`. 254 If it is a dataset, it must have a time attribute, and its minimum and maximum 255 will be used as the start and stop for the new window. 256 """ 257 if time is None: 258 # Period specified with keyword arguments, convert to period. 259 if (start, stop, center, duration).count(None) == 3: 260 # Only one argument which has to be start or stop, fill the other from self. 261 if start is not None: 262 window = type(self)(start=start, stop=self.stop, extend=extend) 263 elif stop is not None: 264 window = type(self)(start=self.start, stop=stop, extend=extend) 265 else: 266 raise TypeError("Cannot create subwindow from arguments") 267 elif duration is not None and True in (start, stop, center): 268 if start is True: 269 window = type(self)(start=self.start, duration=duration, extend=extend) 270 elif stop is True: 271 window = type(self)(stop=self.stop, duration=duration, extend=extend) 272 elif center is True: 273 window = type(self)(center=self.center, duration=duration, extend=extend) 274 else: 275 raise TypeError("Cannot create subwindow from arguments") 276 else: 277 # The same types explicit arguments as the normal constructor 278 window = type(self)(start=start, stop=stop, center=center, duration=duration, extend=extend) 279 elif isinstance(time, type(self)): 280 window = time 281 elif isinstance(time, xr.Dataset): 282 window = type(self)(start=time.time.min(), stop=time.time.max(), extend=extend) 283 else: 284 # It's not a period, so it should be a single datetime. Parse or convert, check validity. 285 time = time_to_datetime(time) 286 if time not in self: 287 raise ValueError("Received time outside of contained window") 288 return time 289 290 if window not in self: 291 raise ValueError("Requested subwindow is outside contained time window") 292 return window
293 294 def __repr__(self): 295 return f"TimeWindow(start={self.start.format_rfc3339()}, stop={self.stop.format_rfc3339()})" 296 297 @property 298 def start(self): 299 """The start of this window.""" 300 return self._start 301 302 @property 303 def stop(self): 304 """The stop of this window.""" 305 return self._stop 306 307 @property 308 def center(self): 309 """The center of this window.""" 310 return self.start.add(seconds=self.duration / 2) 311 312 @property 313 def duration(self): 314 """The duration of this window, in seconds.""" 315 return (self.stop - self.start).in_seconds() 316 317 def __contains__(self, other): 318 if isinstance(other, type(self)): 319 return other.start in self and other.stop in self 320 return self.start <= time_to_datetime(other) <= self.stop
321 322 323def xr_ufunc(n_outputs=1, **apply_ufunc_kwargs): 324 """Decorator that wraps a function with `xr.apply_ufunc`. 325 326 This allows a function written for plain numpy arrays or scalars to 327 transparently accept `xarray.DataArray` inputs, with xarray handling 328 alignment, broadcasting, and coordinate propagation in a single pass 329 rather than on every intermediate operation. 330 331 The decorated function retains its original signature: positional and 332 keyword arguments are forwarded to the wrapped function via 333 `apply_ufunc`'s ``kwargs`` parameter. 334 335 Parameters 336 ---------- 337 n_outputs : int, default 1 338 Number of output arrays returned by the function. 339 Sets ``output_core_dims`` to a list of ``n_outputs`` empty tuples 340 unless explicitly provided in `apply_ufunc_kwargs`. 341 **apply_ufunc_kwargs 342 Additional keyword arguments passed directly to `xr.apply_ufunc`, 343 e.g. ``input_core_dims``, ``exclude_dims``, ``vectorize``, ``dask``. 344 345 Examples 346 -------- 347 Single-output function:: 348 349 @xr_ufunc() 350 def distance_to(lat_1, lon_1, lat_2, lon_2): 351 ... 352 353 Multi-output function:: 354 355 @xr_ufunc(n_outputs=2) 356 def shift_position(lat, lon, distance, bearing): 357 ... 358 return new_lat, new_lon 359 """ 360 apply_ufunc_kwargs.setdefault("output_core_dims", [[] for _ in range(n_outputs)]) 361 def decorator(func): 362 @functools.wraps(func) 363 def wrapper(*args, **kwargs): 364 return xr.apply_ufunc( 365 func, 366 *args, 367 **apply_ufunc_kwargs, 368 kwargs=kwargs, 369 ) 370 return wrapper 371 return decorator 372 373
[docs] 374class xrwrap: 375 """Wrapper around `xarray` objects. 376 377 This base class exists to delegate work to our internal 378 `xarray` objects. 379 """ 380
[docs] 381 @classmethod 382 def from_dataset(cls, dataset, **kwargs): 383 """Instantiate the class from a dataset. 384 385 This classmethod is mainly used to choose the correct class 386 to instantiate, depending on the data. 387 """ 388 return cls(dataset, **kwargs)
389 390 def __init__(self, data, attrs=None): 391 if isinstance(data, xrwrap): 392 data = data.data 393 self._data = data 394 if attrs: 395 self._data.attrs.update(attrs) 396 397 @property 398 def attrs(self): 399 """Attributes stored in the data.""" 400 return self.data.attrs 401 402 @property 403 def data(self): 404 """The contained data.""" 405 return self._data 406 407 def __array_wrap__(self, data, context=None, transfer_attributes=True): 408 """Wrap output data in in a new object. 409 410 This takes data from some processing and wraps it back into a 411 suitable class. If no suitable class was found, the data is 412 returned as is. 413 """ 414 try: 415 data = self.from_dataset(data) 416 except NotImplementedError: 417 return data 418 if transfer_attributes: 419 data.attrs.update(self.attrs) 420 return data 421
[docs] 422 def sel(self, indexers=None, method=None, tolerance=None, drop=False, drop_allnan=True, **indexers_kwargs): 423 """Select a subset of the data from the coordinate labels. 424 425 The selection is easiest done with keywords, e.g. ``obj.sel(sensor="Colmar 1")`` 426 to select a specific sensor. For numerical coordinates, ``method="nearest"`` can 427 be quite useful. Use a slice to select a range of values, e.g., 428 ``obj.sel(frequency=slice(10, 100))``. 429 430 For more details, see `xarray.DataArray.sel` and `xarray.Dataset.sel`. 431 """ 432 new = self.data.sel(indexers=indexers, method=method, tolerance=tolerance, drop=drop, **indexers_kwargs) 433 if drop_allnan: 434 new = new.where(~new.isnull(), drop=True) 435 return self.__array_wrap__(new)
436
[docs] 437 def isel(self, indexers=None, drop=False, missing_dims="raise", drop_allnan=True, **indexers_kwargs): 438 """Select a subset of the data from the coordinate indices. 439 440 The selection is easiest done with keywords, e.g. ``obj.sel(sensor=0)`` 441 to select the zeroth sensor. Use a slice to select a range of values, e.g., 442 ``obj.sel(frequency=slice(10, 100))``. 443 444 For more details, see `xarray.DataArray.isel` and `xarray.Dataset.isel`. 445 """ 446 new = self.data.isel(indexers=indexers, drop=drop, missing_dims=missing_dims, **indexers_kwargs) 447 if drop_allnan: 448 new = new.where(~new.isnull(), drop=True) 449 return self.__array_wrap__(new)
450
[docs] 451 def where(self, cond, other=xr.core.dtypes.NA, drop=False): 452 """Filter elements from this object according to a condition. 453 454 This method returns elements where the `cond` is True, 455 otherwise filling with `other`. 456 See `xarray.Dataset.where` for more details. 457 458 Parameters 459 ---------- 460 cond : DataArray, Dataset, or callable 461 Locations at which to preserve this object's values. dtype must be `bool`. 462 If a callable, the callable is passed this object, and the result is used as 463 the value for cond. 464 other : scalar, DataArray, Dataset, or callable, optional 465 Value to use for locations in this object where ``cond`` is False. 466 By default, these locations are filled with NA. If a callable, it must 467 expect this object as its only parameter. 468 drop : bool, default: False 469 If True, coordinate labels that only correspond to False values of 470 the condition are dropped from the result. 471 472 Returns 473 ------- 474 type(self) 475 An object wrapped using the same wrapper as the called object. 476 """ 477 return self.from_dataset(self.data.where(cond, other=other, drop=drop))
478 479 @property 480 def coords(self): 481 """The coordinate (dimension) arrays for this data. 482 483 Refer to `xarray.DataArray.coords` and `xarray.Dataset.coords`. 484 """ 485 return self.data.coords 486 487 @property 488 def dims(self): 489 """The dimensions of this data. 490 491 Refer to `xarray.DataArray.dims` and `xarray.Dataset.dims`. 492 """ 493 return self.data.dims 494 495 @property 496 def sizes(self): 497 """Mapping from dimension names to lengths.""" 498 return self.data.sizes 499
[docs] 500 def groupby(self, group): 501 for label, group in self.data.groupby(group, squeeze=False): 502 yield label, self.__array_wrap__(group.squeeze())
503 504 def _figure_template(self, **kwargs): 505 """Create default figure layout for this data.""" 506 import plotly.graph_objects as go 507 import plotly.io as pio 508 509 template = go.layout.Template() 510 template.update(pio.templates[pio.templates.default]) 511 return template 512
[docs] 513 def make_figure(self, **kwargs): 514 """Create a plotly figure, styled for this data. 515 516 Some useful keyword arguments: 517 518 - ``xaxis_title`` and ``yaxis_title`` controls axis titles for the figure. 519 - ``height`` and ``width`` sets the figure size in pixels. 520 - ``title`` adds a top level title. 521 """ 522 import plotly.graph_objects as go 523 524 fig = go.Figure(layout=dict(template=self._figure_template(**kwargs), **kwargs)) 525 return fig
526
[docs] 527 def save(self, path, append_dim=None, **kwargs): 528 """Save the data to a Zarr file at the specified path. 529 530 The class name is stored as an attribute, so it can be used later to reconstruct the object. 531 532 Parameters 533 ---------- 534 path : str or pathlib.Path 535 The file path where the data will be saved. The directory will be 536 created if it doesn't exist. 537 append_dim : str, optional 538 A dimension which should be used to append to existing data in the path. 539 **kwargs : dict, optional 540 Additional keyword arguments passed to the `xarray.Dataset.to_zarr` method, which 541 is responsible for saving the data to the Zarr format. 542 """ 543 from pathlib import Path 544 545 path = Path(path) 546 path.parent.mkdir(parents=True, exist_ok=True) 547 548 module = self.__class__.__module__ 549 name = self.__class__.__qualname__ 550 if module is not None and module != "__builtin__": 551 name = module + "." + name 552 553 data = self.data.assign_attrs(__uwacan_class__=name) 554 555 if append_dim: 556 if append_dim not in data.dims: 557 data = data.expand_dims(append_dim) 558 if not path.exists(): 559 # Cannot append if there is no data to append to, so we just write as normal 560 append_dim = None 561 if isinstance(data, xr.Dataset): 562 for var in data.dara_vars.values(): 563 if np.issubdtype(var.dtype, np.datetime64): 564 var.encoding = {"units": "nanoseconds since 1970-01-01"} 565 for coord in data.coords.values(): 566 if np.issubdtype(coord.dtype, np.datetime64): 567 coord.encoding = {"units": "nanoseconds since 1970-01-01"} 568 569 kwargs.setdefault("consolidated", False) 570 data.to_zarr(path, append_dim=append_dim, **kwargs)
571
[docs] 572 @classmethod 573 def load(cls, path, lookup_class=True, **kwargs): 574 """Load data from a Zarr file and optionally restore the original class. 575 576 This method loads data from a Zarr file and attempts to reconstruct the 577 original class that was used to save the data. The class information is 578 stored in the `__uwacan_class__` attribute of the dataset. If the class 579 is found, the method dynamically loads and instantiates it. 580 581 Parameters 582 ---------- 583 path : str or pathlib.Path 584 The file path from which to load the data. 585 lookup_class : bool, default=True 586 If True attempts to restore the original class from the 587 metadata stored in the Zarr file. If False, the called class is used 588 to load the data. 589 590 Returns 591 ------- 592 obj : cls or ``__uwacan_class__`` 593 An instance of the class used to save the data (if found in the Zarr 594 file's metadata), or an instance of the called class. 595 596 Notes 597 ----- 598 - The Zarr file should have the `__uwacan_class__` attribute in the 599 dataset's metadata to allow class reconstruction. 600 - If the class cannot be found, or if there is an error during dynamic 601 import, the method falls back to using the current class `cls` to 602 instantiate the object. 603 604 """ 605 kwargs.setdefault("consolidated", False) 606 data = xr.open_zarr(path, **kwargs) 607 if "__xarray_dataarray_variable__" in data: 608 data = data["__xarray_dataarray_variable__"] 609 data.name = None 610 if lookup_class and "__uwacan_class__" in data.attrs: 611 import importlib 612 613 module_name, class_name = data.attrs["__uwacan_class__"].rsplit(".", 1) 614 try: 615 module = importlib.import_module(module_name) 616 cls = getattr(module, class_name) 617 except: 618 pass 619 return cls(data)
620 621
[docs] 622class DataArrayWrap(xrwrap, np.lib.mixins.NDArrayOperatorsMixin): 623 """Wrapper around `xarray.DataArray`. 624 625 This base class exists to wrap functionality in `xarray.DataArray`, 626 and numerical operations from `numpy`. 627 """ 628 629 _coords_set_by_init = set() 630 631 def __init__(self, data, dims=(), coords=None, **kwargs): 632 if isinstance(data, DataArrayWrap): 633 data = data.data 634 if not isinstance(data, xr.DataArray): 635 if isinstance(dims, str): 636 dims = [dims] 637 if dims is None: 638 dims = () 639 if np.ndim(data) != np.size(dims): 640 raise ValueError( 641 f"Dimension names '{dims}' for {type(self).__name__} does not match data with {np.ndim(data)} dimensions" 642 ) 643 data = xr.DataArray(data, dims=dims) 644 if coords is not None: 645 data = data.assign_coords( 646 **{name: coord for (name, coord) in coords.items() if name not in self._coords_set_by_init} 647 ) 648 super().__init__(data, **kwargs) 649 650 def __array__(self, dtype=None): 651 """Casts this object into a `numpy.ndarray`.""" 652 return self.data.__array__(dtype) 653 654 @staticmethod 655 def _implements_np_func(np_func): 656 """Tag implementations of `numpy` functions. 657 658 We use the ``__array_function__`` interface to implement many 659 `numpy` functions. This decorator will only tag an implementation 660 function with which `numpy` function it implements. 661 """ 662 663 def decorator(func): 664 func._implements_np_func = np_func 665 return func 666 667 return decorator 668 669 def __init_subclass__(cls): 670 """Set up the `numpy` implementations for a class. 671 672 This will run when a subclass is defined, and 673 check if there are any methods in it that are tagged 674 with a numpy implementation. All those implementations 675 will be stored in a class-level dictionary. 676 """ 677 implementations = {} 678 for name, value in cls.__dict__.items(): 679 if callable(value) and hasattr(value, "_implements_np_func"): 680 implementations[value._implements_np_func] = value 681 cls._np_func_implementations = implementations 682 683 def __array_function__(self, func, types, args, kwargs): 684 """Interfaces with numpy functions. 685 686 This will run when general numpy functions are used on objects 687 of this class. We have stored tagged implementations in class 688 dictionaries, so we can check if there is an explicit implementation. 689 We have no actual method which does this, so we go through the ``mro`` 690 manually. 691 692 If no explicit implementation is found, we try replacing all wrappers 693 with their `xarray.DataArray` objects, and call the function on them 694 instead. 695 """ 696 for cls in self.__class__.mro(): 697 if hasattr(cls, "_np_func_implementations"): 698 if func in cls._np_func_implementations: 699 func = cls._np_func_implementations[func] 700 break 701 else: 702 # We couldn't find an explicit implementation. 703 # Try replacing all _DataWrapper with their data and calling the function. 704 args = (arg.data if isinstance(arg, DataArrayWrap) else arg for arg in args) 705 out = func(*args, **kwargs) 706 if not isinstance(out, xr.DataArray): 707 try: 708 out = self.data.__array_wrap__(out) 709 except: 710 # We cannot wrap this in an xarray, then we cannot wrap in our own wrapper. 711 return out 712 out = self.__array_wrap__(out) 713 return out 714 return func(*args, **kwargs) 715 716 def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): 717 """Interfaces with `numpy.ufunc`. 718 719 Many functions in numpy are ufuncs. `xarray.DataArray.__array_ufunc__` will 720 do the heavy lifting here. 721 """ 722 inputs = (arg.data if isinstance(arg, DataArrayWrap) else arg for arg in inputs) 723 return self.__array_wrap__(self.data.__array_ufunc__(ufunc, method, *inputs, **kwargs)) 724
[docs] 725 @_implements_np_func(np.mean) 726 def mean(self, dim=..., **kwargs): 727 """Average of this data, along some dimension. 728 729 See `xarray.DataArray.mean` for more details. 730 """ 731 return self.__array_wrap__(self.data.mean(dim, **kwargs))
732
[docs] 733 @_implements_np_func(np.sum) 734 def sum(self, dim=..., **kwargs): 735 """Sum of this data, along some dimension. 736 737 See `xarray.DataArray.sum` for more details. 738 """ 739 return self.__array_wrap__(self.data.sum(dim, **kwargs))
740
[docs] 741 @_implements_np_func(np.std) 742 def std(self, dim=..., **kwargs): 743 """Standard deviation of this data, along some dimension. 744 745 See `xarray.DataArray.std` for more details. 746 """ # noqa: D401 747 return self.__array_wrap__(self.data.std(dim, **kwargs))
748
[docs] 749 @_implements_np_func(np.max) 750 def max(self, dim=..., **kwargs): 751 """Maximum of this data, along some dimension. 752 753 See `xarray.DataArray.max` for more details. 754 """ 755 return self.__array_wrap__(self.data.max(dim, **kwargs))
756
[docs] 757 @_implements_np_func(np.min) 758 def min(self, dim=..., **kwargs): 759 """Minimum of this data, along some dimension. 760 761 See `xarray.DataArray.min` for more details. 762 """ 763 return self.__array_wrap__(self.data.min(dim, **kwargs))
764
[docs] 765 def apply(self, func, *args, **kwargs): 766 """Apply some function to the contained data. 767 768 This calls the supplied function with the `xarray.DataArray` 769 in this object, then wraps the output in a similar container again. 770 """ 771 data = func(self.data, *args, **kwargs) 772 return self.__array_wrap__(data)
773
[docs] 774 def reduce(self, func, dim, **kwargs): 775 """Apply a reduction function along some dimension in this data. 776 777 See `xarray.DataArray.reduce` for more details. 778 """ 779 data = self.data.reduce(func=func, dim=dim, **kwargs) 780 return self.__array_wrap__(data)
781 782 783DataArrayWrap.__init_subclass__() 784 785
[docs] 786class DatasetWrap(xrwrap): 787 """Wraps `xarray.Dataset` objects. 788 789 This wraps a dataset by passing indexing to the underlying dataset 790 indexing, and mimics the `xarray` attribute access by passing 791 attribute access to indexing if the attribute exists in the dataset. 792 Using a MutableMapping from collections enables lots of dict-style 793 iteration. 794 """ 795 796 def __getitem__(self, key): 797 try: 798 return self._data[key] 799 except KeyError as e: 800 raise KeyError(*e.args) from None 801 802 def __setitem__(self, key, value): 803 self._data[key] = value 804 805 def __delitem__(self, key): 806 del self._data[key] 807 808 def __iter__(self): 809 return iter(self._data) 810 811 def __len__(self): 812 return len(self._data) 813 814 def __contains__(self, key): 815 return key in self.data 816 817 def keys(self): 818 return self.data.keys() 819 820 def items(self): 821 return self.data.items() 822 823 def values(self): 824 return self.data.values() 825 826 def get(self, key, default=None): 827 return self.data.get(key, default=default) 828 829 def __getattr__(self, key): 830 data = object.__getattribute__(self, "_data") 831 if key in data.variables: 832 return data[key] 833 raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") from None 834 835 def __dir__(self): 836 return sorted(set(super().__dir__()) | set(self._data.variables))
837 838
[docs] 839class Roller: 840 """Base class for rolling windows.""" 841 842 @property 843 def num_frames(self): 844 """The number of frames in this rolling output.""" 845 return self.settings["num_frames"] 846 847 @property 848 def shape(self): 849 """The shape of the output frames.""" 850 ... 851 852 @property 853 def dims(self): 854 """The dimensions of the output frames.""" 855 ... 856 857 @property 858 def coords(self): 859 """The coords of the output frames.""" 860 ... 861
[docs] 862 def numpy_frames(self): 863 """Generate numpy frames with data.""" 864 yield
865 866 def __iter__(self): 867 """Generate rolling frames of the contained data, of the same type.""" 868 yield
869 870
[docs] 871class TimeData(DataArrayWrap): 872 """Handing data which varies over time. 873 874 This class is mainly used to wrap time-signals of sampled sounds. 875 As such, the time data is assumed to be sampled at a constant samplerate. 876 877 Parameters 878 ---------- 879 data : array_like 880 A `numpy.ndarray` or a `xarray.DataArray` with the time data. 881 time : array_like, optional 882 A `numpy.ndarray` with ``dtype=datetime64[ns]`` containing time stamps for the samples. 883 start_time : time_like, optional 884 The start time for the first sample in the signal. 885 This should ideally be a proper time type, but it will be parsed if it is a string. 886 Defaults to "now" if not given. 887 samplerate : float, optional 888 The samplerate for this data, in Hz. 889 If the ``data`` is a `numpy.ndarray`, this has to be given. 890 If the ``data`` is a `xarray.DataArray` which already has a time coordinate, 891 this can be omitted. 892 dims : str or [str], default="time" 893 The dimensions of the data. Must have the same length as the number of dimensions in the data. 894 Only used for `numpy` inputs. 895 coords : `xarray.DataArray.coords` 896 Additional coordinates for this data. 897 attrs : dict, optional 898 Additional attributes to store with this data. 899 """ 900 901 _coords_set_by_init = {"time"} 902 903 def __init__( 904 self, data, time=None, start_time=None, samplerate=None, dims="time", coords=None, attrs=None, **kwargs 905 ): 906 super().__init__(data, dims=dims, coords=coords, attrs=attrs, **kwargs) 907 908 if samplerate is None and time is not None: 909 samplerate = np.timedelta64(1, "s") / np.mean(np.diff(time[:1000])) 910 elif time is None and samplerate is not None: 911 if start_time is None: 912 if "time" in self.data.coords: 913 start_time = self.data.time[0].item() 914 else: 915 start_time = "now" 916 n_samples = self.data.sizes["time"] 917 start_time = time_to_np(start_time) 918 offsets = np.arange(n_samples) * 1e9 / samplerate 919 time = start_time + offsets.astype("timedelta64[ns]") 920 921 if time is not None: 922 if not isinstance(time, xr.DataArray): 923 time = xr.DataArray(time, dims="time") 924 if samplerate is not None: 925 time.attrs["rate"] = samplerate 926 self.data.coords["time"] = time 927 elif "rate" not in self.data.time.attrs: 928 if samplerate is None: 929 samplerate = np.timedelta64(1, "s") / np.mean(np.diff(self.data.time[:1000])) 930 self.data.time.attrs["rate"] = samplerate 931
[docs] 932 @classmethod 933 def from_dataset(cls, dataset): 934 if "time" in dataset.dims: 935 return super().from_dataset(dataset) 936 # This is not time data any more, let the caller catch the error. 937 raise NotImplementedError(f"Cannot instantiate '{cls.__name__}' from data lacking time dimension")
938 939 @property 940 def samplerate(self): 941 return self.data.time.rate 942 943 @property 944 def time(self): 945 """Time coordinates for this data.""" 946 return self.data.time 947 948 @property 949 def time_window(self): 950 """A `TimeWindow` describing when the data start and stops.""" 951 # Calculating duration from number and rate means the stop points to the sample after the last, 952 # which is more intuitive when considering signal durations etc. 953 return TimeWindow( 954 start=self.data.time.data[0], 955 duration=self.data.sizes["time"] / self.samplerate, 956 ) 957
[docs] 958 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None): 959 """Select a subset of the data over time. 960 961 See `TimeWindow.subwindow` for details on the parameters. 962 """ 963 original_window = self.time_window 964 new_window = original_window.subwindow( 965 time, start=start, stop=stop, center=center, duration=duration, extend=extend 966 ) 967 if isinstance(new_window, TimeWindow): 968 start = time_to_np(new_window.start) 969 stop = time_to_np(new_window.stop) 970 selected_data = self.data.sel(time=slice(start, stop)) 971 else: 972 time = time_to_np(new_window) 973 selected_data = self.data.sel(time=time, method="nearest") 974 975 new = type(self)(selected_data) 976 return new
977
[docs] 978 def listen(self, downsampling=1, upsampling=None, headroom=6, **kwargs): 979 """Play back this time data over speakers. 980 981 This tries to play the time data as audio using the `sounddevice` package. 982 The audio will be centered at 0 and normalized before playback. 983 984 Parameters 985 ---------- 986 downsampling : float, optional 987 Artificially uses a lower samplerate in playback to slow 988 down the audio, lowering the pitch of the content. 989 upsampling : int, optional 990 Decimates the data by selecting every Nth sample, speeding 991 up the audio and raising the pitch of the content. 992 headroom : float, default 6 993 How much headroom to leave in the normalization, in dB. 994 **kwargs : dict, optional 995 Remaining keyword arguments are passed to `sounddevice.play`. 996 The most useful arguments are ``blocking=True``, and ``device``. 997 """ 998 import sounddevice as sd 999 1000 sd.stop() 1001 data = self.data 1002 if upsampling: 1003 data = data[::upsampling] 1004 scaled = data - data.mean() 1005 scaled = scaled / np.max(np.abs(scaled)) * 10 ** (-headroom / 20) 1006 sd.play(scaled, samplerate=round(self.samplerate / downsampling), **kwargs)
1007
[docs] 1008 def rolling(self, duration=None, step=None, overlap=None, squeeze_time=True): 1009 """Generate rolling windows of this data. 1010 1011 Parameters 1012 ---------- 1013 duration : float 1014 The duration of each frame, in seconds. 1015 step : float 1016 The step between consecutive frames, in seconds. 1017 overlap : float 1018 The overlap between consecutive frames, as a fraction of the duration. 1019 squeeze_time : bool, default `True` 1020 If this is set to `False`, rolling over windows with single time values will still 1021 give output with a time axis/dim. 1022 1023 Returns 1024 ------- 1025 roller : `TimeDataRoller` 1026 A roller object to roll over the data. 1027 """ 1028 return TimeDataRoller(self, duration=duration, step=step, overlap=overlap, squeeze_time=squeeze_time)
1029 1030
[docs] 1031class TimeDataRoller(Roller): 1032 """Rolling windows of time data. 1033 1034 Parameters 1035 ---------- 1036 obj : TimeData 1037 The time data wrapper to roll over. 1038 duration : float 1039 The duration of each frame, in seconds. 1040 step : float 1041 The step between consecutive frames, in seconds. 1042 overlap : float 1043 The overlap between consecutive frames, as a fraction of the duration. 1044 squeeze_time : bool, default `True` 1045 If this is set to `False`, rolling over windows with single time values will still 1046 give output with a time axis/dim. 1047 """ 1048 1049 def __init__(self, obj, duration=None, step=None, overlap=0, squeeze_time=True): 1050 self.obj = obj 1051 self.settings = time_frame_settings( 1052 duration=duration, 1053 step=step, 1054 overlap=overlap, 1055 signal_length=self.obj.time_window.duration, 1056 samplerate=self.obj.samplerate, 1057 ) 1058 1059 if self.settings["samples_per_frame"] == 1 and squeeze_time: 1060 self._squeeze_time = True 1061 self._slices = list(range(self.settings["num_frames"])) 1062 else: 1063 self._squeeze_time = False 1064 self._slices = [ 1065 slice(start_idx, start_idx + self.settings["samples_per_frame"]) 1066 for start_idx in range( 1067 0, self.settings["num_frames"] * self.settings["sample_step"], self.settings["sample_step"] 1068 ) 1069 ] 1070 1071 @property 1072 def coords(self): 1073 coords = dict(self.obj.coords) 1074 if not self._squeeze_time: 1075 coords["time"] = coords["time"][: self.settings["samples_per_frame"]] 1076 else: 1077 del coords["time"] 1078 return coords 1079 1080 @property 1081 def dims(self): 1082 dims = list(self.obj.dims) 1083 dims.remove("time") 1084 if not self._squeeze_time: 1085 dims = ["time"] + dims 1086 return tuple(dims) 1087 1088 @property 1089 def shape(self): 1090 shape = list(self.obj.data.shape) 1091 del shape[self.obj.dims.index("time")] 1092 if not self._squeeze_time: 1093 shape = [self.settings["samples_per_frame"]] + shape 1094 return tuple(shape) 1095
[docs] 1096 def numpy_frames(self): 1097 data = self.obj.data.transpose("time", ...) 1098 # For large datasets the data is likely disk-mapped. Load in chunks to avoid running out of memory. 1099 num_chunks = max(1, data.nbytes // (2**30)) # At least 1 chunk 1100 chunk_size = len(self._slices) // num_chunks # Number of frames per chunk 1101 1102 for i in range(0, len(self._slices), chunk_size): 1103 chunk_slices = self._slices[i:i + chunk_size] 1104 # Load the chunk of data that covers all slices in this chunk 1105 if isinstance(chunk_slices[0], slice): 1106 start_idx = chunk_slices[0].start 1107 end_idx = chunk_slices[-1].stop 1108 else: 1109 start_idx = chunk_slices[0] 1110 end_idx = chunk_slices[-1] + self.settings["samples_per_frame"] 1111 chunk_data = data.isel(time=slice(start_idx, end_idx)).data 1112 # Yield individual frames from the loaded chunk 1113 for s in chunk_slices: 1114 if isinstance(s, slice): 1115 yield chunk_data[s.start - start_idx:s.stop - start_idx] 1116 else: 1117 yield chunk_data[s - start_idx]
1118
[docs] 1119 def time_data(self): 1120 """Generate rolling frames of the contained data, as `TimeData`.""" 1121 for s in self._slices: 1122 yield self.obj.isel(time=s)
1123 1124 def __iter__(self): 1125 yield from self.time_data()
1126 1127
[docs] 1128class FrequencyData(DataArrayWrap): 1129 """Handing data which varies over frequency. 1130 1131 This class is mainly used to wrap spectra of sampled sounds. 1132 Typically, this is represented as power spectral densities, 1133 but other data types are also possible. 1134 1135 Parameters 1136 ---------- 1137 data : array_like 1138 A `numpy.ndarray` or a `xarray.DataArray` with the frequency data. 1139 frequency : array_like, optional 1140 The frequencies corresponding to the data. Mandatory if ``data`` is a `numpy.ndarray`. 1141 frequency_band_lower : array_like, optional 1142 The lower edge of each frequency band. Must be provided together with 1143 `frequency_band_upper`. This is the preferred method for specifying 1144 frequency band information. 1145 frequency_band_upper : array_like, optional 1146 The upper edge of each frequency band. Must be provided together with 1147 `frequency_band_lower`. This is the preferred method for specifying 1148 frequency band information. 1149 bandwidth : float, optional 1150 A single bandwidth value valid for all frequencies. This is a convenience 1151 method that assumes centered frequency bands. For per-frequency bandwidth, 1152 use `frequency_band_lower`/`frequency_band_upper` instead. 1153 dims : str or [str], default="frequency" 1154 The dimensions of the data. Must have the same length as the number of dimensions in the data. 1155 Only used for `numpy` inputs. 1156 coords : `xarray.DataArray.coords` 1157 Additional coordinates for this data. 1158 attrs : dict, optional 1159 Additional attributes to store with this data. 1160 """ 1161 1162 _coords_set_by_init = {"frequency", "frequency_band_lower", "frequency_band_upper"} 1163 1164 def __init__(self, data, frequency=None, frequency_band_lower=None, frequency_band_upper=None, bandwidth=None, dims="frequency", coords=None, attrs=None, **kwargs): 1165 super().__init__(data, dims=dims, coords=coords, attrs=attrs, **kwargs) 1166 if frequency is not None: 1167 self.data.coords["frequency"] = frequency 1168 1169 # Handle band edges (preferred method) 1170 if frequency_band_lower is not None or frequency_band_upper is not None: 1171 if frequency_band_lower is None or frequency_band_upper is None: 1172 raise ValueError("Both frequency_band_lower and frequency_band_upper must be provided together") 1173 self.data.coords["frequency_band_lower"] = ("frequency", frequency_band_lower) 1174 self.data.coords["frequency_band_upper"] = ("frequency", frequency_band_upper) 1175 1176 # Handle single bandwidth value (convenience method) 1177 elif bandwidth is not None: 1178 if np.asarray(bandwidth).size > 1: 1179 raise ValueError("bandwidth must be a single value when provided. For per-frequency bandwidth, use frequency_band_lower/frequency_band_upper instead.") 1180 # Compute edges assuming centered bands 1181 frequency_band_lower = frequency - bandwidth / 2 1182 frequency_band_upper = frequency + bandwidth / 2 1183 self.data.coords["frequency_band_lower"] = ("frequency", frequency_band_lower) 1184 self.data.coords["frequency_band_upper"] = ("frequency", frequency_band_upper) 1185 1186 @property 1187 def bandwidth(self): 1188 """Compute bandwidth from frequency band edges. 1189 1190 Returns 1191 ------- 1192 bandwidth : xarray.DataArray 1193 The bandwidth of each frequency band, computed as 1194 frequency_band_upper - frequency_band_lower. 1195 """ 1196 return self.data.frequency_band_upper - self.data.frequency_band_lower 1197
[docs] 1198 @classmethod 1199 def from_dataset(cls, dataset): 1200 if "frequency" in dataset.dims: 1201 return super().from_dataset(dataset) 1202 # This is not frequency data any more, just return the plain xr.DataArray 1203 raise NotImplementedError(f"Cannot instantiate '{cls.__name__}' from data lacking frequency dimension")
1204 1205 @property 1206 def frequency(self): 1207 """The frequencies for the data.""" 1208 return self.data.frequency 1209
[docs] 1210 def estimate_bandwidth(self): 1211 """Estimate the bandwidth of the frequency vector. 1212 1213 This tries to determine if the frequency vector is linearly 1214 or logarithmically spaced, then uses either linear or logarithmic 1215 bandwidth estimation. 1216 1217 Returns 1218 ------- 1219 bandwidth : `xarray.DataArray` 1220 The estimated bandwidth. 1221 """ 1222 frequency = np.asarray(self.frequency) 1223 # Check if the frequency array seems linearly or logarithmically spaced 1224 if frequency[0] == 0: 1225 diff = frequency[2:] - frequency[1:-1] 1226 frac = frequency[2:] / frequency[1:-1] 1227 else: 1228 diff = frequency[1:] - frequency[:-1] 1229 frac = frequency[1:] / frequency[:-1] 1230 diff_err = np.std(diff) / np.mean(diff) 1231 frac_err = np.std(frac) / np.mean(frac) 1232 # Note: if there are three values and the first is zero, the std is 0 for both. 1233 # The equals option makes us end up in the linear frequency case. 1234 if diff_err <= frac_err: 1235 # Central differences, with forwards and backwards at the ends 1236 central = (frequency[2:] - frequency[:-2]) / 2 1237 first = frequency[1] - frequency[0] 1238 last = frequency[-1] - frequency[-2] 1239 else: 1240 # upper edge is at sqrt(f_{l+1} * f_l), lower edge is at sqrt(f_{l-1} * f_l) 1241 # the difference simplifies as below. 1242 central = (frequency[2:] ** 0.5 - frequency[:-2] ** 0.5) * frequency[1:-1] ** 0.5 1243 # extrapolating to one bin below lowest and one above highest using constant ratio 1244 # the expression above then simplifies to the expressions below 1245 first = (frequency[1] - frequency[0]) * (frequency[0] / frequency[1]) ** 0.5 1246 last = (frequency[-1] - frequency[-2]) * (frequency[-1] / frequency[-2]) ** 0.5 1247 bandwidth = np.concatenate([[first], central, [last]]) 1248 return xr.DataArray(bandwidth, coords={"frequency": self.frequency})
1249 1250 def _figure_template(self, **kwargs): 1251 template = super()._figure_template(**kwargs) 1252 template.layout.update( 1253 xaxis=dict( 1254 type="log", 1255 title="Frequency in Hz", 1256 ) 1257 ) 1258 return template 1259
[docs] 1260 def plot(self, **kwargs): 1261 """Make a scatter trace of this data. 1262 1263 Some useful keyword arguments: 1264 1265 - ``name`` sets the legendentry of this trace. 1266 - ``line_color`` and ``marker_color`` sets the line and marker colors. 1267 - ``mode`` sets the mode, typically one of ``"lines"``, ``"markers"``, or ``"markers+lines"``. 1268 1269 """ 1270 import plotly.graph_objects as go 1271 1272 if self.dims != ("frequency",): 1273 raise ValueError( 1274 f"Cannot simply scatter frequency data with dimensions '{self.dims}'. " 1275 "Use the `.groupby(dim)` method to loop over non-frequency dimensions." 1276 ) 1277 1278 return go.Scatter( 1279 x=self.frequency, 1280 y=self.data, 1281 **kwargs, 1282 )
1283 1284
[docs] 1285class TimeFrequencyData(TimeData, FrequencyData): 1286 """Handing data which varies over time and frequency. 1287 1288 This class is mainly used to wrap spectrogram-like data. 1289 There are no processing implemented in this class, but 1290 subclasses are free to add processing methods, custom 1291 initialization, or instantiation in class methods. 1292 1293 Parameters 1294 ---------- 1295 data : array_like 1296 A `numpy.ndarray` or a `xarray.DataArray` with the time-frequency data. 1297 start_time : time_like, optional 1298 The start time for the first sample in the signal. 1299 This should ideally be a proper time type, but it will be parsed if it is a string. 1300 Defaults to "now" if not given. 1301 samplerate : float, optional 1302 The samplerate for this data, in Hz. This is not the samplerate of the underlying time signal, 1303 but time steps of the time axis on the time-frequency data. 1304 If the `data` is a `numpy.ndarray`, this has to be given. 1305 If the `data` is a `xarray.DataArray` which already has a time coordinate, 1306 this can be omitted. 1307 frequency : array_like, optional 1308 The frequencies corresponding to the data. Mandatory if `data` is a `numpy.ndarray`. 1309 frequency_band_lower : array_like, optional 1310 The lower edge of each frequency band. Must be provided together with 1311 `frequency_band_upper`. This is the preferred method for specifying 1312 frequency band information. 1313 frequency_band_upper : array_like, optional 1314 The upper edge of each frequency band. Must be provided together with 1315 `frequency_band_lower`. This is the preferred method for specifying 1316 frequency band information. 1317 bandwidth : float, optional 1318 A single bandwidth value valid for all frequencies. This is a convenience 1319 method that assumes centered frequency bands. For per-frequency bandwidth, 1320 use `frequency_band_lower`/`frequency_band_upper` instead. 1321 dims : str or [str], optional 1322 The dimensions of the data. Must have the same length as the number of dimensions in the data. 1323 Mandatory used for `numpy` inputs, not used for `xarray` inputs. 1324 coords : `xarray.DataArray.coords` 1325 Additional coordinates for this data. 1326 attrs : dict, optional 1327 Additional attributes to store with this data. 1328 """ 1329 1330 _coords_set_by_init = {"time", "frequency", "frequency_band_lower", "frequency_band_upper"} 1331 1332 def __init__( 1333 self, 1334 data, 1335 time=None, 1336 start_time=None, 1337 samplerate=None, 1338 frequency=None, 1339 frequency_band_lower=None, 1340 frequency_band_upper=None, 1341 bandwidth=None, 1342 dims=None, 1343 coords=None, 1344 attrs=None, 1345 **kwargs, 1346 ): 1347 super().__init__( 1348 data, 1349 dims=dims, 1350 coords=coords, 1351 attrs=attrs, 1352 time=time, 1353 start_time=start_time, 1354 samplerate=samplerate, 1355 frequency=frequency, 1356 frequency_band_lower=frequency_band_lower, 1357 frequency_band_upper=frequency_band_upper, 1358 bandwidth=bandwidth, 1359 **kwargs, 1360 ) 1361
[docs] 1362 @classmethod 1363 def from_dataset(cls, data, **kwargs): 1364 if "frequency" not in data.dims: 1365 # It's not frequency-data, but it might be time data 1366 return TimeData.from_dataset(data, **kwargs) 1367 if "time" not in data.dims: 1368 # It's not time-data, but it might be frequency data 1369 return FrequencyData.from_dataset(data, **kwargs) 1370 return super().from_dataset(data, **kwargs)
1371
[docs] 1372 def rolling(self, duration=None, step=None, overlap=None, squeeze_time=True): 1373 """Generate rolling windows of this data. 1374 1375 By default, a single time instance of the data will be 1376 generated in each frame. 1377 1378 Parameters 1379 ---------- 1380 duration : float 1381 The duration of each frame, in seconds. 1382 step : float 1383 The step between consecutive frames, in seconds. 1384 overlap : float 1385 The overlap between consecutive frames, as a fraction of the duration. 1386 squeeze_time : bool, default `True` 1387 If this is set to `False`, rolling over windows with single time values will still 1388 give output with a time axis/dim. 1389 1390 Returns 1391 ------- 1392 roller : `TimeDataRoller` 1393 A roller object to roll over the data. 1394 """ 1395 if (duration, step) == (None, None): 1396 step = 1 / self.samplerate 1397 overlap = 0 1398 return TimeDataRoller(self, duration=duration, step=step, overlap=overlap, squeeze_time=squeeze_time)
1399 1400 def _figure_template(self, **kwargs): 1401 template = super()._figure_template(**kwargs) 1402 template.layout.update( 1403 xaxis=dict( 1404 title_text="Time", 1405 type=None, # Use default which is `linear`` for "normal" data and `date` for timestamp data 1406 ), 1407 yaxis=dict( 1408 title_text="Frequency in Hz", 1409 type="log", 1410 ), 1411 ) 1412 template.data.update( 1413 heatmap=[ 1414 dict( 1415 colorscale="viridis", 1416 colorbar_title_side="right", 1417 hovertemplate="%{x}<br>%{y:.5s}Hz<br>%{z}<extra></extra>", 1418 ) 1419 ] 1420 ) 1421 return template 1422
[docs] 1423 def plot(self, **kwargs): 1424 """Make a heatmap trace of this data. 1425 1426 Some useful keyword arguments: 1427 1428 - ``name`` sets the legendentry of this trace. 1429 - ``colorscale`` chooses the colorscale, e.g., ``"viridis"``, ``"delta"``, ``"twilight"``. 1430 - ``zmin`` and ``zmax`` sets the color range. 1431 1432 """ 1433 import plotly.graph_objects as go 1434 1435 if set(self.dims) != {"frequency", "time"}: 1436 raise ValueError( 1437 f"Cannot make heatmap of time-frequency data with dimensions '{self.dims}'. " 1438 "Use the `.groupby(dim)` method to loop over extra dimensions." 1439 ) 1440 1441 trace = go.Heatmap( 1442 x=self.time, 1443 y=self.frequency, 1444 z=self.data.transpose("frequency", "time"), 1445 ) 1446 return trace.update(**kwargs)
1447 1448
[docs] 1449class Transit: 1450 """Container for recorded ship transits. 1451 1452 This class is responsible for bundling recordings and position tracks. 1453 Note that this class does not take `TimeData` as the input. 1454 The track and recording will be restricted to a time window which 1455 both of them covers. 1456 1457 Attributes 1458 ---------- 1459 recording : `recordings.Recording` 1460 A recording of the ship transit. 1461 track : `positional.Track` 1462 The position track of the ship. 1463 """ 1464 1465 def __init__(self, recording, track): 1466 start = max(recording.time_window.start, track.time_window.start) 1467 stop = min(recording.time_window.stop, track.time_window.stop) 1468 1469 self.recording = recording.subwindow(start=start, stop=stop) 1470 self.track = track.subwindow(start=start, stop=stop) 1471 1472 @property 1473 def time_window(self): 1474 """A `TimeWindow` describing when the data start and stops.""" 1475 rec_window = self.recording.time_window 1476 track_window = self.track.time_window 1477 return TimeWindow(start=max(rec_window.start, track_window.start), stop=min(rec_window.stop, track_window.stop)) 1478
[docs] 1479 def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None): 1480 """Select a subset of the data over time. 1481 1482 See `TimeWindow.subwindow` for details on the parameters. 1483 """ 1484 subwindow = self.time_window.subwindow( 1485 time, start=start, stop=stop, center=center, duration=duration, extend=extend 1486 ) 1487 rec = self.recording.subwindow(subwindow) 1488 track = self.track.subwindow(subwindow) 1489 return type(self)(recording=rec, track=track)
1490 1491
[docs] 1492def dB(x, power=True, safe_zeros=True, ref=1): 1493 """Calculate the decibel of an input value. 1494 1495 Parameters 1496 ---------- 1497 x : numeric 1498 The value to take the decibel of 1499 power : boolean, default True 1500 Specifies if the input is a power-scale quantity or a root-power quantity. 1501 For power-scale quantities, the output is 10 log(x), for root-power quantities the output is 20 log(x). 1502 If there are negative values in a power-scale input, the handling can be controlled as follows: 1503 - `power='imag'`: return imaginary values 1504 - `power='nan'`: return nan where power < 0 1505 - `power=True`: as `nan`, but raises a warning. 1506 safe_zeros : boolean, default True 1507 If this option is on, all zero values in the input will be replaced with the smallest non-zero value. 1508 ref : numeric 1509 The reference unit for the decibel. Note that this should be in the same unit as the `x` input, 1510 e.g., if `x` is a power, the `ref` value might need squaring. 1511 """ 1512 if isinstance(x, DataArrayWrap): 1513 return x.apply(dB, power=power, safe_zeros=safe_zeros, ref=ref) 1514 1515 if safe_zeros and np.size(x) > 1: 1516 nonzero = x != 0 1517 min_value = np.nanmin(abs(xr.where(nonzero, x, np.nan, keep_attrs=True))) 1518 x = xr.where(nonzero, x, min_value, keep_attrs=True) 1519 if power: 1520 if np.any(x < 0): 1521 if power == "imag": 1522 return 10 * np.log10(x + 0j) 1523 if power == "nan": 1524 return 10 * np.log10(xr.where(x > 0, x, np.nan, keep_attrs=True)) 1525 return 10 * np.log10(x / ref) 1526 else: 1527 return 20 * np.log10(np.abs(x) / ref)
1528 1529
[docs] 1530def time_frame_settings( 1531 duration=None, 1532 step=None, 1533 overlap=None, 1534 resolution=None, 1535 num_frames=None, 1536 signal_length=None, 1537 samplerate=None, 1538): 1539 """Calculate time frame overlap settings from various input parameters. 1540 1541 Parameters 1542 ---------- 1543 duration : float 1544 How long each frame is, in seconds 1545 step : float 1546 The time between frame starts, in seconds 1547 overlap : float 1548 How much overlap there is between the frames, as a fraction of the duration. 1549 If this is negative the frames will have extra space between them. 1550 resolution : float 1551 Desired frequency resolution in Hz. Equals ``1/duration`` 1552 num_frames : int 1553 The total number of frames in the signal 1554 signal_length : float, optional 1555 The total length of the signal, in seconds 1556 samplerate : float, optional 1557 The samplerate of the signal, in Hz. 1558 Only used to compute sample versions of the outputs. 1559 1560 Returns 1561 ------- 1562 settings : dict 1563 The settings dict has keys: 1564 ``"duration"``, ``"step"``, ``"overlap"``, ``"resolution"`` 1565 and if ``signal_length`` was given: 1566 ``"num_frames"``, ``"signal_length"`` 1567 and if ``samplerate`` was given: 1568 ``"samples_per_frame"``, ``"sample_step"``, ``"sample_overlap"`` 1569 and if both ``samplerate`` and ``signal_length`` was given: 1570 ``"sample_total"`` 1571 1572 Raises 1573 ------ 1574 ValueError 1575 If the inputs are not sufficient to determine the frame, 1576 or if priorities cannot be disambiguated. 1577 1578 Notes 1579 ----- 1580 The parameters will be used in the following priority: 1581 1582 1. ``signal_length`` and ``num_frames`` 1583 2. ``step`` and ``duration`` 1584 3. ``resolution`` and ``overlap`` 1585 1586 Each frame ``idx=[0, ..., num_frames - 1]`` has:: 1587 1588 start = idx * step 1589 stop = idx * step + duration 1590 1591 The last frame thus ends at ``(num_frames - 1) step + duration``. 1592 The overlap relations are:: 1593 1594 duration = step / (1 - overlap) 1595 step = duration (1 - overlap) 1596 overlap = 1 - step / duration 1597 1598 If both ``resolution`` and ``overlap`` are given, they will be treated as "minimum" 1599 parameters. This means that the output will have at least an overlap as specified 1600 (`overlap_out >= overlap_in`) and at least the specified spectral resolution 1601 (`resolution_out <= resolution_in`). If the other input parameters are sufficient 1602 to fully determine the frame settings, the output will be checked to meet the 1603 requested resolution and overlap criteria. 1604 The overlap will default to a minimum of 0 if not given, i.e. frames that are 1605 edge to edge (unless more overlap is required from other parameters). 1606 1607 This gives us the following total list of priorities: 1608 1609 1) ``signal_length`` and ``num_frames`` are given 1610 a) ``step`` or ``duration`` (not both!) given 1611 b) ``resolution`` is given 1612 c) ``overlap`` is given 1613 2) ``step`` and ``duration`` given 1614 3) ``step`` given 1615 a) ``resolution`` is given 1616 b) ``overlap`` is given 1617 4) ``duration`` given (``resolution`` is ignored, ``overlap`` is used) 1618 5) ``resolution`` given 1619 1620 For cases 2-5, ``num_frames`` is calculated if ``signal_length`` is given, 1621 and a new truncated ``signal_length`` is returned. 1622 1623 If a samplerate was given, the number of samples in an output is also computed. 1624 This is done with:: 1625 1626 samples_per_frame = ceil(duration * samplerate) 1627 sample_step = floor(step * samplerate) 1628 sample_overlap = samples_per_frame - sample_step 1629 1630 We use ceil for the duration since it is often chosen to obtain a minimum frequency resolution. 1631 We use floor for the step to make sure we fit all the frames in the total length. 1632 Note that this means that the sample overlap might not equal 1633 ``round(duration * overlap * samplerate)``, due to how the rounding is done. 1634 It can at most be two samples larger than the rounded value. 1635 """ 1636 if None not in (num_frames, signal_length): 1637 if None not in (duration, step): 1638 raise ValueError( 1639 "Overdetermined time frames: " 1640 "All four of `num_frames`, `signal_length`, `duration`, and `step` " 1641 "cannot be specified simultaneously." 1642 ) 1643 elif step is not None: 1644 # We have the step, calculate the duration 1645 duration = signal_length - (num_frames - 1) * step 1646 elif duration is not None: 1647 # We have the duration, calculate the step 1648 step = (signal_length - duration) / (num_frames - 1) 1649 elif resolution is not None: 1650 duration = 1 / resolution 1651 step = (signal_length - duration) / (num_frames - 1) 1652 overlap = 1 - step / duration 1653 else: 1654 # This sets a duration to meet a minimum overlap, at least 0. 1655 duration = signal_length / (num_frames + (overlap or 0) - num_frames * (overlap or 0)) 1656 if resolution is not None: 1657 # If a resolution requires longer duration, that's set here. 1658 duration = max(duration, 1 / resolution) 1659 step = (signal_length - duration) / (num_frames - 1) 1660 elif None in (step, duration): 1661 # Missing one of step and duration, infer it from overlap and resolution 1662 if duration is not None: 1663 step = duration * (1 - (overlap or 0)) 1664 elif step is not None: 1665 duration = step / (1 - (overlap or 0)) 1666 if resolution is not None: 1667 duration = max(duration, 1 / resolution) 1668 elif resolution is not None: 1669 duration = 1 / resolution 1670 step = duration * (1 - (overlap or 0)) 1671 else: 1672 raise ValueError( 1673 "Must give at least one of (`step`, `duration`, `resolution`) or the pair of `signal_length` and `num_frames`." 1674 ) 1675 1676 if overlap is not None: 1677 if (1 - step / duration < overlap) and not np.isclose(1 - step / duration, overlap): 1678 raise ValueError(f"Time frame step {step}s and duration {duration}s does not meet minimum overlap fraction {overlap}") 1679 if resolution is not None: 1680 if (1 / duration > resolution) and not np.isclose(1 / duration, resolution): 1681 raise ValueError(f"Duration {duration}s does not meet minimum spectral resolution {resolution}Hz.") 1682 1683 settings = { 1684 "duration": duration, 1685 "step": step, 1686 "overlap": 1 - step / duration, 1687 "resolution": 1 / duration, 1688 } 1689 if signal_length is not None: 1690 num_frames = num_frames or int(np.floor((signal_length - duration) / step + 1)) 1691 settings["num_frames"] = num_frames 1692 settings["signal_length"] = (num_frames - 1) * step + duration 1693 if samplerate is not None: 1694 settings["samples_per_frame"] = int(np.ceil(samplerate * duration)) 1695 settings["sample_step"] = int(np.floor(samplerate * step)) 1696 settings["sample_overlap"] = settings["samples_per_frame"] - settings["sample_step"] 1697 if "num_frames" in settings: 1698 settings["sample_total"] = (settings["num_frames"] - 1) * settings["sample_step"] + settings["samples_per_frame"] # fmt: skip 1699 return settings
1700 1701
[docs] 1702def concatenate(items, dim=None, nan_between_items=False, sort=False, cls=None, **kwargs): 1703 """Concatenates a list of items along a specified dimension. 1704 1705 Parameters 1706 ---------- 1707 items : list of `xarray.Dataset` or `xarray.DataArray` or `xrwrap` 1708 A list of items to be concatenated. The items can be `xr.Dataset`, 1709 `xr.DataArray`, or instances of a subclass of `xrwrap`. 1710 Most `uwacan` data wrappers are compatible. 1711 dim : str or None, optional 1712 The dimension along which to concatenate. If `None`, the function will 1713 attempt to infer the dimension from the first item. If the items have 1714 more than one dimension, a `ValueError` will be raised. 1715 The concatenation dimension can be an existing dim or a new dim. 1716 nan_between_items : bool, optional, default=False 1717 If `True`, inserts NaN values between concatenated items along the specified dimension. 1718 This is useful for visualization purposes, as it makes most plotting libraries split the lines. 1719 sort : bool, optional, default=False 1720 If `True`, sorts the concatenated items by the specified dimension. 1721 cls : class or callable, optional 1722 The class or callable to use for wrapping the concatenated result. If `None`, 1723 the function will try to infer the class from the first item. If `cls` is a subclass 1724 of `xrwrap` and has a `from_dataset` method, it will use that method to wrap 1725 the result. 1726 **kwargs : dict 1727 Additional keyword arguments passed to the class or callable specified by `cls` when 1728 wrapping the concatenated result. 1729 1730 Returns 1731 ------- 1732 object 1733 The concatenated object, either as an `xr.Dataset`, `xr.DataArray`, or as an 1734 instance of the specified `cls` (or inferred class). 1735 """ 1736 if dim is None: 1737 if len(items[0].dims) != 1: 1738 raise ValueError("Cannot guess concatenation dimensions for multi-dimensional items.") 1739 dim = next(iter(items[0].dims)) 1740 1741 if cls is None: 1742 for item in items: 1743 if isinstance(item, xrwrap): 1744 cls = type(item) 1745 break 1746 1747 if isinstance(cls, type) and issubclass(cls, xrwrap) and hasattr(cls, "from_dataset"): 1748 cls = cls.from_dataset 1749 1750 items = [item.data if isinstance(item, xrwrap) else item for item in items] 1751 if nan_between_items: 1752 items = [x for item in items for x in [item, xr.full_like(item.isel({dim: -1}), np.nan).expand_dims(dim)]][:-1] 1753 items = xr.concat(items, dim=dim) 1754 if sort: 1755 items = items.sortby(dim) 1756 if cls: 1757 items = cls(items, **kwargs) 1758 return items