Source code for uwacan.recordings

   1"""Reading recordings from files on disk.
   2
   3This module contains classes used to read data created by
   4field data recorders, typically recording hydrophone data
   5as audio files.
   6
   7.. currentmodule:: uwacan.recordings
   8
   9Main recording classes
  10----------------------
  11.. autosummary::
  12    :toctree: generated
  13
  14    SoundTrap
  15    SylenceLP
  16    LoggerheadDSG
  17    MultichannelAudioInterfaceRecording
  18
  19Utilities
  20---------
  21.. autosummary::
  22    :toctree: generated
  23
  24    RecordingArray
  25    TimeCompensation
  26    calibrate_raw_data
  27    dBx_to_peak_volts
  28
  29Implementation interfaces
  30-------------------------
  31.. autosummary::
  32    :toctree: generated
  33
  34    Recording
  35    FileRecording
  36    AudioFileRecording
  37    AudioFileRoller
  38
  39"""
  40
  41import bisect
  42import collections
  43import numpy as np
  44from . import _core, positional
  45import abc
  46import soundfile
  47import xarray as xr
  48from pathlib import Path
  49
  50
def dBx_to_peak_volts(db):
    """Convert dBu or dBV to peak volts.

    Parameters
    ----------
    db : str
        Decibel value as a string with units, e.g., ``"10dBu"``, ``"-20dBV"``.
        Array-like inputs are converted element by element.

    Returns
    -------
    volts : float
        Peak voltage corresponding to the input decibel value.

    Raises
    ------
    ValueError
        If the input string does not contain a valid dB unit (``"dBu"`` or ``"dBV"``).
    """
    if np.ndim(db) != 0:
        # Not a scalar: apply the scalar conversion to every element.
        return np.vectorize(dBx_to_peak_volts)(db)

    # The unit matching is case-insensitive.
    text = db.lower()

    if "dbu" in text:
        level = float(text.replace("dbu", "").strip())
        # dBu is an RMS level -> multiply with 2**0.5 to get the peak.
        # The dBu reference is 1 mW over 600 Ω, i.e. sqrt(0.6) volts.
        return 10 ** (level / 20) * 2**0.5 * 0.6**0.5

    if "dbv" in text:
        level = float(text.replace("dbv", "").strip())
        # dBV is an RMS level -> multiply with 2**0.5 to get the peak.
        # The dBV reference is 1 V.
        return 10 ** (level / 20) * 2**0.5

    raise ValueError(f"Unknown dB volts reference unit in {text}")
85 86
def calibrate_raw_data(
    raw_data,
    sensitivity=None,
    gain=None,
    adc_range=None,
    file_range=None,
):
    """Calibrates raw data read from files into physical units.

    There are three conversion steps handled in this calibration function:

    1) The transducer conversion from physical quantity ``q`` into voltage ``u``
    2) Amplification of the transducer voltage ``u`` to ADC voltage ``v``
    3) Conversion from ADC voltage ``v`` to digital values ``d`` in the file.

    The sensitivity and gain inputs to this function are in decibels, converted to linear
    values as ``s = 10 ** (sensitivity / 20)`` and ``g = 10 ** (gain / 20)``.
    The ``adc_range`` is specified as the peak voltage that the ADC can handle,
    which should be recorded as ``file_range`` in the raw data.

    The equations that govern this are

    1) ``u = q * s``, sensitivity ``s`` in V/Q, e.g. V/Pa.
    2) ``v = u * g``, gain ``g`` is unitless.
    3) ``d / d_ref = v / v_ref``, relating file values to ADC voltage input.

    for a final expression of ``q = d * (v_ref / d_ref / s / g)``.
    All conversion factors default to 1 if not given.

    Parameters
    ----------
    raw_data : array_like
        The raw input data read from a file.
    sensitivity : array_like
        Sensitivity of the sensor, in dB re. V/Q,
        where Q is the desired physical unit.
    gain : array_like
        The gain applied to the voltage from the sensor, in dB.
    adc_range : array_like
        The peak voltage that the ADC can handle.
    file_range : array_like
        The peak value that the raw data contains,
        corresponding to the ``adc_range``.

    Returns
    -------
    q : array_like
        The calibrated values, as per the equations above.

    """
    # The factor is rebuilt with out-of-place operations at every step,
    # since in-place operations cannot handle broadcasting.
    scale = 1.0
    if adc_range is not None:
        scale = scale * adc_range
    if file_range is not None:
        scale = scale / file_range
    # Gain and sensitivity are both decibel values that divide the factor.
    for level_db in (gain, sensitivity):
        if level_db is not None:
            scale = scale / 10 ** (level_db / 20)

    return raw_data * scale
149 150 151class _LazyPropertyMixin: 152 def __init__(self, *args, **kwargs): 153 super().__init__(*args, **kwargs) 154 self.__property_cache = {} 155 156 @staticmethod 157 def _lazy_property(key): 158 def getter(self): 159 try: 160 return self.__property_cache[key] 161 except KeyError: 162 self.__property_cache.update(self._lazy_load()) 163 return self.__property_cache[key] 164 165 return property(getter) 166 167 @abc.abstractmethod 168 def _lazy_load(self): 169 return {} 170 171
class TimeCompensation:
    """Compensates time drift and offset in a recording.

    This is based on the actual and recorded time of one or more events.
    These have to be detected elsewhere, and the times for them are
    given here to build the model.
    If a single pair of times is given, the offset between them is used to compensate.
    If multiple pairs are given, the offset will be linearly interpolated between them.

    Parameters
    ----------
    actual_time : time_like or [time_like]
        Actual time for synchronization event(s).
    recorded_time : time_like or [time_like]
        Recorded time for synchronization event(s).
    """

    def __init__(self, actual_time, recorded_time):
        def as_sequence(times):
            # A string is iterable, but it represents one single time.
            if isinstance(times, str):
                return [times]
            try:
                iter(times)
            except TypeError:
                return [times]
            return times

        actual_time = as_sequence(actual_time)
        recorded_time = as_sequence(recorded_time)

        actual_time = [_core.time_to_datetime(t) for t in actual_time]
        recorded_time = [_core.time_to_datetime(t) for t in recorded_time]

        # Offset in seconds for each event, computed as recorded minus actual.
        self._time_offset = [
            (recorded - actual).in_seconds() for actual, recorded in zip(actual_time, recorded_time)
        ]
        if len(self._time_offset) > 1:
            # The interpolation nodes are only stored when there is something to interpolate.
            self._actual_timestamps = [t.timestamp() for t in actual_time]
            self._recorded_timestamps = [t.timestamp() for t in recorded_time]

    def recorded_to_actual(self, recorded_time):
        """Convert a recorded time to the actual time."""
        recorded_time = _core.time_to_datetime(recorded_time)
        offsets = self._time_offset
        if len(offsets) != 1:
            # Several events: interpolate the offset along the recorded time axis.
            offset = np.interp(recorded_time.timestamp(), self._recorded_timestamps, offsets)
        else:
            offset = offsets[0]
        return recorded_time.subtract(seconds=offset)

    def actual_to_recorded(self, actual_time):
        """Convert an actual time to the time recorded."""
        actual_time = _core.time_to_datetime(actual_time)
        offsets = self._time_offset
        if len(offsets) != 1:
            # Several events: interpolate the offset along the actual time axis.
            offset = np.interp(actual_time.timestamp(), self._actual_timestamps, offsets)
        else:
            offset = offsets[0]
        return actual_time.add(seconds=offset)
228 229
class Recording:
    """Base class for recordings.

    This class defines the interface that a recording
    has to implement in order for the rest of the
    package to be able to use it.
    """

    def __init__(self, sensor=None):
        self.sensor = sensor

    @property
    @abc.abstractmethod
    def samplerate(self):
        """The samplerate of the recording, in Hz."""

    @property
    @abc.abstractmethod
    def num_channels(self):
        """The number of channels in the recording, and in the read data."""

    @property
    @abc.abstractmethod
    def time_window(self):
        """A `~uwacan.TimeWindow` that covers the recording."""

    @abc.abstractmethod
    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
        """Select a subset of the recording.

        See `~uwacan.TimeWindow.subwindow` for details on the parameters.
        """

    @abc.abstractmethod
    def time_data(self):
        """Read stored time data.

        This method reads the recorded data from
        disk, and returns it as a `~uwacan.TimeData` object.
        """
270 271
class RecordingArray(Recording):
    """Holds multiple separate recordings.

    This class handles multiple different recording
    instances at once. This is typically needed
    when more than one hardware recorder was used
    for a field trial, and the data from them should
    be analyzed together.

    Parameters
    ----------
    *recordings : `Recording`
        The recording objects.
    """

    def __init__(self, *recordings):
        # The recordings are keyed on the label of their sensor.
        self.recordings = {rec.sensor.label: rec for rec in recordings}

    @property
    def samplerate(self):
        """The samplerate(s) of the recordings."""
        rates = [rec.samplerate for rec in self.recordings.values()]
        if np.ptp(rates) != 0:
            # The rates differ, so they are reported per sensor.
            labels = list(self.recordings.keys())
            return xr.DataArray(rates, dims="sensor", coords={"sensor": labels})
        return rates[0]

    @property
    def num_channels(self):
        """The total number of channels."""
        return sum(rec.num_channels for rec in self.recordings.values())

    @property
    def sensor(self):
        """The sensors used, as a `~uwacan.sensor_array`."""
        sensors = [rec.sensor for rec in self.recordings.values()]
        return positional.SensorArray.concatenate(sensors)

    @property
    def time_window(self):
        """A `~uwacan.TimeWindow` covering the time shared by all the recordings."""
        windows = [rec.time_window for rec in self.recordings.values()]
        # The latest start and the earliest stop give the overlap of all windows.
        latest_start = max(window.start for window in windows)
        earliest_stop = min(window.stop for window in windows)
        return _core.TimeWindow(start=latest_start, stop=earliest_stop)

    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
        """Select a subset of the recordings.

        See `~uwacan.TimeWindow.subwindow` for details on the parameters.
        """
        window = self.time_window.subwindow(
            time, start=start, stop=stop, center=center, duration=duration, extend=extend
        )
        selected = [rec.subwindow(window) for rec in self.recordings.values()]
        return type(self)(*selected)

    def time_data(self):
        """Read stored time data from all recordings, concatenated over sensors."""
        if np.ndim(self.samplerate) > 0:
            raise NotImplementedError("Stacking time data from recording with different samplerates not implemented!")
        stacked = xr.concat([rec.time_data().data for rec in self.recordings.values()], dim="sensor")
        return _core.TimeData(stacked)
328 329
class FileRecording(Recording):
    """Base class for recordings using multiple files.

    This class has some interface definitions and some
    shared logic for implementing recordings that use
    multiple files to store the data.

    Subclasses need to implement a `RecordedFile` inner class,
    some way to read the files (typically a classmethod), and
    the `time_data` function (typically using `raw_data`).

    .. autoclass:: uwacan.recordings::FileRecording.RecordedFile

    """

    allowable_interrupt = 0
    """How long a gap, in seconds, is allowed between files when reading."""

    class RecordedFile(abc.ABC):
        """Interface class for single recording files.

        This interface class defines how subclasses
        should implement wrappers around individual files.
        """

        def __init__(self, filepath):
            super().__init__()
            self.filepath = Path(filepath)

        @property
        def filepath(self):
            """The `Path` to the file."""
            return self._filepath

        @filepath.setter
        def filepath(self, filepath):
            if not isinstance(filepath, Path):
                filepath = Path(filepath)
            self._filepath = filepath

        @abc.abstractmethod
        def read_data(self, start_idx, stop_idx):
            """Read raw data from the file.

            Parameters
            ----------
            start_idx : int
                The starting index to read from, inclusive.
            stop_idx : int
                The last index to read to, exclusive.

            Returns
            -------
            data : array_like
                The data read from disk.
            """

        @property
        @abc.abstractmethod
        def start_time(self):
            """The start time of this file."""

        @property
        @abc.abstractmethod
        def stop_time(self):
            """The stop time of this file."""

        @property
        @abc.abstractmethod
        def duration(self):
            """The duration of this file."""

        @property
        @abc.abstractmethod
        def num_samples(self):
            """The number of samples in this file, per channel."""

        @property
        @abc.abstractmethod
        def num_channels(self):
            """The number of channels in this file."""

        @property
        @abc.abstractmethod
        def samplerate(self):
            """The samplerate in this file."""

        def __bool__(self):
            return self.filepath.exists()

        def __contains__(self, time):
            return (self.start_time <= time) and (time <= self.stop_time)

    def __init__(self, files, assume_sorted=False, **kwargs):
        super().__init__(**kwargs)
        if not assume_sorted:
            files = sorted(files, key=lambda f: f.start_time)
        self.files = files
        # Small LRU cache for time -> file lookups, see `_find_file_time`.
        self._file_time_cache = collections.OrderedDict()

    @property
    def samplerate(self):
        """The samplerate of the recording, in Hz, taken from the first file."""
        return self.files[0].samplerate

    @property
    def num_channels(self):
        """The number of channels in the recording, taken from the first file."""
        return self.files[0].num_channels

    @property
    def time_window(self):
        """A `~uwacan.TimeWindow` that covers the recording."""
        try:
            return self._window
        except AttributeError:
            # No window set yet: default to the span from the first to the last file.
            self._window = _core.TimeWindow(
                start=self.files[0].start_time,
                stop=self.files[-1].stop_time,
            )
        return self._window

    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
        """Select a subset of the recording.

        See `~uwacan.TimeWindow.subwindow` for details on the parameters.
        """
        new_window = self.time_window.subwindow(
            time, start=start, stop=stop, center=center, duration=duration, extend=extend
        )
        # The new recording keeps all the files, only the window is restricted.
        new = type(self)(
            files=self.files,
            sensor=self.sensor,
        )
        new._window = new_window
        return new

    def _find_file_time(self, time):
        """Find a file containing a certain time.

        Raises
        ------
        ValueError
            If no file contains the time.
        """
        time = _core.time_to_datetime(time)
        if time in self._file_time_cache:
            self._file_time_cache.move_to_end(time)
            return self._file_time_cache[time]

        # bisect_right(items, target) returns an idx such that items[idx - 1] <= target < items[idx]
        # Subtracting one from the output means we get the last file that starts before (or equal) to the target time
        idx = bisect.bisect_right(self.files, time, key=lambda file: file.start_time) - 1
        if time in self.files[idx]:
            self._file_time_cache[time] = self.files[idx]
            if len(self._file_time_cache) > 128:
                # Drop the least recently used entry.
                self._file_time_cache.popitem(last=False)
            return self.files[idx]
        else:
            raise ValueError(f"Time {time} does not exist inside any recorded files")

    def check_file_continuity(self, start_time=None, stop_time=None, allowable_interrupt=None, mode="raise"):
        """Check the continuity of recorded data.

        Parameters
        ----------
        start_time : datetime, optional
            The start time of the period to check for continuity. If not provided,
            the start of `self.time_window` will be used.
        stop_time : datetime, optional
            The stop time of the period to check for continuity. If not provided,
            the end of `self.time_window` will be used.
        allowable_interrupt : float, optional
            How much of a gap to allow between files. Will by default use the
            class attribute.
        mode : {"raise", "return", "print"}, optional
            The action to take when an interruption is found.
            - "raise" (default): raises a `ValueError` with details about the interruption.
            - "return": returns `False` if an interruption is found, `True` otherwise.
            - "print": prints a warning message with details about the interruption and continues execution.

        Returns
        -------
        bool
            `True` if no interruption stopped the check. This includes mode "print",
            where interruptions are only reported. In mode "return", `False` is
            returned at the first interruption.

        Raises
        ------
        ValueError
            If `mode` is set to "raise" and an interruption larger than the allowable interrupt
            is detected between the files, a `ValueError` is raised with details of the missing time.
            Also raised if the start or stop time is not inside any file.

        Notes
        -----
        The method checks the continuity of data by comparing the `stop_time` of each file
        with the `start_time` of the next file within the specified range, including the
        files that contain the start and stop times. If the gap between two files exceeds
        the allowable interrupt (in seconds), it is considered an interruption.
        """
        if start_time is None:
            start_time = self.time_window.start
        if stop_time is None:
            stop_time = self.time_window.stop
        if allowable_interrupt is None:
            allowable_interrupt = self.allowable_interrupt
        first_idx = self.files.index(self._find_file_time(start_time))
        last_idx = self.files.index(self._find_file_time(stop_time))

        # All files from the first to the last, inclusive. Pairing the list with itself
        # shifted by one gives every consecutive pair, including the pair ending at the last file.
        needed_files = self.files[first_idx : last_idx + 1]
        for early, late in zip(needed_files, needed_files[1:]):
            interrupt = (late.start_time - early.stop_time).in_seconds()
            if interrupt > allowable_interrupt:
                message = (
                    f"Data is not continuous, missing {interrupt} seconds between files\n "
                    f"{early.filepath} ending at {early.stop_time}\n"
                    f"{late.filepath} starting at {late.start_time}"
                )
                if mode == "raise":
                    raise ValueError(message)
                elif mode == "return":
                    return False
                else:
                    print(message)
        return True

    def raw_data(self, start_time=None, stop_time=None):
        """Read raw data from files on disk.

        Retrieves raw data samples from a start time to a stop time,
        defaulting to reading between times in ``self.time_window``.
        This method reads from multiple files if needed, and checks
        file timestamps for approximate data continuity.

        Parameters
        ----------
        start_time : date-like, optional
            The start of the time window to read.
        stop_time : date-like, optional
            The end of the time window to read.

        Returns
        -------
        numpy.ndarray
            The raw data read from the files, concatenated into a single NumPy array.
        """
        # This is just a wrapper to get a single frame from the frame generator.
        # Without a framesize, it defaults to a single large frame with all the data.
        frame = next(self.raw_frames(start_time=start_time, stop_time=stop_time))
        return frame

    def raw_frames(self, start_time=None, stop_time=None, framesize=None):
        """Generate frames of raw data from files on disk.

        This retrieves raw data samples between the start time and stop time
        (defaulting to times in ``self.time_window``), and yields frames of
        a fixed size. The frames have no overlap - use ``self.rolling`` for
        overlapping frames. If no framesize is given, it defaults to yielding
        a single large frame with all the data.
        If needed, data will be loaded from several files on disk. In those
        cases, the file timestamps will be checked for approximate data
        continuity before any loading starts.

        Parameters
        ----------
        start_time : date-like, optional
            The start of the time window to read.
        stop_time : date-like, optional
            The end of the time window to read.
        framesize : int, optional
            The number of samples to yield in each frame.

        Yields
        ------
        numpy.ndarray
            The frames with raw data. With a given framesize, the requested
            number of samples is rounded up to full frames, so the last frame
            can extend past the stop time. If the files run out of data
            before a frame is full, that last frame is shorter than the framesize.

        Notes
        -----
        This method is intended as the base data loader, mainly for internal
        use in the package. It's used both to load all data within a time
        window, but also as an IO optimization in ``self.rolling`` to load
        larger chunks of data than the desired rolling window.
        """
        start_time = start_time or self.time_window.start
        stop_time = stop_time or self.time_window.stop
        self.check_file_continuity(start_time=start_time, stop_time=stop_time)

        samplerate = self.samplerate
        remaining_samples = int(np.floor((stop_time - start_time).in_seconds() * samplerate))
        if remaining_samples == 0:
            # No samples requested, but we want to yield something of the right shape and type
            yield self.files[0].read_data(start_idx=0, stop_idx=0)
            return

        if framesize:
            # With a given framesize we increase the number of samples to yield full frames
            remaining_samples = int(framesize * np.ceil(remaining_samples / framesize))
        else:
            # One single frame with all samples. Used to get all data at once.
            framesize = remaining_samples

        # Where we read - sample_idx in file_idx. This moves along as we read more data.
        file_idx = self.files.index(self._find_file_time(start_time))
        sample_idx = int(np.floor((start_time - self.files[file_idx].start_time).in_seconds() * samplerate))
        num_files = len(self.files)
        frames_yielded = 0

        # Rounding up to full frames can ask for more samples than the files hold.
        # Both loops stop when the files run out, instead of indexing past the last file.
        while remaining_samples > 0 and file_idx < num_files:  # Loop over frames
            chunks = []
            remaining_in_frame = framesize
            while remaining_in_frame > 0 and file_idx < num_files:  # Loop over chunks from different files
                chunk = self.files[file_idx].read_data(start_idx=sample_idx, stop_idx=sample_idx + remaining_in_frame)
                chunks.append(chunk)
                remaining_in_frame -= chunk.shape[0]

                if remaining_in_frame:
                    # This file couldn't fill this frame - go to the beginning of the next file.
                    sample_idx = 0
                    file_idx += 1
                else:
                    # This frame is full, but the file has more data.
                    sample_idx += chunk.shape[0]

            # Assemble the frame from the chunks.
            if len(chunks) == 1:
                # Optimization - a single chunk doesn't need concatenation.
                frame = chunks[0]
            else:
                frame = np.concatenate(chunks, axis=0)

            if frame.shape[0] == 0 and frames_yielded:
                # The files ended exactly on the previous frame boundary - nothing more to yield.
                return
            remaining_samples -= frame.shape[0]
            frames_yielded += 1
            yield frame

    def select_file_time(self, time):
        """Get a recording for a specific file, by time.

        This finds the file corresponding to a specific time,
        then returns a recording subwindow corresponding
        to that file.

        Raises
        ------
        ValueError
            If the time is not inside any of the files.
        """
        time = _core.time_to_datetime(time)
        # Going backwards, the first file that starts before the time is the only candidate.
        for file in reversed(self.files):
            if file.start_time > time:
                continue
            if file.stop_time < time:
                # The time is in the gap after this file.
                break
            return self.subwindow(start=file.start_time, stop=file.stop_time)
        # Either the time is in a gap, or it is before the start of the first file.
        raise ValueError(f"Time {time} does not exist inside any recorded files.")

    def select_file_name(self, name):
        """Get a recording for a specific file, by name.

        This finds the file with a specific name,
        then returns a recording subwindow corresponding
        to that file. Only the stem of the names is compared,
        so folders and file extensions are ignored.

        Raises
        ------
        ValueError
            If no file matches the name.
        """
        stem = Path(name).stem
        for file in self.files:
            if stem == file.filepath.stem:
                return self.subwindow(start=file.start_time, stop=file.stop_time)
        raise ValueError(f"Could not find file matching name '{name}'")
676 677
class AudioFileRecording(FileRecording):
    """Class for audio file recordings.

    This class handles reading audio files using the
    `soundfile` python package.
    This is a fully functional class, but reading data
    requires a ``start_time_parser`` function passed to the
    `read_folder` classmethod. A more convenient approach
    is to subclass this class and customize the `read_folder`
    classmethod.
    """

    file_range = None
    """The input range of the read files."""
    gain = None
    """The gain of this recording."""
    adc_range = None
    """The voltage peak range of the adc in this recording."""

    @classmethod
    def read_folder(
        cls,
        folder,
        start_time_parser,
        sensor=None,
        file_filter=None,
        time_compensation=None,
        glob_pattern="**/*.wav",
        file_kwargs=None,
    ):
        """Read all matching files in a folder and parse their start times.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        start_time_parser : str or callable
            If a string is provided, it is treated as a format string and will be used
            to parse the start time from the filename. If a callable is provided, it
            should accept a file path and return a `whenever.Instant` object representing the start time.
        sensor : str or None, optional
            The sensor associated with the files.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files matching the ``glob_pattern`` are processed.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        glob_pattern : str, optional
            A glob pattern used to match files in the folder, by default ``"**/*.wav"``.
        file_kwargs : dict or callable, optional
            Additional keyword arguments to be passed when creating the `RecordedFile` instances.
            If a callable is provided, it should accept a file path and return a dictionary of keyword arguments.
            If None, no additional keyword arguments are passed to the files.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.
        """
        folder = Path(folder)
        if not folder.exists():
            raise RuntimeError(f"'{folder}' does not exist")
        if not folder.is_dir():
            raise RuntimeError(f"'{folder}' is not a folder")

        # All the optional arguments are normalized into callables,
        # so that the loop over the files below has a single code path.
        if isinstance(start_time_parser, str):
            start_time_format = start_time_parser

            def start_time_parser(file):
                return _core.time_to_datetime(file.stem, fmt=start_time_format)

        if time_compensation is None:

            def time_compensation(timestamp):
                return timestamp

        if isinstance(time_compensation, TimeCompensation):
            time_compensation = time_compensation.recorded_to_actual
        if not callable(time_compensation):
            # Anything else is treated as a fixed offset in seconds.
            offset = time_compensation

            def time_compensation(timestamp):
                return timestamp.subtract(seconds=offset)

        if file_filter is None:

            def file_filter(filepath):
                return True

        if file_kwargs is None:

            def file_kwargs(filepath):
                return {}

        if not callable(file_kwargs):
            _file_kwargs = file_kwargs

            def file_kwargs(filepath):
                return _file_kwargs

        files = []
        for file in folder.glob(glob_pattern):
            if file_filter(file):
                start_time = start_time_parser(file)
                files.append(cls.RecordedFile(file, time_compensation(start_time), **file_kwargs(file)))

        if not files:
            raise RuntimeError(f"No matching files found in '{folder}'")

        return cls(
            files=files,
            sensor=sensor,
        )

    class RecordedFile(FileRecording.RecordedFile, _LazyPropertyMixin):
        """Wrapper for audio files."""

        def __init__(self, filepath, start_time):
            super().__init__(filepath=filepath)
            self._start_time = start_time

        def _lazy_load(self):
            # The file header is only read when one of the lazy properties is first accessed.
            sfi = soundfile.info(self.filepath.as_posix())
            return super()._lazy_load() | dict(
                num_samples=sfi.frames,
                num_channels=sfi.channels,
                samplerate=sfi.samplerate,
            )

        @property
        def start_time(self):
            """The start time of this file."""
            return self._start_time

        num_samples = _LazyPropertyMixin._lazy_property("num_samples")
        num_channels = _LazyPropertyMixin._lazy_property("num_channels")
        samplerate = _LazyPropertyMixin._lazy_property("samplerate")

        @property
        def stop_time(self):
            """The stop time of this file."""
            return self.start_time.add(seconds=self.duration)

        @property
        def duration(self):
            """The duration of this file, in seconds."""
            return self.num_samples / self.samplerate

        def read_data(self, start_idx=None, stop_idx=None):
            """Read raw data from the file, as ``float32``.

            Parameters
            ----------
            start_idx : int, optional
                The starting index to read from, inclusive.
            stop_idx : int, optional
                The last index to read to, exclusive.

            Returns
            -------
            data : numpy.ndarray
                The data read from disk.
            """
            return soundfile.read(self.filepath.as_posix(), start=start_idx, stop=stop_idx, dtype="float32")[0]

    def time_data(self):
        """Read stored time data.

        This method reads the recorded data from disk, calibrates it with
        `calibrate_raw_data`, and returns the result. The sensor sensitivity
        is used in the calibration if a sensor is set on the recording.
        """
        data = self.raw_data()
        if np.ndim(data) == 1:
            dims = "time"
            coords = None
        elif np.ndim(data) == 2:
            if self.sensor is not None and "sensor" in self.sensor and np.shape(data)[1] == self.sensor["sensor"].size:
                # The number of columns matches the number of sensors, so the columns are labeled as sensors.
                dims = ("time", "sensor")
                coords = {"sensor": self.sensor["sensor"]}
            else:
                dims = ("time", "channel")
                if self.sensor is not None and "channel" in self.sensor:
                    coords = {"channel": self.sensor["channel"]}
                else:
                    coords = None
        else:
            raise NotImplementedError("Audio files with more than 2 dimensions are not supported")
        data = _core.TimeData(
            data=data,
            samplerate=self.samplerate,
            start_time=self.time_window.start,
            dims=dims,
            coords=coords,
        )
        # The sensor is optional (it defaults to None), so there might not be a sensitivity to apply.
        sensitivity = None if self.sensor is None else self.sensor.get("sensitivity", None)
        data = calibrate_raw_data(
            raw_data=data,
            sensitivity=sensitivity,
            gain=self.gain,
            adc_range=self.adc_range,
            file_range=self.file_range,
        )
        return data

    def rolling(self, duration=None, step=None, overlap=None):
        """Generate rolling frames of data.

        Parameters
        ----------
        duration : float
            The size of each frame, in seconds.
        step : float
            The step between consecutive frames, in seconds.
        overlap : float, default=0
            The fraction of overlap between consecutive frames. Should be less than one.
            Negative values will make "gaps" in the output.

        Returns
        -------
        roller : `AudioFileRoller`
            Implementation of rolling time windows for recordings.
        """
        return AudioFileRoller(self, duration=duration, step=step, overlap=overlap)
886 887
class AudioFileRoller(_core.TimeDataRoller):
    """Rolling windows of time data.

    Parameters
    ----------
    obj : AudioFileRecording
        The audio file wrapper to roll over.
    duration : float
        The duration of each frame, in seconds.
    step : float
        The step between consecutive frames, in seconds.
    overlap : float
        The overlap between consecutive frames, as a fraction of the duration.
    """

    def __init__(self, obj, duration=None, step=None, overlap=0):
        super().__init__(obj, duration=duration, step=step, overlap=overlap)
        # A zero-length read gives the dims and coords of the data without loading any samples.
        self._dummy_data = self.obj.subwindow(start=True, duration=0).time_data().data
        # The sensor is optional on recordings, so there might not be a sensitivity to apply.
        sensor = self.obj.sensor
        calibration = calibrate_raw_data(
            1,
            gain=self.obj.gain,
            sensitivity=None if sensor is None else sensor.get("sensitivity"),
            adc_range=self.obj.adc_range,
            file_range=self.obj.file_range,
        )
        if isinstance(calibration, xr.DataArray):
            # Align the calibration with the data, keeping the aligned calibration values.
            self._calibration = xr.align(self._dummy_data, calibration)[1].data
        else:
            # Plain scalars or arrays carry no coordinates to align on.
            self._calibration = np.asarray(calibration)

    @property
    def shape(self):
        """The shape of each frame, with the time axis first."""
        shape = [self._dummy_data.sizes[dim] for dim in self.dims if dim != "time"]
        shape = [self.settings["samples_per_frame"]] + shape
        return tuple(shape)

    @property
    def dims(self):
        """The dimensions of each frame, with ``"time"`` first."""
        dims = list(self._dummy_data.dims)
        dims.remove("time")
        return tuple(["time"] + dims)

    @property
    def coords(self):
        """The coordinates of the data, as a dict."""
        coords = dict(self._dummy_data.coords)
        return coords

    def numpy_frames(self, io_blocksize=1_000_000):
        """Generate calibrated frames as numpy arrays.

        Parameters
        ----------
        io_blocksize : int, default=1_000_000
            The number of samples to read from disk at a time.
            At least one full output frame is always read.

        Yields
        ------
        numpy.ndarray
            The calibrated frames, each with ``samples_per_frame`` samples.
        """
        # This method essentially re-chunks frames read from disk to have overlap and a possibly smaller size.
        # This allows reading frames from disk with a framesize optimized for reading, independently
        # from any desired signal processing frame size.
        samples_per_frame = self.settings["samples_per_frame"]
        sample_step = self.settings["sample_step"]
        io_blocksize = max(io_blocksize, samples_per_frame)  # We need to fit at least a full frame in one raw_frame.

        out = np.zeros(self.shape)
        buffer = np.zeros(0)
        frame_idx = 0

        # We loop over large frames from the data on disk, reducing IO overhead.
        for raw_frame in self.obj.raw_frames(framesize=io_blocksize):
            # While there's enough data in this raw_frame (and the buffer) to fill one output frame
            # (and we should still yield more frames).
            while raw_frame.shape[0] + buffer.shape[0] >= samples_per_frame and frame_idx < self.num_frames:
                if buffer.shape[0]:
                    # We have data in the buffer, it goes first into the output frame.
                    # The buffer is never larger than one output frame.
                    out[:buffer.shape[0]] = buffer
                    # The buffer won't fill the entire frame - take the rest of the samples from the raw_frame
                    out[buffer.shape[0]:] = raw_frame[:samples_per_frame - buffer.shape[0]]
                    # If we're out of buffer after taking a step, we start consuming the raw_frame.
                    raw_frame = raw_frame[max(0, sample_step - buffer.shape[0]):]
                    # Consume step samples from the buffer.
                    buffer = buffer[sample_step:]
                else:
                    # No buffer - just take a frame from the raw_frame
                    # Since `out` gets modified in place when copying the buffer, `out` cannot point to `raw_frame`!
                    # Hence the need to write the values into `out[:]`, not take a view and save it to `out`.
                    out[:] = raw_frame[:samples_per_frame]
                    # Consume step samples from the raw_frame.
                    raw_frame = raw_frame[sample_step:]

                # Calibrate, yield, and increment the frame index
                yield out * self._calibration
                frame_idx += 1
            # Not enough data in raw_frame (buffer is empty by now).
            # Buffer this incomplete raw frame and get a new one.
            buffer = raw_frame

    def time_data(self):
        """Generate calibrated frames as `~uwacan.TimeData` objects."""
        # Nanosecond offsets of the samples within one frame, relative to the start of the frame.
        offsets = np.arange(self.settings["samples_per_frame"]) * 1e9 / self.obj.samplerate
        first_time_vec = _core.time_to_np(self.obj.time_window.start) + offsets.astype("timedelta64[ns]")
        for frame_idx, frame in enumerate(self.numpy_frames()):
            time_since_start = frame_idx * self.settings["sample_step"] / self.obj.samplerate
            time_since_start = np.timedelta64(int(time_since_start * 1e9), "ns")
            yield _core.TimeData(
                frame,
                time=first_time_vec + time_since_start,
                samplerate=self.obj.samplerate,
                coords=self.coords,
                dims=self.dims,
            )

    def __iter__(self):
        # Iterating yields recording subwindows, one per frame. No data is read here.
        start_time = self.obj.time_window.start
        for _ in range(self.num_frames):
            yield self.obj.subwindow(start=start_time, duration=self.settings["duration"])
            start_time = start_time.add(seconds=self.settings["step"])
992 993
class SoundTrap(AudioFileRecording):
    """Class to read data from OceanInstruments SoundTrap recorders.

    The main way to read SoundTrap data is through the
    `read_folder` classmethod.
    """

    allowable_interrupt = 1
    gain = None
    adc_range = None
    file_range = 1

    @classmethod
    def read_folder(cls, folder, sensor=None, serial_number=None, time_compensation=None):
        """Read files in a folder, filtered on an optional serial number.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        serial_number : int or None, optional
            If provided, only files with the matching serial number in their filename will be processed.
            Files whose name does not start with an integer serial number are skipped.
            If None, all files in the folder will be processed.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        Notes
        -----
        This method filters the files in the folder based on the provided ``serial_number`` and
        parses the start time from the filenames using a specific format (``"YYMMDDHHmmss"``).
        The filename stem is split on ``"."``: the first part is taken as the serial number
        and the second part as the start time.
        It then delegates the actual file reading to the `read_folder` method of the parent class.
        """
        if serial_number is None:

            def file_filter(filepath):
                return True
        else:

            def file_filter(filepath):
                try:
                    return int(filepath.stem.split(".")[0]) == serial_number
                except ValueError:
                    # Not an integer prefix, so the file cannot belong to the requested recorder.
                    return False

        def start_time_parser(filepath):
            return _core.time_to_datetime(filepath.stem.split(".")[1], fmt="%y%m%d%H%M%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )
1060 1061
class SylenceLP(AudioFileRecording):
    """Class to read data from RTsys SylenceLP recorders.

    The main way to read Sylence data is through the
    `read_folder` classmethod.
    """

    adc_range = 2.5
    file_range = 1
    allowable_interrupt = 1

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            # Parses the file header by hand, since the recorder stores its configuration
            # in a "conf" chunk between the format description and the audio data.
            with self.filepath.open("rb") as file:
                # The first 36 bytes are the RIFF header and the format chunk.
                # Fields which are not used in the current implementation:
                #   [0:4]   chunk id, always equals RIFF
                #   [4:8]   total file size
                #   [8:12]  chunk format, always equals WAVE
                #   [12:16] subchunk id, always equals fmt
                #   [16:20] subchunk size, always equals 16
                #   [20:22] audio format
                #   [28:32] byte rate
                base_header = file.read(36)
                num_channels = int.from_bytes(base_header[22:24], byteorder="little", signed=False)
                if num_channels != 1:
                    raise ValueError(
                        f"Expected file for SylenceLP with a single channel, read file with {num_channels} channels"
                    )
                samplerate = int.from_bytes(base_header[24:28], byteorder="little", signed=False)
                bytes_per_sample = int.from_bytes(base_header[32:34], byteorder="little", signed=False)
                bitdepth = int.from_bytes(base_header[34:36], byteorder="little", signed=False)

                # Read the 8 byte chunk header (id and size) first, and check the size before reading the chunk.
                # `peek` is not used for this since it is allowed to return fewer bytes than requested.
                conf_header = file.read(8)
                conf_size = int.from_bytes(conf_header[4:8], byteorder="little", signed=False)
                if conf_size != 460:
                    raise ValueError(f"Incorrect size of SylenceLP config: '{conf_size}'B, expected 460B")
                # The chunk header stays in front of the chunk contents to keep indices aligned with the manual.
                conf_header += file.read(conf_size)

                subchunk_id = conf_header[:4].decode("ascii")  # always conf
                if subchunk_id != "conf":
                    raise ValueError(f"Expected 'conf' section in SylenceLP config, found '{subchunk_id}'")
                config_version = int.from_bytes(conf_header[8:12], byteorder="little", signed=False)
                if config_version != 2:
                    raise NotImplementedError(f"Cannot handle SylenceLP config version {config_version}")
                # Bytes [16:24] hold a timestamp, but it is not when the recording starts.
                # It is unknown what it actually is, so it is not read.
                channel = conf_header[24:28].decode("ascii")
                if channel.strip("\x00") != "":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{channel}'"
                    )
                samplerate_alt = np.frombuffer(conf_header[28:32], dtype="f4").squeeze()
                if samplerate != samplerate_alt:
                    raise ValueError(
                        f"Mismatched samplerate for hardware and file, read file samplerate {samplerate} and config samplerate {samplerate_alt}"
                    )

                # Sensitivity and gain are stored as four float32 values each, only the first is used.
                hydrophone_sensitivity = np.frombuffer(conf_header[32:48], dtype="f4")
                gain = np.frombuffer(conf_header[48:64], dtype="f4")
                # Bytes [64:80] hold the gain correction, which is just 1/gain.
                serialnumber = conf_header[80:100].decode("ascii")
                active_channels = conf_header[100:104].decode("ascii")
                if active_channels != "A\x00\x00\x00":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{active_channels}'"
                    )

                data_header = file.read(4).decode("ascii")
                if data_header != "data":
                    raise ValueError(f"Expected file header 'data', read {data_header}")
                data_size = int.from_bytes(file.read(4), byteorder="little", signed=False)

            # Integer arithmetic keeps the check exact, regardless of the file size.
            num_samples, leftover_bytes = divmod(data_size, bytes_per_sample)
            if leftover_bytes:
                raise ValueError(f"Size of data is not divisible by bytes per sample, file '{self.name}' is corrupt!")

            return super()._lazy_load() | dict(
                samplerate=samplerate,
                bitdepth=bitdepth,
                num_samples=num_samples,
                hydrophone_sensitivity=hydrophone_sensitivity[0],
                serial_number=serialnumber.strip("\x00"),
                # The stored gain is converted to decibels with a negative sign.
                gain=-20 * np.log10(gain[0]),
            )

        bitdepth = _LazyPropertyMixin._lazy_property("bitdepth")
        hydrophone_sensitivity = _LazyPropertyMixin._lazy_property("hydrophone_sensitivity")
        serial_number = _LazyPropertyMixin._lazy_property("serial_number")
        gain = _LazyPropertyMixin._lazy_property("gain")

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain is read from the header of the first file, and used for the entire recording.
        return self.files[0].gain

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        """

        def start_time_parser(filepath):
            # The start time is what follows after the first nine characters of the filename stem.
            return _core.time_to_datetime(filepath.stem[9:], fmt="%Y-%m-%d_%H-%M-%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )
1196 1197
class MultichannelAudioInterfaceRecording(AudioFileRecording):
    """Class for handling multichannel audio interface recordings."""

    file_range = 1

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain is stored with the sensor information, None if it is missing.
        return self.sensor.get("gain", None)

    @property
    def adc_range(self):  # noqa: D102, takes the docstring from the superclass
        # The ADC range is stored with the sensor information, None if it is missing.
        return self.sensor.get("adc_range", None)

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def __init__(self, filepath, start_time, channels):
            super().__init__(filepath=filepath, start_time=start_time)
            self.channels = list(channels)

        def read_data(self, start_idx=None, stop_idx=None):  # noqa: D102, takes the docstring from the superclass
            # All channels in the file are read, then the selected ones are picked out in the stored order.
            samples, _ = soundfile.read(
                self.filepath.as_posix(),
                start=start_idx,
                stop=stop_idx,
                dtype="float32",
                always_2d=True,
            )
            return samples[:, self.channels]

        @property
        def num_channels(self):  # noqa: D102, inherits from superclass.
            return len(self.channels)

    @classmethod
    def _merge_channel_info(cls, sensor, channel, gain, adc_range):
        """Combine channel, gain, and ADC range with the sensor information.

        The behavior depends on whether sensor information exists:

        1. With sensor information: values which the sensor does not already have
           are handed to `uwacan.positional.Sensor.with_data`, and the resulting
           `~uwacan.positional.Sensor` is returned. Missing values get defaults:
           channels count up from 0 for each sensor, the gain is 0, and the ADC range is 1.
           Giving a value which the sensor already has is an error.
        2. Without sensor information: a new dataset is built with the channels as
           the dimension and coordinate, so they must be array_like.
           Non-scalar gain and adc_range are labelled with the same channels.

        Parameters
        ----------
        sensor : `uwacan.positional.Sensor` or None
            The sensor information to extend.
            If None, a new dataset is created and returned.
        channel : array_like, or dict
            Channel information to add.
        gain : array_like, scalar, or dict
            Gain information to add.
        adc_range : array_like, scalar, or dict
            ADC range information to add.

        Raises
        ------
        ValueError
            If a value is given explicitly while the sensor information already has it.
        """
        if sensor is not None:
            extra = {}
            if "channel" in sensor:
                if channel is not None:
                    raise ValueError(
                        "Should not give explicit channel if the channel information is already in the sensor information"
                    )
            else:
                extra["channel"] = list(range(len(sensor.sensors))) if channel is None else channel

            # Gain and ADC range follow the same rules, and differ only in the default value.
            for name, value, default in (("gain", gain, 0), ("adc_range", adc_range, 1)):
                if name not in sensor:
                    extra[name] = default if value is None else value
                elif value is not None:
                    raise ValueError(
                        f"Should not give explicit {name} if the {name} information is already in the sensor information"
                    )
            return sensor.with_data(**extra)

        # No sensor information, so everything is collected in a new dataset.
        dataset = xr.Dataset()
        if channel is not None:
            if not isinstance(channel, xr.DataArray):
                channel = xr.DataArray(channel, dims="channel", coords={"channel": channel})
            dataset["channel"] = channel
        for name, value in (("gain", gain), ("adc_range", adc_range)):
            if value is None:
                continue
            if not isinstance(value, xr.DataArray) and np.ndim(value) != 0:
                # Scalars are stored as they are, arrays get one value per channel.
                value = xr.DataArray(value, dims="channel", coords={"channel": channel})
            dataset[name] = value
        return dataset

    @classmethod
    def read_folder(
        cls,
        folder,
        start_time_parser,
        channel=None,
        gain=None,
        adc_range=None,
        one_recording_per_file=False,
        sensor=None,
        file_filter=None,
        time_compensation=None,
        glob_pattern="**/*.wav",
    ):
        """Read files in a folder.

        The audio files found in the folder are collected into a recording object.
        How to give the sensor and audio interface settings depends on
        whether there is sensor information or not:

        1. With sensor information: Use `uwacan.sensor_array` to specify
           the sensor particulars. Give the ``channel``, ``gain``, and ``adc_range``
           as dicts with the sensor names as keys, or scalars for all the sensors.
        2. Without sensor information: Give channel labels as a list to the ``channel``,
           and array_like or scalar ``gain`` and ``adc_range``.

        Parameters
        ----------
        folder : str or Path
            The folder containing the audio files.
        start_time_parser : callable or str
            - A function to parse the start time from file names, or
            - a string specifying the datetime format, e.g., ``"YYYY-MM-DD_HH-mm-ss"``.

        sensor : `~uwacan.positional.Sensor`
            Sensor information with sensitivity, positions, etc.
        channel : dict or array_like
            The channel index in the read data, from 0.

            1. A mapping from sensor names to channel index, if sensor information is given.
            2. A list of channel labels, if no sensor information is given.

        gain : dict, array_like, or scalar
            The gain used for the interface, in dB.

            1. A mapping from sensor names to interface gain, if sensor information is given.
            2. A list of gains, if no sensor information is given.
            3. A single gain for all interface channels/sensors.

        adc_range : dict, array_like, or scalar
            The peak voltage input of the ADC.

            1. A mapping from sensor names to interface ADC range, if sensor information is given.
            2. A list of ADC ranges, if no sensor information is given.
            3. A single ADC range for all interface channels/sensors.

        one_recording_per_file : bool, optional
            If True, the output will be a list of recordings, one for each file.
        file_filter : callable, optional
            A function to filter files based on specific criteria. Will be called with the file path.
            The file is skipped if the filter returns ``False``.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        glob_pattern : str, optional
            The glob pattern to match files in the folder. Defaults to ``"**/*.wav"``.

        """
        merged = cls._merge_channel_info(sensor=sensor, channel=channel, gain=gain, adc_range=adc_range)
        recording = super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=merged,
            file_filter=file_filter,
            time_compensation=time_compensation,
            glob_pattern=glob_pattern,
            # Each file object needs to know which channels to pick from the audio data.
            file_kwargs={"channels": merged["channel"].values},
        )
        if one_recording_per_file:
            # Split the recording at the file boundaries.
            return [recording.subwindow(start=file.start_time, stop=file.stop_time) for file in recording.files]
        return recording
1386 1387
class LoggerheadDSG(AudioFileRecording):
    """Class to read data from Loggerhead DSG recorders.

    The main way to read Loggerhead data is through the
    `read_folder` classmethod.
    """

    allowable_interrupt = 1
    adc_range = None
    file_range = 1

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        """

        def start_time_parser(filepath):
            # The first 15 characters of the filename stem hold the start time.
            return _core.time_to_datetime(filepath.stem[:15], "%Y%m%dT%H%M%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain is parsed from the name of the first file, and used for the entire recording.
        return self.files[0].gain

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            # The gain is the third underscore-separated field in the filename stem, ending with "dB".
            # Names with fewer fields get the same error as names with an unexpected third field.
            fields = self.filepath.stem.split("_")
            gain = fields[2] if len(fields) > 2 else ""
            if not gain.endswith("dB"):
                raise ValueError(
                    f"File `{self.filepath}` does not seem to be a file from a Loggerhead DSG, could not extract gain"
                )
            # `removesuffix` removes exactly the unit, `rstrip` would remove any trailing "d" and "B" characters.
            return super()._lazy_load() | dict(gain=float(gain.removesuffix("dB")))

        gain = _LazyPropertyMixin._lazy_property("gain")