Source code for uwacan.recordings

   1"""Reading recordings from files on disk.
   2
   3This module contains classes used to read data created by
   4field data recorders, typically recording hydrophone data
   5as audio files.
   6
   7.. currentmodule:: uwacan.recordings
   8
   9Main recording classes
  10----------------------
  11.. autosummary::
  12    :toctree: generated
  13
  14    SoundTrap
  15    SylenceLP
  16    LoggerheadDSG
  17    MultichannelAudioInterfaceRecording
  18
  19Utilities
  20---------
  21.. autosummary::
  22    :toctree: generated
  23
  24    RecordingArray
  25    TimeCompensation
  26    calibrate_raw_data
  27    dBx_to_peak_volts
  28
  29Implementation interfaces
  30-------------------------
  31.. autosummary::
  32    :toctree: generated
  33
  34    Recording
  35    FileRecording
  36    AudioFileRecording
  37    AudioFileRoller
  38
  39"""
  40
  41import bisect
  42import collections
  43import numpy as np
  44from . import _core, positional
  45import abc
  46import soundfile
  47import xarray as xr
  48from pathlib import Path
  49
  50

[docs]
  51def dBx_to_peak_volts(db):
  52    """Convert dBu or dBV to peak volts.
  53
  54    Parameters
  55    ----------
  56    db : str
  57        Decibel value as a string with units, e.g., ``"10dBu"``, ``"-20dBV"``.
  58
  59    Returns
  60    -------
  61    volts : float
  62        Peak voltage corresponding to the input decibel value.
  63
  64    Raises
  65    ------
  66    ValueError
  67        If the input string does not contain a valid dB unit (``"dBu"`` or ``"dBV"``).
  68    """
  69    if not np.ndim(db) == 0:
  70        return np.vectorize(dBx_to_peak_volts)(db)
  71    db = db.lower()
  72    if "dbu" in db:
  73        dbu = float(db.replace("dbu", "").strip())
  74        # dBu is an RMS level -> multiply with 2**0.5
  75        # dBu reference is 1mW over 600Ω, i.e. sqrt(0.6) volts
  76        volts = 10 ** (dbu / 20) * 2**0.5 * 0.6**0.5
  77    elif "dbv" in db:
  78        dbv = float(db.replace("dbv", "").strip())
  79        # dBV is an RMS level -> multiply with 2**0.5
  80        # dBV reference is 1V
  81        volts = 10 ** (dbv / 20) * 2**0.5
  82    else:
  83        raise ValueError(f"Unknown dB volts reference unit in {db}")
  84    return volts

  85
  86

[docs]
  87def calibrate_raw_data(
  88    raw_data,
  89    sensitivity=None,
  90    gain=None,
  91    adc_range=None,
  92    file_range=None,
  93):
  94    """Calibrates raw data read from files into physical units.
  95
  96    There are three conversion steps handled in this calibration function:
  97
  98    1) The transducer conversion from physical quantity ``q`` into voltage ``u``
  99    2) Amplification of the transducer voltage ``u`` to ADC voltage ``v``
 100    3) Conversion from ADC voltage ``v`` to digital values ``d`` in the file.
 101
 102    The sensitivity and gain inputs to this function are in decibels, converted to linear
 103    values as ``s = 10 ** (sensitivity / 20)`` and ``g = 10 ** (gain / 20)``.
 104    The ``adc_range`` is specified as the peak voltage that the ADC can handle,
 105    which should be recorded as ``file_range`` in the raw data.
 106
 107    The equations that govern this are
 108
 109    1) ``u = q * s``, sensitivity ``s`` in V/Q, e.g. V/Pa.
 110    2) ``v = u * g``, gain ``g`` is unitless.
 111    3) ``d / d_ref = v / v_ref``, relating file values to ADC voltage input.
 112
 113    for a final expression of ``q = d * (v_ref / d_ref / s / g)``.
 114    All conversion factors default to 1 if not given.
 115
 116    Parameters
 117    ----------
 118    raw_data : array_like
 119        The raw input data read from a file.
 120    sensitivity : array_like
 121        Sensitivity of the sensor, in dB re. V/Q,
 122        where Q is the desired physical unit.
 123    gain : array_like
 124        The gain applied to the voltage from the sensor, in dB.
 125    adc_range : array_like
 126        The peak voltage that the ADC can handle.
 127    file_range : array_like
 128        The peak value that the raw data contains,
 129        corresponding to the ``adc_range``.
 130
 131    Returns
 132    -------
 133    q : array_like
 134        The calibrated values, as per the equations above.
 135
 136    """
 137    calibration = 1.0
 138    # Avoiding in-place operations since they cannot handle broadcasting
 139    if adc_range is not None:
 140        calibration = calibration * adc_range
 141    if file_range is not None:
 142        calibration = calibration / file_range
 143    if gain is not None:
 144        calibration = calibration / 10 ** (gain / 20)
 145    if sensitivity is not None:
 146        calibration = calibration / 10 ** (sensitivity / 20)
 147
 148    return raw_data * calibration

 149
 150
 151class _LazyPropertyMixin:
 152    def __init__(self, *args, **kwargs):
 153        super().__init__(*args, **kwargs)
 154        self.__property_cache = {}
 155
 156    @staticmethod
 157    def _lazy_property(key):
 158        def getter(self):
 159            try:
 160                return self.__property_cache[key]
 161            except KeyError:
 162                self.__property_cache.update(self._lazy_load())
 163            return self.__property_cache[key]
 164
 165        return property(getter)
 166
 167    @abc.abstractmethod
 168    def _lazy_load(self):
 169        return {}
 170
 171

[docs]
 172class TimeCompensation:
 173    """Compensates time drift and offset in a recording.
 174
 175    This is based on the actual and recorded time of one or more events.
 176    These have to be detected elsewhere, and the times for them are
 177    given here to build the model.
 178    If a single pair of times is given, the offset between them is used to compensate.
 179    If multiple pairs are given, the offset will be linearly interpolated between them.
 180
 181    Parameters
 182    ----------
 183    actual_time : time_like or [time_like]
 184        Actual time for synchronization event(s).
 185    recorded_time : time_like or [time_like]
 186        Recorded time for synchronization event(s).
 187    """
 188
 189    def __init__(self, actual_time, recorded_time):
 190        if isinstance(actual_time, str):
 191            actual_time = [actual_time]
 192        if isinstance(recorded_time, str):
 193            recorded_time = [recorded_time]
 194        try:
 195            iter(actual_time)
 196        except TypeError:
 197            actual_time = [actual_time]
 198        try:
 199            iter(recorded_time)
 200        except TypeError:
 201            recorded_time = [recorded_time]
 202
 203        actual_time = list(map(_core.time_to_datetime, actual_time))
 204        recorded_time = list(map(_core.time_to_datetime, recorded_time))
 205
 206        self._time_offset = [(recorded - actual).in_seconds() for (recorded, actual) in zip(recorded_time, actual_time)]
 207        if len(self._time_offset) > 1:
 208            self._actual_timestamps = [t.timestamp() for t in actual_time]
 209            self._recorded_timestamps = [t.timestamp() for t in recorded_time]
 210

[docs]
 211    def recorded_to_actual(self, recorded_time):
 212        """Convert a recorded time to the actual time."""
 213        recorded_time = _core.time_to_datetime(recorded_time)
 214        if len(self._time_offset) == 1:
 215            time_offset = self._time_offset[0]
 216        else:
 217            time_offset = np.interp(recorded_time.timestamp(), self._recorded_timestamps, self._time_offset)
 218        return recorded_time.subtract(seconds=time_offset)

 219

[docs]
 220    def actual_to_recorded(self, actual_time):
 221        """Convert an actual time to the time recorded."""
 222        actual_time = _core.time_to_datetime(actual_time)
 223        if len(self._time_offset) == 1:
 224            time_offset = self._time_offset[0]
 225        else:
 226            time_offset = np.interp(actual_time.timestamp(), self._actual_timestamps, self._time_offset)
 227        return actual_time.add(seconds=time_offset)


 228
 229

[docs]
 230class Recording:
 231    """Base class for recordings.
 232
 233    This class defines the interface for what a
 234    recording needs to implement for the rest
 235    of the package to use it.
 236    """
 237
 238    def __init__(self, sensor=None):
 239        self.sensor = sensor
 240
 241    @property
 242    @abc.abstractmethod
 243    def samplerate(self):
 244        """The samplerate of the recording, in Hz."""
 245
 246    @property
 247    @abc.abstractmethod
 248    def num_channels(self):
 249        """The number of channel in the recording, and the read data."""
 250
 251    @property
 252    @abc.abstractmethod
 253    def time_window(self):
 254        """A `~uwacan.TimeWindow` that covers the recording."""
 255

[docs]
 256    @abc.abstractmethod
 257    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
 258        """Select a subset of the recording.
 259
 260        See `~uwacan.TimeWindow.subwindow` for details on the parameters.
 261        """

 262

[docs]
 263    @abc.abstractmethod
 264    def time_data(self):
 265        """Read stored time data.
 266
 267        This method reads the recorded data from
 268        disk, and returns it as a `~uwacan.TimeData` object.
 269        """


 270
 271

[docs]
 272class RecordingArray(Recording):
 273    """Holds multiple separate recordings.
 274
 275    This class handles multiple different recording
 276    instances at once. This is typically needed
 277    when more than one hardware recorder was used
 278    for a field trial, and the data from them should
 279    be analyzed together.
 280
 281    Parameters
 282    ----------
 283    *recordings : `Recording`
 284        The recording objects.
 285    """
 286
 287    def __init__(self, *recordings):
 288        self.recordings = {recording.sensor.label: recording for recording in recordings}
 289
 290    @property
 291    def samplerate(self):
 292        """The samplerate(s) of the recordings."""
 293        rates = [recording.samplerate for recording in self.recordings.values()]
 294        if np.ptp(rates) == 0:
 295            return rates[0]
 296        return xr.DataArray(rates, dims="sensor", coords={"sensor": list(self.recordings.keys())})
 297
 298    @property
 299    def num_channels(self):
 300        """The total number of channels."""
 301        return sum(recording.num_channels for recording in self.recordings.values())
 302
 303    @property
 304    def sensor(self):
 305        """The sensors used, as a `~uwacan.sensor_array`."""
 306        return positional.SensorArray.concatenate([rec.sensor for rec in self.recordings.values()])
 307
 308    @property
 309    def time_window(self):  # noqa: D102, takes the docstring from the superclass
 310        windows = [recording.time_window for recording in self.recordings.values()]
 311        return _core.TimeWindow(
 312            start=max(w.start for w in windows),
 313            stop=min(w.stop for w in windows),
 314        )
 315

[docs]
 316    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):  # noqa: D102, takes the docstring from the superclass
 317        subwindow = self.time_window.subwindow(
 318            time, start=start, stop=stop, center=center, duration=duration, extend=extend
 319        )
 320        return type(self)(*[recording.subwindow(subwindow) for recording in self.recordings.values()])

 321

[docs]
 322    def time_data(self):  # noqa: D102, takes the docstring from the superclass
 323        if np.ndim(self.samplerate) > 0:
 324            raise NotImplementedError("Stacking time data from recording with different samplerates not implemented!")
 325        return _core.TimeData(
 326            xr.concat([recording.time_data().data for recording in self.recordings.values()], dim="sensor")
 327        )


 328
 329

[docs]
 330class FileRecording(Recording):
 331    """Base class for recordings using multiple files.
 332
 333    This class has some interface definitions and some
 334    shared logic for implementing recordings that use
 335    multiple files to store the data.
 336
 337    Subclasses need to implement a `RecordedFile` inner class,
 338    some way to read the files (typically a classmethod), and
 339    the `time_data` function (typically using `raw_data`).
 340
 341    .. autoclass:: uwacan.recordings::FileRecording.RecordedFile
 342
 343    """
 344
 345    allowable_interrupt = 0
 346    """How long gap is allowed between files when reading."""
 347

[docs]
 348    class RecordedFile(abc.ABC):
 349        """Interface class for single recording files.
 350
 351        This interface class defines how subclasses
 352        should implement wrappers around individual files.
 353        """
 354
 355        def __init__(self, filepath):
 356            super().__init__()
 357            self.filepath = Path(filepath)
 358
 359        @property
 360        def filepath(self):
 361            """The `Path` to the file."""
 362            return self._filepath
 363
 364        @filepath.setter
 365        def filepath(self, filepath):
 366            if not isinstance(filepath, Path):
 367                filepath = Path(filepath)
 368            self._filepath = filepath
 369
 370        @abc.abstractmethod
 371        def read_data(self, start_idx, stop_idx):
 372            """Read raw data from the file.
 373
 374            Parameters
 375            ----------
 376            start_idx : int
 377                The starting index to read from, inclusive.
 378            stop_idx : int
 379                The last index to read to, exclusive.
 380
 381            Returns
 382            -------
 383            data : array_like
 384                The data read from disk.
 385            """
 386
 387        @property
 388        @abc.abstractmethod
 389        def start_time(self):
 390            """The start time of this file."""
 391
 392        @property
 393        @abc.abstractmethod
 394        def stop_time(self):
 395            """The stop time of this file."""
 396
 397        @property
 398        @abc.abstractmethod
 399        def duration(self):
 400            """The duration of this file."""
 401
 402        @property
 403        @abc.abstractmethod
 404        def num_samples(self):
 405            """The number of samples in this file, per channel."""
 406
 407        @property
 408        @abc.abstractmethod
 409        def num_channels(self):
 410            """The number of channels in this file."""
 411
 412        @property
 413        @abc.abstractmethod
 414        def samplerate(self):
 415            """The samplerate in this file."""
 416
 417        def __bool__(self):
 418            return self.filepath.exists()
 419
 420        def __contains__(self, time):
 421            return (self.start_time <= time) and (time <= self.stop_time)

 422
 423    def __init__(self, files, assume_sorted=False, **kwargs):
 424        super().__init__(**kwargs)
 425        if not assume_sorted:
 426            files = sorted(files, key=lambda f: f.start_time)
 427        self.files = files
 428        self._file_time_cache = collections.OrderedDict()
 429
 430    @property
 431    def samplerate(self):  # noqa: D102, takes the docstring from the superclass
 432        return self.files[0].samplerate
 433
 434    @property
 435    def num_channels(self):  # noqa: D102, takes the docstring from the superclass
 436        return self.files[0].num_channels
 437
 438    @property
 439    def time_window(self):  # noqa: D102, takes the docstring from the superclass
 440        try:
 441            return self._window
 442        except AttributeError:
 443            self._window = _core.TimeWindow(
 444                start=self.files[0].start_time,
 445                stop=self.files[-1].stop_time,
 446            )
 447        return self._window
 448

[docs]
 449    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):  # noqa: D102, takes the docstring from the superclass
 450        new_window = self.time_window.subwindow(
 451            time, start=start, stop=stop, center=center, duration=duration, extend=extend
 452        )
 453        new = type(self)(
 454            files=self.files,
 455            sensor=self.sensor,
 456        )
 457        new._window = new_window
 458        return new

 459
 460    def _find_file_time(self, time):
 461        """Find a file containing a certain time."""
 462        time = _core.time_to_datetime(time)
 463        if time in self._file_time_cache:
 464            self._file_time_cache.move_to_end(time)
 465            return self._file_time_cache[time]
 466
 467        # bisect_right(items, target) returns an idx such that items[idx - 1] <= target < items[idx]
 468        # Subtracting one from the output means we get the last file that starts before (or equal) to the target time
 469        idx = bisect.bisect_right(self.files, time, key=lambda file: file.start_time) - 1
 470        if time in self.files[idx]:
 471            self._file_time_cache[time] = self.files[idx]
 472            if len(self._file_time_cache) > 128:
 473                self._file_time_cache.popitem(last=False)
 474            return self.files[idx]
 475        else:
 476            raise ValueError(f"Time {time} does not exist inside any recorded files")
 477

[docs]
 478    def check_file_continuity(self, start_time=None, stop_time=None, allowable_interrupt=None, mode="raise"):
 479        """Check the continuity of recorded data.
 480
 481        Parameters
 482        ----------
 483        start_time : datetime, optional
 484            The start time of the period to check for continuity. If not provided,
 485            the start of `self.time_window` will be used.
 486        stop_time : datetime, optional
 487            The stop time of the period to check for continuity. If not provided,
 488            the end of `self.time_window` will be used.
 489        allowable_interrupt : float, optional
 490            How much of a gap to allow between files. Will by default use the
 491            class attribute.
 492        mode : {"raise", "return", "print"}, optional
 493            The action to take when an interruption is found.
 494            - "raise" (default): raises a `ValueError` with details about the interruption.
 495            - "return": returns `False` if an interruption is found, `True` otherwise.
 496            - "print": prints a warning message with details about the interruption and continues execution.
 497
 498        Returns
 499        -------
 500        bool
 501            Returns `True` if the data is continuous within the specified time range.
 502            If mode is set to "return", it returns `False` if an interruption is found.
 503            No return value if mode is set to "raise" or "print".
 504
 505        Raises
 506        ------
 507        ValueError
 508            If `mode` is set to "raise" and an interruption larger than `self.allowable_interrupt`
 509            is detected between the files, a `ValueError` is raised with details of the missing time.
 510
 511        Notes
 512        -----
 513        The method checks the continuity of data by comparing the `stop_time` of each file
 514        with the `start_time` of the next file within the specified range. If the gap between
 515        two files exceeds `self.allowable_interrupt` (in seconds), it is considered an interruption.
 516        """
 517        if start_time is None:
 518            start_time = self.time_window.start
 519        if stop_time is None:
 520            stop_time = self.time_window.stop
 521        if allowable_interrupt is None:
 522            allowable_interrupt = self.allowable_interrupt
 523        first_file = self._find_file_time(start_time)
 524        first_idx = self.files.index(first_file)
 525        last_file = self._find_file_time(stop_time)
 526        last_idx = self.files.index(last_file)
 527
 528        for early, late in zip(self.files[first_idx : last_idx - 1], self.files[first_idx + 1 : last_idx]):
 529            interrupt = (late.start_time - early.stop_time).in_seconds()
 530            if interrupt > allowable_interrupt:
 531                message = (
 532                    f"Data is not continuous, missing {interrupt} seconds between files\n "
 533                    f"{early.filepath} ending at {early.stop_time}\n"
 534                    f"{late.filepath} starting at {late.start_time}"
 535                )
 536                if mode == "raise":
 537                    raise ValueError(message)
 538                elif mode == "return":
 539                    return False
 540                else:
 541                    print(message)
 542        return True

 543

[docs]
 544    def raw_data(self, start_time=None, stop_time=None):
 545        """Read raw data from files on disk.
 546
 547        Retrieves raw data samples from a start time to a stop time,
 548        defaulting to reading between times in ``self.time_window``.
 549        This method reads from multiple files if needed, and checks
 550        file timestamps for approximate data continuity.
 551
 552        Parameters
 553        ----------
 554        start_time : date-like, optional
 555            The start of the time window to read.
 556        stop_time : date-like, optional
 557            The end of the time window to read.
 558
 559        Returns
 560        -------
 561        numpy.ndarray
 562            The raw data read from the files, concatenated into a single NumPy array.
 563        """
 564        # This is just a wrapper to get a single frame from the frame generator.
 565        # Without a framesize, it defaults to a single large frame with all the data.
 566        frame = next(self.raw_frames(start_time=start_time, stop_time=stop_time))
 567        return frame

 568

[docs]
 569    def raw_frames(self, start_time=None, stop_time=None, framesize=None):
 570        """Generate frames of raw data from files on disk.
 571
 572        This retrieves raw data samples between the start time and stop time
 573        (defaulting to times in ``self.time_window``), and yields frames of
 574        a fixed size. The frames have no overlap - use ``self.rolling`` for
 575        overlapping frames. If no framesize is given, it defaults to yielding
 576        a single large frame with all the data.
 577        If needed, data will be loaded from several files on disk. In those
 578        cases, the file timestamps will be checked for approximate data
 579        continuity before any loading starts.
 580
 581        Parameters
 582        ----------
 583        start_time : date-like, optional
 584            The start of the time window to read.
 585        stop_time : date-like, optional
 586            The end of the time window to read.
 587        framesize : int, optional
 588            The number of samples to yield in each frame.
 589
 590        Yields
 591        ------
 592        numpy.ndarray
 593            The frames with raw data.
 594
 595        Notes
 596        -----
 597        This method is intended as the base data loader, mainly for internal
 598        use in the package. It's used both to load all data within a time
 599        window, but also as an IO optimization in `self.rolling`` to load
 600        larger chunks of data than the desired rolling window.
 601        """
 602        start_time = start_time or self.time_window.start
 603        stop_time = stop_time or self.time_window.stop
 604        self.check_file_continuity(start_time=start_time, stop_time=stop_time)
 605
 606        samplerate = self.samplerate
 607        remaining_samples = int(np.floor((stop_time - start_time).in_seconds() * samplerate))
 608        if remaining_samples == 0:
 609            # No samples requested, but we want to yield something of the right shape and type
 610            yield self.files[0].read_data(start_idx=0, stop_idx=0)
 611            return
 612
 613        if framesize:
 614            # With a given framesize we increase the number of samples to yield full frames
 615            remaining_samples = int(framesize * np.ceil(remaining_samples / framesize))
 616        else:
 617            # One single frame with all samples. Used to get all data at once.
 618            framesize = remaining_samples
 619
 620        # Where we read - sample_idx in file_idx. This moves along as we read more data.
 621        file_idx = self.files.index(self._find_file_time(start_time))
 622        sample_idx = int(np.floor((start_time - self.files[file_idx].start_time).in_seconds() * samplerate))
 623
 624        while remaining_samples > 0:  # Loop over frames
 625            chunks = []
 626            remaining_in_frame = framesize
 627            while remaining_in_frame > 0:  # Loop over chunks from different files
 628                chunk = self.files[file_idx].read_data(start_idx=sample_idx, stop_idx=sample_idx + remaining_in_frame)
 629                chunks.append(chunk)
 630                remaining_in_frame -= chunk.shape[0]
 631
 632                if remaining_in_frame:
 633                    # This file couldn't fill this frame - go to the beginning of the next file.
 634                    sample_idx = 0
 635                    file_idx += 1
 636                else:
 637                    # This frame is full, but the file has more data.
 638                    sample_idx += chunk.shape[0]
 639
 640            # Assemble the frame from the chunks.
 641            if len(chunks) == 1:
 642                # Optimization - a single chunk doesn't need concatenation.
 643                frame = chunks[0]
 644            else:
 645                frame = np.concatenate(chunks, axis=0)
 646            remaining_samples -= frame.shape[0]
 647            yield frame

 648

[docs]
 649    def select_file_time(self, time):
 650        """Get a recording for a specific file, by time.
 651
 652        This finds the file corresponding to a specific time,
 653        then returns a recording subwindow corresponding
 654        to that file.
 655        """
 656        time = _core.time_to_datetime(time)
 657        for file in reversed(self.files):
 658            if file.start_time > time:
 659                continue
 660            if file.stop_time < time:
 661                raise ValueError(f"Time {time} does not exist inside any recorded files.")
 662            return self.subwindow(start=file.start_time, stop=file.stop_time)

 663

[docs]
 664    def select_file_name(self, name):
 665        """Get a recording for a specific file, by name.
 666
 667        This finds the file with a specific name,
 668        then returns a recording subwindow corresponding
 669        to that file.
 670        """
 671        stem = Path(name).stem
 672        for file in self.files:
 673            if stem == file.filepath.stem:
 674                return self.subwindow(start=file.start_time, stop=file.stop_time)
 675        raise ValueError(f"Could not file file matching name '{name}'")


 676
 677

[docs]
 678class AudioFileRecording(FileRecording):
 679    """Class for audio file recordings.
 680
 681    This class handles reading audio files using the
 682    `soundfile` python package.
 683    This is a fully functional class, but reading data
 684    requires a ``start_time_parser`` function passed to the
 685    `read_folder` classmethod. A more convenient approach
 686    is to subclass this class and customize the `read_folder`
 687    classmethod.
 688    """
 689
 690    file_range = None
 691    """The input range of the read files."""
 692    gain = None
 693    """The gain of this recording."""
 694    adc_range = None
 695    """The voltage peak range of the adc in this recording."""
 696

[docs]
 697    @classmethod
 698    def read_folder(
 699        cls,
 700        folder,
 701        start_time_parser,
 702        sensor=None,
 703        file_filter=None,
 704        time_compensation=None,
 705        glob_pattern="**/*.wav",
 706        file_kwargs=None,
 707    ):
 708        """Read all matching files in a folder and parse their start times.
 709
 710        Parameters
 711        ----------
 712        folder : str or Path
 713            The path to the folder containing the files.
 714        start_time_parser : str or callable
 715            If a string is provided, it is treated as a format string and will be used
 716            to parse the start time from the filename. If a callable is provided, it
 717            should accept a file path and return a `whenever.Instant` object representing the start time.
 718        sensor : str or None, optional
 719            The sensor associated with the files.
 720        file_filter : callable or None, optional
 721            A callable that accepts a file path and returns True if the file should be processed,
 722            and False otherwise. If None, all files matching the ``glob_pattern`` are processed.
 723        time_compensation : `TimeCompensation`, int, or callable, optional
 724            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
 725            - If an number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
 726            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
 727            - If None, no time compensation is applied.
 728        glob_pattern : str, optional
 729            A glob pattern used to match files in the folder, by default ``"**/*.wav"``.
 730        file_kwargs : dict or callable, optional
 731            Additional keyword arguments to be passed when creating the `RecordedFile` instances.
 732            If a callable is provided, it should accept a file path and return a dictionary of keyword arguments.
 733            If None, no additional keyword arguments are passed to the files.
 734
 735        Returns
 736        -------
 737        cls
 738            An instance of the class containing the loaded files.
 739
 740        Raises
 741        ------
 742        RuntimeError
 743            If the folder does not exist, is not a directory, or no matching files are found.
 744        """
 745        folder = Path(folder)
 746        if not folder.exists():
 747            raise RuntimeError(f"'{folder}' does not exist")
 748        if not folder.is_dir():
 749            raise RuntimeError(f"'{folder}' is not a folder")
 750
 751        if isinstance(start_time_parser, str):
 752            start_time_format = start_time_parser
 753
 754            def start_time_parser(file):
 755                return _core.time_to_datetime(file.stem, fmt=start_time_format)
 756
 757        if time_compensation is None:
 758
 759            def time_compensation(timestamp):
 760                return timestamp
 761
 762        if isinstance(time_compensation, TimeCompensation):
 763            time_compensation = time_compensation.recorded_to_actual
 764        if not callable(time_compensation):
 765            offset = time_compensation
 766
 767            def time_compensation(timestamp):
 768                return timestamp.subtract(seconds=offset)
 769
 770        if file_filter is None:
 771
 772            def file_filter(filepath):
 773                return True
 774
 775        if file_kwargs is None:
 776
 777            def file_kwargs(filepath):
 778                return {}
 779
 780        if not callable(file_kwargs):
 781            _file_kwargs = file_kwargs
 782
 783            def file_kwargs(filepath):
 784                return _file_kwargs
 785
 786        files = []
 787        for file in Path(folder).glob(glob_pattern):
 788            if file_filter(file):
 789                start_time = start_time_parser(file)
 790                files.append(cls.RecordedFile(file, time_compensation(start_time), **file_kwargs(file)))
 791
 792        if not files:
 793            raise RuntimeError(f"No matching files found in '{folder}'")
 794
 795        return cls(
 796            files=files,
 797            sensor=sensor,
 798        )

 799
 800    class RecordedFile(FileRecording.RecordedFile, _LazyPropertyMixin):
 801        """Wrapper for audio files."""
 802
 803        def __init__(self, filepath, start_time):
 804            super().__init__(filepath=filepath)
 805            self._start_time = start_time
 806
 807        def _lazy_load(self):
 808            sfi = soundfile.info(self.filepath.as_posix())
 809            return super()._lazy_load() | dict(
 810                num_samples=sfi.frames,
 811                num_channels=sfi.channels,
 812                samplerate=sfi.samplerate,
 813            )
 814
 815        @property
 816        def start_time(self):  # noqa: D102, takes the docstring from the superclass
 817            return self._start_time
 818
 819        num_samples = _LazyPropertyMixin._lazy_property("num_samples")
 820        num_channels = _LazyPropertyMixin._lazy_property("num_channels")
 821        samplerate = _LazyPropertyMixin._lazy_property("samplerate")
 822
 823        @property
 824        def stop_time(self):  # noqa: D102, takes the docstring from the superclass
 825            return self.start_time.add(seconds=self.duration)
 826
 827        @property
 828        def duration(self):  # noqa: D102, takes the docstring from the superclass
 829            return self.num_samples / self.samplerate
 830
 831        def read_data(self, start_idx=None, stop_idx=None):  # noqa: D102, takes the docstring from the superclass
 832            return soundfile.read(self.filepath.as_posix(), start=start_idx, stop=stop_idx, dtype="float32")[0]
 833

[docs]
 834    def time_data(self):  # noqa: D102, takes the docstring from the superclass
 835        data = self.raw_data()
 836        if np.ndim(data) == 1:
 837            dims = "time"
 838            coords = None
 839        elif np.ndim(data) == 2:
 840            if self.sensor is not None and "sensor" in self.sensor and np.shape(data)[1] == self.sensor["sensor"].size:
 841                dims = ("time", "sensor")
 842                coords = {"sensor": self.sensor["sensor"]}
 843            else:
 844                dims = ("time", "channel")
 845                if self.sensor is not None and "channel" in self.sensor:
 846                    coords = {"channel": self.sensor["channel"]}
 847                else:
 848                    coords = None
 849        else:
 850            raise NotImplementedError("Audio files with more than 2 dimensions are not supported")
 851        data = _core.TimeData(
 852            data=data,
 853            samplerate=self.samplerate,
 854            start_time=self.time_window.start,
 855            dims=dims,
 856            coords=coords,
 857        )
 858        data = calibrate_raw_data(
 859            raw_data=data,
 860            sensitivity=self.sensor.get("sensitivity", None),
 861            gain=self.gain,
 862            adc_range=self.adc_range,
 863            file_range=self.file_range,
 864        )
 865        return data

 866

[docs]
 867    def rolling(self, duration=None, step=None, overlap=None):
 868        """Generate rolling frames of data.
 869
 870        Parameters
 871        ----------
 872        duration : float
 873            The size of each frame, in seconds.
 874        step : float
 875            The step between consecutive frames, in seconds.
 876        overlap : float, default=0
 877            The fraction of overlap between consecutive frames. Should be less than one.
 878            Negative values will make "gaps" in the output.
 879
 880        Returns
 881        -------
 882        roller : `AudioFileRoller`
 883            Implementation of rolling time windows for recordings.
 884        """
 885        return AudioFileRoller(self, duration=duration, step=step, overlap=overlap)


 886
 887

[docs]
 888class AudioFileRoller(_core.TimeDataRoller):
 889    """Rolling windows of time data.
 890
 891    Parameters
 892    ----------
 893    obj : AudioFileRecording
 894        The audio file wrapper to roll over.
 895    duration : float
 896        The duration of each frame, in seconds.
 897    step : float
 898        The step between consecutive frames, in seconds.
 899    overlap : float
 900        The overlap between consecutive frames, as a fraction of the duration.
 901    """
 902
 903    def __init__(self, obj, duration=None, step=None, overlap=0):
 904        super().__init__(obj, duration=duration, step=step, overlap=overlap)
 905        self._dummy_data = self.obj.subwindow(start=True, duration=0).time_data().data
 906        calibration = calibrate_raw_data(
 907            1,
 908            gain=self.obj.gain,
 909            sensitivity=self.obj.sensor.get("sensitivity"),
 910            adc_range=self.obj.adc_range,
 911            file_range=self.obj.file_range,
 912        )
 913        self._calibration = xr.align(self._dummy_data, calibration)[1].data
 914
 915    @property
 916    def shape(self):  # noqa: D102, inherited from parent
 917        shape = [self._dummy_data.sizes[dim] for dim in self.dims if dim != "time"]
 918        shape = [self.settings["samples_per_frame"]] + shape
 919        return tuple(shape)
 920
 921    @property
 922    def dims(self):  # noqa: D102, inherited from parent
 923        dims = list(self._dummy_data.dims)
 924        dims.remove("time")
 925        return tuple(["time"] + dims)
 926
 927    @property
 928    def coords(self):  # noqa: D102, inherited from parent
 929        coords = dict(self._dummy_data.coords)
 930        return coords
 931

[docs]
 932    def numpy_frames(self, io_blocksize=1_000_000):  # noqa: D102, inherited from parent
 933        # This method essentially re-chunks frames read from disk to have overlap and a possibly smaller size.
 934        # This allows reading frames from disk with a framesize optimized for reading, independently
 935        # from any desired signal processing frame size.
 936        samples_per_frame = self.settings["samples_per_frame"]
 937        sample_step = self.settings["sample_step"]
 938        io_blocksize = max(io_blocksize, samples_per_frame)  # We need to fit at least a full frame in one raw_frame.
 939
 940        out = np.zeros(self.shape)
 941        buffer = np.zeros(0)
 942        frame_idx = 0
 943
 944        # We loop over large frames from the data on disk, reducing IO overhead.
 945        for raw_idx, raw_frame in enumerate(self.obj.raw_frames(framesize=io_blocksize)):
 946            # While there's enough data in this raw_frame (and the buffer) to fill one output frame (and we should still yield more frames).
 947            while raw_frame.shape[0] + buffer.shape[0] >= samples_per_frame and frame_idx < self.num_frames:
 948                if buffer.shape[0]:
 949                    # We have data in the buffer, it goes first into the output frame.
 950                    # The buffer is never larger than one output frame.
 951                    out[:buffer.shape[0]] = buffer
 952                    # The buffer won't fill the entire frame - take the rest of the samples from the raw_frame
 953                    out[buffer.shape[0]:] = raw_frame[:samples_per_frame - buffer.shape[0]]
 954                    # If we're out of buffer after taking a step, we start consuming the raw_frame.
 955                    raw_frame = raw_frame[max(0, sample_step - buffer.shape[0]):]
 956                    # Consume step samples from the buffer.
 957                    buffer = buffer[sample_step:]
 958                else:
 959                    # No buffer - just take a frame from the raw_frame
 960                    # Since `out` gets modified in place when copying the buffer, `out` cannot point to `raw_frame`!
 961                    # Hence the need to write the values into `out[:]`, not take a view and save it to `out`.
 962                    out[:] = raw_frame[:samples_per_frame]
 963                    # Consume step samples from the raw_frame.
 964                    raw_frame = raw_frame[sample_step:]
 965
 966                # Calibrate, yield, and increment the frame index
 967                yield out * self._calibration
 968                frame_idx += 1
 969            # Not enough data in raw_frame (buffer is empty by now).
 970            # Buffer this incomplete raw frame and get a new one.
 971            buffer = raw_frame

 972

[docs]
 973    def time_data(self):  # noqa: D102, inherited from parent
 974        offsets = np.arange(self.settings["samples_per_frame"]) * 1e9 / self.obj.samplerate
 975        first_time_vec = _core.time_to_np(self.obj.time_window.start) + offsets.astype("timedelta64[ns]")
 976        for frame_idx, frame in enumerate(self.numpy_frames()):
 977            time_since_start = frame_idx * self.settings["sample_step"] / self.obj.samplerate
 978            time_since_start = np.timedelta64(int(time_since_start * 1e9), "ns")
 979            yield _core.TimeData(
 980                frame,
 981                time=first_time_vec + time_since_start,
 982                samplerate=self.obj.samplerate,
 983                coords=self.coords,
 984                dims=self.dims,
 985            )

 986
 987    def __iter__(self):
 988        start_time = self.obj.time_window.start
 989        for frame_idx in range(self.num_frames):
 990            yield self.obj.subwindow(start=start_time, duration=self.settings["duration"])
 991            start_time = start_time.add(seconds=self.settings["step"])

 992
 993

[docs]
 994class SoundTrap(AudioFileRecording):
 995    """Class to read data from OceanInstruments SoundTrap recorders.
 996
 997    The main way to read SoundTrap data is through the
 998    `read_folder` classmethod.
 999    """
1000
1001    allowable_interrupt = 1
1002    gain = None
1003    adc_range = None
1004    file_range = 1
1005

[docs]
1006    @classmethod
1007    def read_folder(cls, folder, sensor=None, serial_number=None, time_compensation=None):
1008        """Read files in a folder, filtered on an optional serial number.
1009
1010        Parameters
1011        ----------
1012        folder : str or Path
1013            The path to the folder containing the files.
1014        sensor : str or None, optional
1015            The sensor associated with the files.
1016        serial_number : int or None, optional
1017            If provided, only files with the matching serial number in their filename will be processed.
1018            If None, all files in the folder will be processed.
1019        time_compensation : `TimeCompensation`, int, or callable, optional
1020            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
1021            - If an number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
1022            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
1023            - If None, no time compensation is applied.
1024
1025        Returns
1026        -------
1027        cls
1028            An instance of the class containing the loaded files.
1029
1030        Raises
1031        ------
1032        RuntimeError
1033            If the folder does not exist, is not a directory, or no matching files are found.
1034
1035        Notes
1036        -----
1037        This method filters the files in the folder based on the provided ``serial_number`` and
1038        parses the start time from the filenames using a specific format (``"YYMMDDHHmmss"``).
1039        It then delegates the actual file reading to the `read_folder` method of the parent class.
1040        """
1041        if serial_number is None:
1042
1043            def file_filter(filepath):
1044                return True
1045        else:
1046
1047            def file_filter(filepath):
1048                return int(filepath.stem.split(".")[0]) == serial_number
1049
1050        def start_time_parser(filepath):
1051            return _core.time_to_datetime(filepath.stem.split(".")[1], fmt="%y%m%d%H%M%S")
1052
1053        return super().read_folder(
1054            folder=folder,
1055            start_time_parser=start_time_parser,
1056            sensor=sensor,
1057            file_filter=file_filter,
1058            time_compensation=time_compensation,
1059        )


1060
1061

[docs]
class SylenceLP(AudioFileRecording):
    """Class to read data from RTsys SylenceLP recorders.

    The main way to read Sylence data is through the
    `read_folder` classmethod.
    """

    adc_range = 2.5
    file_range = 1
    allowable_interrupt = 1

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            # The file header is parsed by hand, since the recorder settings are stored in a
            # "conf" chunk between the format description and the "data" chunk.
            # Slice indices are kept aligned with the byte offsets in the recorder manual.
            with self.filepath.open("rb") as file:
                base_header = file.read(36)
                # Fields in the base header which are not used:
                #   [0:4]   chunk id, always equals RIFF
                #   [4:8]   total file size, not important
                #   [8:12]  chunk format, always equals WAVE
                #   [12:16] subchunk id, always equals fmt
                #   [16:20] subchunk size, always equals 16
                #   [20:22] audio format, not important in current implementation
                #   [28:32] byte rate, not important in current implementation
                num_channels = int.from_bytes(base_header[22:24], byteorder="little", signed=False)
                if num_channels != 1:
                    raise ValueError(
                        f"Expected file for SylenceLP with a single channel, read file with {num_channels} channels"
                    )
                samplerate = int.from_bytes(base_header[24:28], byteorder="little", signed=False)
                bytes_per_sample = int.from_bytes(base_header[32:34], byteorder="little", signed=False)
                bitdepth = int.from_bytes(base_header[34:36], byteorder="little", signed=False)

                # Read the chunk id and chunk size first, then append the chunk contents.
                # Keeping the first 8 bytes in `conf_header` keeps indices aligned with the manual.
                # An explicit read is used since `peek` does not guarantee how many bytes it returns.
                conf_header = file.read(8)
                conf_size = int.from_bytes(conf_header[4:8], byteorder="little", signed=False)
                if conf_size != 460:
                    raise ValueError(f"Incorrect size of SylenceLP config: '{conf_size}'B, expected 460B")
                conf_header += file.read(conf_size)

                subchunk_id = conf_header[:4].decode("ascii")  # always conf
                if subchunk_id != "conf":
                    raise ValueError(f"Expected 'conf' section in SylenceLP config, found '{subchunk_id}'")
                config_version = int.from_bytes(conf_header[8:12], byteorder="little", signed=False)
                if config_version != 2:
                    raise NotImplementedError(f"Cannot handle SylenceLP config version {config_version}")
                # [16:24] holds a timestamp, but this value is not actually when the recording starts.
                channel = conf_header[24:28].decode("ascii")
                if channel.strip("\x00") != "":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{channel}'"
                    )
                samplerate_alt = np.frombuffer(conf_header[28:32], dtype="f4").squeeze()
                if samplerate != samplerate_alt:
                    raise ValueError(
                        f"Mismatched samplerate for hardware and file, read file samplerate {samplerate} and config samplerate {samplerate_alt}"
                    )

                # Sensitivity and gain hold four values each, only the first one is used below.
                hydrophone_sensitivity = np.frombuffer(conf_header[32:48], dtype="f4")
                gain = np.frombuffer(conf_header[48:64], dtype="f4")
                # [64:80] holds the gain correction, which is just 1/gain.
                serialnumber = conf_header[80:100].decode("ascii")
                active_channels = conf_header[100:104].decode("ascii")
                if active_channels != "A\x00\x00\x00":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{active_channels}'"
                    )

                data_header = file.read(4).decode("ascii")
                if data_header != "data":
                    raise ValueError(f"Expected file header 'data', read {data_header}")
                data_size = int.from_bytes(file.read(4), byteorder="little", signed=False)

            # The data chunk has to hold a whole number of samples.
            num_samples, leftover_bytes = divmod(data_size, bytes_per_sample)
            if leftover_bytes:
                raise ValueError(f"Size of data is not divisible by bytes per sample, file '{self.name}' is corrupt!")

            return super()._lazy_load() | dict(
                samplerate=samplerate,
                bitdepth=bitdepth,
                num_samples=num_samples,
                hydrophone_sensitivity=hydrophone_sensitivity[0],
                serial_number=serialnumber.strip("\x00"),
                # The stored value is a linear factor, converted to dB with flipped sign.
                gain=-20 * np.log10(gain[0]),
            )

        bitdepth = _LazyPropertyMixin._lazy_property("bitdepth")
        hydrophone_sensitivity = _LazyPropertyMixin._lazy_property("hydrophone_sensitivity")
        serial_number = _LazyPropertyMixin._lazy_property("serial_number")
        gain = _LazyPropertyMixin._lazy_property("gain")

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain of the first file is used for the whole recording.
        return self.files[0].gain

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        """

        def start_time_parser(filepath):
            # The first nine characters of the file name are skipped, the rest is the start time.
            return _core.time_to_datetime(filepath.stem[9:], fmt="%Y-%m-%d_%H-%M-%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )



1196
1197

[docs]
class MultichannelAudioInterfaceRecording(AudioFileRecording):
    """Class for handling multichannel audio interface recordings.

    The gain and ADC range are not class constants for this recorder type,
    they are looked up in the sensor information of the recording.
    """

    file_range = 1

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        return self.sensor.get("gain", None)

    @property
    def adc_range(self):  # noqa: D102, takes the docstring from the superclass
        return self.sensor.get("adc_range", None)

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def __init__(self, filepath, start_time, channels):
            super().__init__(filepath=filepath, start_time=start_time)
            self.channels = list(channels)

        @property
        def num_channels(self):  # noqa: D102, inherits from superclass.
            # Only the selected channels count, not all the channels in the file.
            return len(self.channels)

        def read_data(self, start_idx=None, stop_idx=None):  # noqa: D102, takes the docstring from the superclass
            # Always read as 2D, so that the channel selection works for single-channel files as well.
            samples, _ = soundfile.read(
                self.filepath.as_posix(),
                start=start_idx,
                stop=stop_idx,
                dtype="float32",
                always_2d=True,
            )
            return samples[:, self.channels]

    @classmethod
    def _merge_channel_info(cls, sensor, channel, gain, adc_range):
        """Merge channel information with the sensor data.

        This function has two main operating modes, depending on if
        there is existing sensor information or not.

        1. There is sensor information: The channel, gain, and adc_range
        will be passed to `uwacan.positional.Sensor.with_data`, and the
        resulting `~uwacan.positional.Sensor` object is returned.
        This allows using dictionaries to supply the channel, gain, and adc_range.
        Values already present in the sensor information must not be given again.
        Missing values default to one channel per sensor (counting from 0),
        a gain of 0, and an adc_range of 1.
        2. If there is no sensor information: The channels will be used as
        the dimension and coordinate, and must as such be an array_like.
        The gain and adc_range have to be compatible with this channel information.

        Parameters
        ----------
        sensor : `uwacan.positional.Sensor` or None
            The sensor to which the channel information will be merged.
            If None, a new dataset is created, and the output is a dataset.
        channel : array_like, or dict
            Channel information to be added to the sensor dataset.
        gain : array_like, scalar, or dict
            Gain information to be added to the sensor dataset.
        adc_range : array_like, scalar, or dict
            ADC range information to be added to the sensor dataset.

        Raises
        ------
        ValueError
            If a value is given explicitly while already in the sensor information.
        """
        if sensor is None:
            dataset = xr.Dataset()
            if channel is not None:
                if not isinstance(channel, xr.DataArray):
                    channel = xr.DataArray(channel, dims="channel", coords={"channel": channel})
                dataset["channel"] = channel
            for key, value in (("gain", gain), ("adc_range", adc_range)):
                if value is None:
                    continue
                # Scalars and ready-made data arrays are stored as they are,
                # anything else is taken as one value per channel.
                if not (isinstance(value, xr.DataArray) or np.ndim(value) == 0):
                    value = xr.DataArray(value, dims="channel", coords={"channel": channel})
                dataset[key] = value
            return dataset

        # Each entry is (name, explicit value, factory for the default value).
        # The defaults are created lazily, since the channel default needs the sensor list.
        entries = (
            ("channel", channel, lambda: list(range(len(sensor.sensors)))),
            ("gain", gain, lambda: 0),
            ("adc_range", adc_range, lambda: 1),
        )
        assigns = {}
        for key, value, make_default in entries:
            if key in sensor:
                if value is not None:
                    raise ValueError(
                        f"Should not give explicit {key} if the {key} information is already in the sensor information"
                    )
                continue
            assigns[key] = make_default() if value is None else value
        return sensor.with_data(**assigns)

    @classmethod
    def read_folder(
        cls,
        folder,
        start_time_parser,
        channel=None,
        gain=None,
        adc_range=None,
        one_recording_per_file=False,
        sensor=None,
        file_filter=None,
        time_compensation=None,
        glob_pattern="**/*.wav",
    ):
        """Read files in a folder.

        This method collects audio files from the specified folder into a recording object.
        The sensor and audio interface settings can be supplied in two ways, depending on if
        there is sensor information or not:

        1. There is sensor information: Use `uwacan.sensor_array` to specify
           the sensor particulars. Give the ``channel``, ``gain``, and ``adc_range``
           as dicts with the sensor names as keys, or scalars for all the sensors.
        2. If there is no sensor information: Give channel labels as a list to the ``channel``,
           and array_like or scalar ``gain`` and ``adc_range``.

        Parameters
        ----------
        folder : str or Path
            The folder containing the audio files.
        start_time_parser : callable or str
            - A function to parse the start time from file names, or
            - a string specifying the datetime format, e.g., ``"YYYY-MM-DD_HH-mm-ss"``.

        sensor : `~uwacan.positional.Sensor`
            Sensor information with sensitivity, positions, etc.
        channel : dict or array_like
            The channel index in the read data, from 0.

            1. A mapping from sensor names to channel index, if sensor information is given.
            2. A list of channel labels, if no sensor information is given.

        gain : dict, array_like, or scalar
            The gain used for the interface, in dB.

            1. A mapping from sensor names to interface gain, if sensor information is given.
            2. A list of gains, if no sensor information is given.
            3. A single gain for all interface channels/sensors.

        adc_range : dict, array_like, or scalar
            The peak voltage input of the ADC.

            1. A mapping from sensor names to interface ADC range, if sensor information is given.
            2. A list of ADC ranges, if no sensor information is given.
            3. A single ADC range for all interface channels/sensors.

        one_recording_per_file : bool, optional
            If True, the output will be a list of recordings, one for each file.
        file_filter : callable, optional
            A function to filter files based on specific criteria. Will be called with the file path.
            The file is skipped if the filter returns ``False``.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        glob_pattern : str, optional
            The glob pattern to match files in the folder. Defaults to ``"**/*.wav"``.

        """
        sensor = cls._merge_channel_info(sensor=sensor, channel=channel, gain=gain, adc_range=adc_range)
        # Every file gets the same channel selection, taken from the merged sensor information.
        channel_selection = {"channels": sensor["channel"].values}
        recording = super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
            glob_pattern=glob_pattern,
            file_kwargs=channel_selection,
        )
        if one_recording_per_file:
            # Split the full recording into one window per file.
            return [recording.subwindow(start=file.start_time, stop=file.stop_time) for file in recording.files]
        return recording



1386
1387

[docs]
class LoggerheadDSG(AudioFileRecording):
    """Class to read data from Loggerhead DSG recorders.

    The main way to read Loggerhead data is through the
    `read_folder` classmethod.
    """

    allowable_interrupt = 1
    adc_range = None
    file_range = 1

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        """

        def start_time_parser(filepath):
            # The first 15 characters of the file name hold the start time.
            # The format is passed by keyword, like in the other recorder classes.
            return _core.time_to_datetime(filepath.stem[:15], fmt="%Y%m%dT%H%M%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain of the first file is used for the whole recording.
        return self.files[0].gain

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            # The gain is the third underscore-separated part of the file name, e.g. "..._..._20dB...".
            name_parts = self.filepath.stem.split("_")
            # Check the number of parts as well, so that unrelated files give
            # the descriptive error below instead of an IndexError.
            if len(name_parts) < 3 or not name_parts[2].endswith("dB"):
                raise ValueError(
                    f"File `{self.filepath}` does not seem to be a file from a Loggerhead DSG, could not extract gain"
                )
            # Remove the unit suffix only, not every trailing "d" or "B" character.
            gain = float(name_parts[2].removesuffix("dB"))
            return super()._lazy_load() | dict(gain=gain)

        gain = _LazyPropertyMixin._lazy_property("gain")