1"""Reading recordings from files on disk.
2
3This module contains classes used to read data created by
4field data recorders, typically recording hydrophone data
5as audio files.
6
7.. currentmodule:: uwacan.recordings
8
9Main recording classes
10----------------------
11.. autosummary::
12 :toctree: generated
13
14 SoundTrap
15 SylenceLP
16 LoggerheadDSG
17 MultichannelAudioInterfaceRecording
18
19Utilities
20---------
21.. autosummary::
22 :toctree: generated
23
24 RecordingArray
25 TimeCompensation
26 calibrate_raw_data
27 dBx_to_peak_volts
28
29Implementation interfaces
30-------------------------
31.. autosummary::
32 :toctree: generated
33
34 Recording
35 FileRecording
36 AudioFileRecording
37 AudioFileRoller
38
39"""
40
41import bisect
42import collections
43import numpy as np
44from . import _core, positional
45import abc
46import soundfile
47import xarray as xr
48from pathlib import Path
49
50
[docs]
def dBx_to_peak_volts(db):
    """Translate a dBu or dBV level into the equivalent peak voltage.

    Parameters
    ----------
    db : str
        The level with its unit attached, e.g., ``"10dBu"`` or ``"-20dBV"``.
        The unit is matched case-insensitively. Inputs that are not scalar
        are converted element by element.

    Returns
    -------
    volts : float
        The peak voltage for the given level.

    Raises
    ------
    ValueError
        If the string contains neither ``"dBu"`` nor ``"dBV"``.
    """
    if np.ndim(db) != 0:
        # Sequences and arrays are handled one element at a time.
        return np.vectorize(dBx_to_peak_volts)(db)

    db = db.lower()
    if "dbu" in db:
        level = float(db.replace("dbu", "").strip())
        # dBu is an RMS level referenced to 1 mW over 600 Ω, i.e. sqrt(0.6) volts.
        # The factor 2**0.5 converts from RMS to peak.
        return 10 ** (level / 20) * 2**0.5 * 0.6**0.5
    if "dbv" in db:
        level = float(db.replace("dbv", "").strip())
        # dBV is an RMS level referenced to 1 V; 2**0.5 converts from RMS to peak.
        return 10 ** (level / 20) * 2**0.5
    raise ValueError(f"Unknown dB volts reference unit in {db}")
85
86
[docs]
def calibrate_raw_data(
    raw_data,
    sensitivity=None,
    gain=None,
    adc_range=None,
    file_range=None,
):
    """Scale raw file values into physical units.

    The signal chain that produced the file values has three stages:

    1) The transducer turns the physical quantity ``q`` into a voltage ``u``
    2) An amplifier turns the transducer voltage ``u`` into the ADC voltage ``v``
    3) The ADC turns the voltage ``v`` into the digital values ``d`` in the file.

    Sensitivity and gain are given in decibels, and are converted to linear
    factors as ``s = 10 ** (sensitivity / 20)`` and ``g = 10 ** (gain / 20)``.
    The ``adc_range`` is the peak voltage the ADC can handle, which is
    stored as ``file_range`` in the raw data.

    The stages are described by

    1) ``u = q * s``, sensitivity ``s`` in V/Q, e.g. V/Pa.
    2) ``v = u * g``, gain ``g`` is unitless.
    3) ``d / d_ref = v / v_ref``, relating file values to ADC voltage input.

    which combine to ``q = d * (v_ref / d_ref / s / g)``.
    Any factor that is not given is treated as 1.

    Parameters
    ----------
    raw_data : array_like
        The raw values as read from a file.
    sensitivity : array_like
        Sensor sensitivity in dB re. V/Q,
        where Q is the desired physical unit.
    gain : array_like
        Gain applied to the sensor voltage, in dB.
    adc_range : array_like
        Peak voltage that the ADC can handle.
    file_range : array_like
        Peak value in the raw data,
        corresponding to the ``adc_range``.

    Returns
    -------
    q : array_like
        The calibrated values, following the equations above.

    """
    scale = 1.0
    # Each factor is applied with a new binary operation rather than in place,
    # so that array-valued factors are free to broadcast against each other.
    if adc_range is not None:
        scale = scale * adc_range
    if file_range is not None:
        scale = scale / file_range
    for decibels in (gain, sensitivity):
        if decibels is not None:
            scale = scale / 10 ** (decibels / 20)

    return raw_data * scale
149
150
151class _LazyPropertyMixin:
152 def __init__(self, *args, **kwargs):
153 super().__init__(*args, **kwargs)
154 self.__property_cache = {}
155
156 @staticmethod
157 def _lazy_property(key):
158 def getter(self):
159 try:
160 return self.__property_cache[key]
161 except KeyError:
162 self.__property_cache.update(self._lazy_load())
163 return self.__property_cache[key]
164
165 return property(getter)
166
167 @abc.abstractmethod
168 def _lazy_load(self):
169 return {}
170
171
[docs]
class TimeCompensation:
    """Compensates time drift and offset in a recording.

    This is based on the actual and recorded time of one or more events.
    These have to be detected elsewhere, and the times for them are
    given here to build the model.
    If a single pair of times is given, the offset between them is used to compensate.
    If multiple pairs are given, the offset will be linearly interpolated between them.
    The events may be given in any order, they are sorted by actual time.

    Parameters
    ----------
    actual_time : time_like or [time_like]
        Actual time for synchronization event(s).
    recorded_time : time_like or [time_like]
        Recorded time for synchronization event(s).

    Raises
    ------
    ValueError
        If no events are given, or if the number of actual
        and recorded times differ.
    """

    def __init__(self, actual_time, recorded_time):
        actual_time = self._as_time_list(actual_time)
        recorded_time = self._as_time_list(recorded_time)

        # Fail early instead of silently dropping unpaired events (zip truncates),
        # or failing on first use when there are no events at all.
        if len(actual_time) != len(recorded_time):
            raise ValueError(
                f"Got {len(actual_time)} actual times but {len(recorded_time)} recorded times, "
                "synchronization events need one of each"
            )
        if not actual_time:
            raise ValueError("At least one synchronization event is needed for time compensation")

        # `np.interp` does not check that its x-coordinates are increasing, and returns
        # nonsense if they are not. Sorting the event pairs by actual time takes care of
        # events given out of order. This assumes that the recorded times come in the
        # same order as the actual times.
        events = sorted(zip(actual_time, recorded_time), key=lambda event: event[0].timestamp())

        self._time_offset = [(recorded - actual).in_seconds() for (actual, recorded) in events]
        if len(self._time_offset) > 1:
            self._actual_timestamps = [actual.timestamp() for (actual, _) in events]
            self._recorded_timestamps = [recorded.timestamp() for (_, recorded) in events]

    @staticmethod
    def _as_time_list(times):
        """Convert a single time or an iterable of times to a list of datetimes."""
        if isinstance(times, str):
            # Strings are iterable, but are a single time to parse.
            times = [times]
        else:
            try:
                iter(times)
            except TypeError:
                times = [times]
        return list(map(_core.time_to_datetime, times))

    def recorded_to_actual(self, recorded_time):
        """Convert a recorded time to the actual time."""
        recorded_time = _core.time_to_datetime(recorded_time)
        if len(self._time_offset) == 1:
            time_offset = self._time_offset[0]
        else:
            time_offset = np.interp(recorded_time.timestamp(), self._recorded_timestamps, self._time_offset)
        return recorded_time.subtract(seconds=time_offset)

    def actual_to_recorded(self, actual_time):
        """Convert an actual time to the time recorded."""
        actual_time = _core.time_to_datetime(actual_time)
        if len(self._time_offset) == 1:
            time_offset = self._time_offset[0]
        else:
            time_offset = np.interp(actual_time.timestamp(), self._actual_timestamps, self._time_offset)
        return actual_time.add(seconds=time_offset)
228
229
[docs]
class Recording:
    """Common interface for all recordings.

    The properties and methods declared here are what
    the rest of the package expects a recording to
    provide. Subclasses supply the implementations.
    """

    def __init__(self, sensor=None):
        self.sensor = sensor

    @property
    @abc.abstractmethod
    def samplerate(self):
        """Samplerate of the recorded data, in Hz."""

    @property
    @abc.abstractmethod
    def num_channels(self):
        """Number of channels in the recording, and in the data read from it."""

    @property
    @abc.abstractmethod
    def time_window(self):
        """The `~uwacan.TimeWindow` spanned by the recording."""

    @abc.abstractmethod
    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):
        """Restrict the recording to a part of its time window.

        The parameters are described in `~uwacan.TimeWindow.subwindow`.
        """

    @abc.abstractmethod
    def time_data(self):
        """Load the recorded time signals.

        The recorded data is read from disk and
        returned as a `~uwacan.TimeData` object.
        """
270
271
[docs]
class RecordingArray(Recording):
    """Collection of several independent recordings.

    Use this class to treat a number of recording
    instances as one. The typical case is a field
    trial where more than one hardware recorder was
    deployed, and the data should be analyzed together.

    Parameters
    ----------
    *recordings : `Recording`
        The recording objects.
    """

    def __init__(self, *recordings):
        # Keyed on the sensor label of each recording.
        self.recordings = {}
        for recording in recordings:
            self.recordings[recording.sensor.label] = recording

    @property
    def samplerate(self):
        """The samplerate(s) of the recordings."""
        labels = list(self.recordings.keys())
        rates = [self.recordings[label].samplerate for label in labels]
        if np.ptp(rates) == 0:
            # All recordings agree, so a single value is enough.
            return rates[0]
        return xr.DataArray(rates, dims="sensor", coords={"sensor": labels})

    @property
    def num_channels(self):
        """The total number of channels."""
        total = 0
        for recording in self.recordings.values():
            total = total + recording.num_channels
        return total

    @property
    def sensor(self):
        """The sensors used, as a `~uwacan.sensor_array`."""
        sensors = [recording.sensor for recording in self.recordings.values()]
        return positional.SensorArray.concatenate(sensors)

    @property
    def time_window(self):  # noqa: D102, takes the docstring from the superclass
        # The shared window is where all recordings overlap: latest start to earliest stop.
        windows = [recording.time_window for recording in self.recordings.values()]
        latest_start = max(window.start for window in windows)
        earliest_stop = min(window.stop for window in windows)
        return _core.TimeWindow(start=latest_start, stop=earliest_stop)

    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):  # noqa: D102, takes the docstring from the superclass
        selected = self.time_window.subwindow(
            time, start=start, stop=stop, center=center, duration=duration, extend=extend
        )
        # Every contained recording is restricted to the same window.
        parts = [recording.subwindow(selected) for recording in self.recordings.values()]
        return type(self)(*parts)

    def time_data(self):  # noqa: D102, takes the docstring from the superclass
        if np.ndim(self.samplerate) > 0:
            raise NotImplementedError("Stacking time data from recording with different samplerates not implemented!")
        per_recording = [recording.time_data().data for recording in self.recordings.values()]
        return _core.TimeData(xr.concat(per_recording, dim="sensor"))
328
329
[docs]
class FileRecording(Recording):
    """Base class for recordings using multiple files.

    This class has some interface definitions and some
    shared logic for implementing recordings that use
    multiple files to store the data.

    Subclasses need to implement a `RecordedFile` inner class,
    some way to read the files (typically a classmethod), and
    the `time_data` function (typically using `raw_data`).

    .. autoclass:: uwacan.recordings::FileRecording.RecordedFile

    """

    allowable_interrupt = 0
    """How long gap is allowed between files when reading, in seconds."""

    class RecordedFile(abc.ABC):
        """Interface class for single recording files.

        This interface class defines how subclasses
        should implement wrappers around individual files.
        """

        def __init__(self, filepath):
            super().__init__()
            self.filepath = Path(filepath)

        @property
        def filepath(self):
            """The `Path` to the file."""
            return self._filepath

        @filepath.setter
        def filepath(self, filepath):
            if not isinstance(filepath, Path):
                filepath = Path(filepath)
            self._filepath = filepath

        @abc.abstractmethod
        def read_data(self, start_idx, stop_idx):
            """Read raw data from the file.

            Parameters
            ----------
            start_idx : int
                The starting index to read from, inclusive.
            stop_idx : int
                The last index to read to, exclusive.

            Returns
            -------
            data : array_like
                The data read from disk.
            """

        @property
        @abc.abstractmethod
        def start_time(self):
            """The start time of this file."""

        @property
        @abc.abstractmethod
        def stop_time(self):
            """The stop time of this file."""

        @property
        @abc.abstractmethod
        def duration(self):
            """The duration of this file."""

        @property
        @abc.abstractmethod
        def num_samples(self):
            """The number of samples in this file, per channel."""

        @property
        @abc.abstractmethod
        def num_channels(self):
            """The number of channels in this file."""

        @property
        @abc.abstractmethod
        def samplerate(self):
            """The samplerate in this file."""

        def __bool__(self):
            # A file object is truthy when the file exists on disk.
            return self.filepath.exists()

        def __contains__(self, time):
            # Both the start and the stop time count as inside the file.
            return (self.start_time <= time) and (time <= self.stop_time)

    def __init__(self, files, assume_sorted=False, **kwargs):
        super().__init__(**kwargs)
        if not assume_sorted:
            files = sorted(files, key=lambda f: f.start_time)
        self.files = files
        # Small LRU cache for time -> file lookups, see `_find_file_time`.
        self._file_time_cache = collections.OrderedDict()

    @property
    def samplerate(self):  # noqa: D102, takes the docstring from the superclass
        return self.files[0].samplerate

    @property
    def num_channels(self):  # noqa: D102, takes the docstring from the superclass
        return self.files[0].num_channels

    @property
    def time_window(self):  # noqa: D102, takes the docstring from the superclass
        try:
            return self._window
        except AttributeError:
            # No window chosen yet - default to everything from the first to the last file.
            self._window = _core.TimeWindow(
                start=self.files[0].start_time,
                stop=self.files[-1].stop_time,
            )
        return self._window

    def subwindow(self, time=None, /, *, start=None, stop=None, center=None, duration=None, extend=None):  # noqa: D102, takes the docstring from the superclass
        new_window = self.time_window.subwindow(
            time, start=start, stop=stop, center=center, duration=duration, extend=extend
        )
        # The new recording shares the files, only the time window differs.
        new = type(self)(
            files=self.files,
            sensor=self.sensor,
        )
        new._window = new_window
        return new

    def _find_file_time(self, time):
        """Find a file containing a certain time.

        Raises
        ------
        ValueError
            If no file contains the time.
        """
        time = _core.time_to_datetime(time)
        if time in self._file_time_cache:
            self._file_time_cache.move_to_end(time)
            return self._file_time_cache[time]

        # bisect_right(items, target) returns an idx such that items[idx - 1] <= target < items[idx]
        # Subtracting one from the output means we get the last file that starts before (or equal) to the target time
        idx = bisect.bisect_right(self.files, time, key=lambda file: file.start_time) - 1
        # idx == -1 means that all files start after the target time.
        # Checked explicitly so that the negative index does not wrap around to the last file.
        if idx < 0 or time not in self.files[idx]:
            raise ValueError(f"Time {time} does not exist inside any recorded files")

        self._file_time_cache[time] = self.files[idx]
        if len(self._file_time_cache) > 128:
            # Evict the least recently used entry.
            self._file_time_cache.popitem(last=False)
        return self.files[idx]

    def check_file_continuity(self, start_time=None, stop_time=None, allowable_interrupt=None, mode="raise"):
        """Check the continuity of recorded data.

        Parameters
        ----------
        start_time : datetime, optional
            The start time of the period to check for continuity. If not provided,
            the start of `self.time_window` will be used.
        stop_time : datetime, optional
            The stop time of the period to check for continuity. If not provided,
            the end of `self.time_window` will be used.
        allowable_interrupt : float, optional
            How much of a gap to allow between files, in seconds.
            Will by default use the class attribute.
        mode : {"raise", "return", "print"}, optional
            The action to take when an interruption is found.
            - "raise" (default): raises a `ValueError` with details about the interruption.
            - "return": returns `False` if an interruption is found, `True` otherwise.
            - "print": prints a warning message with details about the interruption and continues execution.

        Returns
        -------
        bool
            `True` if no interruption was found. In "return" mode, `False` is
            returned at the first interruption. In "print" mode interruptions are
            only reported, and `True` is returned regardless.

        Raises
        ------
        ValueError
            If `mode` is set to "raise" and an interruption larger than the allowable
            interrupt is detected between the files, with details of the missing time.
            Also raised if the start or stop time is not inside any file.

        Notes
        -----
        The method checks the continuity of data by comparing the `stop_time` of each file
        with the `start_time` of the next file, for all files from the one containing the
        start time up to and including the one containing the stop time. If the gap between
        two files exceeds the allowable interrupt (in seconds), it is considered an interruption.
        """
        if start_time is None:
            start_time = self.time_window.start
        if stop_time is None:
            stop_time = self.time_window.stop
        if allowable_interrupt is None:
            allowable_interrupt = self.allowable_interrupt
        first_file = self._find_file_time(start_time)
        first_idx = self.files.index(first_file)
        last_file = self._find_file_time(stop_time)
        last_idx = self.files.index(last_file)

        # Pair each file with its successor, from (first, first + 1) up to (last - 1, last).
        # Slice stops are exclusive, so the file at last_idx needs `last_idx + 1` to be included.
        for early, late in zip(self.files[first_idx:last_idx], self.files[first_idx + 1 : last_idx + 1]):
            interrupt = (late.start_time - early.stop_time).in_seconds()
            if interrupt > allowable_interrupt:
                message = (
                    f"Data is not continuous, missing {interrupt} seconds between files\n "
                    f"{early.filepath} ending at {early.stop_time}\n"
                    f"{late.filepath} starting at {late.start_time}"
                )
                if mode == "raise":
                    raise ValueError(message)
                elif mode == "return":
                    return False
                else:
                    print(message)
        return True

    def raw_data(self, start_time=None, stop_time=None):
        """Read raw data from files on disk.

        Retrieves raw data samples from a start time to a stop time,
        defaulting to reading between times in ``self.time_window``.
        This method reads from multiple files if needed, and checks
        file timestamps for approximate data continuity.

        Parameters
        ----------
        start_time : date-like, optional
            The start of the time window to read.
        stop_time : date-like, optional
            The end of the time window to read.

        Returns
        -------
        numpy.ndarray
            The raw data read from the files, concatenated into a single NumPy array.
        """
        # This is just a wrapper to get a single frame from the frame generator.
        # Without a framesize, it defaults to a single large frame with all the data.
        frame = next(self.raw_frames(start_time=start_time, stop_time=stop_time))
        return frame

    def raw_frames(self, start_time=None, stop_time=None, framesize=None):
        """Generate frames of raw data from files on disk.

        This retrieves raw data samples between the start time and stop time
        (defaulting to times in ``self.time_window``), and yields frames of
        a fixed size. The frames have no overlap - use ``self.rolling`` for
        overlapping frames. If no framesize is given, it defaults to yielding
        a single large frame with all the data.
        If needed, data will be loaded from several files on disk. In those
        cases, the file timestamps will be checked for approximate data
        continuity before any loading starts.

        Parameters
        ----------
        start_time : date-like, optional
            The start of the time window to read.
        stop_time : date-like, optional
            The end of the time window to read.
        framesize : int, optional
            The number of samples to yield in each frame.

        Yields
        ------
        numpy.ndarray
            The frames with raw data. With a framesize, the last frame extends
            past the stop time to be a full frame. If the files run out before
            the last frame is full, that frame is yielded with the samples
            that do exist, and is shorter than ``framesize``.

        Notes
        -----
        This method is intended as the base data loader, mainly for internal
        use in the package. It's used both to load all data within a time
        window, but also as an IO optimization in ``self.rolling`` to load
        larger chunks of data than the desired rolling window.
        """
        # Explicitly given times are converted, since they are used in arithmetic below.
        if start_time is None:
            start_time = self.time_window.start
        else:
            start_time = _core.time_to_datetime(start_time)
        if stop_time is None:
            stop_time = self.time_window.stop
        else:
            stop_time = _core.time_to_datetime(stop_time)
        self.check_file_continuity(start_time=start_time, stop_time=stop_time)

        samplerate = self.samplerate
        remaining_samples = int(np.floor((stop_time - start_time).in_seconds() * samplerate))
        if remaining_samples == 0:
            # No samples requested, but we want to yield something of the right shape and type
            yield self.files[0].read_data(start_idx=0, stop_idx=0)
            return

        if framesize:
            # With a given framesize we increase the number of samples to yield full frames
            remaining_samples = int(framesize * np.ceil(remaining_samples / framesize))
        else:
            # One single frame with all samples. Used to get all data at once.
            framesize = remaining_samples

        # Where we read - sample_idx in file_idx. This moves along as we read more data.
        file_idx = self.files.index(self._find_file_time(start_time))
        sample_idx = int(np.floor((start_time - self.files[file_idx].start_time).in_seconds() * samplerate))
        num_files = len(self.files)

        # Rounding up to full frames can request samples from after the last file.
        # Both loops stop when the files run out, instead of indexing past the end of the file list.
        while remaining_samples > 0 and file_idx < num_files:  # Loop over frames
            chunks = []
            remaining_in_frame = framesize
            while remaining_in_frame > 0 and file_idx < num_files:  # Loop over chunks from different files
                chunk = self.files[file_idx].read_data(start_idx=sample_idx, stop_idx=sample_idx + remaining_in_frame)
                chunks.append(chunk)
                remaining_in_frame -= chunk.shape[0]

                if remaining_in_frame:
                    # This file couldn't fill this frame - go to the beginning of the next file.
                    sample_idx = 0
                    file_idx += 1
                else:
                    # This frame is full, but the file has more data.
                    sample_idx += chunk.shape[0]

            # Assemble the frame from the chunks.
            if len(chunks) == 1:
                # Optimization - a single chunk doesn't need concatenation.
                frame = chunks[0]
            else:
                frame = np.concatenate(chunks, axis=0)
            remaining_samples -= frame.shape[0]
            yield frame

    def select_file_time(self, time):
        """Get a recording for a specific file, by time.

        This finds the file corresponding to a specific time,
        then returns a recording subwindow corresponding
        to that file.

        Raises
        ------
        ValueError
            If the time is not inside any file. This includes
            times before the first file.
        """
        file = self._find_file_time(time)
        return self.subwindow(start=file.start_time, stop=file.stop_time)

    def select_file_name(self, name):
        """Get a recording for a specific file, by name.

        This finds the file with a specific name,
        then returns a recording subwindow corresponding
        to that file. Only the stem of the name is compared,
        so folders and file extensions are ignored.

        Raises
        ------
        ValueError
            If no file matches the name.
        """
        stem = Path(name).stem
        for file in self.files:
            if stem == file.filepath.stem:
                return self.subwindow(start=file.start_time, stop=file.stop_time)
        raise ValueError(f"Could not find file matching name '{name}'")
676
677
[docs]
class AudioFileRecording(FileRecording):
    """Class for audio file recordings.

    This class handles reading audio files using the
    `soundfile` python package.
    This is a fully functional class, but reading data
    requires a ``start_time_parser`` function passed to the
    `read_folder` classmethod. A more convenient approach
    is to subclass this class and customize the `read_folder`
    classmethod.
    """

    file_range = None
    """The input range of the read files."""
    gain = None
    """The gain of this recording."""
    adc_range = None
    """The voltage peak range of the adc in this recording."""

    @classmethod
    def read_folder(
        cls,
        folder,
        start_time_parser,
        sensor=None,
        file_filter=None,
        time_compensation=None,
        glob_pattern="**/*.wav",
        file_kwargs=None,
    ):
        """Read all matching files in a folder and parse their start times.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        start_time_parser : str or callable
            If a string is provided, it is treated as a format string and will be used
            to parse the start time from the filename. If a callable is provided, it
            should accept a file path and return a `whenever.Instant` object representing the start time.
        sensor : optional
            The sensor associated with the files, passed on to the recording as is.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files matching the ``glob_pattern`` are processed.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        glob_pattern : str, optional
            A glob pattern used to match files in the folder, by default ``"**/*.wav"``.
        file_kwargs : dict or callable, optional
            Additional keyword arguments to be passed when creating the `RecordedFile` instances.
            If a callable is provided, it should accept a file path and return a dictionary of keyword arguments.
            If None, no additional keyword arguments are passed to the files.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.
        """
        folder = Path(folder)
        if not folder.exists():
            raise RuntimeError(f"'{folder}' does not exist")
        if not folder.is_dir():
            raise RuntimeError(f"'{folder}' is not a folder")

        # Each of the flexible arguments is normalized to a callable, so that the file loop has a single code path.
        if isinstance(start_time_parser, str):
            start_time_format = start_time_parser

            def start_time_parser(file):
                return _core.time_to_datetime(file.stem, fmt=start_time_format)

        if time_compensation is None:

            def time_compensation(timestamp):
                return timestamp

        if isinstance(time_compensation, TimeCompensation):
            time_compensation = time_compensation.recorded_to_actual
        if not callable(time_compensation):
            # Anything else is taken as a fixed offset in seconds.
            offset = time_compensation

            def time_compensation(timestamp):
                return timestamp.subtract(seconds=offset)

        if file_filter is None:

            def file_filter(filepath):
                return True

        if file_kwargs is None:

            def file_kwargs(filepath):
                return {}

        if not callable(file_kwargs):
            _file_kwargs = file_kwargs

            def file_kwargs(filepath):
                return _file_kwargs

        files = []
        for file in folder.glob(glob_pattern):
            if file_filter(file):
                start_time = start_time_parser(file)
                files.append(cls.RecordedFile(file, time_compensation(start_time), **file_kwargs(file)))

        if not files:
            raise RuntimeError(f"No matching files found in '{folder}'")

        return cls(
            files=files,
            sensor=sensor,
        )

    class RecordedFile(FileRecording.RecordedFile, _LazyPropertyMixin):
        """Wrapper for audio files."""

        def __init__(self, filepath, start_time):
            super().__init__(filepath=filepath)
            self._start_time = start_time

        def _lazy_load(self):
            # The file header is only read once one of the lazy properties is accessed.
            sfi = soundfile.info(self.filepath.as_posix())
            return super()._lazy_load() | dict(
                num_samples=sfi.frames,
                num_channels=sfi.channels,
                samplerate=sfi.samplerate,
            )

        @property
        def start_time(self):  # noqa: D102, takes the docstring from the superclass
            return self._start_time

        num_samples = _LazyPropertyMixin._lazy_property("num_samples")
        num_channels = _LazyPropertyMixin._lazy_property("num_channels")
        samplerate = _LazyPropertyMixin._lazy_property("samplerate")

        @property
        def stop_time(self):  # noqa: D102, takes the docstring from the superclass
            return self.start_time.add(seconds=self.duration)

        @property
        def duration(self):  # noqa: D102, takes the docstring from the superclass
            return self.num_samples / self.samplerate

        def read_data(self, start_idx=None, stop_idx=None):  # noqa: D102, takes the docstring from the superclass
            # soundfile.read returns (data, samplerate), only the data is of interest here.
            return soundfile.read(self.filepath.as_posix(), start=start_idx, stop=stop_idx, dtype="float32")[0]

    def time_data(self):  # noqa: D102, takes the docstring from the superclass
        data = self.raw_data()
        if np.ndim(data) == 1:
            dims = "time"
            coords = None
        elif np.ndim(data) == 2:
            if self.sensor is not None and "sensor" in self.sensor and np.shape(data)[1] == self.sensor["sensor"].size:
                # One column per sensor: label the columns with the sensors.
                dims = ("time", "sensor")
                coords = {"sensor": self.sensor["sensor"]}
            else:
                dims = ("time", "channel")
                if self.sensor is not None and "channel" in self.sensor:
                    coords = {"channel": self.sensor["channel"]}
                else:
                    coords = None
        else:
            raise NotImplementedError("Audio files with more than 2 dimensions are not supported")
        data = _core.TimeData(
            data=data,
            samplerate=self.samplerate,
            start_time=self.time_window.start,
            dims=dims,
            coords=coords,
        )
        # A recording without a sensor (the default) has no sensitivity to apply.
        sensitivity = None if self.sensor is None else self.sensor.get("sensitivity", None)
        data = calibrate_raw_data(
            raw_data=data,
            sensitivity=sensitivity,
            gain=self.gain,
            adc_range=self.adc_range,
            file_range=self.file_range,
        )
        return data

    def rolling(self, duration=None, step=None, overlap=None):
        """Generate rolling frames of data.

        Parameters
        ----------
        duration : float
            The size of each frame, in seconds.
        step : float
            The step between consecutive frames, in seconds.
        overlap : float, optional
            The fraction of overlap between consecutive frames. Should be less than one.
            Negative values will make "gaps" in the output.

        Returns
        -------
        roller : `AudioFileRoller`
            Implementation of rolling time windows for recordings.
        """
        return AudioFileRoller(self, duration=duration, step=step, overlap=overlap)
886
887
[docs]
888class AudioFileRoller(_core.TimeDataRoller):
889 """Rolling windows of time data.
890
891 Parameters
892 ----------
893 obj : AudioFileRecording
894 The audio file wrapper to roll over.
895 duration : float
896 The duration of each frame, in seconds.
897 step : float
898 The step between consecutive frames, in seconds.
899 overlap : float
900 The overlap between consecutive frames, as a fraction of the duration.
901 """
902
def __init__(self, obj, duration=None, step=None, overlap=0):
    super().__init__(obj, duration=duration, step=step, overlap=overlap)
    # Zero-duration time data, read once to learn the dims and coords of the frames.
    self._dummy_data = self.obj.subwindow(start=True, duration=0).time_data().data
    # A recording without a sensor has no sensitivity to apply.
    sensor = self.obj.sensor
    sensitivity = None if sensor is None else sensor.get("sensitivity")
    # Calibrating the value 1 gives the factor to multiply raw frames with.
    calibration = calibrate_raw_data(
        1,
        gain=self.obj.gain,
        sensitivity=sensitivity,
        adc_range=self.obj.adc_range,
        file_range=self.obj.file_range,
    )
    if isinstance(calibration, xr.DataArray):
        # Align the calibration with the coordinates of the data, then keep the bare array.
        calibration = xr.align(self._dummy_data, calibration)[1].data
    # Without any array-valued factor the calibration is a plain number,
    # which `xr.align` cannot handle. It multiplies with the frames as it is.
    self._calibration = calibration
914
@property
def shape(self):  # noqa: D102, inherited from parent
    # Samples per frame along time, followed by the size of every other dimension.
    trailing_sizes = [self._dummy_data.sizes[dim] for dim in self.dims if dim != "time"]
    return (self.settings["samples_per_frame"], *trailing_sizes)
920
@property
def dims(self):  # noqa: D102, inherited from parent
    # The time dimension is moved first, the other dimensions keep their order.
    names = list(self._dummy_data.dims)
    del names[names.index("time")]
    return ("time", *names)
926
@property
def coords(self):  # noqa: D102, inherited from parent
    # A plain dict with the coordinates of the zero-length dummy data.
    return {name: coord for name, coord in self._dummy_data.coords.items()}
931
[docs]
def numpy_frames(self, io_blocksize=1_000_000):  # noqa: D102, inherited from parent
    # Blocks are read from disk with a size chosen for efficient IO, then cut into
    # output frames of the requested length and step. This decouples the read size
    # from the signal processing frame size, and is where the overlap is created.
    frame_length = self.settings["samples_per_frame"]
    hop = self.settings["sample_step"]
    # A block has to hold at least one full output frame.
    io_blocksize = max(io_blocksize, frame_length)

    out = np.zeros(self.shape)
    leftover = np.zeros(0)
    frames_done = 0

    # Large blocks from disk keep the IO overhead down.
    for block in self.obj.raw_frames(framesize=io_blocksize):
        # Keep going as long as the leftover and the block together hold a full frame,
        # and there are still frames to produce.
        while block.shape[0] + leftover.shape[0] >= frame_length and frames_done < self.num_frames:
            carried = leftover.shape[0]
            if carried:
                # The leftover samples come first in the output frame,
                # the rest of the frame is taken from the start of the block.
                # The leftover is never larger than one output frame.
                out[:carried] = leftover
                out[carried:] = block[: frame_length - carried]
                # The step consumes the leftover first. Only the part of the
                # step that the leftover cannot cover is consumed from the block.
                block = block[max(0, hop - carried) :]
                leftover = leftover[hop:]
            else:
                # No leftover, so the frame comes straight from the block.
                # The values are copied into `out`, which must never be a view of `block`,
                # since `out` is written to in place in the branch above.
                out[:] = block[:frame_length]
                block = block[hop:]

            # Calibrate and hand out the frame, then count it.
            yield out * self._calibration
            frames_done += 1
        # The block cannot fill another frame (and the leftover is used up by now).
        # Keep what remains of the block for the next one.
        leftover = block
972
[docs]
def time_data(self):  # noqa: D102, inherited from parent
    # Time stamps of the first frame: the start of the recording window plus
    # one offset per sample, expressed in nanoseconds.
    sample_offsets_ns = np.arange(self.settings["samples_per_frame"]) * 1e9 / self.obj.samplerate
    recording_start = _core.time_to_np(self.obj.time_window.start)
    first_frame_times = recording_start + sample_offsets_ns.astype("timedelta64[ns]")

    for frame_number, samples in enumerate(self.numpy_frames()):
        # Every frame starts one step later than the previous one. The shift is
        # truncated to whole nanoseconds.
        elapsed_seconds = frame_number * self.settings["sample_step"] / self.obj.samplerate
        frame_shift = np.timedelta64(int(elapsed_seconds * 1e9), "ns")
        yield _core.TimeData(
            samples,
            time=first_frame_times + frame_shift,
            samplerate=self.obj.samplerate,
            coords=self.coords,
            dims=self.dims,
        )
986
def __iter__(self):
    """Yield one subwindow of the wrapped recording per frame.

    The first window starts at the start of the recording's time window.
    Each following window starts ``settings["step"]`` seconds after the previous one.
    All windows last ``settings["duration"]`` seconds.
    """
    window_start = self.obj.time_window.start
    for _ in range(self.num_frames):
        yield self.obj.subwindow(start=window_start, duration=self.settings["duration"])
        window_start = window_start.add(seconds=self.settings["step"])
992
993
[docs]
class SoundTrap(AudioFileRecording):
    """Class to read data from OceanInstruments SoundTrap recorders.

    The main way to read SoundTrap data is through the
    `read_folder` classmethod.
    """

    # Recorder-level settings defined on the class rather than read from the files.
    allowable_interrupt = 1
    gain = None
    adc_range = None
    file_range = 1

    @classmethod
    def read_folder(cls, folder, sensor=None, serial_number=None, time_compensation=None):
        """Read files in a folder, filtered on an optional serial number.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        serial_number : int or None, optional
            If provided, only files with the matching serial number in their filename will be processed.
            Files whose name does not start with an integer are skipped in this case.
            If None, all files in the folder will be processed.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        Notes
        -----
        File names are expected to look like ``<serial>.<YYMMDDHHmmss>.<extension>``.
        This method filters the files in the folder based on the provided ``serial_number`` and
        parses the start time from the second dot-separated field of the filename.
        It then delegates the actual file reading to the `read_folder` method of the parent class.
        """
        if serial_number is None:

            def file_filter(filepath):
                return True
        else:

            def file_filter(filepath):
                try:
                    file_serial = int(filepath.stem.split(".")[0])
                except ValueError:
                    # The name does not start with a numeric serial, so it cannot
                    # be a match. Skip the file instead of aborting the folder scan.
                    return False
                return file_serial == serial_number

        def start_time_parser(filepath):
            return _core.time_to_datetime(filepath.stem.split(".")[1], fmt="%y%m%d%H%M%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )
1060
1061
[docs]
class SylenceLP(AudioFileRecording):
    """Class to read data from RTsys SylenceLP recorders.

    The main way to read Sylence data is through the
    `read_folder` classmethod.
    """

    adc_range = 2.5
    file_range = 1
    allowable_interrupt = 1

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            with self.filepath.open("rb") as file:
                # Base header, 36 bytes, little-endian. Fields marked (unused) are not read:
                #   [0:4]   chunk id, always "RIFF" (unused)
                #   [4:8]   total file size (unused)
                #   [8:12]  chunk format, always "WAVE" (unused)
                #   [12:16] subchunk id, always "fmt" (unused)
                #   [16:20] subchunk size, always 16 (unused)
                #   [20:22] audio format (unused)
                #   [22:24] number of channels
                #   [24:28] samplerate
                #   [28:32] byte rate (unused)
                #   [32:34] bytes per sample
                #   [34:36] bit depth
                base_header = file.read(36)
                num_channels = int.from_bytes(base_header[22:24], byteorder="little", signed=False)
                if num_channels != 1:
                    raise ValueError(
                        f"Expected file for SylenceLP with a single channel, read file with {num_channels} channels"
                    )
                samplerate = int.from_bytes(base_header[24:28], byteorder="little", signed=False)
                bytes_per_sample = int.from_bytes(base_header[32:34], byteorder="little", signed=False)
                bitdepth = int.from_bytes(base_header[34:36], byteorder="little", signed=False)

                # Read the 8-byte chunk header (id + size) first, then the payload.
                # `peek` is avoided since it may return fewer bytes than requested.
                # The chunk header is kept at the front of `conf_header` so the
                # indices below stay aligned with the manual.
                conf_header = file.read(8)
                conf_size = int.from_bytes(conf_header[4:8], byteorder="little", signed=False)
                if conf_size != 460:
                    raise ValueError(f"Incorrect size of SylenceLP config: '{conf_size}'B, expected 460B")
                conf_header += file.read(conf_size)

                # Config section, indices including the 8-byte chunk header:
                #   [0:4]     subchunk id, always "conf"
                #   [4:8]     subchunk size, the same as conf_size
                #   [8:12]    config version
                #   [16:24]   a signed timestamp which is NOT when the recording starts,
                #             its actual meaning is unknown (unused)
                #   [24:28]   channel
                #   [28:32]   samplerate, float32
                #   [32:48]   hydrophone sensitivity, float32 values
                #   [48:64]   gain, float32 values
                #   [64:80]   gain correction, is just 1/gain (unused)
                #   [80:100]  serial number
                #   [100:104] active channels
                subchunk_id = conf_header[:4].decode("ascii")
                if subchunk_id != "conf":
                    raise ValueError(f"Expected 'conf' section in SylenceLP config, found '{subchunk_id}'")
                config_version = int.from_bytes(conf_header[8:12], byteorder="little", signed=False)
                if config_version != 2:
                    raise NotImplementedError(f"Cannot handle SylenceLP config version {config_version}")
                channel = conf_header[24:28].decode("ascii")
                if channel.strip("\x00") != "":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{channel}'"
                    )
                samplerate_alt = np.frombuffer(conf_header[28:32], dtype="f4").squeeze()
                if samplerate != samplerate_alt:
                    raise ValueError(
                        f"Mismatched samplerate for hardware and file, read file samplerate {samplerate} and config samplerate {samplerate_alt}"
                    )

                hydrophone_sensitivity = np.frombuffer(conf_header[32:48], dtype="f4")
                gain = np.frombuffer(conf_header[48:64], dtype="f4")
                serialnumber = conf_header[80:100].decode("ascii")
                active_channels = conf_header[100:104].decode("ascii")
                if active_channels != "A\x00\x00\x00":
                    raise NotImplementedError(
                        f"No implementation for multichannel SylenceLP recorders, found channel specification '{active_channels}'"
                    )

                data_header = file.read(4).decode("ascii")
                if data_header != "data":
                    raise ValueError(f"Expected file header 'data', read {data_header}")
                data_size = int.from_bytes(file.read(4), byteorder="little", signed=False)

            # Integer arithmetic: a non-zero remainder means the data chunk
            # does not hold a whole number of samples.
            num_samples, leftover_bytes = divmod(data_size, bytes_per_sample)
            if leftover_bytes:
                raise ValueError(f"Size of data is not divisible by bytes per sample, file '{self.name}' is corrupt!")

            # Only the first entry of the sensitivity and gain arrays is used,
            # since only single-channel recorders are handled.
            return super()._lazy_load() | dict(
                samplerate=samplerate,
                bitdepth=bitdepth,
                num_samples=num_samples,
                hydrophone_sensitivity=hydrophone_sensitivity[0],
                serial_number=serialnumber.strip("\x00"),
                # Stored gain value converted to dB.
                # NOTE(review): the negative sign is taken over as-is - confirm the convention.
                gain=-20 * np.log10(gain[0]),
            )

        bitdepth = _LazyPropertyMixin._lazy_property("bitdepth")
        hydrophone_sensitivity = _LazyPropertyMixin._lazy_property("hydrophone_sensitivity")
        serial_number = _LazyPropertyMixin._lazy_property("serial_number")
        gain = _LazyPropertyMixin._lazy_property("gain")

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain of the first file is used for the whole recording.
        return self.files[0].gain

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        Notes
        -----
        The start time of each file is parsed from its name: the first nine characters
        of the file stem are skipped, and the rest is read as ``"YYYY-MM-DD_HH-mm-ss"``.
        """

        def start_time_parser(filepath):
            return _core.time_to_datetime(filepath.stem[9:], fmt="%Y-%m-%d_%H-%M-%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )
1196
1197
[docs]
class MultichannelAudioInterfaceRecording(AudioFileRecording):
    """Class for handling multichannel audio interface recordings."""

    file_range = 1

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The interface gain is stored with the sensor information.
        return self.sensor.get("gain", None)

    @property
    def adc_range(self):  # noqa: D102, takes the docstring from the superclass
        # The ADC range is stored with the sensor information.
        return self.sensor.get("adc_range", None)

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def __init__(self, filepath, start_time, channels):
            super().__init__(filepath=filepath, start_time=start_time)
            # Column indices to pick out of the audio file when reading.
            self.channels = list(channels)

        def read_data(self, start_idx=None, stop_idx=None):  # noqa: D102, takes the docstring from the superclass
            # `always_2d` keeps a channel axis even for single-channel files,
            # so the column selection below always works.
            samples, _ = soundfile.read(
                self.filepath.as_posix(),
                start=start_idx,
                stop=stop_idx,
                dtype="float32",
                always_2d=True,
            )
            return samples[:, self.channels]

        @property
        def num_channels(self):  # noqa: D102, inherits from superclass.
            return len(self.channels)

    @classmethod
    def _merge_channel_info(cls, sensor, channel, gain, adc_range):
        """Combine channel, gain, and ADC range with the sensor information.

        There are two operating modes, selected by whether sensor information exists:

        1. With sensor information: values missing from the sensor are passed
           to `uwacan.positional.Sensor.with_data`, and the resulting
           `~uwacan.positional.Sensor` is returned. Dictionaries can then be
           used to supply the channel, gain, and adc_range.
           A missing channel defaults to ``0, 1, ...`` (one per sensor), a missing gain to 0,
           and a missing adc_range to 1.
        2. Without sensor information: a new `xarray.Dataset` is built, with the
           channels as dimension and coordinate. The channels must then be array_like,
           and gain and adc_range have to be compatible with them.

        Parameters
        ----------
        sensor : `uwacan.positional.Sensor` or None
            The sensor to which the channel information will be merged.
            If None, a new dataset is created, and the output is a dataset.
        channel : array_like, or dict
            Channel information to be added to the sensor dataset.
        gain : array_like, scalar, or dict
            Gain information to be added to the sensor dataset.
        adc_range : array_like, scalar, or dict
            ADC range information to be added to the sensor dataset.

        Raises
        ------
        ValueError
            If a value is given explicitly while the sensor information already holds it.
        """
        if sensor is None:
            dataset = xr.Dataset()
            if channel is not None:
                if not isinstance(channel, xr.DataArray):
                    channel = xr.DataArray(channel, dims="channel", coords={"channel": channel})
                dataset["channel"] = channel
            for key, value in (("gain", gain), ("adc_range", adc_range)):
                if value is None:
                    continue
                # Scalars and ready-made DataArrays are stored as they are.
                # Anything else is treated as one value per channel.
                if not (isinstance(value, xr.DataArray) or np.ndim(value) == 0):
                    value = xr.DataArray(value, dims="channel", coords={"channel": channel})
                dataset[key] = value
            return dataset

        # One row per setting: name, explicit value, default factory, and the
        # error for a value given twice. The factories are only called when a
        # default is actually needed.
        settings = (
            (
                "channel",
                channel,
                lambda: list(range(len(sensor.sensors))),
                "Should not give explicit channel if the channel information is already in the sensor information",
            ),
            (
                "gain",
                gain,
                lambda: 0,
                "Should not give explicit gain if the gain information is already in the sensor information",
            ),
            (
                "adc_range",
                adc_range,
                lambda: 1,
                "Should not give explicit adc_range if the adc_range information is already in the sensor information",
            ),
        )
        assigns = {}
        for key, value, make_default, duplicate_message in settings:
            if key not in sensor:
                assigns[key] = make_default() if value is None else value
            elif value is not None:
                raise ValueError(duplicate_message)
        return sensor.with_data(**assigns)

    @classmethod
    def read_folder(
        cls,
        folder,
        start_time_parser,
        channel=None,
        gain=None,
        adc_range=None,
        one_recording_per_file=False,
        sensor=None,
        file_filter=None,
        time_compensation=None,
        glob_pattern="**/*.wav",
    ):
        """Read files in a folder.

        This method collects audio files from the specified folder into a recording object.
        The sensor and audio interface settings can be supplied in two ways, depending on if
        there is sensor information or not:

        1. There is sensor information: Use `uwacan.sensor_array` to specify
           the sensor particulars. Give the ``channel``, ``gain``, and ``adc_range``
           as dicts with the sensor names as keys, or scalars for all the sensors.
        2. If there is no sensor information: Give channel labels as a list to the ``channel``,
           and array_like or scalar ``gain`` and ``adc_range``.

        Parameters
        ----------
        folder : str or Path
            The folder containing the audio files.
        start_time_parser : callable or str
            - A function to parse the start time from file names, or
            - a string specifying the datetime format, e.g., ``"YYYY-MM-DD_HH-mm-ss"``.

        channel : dict or array_like
            The channel index in the read data, from 0.

            1. A mapping from sensor names to channel index, if sensor information is given.
            2. A list of channel labels, if no sensor information is given.

        gain : dict, array_like, or scalar
            The gain used for the interface, in dB.

            1. A mapping from sensor names to interface gain, if sensor information is given.
            2. A list of gains, if no sensor information is given.
            3. A single gain for all interface channels/sensors.

        adc_range : dict, array_like, or scalar
            The peak voltage input of the ADC.

            1. A mapping from sensor names to interface ADC range, if sensor information is given.
            2. A list of ADC ranges, if no sensor information is given.
            3. A single ADC range for all interface channels/sensors.

        one_recording_per_file : bool, optional
            If True, the output will be a list of recordings, one for each file.
        sensor : `~uwacan.positional.Sensor`
            Sensor information with sensitivity, positions, etc.
        file_filter : callable, optional
            A function to filter files based on specific criteria. Will be called with the file path.
            The file is skipped if the filter returns ``False``.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        glob_pattern : str, optional
            The glob pattern to match files in the folder. Defaults to ``"**/*.wav"``.

        """
        sensor = cls._merge_channel_info(sensor=sensor, channel=channel, gain=gain, adc_range=adc_range)
        recordings = super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
            glob_pattern=glob_pattern,
            # Every file object gets the channel indices to select when reading.
            file_kwargs={"channels": sensor["channel"].values},
        )
        if one_recording_per_file:
            # Split the combined recording into one subwindow per file.
            return [recordings.subwindow(start=file.start_time, stop=file.stop_time) for file in recordings.files]
        return recordings
1386
1387
[docs]
class LoggerheadDSG(AudioFileRecording):
    """Class to read data from Loggerhead DSG recorders.

    The main way to read Loggerhead data is through the
    `read_folder` classmethod.
    """

    allowable_interrupt = 1
    adc_range = None
    file_range = 1

    @classmethod
    def read_folder(cls, folder, sensor=None, time_compensation=None, file_filter=None):
        """Read all files in a folder.

        Parameters
        ----------
        folder : str or Path
            The path to the folder containing the files.
        sensor : str or None, optional
            The sensor associated with the files.
        time_compensation : `TimeCompensation`, int, or callable, optional
            - If a `TimeCompensation` object is provided, it is used to adjust the recorded times.
            - If a number is provided, it is treated as a time offset in seconds and subtracted from recorded times.
            - If a callable is provided, it should accept a timestamp and return a compensated timestamp.
            - If None, no time compensation is applied.
        file_filter : callable or None, optional
            A callable that accepts a file path and returns True if the file should be processed,
            and False otherwise. If None, all files are processed.

        Returns
        -------
        cls
            An instance of the class containing the loaded files.

        Raises
        ------
        RuntimeError
            If the folder does not exist, is not a directory, or no matching files are found.

        Notes
        -----
        The start time of each file is parsed from the first 15 characters
        of the file stem, read as ``"YYYYMMDDTHHmmss"``.
        """

        def start_time_parser(filepath):
            return _core.time_to_datetime(filepath.stem[:15], "%Y%m%dT%H%M%S")

        return super().read_folder(
            folder=folder,
            start_time_parser=start_time_parser,
            sensor=sensor,
            file_filter=file_filter,
            time_compensation=time_compensation,
        )

    @property
    def gain(self):  # noqa: D102, takes the docstring from the superclass
        # The gain of the first file is used for the whole recording.
        return self.files[0].gain

    class RecordedFile(AudioFileRecording.RecordedFile):  # noqa: D106, takes the docstring from the superclass
        def _lazy_load(self):  # noqa: D102, takes the docstring from the superclass
            # The gain is the third underscore-separated field of the file stem,
            # written as a number followed by "dB".
            name_fields = self.filepath.stem.split("_")
            # Too few fields gives an empty string, so badly named files get
            # the descriptive ValueError below instead of a bare IndexError.
            gain = name_fields[2] if len(name_fields) > 2 else ""
            if not gain.endswith("dB"):
                raise ValueError(
                    f"File `{self.filepath}` does not seem to be a file from a Loggerhead DSG, could not extract gain"
                )
            # `removesuffix` drops exactly one trailing "dB". `rstrip("dB")` would
            # instead strip any run of the characters "d" and "B".
            return super()._lazy_load() | dict(gain=float(gain.removesuffix("dB")))

        gain = _LazyPropertyMixin._lazy_property("gain")