diff --git a/.appveyor.yml b/.appveyor.yml index ecb5d751..5228af7b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -36,7 +36,9 @@ init: install: - git submodule update --init --recursive - - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" +# - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" + - echo ". C:\Miniconda3-x64/etc/profile.d/conda.sh" >> ~/.bashrc + - echo "conda activate" >> ~/.bashrc - "python --version" - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ff1e45f5..5a458024 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -22,6 +22,7 @@ Added - Acoustic properties of different materials in ``pyroomacoustics.materials`` - Scattering from the wall is handled via ray tracing method, scattering coefficients are provided in ``pyroomacoustics.materials.Material`` objects +- ``random`` sub-package for room generation tools. Changed diff --git a/docs/index.rst b/docs/index.rst index 5f6efed9..1a542a16 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,7 @@ Table of contents pyroomacoustics.bss pyroomacoustics.doa pyroomacoustics.denoise + pyroomacoustics.random pyroomacoustics Indices and tables diff --git a/docs/pyroomacoustics.datasets.rst b/docs/pyroomacoustics.datasets.rst index 6c463630..24993a8a 100644 --- a/docs/pyroomacoustics.datasets.rst +++ b/docs/pyroomacoustics.datasets.rst @@ -25,4 +25,6 @@ Tools and Helpers pyroomacoustics.datasets.base pyroomacoustics.datasets.utils + pyroomacoustics.datasets.distribution + pyroomacoustics.datasets.room diff --git a/docs/pyroomacoustics.experimental.rst b/docs/pyroomacoustics.experimental.rst index aa7a4c39..7804e2d4 100644 --- a/docs/pyroomacoustics.experimental.rst +++ b/docs/pyroomacoustics.experimental.rst @@ -1,4 +1,4 @@ -pyroomacoustics.experimental package +Experimental ==================================== Submodules diff --git a/docs/pyroomacoustics.random.distribution.rst b/docs/pyroomacoustics.random.distribution.rst new 
file mode 100644 index 00000000..15ce87ac --- /dev/null +++ b/docs/pyroomacoustics.random.distribution.rst @@ -0,0 +1,7 @@ +pyroomacoustics.random.distribution module +========================================== + +.. automodule:: pyroomacoustics.random.distribution + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/pyroomacoustics.random.room.rst b/docs/pyroomacoustics.random.room.rst new file mode 100644 index 00000000..a8bdc3db --- /dev/null +++ b/docs/pyroomacoustics.random.room.rst @@ -0,0 +1,7 @@ +pyroomacoustics.random.room module +================================== + +.. automodule:: pyroomacoustics.random.room + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/pyroomacoustics.random.rst b/docs/pyroomacoustics.random.rst new file mode 100644 index 00000000..cfda3668 --- /dev/null +++ b/docs/pyroomacoustics.random.rst @@ -0,0 +1,19 @@ +Random Generation +============================== + +Submodules +---------- + +.. toctree:: + + pyroomacoustics.random.distribution + pyroomacoustics.random.room + pyroomacoustics.random.source + +Module contents +--------------- + +.. automodule:: pyroomacoustics.random + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/pyroomacoustics.random.source.rst b/docs/pyroomacoustics.random.source.rst new file mode 100644 index 00000000..f9c4c146 --- /dev/null +++ b/docs/pyroomacoustics.random.source.rst @@ -0,0 +1,7 @@ +pyroomacoustics.random.source module +==================================== + +.. 
import numpy as np
import json
import os
from glob import glob
from pprint import pprint
import random
import soundfile as sf
import argparse

from pyroomacoustics.utilities import rms, sample_audio
from pyroomacoustics.random.room import ShoeBoxRoomGenerator


"""
Example script for:

1) Generating a dataset of random room configurations and saving their
corresponding room impulse responses::

    python examples/generate_room_dataset.py

2) Randomly selecting a room from an existing dataset and applying its room
impulse responses to a randomly selected speech file and (depending on the
selected room) some noise sources::

    python examples/generate_room_dataset.py --apply_room <ROOM_DATASET_PATH>

"""

example_noise_files = [
    'examples/input_samples/doing_the_dishes.wav',
    'examples/input_samples/exercise_bike.wav',
    'examples/input_samples/running_tap.wav',
]


def make_dataset(n_rooms, source_min_dist_mic):
    """
    Generate a dataset of room impulse responses.

    A new folder will be created with the name
    ``pra_room_dataset_<TIMESTAMP>`` and the following structure::

        pra_room_dataset_<TIMESTAMP>/
            room_metadata.json
            data/
                room_<uuid>.npz
                room_<uuid>.npz
                ...

    where ``room_metadata.json`` contains metadata about each room
    configuration in the ``data`` folder.

    The `apply_rir` function shows how a room can be selected at random in
    order to simulate a measurement in one of the randomly generated
    configurations.

    Parameters
    -----------
    n_rooms : int
        Number of room configurations to generate.
    source_min_dist_mic : float
        Minimum distance between each source and the microphone(s).
    """
    room_generator = ShoeBoxRoomGenerator(
        source_min_dist_mic=source_min_dist_mic)
    room_generator.create_dataset(n_rooms)


def apply_rir(room_dataset, target_speech, noise_dir, snr_db, output_file):
    """
    Randomly select a room from the dataset and apply its room impulse
    responses to a randomly selected speech file and (depending on the
    selected room) some noise sources.

    Parameters
    -----------
    room_dataset : str
        Path to room dataset created by `make_dataset`.
    target_speech : str
        Path to a target speech WAV file.
    noise_dir : str
        Path to a directory with noise WAV files. Default is to apply the
        room impulse response to WAV file(s) from `examples/input_samples`.
    snr_db : float
        Desired signal-to-noise ratio resulting from simulation.
    output_file : str
        Path of output WAV file from simulation.
    """
    if room_dataset is None:
        raise ValueError('Provide a path to a room dataset. You can compute '
                         'one with the `make_dataset` command.')

    with open(os.path.join(room_dataset, 'room_metadata.json')) as json_file:
        room_metadata = json.load(json_file)

    # pick a room at random
    random_room_key = random.choice(list(room_metadata.keys()))
    _room_metadata = room_metadata[random_room_key]
    print('Room metadata')
    pprint(_room_metadata)

    # load target audio
    target_data, fs_target = sf.read(target_speech)

    # load impulse responses
    ir_file = os.path.join(room_dataset, 'data', _room_metadata['file'])
    ir_data = np.load(ir_file)
    n_noises = ir_data['n_noise']
    sample_rate = ir_data['sample_rate']
    assert sample_rate == fs_target, \
        'Target sampling rate {} does not match IR sampling rate {}.'.format(
            fs_target, sample_rate)

    # apply target IR: one convolution per microphone channel
    target_ir = ir_data['target_ir']
    n_mics, ir_len = target_ir.shape
    output_len = ir_len + len(target_data) - 1
    room_output = np.zeros((n_mics, output_len))
    for n in range(n_mics):
        room_output[n] = np.convolve(target_data, target_ir[n])

    # apply noise IR(s) if applicable
    if n_noises:

        if noise_dir is None:
            noise_files = example_noise_files
        else:
            noise_files = glob(os.path.join(noise_dir, '*.wav'))
        if len(noise_files) < n_noises:
            raise ValueError('Not enough noise WAV files: need {}, found {}.'
                             .format(n_noises, len(noise_files)))
        print('\nNumber of noise files : {}'.format(len(noise_files)))

        _noise_files = np.random.choice(noise_files, size=n_noises,
                                        replace=False)
        print('Selected noise file(s) : {}'.format(_noise_files))
        noise_output = np.zeros_like(room_output)
        for k, _file in enumerate(_noise_files):

            # load audio
            noise_data, fs_noise = sf.read(_file)
            assert fs_noise == sample_rate, \
                'Noise file {} sampling rate {} does not match IR ' \
                'sampling rate {}.'.format(_file, fs_noise, sample_rate)

            # load impulse response
            noise_ir = ir_data['noise_ir_{}'.format(k)]

            # sample segment of noise and peak-normalize (by magnitude, so
            # negative-peaked signals are handled too) so each source has
            # roughly similar amplitude; take a bit more audio than target
            # audio so we are sure to fill up the end with noise (end of IR
            # is sparse)
            _noise = sample_audio(noise_data, int(1.1 * output_len))
            _noise = _noise / np.abs(_noise).max()

            # apply impulse response
            for n in range(n_mics):
                noise_output[n] = np.convolve(_noise, noise_ir[n])[:output_len]

        # rescale noise according to specified SNR, add to target signal
        noise_rms = rms(noise_output[0])
        signal_rms = rms(room_output[0])
        noise_fact = signal_rms / noise_rms * 10 ** (-snr_db / 20.)
        room_output += (noise_output * noise_fact)

    else:
        print('\nNo noise source in selected room!')

    # write output to file
    sf.write(output_file, np.squeeze(room_output), sample_rate)
    print('\nOutput written to : {}'.format(output_file))


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Show how to generate a dataset of random room '
                    'configurations and how to apply the impulse responses '
                    'of one of the rooms.')
    parser.add_argument('-n', '--n_rooms', type=int, default=50,
                        help='Number of rooms to generate')
    parser.add_argument('-d', '--source_min_dist_mic', type=float,
                        default=0.5,
                        help='Minimum distance between each source and the '
                             'microphone(s)')
    parser.add_argument('-a', '--apply_room', type=str,
                        help='Path to room dataset. If provided, a room will '
                             'be randomly selected.')
    parser.add_argument('-t', '--target_speech', type=str,
                        default='examples/input_samples/'
                                'cmu_arctic_us_aew_a0001.wav',
                        help='Path to a target speech WAV file.')
    parser.add_argument('-v', '--noise_dir', type=str,
                        help='Path to a directory with noise WAV files. '
                             'Default is to apply the room impulse response '
                             'to WAV file(s) from `examples/input_samples`.')
    parser.add_argument('-s', '--snr_db', type=float,
                        default=5.,
                        help='Desired signal-to-noise ratio resulting from '
                             'simulation.')
    parser.add_argument('-o', '--output_file', type=str,
                        default='simulated_output.wav',
                        help='Path of output WAV file from simulation.')
    args = parser.parse_args()

    if args.apply_room is not None:
        apply_rir(room_dataset=args.apply_room,
                  target_speech=args.target_speech,
                  noise_dir=args.noise_dir,
                  snr_db=args.snr_db,
                  output_file=args.output_file)
    else:
        make_dataset(args.n_rooms, args.source_min_dist_mic)
diff --git a/examples/input_samples/exercise_bike.wav b/examples/input_samples/exercise_bike.wav new file mode 100644 index 00000000..6ed588cc Binary files /dev/null and b/examples/input_samples/exercise_bike.wav differ diff --git a/examples/input_samples/running_tap.wav b/examples/input_samples/running_tap.wav new file mode 100644 index 00000000..c297041a Binary files /dev/null and b/examples/input_samples/running_tap.wav differ diff --git a/pyroomacoustics/__init__.py b/pyroomacoustics/__init__.py index 59b116c7..93d88444 100644 --- a/pyroomacoustics/__init__.py +++ b/pyroomacoustics/__init__.py @@ -104,6 +104,9 @@ :py:obj:`pyroomacoustics.transform` Block frequency domain processing tools +:py:obj:`pyroomacoustics.random` + Tools for random generation of rooms + Utilities --------- @@ -137,6 +140,7 @@ from . import datasets from . import bss from . import denoise +from . import random import warnings warnings.warn( diff --git a/pyroomacoustics/datasets/__init__.py b/pyroomacoustics/datasets/__init__.py index 342c41bf..98ff7487 100644 --- a/pyroomacoustics/datasets/__init__.py +++ b/pyroomacoustics/datasets/__init__.py @@ -141,3 +141,4 @@ from .timit import Word, Sentence, TimitCorpus from .cmu_arctic import CMUArcticCorpus, CMUArcticSentence, cmu_arctic_speakers from .google_speech_commands import GoogleSpeechCommands, GoogleSample + diff --git a/pyroomacoustics/random/__init__.py b/pyroomacoustics/random/__init__.py new file mode 100644 index 00000000..6cd7a0fe --- /dev/null +++ b/pyroomacoustics/random/__init__.py @@ -0,0 +1,14 @@ +""" +Random +====== + +This sub-package provides classes and methods in order to randomly generate +objects rooms of e.g. various shapes, microphone and source placement, and +reverberation properties. 
# Classes for creating distributions to randomly sample.
# Copyright (C) 2019 Eric Bezzam
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# You should have received a copy of the MIT License along with this program. If
# not, see <https://opensource.org/licenses/MIT>.

import numpy as np
from abc import ABCMeta, abstractmethod


class Distribution(object):
    """
    Abstract base class for distributions that can be randomly sampled.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def sample(self):
        """Draw a single sample from the distribution."""
        pass


class UniformDistribution(Distribution):
    """

    Create a uniform distribution between two values.

    Parameters
    -------------
    vals_range : tuple / list
        Tuple or list of two values, (lower bound, upper bound).

    """
    def __init__(self, vals_range):
        super(UniformDistribution, self).__init__()
        assert len(vals_range) == 2, 'Length of `vals_range` must be 2.'
        assert vals_range[0] <= vals_range[1], '`vals_range[0]` must be ' \
                                               'less than or equal to ' \
                                               '`vals_range[1]`.'
        self.vals_range = vals_range

    def sample(self):
        """Draw a value uniformly at random from [lower bound, upper bound)."""
        return np.random.uniform(self.vals_range[0], self.vals_range[1])


class MultiUniformDistribution(Distribution):
    """

    Sample from multiple independent uniform distributions, one per dimension.

    Parameters
    ------------
    ranges : list of tuples / lists
        List of tuples / lists, each with two values
        (lower bound, upper bound).

    """
    def __init__(self, ranges):
        super(MultiUniformDistribution, self).__init__()
        self.distributions = [UniformDistribution(r) for r in ranges]

    def sample(self):
        """Return a list with one uniform sample per dimension."""
        return [d.sample() for d in self.distributions]


class DiscreteDistribution(Distribution):
    """

    Create a discrete distribution which samples from a given set of values
    and (optionally) a given set of probabilities.

    Parameters
    ------------
    values : list
        List of values to sample from.
    prob : list, optional
        Corresponding list of probabilities (need not be normalized).
        Default is equal probability for all values.

    """
    def __init__(self, values, prob=None):
        super(DiscreteDistribution, self).__init__()
        if prob is None:
            # equal weight for every value; `np.ones(len(values))` (rather
            # than `np.ones_like(values)`) also supports non-numeric values
            prob = np.ones(len(values))
        assert len(values) == len(prob), \
            'len(values)={}, len(prob)={}'.format(len(values), len(prob))
        self.values = values
        # normalize so the weights sum to one, as required by `np.random.choice`
        self.prob = np.array(prob) / float(sum(prob))

    def sample(self):
        """Draw one of `values` according to `prob`."""
        return np.random.choice(self.values, p=self.prob)


class MultiDiscreteDistribution(Distribution):
    """

    Sample from multiple independent discrete distributions, one per
    dimension.

    Parameters
    ------------
    values_list : list of lists
        One list of values per dimension.
    prob_list : list of lists, optional
        One list of probabilities per dimension. Default is equal
        probability for all values of each dimension.

    """
    def __init__(self, values_list, prob_list=None):
        super(MultiDiscreteDistribution, self).__init__()
        if prob_list is not None:
            assert len(values_list) == len(prob_list), \
                'Lengths of `values_list` and `prob_list` must match.'
        else:
            prob_list = [None] * len(values_list)
        self.distributions = [
            DiscreteDistribution(values=tup[0], prob=tup[1])
            for tup in zip(values_list, prob_list)
        ]

    def sample(self):
        """Return a list with one discrete sample per dimension."""
        return [d.sample() for d in self.distributions]
# Utilities for generating random microphone(s) in rooms.
# Copyright (C) 2019 Eric Bezzam
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# You should have received a copy of the MIT License along with this program. If
# not, see <https://opensource.org/licenses/MIT>.

import abc
import numpy as np


class Microphone(object):
    """
    Abstract class for sampling a microphone / array in a provided room.
    """

    # same abstract-base pattern as `pyroomacoustics.random.distribution`;
    # avoids a dependency on the third-party `six` package
    __metaclass__ = abc.ABCMeta

    def __init__(self):
        pass

    @abc.abstractmethod
    def sample(self, room):
        """
        Abstract method to sample a microphone / array within the provided
        room.

        Returns the locations of a microphone / array.

        Parameters
        ----------
        room : Room object
            Room to randomly place a microphone / array inside.
        """
        pass


class OmniMicrophone(Microphone):
    """
    Object to randomly sample an omnidirectional microphone in a provided
    room.

    Parameters
    ----------
    min_dist_wall : float
        Minimum distance from the microphone to each wall.
    min_height : float
        Minimum height of microphone.
    max_height : float
        Maximum height of microphone.
    """
    def __init__(self, min_dist_wall=0.1, min_height=0.4, max_height=1.5):
        Microphone.__init__(self)
        self.min_dist_wall = min_dist_wall
        self.min_height = min_height
        self.max_height = max_height

    def _sample_pos(self, room):
        """
        Rejection-sample a position: draw uniformly within the room's
        bounding box (with the height constraints applied to the third
        dimension) until the point is inside the room and far enough from
        every wall.
        """
        bbox = room.get_bbox()
        dim = bbox.shape[0]

        while True:

            # sample within bounding box
            sampled_pos = []
            for d in range(dim):
                if d < 2:
                    x = np.random.uniform(
                        bbox[d, 0] + self.min_dist_wall,
                        bbox[d, 1] - self.min_dist_wall
                    )
                else:
                    # height dimension: additionally clip to
                    # [min_height, max_height]
                    x = np.random.uniform(
                        max(self.min_height, bbox[d, 0] + self.min_dist_wall),
                        min(self.max_height, bbox[d, 1] - self.min_dist_wall)
                    )
                sampled_pos.append(x)
            sampled_pos = np.array(sampled_pos)

            # check inside room
            if not room.is_inside(sampled_pos):
                continue

            # verify minimum distance to every wall; if any wall is too
            # close, reject this position and resample (a `continue` inside
            # the wall loop would only advance the wall loop, leaving the
            # constraint unenforced)
            too_close = False
            for wall in room.walls:
                v_corn = wall.corners[:, 0] - sampled_pos
                dist2wall = np.dot(v_corn, wall.normal)
                if 0 < dist2wall < self.min_dist_wall:
                    too_close = True
                    break
            if too_close:
                continue

            return sampled_pos

    def sample(self, room):
        return self._sample_pos(room)


class OmniMicrophoneArray(OmniMicrophone):
    """
    Object to randomly sample an array of omnidirectional microphones in a
    provided room.

    Array is assumed to be parallel to the X-Y plane (typically the floor) so
    only 2D coordinates should be provided.

    Parameters
    ----------
    geometry : 2D array
        Array of microphone positions, where each column corresponds to the
        coordinates of a microphone. If not centered, it will be done by
        subtracting the mean.
    min_dist_wall : float
        Minimum distance from the microphone to each wall.
    min_height : float
        Minimum height of microphone.
    max_height : float
        Maximum height of microphone.
    """
    def __init__(self, geometry, min_dist_wall=0.1, min_height=0.4,
                 max_height=1.5):
        assert geometry.ndim == 2, 'Only detected one microphone. Use ' \
                                   '`OmniMicrophone` for generating ' \
                                   'single microphone positions.'
        assert geometry.shape[0] == 2, 'Must provide X and Y coordinates ' \
                                       'for microphone positions.'

        # center the geometry around the origin
        geometry = geometry - geometry.mean(axis=1)[:, np.newaxis]

        # largest distance from array center to a microphone; used to pad
        # `min_dist_wall` so the whole array stays inside the room
        self.max_radius = max(
            np.sqrt(np.diag(np.dot(geometry.T, geometry))))

        # add third dimension (zero height, set when sampling the center)
        self.geometry = np.concatenate(
            (geometry, np.zeros((1, geometry.shape[1]))),
            axis=0
        )

        OmniMicrophone.__init__(self,
                                min_dist_wall=min_dist_wall + self.max_radius,
                                min_height=min_height, max_height=max_height)

    def sample(self, room):
        # sample the array center, then offset every microphone from it
        array_center = self._sample_pos(room)
        return self.geometry + array_center[:, np.newaxis]


class DirectionalMicrophone(Microphone):
    """Placeholder for directional microphone sampling (not implemented)."""
    def __init__(self):
        raise NotImplementedError
# Utility functions generating a dataset of room impulse responses.
# Copyright (C) 2019 Eric Bezzam
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# You should have received a copy of the MIT License along with this program. If
# not, see <https://opensource.org/licenses/MIT>.

"""
Utility functions for creating a dataset of room impulse responses.

See `examples/generate_room_dataset.py` for an example of creating a dataset
of room impulse responses and applying them.
"""

import numpy as np
import datetime
import os
import uuid
import json
from pprint import pprint

import pyroomacoustics as pra
from pyroomacoustics.random.distribution import DiscreteDistribution, \
    MultiUniformDistribution, UniformDistribution
from pyroomacoustics.doa.utils import spher2cart
from pyroomacoustics.room import ShoeBox


class RoomSimulationDistributions(object):
    """
    Default distributions for room simulation, largely based off of the
    distributions presented in Section 2.1 of
    `this paper <https://arxiv.org/abs/1811.06795>`_.
    """

    # signal-to-noise ratio in dB
    snr = DiscreteDistribution(
        values=[0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
        prob=[6, 10, 14.5, 16.5, 16.5, 14.5, 10, 6, 2.5, 1, 0.5]
    )
    room_dimensions = MultiUniformDistribution(
        # width, length, height
        ranges=[[3, 10], [3, 8], [2.5, 6]]
    )
    target_orientation = MultiUniformDistribution(
        # azimuth, elevation
        ranges=[[-180, 180], [45, 135]]
    )
    target_mic_dist = DiscreteDistribution(
        values=[1, 2, 3, 4, 5, 6, 7],
        prob=[15, 22, 29, 21, 8, 3, 0.5]
    )
    noise_mic_dist = DiscreteDistribution(
        values=[1, 2, 3, 4, 5, 6, 7],
        prob=[15, 22, 29, 21, 8, 3, 0.5]
    )
    n_noise = DiscreteDistribution(
        values=[0, 1, 2, 3],
        prob=[1, 1, 1, 1]
    )
    noise_orientation = MultiUniformDistribution(
        # azimuth, elevation
        ranges=[[-180, 180], [-30, 180]]
    )
    rt60 = DiscreteDistribution(
        # reverberation time
        values=[0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        prob=[4, 6, 7.5, 10, 12.5, 16.5, 16.5, 12.5, 7.5, 4]
    )
    mic_height = UniformDistribution(vals_range=(1., 1.5))


class ShoeBoxRoomGenerator(object):
    """

    `ShoeBox` room generator.

    Default distributions correspond to parameters from
    `this paper <https://arxiv.org/abs/1811.06795>`_.

    Parameters
    -----------
    room_dim_distrib : `MultiUniformDistribution` or `MultiDiscreteDistribution`
        Multi-dimensional distribution for (width, length, height) of ShoeBox
        room.
    target_orientation_distrib : `MultiUniformDistribution` or `MultiDiscreteDistribution`
        Multi-dimensional distribution for (azimuth, elevation) angles of
        target source.
    target_mic_dist_distrib : `UniformDistribution` or `DiscreteDistribution`
        Single dimensional distribution for distance between target and
        microphone(s).
    n_noise_distrib : `DiscreteDistribution`
        Discrete distribution for number of noise sources.
    noise_orientation_distrib : `MultiUniformDistribution` or `MultiDiscreteDistribution`
        Multi-dimensional distribution for (azimuth, elevation) angles of
        noise source.
    noise_mic_dist_distrib : `UniformDistribution` or `DiscreteDistribution`
        Single dimensional distribution for distance between noise and
        microphone(s).
    rt60_distrib : `UniformDistribution` or `DiscreteDistribution`
        Single dimensional distribution for average reverberation time of room.
    mic_height_distrib : `UniformDistribution` or `DiscreteDistribution`
        Single dimensional distribution for height of microphone(s).
    mic_min_dist_wall : float
        Minimum distance between microphone(s) and each wall. Default is 0.1
        meters.
    source_min_dist_wall : float
        Minimum distance between each source and each wall. Default is 0.5
        meters.
    source_min_dist_mic : float
        Minimum distance between each source and the microphone(s). Default
        is 0.1 meters.
    sample_rate : int
        Sample rate in Hz. Default is 16000 Hz.
    ism_order : int
        Image Source Method order for RIR generation. Default is 17.
    air_absorption: bool, optional
        If set to True, absorption of sound energy by the air will be
        simulated.
    ray_tracing: bool, optional
        If set to True, the ray tracing simulator will be used along with
        image source model.
    timeout : int
        Number of times to try generating a room with the desired configuration
        until giving up. Default is 1000.
    """

    def __init__(self,
                 room_dim_distrib=None,
                 target_orientation_distrib=None,
                 target_mic_dist_distrib=None,
                 noise_mic_dist_distrib=None,
                 noise_orientation_distrib=None,
                 n_noise_distrib=None,
                 rt60_distrib=None,
                 mic_height_distrib=None,
                 mic_min_dist_wall=0.1,
                 source_min_dist_wall=0.5,
                 source_min_dist_mic=0.1,
                 sample_rate=16000,
                 ism_order=17,
                 air_absorption=True,
                 ray_tracing=True,
                 timeout=1000,
                 ):

        def _or_default(distrib, default):
            # fall back to the paper's distribution when none is given
            return distrib if distrib is not None else default

        _d = RoomSimulationDistributions
        self.room_dim_distrib = _or_default(room_dim_distrib,
                                            _d.room_dimensions)
        self.target_orientation_distrib = _or_default(
            target_orientation_distrib, _d.target_orientation)
        self.target_mic_dist_distrib = _or_default(target_mic_dist_distrib,
                                                   _d.target_mic_dist)
        self.noise_mic_dist_distrib = _or_default(noise_mic_dist_distrib,
                                                  _d.noise_mic_dist)
        self.noise_orientation_distrib = _or_default(
            noise_orientation_distrib, _d.noise_orientation)
        self.n_noise_distrib = _or_default(n_noise_distrib, _d.n_noise)
        self.rt60_distrib = _or_default(rt60_distrib, _d.rt60)
        self.mic_height_distrib = _or_default(mic_height_distrib,
                                              _d.mic_height)

        self.mic_min_dist_wall = mic_min_dist_wall
        self.source_min_dist_wall = source_min_dist_wall
        self.source_min_dist_mic = source_min_dist_mic
        self.sample_rate = sample_rate
        self.ism_order = ism_order
        self.air_absorption = air_absorption
        self.ray_tracing = ray_tracing
        self.timeout = timeout

    def create_dataset(self, n_rooms, output_dir=None, verbose=False):
        """
        Create a dataset of room impulse responses with the following
        structure::

            <output_dir>/
                room_metadata.json
                data/
                    room_<uuid>.npz
                    room_<uuid>.npz
                    ...

        Parameters
        ----------
        n_rooms : int
            Number of rooms to generate.
        output_dir : str, optional
            Path to place the created dataset. If not provided, the dataset
            will be created in the same directory as where the script is
            called and with a timestamp as part of the directory.
        verbose : bool, optional
            If True, print the metadata of each generated room.
        """

        if output_dir is None:
            ts = datetime.datetime.now()
            output_dir = 'pra_room_dataset_{}'.format(
                ts.strftime('%Y-%m-%d-%Hh%Mm%Ss'))

        # create output directory
        os.mkdir(output_dir)
        print('Created output directory : {}'.format(output_dir))
        data_dir = os.path.join(output_dir, 'data')
        os.mkdir(data_dir)

        # sample rooms
        room_metadata = dict()
        for _ in range(n_rooms):

            count = 0
            while count < self.timeout:

                try:

                    # sample room
                    room_dim = self.room_dim_distrib.sample()

                    # sample absorption factor / material from desired RT60
                    rt60 = self.rt60_distrib.sample()
                    energy_absorp = rt60_to_absorption_eyring(room_dim, rt60)
                    materials = pra.Material.make_freq_flat(
                        absorption=energy_absorp
                    )

                    # mic location; resample if the height constraint cannot
                    # be met (an `assert` here would raise an AssertionError
                    # that the NoValidRoom handler below does not catch)
                    mic_height = self.mic_height_distrib.sample()
                    if not (self.mic_min_dist_wall < mic_height <
                            room_dim[2] - self.mic_min_dist_wall):
                        count += 1
                        continue
                    mic_loc = sample_mic_location(room_dim,
                                                  self.mic_min_dist_wall)
                    mic_loc.append(mic_height)

                    # create Room object
                    R = np.array(mic_loc, ndmin=2).T
                    room = ShoeBox(p=room_dim,
                                   fs=self.sample_rate,
                                   materials=materials,
                                   max_order=self.ism_order,
                                   mics=pra.MicrophoneArray(
                                       R, self.sample_rate),
                                   air_absorption=self.air_absorption,
                                   ray_tracing=self.ray_tracing,
                                   )

                    # sample target location
                    target_orientation = \
                        self.target_orientation_distrib.sample()
                    target_dist = self.target_mic_dist_distrib.sample()
                    if target_dist < self.source_min_dist_mic:
                        count += 1
                        continue
                    target_loc = mic_loc + \
                        spher2cart(r=target_dist,
                                   azimuth=target_orientation[0],
                                   colatitude=target_orientation[1])

                    # make sure inside room and meets constraint
                    if not is_inside(target_loc, room_dim,
                                     self.source_min_dist_wall):
                        count += 1
                        continue

                    # sample noise source(s)
                    n_noise = self.n_noise_distrib.sample()
                    noise_loc = []
                    noise_orientations = []
                    noise_dists = []
                    for _ in range(n_noise):
                        noise_orientation = \
                            self.noise_orientation_distrib.sample()
                        noise_orientations.append(noise_orientation)
                        noise_dist = sample_source_distance(
                            room, mic_loc, noise_orientation)
                        if noise_dist < self.source_min_dist_mic:
                            break
                        noise_dists.append(noise_dist)
                        _noise_loc = mic_loc + \
                            spher2cart(r=noise_dist,
                                       azimuth=noise_orientation[0],
                                       colatitude=noise_orientation[1])

                        # make sure inside room and meets constraint
                        if not is_inside(_noise_loc, room_dim,
                                         self.source_min_dist_wall):
                            break

                        noise_loc.append(_noise_loc.tolist())

                    if len(noise_loc) != n_noise:
                        # couldn't find good noise location(s)
                        count += 1
                        continue

                    # compute room impulse responses (RIRs)
                    room.add_source(list(target_loc))
                    for n in range(n_noise):
                        room.add_source(list(noise_loc[n]))
                    try:
                        room.compute_rir()
                    except Exception:
                        # simulation failed for this configuration; a bare
                        # `except:` would also swallow KeyboardInterrupt
                        count += 1
                        continue

                    # found valid configuration
                    break

                except NoValidRoom:
                    # try again
                    count += 1

            if count == self.timeout:
                print('Could not find valid room configuration. '
                      'One less room...')
                continue

            # gather room metadata to save
            room_uuid = 'room_' + str(uuid.uuid4())
            room_params = {
                'file': room_uuid + '.npz',
                'dimensions': room_dim,
                'mic_location': mic_loc,
                'target_location': target_loc.tolist(),
                'target_dist': int(target_dist),
                'target_orientation': target_orientation,
                'rt60': rt60,
                'absorption': energy_absorp,
                'n_noise': int(n_noise),
                'noise_loc': noise_loc,
                'noise_dist': noise_dists,
                'noise_orientation': noise_orientations,
            }
            room_metadata[room_uuid] = room_params

            if verbose:
                pprint(room_params)

            # collect RIRs: source 0 is the target, the rest are noise
            n_mics = R.shape[1]
            target_ir = np.array([room.rir[n][0] for n in range(n_mics)])
            noise_irs = []
            for t in range(n_noise):
                noise_irs.append(
                    np.array([room.rir[n][t + 1] for n in range(n_mics)]))

            # save RIRs (the noise dict is empty when n_noise == 0)
            _output_file = os.path.join(data_dir, room_uuid + '.npz')
            noise_irs_dict = dict(
                ('noise_ir_{}'.format(idx), ir)
                for (idx, ir) in enumerate(noise_irs)
            )
            np.savez(
                _output_file,
                target_ir=target_ir,
                sample_rate=self.sample_rate,
                n_noise=n_noise,
                **noise_irs_dict
            )

        # write metadata to JSON
        output_json = os.path.join(output_dir, 'room_metadata.json')
        with open(output_json, 'w') as f:
            json.dump(room_metadata, f, indent=4)

        print('Done.')


def rt60_to_absorption_eyring(room_dim, rt60):
    """

    Determine absorption factor given dimensions of (shoebox) room and RT60
    using Eyring's empirical equation:

        RT60 = 0.161 * V / (-S * ln(1 - a))

    where V is the room volume, S the total wall surface and `a` the energy
    absorption factor.

    Parameters
    ------------
    room_dim : tuple / list
        Tuple / list of three elements, (width, length, height).
    rt60 : float
        Reverberation time (for 60 dB drop) in seconds.

    Returns
    -------
    float
        Energy absorption factor in (0, 1].
    """
    if rt60 < 1e-5:
        # (near-)zero reverberation time: fully absorbent (anechoic) walls
        return 1.
    else:
        width, length, height = room_dim
        vol = width * length * height
        area = 2 * (width * length + length * height + width * height)
        # invert Eyring's formula: a = 1 - exp(-0.161 * V / (S * RT60))
        return 1. - np.exp(-0.161 * vol / (area * rt60))
- np.exp(-0.16 * vol / rt60 / area) + + +class NoValidRoom(Exception): + pass + + +def sample_mic_location(room_dim, mic_min_dist_wall): + """ + + Sample (x,y) coordinates of microphone location. + + Parameters + ------------ + room_dim : tuple / list + Tuple / list of three elements, (width, length, height) + mic_min_dist_wall : float + Minimum distance of mic from wall in meters. + + """ + + width, length, _ = room_dim + assert width >= 2 * mic_min_dist_wall and length >= 2 * mic_min_dist_wall + return [ + np.random.uniform(mic_min_dist_wall, width - mic_min_dist_wall), + np.random.uniform(mic_min_dist_wall, length - mic_min_dist_wall) + ] + + +def is_inside(source_loc, room_dim, source_min_dist_wall): + """ + + Determine if source is inside room and meets the minimum distance to wall + constraint. Problem is very simplified as we assume ShoeBox room with + walls aligned to x, y, and z axis. + + Parameters + ------------ + source_loc : 3D array + x, y, and z coordinates of source in question. + room_dim : 3D array + width, length, and height of room + source_min_dist_wall : float + minimum distance from wall in meters. + + """ + for s, r in zip(*[source_loc, room_dim]): + if s > r or s < source_min_dist_wall: + return False + return True + + +def sample_source_distance(room, mic_loc, orientation): + """ + + Sample a valid source distance. + + Parameters + ----------- + room : Shoebox Room object + mic_loc : array + 3D coordinates of microphone in room. + orientation : array + 2D array containing azimuth and elevation angle of source. 
+ + """ + + # for shoebox, max possible distance is diagonal between extreme corners + diag_dist = np.sqrt(sum(room.shoebox_dim ** 2)) + test_point = mic_loc + \ + spher2cart(diag_dist, orientation[0], orientation[1]) + + # determine intersection and then sample distance in between + intersection = np.zeros(3, dtype=np.float32) + mic_loc = np.array(mic_loc).astype(np.float32) + test_point = np.array(test_point).astype(np.float32) + for k, w in enumerate(room.walls): + if w.intersection(mic_loc, test_point, intersection) == 0: + max_dist = np.sqrt(sum((mic_loc - intersection) ** 2)) + return np.random.uniform(0, max_dist) diff --git a/pyroomacoustics/random/source.py b/pyroomacoustics/random/source.py new file mode 100644 index 00000000..da127fb0 --- /dev/null +++ b/pyroomacoustics/random/source.py @@ -0,0 +1,102 @@ +# Utilities for generating random source positions. +# Copyright (C) 2019 Eric Bezzam +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +# You should have received a copy of the MIT License along with this program. If +# not, see . + +import numpy as np +from pyroomacoustics.random.distribution import DiscreteDistribution, \ + UniformDistribution, Distribution +from pyroomacoustics.doa.utils import spher2cart + + +class RandomSource(object): + """ + Object to randomly sample a source position relative to a certain position. + + Parameters + ----------- + distance_distrib : Distribution + Distribution to sample distance of source. + azimuth_distrib : Distribution + Distribution to sample azimuth angle of source. + elevation_distrib : Distribution + Distribution to sample elevation angle of source. + + """ + def __init__(self, + distance_distrib=None, + azimuth_distrib=None, + elevation_distrib=None): + + # set distance distribution + if distance_distrib is None: + self.distance_distrib = DiscreteDistribution( + values=[1, 2, 3, 4, 5, 6, 7], + prob=[15, 22, 29, 21, 8, 3, 0.5] + ) + else: + self.distance_distrib = distance_distrib + assert isinstance(self.distance_distrib, Distribution) + + # set azimuth distribution + if azimuth_distrib is None: + self.azimuth_distrib = UniformDistribution( + vals_range=[-180, 180] + ) + else: + self.azimuth_distrib = azimuth_distrib + assert isinstance(self.azimuth_distrib, Distribution) + + # set elevation distribution + if elevation_distrib is None: + self.elevation_distrib = UniformDistribution( + vals_range=[45, 135] + ) + else: + self.elevation_distrib = elevation_distrib + assert isinstance(self.elevation_distrib, Distribution) + + def sample(self, cartesian=True): + """ + Sample a source location according to specified distributions. + + Parameters + ---------- + cartesian : bool, optional + Whether to return coordinates in cartesian form. Default is True. 
+ """ + distance = self.distance_distrib.sample() + azimuth = self.azimuth_distrib.sample() + elevation = self.elevation_distrib.sample() + + if cartesian: + return spher2cart( + r=distance, + azimuth=azimuth, + colatitude=elevation + ) + else: + return np.array([distance, azimuth, elevation]) + + + + diff --git a/pyroomacoustics/utilities.py b/pyroomacoustics/utilities.py index 70b6b0ad..09e710c2 100644 --- a/pyroomacoustics/utilities.py +++ b/pyroomacoustics/utilities.py @@ -25,11 +25,38 @@ import numpy as np from scipy import signal from scipy.io import wavfile +import random from .parameters import constants, eps from .sync import correlate +def sample_audio(audio_data, desired_len): + """ + Sample a segment of audio of desired length. If audio is too short, repeat + it. + + Parameters + ----------- + audio_data : array + 1D numpy array containing audio data. + desired_len : int + Desired length in samples. + """ + data_len = len(audio_data) + + if data_len < desired_len: + # repeat file + n_times = int(np.ceil(desired_len / float(data_len))) + audio_data = np.tile(audio_data, (n_times, 1)) + data_len = len(audio_data) + + # randomly pick segment + start = random.randint(0, data_len - desired_len) + end = start + desired_len + return audio_data[start:end] + + def create_noisy_signal(signal_fp, snr, noise_fp=None, offset=None): """ Create a noisy signal of a specified SNR.