Configuration#
[general]
# Output (release) directory.
dist_dir = "/users/bdda/xhao/Datasets_bdda5/multichannel_multispeaker_2022_04_02/train"
# "false" means that all simulated data are saved into one directory.
max_num_files_per_dir = 1000
Dataloader#
Mixer
#
__init__
#
Initialize the mixer.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
snr_list |
list[float]
|
signal to noise ratio list. |
required |
sir_list |
list[float]
|
signal-to-interference ratio list. |
required |
target_rms_level |
float
|
target RMS level. Defaults to -25. |
-25
|
target_rms_level_floating |
float
|
floating value of level. Defaults to -5. |
-5
|
mixing_mode |
str
|
defaults to "min". |
'min'
|
mix
#
mixing_mode = mixing_mode
instance-attribute
#
sir_list = sir_list
instance-attribute
#
snr_list = snr_list
instance-attribute
#
target_rms_level = target_rms_level
instance-attribute
#
target_rms_level_floating = target_rms_level_floating
instance-attribute
#
Source
#
__init__
#
add_rvb
#
Filter the RIR with the source audio.
Note
For the function simulateTrajectory
, if the dim of num_sources or num_traj is 1, it will
omit the first dim. Otherwise, it will do dynamic convolution.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rir |
np.ndarray
|
RIR with the shape of [num_sources or num_traj, num_mic, num_channels] |
required |
further_split_rir |
bool
|
If True, it will further split the RIR into direct-path, early and late parts. |
False
|
is_static
property
#
Check if the source is static or not.
Returns:
Type | Description |
---|---|
bool
|
True if the source is static; False if the source is dynamic. |
Raises:
ValueError: if the trajectory of the source is not initialized.
loudness_gain: float = -1
instance-attribute
#
n_y: np.ndarray | None = None
instance-attribute
#
path: Path | None = None
instance-attribute
#
rir: np.ndarray | None = None
instance-attribute
#
rir_direct_path: np.ndarray | None = None
instance-attribute
#
rir_early: np.ndarray | None = None
instance-attribute
#
rir_late: np.ndarray | None = None
instance-attribute
#
rir_peak_idx: np.ndarray | None = None
instance-attribute
#
source_id: str | None = None
instance-attribute
#
source_spk_id: str | None = None
instance-attribute
#
split_rir
#
Split the RIR into direct-path, early and late parts.
sr = sr
instance-attribute
#
traj: np.ndarray | None = None
instance-attribute
#
traj_len
property
#
transcription: str = ''
instance-attribute
#
vad_label: np.ndarray | None = None
instance-attribute
#
y = y
instance-attribute
#
y_rvb: np.ndarray | None = None
instance-attribute
#
y_rvb_direct_path: np.ndarray | None = None
instance-attribute
#
y_rvb_early: np.ndarray | None = None
instance-attribute
#
y_rvb_late: np.ndarray | None = None
instance-attribute
#
y_vad: np.ndarray | None = None
instance-attribute
#
loudness_norm_rms
#
Loudness normalize a signal based on the Root Mean Square (RMS).
Normalize the RMS of signals to a given RMS based on Decibels Relative to Full Scale (dBFS).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
y |
np.ndarray
|
[C, T] or [T,]. |
required |
scalar |
float | None
|
scalar to normalize the RMS. Defaults to None. |
None
|
target_rms |
target RMS in dBFS. |
required | |
ref_mic |
int
|
reference mic for multi-channel signals. |
-1
|
Returns:
Type | Description |
---|---|
tuple[np.ndarray, float]
|
Loudness normalized signal and scalar. |
Note
A small number of signal samples may be clipped after normalization, but this does not matter.
scalar_to_desired_sxr
#
Calculate the gain of the interference needed to achieve a desired SXR (SNR or SIR).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
meaningful |
np.ndarray
|
meaningful input, like target speech. |
required |
meaningless |
np.ndarray
|
meaningless or unwanted input, like background noise. |
required |
desired_ratio |
float
|
SNR or SIR ratio. |
required |
Returns:
Type | Description |
---|---|
float
|
Gain, which can be used to adjust the RMS of the meaningless signals to satisfy the given ratio. |
Source
#
__init__
#
add_rvb
#
Filter the RIR with the source audio.
Note
For the function simulateTrajectory
, if the dim of num_sources or num_traj is 1, it will
omit the first dim. Otherwise, it will do dynamic convolution.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rir |
np.ndarray
|
RIR with the shape of [num_sources or num_traj, num_mic, num_channels] |
required |
further_split_rir |
bool
|
If True, it will further split the RIR into direct-path, early and late parts. |
False
|
is_static
property
#
Check if the source is static or not.
Returns:
Type | Description |
---|---|
bool
|
True if the source is static; False if the source is dynamic. |
Raises:
ValueError: if the trajectory of the source is not initialized.
loudness_gain: float = -1
instance-attribute
#
n_y: np.ndarray | None = None
instance-attribute
#
path: Path | None = None
instance-attribute
#
rir: np.ndarray | None = None
instance-attribute
#
rir_direct_path: np.ndarray | None = None
instance-attribute
#
rir_early: np.ndarray | None = None
instance-attribute
#
rir_late: np.ndarray | None = None
instance-attribute
#
rir_peak_idx: np.ndarray | None = None
instance-attribute
#
source_id: str | None = None
instance-attribute
#
source_spk_id: str | None = None
instance-attribute
#
split_rir
#
Split the RIR into direct-path, early and late parts.
sr = sr
instance-attribute
#
traj: np.ndarray | None = None
instance-attribute
#
traj_len
property
#
transcription: str = ''
instance-attribute
#
vad_label: np.ndarray | None = None
instance-attribute
#
y = y
instance-attribute
#
y_rvb: np.ndarray | None = None
instance-attribute
#
y_rvb_direct_path: np.ndarray | None = None
instance-attribute
#
y_rvb_early: np.ndarray | None = None
instance-attribute
#
y_rvb_late: np.ndarray | None = None
instance-attribute
#
y_vad: np.ndarray | None = None
instance-attribute
#
SourceDataloader
#
__getitem__
#
__init__
#
Load data from a database and return Source class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
database |
str
|
A file containing file paths of the clean speech files. |
required |
num_sources |
int
|
Number of sources to be loaded. |
required |
offset |
int
|
The offset of the database. |
0
|
limit |
int | None
|
The maximum number of files in the database to use. |
None
|
inclide_vad |
bool
|
Whether to include the VAD label. |
False
|
sr |
int
|
The sampling rate of the audio. |
16000
|
preload |
bool
|
Whether to preload the audio in parallel. |
False
|
max_norm |
bool
|
Whether to normalize the audio. |
True
|
source_id_fn |
Callable[[Path], str] | None
|
A function to get the source id. |
None
|
source_spk_id_fn |
Callable[[Path], str] | None
|
A function to get the source speaker id. |
None
|
__len__
#
fpath_list = fpath_list
instance-attribute
#
inclide_vad = inclide_vad
instance-attribute
#
max_norm = max_norm
instance-attribute
#
num_sources = num_sources
instance-attribute
#
preload = preload
instance-attribute
#
source_id_fn = source_id_fn
instance-attribute
#
source_spk_id_fn = source_spk_id_fn
instance-attribute
#
sr = sr
instance-attribute
#
waveform_list = []
instance-attribute
#
expand_path
#
load_wav
#
# Source-data settings: clean-speech and noise file lists.
# NOTE(review): no table header is visible directly above these keys — confirm which table they belong to.
# Text file listing the clean-speech audio paths (presumably one path per line — confirm).
clean_fpath_list = "/users/bdda/xhao/Datasets/wsj0-si/si_tr_s.txt" # clean
# presumably `false` means "no limit" on the number of list entries used — TODO confirm
clean_list_limit = false
# presumably the number of leading list entries to skip — TODO confirm
clean_list_offset = 0
# Text file listing the noise audio paths; limit/offset below mirror the clean-list keys.
noise_fpath_list = "/users/bdda/xhao/Datasets/chime4-noise/chime4-noise.txt"
noise_list_limit = false
noise_list_offset = 0
# Both clean and noise data are preloaded in this configuration.
preload_clean_data = true
preload_noise_data = true
# NOTE(review): unit not stated here; presumably seconds — confirm.
silence_duration = 0.02
# Room impulse response (RIR) simulation settings; all values live in the subtables below.
[rir_simulator]
# Room properties.
[rir_simulator.room]
# Sampling rate (presumably Hz — confirm).
sr = 16000
# Candidate T60 values (presumably reverberation time in seconds — confirm).
t60_list = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
# Two 3-element entries; presumably [min, max] room dimensions — confirm axis order and units.
room_size_range = [[6.0, 6.0, 3.05], [10.0, 8.0, 3.05]]
# Two 6-element entries; presumably [min, max] absorption coefficients, one per room surface — confirm.
absorption_coeff_range = [[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]]
[rir_simulator.microphone]
# Name of the microphone-array setup; the set of accepted names is not visible here.
array_setup = "dicit"
# In this configuration the gpurir generator is enabled and the pyroomacoustics generator is disabled.
[rir_simulator.gpurir_generator]
enable = true
[rir_simulator.pyroomacoustics_generator]
enable = false
# Source-trajectory generation settings.
# NOTE(review): "trajecotry" looks like a misspelling of "trajectory"; it is left unchanged because
# the consumer may look up this exact table name — confirm before renaming.
[rir_simulator.trajecotry_generator]
# [min, max] moving speed (presumably m/s — confirm).
moving_speed_range = [1.1, 1.5]
# [min, max] speaker height (presumably metres — confirm).
speaker_height_range = [1.5, 1.9]
# NOTE(review): unit not stated here; presumably seconds between trajectory points — confirm.
moving_interval = 0.125
# Minimum allowed distance between a trajectory and the walls (presumably metres — confirm).
min_allowable_distance_to_wall = 0.5
# Trajectory types; every type below is enabled in this configuration.
[rir_simulator.trajecotry_generator.curve]
enable = true
num_control_points = 1
[rir_simulator.trajecotry_generator.line]
enable = true
[rir_simulator.trajecotry_generator.static]
enable = true
[rir_simulator.trajecotry_generator.curved_quadrilateral]
enable = true
num_control_points = 1
# Mixing settings for combining target speech, interfering sources and noise.
[mixer]
# Candidate signal-to-noise ratios.
snr_list = [-5, 0, 5, 10, 15, 20]
# Candidate signal-to-interference ratios.
# The fourth entry was previously a second -3, which broke the otherwise
# ascending, symmetric sequence; it is corrected to 3.
sir_list = [-6, -3, 0, 3, 6]
# Sampling rate (presumably Hz — confirm).
sr = 16000
# Target loudness level for the mixture.
target_loudness_level = -25
# NOTE(review): the Mixer API documents `target_rms_level_floating` with a default of -5,
# while this value is +5; how this key maps to that parameter is not visible here — confirm the sign.
loudness_floating_value = 5