Python源码示例:torchaudio.load()
示例1
def load_commonvoice_item(line: List[str],
header: List[str],
path: str,
folder_audio: str) -> Tuple[Tensor, int, Dict[str, str]]:
# Each line as the following data:
# client_id, path, sentence, up_votes, down_votes, age, gender, accent
assert header[1] == "path"
fileid = line[1]
filename = os.path.join(path, folder_audio, fileid)
waveform, sample_rate = torchaudio.load(filename)
dic = dict(zip(header, line))
return waveform, sample_rate, dic
示例2
def load_cmuarctic_item(line: str,
path: str,
folder_audio: str,
ext_audio: str) -> Tuple[Tensor, int, str, str]:
utterance_id, utterance = line[0].strip().split(" ", 2)[1:]
# Remove space, double quote, and single parenthesis from utterance
utterance = utterance[1:-3]
file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)
# Load audio
waveform, sample_rate = torchaudio.load(file_audio)
return (
waveform,
sample_rate,
utterance,
utterance_id.split("_")[1]
)
示例3
def set_audio_backend(backend: Optional[str]) -> None:
"""Set the backend for I/O operation
Args:
backend (str): Name of the backend. One of "sox" or "soundfile",
based on availability of the system.
"""
if backend is not None and backend not in list_audio_backends():
raise RuntimeError(
f'Backend "{backend}" is not one of '
f'available backends: {list_audio_backends()}.')
if backend is None:
module = no_backend
elif backend == 'sox':
module = sox_backend
elif backend == 'sox_io':
module = sox_io_backend
elif backend == 'soundfile':
module = soundfile_backend
else:
raise NotImplementedError(f'Unexpected backend "{backend}"')
for func in ['save', 'load', 'load_wav', 'info']:
setattr(torchaudio, func, getattr(module, func))
示例4
def test_vad(self):
sample_files = [
common_utils.get_asset_path("vad-go-stereo-44100.wav"),
common_utils.get_asset_path("vad-go-mono-32000.wav")
]
for sample_file in sample_files:
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(sample_file)
E.append_effect_to_chain("vad")
x, _ = E.sox_build_flow_effects()
x_orig, sample_rate = torchaudio.load(sample_file)
vad = torchaudio.transforms.Vad(sample_rate)
y = vad(x_orig)
self.assertTrue(x.allclose(y, rtol=1e-4, atol=1e-4))
示例5
def test_batch_mulaw(self):
test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
waveform, _ = torchaudio.load(test_filepath) # (2, 278756), 44100
# Single then transform then batch
waveform_encoded = torchaudio.transforms.MuLawEncoding()(waveform)
expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)
# Batch then transform
waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
computed = torchaudio.transforms.MuLawEncoding()(waveform_batched)
# shape = (3, 2, 201, 1394)
self.assertEqual(computed, expected)
# Single then transform then batch
waveform_decoded = torchaudio.transforms.MuLawDecoding()(waveform_encoded)
expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)
# Batch then transform
computed = torchaudio.transforms.MuLawDecoding()(computed)
# shape = (3, 2, 201, 1394)
self.assertEqual(computed, expected)
示例6
def test_resample_size(self):
input_path = common_utils.get_asset_path('sinewave.wav')
waveform, sample_rate = torchaudio.load(input_path)
upsample_rate = sample_rate * 2
downsample_rate = sample_rate // 2
invalid_resample = torchaudio.transforms.Resample(sample_rate, upsample_rate, resampling_method='foo')
self.assertRaises(ValueError, invalid_resample, waveform)
upsample_resample = torchaudio.transforms.Resample(
sample_rate, upsample_rate, resampling_method='sinc_interpolation')
up_sampled = upsample_resample(waveform)
# we expect the upsampled signal to have twice as many samples
self.assertTrue(up_sampled.size(-1) == waveform.size(-1) * 2)
downsample_resample = torchaudio.transforms.Resample(
sample_rate, downsample_rate, resampling_method='sinc_interpolation')
down_sampled = downsample_resample(waveform)
# we expect the downsampled signal to have half as many samples
self.assertTrue(down_sampled.size(-1) == waveform.size(-1) // 2)
示例7
def _create_data_set(self):
# used to generate the dataset to test on. this is not used in testing (offline procedure)
test_filepath = common_utils.get_asset_path('kaldi_file.wav')
sr = 16000
x = torch.arange(0, 20).float()
# between [-6,6]
y = torch.cos(2 * math.pi * x) + 3 * torch.sin(math.pi * x) + 2 * torch.cos(x)
# between [-2^30, 2^30]
y = (y / 6 * (1 << 30)).long()
# clear the last 16 bits because they aren't used anyways
y = ((y >> 16) << 16).float()
torchaudio.save(test_filepath, y, sr)
sound, sample_rate = torchaudio.load(test_filepath, normalization=False)
print(y >> 16)
self.assertTrue(sample_rate == sr)
torch.testing.assert_allclose(y, sound)
示例8
def test_dither(self):
test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
waveform, _ = torchaudio.load(test_filepath)
waveform_dithered = F.dither(waveform)
waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(test_filepath)
E.append_effect_to_chain("dither", [])
sox_dither_waveform = E.sox_build_flow_effects()[0]
self.assertEqual(waveform_dithered, sox_dither_waveform, atol=1e-04, rtol=1e-5)
E.clear_chain()
E.append_effect_to_chain("dither", ["-s"])
sox_dither_waveform_ns = E.sox_build_flow_effects()[0]
self.assertEqual(waveform_dithered_noiseshaped, sox_dither_waveform_ns, atol=1e-02, rtol=1e-5)
示例9
def test_vctk_transform_pipeline(self):
test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)
# rate
sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
wf_vctk = sample(wf_vctk)
# dither
wf_vctk = F.dither(wf_vctk, noise_shaping=True)
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(test_filepath_vctk)
E.append_effect_to_chain("gain", ["-h"])
E.append_effect_to_chain("channels", [1])
E.append_effect_to_chain("rate", [16000])
E.append_effect_to_chain("gain", ["-rh"])
E.append_effect_to_chain("dither", ["-s"])
wf_vctk_sox = E.sox_build_flow_effects()[0]
self.assertEqual(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03)
示例10
def test_lowpass(self):
"""
Test biquad lowpass filter, compare to SoX implementation
"""
cutoff_freq = 3000
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("lowpass", [cutoff_freq])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例11
def test_allpass(self):
"""
Test biquad allpass filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例12
def test_bandpass_with_csg(self):
"""
Test biquad bandpass filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
const_skirt_gain = True
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例13
def test_bandpass_without_csg(self):
"""
Test biquad bandpass filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
const_skirt_gain = False
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例14
def test_bandreject(self):
"""
Test biquad bandreject filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例15
def test_band_with_noise(self):
"""
Test biquad band filter with noise mode, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
noise = True
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例16
def test_treble(self):
"""
Test biquad treble filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
gain = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例17
def test_bass(self):
"""
Test biquad bass filter, compare to SoX implementation
"""
central_freq = 1000
q = 0.707
gain = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bass", [gain, central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bass_biquad(waveform, sample_rate, gain, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-4, rtol=1e-5)
示例18
def test_overdrive(self):
"""
Test overdrive effect, compare to SoX implementation
"""
gain = 30
colour = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("overdrive", [gain, colour])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.overdrive(waveform, gain, colour)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例19
def test_phaser_sine(self):
"""
Test phaser effect with sine moduldation, compare to SoX implementation
"""
gain_in = 0.5
gain_out = 0.8
delay_ms = 2.0
decay = 0.4
speed = 0.5
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例20
def test_phaser_triangle(self):
"""
Test phaser effect with triangle modulation, compare to SoX implementation
"""
gain_in = 0.5
gain_out = 0.8
delay_ms = 2.0
decay = 0.4
speed = 0.5
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例21
def test_flanger_triangle_linear(self):
"""
Test flanger effect with triangle modulation and linear interpolation, compare to SoX implementation
"""
delay = 0.6
depth = 0.87
regen = 3.0
width = 0.9
speed = 0.5
phase = 30
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "linear"])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
modulation='triangular', interpolation='linear')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例22
def test_flanger_sine_linear(self):
"""
Test flanger effect with sine modulation and linear interpolation, compare to SoX implementation
"""
delay = 0.8
depth = 0.88
regen = 3.0
width = 0.23
speed = 1.3
phase = 60
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "linear"])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
modulation='sinusoidal', interpolation='linear')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例23
def test_flanger_sine_quad(self):
"""
Test flanger effect with sine modulation and quadratic interpolation, compare to SoX implementation
"""
delay = 0.9
depth = 0.9
regen = 4.0
width = 0.23
speed = 1.3
phase = 25
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "quadratic"])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
modulation='sinusoidal', interpolation='quadratic')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例24
def test_equalizer(self):
"""
Test biquad peaking equalizer filter, compare to SoX implementation
"""
center_freq = 300
q = 0.707
gain = 1
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("equalizer", [center_freq, q, gain])
sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
示例25
def test_perf_biquad_filtering(self):
fn_sine = common_utils.get_asset_path('whitenoise.wav')
b0 = 0.4
b1 = 0.2
b2 = 0.9
a0 = 0.7
a1 = 0.2
a2 = 0.6
# SoX method
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(fn_sine)
E.append_effect_to_chain("biquad", [b0, b1, b2, a0, a1, a2])
waveform_sox_out, _ = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(fn_sine, normalization=True)
waveform_lfilter_out = F.lfilter(
waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
)
self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5)
示例26
def test_info_wav(self, dtype, sample_rate, num_channels):
"""`sox_io_backend.info` is torchscript-able and returns the same result"""
audio_path = self.get_temp_path(f'{dtype}_{sample_rate}_{num_channels}.wav')
data = get_wav_data(dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
save_wav(audio_path, data, sample_rate)
script_path = self.get_temp_path('info_func.zip')
torch.jit.script(py_info_func).save(script_path)
ts_info_func = torch.jit.load(script_path)
py_info = py_info_func(audio_path)
ts_info = ts_info_func(audio_path)
assert py_info.get_sample_rate() == ts_info.get_sample_rate()
assert py_info.get_num_frames() == ts_info.get_num_frames()
assert py_info.get_num_channels() == ts_info.get_num_channels()
示例27
def test_load_wav(self, dtype, sample_rate, num_channels, normalize, channels_first):
"""`sox_io_backend.load` is torchscript-able and returns the same result"""
audio_path = self.get_temp_path(f'test_load_{dtype}_{sample_rate}_{num_channels}_{normalize}.wav')
data = get_wav_data(dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
save_wav(audio_path, data, sample_rate)
script_path = self.get_temp_path('load_func.zip')
torch.jit.script(py_load_func).save(script_path)
ts_load_func = torch.jit.load(script_path)
py_data, py_sr = py_load_func(
audio_path, normalize=normalize, channels_first=channels_first)
ts_data, ts_sr = ts_load_func(
audio_path, normalize=normalize, channels_first=channels_first)
self.assertEqual(py_sr, ts_sr)
self.assertEqual(py_data, ts_data)
示例28
def test_save_wav(self, dtype, sample_rate, num_channels):
script_path = self.get_temp_path('save_func.zip')
torch.jit.script(py_save_func).save(script_path)
ts_save_func = torch.jit.load(script_path)
expected = get_wav_data(dtype, num_channels)
py_path = self.get_temp_path(f'test_save_py_{dtype}_{sample_rate}_{num_channels}.wav')
ts_path = self.get_temp_path(f'test_save_ts_{dtype}_{sample_rate}_{num_channels}.wav')
py_save_func(py_path, expected, sample_rate, True, None)
ts_save_func(ts_path, expected, sample_rate, True, None)
py_data, py_sr = load_wav(py_path)
ts_data, ts_sr = load_wav(ts_path)
self.assertEqual(sample_rate, py_sr)
self.assertEqual(sample_rate, ts_sr)
self.assertEqual(expected, py_data)
self.assertEqual(expected, ts_data)
示例29
def test_save_flac(self, sample_rate, num_channels, compression_level):
script_path = self.get_temp_path('save_func.zip')
torch.jit.script(py_save_func).save(script_path)
ts_save_func = torch.jit.load(script_path)
expected = get_wav_data('float32', num_channels)
py_path = self.get_temp_path(f'test_save_py_{sample_rate}_{num_channels}_{compression_level}.flac')
ts_path = self.get_temp_path(f'test_save_ts_{sample_rate}_{num_channels}_{compression_level}.flac')
py_save_func(py_path, expected, sample_rate, True, compression_level)
ts_save_func(ts_path, expected, sample_rate, True, compression_level)
# converting to 32 bit because flac file has 24 bit depth which scipy cannot handle.
py_path_wav = f'{py_path}.wav'
ts_path_wav = f'{ts_path}.wav'
sox_utils.convert_audio_file(py_path, py_path_wav, bit_depth=32)
sox_utils.convert_audio_file(ts_path, ts_path_wav, bit_depth=32)
py_data, py_sr = load_wav(py_path_wav, normalize=True)
ts_data, ts_sr = load_wav(ts_path_wav, normalize=True)
self.assertEqual(sample_rate, py_sr)
self.assertEqual(sample_rate, ts_sr)
self.assertEqual(expected, py_data)
self.assertEqual(expected, ts_data)
示例30
def __init__(self, data_root, utt2class, split_list,
chunker=None,
verbose=True):
self.data_root = data_root
if not isinstance(utt2class, str):
raise ValueError('Please specify a path to a utt2class '
'file for loading data.')
if not isinstance(split_list, str) and not isinstance(split_list, list):
raise ValueError('Please specify a path to a split_list '
'file for loading data or to the list itself.')
utt2class_ext = utt2class.split('.')[1]
if utt2class_ext == 'json':
with open(utt2class, 'r') as u2s_f:
self.utt2class = json.load(u2s_f)
else:
self.utt2class = np.load(utt2class)
self.utt2class = dict(self.utt2class.any())
print('Found {} classes'.format(len(set(self.utt2class.values()))))
self.chunker = chunker
if isinstance(split_list, list):
self.split_list = split_list
else:
with open(split_list, 'r') as sl_f:
self.split_list = [l.rstrip() for l in sl_f]
print('Found {} wav files'.format(len(self.split_list)))