Python源码示例:torchaudio.load()

示例1
def load_commonvoice_item(line: List[str],
                          header: List[str],
                          path: str,
                          folder_audio: str) -> Tuple[Tensor, int, Dict[str, str]]:
    # Each line as the following data:
    # client_id, path, sentence, up_votes, down_votes, age, gender, accent

    assert header[1] == "path"
    fileid = line[1]

    filename = os.path.join(path, folder_audio, fileid)

    waveform, sample_rate = torchaudio.load(filename)

    dic = dict(zip(header, line))

    return waveform, sample_rate, dic 
示例2
def load_cmuarctic_item(line: str,
                        path: str,
                        folder_audio: str,
                        ext_audio: str) -> Tuple[Tensor, int, str, str]:

    utterance_id, utterance = line[0].strip().split(" ", 2)[1:]

    # Remove space, double quote, and single parenthesis from utterance
    utterance = utterance[1:-3]

    file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)

    # Load audio
    waveform, sample_rate = torchaudio.load(file_audio)

    return (
        waveform,
        sample_rate,
        utterance,
        utterance_id.split("_")[1]
    ) 
示例3
def set_audio_backend(backend: Optional[str]) -> None:
    """Set the backend for I/O operation

    Args:
        backend (str): Name of the backend. One of "sox" or "soundfile",
            based on availability of the system.
    """
    if backend is not None and backend not in list_audio_backends():
        raise RuntimeError(
            f'Backend "{backend}" is not one of '
            f'available backends: {list_audio_backends()}.')

    if backend is None:
        module = no_backend
    elif backend == 'sox':
        module = sox_backend
    elif backend == 'sox_io':
        module = sox_io_backend
    elif backend == 'soundfile':
        module = soundfile_backend
    else:
        raise NotImplementedError(f'Unexpected backend "{backend}"')

    for func in ['save', 'load', 'load_wav', 'info']:
        setattr(torchaudio, func, getattr(module, func)) 
示例4
def test_vad(self):
        sample_files = [
            common_utils.get_asset_path("vad-go-stereo-44100.wav"),
            common_utils.get_asset_path("vad-go-mono-32000.wav")
        ]

        for sample_file in sample_files:
            E = torchaudio.sox_effects.SoxEffectsChain()
            E.set_input_file(sample_file)
            E.append_effect_to_chain("vad")
            x, _ = E.sox_build_flow_effects()

            x_orig, sample_rate = torchaudio.load(sample_file)
            vad = torchaudio.transforms.Vad(sample_rate)

            y = vad(x_orig)
            self.assertTrue(x.allclose(y, rtol=1e-4, atol=1e-4)) 
示例5
def test_batch_mulaw(self):
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        waveform_encoded = torchaudio.transforms.MuLawEncoding()(waveform)
        expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
        computed = torchaudio.transforms.MuLawEncoding()(waveform_batched)

        # shape = (3, 2, 201, 1394)
        self.assertEqual(computed, expected)

        # Single then transform then batch
        waveform_decoded = torchaudio.transforms.MuLawDecoding()(waveform_encoded)
        expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.MuLawDecoding()(computed)

        # shape = (3, 2, 201, 1394)
        self.assertEqual(computed, expected) 
示例6
def test_resample_size(self):
        input_path = common_utils.get_asset_path('sinewave.wav')
        waveform, sample_rate = torchaudio.load(input_path)

        upsample_rate = sample_rate * 2
        downsample_rate = sample_rate // 2
        invalid_resample = torchaudio.transforms.Resample(sample_rate, upsample_rate, resampling_method='foo')

        self.assertRaises(ValueError, invalid_resample, waveform)

        upsample_resample = torchaudio.transforms.Resample(
            sample_rate, upsample_rate, resampling_method='sinc_interpolation')
        up_sampled = upsample_resample(waveform)

        # we expect the upsampled signal to have twice as many samples
        self.assertTrue(up_sampled.size(-1) == waveform.size(-1) * 2)

        downsample_resample = torchaudio.transforms.Resample(
            sample_rate, downsample_rate, resampling_method='sinc_interpolation')
        down_sampled = downsample_resample(waveform)

        # we expect the downsampled signal to have half as many samples
        self.assertTrue(down_sampled.size(-1) == waveform.size(-1) // 2) 
示例7
def _create_data_set(self):
        # used to generate the dataset to test on. this is not used in testing (offline procedure)
        test_filepath = common_utils.get_asset_path('kaldi_file.wav')
        sr = 16000
        x = torch.arange(0, 20).float()
        # between [-6,6]
        y = torch.cos(2 * math.pi * x) + 3 * torch.sin(math.pi * x) + 2 * torch.cos(x)
        # between [-2^30, 2^30]
        y = (y / 6 * (1 << 30)).long()
        # clear the last 16 bits because they aren't used anyways
        y = ((y >> 16) << 16).float()
        torchaudio.save(test_filepath, y, sr)
        sound, sample_rate = torchaudio.load(test_filepath, normalization=False)
        print(y >> 16)
        self.assertTrue(sample_rate == sr)
        torch.testing.assert_allclose(y, sound) 
示例8
def test_dither(self):
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)

        waveform_dithered = F.dither(waveform)
        waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath)
        E.append_effect_to_chain("dither", [])
        sox_dither_waveform = E.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered, sox_dither_waveform, atol=1e-04, rtol=1e-5)
        E.clear_chain()

        E.append_effect_to_chain("dither", ["-s"])
        sox_dither_waveform_ns = E.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered_noiseshaped, sox_dither_waveform_ns, atol=1e-02, rtol=1e-5) 
示例9
def test_vctk_transform_pipeline(self):
        test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
        wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath_vctk)
        E.append_effect_to_chain("gain", ["-h"])
        E.append_effect_to_chain("channels", [1])
        E.append_effect_to_chain("rate", [16000])
        E.append_effect_to_chain("gain", ["-rh"])
        E.append_effect_to_chain("dither", ["-s"])
        wf_vctk_sox = E.sox_build_flow_effects()[0]

        self.assertEqual(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03) 
示例10
def test_lowpass(self):
        """
        Test biquad lowpass filter, compare to SoX implementation
        """

        cutoff_freq = 3000

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("lowpass", [cutoff_freq])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例11
def test_allpass(self):
        """
        Test biquad allpass filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例12
def test_bandpass_with_csg(self):
        """
        Test biquad bandpass filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707
        const_skirt_gain = True

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例13
def test_bandpass_without_csg(self):
        """
        Test biquad bandpass filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707
        const_skirt_gain = False

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例14
def test_bandreject(self):
        """
        Test biquad bandreject filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例15
def test_band_with_noise(self):
        """
        Test biquad band filter with noise mode, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707
        noise = True

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例16
def test_treble(self):
        """
        Test biquad treble filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707
        gain = 40

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例17
def test_bass(self):
        """
        Test biquad bass filter, compare to SoX implementation
        """

        central_freq = 1000
        q = 0.707
        gain = 40

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bass", [gain, central_freq, str(q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bass_biquad(waveform, sample_rate, gain, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-4, rtol=1e-5) 
示例18
def test_overdrive(self):
        """
        Test overdrive effect, compare to SoX implementation
        """
        gain = 30
        colour = 40
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("overdrive", [gain, colour])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.overdrive(waveform, gain, colour)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例19
def test_phaser_sine(self):
        """
        Test phaser effect with sine moduldation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例20
def test_phaser_triangle(self):
        """
        Test phaser effect with triangle modulation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例21
def test_flanger_triangle_linear(self):
        """
        Test flanger effect with triangle modulation and linear interpolation, compare to SoX implementation
        """
        delay = 0.6
        depth = 0.87
        regen = 3.0
        width = 0.9
        speed = 0.5
        phase = 30
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "linear"])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
                                    modulation='triangular', interpolation='linear')

        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例22
def test_flanger_sine_linear(self):
        """
        Test flanger effect with sine modulation and linear interpolation, compare to SoX implementation
        """
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.23
        speed = 1.3
        phase = 60
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "linear"])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
                                    modulation='sinusoidal', interpolation='linear')

        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例23
def test_flanger_sine_quad(self):
        """
        Test flanger effect with sine modulation and quadratic interpolation, compare to SoX implementation
        """
        delay = 0.9
        depth = 0.9
        regen = 4.0
        width = 0.23
        speed = 1.3
        phase = 25
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "quadratic"])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase,
                                    modulation='sinusoidal', interpolation='quadratic')

        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例24
def test_equalizer(self):
        """
        Test biquad peaking equalizer filter, compare to SoX implementation
        """

        center_freq = 300
        q = 0.707
        gain = 1

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("equalizer", [center_freq, q, gain])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) 
示例25
def test_perf_biquad_filtering(self):

        fn_sine = common_utils.get_asset_path('whitenoise.wav')

        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6

        # SoX method
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(fn_sine)
        E.append_effect_to_chain("biquad", [b0, b1, b2, a0, a1, a2])
        waveform_sox_out, _ = E.sox_build_flow_effects()

        waveform, _ = torchaudio.load(fn_sine, normalization=True)
        waveform_lfilter_out = F.lfilter(
            waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
        )

        self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5) 
示例26
def test_info_wav(self, dtype, sample_rate, num_channels):
        """`sox_io_backend.info` is torchscript-able and returns the same result"""
        audio_path = self.get_temp_path(f'{dtype}_{sample_rate}_{num_channels}.wav')
        data = get_wav_data(dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
        save_wav(audio_path, data, sample_rate)

        script_path = self.get_temp_path('info_func.zip')
        torch.jit.script(py_info_func).save(script_path)
        ts_info_func = torch.jit.load(script_path)

        py_info = py_info_func(audio_path)
        ts_info = ts_info_func(audio_path)

        assert py_info.get_sample_rate() == ts_info.get_sample_rate()
        assert py_info.get_num_frames() == ts_info.get_num_frames()
        assert py_info.get_num_channels() == ts_info.get_num_channels() 
示例27
def test_load_wav(self, dtype, sample_rate, num_channels, normalize, channels_first):
        """`sox_io_backend.load` is torchscript-able and returns the same result"""
        audio_path = self.get_temp_path(f'test_load_{dtype}_{sample_rate}_{num_channels}_{normalize}.wav')
        data = get_wav_data(dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
        save_wav(audio_path, data, sample_rate)

        script_path = self.get_temp_path('load_func.zip')
        torch.jit.script(py_load_func).save(script_path)
        ts_load_func = torch.jit.load(script_path)

        py_data, py_sr = py_load_func(
            audio_path, normalize=normalize, channels_first=channels_first)
        ts_data, ts_sr = ts_load_func(
            audio_path, normalize=normalize, channels_first=channels_first)

        self.assertEqual(py_sr, ts_sr)
        self.assertEqual(py_data, ts_data) 
示例28
def test_save_wav(self, dtype, sample_rate, num_channels):
        script_path = self.get_temp_path('save_func.zip')
        torch.jit.script(py_save_func).save(script_path)
        ts_save_func = torch.jit.load(script_path)

        expected = get_wav_data(dtype, num_channels)
        py_path = self.get_temp_path(f'test_save_py_{dtype}_{sample_rate}_{num_channels}.wav')
        ts_path = self.get_temp_path(f'test_save_ts_{dtype}_{sample_rate}_{num_channels}.wav')

        py_save_func(py_path, expected, sample_rate, True, None)
        ts_save_func(ts_path, expected, sample_rate, True, None)

        py_data, py_sr = load_wav(py_path)
        ts_data, ts_sr = load_wav(ts_path)

        self.assertEqual(sample_rate, py_sr)
        self.assertEqual(sample_rate, ts_sr)
        self.assertEqual(expected, py_data)
        self.assertEqual(expected, ts_data) 
示例29
def test_save_flac(self, sample_rate, num_channels, compression_level):
        script_path = self.get_temp_path('save_func.zip')
        torch.jit.script(py_save_func).save(script_path)
        ts_save_func = torch.jit.load(script_path)

        expected = get_wav_data('float32', num_channels)
        py_path = self.get_temp_path(f'test_save_py_{sample_rate}_{num_channels}_{compression_level}.flac')
        ts_path = self.get_temp_path(f'test_save_ts_{sample_rate}_{num_channels}_{compression_level}.flac')

        py_save_func(py_path, expected, sample_rate, True, compression_level)
        ts_save_func(ts_path, expected, sample_rate, True, compression_level)

        # converting to 32 bit because flac file has 24 bit depth which scipy cannot handle.
        py_path_wav = f'{py_path}.wav'
        ts_path_wav = f'{ts_path}.wav'
        sox_utils.convert_audio_file(py_path, py_path_wav, bit_depth=32)
        sox_utils.convert_audio_file(ts_path, ts_path_wav, bit_depth=32)

        py_data, py_sr = load_wav(py_path_wav, normalize=True)
        ts_data, ts_sr = load_wav(ts_path_wav, normalize=True)

        self.assertEqual(sample_rate, py_sr)
        self.assertEqual(sample_rate, ts_sr)
        self.assertEqual(expected, py_data)
        self.assertEqual(expected, ts_data) 
示例30
def __init__(self, data_root, utt2class, split_list,
                 chunker=None,
                 verbose=True):
        self.data_root = data_root
        if not isinstance(utt2class, str):
            raise ValueError('Please specify a path to a utt2class '
                             'file for loading data.')
        if not isinstance(split_list, str) and not isinstance(split_list, list):
            raise ValueError('Please specify a path to a split_list '
                             'file for loading data or to the list itself.')
        utt2class_ext = utt2class.split('.')[1]
        if utt2class_ext == 'json':
            with open(utt2class, 'r') as u2s_f:
                self.utt2class = json.load(u2s_f)
        else:
            self.utt2class = np.load(utt2class)
            self.utt2class = dict(self.utt2class.any())
        print('Found {} classes'.format(len(set(self.utt2class.values()))))
        self.chunker = chunker
        if isinstance(split_list, list):
            self.split_list = split_list
        else:
            with open(split_list, 'r') as sl_f:
                self.split_list = [l.rstrip() for l in sl_f]
                print('Found {} wav files'.format(len(self.split_list)))