Python source code examples: hparams.fft_size
Example 1
def _stft(y):
    return librosa.stft(y=y, n_fft=hparams.fft_size, hop_length=get_hop_size())
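All of these snippets read their settings from a shared hparams module and a get_hop_size() helper that are not shown on this page. A minimal self-contained sketch of how _stft can be exercised; the SimpleNamespace stand-in and every numeric value in it are illustrative assumptions, not the source projects' actual configuration:

import numpy as np
import librosa
from types import SimpleNamespace

# Hypothetical stand-in for the hparams module these snippets import:
hparams = SimpleNamespace(fft_size=1024, hop_size=256, sample_rate=22050,
                          num_mels=80, fmin=125, fmax=7600)

def get_hop_size():
    return hparams.hop_size

def _stft(y):
    return librosa.stft(y=y, n_fft=hparams.fft_size, hop_length=get_hop_size())

y = np.random.randn(hparams.sample_rate).astype(np.float32)  # 1 s of noise
S = _stft(y)
print(S.shape)  # (1 + fft_size // 2, n_frames) == (513, n_frames)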
Example 2
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size, n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)
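The returned filterbank has shape (num_mels, 1 + fft_size // 2), so multiplying it with an STFT magnitude yields a mel spectrogram. A short usage sketch continuing from the stand-in hparams and S defined under Example 1; keyword arguments are used because librosa 0.10 removed the positional sr/n_fft form shown in these examples:

mel_basis = librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.fft_size,
                                n_mels=hparams.num_mels,
                                fmin=hparams.fmin, fmax=hparams.fmax)
print(mel_basis.shape)               # (80, 513)
mel = np.dot(mel_basis, np.abs(S))   # (num_mels, n_frames)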
Example 3
def _lws_processor():
    return lws.lws(hparams.fft_size, hparams.hop_size, mode="speech")
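lws (local weighted sums) reconstructs phase from a magnitude spectrogram. A sketch of how such a processor is typically driven, using the stft/run_lws/istft methods the lws package documents and the stand-in values from Example 1; exact array shapes depend on the lws version:

import lws

proc = lws.lws(hparams.fft_size, hparams.hop_size, mode="speech")
X = proc.stft(y.astype(np.float64))  # complex spectrogram, frames x bins
X_rec = proc.run_lws(np.abs(X))      # magnitude -> complex with estimated phase
y_rec = proc.istft(X_rec)            # back to a time-domain signal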
Example 4
def _build_mel_basis():
    if hparams.fmax is not None:
        assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
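The only difference from Example 2 is the None guard: in librosa.filters.mel, fmax=None already defaults to sample_rate / 2 (Nyquist), so the guard merely skips the sanity assert when no upper band edge is configured:

# fmax=None makes librosa fall back to sr / 2 internally:
basis = librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.fft_size,
                            n_mels=hparams.num_mels, fmin=hparams.fmin, fmax=None)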
Example 5
def _lws_processor():
    return lws.lws(hparams.fft_size, get_hop_size(), mode="speech")
Example 6
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example 7
def _process_utterance(out_dir, index, wav_path, text):
    # Load the audio to a numpy array:
    wav = audio.load_wav(wav_path)

    if hparams.rescaling:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max

    # Mu-law quantize
    if is_mulaw_quantize(hparams.input_type):
        # [0, quantize_channels)
        out = P.mulaw_quantize(wav, hparams.quantize_channels)

        # Trim silences
        start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
        wav = wav[start:end]
        out = out[start:end]
        constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
        out_dtype = np.int16
    elif is_mulaw(hparams.input_type):
        # [-1, 1]
        out = P.mulaw(wav, hparams.quantize_channels)
        constant_values = P.mulaw(0.0, hparams.quantize_channels)
        out_dtype = np.float32
    else:
        # [-1, 1]
        out = wav
        constant_values = 0.0
        out_dtype = np.float32

    # Compute a mel-scale spectrogram from the trimmed wav:
    # (N, D)
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T

    # lws pads zeros internally before performing stft
    # this is needed to adjust time resolution between audio and mel-spectrogram
    l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())

    # zero pad for quantized signal
    out = np.pad(out, (l, r), mode="constant", constant_values=constant_values)
    N = mel_spectrogram.shape[0]
    assert len(out) >= N * audio.get_hop_size()

    # time resolution adjustment
    # ensure length of raw audio is multiple of hop_size so that we can use
    # transposed convolution to upsample
    out = out[:N * audio.get_hop_size()]
    assert len(out) % audio.get_hop_size() == 0
    timesteps = len(out)

    # Write the spectrograms to disk:
    audio_filename = 'ljspeech-audio-%05d.npy' % index
    mel_filename = 'ljspeech-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, audio_filename),
            out.astype(out_dtype), allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename),
            mel_spectrogram.astype(np.float32), allow_pickle=False)

    # Return a tuple describing this training example:
    return (audio_filename, mel_filename, timesteps, text)
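P above is a preprocessing module providing mu-law companding (in wavenet_vocoder-style code it comes from nnmnkwii.preprocessing). A rough numpy sketch of the standard mu-law formula to show what the [0, quantize_channels) comment means; the exact rounding and mu parameterization of the real P.mulaw_quantize may differ:

import numpy as np

# Standard mu-law companding, assuming mu = quantize_channels - 1 = 255.
def mulaw(x, mu=255):
    # x in [-1, 1] -> y in [-1, 1], compressed toward zero
    return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)

def mulaw_quantize(x, mu=255):
    # map companded [-1, 1] onto integer bins [0, mu]
    return ((mulaw(x, mu) + 1) / 2 * mu).astype(np.int64)

x = np.linspace(-1, 1, 5)
print(mulaw_quantize(x))  # [  0  15 127 239 255]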