Skip to content

Commit

Permalink
improve restoration strategy in mode 0
Browse files Browse the repository at this point in the history
  • Loading branch information
haoheliu committed Sep 30, 2021
1 parent f5d1511 commit bbbe3f0
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 10 deletions.
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,25 @@ from voicefixer import Vocoder
# Initialize model
voicefixer = VoiceFixer()
# Speech restoration

# Mode 0
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
mode = 0) # You can try out mode 0, 1, 2 to find out the best result
# Mode 1
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
mode = 1) # You can try out mode 0, 1, 2 to find out the best result
# Mode 2
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
mode = 0) # You can try out mode 0, 1 to find out the best result
mode = 2) # You can try out mode 0, 1, 2 to find out the best result




# Universal speaker independent vocoder
vocoder = Vocoder(sample_rate=44100) # Only 44100 sampling rate is supported.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
EMAIL = 'haoheliu@gmail.com'
AUTHOR = 'Haohe Liu'
REQUIRES_PYTHON = '>=3.7.0'
VERSION = '0.0.7'
VERSION = '0.0.8'

# What packages are required for this module to be executed?
REQUIRED = [
Expand Down
6 changes: 3 additions & 3 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@

voicefixer = VoiceFixer()

voicefixer.restore(input="/Users/liuhaohe/Desktop/test_song.wav",
output="/Users/liuhaohe/Desktop/test_song_out_2.wav",
cuda=False,mode=1)
# Run restoration on CPU (cuda=False) with mode=2.
# NOTE(review): `output` is the same path as `input`, so this call presumably
# overwrites the source recording in place -- confirm that is intended, or
# point `output` at a separate file.
voicefixer.restore(input="/Users/liuhaohe/Downloads/lieshi_short.wav",
output="/Users/liuhaohe/Downloads/lieshi_short.wav",
cuda=False,mode=2)
32 changes: 27 additions & 5 deletions voicefixer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,41 @@ def _pre(self, model, input, cuda):
# return models.to_log(sp), models.to_log(mel_orig)
return sp, mel_orig

def remove_higher_frequency(self, wav, ratio=0.95):
    """Zero the upper frequency bins of ``wav`` and resynthesise it.

    The per-bin energy is the sum over time of ``log10(|STFT|)`` clipped
    below at zero. Bins are accumulated from the lowest frequency upward
    until the running total reaches ``ratio`` of the overall energy; the
    bin that reaches the threshold and every bin above it are zeroed.

    Args:
        wav: Waveform passed to ``librosa.stft`` with its default settings
            (assumed to be a 1-D float array -- TODO confirm against
            ``_load_wav``).
        ratio: Fraction of the total clipped log-energy at which the
            cutoff bin is placed.

    Returns:
        The waveform returned by ``librosa.istft`` for the truncated
        spectrogram.
    """
    stft = librosa.stft(wav)  # complex spectrogram, frequency bins on axis 0
    magnitude = np.abs(stft)

    # log10(max(m, 1)) equals log10(m) clipped at zero, but never evaluates
    # log10(0), so silent bins do not raise divide-by-zero warnings.
    log_energy = np.log10(np.maximum(magnitude, 1.0))
    energy_level = np.sum(log_energy, axis=1)
    total_energy = np.sum(energy_level)

    if total_energy > 0:
        threshold = total_energy * ratio
        # First bin whose cumulative energy reaches the threshold. If it is
        # never reached (e.g. ratio > 1) searchsorted returns the bin count,
        # so nothing is removed instead of indexing past the last bin.
        cutoff = int(
            np.searchsorted(np.cumsum(energy_level), threshold, side="left")
        )
    else:
        # No magnitude exceeds 1, so there is nothing to base a cutoff on;
        # keep every bin rather than wiping the whole signal.
        cutoff = energy_level.shape[0]

    # Zero the complex bins directly. Rebuilding the phase as real/|z| and
    # imag/|z| yields NaN wherever |z| == 0 (digital silence), which would
    # then leak into the resynthesised waveform.
    stft[cutoff:, ...] = 0
    return librosa.istft(stft)

def restore(self, input, output, cuda=False, mode=0):
if(cuda and torch.cuda.is_available()):
self._model = self._model.cuda()
# metrics = {}
if(mode == 1):
self._model.train() # More effective on seriously demaged speech
elif(mode == 2):
self._model.generator.denoiser.train() # Another option worth trying
else:
if(mode == 0):
self._model.eval()
elif(mode == 1):
self._model.eval()
elif(mode == 2):
self._model.train() # More effective on seriously demaged speech

with torch.no_grad():
wav_10k = self._load_wav(input, sample_rate=44100)
if(mode == 0):
# print("In mode 0, we will remove part of the higher frequency part before processing")
wav_10k = self.remove_higher_frequency(wav_10k)
res = []
seg_length = 44100*60
break_point = seg_length
Expand Down

0 comments on commit bbbe3f0

Please sign in to comment.