add feature for using other vocoder

haoheliu · Sep 30, 2021 · 77be5d2 · 77be5d2
1 parent bbbe3f0
commit 77be5d2
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ Please visit [demo page](https://haoheliu.github.io/demopage-voicefixer/) to vie
 
 ## Usage
 
-- Basic example:
+### Basic example:
 
 ```python
 # Will automatically download model parameters.
@@ -56,6 +56,35 @@ wave = vocoder.forward(mel=mel_spec) # This forward function is used in the foll
 vocoder.oracle(fpath="", # input wav file path
                out_path="") # output wav file path
 ```
+
+### Others
+
+- How to use your own vocoder, such as a pre-trained HiFi-GAN?
+
+First, you need to write the following helper function for your model. It should be similar to the helper function in this repo: https://github.com/haoheliu/voicefixer/blob/main/voicefixer/vocoder/base.py#L35
+
+```python
+    def convert_mel_to_wav(mel):
+        """
+        :param non normalized mel spectrogram: [batchsize, 1, t-steps, n_mel]
+        :return: [batchsize, 1, samples]
+        """
+        return wav
+```
+
+Then pass this function to *voicefixer.restore*, for example:
+```
+voicefixer.restore(input="", # input wav file path
+                   output="", # output wav file path
+                   cuda=False, # whether to use gpu acceleration
+                   mode = 0,
+                   your_vocoder_func = convert_mel_to_wav)
+```
+
+Note: 
+- For compatibility, your vocoder should work on 44.1 kHz waveforms with 128 mel frequency bins.
+- The input mel spectrogram to the helper function should not be normalized by the width of each mel filter. 
+
 ## Materials
 - Voicefixer training: https://github.com/haoheliu/voicefixer_main.git
 - Demo page: https://haoheliu.github.io/demopage-voicefixer/ 

diff --git a/voicefixer/base.py b/voicefixer/base.py
@@ -80,7 +80,7 @@ def remove_higher_frequency(self, wav, ratio=0.95):
         stft = spec * cos + 1j * spec * sin
         return librosa.istft(stft)
 
-    def restore(self, input, output, cuda=False, mode=0):
+    def restore(self, input, output, cuda=False, mode=0, your_vocoder_func=None):
         if(cuda and torch.cuda.is_available()):
             self._model = self._model.cuda()
         # metrics = {}
@@ -106,7 +106,10 @@ def restore(self, input, output, cuda=False, mode=0):
                 denoised_mel = from_log(out_model['mel'])
                 # if(meta["unify_energy"]):
                 #    denoised_mel, mel_noisy = self.amp_to_original_f(mel_sp_est=denoised_mel,mel_sp_target=mel_noisy)
-                out = self._model.vocoder(denoised_mel)
+                if(your_vocoder_func is None):
+                    out = self._model.vocoder(denoised_mel)
+                else:
+                    out = your_vocoder_func(denoised_mel)
                 # unify energy
                 if(torch.max(torch.abs(out)) > 1.0):
                     out = out / torch.max(torch.abs(out))