#GPT-4 Conversation #1099357415293587498
1 messages · Page 1 of 1 (latest)
Write a simple python program that takes a wav audio signal containing a vowel, and outputs the audio signal of that vowel, but with a long duration created through the Psola algorithm
Here's a simple Python program that uses the pydub, numpy, and scipy libraries to apply the PSOLA algorithm to a .wav file containing a vowel. This script increases the duration of the vowel sound:
import numpy as np
from scipy.io import wavfile
from pydub import AudioSegment
def _load_mono_float(input_file):
    """Read a WAV file and return ``(rate, samples)`` as mono float64 in [-1, 1]."""
    rate, audio_data = wavfile.read(input_file)

    if np.issubdtype(audio_data.dtype, np.floating):
        # Float WAVs are already normalised; np.iinfo() would raise on them.
        samples = audio_data.astype(np.float64)
    elif audio_data.dtype == np.uint8:
        # 8-bit PCM is unsigned with its zero level at 128.
        samples = (audio_data.astype(np.float64) - 128.0) / 128.0
    else:
        samples = audio_data.astype(np.float64) / np.iinfo(audio_data.dtype).max

    if samples.ndim > 1:
        # Down-mix multi-channel audio; the stretcher works on one channel.
        samples = samples.mean(axis=1)
    return rate, samples


def _estimate_pitch_period(samples, rate, f_min=60.0, f_max=500.0):
    """Return the dominant pitch period of ``samples`` in whole samples.

    The period is the lag of the autocorrelation maximum inside the
    ``f_min``..``f_max`` band, measured on a short frame from the middle of
    the signal (where a sustained vowel is most stable).
    """
    n = samples.shape[0]
    min_lag = max(1, int(rate / f_max))
    max_lag = int(rate / f_min)

    span = min(n, 4 * max_lag)
    start = (n - span) // 2
    frame = samples[start:start + span]
    frame = frame - frame.mean()

    max_lag = min(max_lag, span // 2)
    if max_lag <= min_lag:
        # Signal too short to search the band; fall back to a usable length.
        return max(1, min(min_lag, n))

    # FFT-based linear autocorrelation (zero-padded to avoid wrap-around).
    n_fft = 1 << (2 * span - 1).bit_length()
    spectrum = np.fft.rfft(frame, n_fft)
    autocorr = np.fft.irfft(spectrum * np.conj(spectrum), n_fft)[:max_lag + 1]
    return min_lag + int(np.argmax(autocorr[min_lag:]))


def PSOLA(input_file: str, output_file: str, stretch_factor: float) -> None:
    """Time-stretch a vowel recording with a simplified PSOLA overlap-add.

    The input is assumed to be a steady vowel with roughly constant pitch.
    One pitch period is estimated by autocorrelation, pitch marks are placed
    at every period (they are not aligned to glottal epochs), and the output
    is built by overlap-adding Hann-windowed, two-period-long grains so that
    the pitch is kept while the duration is scaled.

    Args:
        input_file: Path of the WAV file to read (integer or float PCM;
            multi-channel input is averaged down to mono).
        output_file: Path of the 16-bit mono WAV file to write.
        stretch_factor: Duration multiplier; ``2.0`` doubles the length.

    Raises:
        ValueError: If ``stretch_factor`` is not a positive number.
    """
    if not stretch_factor > 0:
        raise ValueError("stretch_factor must be a positive number")

    rate, samples = _load_mono_float(input_file)
    n_in = samples.shape[0]
    n_out = int(n_in * stretch_factor)
    if n_in == 0 or n_out == 0:
        wavfile.write(output_file, rate, np.zeros(0, dtype=np.int16))
        return

    period = _estimate_pitch_period(samples, rate)
    grain_len = 2 * period + 1
    # A symmetric Hann window of length 2P+1 hopped by P sums to exactly 1.
    window = np.hanning(grain_len)

    # Pad by one period so grains centred near either edge stay in bounds.
    padded = np.pad(samples, period)
    stretched = np.zeros(n_out + 2 * period)
    weight = np.zeros(n_out + 2 * period)

    # Last analysis mark whose grain still lies fully inside the input.
    last_mark = max(0, ((n_in - 1 - period) // period) * period)

    for syn_mark in range(0, n_out, period):
        # Pick the analysis pitch mark closest to the matching input time.
        ana_mark = int(round(syn_mark / stretch_factor / period)) * period
        ana_mark = min(ana_mark, last_mark)
        stretched[syn_mark:syn_mark + grain_len] += (
            padded[ana_mark:ana_mark + grain_len] * window
        )
        weight[syn_mark:syn_mark + grain_len] += window

    stretched = stretched[period:period + n_out]
    weight = weight[period:period + n_out]
    # Undo the window gain (matters at the tail, where only one grain lands).
    np.divide(stretched, weight, out=stretched, where=weight > 1e-8)

    # Clip before the integer cast so loud samples saturate instead of wrapping.
    pcm = np.round(np.clip(stretched, -1.0, 1.0) * 32767.0).astype(np.int16)
    wavfile.write(output_file, rate, pcm)
# Example run: read the vowel recording, double its duration, and write the
# result next to it.
input_file, output_file = "input_vowel.wav", "output_vowel_longer.wav"
stretch_factor = 2.0
PSOLA(
    input_file=input_file,
    output_file=output_file,
    stretch_factor=stretch_factor,
)
To run this program, you will need to install pydub, numpy, and scipy:
pip install pydub numpy scipy
You can then run the script with a .wav file containing a vowel sound. The stretch_factor value controls the duration of the output. For example, a stretch factor of 2.0 will make the vowel sound twice as long. Adjust this value to achieve the desired duration.
C:\Users\Egor\Documents\ai_psola.py:7: WavFileWarning: Chunk (non-data) not understood, skipping it.
rate, audio_data = wavfile.read(input_file)
Traceback (most recent call last):
File "C:\Users\Egor\Documents\ai_psola.py", line 37, in <module>
PSOLA(input_file, output_file, stretch_factor)
File "C:\Users\Egor\Documents\ai_psola.py", line 8, in PSOLA
audio_data = audio_data / np.iinfo(audio_data.dtype).max
File "C:\Users\Egor\AppData\Local\Programs\Python\Python39\lib\site-packages\numpy\core\getlimits.py", line 507, in init
raise ValueError("Invalid integer data type %r." % (self.kind,))
ValueError: Invalid integer data type 'f'.