#GPT-4 Conversation #1099357415293587498

1 messages · Page 1 of 1 (latest)

oblique flameBOT
magic scaffold
#

Write a simple python program that takes a wav audio signal containing a vowel, and outputs the audio signal of that vowel, but with a long duration created through the Psola algorithm

oblique flameBOT
# magic scaffold Write a simple python program that takes a wav audio signal containing a vowel, ...

Here's a simple Python program that uses the library pydub and numpy to apply the PSOLA algorithm to a .wav file containing a vowel. This script increases the duration of the vowel sound:

import numpy as np
from scipy.io import wavfile
from pydub import AudioSegment


def PSOLA(input_file, output_file, stretch_factor):
    rate, audio_data = wavfile.read(input_file)
    audio_data = audio_data / np.iinfo(audio_data.dtype).max

    audio_segment = AudioSegment(audio_data.tobytes(), frame_rate=rate, sample_width=audio_data.dtype.itemsize, channels=1)
    audio_duration = audio_segment.duration_seconds
    frame_length = int(rate * audio_duration) // audio_duration

    overlap_factor = 0.5
    hop_input = int(frame_length * (1 - overlap_factor))
    hop_stretch = int(hop_input * stretch_factor)
    extended_data = np.zeros(int(audio_data.shape[0] * stretch_factor), dtype=audio_data.dtype)

    for i in range(0, audio_data.shape[0], hop_input):
        i_ext = int(i * stretch_factor)
        
        if i + frame_length < audio_data.shape[0]:
            extended_data[i_ext:i_ext + frame_length] += audio_data[i:i + frame_length]

        if i_ext + hop_stretch + frame_length < extended_data.shape[0]:
            extended_data[i_ext + hop_stretch:i_ext + hop_stretch + frame_length] += audio_data[i:i + frame_length]

    extended_data *= 32767.0
    output_wav = extended_data.astype(np.int16)
    wavfile.write(output_file, rate, output_wav)


input_file = "input_vowel.wav"
output_file = "output_vowel_longer.wav"
stretch_factor = 2.0

PSOLA(input_file, output_file, stretch_factor)

To run this program, you will need to install pydub and numpy:

pip install pydub numpy scipy

You can then run the script with a .wav file containing a vowel sound. The stretch_factor value helps to control the duration of the output. For example, a stretch factor of 2.0 will make the vowel sound twice as long. Adjust this value to achieve the desired dur

#

ation.

magic scaffold
#

C:\Users\Egor\Documents\ai_psola.py:7: WavFileWarning: Chunk (non-data) not understood, skipping it.
rate, audio_data = wavfile.read(input_file)
Traceback (most recent call last):
File "C:\Users\Egor\Documents\ai_psola.py", line 37, in <module>
PSOLA(input_file, output_file, stretch_factor)
File "C:\Users\Egor\Documents\ai_psola.py", line 8, in PSOLA
audio_data = audio_data / np.iinfo(audio_data.dtype).max
File "C:\Users\Egor\AppData\Local\Programs\Python\Python39\lib\site-packages\numpy\core\getlimits.py", line 507, in init
raise ValueError("Invalid integer data type %r." % (self.kind,))
ValueError: Invalid integer data type 'f'.