1

I'm trying to use faster-whisper for speech-to-text transcription in a Jupyter Notebook. I followed a YouTube tutorial and refined my code with ChatGPT, but I'm not getting any transcription output. The recording seems to work, but the speech_recognition function doesn't display any text.

Here's my code:

import ipywidgets as wd
from IPython.display import display
from threading import Thread
from queue import Queue
import sounddevice as sd
import numpy as np
import faster_whisper
import pyaudio

# Load the whisper model
model = faster_whisper.WhisperModel("small", device="cpu", compute_type="int8")
recordings = Queue()

# UI buttons
record_button = wd.Button(description="Record", disabled=False, button_style="success", icon="microphone")
stop_button = wd.Button(description="Stop", disabled=False, button_style="warning", icon="stop")
output = wd.Output()

# PyAudio setup
p = pyaudio.PyAudio()
default_device_index = p.get_default_input_device_info().get("index", None)

CHANNELS = 1
FRAME_RATE = 16000
RECORD_SECONDS = 20
AUDIO_FORMAT = pyaudio.paInt16
SAMPLE_SIZE = 2
CHUNK = 1024
is_recording = False

def record_microphone():
    """Records audio from the microphone and puts it in a queue."""
    global is_recording
    
    p = pyaudio.PyAudio()
    stream = p.open(format=AUDIO_FORMAT, channels=CHANNELS, rate=FRAME_RATE,
                    input=True, input_device_index=default_device_index, frames_per_buffer=CHUNK)

    while is_recording:
        data = stream.read(CHUNK)
        recordings.put(data)
            
    stream.stop_stream()
    stream.close()
    p.terminate()

def speech_recognition():
    """Processes audio from the queue and transcribes it using Faster-Whisper."""
    audio_buffer = []
    
    while is_recording or not recordings.empty():
        if not recordings.empty():
            data = recordings.get()
            audio_buffer.append(np.frombuffer(data, dtype=np.int16))
            if len(audio_buffer) * CHUNK >= FRAME_RATE:
                # Normalize audio
                audio_chunk = np.concatenate(audio_buffer).astype(np.float32) / 32768  
                audio_buffer = []
                segments, _ = model.transcribe(audio_chunk, language="en", beam_size=5)            
                with output:
                    for segment in segments:
                        display(segment.text)

def start_recording(data):
    """Starts recording and transcription threads."""
    global is_recording
    is_recording = True

    with output:
        display("Listening...")
    
    record_thread = Thread(target=record_microphone)
    transcribe_thread = Thread(target=speech_recognition)

    record_thread.start()
    transcribe_thread.start()
    
def stop_recording(data):
    """Stops the recording process."""
    global is_recording
    is_recording = False
    with output:
        display("Stopped.")

record_button.on_click(start_recording)
stop_button.on_click(stop_recording)

display(record_button, stop_button, output)

Any help is much appreciated

1 Answer 1

-1
def speech_recognition():
    audio_buffer = []

    while is_recording or not recordings.empty():
        if not recordings.empty():
            data = recordings.get()
            audio_buffer.append(np.frombuffer(data, dtype=np.int16))

    audio_np = np.concatenate(audio_buffer).astype(np.float32) / 32768

    segments, _ = model.transcribe(audio_np, language="en", beam_size=5)

    transcription = ""
    for segment in segments:
        transcription += segment.text + " "

    with output:
        output.clear_output()
        print("Transcription:")
        print(transcription.strip())
Sign up to request clarification or add additional context in comments.

1 Comment

As it’s currently written, your answer is unclear. Please edit to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers in the help center.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.