No Output When Using Faster-Whisper for Transcription in Python

Question

I'm trying to use faster-whisper for speech-to-text transcription in a Jupyter Notebook. I followed a YouTube tutorial and refined my code with ChatGPT, but I'm not getting any transcription output. The recording seems to work, but the speech_recognition function doesn't display any text.

Here's my code:

import ipywidgets as wd
from IPython.display import display
from threading import Thread
from queue import Queue
import sounddevice as sd
import numpy as np
import faster_whisper
import pyaudio

# Load the whisper model
model = faster_whisper.WhisperModel("small", device="cpu", compute_type="int8")
recordings = Queue()

# UI buttons
record_button = wd.Button(description="Record", disabled=False, button_style="success", icon="microphone")
stop_button = wd.Button(description="Stop", disabled=False, button_style="warning", icon="stop")
output = wd.Output()

# PyAudio setup
p = pyaudio.PyAudio()
default_device_index = p.get_default_input_device_info().get("index", None)

CHANNELS = 1
FRAME_RATE = 16000
RECORD_SECONDS = 20
AUDIO_FORMAT = pyaudio.paInt16
SAMPLE_SIZE = 2
CHUNK = 1024
is_recording = False

def record_microphone():
    """Records audio from the microphone and puts it in a queue."""
    global is_recording
    
    p = pyaudio.PyAudio()
    stream = p.open(format=AUDIO_FORMAT, channels=CHANNELS, rate=FRAME_RATE,
                    input=True, input_device_index=default_device_index, frames_per_buffer=CHUNK)

    while is_recording:
        data = stream.read(CHUNK)
        recordings.put(data)
            
    stream.stop_stream()
    stream.close()
    p.terminate()

def speech_recognition():
    """Processes audio from the queue and transcribes it using Faster-Whisper."""
    audio_buffer = []
    
    while is_recording or not recordings.empty():
        if not recordings.empty():
            data = recordings.get()
            audio_buffer.append(np.frombuffer(data, dtype=np.int16))
            if len(audio_buffer) * CHUNK >= FRAME_RATE:
                # Normalize audio
                audio_chunk = np.concatenate(audio_buffer).astype(np.float32) / 32768  
                audio_buffer = []
                segments, _ = model.transcribe(audio_chunk, language="en", beam_size=5)            
                with output:
                    for segment in segments:
                        display(segment.text)

def start_recording(data):
    """Starts recording and transcription threads."""
    global is_recording
    is_recording = True

    with output:
        display("Listening...")
    
    record_thread = Thread(target=record_microphone)
    transcribe_thread = Thread(target=speech_recognition)

    record_thread.start()
    transcribe_thread.start()
    
def stop_recording(data):
    """Stops the recording process."""
    global is_recording
    is_recording = False
    with output:
        display("Stopped.")

record_button.on_click(start_recording)
stop_button.on_click(stop_recording)

display(record_button, stop_button, output)

Any help is much appreciated

Toufik1704 · Accepted Answer · 2025-07-25 17:04:17Z

-1

def speech_recognition():
    audio_buffer = []

    while is_recording or not recordings.empty():
        if not recordings.empty():
            data = recordings.get()
            audio_buffer.append(np.frombuffer(data, dtype=np.int16))

    audio_np = np.concatenate(audio_buffer).astype(np.float32) / 32768

    segments, _ = model.transcribe(audio_np, language="en", beam_size=5)

    transcription = ""
    for segment in segments:
        transcription += segment.text + " "

    with output:
        output.clear_output()
        print("Transcription:")
        print(transcription.strip())

answered Jul 25 at 17:04

Toufik1704

12 bronze badges

Sign up to request clarification or add additional context in comments.

1 Comment

Community Jul 25 at 17:14

As it’s currently written, your answer is unclear. Please edit to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers in the help center.

Collectives™ on Stack Overflow

No Output When Using Faster-Whisper for Transcription in Python

1 Answer 1

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related