
Error message:

Traceback (most recent call last):
  File "/home/prakrisht/-------------/detect_from_webcam.py", line 173, in <module>
    run_inference(detection_model, category_index, cap)
  File "/home/prakrisht/----------------/detect_from_webcam.py", line 60, in run_inference
    output_dict = run_inference_for_single_image(model, image_np)
  File "/home/prakrisht/-----------------/detect_from_webcam.py", line 31, in run_inference_for_single_image
    output_dict = model(input_tensor)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/saved_model/load.py", line 816, in _call_attribute
    return instance.__call__(*args, **kwargs)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/function_type_utils.py", line 446, in bind_function_inputs
    raise TypeError(
TypeError: Binding inputs to tf.function failed due to `Can not cast TensorSpec(shape=(1,), dtype=tf.float32, name='input_tensor') to TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')`. Received args: (<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>,) and kwargs: {} for signature: (input_tensor: TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')).

Code for the detection (summary):

  1. We run inference on the camera feed.
  2. We use a ResNet101 model trained on a custom dataset.
  3. The basic code works perfectly with images.
  4. It was also working with the webcam until recently, but doesn't anymore.
import numpy as np
import argparse
import tensorflow as tf
import cv2

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1

# Patch the location of gfile
tf.gfile = tf.io.gfile


def load_model(model_path):
    model = tf.saved_model.load(model_path)

    return model


def run_inference_for_single_image(model, image):
    image = np.asarray(image).astype(np.float32)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis,...]
    
    # Run inference
    output_dict = model(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
   
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox masks to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                                    output_dict['detection_masks'], output_dict['detection_boxes'],
                                    image.shape[0], image.shape[1])      
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    
    return output_dict


def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        # Actual detection.
        output_dict = run_inference_for_single_image(model, image_np)
        # Visualization of the results of a detection.
        #print(output_dict)
        draw_boxes_on_image(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            #instance_masks=output_dict.get('detection_masks_reframed', None),
            #use_normalized_coordinates=True,
            line_thickness=8,
            font_scale=1)
        cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cap.release()
            cv2.destroyAllWindows()
            break


def draw_boxes_on_image(image_np, boxes, classes, scores, category_index, 
                        threshold=0.5, line_thickness=8, font=cv2.FONT_HERSHEY_SIMPLEX, 
                        font_scale=0.7, font_thickness=2):
    """
    Draw bounding boxes on the image with class-specific colors, showing labels with the highest score on top.
    Alternate label placement between the left and right sides of the bounding box.

    Args:
    - image_np: Numpy array of the image.
    - boxes: Array of bounding boxes, with shape [N, 4].
    - classes: Array of class indices corresponding to the boxes.
    - scores: Array of confidence scores corresponding to the boxes.
    - category_index: Dictionary mapping class indices to class names.
    - threshold: Confidence threshold for displaying the bounding box.
    - line_thickness: Thickness of the bounding box lines.
    - font: Font type for the label.
    - font_scale: Scale (size) of the font for the label.
    - font_thickness: Thickness of the font for the label.
    """
    # Assign a unique color for each class
    colors = {}
    np.random.seed(42)  # For reproducibility
    for class_id in np.unique(classes):
        colors[class_id] = tuple(np.random.randint(0, 256, 3).tolist())

    # Sort detections by score in descending order
    sorted_indices = np.argsort(-scores)

    for i in sorted_indices:
        if scores[i] > threshold:
            box = boxes[i]
            class_id = int(classes[i])
            score = scores[i]

            ymin, xmin, ymax, xmax = box
            im_height, im_width, _ = image_np.shape
            (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 
                                          int(ymin * im_height), int(ymax * im_height))

            # Draw bounding box
            color = colors[class_id]
            cv2.rectangle(image_np, (left, top), (right, bottom), color, line_thickness)

            # Prepare label text
            #strip "rotation" from the label
            label_text = category_index[class_id]["name"]
            if label_text.find("rotation") != -1:
                label_text = label_text[:label_text.find("rotation")]

            label = f'{label_text}: {int(score * 100)}%'
            label_size, base_line = cv2.getTextSize(label, font, font_scale, font_thickness)
            top = max(top, label_size[1])

            # Determine label position (alternate between left and right)
            if i % 2 == 0:
                # Left side of the box
                label_left = left
            else:
                # Right side of the box
                label_left = right - label_size[0]

            # Draw label background and text
            cv2.rectangle(image_np, (label_left, top - label_size[1]), 
                          (label_left + label_size[0], top + base_line), color, cv2.FILLED)
            cv2.putText(image_np, label, (label_left, top), font, font_scale, (0, 0, 0), font_thickness)

            # Adjust the top position to avoid overlapping with the next label
            top -= (label_size[1] + base_line + 5)

    return image_np


if __name__ == '__main__':
    model_path = './inference_graph_resnet101/saved_model'
    label_map_path = './-------------/1_label_map.pbtxt'
    detection_model = load_model(model_path)
    category_index = label_map_util.create_category_index_from_labelmap(label_map_path, use_display_name=True)

    cap = cv2.VideoCapture(-1)
    #check camera and display the feed

    #print all available cameras
    for i in range(0, 10):
        cap = cv2.VideoCapture(i)
        if not cap.isOpened():
            print(f"Camera {i} is not available")
        else:
            print(f"Camera {i} is available")
            cap.release()
    run_inference(detection_model, category_index, cap)

Result of the camera test:

[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 0 is not available
Camera 1 is available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 2 is not available
Camera 3 is available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video4): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 4 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video5): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 5 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video6): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 6 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video7): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 7 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video8): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 8 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video9): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 9 is not available

Troubleshooting done already:

  1. Tried different camera indices.
  2. Tested with a set of images; it works perfectly.
  3. Checked model paths, etc.

No idea where to head next.

1 Answer

Looking at the code reveals a logic issue:

for i in range(0, 10):  # looping over camera indices 0..9
    cap = cv2.VideoCapture(i)  # capture camera number i
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        cap.release()  # releasing the capture!
run_inference(detection_model, category_index, cap)  # cap is now the last capture from the loop -- either released or never opened!
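This is also exactly what the traceback shows: reading from a capture that never opened (or was already released) returns (False, None), and None pushed through the question's preprocessing becomes the (1,) float32 NaN tensor the model rejects. A minimal sketch reproducing it (index 9 is just an example of an unavailable device, as in the loop above):

import cv2
import numpy as np
import tensorflow as tf

cap = cv2.VideoCapture(9)   # an unavailable index, like the last one in the loop
ret, frame = cap.read()     # returns (False, None) -- ret is never checked
print(ret, frame)           # False None

image = np.asarray(frame).astype(np.float32)        # np.asarray(None) -> array(nan)
input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]
print(input_tensor)  # tf.Tensor([nan], shape=(1,), dtype=float32) -- matches the error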

Instead:

for i in range(0, 10):
    cap = cv2.VideoCapture(i)
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        run_inference(detection_model, category_index, cap)  # first run
        cap.release()  # then release.
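
Independent of that fix, it is worth guarding run_inference against a failed read, so a dropped frame can't crash inference again. A sketch of the same loop with the check added (everything else in the function stays as in the question):

def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        if not ret:  # camera unplugged, capture released, or no frame available
            print("Failed to grab frame, stopping.")
            break
        output_dict = run_inference_for_single_image(model, image_np)
        # ... visualization, imshow, and the 'q' check exactly as before ...

Two smaller points: run_inference already calls cap.release() when 'q' is pressed, so the extra release afterwards is redundant but harmless; and once valid frames flow again, it may be worth checking whether the astype(np.float32) in run_inference_for_single_image matches the saved model's uint8 input signature shown in the error message.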