
Error message:

Traceback (most recent call last):
  File "/home/prakrisht/-------------/detect_from_webcam.py", line 173, in <module>
    run_inference(detection_model, category_index, cap)
  File "/home/prakrisht/----------------/detect_from_webcam.py", line 60, in run_inference
    output_dict = run_inference_for_single_image(model, image_np)
  File "/home/prakrisht/-----------------/detect_from_webcam.py", line 31, in run_inference_for_single_image
    output_dict = model(input_tensor)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/saved_model/load.py", line 816, in _call_attribute
    return instance.__call__(*args, **kwargs)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/function_type_utils.py", line 446, in bind_function_inputs
    raise TypeError(
TypeError: Binding inputs to tf.function failed due to `Can not cast TensorSpec(shape=(1,), dtype=tf.float32, name='input_tensor') to TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')`. Received args: (<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>,) and kwargs: {} for signature: (input_tensor: TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')).

Code for the detection (summary):

  1. We run inference on the camera feed.
  2. We use a ResNet101 model trained on a custom dataset.
  3. The basic code works perfectly with images.
  4. It was also working with the webcam until recently, but doesn't anymore.
import numpy as np
import argparse
import tensorflow as tf
import cv2

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1

# Patch the location of gfile
tf.gfile = tf.io.gfile


def load_model(model_path):
    model = tf.saved_model.load(model_path)

    return model


def run_inference_for_single_image(model, image):
    image = np.asarray(image).astype(np.float32)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis,...]
    
    # Run inference
    output_dict = model(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
   
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox masks to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                                    output_dict['detection_masks'], output_dict['detection_boxes'],
                                    image.shape[0], image.shape[1])      
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    
    return output_dict


def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        # Actual detection.
        output_dict = run_inference_for_single_image(model, image_np)
        # Visualization of the results of a detection.
        #print(output_dict)
        draw_boxes_on_image(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            #instance_masks=output_dict.get('detection_masks_reframed', None),
            #use_normalized_coordinates=True,
            line_thickness=8,
            font_scale=1)
        cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cap.release()
            cv2.destroyAllWindows()
            break


def draw_boxes_on_image(image_np, boxes, classes, scores, category_index, 
                        threshold=0.5, line_thickness=8, font=cv2.FONT_HERSHEY_SIMPLEX, 
                        font_scale=0.7, font_thickness=2):
    """
    Draw bounding boxes on the image with class-specific colors, showing labels with the highest score on top.
    Alternate label placement between the left and right sides of the bounding box.

    Args:
    - image_np: Numpy array of the image.
    - boxes: Array of bounding boxes, with shape [N, 4].
    - classes: Array of class indices corresponding to the boxes.
    - scores: Array of confidence scores corresponding to the boxes.
    - category_index: Dictionary mapping class indices to class names.
    - threshold: Confidence threshold for displaying the bounding box.
    - line_thickness: Thickness of the bounding box lines.
    - font: Font type for the label.
    - font_scale: Scale (size) of the font for the label.
    - font_thickness: Thickness of the font for the label.
    """
    # Assign a unique color for each class
    colors = {}
    np.random.seed(42)  # For reproducibility
    for class_id in np.unique(classes):
        colors[class_id] = tuple(np.random.randint(0, 256, 3).tolist())

    # Sort detections by score in descending order
    sorted_indices = np.argsort(-scores)

    for i in sorted_indices:
        if scores[i] > threshold:
            box = boxes[i]
            class_id = int(classes[i])
            score = scores[i]

            ymin, xmin, ymax, xmax = box
            im_height, im_width, _ = image_np.shape
            (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 
                                          int(ymin * im_height), int(ymax * im_height))

            # Draw bounding box
            color = colors[class_id]
            cv2.rectangle(image_np, (left, top), (right, bottom), color, line_thickness)

            # Prepare label text
            #strip "rotation" from the label
            label_text = category_index[class_id]["name"]
            if label_text.find("rotation") != -1:
                label_text = label_text[:label_text.find("rotation")]

            label = f'{label_text}: {int(score * 100)}%'
            label_size, base_line = cv2.getTextSize(label, font, font_scale, font_thickness)
            top = max(top, label_size[1])

            # Determine label position (alternate between left and right)
            if i % 2 == 0:
                # Left side of the box
                label_left = left
            else:
                # Right side of the box
                label_left = right - label_size[0]

            # Draw label background and text
            cv2.rectangle(image_np, (label_left, top - label_size[1]), 
                          (label_left + label_size[0], top + base_line), color, cv2.FILLED)
            cv2.putText(image_np, label, (label_left, top), font, font_scale, (0, 0, 0), font_thickness)

            # Adjust the top position to avoid overlapping with the next label
            top -= (label_size[1] + base_line + 5)

    return image_np


if __name__ == '__main__':
    model_path = './inference_graph_resnet101/saved_model'
    label_map_path = './-------------/1_label_map.pbtxt'
    detection_model = load_model(model_path)
    category_index = label_map_util.create_category_index_from_labelmap(label_map_path, use_display_name=True)

    cap = cv2.VideoCapture(-1)
    #check camera and display the feed

    #print all available cameras
    for i in range(0, 10):
        cap = cv2.VideoCapture(i)
        if not cap.isOpened():
            print(f"Camera {i} is not available")
        else:
            print(f"Camera {i} is available")
            cap.release()
    run_inference(detection_model, category_index, cap)

Result of the camera test:

[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 0 is not available
Camera 1 is available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 2 is not available
Camera 3 is available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video4): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 4 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video5): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 5 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video6): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 6 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video7): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 7 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video8): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 8 is not available
[ WARN:[email protected]] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video9): can't open camera by index
[ERROR:[email protected]] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 9 is not available

Troubleshooting done already:

  1. Tried different camera indices.
  2. Tested with a set of images; it works perfectly.
  3. Checked model paths, etc.

No idea where to head next.

1 Answer

Looking at the code reveals a logic issue:

for i in range(0, 10):  # looping over camera indices 0..9
    cap = cv2.VideoCapture(i)  # capture camera number i
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        cap.release()  # releasing the capture!
run_inference(detection_model, category_index, cap)  # cap is now the last capture from the loop -- either released or never opened!
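This is also exactly what the traceback shows: reading from a capture that never opened (or was already released) returns (False, None), and None pushed through the question's preprocessing becomes the (1,) float32 NaN tensor the model rejects. A minimal sketch reproducing it (index 9 is just an example of an unavailable device, as in the loop above):

import cv2
import numpy as np
import tensorflow as tf

cap = cv2.VideoCapture(9)   # an unavailable index, like the last one in the loop
ret, frame = cap.read()     # returns (False, None) -- ret is never checked
print(ret, frame)           # False None

image = np.asarray(frame).astype(np.float32)        # np.asarray(None) -> array(nan)
input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]
print(input_tensor)  # tf.Tensor([nan], shape=(1,), dtype=float32) -- matches the error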

Instead:

for i in range(0, 10):
    cap = cv2.VideoCapture(i)
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        run_inference(detection_model, category_index, cap)  # first run
        cap.release()  # then release.
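
Independent of that fix, it is worth guarding run_inference against a failed read, so a dropped frame can't crash inference again. A sketch of the same loop with the check added (everything else in the function stays as in the question):

def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        if not ret:  # camera unplugged, capture released, or no frame available
            print("Failed to grab frame, stopping.")
            break
        output_dict = run_inference_for_single_image(model, image_np)
        # ... visualization, imshow, and the 'q' check exactly as before ...

Two smaller points: run_inference already calls cap.release() when 'q' is pressed, so the extra release afterwards is redundant but harmless; and once valid frames flow again, it may be worth checking whether the astype(np.float32) in run_inference_for_single_image matches the saved model's uint8 input signature shown in the error message.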