From d9c69cadfcf370c317dee3abd8ddb32d63220f88 Mon Sep 17 00:00:00 2001
From: Kristofers Solo
Date: Sat, 10 Dec 2022 16:19:26 +0200
Subject: [PATCH] Created object detection program

---
 src/detector/object_detection.py | 267 +++++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)
 create mode 100644 src/detector/object_detection.py

diff --git a/src/detector/object_detection.py b/src/detector/object_detection.py
new file mode 100644
index 0000000..06ea643
--- /dev/null
+++ b/src/detector/object_detection.py
@@ -0,0 +1,267 @@
+import tensorflow as tf
+import numpy as np
+import cv2
+import logging
+from detector.paths import LOGS_PATH, IMAGES_OUT_PATH
+from pathlib import Path
+
+# Inception V3 model for Keras
+from tensorflow.keras.applications.inception_v3 import preprocess_input
+
+
+# Set up logging
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)  # the default level (WARNING) would silently drop the INFO logs below
+handler = logging.FileHandler(str(Path.joinpath(LOGS_PATH, f"{__name__}.log")))
+formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+# COCO labels
+LABEL_PERSON = 1
+LABEL_CAR = 3
+LABEL_BUS = 6
+LABEL_TRUCK = 8
+LABEL_TRAFFIC_LIGHT = 10
+LABEL_STOP_SIGN = 13
+
+
+def accept_box(boxes, box_index, tolerance) -> bool:
+    """Reject (return False) a box whose center is within `tolerance` pixels of an earlier box's center."""
+    box = boxes[box_index]
+
+    for idx in range(box_index):
+        other_box = boxes[idx]
+        if abs(center(other_box, "x") - center(box, "x")) < tolerance and abs(center(other_box, "y") - center(box, "y")) < tolerance:
+            return False
+
+    return True
+
+
+def load_model(model_name):
+    """Download a pretrained object detection model and save it to the local cache."""
+    url = f"http://download.tensorflow.org/models/object_detection/tf2/20200711/{model_name}.tar.gz"
+
+    # Download the archive from the URL unless it is already in the cache
+    model_dir = tf.keras.utils.get_file(fname=model_name, untar=True, origin=url)
+
+    logger.info(f"Model path: {model_dir}")
+
+    return tf.saved_model.load(f"{model_dir}/saved_model")
+
+
+def load_rgb_images(files, shape=None):
+    """Load the images in RGB format."""
+
+    # For each image, convert it from BGR format to RGB format
+    images = [cv2.cvtColor(cv2.imread(str(file)), cv2.COLOR_BGR2RGB) for file in files]
+
+    # Resize the images if a desired shape is provided
+    return [cv2.resize(img, shape) for img in images] if shape else images
+
+
+def load_ssd_coco():
+    """Load the neural network that has the SSD architecture, trained on the COCO data set."""
+    return load_model("ssd_resnet50_v1_fpn_640x640_coco17_tpu-8")
+
+
+def save_image_annotated(img_rgb, file_name: Path, output, model_traffic_lights=None) -> None:
+    """Annotate the image with the object types, and generate cropped images of traffic lights."""
+    output_file = Path.joinpath(IMAGES_OUT_PATH, file_name.name)
+
+    # For each bounding box that was detected
+    for idx, _ in enumerate(output["boxes"]):
+
+        # Extract the type of the object that was detected
+        obj_class = output["detection_classes"][idx]
+
+        # How confident the object detection model is in the object's type
+        score = int(output["detection_scores"][idx] * 100)
+
+        # Extract the bounding box
+        box = output["boxes"][idx]
+
+        color = None
+        label_text = ""
+
+        # if obj_class == LABEL_PERSON:
+        #     color = (0, 255, 255)
+        #     label_text = f"Person {score}"
+        # if obj_class == LABEL_CAR:
+        #     color = (255, 255, 0)
+        #     label_text = f"Car {score}"
+        # if obj_class == LABEL_BUS:
+        #     color = (255, 255, 0)
+        #     label_text = f"Bus {score}"
+        # if obj_class == LABEL_TRUCK:
+        #     color = (255, 255, 0)
+        #     label_text = f"Truck {score}"
+        # if obj_class == LABEL_STOP_SIGN:
+        #     color = (128, 0, 0)
+        #     label_text = f"Stop Sign {score}"
+        if obj_class == LABEL_TRAFFIC_LIGHT:
+            color = (255, 255, 255)
+            label_text = f"Traffic Light {score}"
+
+            if model_traffic_lights:
+
+                # Crop the traffic light and classify its color with the Inception-based model
+                img_traffic_light = img_rgb[box["y"]:box["y2"], box["x"]:box["x2"]]
+                img_inception = cv2.resize(img_traffic_light, (299, 299))
+
+                # Uncomment this if you want to save a cropped image of the traffic light
+                # cv2.imwrite(str(output_file).replace('.jpg', '_crop.jpg'), cv2.cvtColor(img_inception, cv2.COLOR_RGB2BGR))
+                img_inception = np.array([preprocess_input(img_inception)])
+
+                prediction = model_traffic_lights.predict(img_inception)
+                label = np.argmax(prediction)
+                score_light = int(np.max(prediction) * 100)
+
+                match label:
+                    case 0: label_text = f"Green {score_light}"
+                    case 1: label_text = f"Yellow {score_light}"
+                    case 2: label_text = f"Red {score_light}"
+                    case _: label_text = "NO-LIGHT"
+
+        if color and label_text and accept_box(output["boxes"], idx, 5) and score > 50:
+            cv2.rectangle(img_rgb, (box["x"], box["y"]), (box["x2"], box["y2"]), color, 2)
+            cv2.putText(img_rgb, label_text, (box["x"], box["y"]), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+    cv2.imwrite(str(output_file), cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR))
+    logger.info(output_file)
+
+
+def center(box, coord_type):
+    """Get the center of the bounding box along one axis ("x" or "y")."""
+    return (box[coord_type] + box[coord_type + "2"]) / 2
+
+
+def perform_object_detection(model, file_name, save_annotated=False, model_traffic_lights=None):
+    """Perform object detection on an image using the predefined neural network."""
+    # Load the image
+    img_bgr = cv2.imread(str(file_name))
+    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+    input_tensor = tf.convert_to_tensor(img_rgb)  # Input needs to be a tensor
+    input_tensor = input_tensor[tf.newaxis, ...]
+
+    # Run the model
+    output = model(input_tensor)
+
+    logger.info(f"Number of detections: {int(output['num_detections'])}")
+
+    # Convert the tensors to NumPy arrays
+    num_detections = int(output.pop("num_detections"))
+    output = {key: value[0, :num_detections].numpy()
+              for key, value in output.items()}
+    output["num_detections"] = num_detections
+
+    logger.info(f"Detection classes: {output['detection_classes']}")
+    logger.info(f"Detection boxes: {output['detection_boxes']}")
+
+    # The detected classes need to be integers.
+    output["detection_classes"] = output["detection_classes"].astype(np.int64)
+    output["boxes"] = [
+        {"y": int(box[0] * img_rgb.shape[0]), "x": int(box[1] * img_rgb.shape[1]), "y2": int(box[2] * img_rgb.shape[0]),
+         "x2": int(box[3] * img_rgb.shape[1])} for box in output["detection_boxes"]]
+
+    if save_annotated:
+        save_image_annotated(img_rgb, file_name, output, model_traffic_lights)
+
+    return img_rgb, output, file_name
+
+
+def perform_object_detection_video(model, video_frame, model_traffic_lights=None):
+    """Perform object detection on a video frame using the predefined neural network."""
+    # Convert the frame from BGR to RGB
+    img_rgb = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
+    input_tensor = tf.convert_to_tensor(img_rgb)  # Input needs to be a tensor
+    input_tensor = input_tensor[tf.newaxis, ...]
+
+    # Run the model
+    output = model(input_tensor)
+
+    # Convert the tensors to NumPy arrays
+    num_detections = int(output.pop("num_detections"))
+    output = {key: value[0, :num_detections].numpy()
+              for key, value in output.items()}
+    output["num_detections"] = num_detections
+
+    # The detected classes need to be integers.
+    output["detection_classes"] = output["detection_classes"].astype(np.int64)
+    output["boxes"] = [
+        {"y": int(box[0] * img_rgb.shape[0]), "x": int(box[1] * img_rgb.shape[1]), "y2": int(box[2] * img_rgb.shape[0]),
+         "x2": int(box[3] * img_rgb.shape[1])} for box in output["detection_boxes"]]
+
+    # For each bounding box that was detected
+    for idx, _ in enumerate(output["boxes"]):
+
+        # Extract the type of the object that was detected
+        obj_class = output["detection_classes"][idx]
+
+        # How confident the object detection model is in the object's type
+        score = int(output["detection_scores"][idx] * 100)
+
+        # Extract the bounding box
+        box = output["boxes"][idx]
+
+        color = None
+        label_text = ""
+
+        # if obj_class == LABEL_PERSON:
+        #     color = (0, 255, 255)
+        #     label_text = f"Person {score}"
+        # if obj_class == LABEL_CAR:
+        #     color = (255, 255, 0)
+        #     label_text = f"Car {score}"
+        # if obj_class == LABEL_BUS:
+        #     color = (255, 255, 0)
+        #     label_text = f"Bus {score}"
+        # if obj_class == LABEL_TRUCK:
+        #     color = (255, 255, 0)
+        #     label_text = f"Truck {score}"
+        # if obj_class == LABEL_STOP_SIGN:
+        #     color = (128, 0, 0)
+        #     label_text = f"Stop Sign {score}"
+        if obj_class == LABEL_TRAFFIC_LIGHT:
+            color = (255, 255, 255)
+            label_text = f"Traffic Light {score}"
+
+            if model_traffic_lights:
+
+                # Crop the traffic light and classify its color with the Inception-based model
+                img_traffic_light = img_rgb[box["y"]:box["y2"], box["x"]:box["x2"]]
+                img_inception = cv2.resize(img_traffic_light, (299, 299))
+
+                img_inception = np.array([preprocess_input(img_inception)])
+
+                prediction = model_traffic_lights.predict(img_inception)
+                label = np.argmax(prediction)
+                score_light = int(np.max(prediction) * 100)
+
+                match label:
+                    case 0: label_text = f"Green {score_light}"
+                    case 1: label_text = f"Yellow {score_light}"
+                    case 2: label_text = f"Red {score_light}"
+                    case _: label_text = "NO-LIGHT"  # This is not a traffic light
+
+        # `score` is the confidence (in % terms) that this is a traffic light; on the
+        # actual video frame we display the confidence that the light is red, green,
+        # yellow, or not a valid traffic light.
+        if color and label_text and accept_box(output["boxes"], idx, 5) and score > 20:
+            cv2.rectangle(img_rgb, (box["x"], box["y"]), (box["x2"], box["y2"]), color, 2)
+            cv2.putText(img_rgb, label_text, (box["x"], box["y"]), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+    return cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
+
+
+def double_shuffle(images, labels):
+    """Shuffle the images and labels together to add some randomness."""
+    indexes = np.random.permutation(len(images))
+
+    return [images[idx] for idx in indexes], [labels[idx] for idx in indexes]
+
+
+def reverse_preprocess_inception(image_preprocessed):
+    """Reverse Inception preprocessing: map values from [-1, 1] back to [0, 255]."""
+    image = (image_preprocessed + 1) * 127.5  # note the parentheses: x / 127.5 - 1 is undone by (x + 1) * 127.5
+    return image.astype(np.uint8)
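
Notes (not part of the patch): a minimal driver sketch for the still-image path.
It assumes an IMAGES_PATH directory constant exists in detector.paths alongside
LOGS_PATH and IMAGES_OUT_PATH (not defined in this patch), and it omits the
optional traffic-light classifier.

    # run_images.py -- hypothetical driver, not part of this patch
    from pathlib import Path

    from detector.object_detection import load_ssd_coco, perform_object_detection
    from detector.paths import IMAGES_PATH  # assumed constant, not defined in this patch

    # Downloads the SSD ResNet50 archive on first use, then loads it from the Keras cache
    model_ssd = load_ssd_coco()

    for file in sorted(Path(IMAGES_PATH).glob("*.jpg")):
        # save_annotated=True writes the annotated copy into IMAGES_OUT_PATH
        _, output, _ = perform_object_detection(model_ssd, file, save_annotated=True)
        print(f"{file.name}: {output['num_detections']} detections")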
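
A similar sketch for the frame-by-frame video path. The video file name and the
Keras classifier path are placeholders; the classifier is assumed to be the
299x299 Inception-based model that perform_object_detection_video expects.

    # run_video.py -- hypothetical driver, not part of this patch
    import cv2
    from tensorflow.keras.models import load_model as load_keras_model

    from detector.object_detection import load_ssd_coco, perform_object_detection_video

    model_ssd = load_ssd_coco()
    model_traffic_lights = load_keras_model("traffic_light_model.h5")  # placeholder path

    cap = cv2.VideoCapture("input_video.mp4")  # placeholder path
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # Returns the annotated frame in BGR order, ready for cv2.imshow or VideoWriter
        annotated_bgr = perform_object_detection_video(model_ssd, frame, model_traffic_lights)
        cv2.imshow("detections", annotated_bgr)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()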