Added general pipeline and config file

2022-04-05 12:25:27 +01:00 · 2022-04-05 12:25:27 +01:00 · 8258b225ce
commit 8258b225ce
parent 868473e284
3 changed files with 267 additions and 74 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,12 @@
+# Autophotographer
+
+Autophotographer is a tool that helps users filter the best images from a video.
+
+```
+.
+├── docs            Report and Project proposal
+├── img             images for the repository
+├── README.md
+├── src             project source code
+└── terraform       terraform IaC
+```
--- a/src/autophotographer.py
+++ b/src/autophotographer.py
@ -1,87 +1,239 @@
 import cv2
-import sys
 import argparse
 import os
-import pathlib
-import numpy
-import time
+from os.path import abspath
+import pandas
+import matplotlib.pyplot as plt
+import numpy as np
+from skimage.exposure import is_low_contrast
+import yaml

-# Process arguments
-def parse_arguments(argv=None):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input', dest='inputfile', type=pathlib.Path, help='Specify a video file')
-    parser.add_argument('-o', '--output', dest='outputfolder', type=pathlib.Path, help='Specify a folder to save frames to')
-    return parser.parse_args()
+# Import local packages
+from focusdetection.focusdetection import fast_fourier

-# Convert video to frames
-def video_to_frames():
-    startTime = time.time()
-    print("Converting video to frames...")
-    capture = cv2.VideoCapture(str(inputfile))
-    success,image = capture.read()
-    count = 0
-    while success:
-        outputfile = outputfolder + "/frame%d.jpg" % count
-        # print(outputfile)
-        cv2.imwrite(outputfile, image)
-        success,image = capture.read()
-        count +=1
-    endTime = time.time()
-    totalTime = endTime - startTime
-    print("Exported " + str(count) + " frames in " + str(totalTime) + " seconds.")
+#import sys
+#import pathlib
+#import numpy
+#import time

-# Shrink set based on filesize
-def display_file_sizes():
-    filesizes = []
-    for filename in os.listdir(outputfolder):
-        filepath = outputfolder + "/" + filename
-        filesize = os.path.getsize(filepath)
-        print(filepath + ": " + str(filesize))
-        filesizes.append(filesize)
-    # work out average
-    average = sum(filesizes)/len(filesizes)
-    print ("Average is: " + str(average))
-    # delete files below average
-    count = 0
-    for filename in os.listdir(outputfolder):
-        filepath = outputfolder + "/" + filename
-        if filesizes[count] < average:
-            # print(filepath + ": " + str(filesizes[count]))
-            os.remove(filepath)
-        count += 1
+# accepted image formats: lower-case file extensions, compared with
+# str.endswith() against lower-cased paths
+image_formats = (".jpg", ".jpeg", ".png")

-#def remove_similar_frames():
-def order_frames_by_filesize():
-    frames = os.listdir(outputfolder)
-    frames = sorted(frames, key = lambda x: os.stat(os.path.join(outputfolder, x)).st_size, reverse = True)
-    for frame in frames:
-        filesize = os.stat(os.path.join(outputfolder, frame)).st_size
-        if filesize > 1024:
-            filesize = filesize / 1024
-            print(frame + ": " + str(filesize) + " KB")
+# accepted video formats: lower-case file extensions, compared with
+# str.endswith() against lower-cased paths
+video_formats = (".mp4", ".mov", ".avi", ".flv", ".mkv")
+
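+# load config file: parse the YAML file at `path` (default: config.yml next to
+# this module) with yaml.safe_load and return the result; prints an error and
+# quits if the path does not exist or does not end in .yml/.yaml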
+def load_config(path=os.path.join(os.path.dirname(__file__), "./config.yml")):
+    abs_path = os.path.abspath(path)
+
+    # check if file exists
+    if os.path.exists(abs_path):
+        if abs_path.lower().endswith((".yml", ".yaml")):
+            print("[INFO] Loading config...")
+            # attempt to open file
+            with open(abs_path) as file:
+                return yaml.safe_load(file)
        else:
-            print(frame + ": " + str(filesize))
+            print("[ERRO] Please specify a file with extension '.yml' or '.yaml'.")
+            quit()
+    else:
+        print("[ERRO] Path does not exist")
+        quit()

-def order_frames_by_brightness():
-    frames_path = os.listdir(outputfolder)
-    frames_path = sorted(frames_path, key = return_frame_brightness)
-    for frame_path in frames_path:
-        print(frame_path + ": " + str(return_frame_brightness(frame_path)))
+def filter_to_function(imagefilter, prettyName=False):
+    """Announce the filter that corresponds to a config entry.
+
+    The filter functions themselves are not called from here yet; this only
+    logs which filter was requested.
+
+    Args:
+        imagefilter: filter name from the config ("brightness", "filesize",
+            "contrast" or "focus").
+        prettyName: currently unused; kept so existing callers keep working.
+
+    Returns:
+        None. Unrecognised names produce a warning and are otherwise ignored.
+    """
+    messages = {
+        "brightness": "[INFO] Filtering based on brightness...",
+        "filesize": "[INFO] Filtering based on filesize...",
+        "contrast": "[INFO] Filtering based on contrast...",
+        "focus": "[INFO] Filtering based on focus...",
+    }
+    # Dispatch on the parameter itself rather than on a module-level name, so
+    # the function works no matter what the caller's variable is called.
+    # Non-string config entries (e.g. a nested mapping) fall through to the
+    # warning instead of raising on the dict lookup.
+    message = messages.get(imagefilter) if isinstance(imagefilter, str) else None
+    if message is None:
+        message = "[WARN] Filter not recognised. Ignoring..."
+    print(message)

-def return_frame_brightness(frame_path):
-    frame = cv2.imread(os.path.join(outputfolder, frame_path))
-    hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
-    h, s, v = cv2.split(hsv_frame)
-    average_v = numpy.average(v)
-    return average_v
+# keep only the paths whose extension is an accepted image or video format
+def filter_paths(paths):
+    """Return the entries of ``paths`` that end in a supported extension.
+
+    The comparison is case-insensitive; the input order is preserved.
+    """
+    print("[INFO] Filtering paths by filetype...")
+    accepted = video_formats + image_formats
+    return [candidate for candidate in paths
+            if candidate.lower().endswith(accepted)]

-args = parse_arguments()
-inputfile = str(args.inputfile.absolute())
-outputfolder = str(args.outputfolder.absolute())
+# load frames from a video
+def load_video(path):
+    """Decode every frame of the video at ``path``.
+
+    Args:
+        path: location of the video file, passed to ``cv2.VideoCapture``.
+
+    Returns:
+        A list with one entry per frame, in stream order. The list is empty
+        when no frame could be read.
+    """
+    print("[INFO] Loading video...")
+    stream = cv2.VideoCapture(path)
+    frames = []
+    try:
+        while True:
+            (retrieved, frame) = stream.read()
+            # read() reports False once no further frame is available
+            if not retrieved:
+                break
+            frames.append(frame)
+    finally:
+        # release the capture handle even if decoding raises
+        stream.release()
+    return frames

-# Convert video to frames
-video_to_frames()
+# load a single image
+def load_image(path):
+    """Read the file at ``path`` with ``cv2.imread`` and return the result."""
+    print("[INFO] Loading image...")
+    return cv2.imread(path)

-display_file_sizes()
-#order_frames_by_filesize()
-#order_frames_by_brightness()
+# load every path according to its file type
+def load_files(paths):
+    """Load all images and videos named in ``paths``.
+
+    Images are read with ``load_image``; videos are split into frames with
+    ``load_video``. Anything else is skipped with a log line.
+
+    Returns:
+        A tuple ``(images, videos)``: ``images`` holds one entry per image
+        file, ``videos`` holds one list of frames per video file.
+    """
+    print("[INFO] Loading files...")
+    images = []
+    videos = []
+
+    for path in paths:
+        lowered = path.lower()
+        if lowered.endswith(image_formats):
+            # still image: load it as-is
+            images.append(load_image(path))
+        elif lowered.endswith(video_formats):
+            # video: keep its frames together as one list
+            videos.append(load_video(path))
+        else:
+            print("[INFO] Skipped non-image/non-video file.")
+
+    # summary of how many files were loaded
+    if not images and not videos:
+        print("[INFO] No valid images or videos found.")
+    else:
+        print("[INFO] {} image(s) and {} video(s) loaded.".format(len(images), len(videos)))
+
+    # every video frame counts as an image in the grand total
+    total_images = sum(len(video) for video in videos) + len(images)
+    print("[INFO] Total images loaded (including video frames): {}".format(total_images))
+
+    return (images, videos)
+
+# filter images based on filesize
+def filter_filesize(paths):
+    """Drop files that are unusually small compared with the rest of the set.
+
+    A file is kept when its size is at least one (population) standard
+    deviation below the mean size of all files in ``paths``.
+
+    Args:
+        paths: paths of existing files.
+
+    Returns:
+        A new list with the kept paths, in their original order. An empty
+        input yields an empty list.
+
+    Raises:
+        OSError: if a path cannot be inspected by ``os.path.getsize``.
+    """
+    print("[INFO] Filtering by filesize...")
+    # nothing to measure; also avoids dividing by zero for the mean
+    if not paths:
+        return []
+
+    # read each size once and reuse it for both the statistics and the filter
+    filesizes = [os.path.getsize(path) for path in paths]
+    filesize_avg = sum(filesizes) / len(filesizes)
+    filesize_std = pandas.Series(filesizes).std(ddof=0)
+    filesize_filter = filesize_avg - filesize_std
+    print("[INFO] min size: {}, max size: {}, avg size: {}, std size: {}".format(
+        min(filesizes), max(filesizes), filesize_avg, filesize_std))
+
+    # remove images more than 1 std below the mean; ">=" so that a set of
+    # equally sized files (std == 0), or a single file, is kept in full
+    return [path for path, size in zip(paths, filesizes)
+            if size >= filesize_filter]
+
+# show a box plot of image file sizes for data insight
+def plot_filesizes(paths):
+    """Plot the size distribution of the image files in ``paths``.
+
+    Only paths with an accepted image extension are measured; the figure is
+    displayed with ``plt.show()`` and nothing is returned.
+    """
+    image_sizes = sorted(
+        os.path.getsize(candidate)
+        for candidate in paths
+        if candidate.lower().endswith(image_formats)
+    )
+    plt.figure(figsize=(10, 7))
+    plt.boxplot(np.array(image_sizes))
+    plt.show()
+
+# resize image and turn to greyscale
+def process_image(path, width):
+    """Load an image, scale it to ``width`` pixels wide and convert to grey.
+
+    The height is scaled by the same factor as the width so the aspect ratio
+    of the original image is preserved.
+
+    Args:
+        path: location of the image file.
+        width: target width in pixels.
+
+    Returns:
+        The resized single-channel (greyscale) image.
+
+    Raises:
+        ValueError: if the file cannot be read as an image.
+    """
+    print("[INFO] Processing image...")
+    image = cv2.imread(path)
+    # imread signals failure by returning None instead of raising
+    if image is None:
+        raise ValueError("Could not read image: {}".format(path))
+    # shape is (rows, columns, ...): scale the row count by the ratio of the
+    # new width to the original width (columns), never below one pixel
+    original_height, original_width = image.shape[:2]
+    height = max(1, int(round(original_height * width / original_width)))
+    image = cv2.resize(image, (width, height))
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    return image
+
+# filter images based on contrast
+def filter_contrast(paths, thresh=0.35):
+    """Return the image paths that are not classed as low contrast.
+
+    Video paths are skipped. Every other path is run through
+    ``process_image`` at width 500 and tested with ``is_low_contrast``
+    using ``thresh``.
+    """
+    print("[INFO] Filtering by contrast...")
+    kept = []
+    for candidate in paths:
+        if candidate.lower().endswith(video_formats):
+            continue
+        print(candidate)
+        grey = process_image(candidate, 500)
+        if not is_low_contrast(grey, thresh):
+            print("[INFO] High contrast")
+            kept.append(candidate)
+        else:
+            print("[INFO] Low contrast")
+    return kept
+
+# filter images based on brightness
+def filter_brightness(paths):
+    """Placeholder for the brightness filter.
+
+    Currently only logs a message; ``paths`` is not inspected and nothing is
+    returned.
+    """
+    print("[INFO] Filtering by brightness...")
+
+# filter images based on focus/blurriness
+def filter_focus(paths):
+    """Return the image paths that ``fast_fourier`` does not flag as blurry.
+
+    Video paths are skipped. Every other path is run through
+    ``process_image`` at width 500 and passed to ``fast_fourier`` with
+    ``size=60`` and ``thresh=30``; blurry images are logged and dropped.
+    """
+    print("[INFO] Filtering by focus/blurriness...")
+    sharp_paths = []
+    for candidate in paths:
+        # videos are not analysed here
+        if candidate.lower().endswith(video_formats):
+            continue
+        print(candidate)
+        grey = process_image(candidate, 500)
+        mean, blurry = fast_fourier(grey, size=60, thresh=30)
+        verdict = "Blurry ({:.4f})" if blurry else "Not blurry ({:.4f})"
+        if not blurry:
+            sharp_paths.append(candidate)
+        else:
+            print(("[INFO] {}: " + verdict).format(candidate, mean))
+    return sharp_paths
+
+# rank remaining images using a CNN
+def rank_images():
+    """Placeholder for CNN-based ranking; currently only logs a message."""
+    print("[INFO] Ranking images using machine learning...")
+    
+# parse command line arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("-i", "--input", type=os.path.abspath, required=True, nargs="+",
+    help="path to video or image folder")
+parser.add_argument("-c", "--config", type=os.path.abspath, help="path to config file")
+args = vars(parser.parse_args())
+
+# keep only inputs with a supported image/video extension
+paths = filter_paths(args["input"])
+
+# load in config file (fall back to the default location when none is given)
+if args["config"] is not None:
+    autophotoConf = load_config(args["config"])
+else:
+    autophotoConf = load_config()
+
+# Order and selection of operations from config file
+for imageFilter in autophotoConf["filters"]:
+    n_of_images_before = len(paths)
+    filter_to_function(imageFilter)
+    n_of_images_after = len(paths)
+
+    # number of images removed by this filter
+    diff = n_of_images_before - n_of_images_after
+    print("[INFO] Filtered {}/{} images via {} filtering.".format(
+        diff, n_of_images_before, imageFilter))
+
+if autophotoConf["CNNrank"]:
+    print("[INFO] Running CNN ranking...")
--- a/src/config.yml
+++ b/src/config.yml
@ -0,0 +1,29 @@
+---
+# Config file for autophotographer
+
+# List of filters to apply in order
+filters:
+  - brightness
+  - filesize
+  - contrast
+  - focus
+
+# Whether or not to apply CNN ranking
+CNNrank: True
+
+# Options for brightness filter
+brightness_options:
+  threshold: 0.35
+
+# Options for filesize filter
+filesize_options:
+  threshold: 0.35
+
+# Options for contrast filter
+contrast_options:
+  threshold: 0.35
+
+# Options for focus filter
+focus_options:
+  threshold: 0.35
+...