From 6b54d44959808414b44ad32c74cb918471c86b79 Mon Sep 17 00:00:00 2001 From: Oscar Pocock Date: Mon, 25 Apr 2022 13:29:34 +0100 Subject: [PATCH] Refactored and modified the project structure --- .woodpecker.yml | 7 +- docker-compose.yaml | 13 + environment.yml | 5 + src/.gitignore | 160 ++++++++++++ src/autophotographer.py | 229 +++++++++++++----- src/{autophotographer => cnn}/__init__.py | 0 .../autophotographer.py | 0 src/{autophotographer => cnn}/config.py | 0 src/{autophotographer => cnn}/dataframe.csv | 0 src/{autophotographer => cnn}/dataset.py | 12 +- .../export}/tensorImages.pt | 0 .../export}/tensorRatings.pt | 0 .../export}/trainDataset.pt | 0 .../export}/valDataset.pt | 0 src/{autophotographer => cnn}/model.py | 0 src/config.yml | 19 +- .../brightness}/__init__.py | 0 src/filters/brightness/brightness.py | 10 + src/filters/contrast/__init__.py | 0 src/filters/filesize/__init__.py | 0 src/filters/focusdetection/__init__.py | 0 .../focusdetection/focusdetection.py | 0 src/predict.py | 4 +- 23 files changed, 383 insertions(+), 76 deletions(-) create mode 100644 docker-compose.yaml create mode 100644 environment.yml create mode 100644 src/.gitignore rename src/{autophotographer => cnn}/__init__.py (100%) rename src/{autophotographer => cnn}/autophotographer.py (100%) rename src/{autophotographer => cnn}/config.py (100%) rename src/{autophotographer => cnn}/dataframe.csv (100%) rename src/{autophotographer => cnn}/dataset.py (92%) rename src/{autophotographer => cnn/export}/tensorImages.pt (100%) rename src/{autophotographer => cnn/export}/tensorRatings.pt (100%) rename src/{autophotographer => cnn/export}/trainDataset.pt (100%) rename src/{autophotographer => cnn/export}/valDataset.pt (100%) rename src/{autophotographer => cnn}/model.py (100%) rename src/{focusdetection => filters/brightness}/__init__.py (100%) create mode 100644 src/filters/brightness/brightness.py create mode 100644 src/filters/contrast/__init__.py create mode 100644 
src/filters/filesize/__init__.py create mode 100644 src/filters/focusdetection/__init__.py rename src/{ => filters}/focusdetection/focusdetection.py (100%) diff --git a/.woodpecker.yml b/.woodpecker.yml index 47cbe14..f1cd3ef 100644 --- a/.woodpecker.yml +++ b/.woodpecker.yml @@ -2,6 +2,11 @@ pipeline: flake8: image: python:3.8 commands: - - pip install flake8 + - python3.8 -m pip install flake8 - flake8 src/ + mypy: + image: python:3.8 + commands: + - python3.8 -m pip install mypy + - mypy src/ +branches: dev \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..705144c --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,13 @@ +version: "3.3" + +services: + autophotographer: + build: + context: . + dockerfile: Dockerfile.cpu + image: noble/autophotographer + volumes: + # Source of project + - ./src:/src + # Location of dataset + - $HOME/Documents/Datasets:/datasets \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..4e0f912 --- /dev/null +++ b/environment.yml @@ -0,0 +1,5 @@ +name: autophotographer +dependencies: + - numpy + - pandas + - python=3.8 \ No newline at end of file diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/src/autophotographer.py b/src/autophotographer.py index 6b94504..30243b0 100644 --- a/src/autophotographer.py +++ b/src/autophotographer.py @@ -1,27 +1,40 @@ +# IMPORTS +# import from +from tqdm import tqdm +from skimage.exposure import is_low_contrast +from os.path import abspath + +# import as +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + +# imports import cv2 import argparse import os -from os.path import abspath -import pandas -import matplotlib.pyplot as plt -import numpy as np -from skimage.exposure import is_low_contrast import yaml +import math -# Import local packages -from focusdetection.focusdetection import fast_fourier - -#import sys -#import pathlib -#import numpy -#import time +# local imports for filters +import filters.focusdetection.focusdetection as focusdetection +import filters.brightness.brightness as brightness +# GLOBAL VARIABLES # accepted image formats image_formats = (".jpg", ".jpeg", ".png") # accepted video formats video_formats = (".mp4", ".mov", ".avi", ".flv", 
".mkv") +# default options +brightness_thresh = 0.35 +focus_thresh = 0.35 +filesize_thresh = 0.35 +contrast_thresh = 0.35 +ignore_video = False + +# FUNCTIONS # load config file def load_config(path=os.path.join(os.path.dirname(__file__), "./config.yml")): abs_path = os.path.abspath(path) @@ -40,33 +53,50 @@ def load_config(path=os.path.join(os.path.dirname(__file__), "./config.yml")): print("[ERRO] Path does not exist") quit() -def filter_to_function(imagefilter, prettyName=False): - if imageFilter == "brightness": - # filter_brightness() - print("[INFO] Filtering based on brightness...") - elif imageFilter == "filesize": - # filter_filesize() - print("[INFO] Filtering based on filesize...") - elif imageFilter == "contrast": - # filter_contrast() - print("[INFO] Filtering based on contrast...") - elif imageFilter == "focus": - # filter_focus() - print("[INFO] Filtering based on focus...") - else: - print("[WARN] Filter not recognised. Ignoring...") +# load the correct filter function from filter name +def filter_to_function(imagefilter: str, paths: list) -> list: + if imageFilter == "brightness": + paths = filter_brightness(paths, brightness_thresh) + print("[INFO] Filtering based on brightness...") + elif imageFilter == "filesize": + paths = filter_filesize(paths, filesize_thresh) + print("[INFO] Filtering based on filesize...") + elif imageFilter == "contrast": + paths = filter_contrast(paths, contrast_thresh) + print("[INFO] Filtering based on contrast...") + elif imageFilter == "focus": + paths = filter_focus(paths, focus_thresh) + print("[INFO] Filtering based on focus...") + else: + print("[WARN] Filter not recognised. 
Ignoring...") + return paths # filter paths by accepted file extensions -def filter_paths(paths): +def filter_paths(paths: list) -> list: print("[INFO] Filtering paths by filetype...") filtered_paths = [] - for path in paths: - if path.lower().endswith(video_formats + image_formats): - filtered_paths.append(path) + + # if input location is a directory then fetch all files in that directory + if len(paths) == 1 and os.path.isdir(paths[0]): + list_dir = os.listdir(paths[0]) + new_paths = [] + for filename in list_dir: + new_paths.append(os.path.join(args["input"][0], filename)) + paths = new_paths + + # if ignore_video is set to true then filter out video files + if ignore_video: + for path in paths: + if path.lower().endswith(image_formats): + filtered_paths.append(path) + else: + for path in paths: + if path.lower().endswith(video_formats + image_formats): + filtered_paths.append(path) return filtered_paths # load frames from images -def load_video(path): +def load_video(path: str) -> list: print("[INFO] Loading video...") stream = cv2.VideoCapture(path) frames = [] @@ -78,8 +108,9 @@ def load_video(path): return frames # load images -def load_image(path): - print("[INFO] Loading image...") +# note although trivial, a method was created so the way in which a file is loaded can be easily changed +def load_image(path: str): + # print("[INFO] Loading image...") image = cv2.imread(path) return image @@ -122,7 +153,7 @@ def load_files(paths): return (images, videos) # filter images based on filesize -def filter_filesize(paths): +def filter_filesize(paths, thresh=0.35): print("[INFO] Filtering by filesize...") filesizes = [] filtered_paths = [] @@ -131,7 +162,7 @@ def filter_filesize(paths): filesize = os.path.getsize(path) filesizes.append(filesize) filesize_avg = sum(filesizes)/len(filesizes) - filesize_std = pandas.Series(filesizes).std(ddof=0) + filesize_std = pd.Series(filesizes).std(ddof=0) filesize_min = min(filesizes) filesize_max = max(filesizes) filesize_filter 
= filesize_avg - filesize_std @@ -157,10 +188,11 @@ def plot_filesizes(paths): fig = plt.figure(figsize =(10, 7)) plt.boxplot(data) plt.show() + return plt # resize image and turn to greyscale def process_image(path, width): - print("[INFO] Processing image...") + # print("[INFO] Processing image...") image = cv2.imread(path) height = int(image.shape[0] * (width / image.shape[0])) image = cv2.resize(image, (width, height)) @@ -169,38 +201,63 @@ def process_image(path, width): # filter images based on contrast def filter_contrast(paths, thresh=0.35): - print("[INFO] Filtering by contrast...") + # print("[INFO] Filtering by contrast...") filtered_paths = [] - for path in paths: + for path in tqdm(paths, desc="Filtering contrast"): if path.lower().endswith(video_formats): continue - print(path) + # print(path) image = process_image(path, 500) if is_low_contrast(image, thresh): - print("[INFO] Low contrast") + # print("[INFO] Low contrast") + continue else: - print("[INFO] High contrast") + # print("[INFO] High contrast") filtered_paths.append(path) return filtered_paths # filter images based on brightness -def filter_brightness(paths): - print("[INFO] Filtering by brightness...") +def filter_brightness(paths, thresh=0.35): + filtered_paths = [] + for path in tqdm(paths, desc="Filtering brightness"): + image = load_image(path) + # width = 20 + # height = int(image.shape[0] * (width / image.shape[0])) + # image = cv2.resize(image, (width, height)) + # L, A, B = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB)) + # L = L/np.max(L) + luminance = brightness.get_luminance_value(image) + if luminance > thresh: + filtered_paths.append(path) + return filtered_paths + + + # print("[INFO] Filtering by brightness...") + +def filter_brightness_dataframe(paths): + df = pd.DataFrame(paths, columns =['path']) + for path in tqdm(paths, desc="Filtering brightness"): + image = load_image(path) + luminance = brightness.get_luminance_value(image) + row = (df.index[df["path"] == path])[0] 
+ df.at[row, "brightness"] = luminance + return df # filter images based on focus/blurriness -def filter_focus(paths): +def filter_focus(paths, thresh=0.35): print("[INFO] Filtering by focus/blurriness...") filtered_paths = [] # read images in greyscale - for path in paths: + for path in tqdm(paths, desc="Filtering focus"): if path.lower().endswith(video_formats): continue - print(path) + # print(path) image = process_image(path, 500) - (mean, blurry) = fast_fourier(image, size=60, thresh=30) + (mean, blurry) = focusdetection.fast_fourier(image, size=60, thresh=30) blurStats = "Blurry ({:.4f})" if blurry else "Not blurry ({:.4f})" if blurry: - print(("[INFO] {}: " + blurStats).format(path, mean)) + # print(("[INFO] {}: " + blurStats).format(path, mean)) + continue else: filtered_paths.append(path) return filtered_paths @@ -209,31 +266,81 @@ def filter_focus(paths): def rank_images(): print("[INFO] Ranking images using machine learning...") -# parse commande line arguments +def display_images(paths, location): + # create figure + + row = math.ceil(math.sqrt(len(paths))) + width = 100 + size = width * row + fig = plt.figure(figsize=(50, 50)) + + index = 1 + for path in tqdm(paths): + image = cv2.imread(path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + height = int(image.shape[0] * (width / image.shape[0])) + image = cv2.resize(image, (width, height)) + fig.add_subplot(row, row, index) + plt.imshow(image) + plt.axis('off') + filename = os.path.split(path)[1] + print(filename) + plt.title(filename) + index += 1 + + plt.savefig(location) + +# parse command line arguments parser = argparse.ArgumentParser() parser.add_argument("-i", "--input", type=os.path.abspath, required=True, nargs="+", help="path to video or image folder") parser.add_argument("-c", "--config", type=os.path.abspath, help="path to config file") args = vars(parser.parse_args()) -paths = filter_paths(args["input"]) - # load in config file if args["config"] is not None: autophotoConf = 
load_config(args["config"]) else: autophotoConf = load_config() -# Order and selection of operations from config file -for imageFilter in autophotoConf["filters"]: - n_of_images_before = len(paths) - filter_to_function(imageFilter) - n_of_images_after = len(paths) +# Load values for options +if autophotoConf["brightness_options"]["threshold"] is not None: + brightness_thresh = autophotoConf["brightness_options"]["threshold"] +if autophotoConf["focus_options"]["threshold"] is not None: + focus_thresh = autophotoConf["focus_options"]["threshold"] +if autophotoConf["contrast_options"]["threshold"] is not None: + contrast_thresh = autophotoConf["contrast_options"]["threshold"] +if autophotoConf["filesize_options"]["threshold"] is not None: + filesize_thresh = autophotoConf["filesize_options"]["threshold"] +if autophotoConf["ignore_video"] is not None: + ignore_video = autophotoConf["ignore_video"] - # calculate set difference after filtering - diff = n_of_images_before - n_of_images_after - print("[INFO] Filtered {}/{} images via {} filtering.".format( - n_of_images_after, n_of_images_before, imageFilter)) +paths = filter_paths(args["input"]) +print("[INFO] Loaded {} objects.".format(len(paths))) +prior_paths = [] +path_diff = [] + +# Order and selection of operations from config file +if autophotoConf["filters"] is not None: + + # iterate over all chosen filters + for imageFilter in autophotoConf["filters"]: + prior_paths = paths + + # run given filter + paths = filter_to_function(imageFilter, paths) + + path_diff = list(set(paths) - set(prior_paths)) + list(set(prior_paths) - set(paths)) + diff = len(prior_paths) - len(paths) + # filename = "/src/fig" + "-" + str(imageFilter) + ".png" + # display_images(path_diff, filename) + + # calculate set difference after filtering + if diff == 0: + print("[INFO] No images were filtered.") + else: + print("[INFO] Filtered {}/{} images via {} filtering.".format( + len(paths), len(prior_paths), imageFilter)) if 
autophotoConf["CNNrank"]: print("[INFO] Running CNN ranking...") \ No newline at end of file diff --git a/src/autophotographer/__init__.py b/src/cnn/__init__.py similarity index 100% rename from src/autophotographer/__init__.py rename to src/cnn/__init__.py diff --git a/src/autophotographer/autophotographer.py b/src/cnn/autophotographer.py similarity index 100% rename from src/autophotographer/autophotographer.py rename to src/cnn/autophotographer.py diff --git a/src/autophotographer/config.py b/src/cnn/config.py similarity index 100% rename from src/autophotographer/config.py rename to src/cnn/config.py diff --git a/src/autophotographer/dataframe.csv b/src/cnn/dataframe.csv similarity index 100% rename from src/autophotographer/dataframe.csv rename to src/cnn/dataframe.csv diff --git a/src/autophotographer/dataset.py b/src/cnn/dataset.py similarity index 92% rename from src/autophotographer/dataset.py rename to src/cnn/dataset.py index f26e52c..16d94a8 100644 --- a/src/autophotographer/dataset.py +++ b/src/cnn/dataset.py @@ -22,12 +22,14 @@ datasetDir = "/datasets/" script_directory = os.path.dirname(__file__) projectRoot = abspath(os.path.join(script_directory, "../..")) #projectRoot = "/src/" -print(projectRoot) -tensorImagesPath = os.path.join(projectRoot, "src/autophotographer/tensorImages.pt") -tensorRatingsPath = os.path.join(projectRoot, "src/autophotographer/tensorRatings.pt") -tensorArrayPath = os.path.join(projectRoot, "src/autophotographer/tensorArray.pt") +# tensorImagesPath = os.path.join(projectRoot, "src/autophotographer/tensorImages.pt") +# tensorRatingsPath = os.path.join(projectRoot, "src/autophotographer/tensorRatings.pt") +# tensorArrayPath = os.path.join(projectRoot, "src/autophotographer/tensorArray.pt") +tensorImagesPath = os.path.join(script_directory, "tensorImages.pt") +tensorRatingsPath = os.path.join(script_directory, "tensorRatings.pt") +tensorArrayPath = os.path.join(script_directory, "tensorArray.pt") filePathRatings = 
os.path.join(projectRoot, "data/ratings.txt") -dataframePath = os.path.join(projectRoot, "src/autophotographer/dataframe.csv") +dataframePath = os.path.join(script_directory, "dataframe.csv") if not datasetDir == "": filePathStyle = datasetDir + "AVA/style_image_lists/test.multilab" diff --git a/src/autophotographer/tensorImages.pt b/src/cnn/export/tensorImages.pt similarity index 100% rename from src/autophotographer/tensorImages.pt rename to src/cnn/export/tensorImages.pt diff --git a/src/autophotographer/tensorRatings.pt b/src/cnn/export/tensorRatings.pt similarity index 100% rename from src/autophotographer/tensorRatings.pt rename to src/cnn/export/tensorRatings.pt diff --git a/src/autophotographer/trainDataset.pt b/src/cnn/export/trainDataset.pt similarity index 100% rename from src/autophotographer/trainDataset.pt rename to src/cnn/export/trainDataset.pt diff --git a/src/autophotographer/valDataset.pt b/src/cnn/export/valDataset.pt similarity index 100% rename from src/autophotographer/valDataset.pt rename to src/cnn/export/valDataset.pt diff --git a/src/autophotographer/model.py b/src/cnn/model.py similarity index 100% rename from src/autophotographer/model.py rename to src/cnn/model.py diff --git a/src/config.yml b/src/config.yml index 38992a8..8f8ce19 100644 --- a/src/config.yml +++ b/src/config.yml @@ -1,7 +1,8 @@ --- -# Config file for autophotographer +# Configuration file for the autophotographer tool # List of filters to apply in order +# Note: Possible filters include: brightness, filesize, contrast, focus filters: - brightness - filesize @@ -9,21 +10,25 @@ filters: - focus # Whether or not to apply CNN ranking -CNNrank: True +CNNrank: False -# Options for focus filter +# Ignore video files and don't bother processing them into frames +# Note: Useful if directory contains original video and individual frames from video (prevents processing the same frames more than once) +ignore_video: True + +# Options for brightness filter brightness_options: - 
threshold: 0.35 + threshold: 0.25 -# Options for focus filter +# Options for filesize filter filesize_options: threshold: 0.35 -# Options for focus filter +# Options for contrast filter contrast_options: threshold: 0.35 # Options for focus filter focus_options: - threshold: 0.35 + threshold: 0.5 ... \ No newline at end of file diff --git a/src/focusdetection/__init__.py b/src/filters/brightness/__init__.py similarity index 100% rename from src/focusdetection/__init__.py rename to src/filters/brightness/__init__.py diff --git a/src/filters/brightness/brightness.py b/src/filters/brightness/brightness.py new file mode 100644 index 0000000..93de728 --- /dev/null +++ b/src/filters/brightness/brightness.py @@ -0,0 +1,10 @@ +import numpy as np +import cv2 + +def get_luminance_value(image): + width = 20 + height = int(image.shape[0] * (width / image.shape[0])) + image = cv2.resize(image, (width, height)) + L, A, B = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB)) + L = L/np.max(L) + return np.mean(L) \ No newline at end of file diff --git a/src/filters/contrast/__init__.py b/src/filters/contrast/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/filters/filesize/__init__.py b/src/filters/filesize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/filters/focusdetection/__init__.py b/src/filters/focusdetection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/focusdetection/focusdetection.py b/src/filters/focusdetection/focusdetection.py similarity index 100% rename from src/focusdetection/focusdetection.py rename to src/filters/focusdetection/focusdetection.py diff --git a/src/predict.py b/src/predict.py index 4782d1a..890ca28 100644 --- a/src/predict.py +++ b/src/predict.py @@ -1,8 +1,8 @@ import argparse import os from os.path import abspath -from autophotographer import config -from autophotographer import dataset +from cnn import config +from cnn import dataset from torchvision import transforms 
import matplotlib.pyplot as plt from torch import nn