From 123a5d3dde71f452c3c832de267c661b3175acd6 Mon Sep 17 00:00:00 2001
From: Oscar Pocock
Date: Sun, 20 Mar 2022 17:30:02 +0000
Subject: [PATCH] Added CNN code and tensors

---
Review notes (free-text area of the patch, not part of the commit message):
 * config.py assigns IMAGE_SIZE twice; importers only ever see the later value, 32.
 * WARMUP_MODEL is built from "plot.pth"; presumably "model.pth" was meant - confirm.

 .gitattributes                        |   4 +
 src/autophotographer/__init__.py      |   0
 src/autophotographer/config.py        |  24 ++++
 src/autophotographer/dataset.py       | 195 ++++++++++++++++++++++++++
 src/autophotographer/model.py         | 154 ++++++++++++++++++++
 src/autophotographer/tensorImages.pt  |   3 +
 src/autophotographer/tensorRatings.pt |   3 +
 7 files changed, 383 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 src/autophotographer/__init__.py
 create mode 100644 src/autophotographer/config.py
 create mode 100644 src/autophotographer/dataset.py
 create mode 100644 src/autophotographer/model.py
 create mode 100644 src/autophotographer/tensorImages.pt
 create mode 100644 src/autophotographer/tensorRatings.pt

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..2803c07
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
diff --git a/src/autophotographer/__init__.py b/src/autophotographer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/autophotographer/config.py b/src/autophotographer/config.py
new file mode 100644
index 0000000..f5a4721
--- /dev/null
+++ b/src/autophotographer/config.py
@@ -0,0 +1,24 @@
+import torch
+import os
+
+# https://pytorch.org/hub/pytorch_vision_resnet/
+MEAN = [0.485, 0.456, 0.406]
+STD = [0.229, 0.224, 0.225]
+IMAGE_SIZE = 224  # NOTE(review): overridden by the second IMAGE_SIZE assignment below
+
+VAL_SPLIT = 0.1
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # "cuda" when CUDA is available, else "cpu"
+
+FEATURE_EXTRACTION_BATCH_SIZE = 256
+FINETUNE_BATCH_SIZE = 64
+PRED_BATCH_SIZE = 4
+EPOCHS = 20
+LR = 0.001
+LR_FINETUNE = 0.0005 # REMOVE
+IMAGE_SIZE = 32  # effective value: this later assignment wins over the 224 above
+
+WARMUP_PLOT = os.path.join("output", "plot.png")
+WARMUP_MODEL = os.path.join("output", "plot.pth")  # NOTE(review): "plot.pth" looks copied from the plot name - confirm
+
+TENSOR_IMAGES_PATH = "tensorImages.pt"
+TENSOR_RATINGS_PATH = "tensorRatings.pt"
\ No newline at end of file
diff --git a/src/autophotographer/dataset.py b/src/autophotographer/dataset.py
new file mode 100644
index 0000000..23fc5ac
--- /dev/null
+++ b/src/autophotographer/dataset.py
@@ -0,0 +1,198 @@
+import functools
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import random
+import time
+from sklearn.preprocessing import MinMaxScaler
+import numpy as np
+from sklearn.model_selection import train_test_split
+import cv2
+from pathlib import Path
+import config
+from torchvision import transforms
+import torch
+from torch.utils.data import DataLoader
+import os
+from os.path import abspath
+from PIL import Image, ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+datasetDir = "/datasets/"
+#datasetDir = ""
+script_directory = os.path.dirname(__file__)
+projectRoot = abspath(os.path.join(script_directory, "../.."))
+#projectRoot = "/src/"
+print(projectRoot)
+tensorImagesPath = os.path.join(projectRoot, "src/autophotographer/tensorImages.pt")
+tensorRatingsPath = os.path.join(projectRoot, "src/autophotographer/tensorRatings.pt")
+tensorArrayPath = os.path.join(projectRoot, "src/autophotographer/tensorArray.pt")
+filePathRatings = os.path.join(projectRoot, "data/ratings.txt")
+
+if not datasetDir == "":
+    filePathStyle = datasetDir + "AVA/style_image_lists/test.multilab"
+    filePathIds = datasetDir + "AVA/style_image_lists/test.jpgl"
+    filePathInfoAVA = datasetDir + "AVA/AVA.txt"
+    imgPath= datasetDir + "AVA/images/images"
+
+# Columns 2..11 of AVA.txt are treated as the vote counts for the scores 1..10.
+_VOTE_COLUMNS = list(range(2, 12))
+_SCORES = np.arange(1, 11)
+
+
+def load_aesthetic_attributes(filePathStyle, filePathIds):
+    """Read the style flags and the image ids and join them row by row.
+
+    Returns a DataFrame with an "ID" column followed by one column per style.
+    """
+    styleColumns = ["Complementary Colors", "Duotones", "HDR", "Image Grain", "Light On White", "Long Exposure", "Macro", "Motion Blur", "Negative Image", "Rule of Thirds", "Shallow DOF", "Silhouettes", "Soft Focus" , "Vanishing Point"]
+    dataFrameStyle = pd.read_csv(filePathStyle, sep=" ", header=None, names=styleColumns)
+    dataFrameId = pd.read_csv(filePathIds, sep=" ", header=None, names=["ID"])
+    return dataFrameId.join(dataFrameStyle)
+
+
+def get_style_info(imageIndex):
+    """Return the style names flagged for an image as a ", "-joined string.
+
+    Returns "" when the id is unknown or no style is flagged.
+    """
+    df = load_aesthetic_attributes(filePathStyle, filePathIds)
+    imageInfo = df.loc[df["ID"] == imageIndex]
+    if imageInfo.empty:
+        return ""
+    row = imageInfo.iloc[0]
+    # skip the "ID" column so an image whose id is 1 is not reported as a style
+    return ", ".join(name for name in imageInfo.columns if name != "ID" and row[name] == 1)
+
+
+def display_image_with_styles(imageIndex):
+    """Show the image titled with its id and mean rating, plus its style names."""
+    styleInfo = get_style_info(imageIndex)
+    image = mpimg.imread(imgPath + "/" + str(imageIndex) + ".jpg")
+    plt.imshow(image)
+    plt.title("ID: " + str(imageIndex) + ", Rating: " + str(calculate_image_rating(imageIndex)))
+    plt.text(0, 0, styleInfo, ha="center")
+    plt.axis("off")
+    plt.show()
+
+
+def get_random_image_index():
+    """Return a randomly chosen image id from the style id list."""
+    df = load_aesthetic_attributes(filePathStyle, filePathIds)
+    return random.choice(df["ID"].values)
+
+
+@functools.lru_cache(maxsize=1)
+def _load_ava_info(path):
+    """Parse the AVA info file once so repeated rating lookups do not re-read it."""
+    return pd.read_csv(path, sep=" ", header=None)
+
+
+def calculate_image_rating(imageIndex):
+    """Return the vote-weighted mean score of one image.
+
+    Raises IndexError when the id is not present in the AVA info file.
+    """
+    info = _load_ava_info(filePathInfoAVA)
+    imageInfo = info.loc[info[1] == imageIndex]
+    if imageInfo.empty:
+        raise IndexError("image id not found in AVA.txt: " + str(imageIndex))
+    votes = imageInfo[_VOTE_COLUMNS].values[0]
+    return (votes * _SCORES).sum() / votes.sum()
+
+
+def get_all_image_ratings():
+    """Return a DataFrame with the image id in column 0 and its mean score in column 1."""
+    # keep the first row per id, which is the row calculate_image_rating uses
+    info = _load_ava_info(filePathInfoAVA).drop_duplicates(subset=[1], keep="first")
+    votes = info[_VOTE_COLUMNS].to_numpy()
+    ratings = (votes * _SCORES).sum(axis=1) / votes.sum(axis=1)
+    return pd.DataFrame({0: info[1].to_numpy(), 1: ratings})
+
+
+def load_image_ratings():
+    """Read the space-separated "id rating" pairs from filePathRatings."""
+    return pd.read_csv(filePathRatings, header=None, sep=" ", names=["id", "rating"])
+
+
+def process_image_ratings(df, train, test):
+    """Scale the "rating" column of train and test with one MinMaxScaler.
+
+    The scaler is fitted on train only and re-used for test, so both splits
+    share one scale. Returns two arrays of shape (n, 1). df is unused.
+    """
+    cs = MinMaxScaler()
+    # MinMaxScaler needs 2-D input, hence the double brackets
+    trainContinuous = cs.fit_transform(train[["rating"]])
+    testContinuous = cs.transform(test[["rating"]])
+    return (trainContinuous, testContinuous)
+
+
+def _read_resized(path, size=(32, 32)):
+    """Read an image with OpenCV and resize it to size, given as (width, height)."""
+    image = cv2.imread(path)
+    if image is None:
+        # cv2.imread reports an unreadable file by returning None
+        raise FileNotFoundError("could not read image: " + str(path))
+    return cv2.resize(image, size)
+
+
+def load_images(df, size=(32, 32)):
+    """Return the images listed in df.path as one array, each resized to size."""
+    return np.array([_read_resized(path, size) for path in df.path.values])
+
+
+def remove_entries_for_missing_images(df, imgPath):
+    """Keep only the rows of df whose id has a matching .jpg file in imgPath."""
+    # files whose name is not a number cannot match an id, so they are skipped
+    ids = {int(path.stem) for path in Path(imgPath).glob('*.jpg') if path.stem.isdigit()}
+    return df[df.id.isin(ids)]
+
+
+def build_dataframe(df, imgPath):
+    """Return a copy of df with a "path" column holding each image's .jpg path."""
+    # work on a copy so a filtered slice passed in is never written to
+    df = df.copy()
+    df['path'] = [imgPath + "/" + str(imageId) + ".jpg" for imageId in df.id.values]
+    return df
+
+
+df = build_dataframe(remove_entries_for_missing_images(load_image_ratings(), imgPath), imgPath)
+
+
+def create_tensor_array():
+    """Return a list of (32x32 image, rating) tuples, one per row of df."""
+    return [(_read_resized(row['path']), row['rating']) for _, row in df.iterrows()]
+
+
+def load_in_tensors():
+    """Load the saved image, rating and array tensors and return them as a tuple."""
+    print(tensorImagesPath)
+    print(tensorRatingsPath)
+    print(tensorArrayPath)
+    # NOTE: torch.load unpickles the files; only load files you trust
+    tensorImages = torch.load(tensorImagesPath)
+    tensorRatings = torch.load(tensorRatingsPath)
+    tensorArray = torch.load(tensorArrayPath)
+    return (tensorImages, tensorRatings, tensorArray)
+
+
+def get_dataloader(df, transforms, batchSize, shuffle=True):
+    """Transform every image listed in df and wrap the result in a DataLoader.
+
+    Returns (tensorArray, loader), where tensorArray is the list of
+    (transformed image, rating) tuples the loader iterates over.
+    """
+    tensorArray = [(transforms(pil_loader(row['path'])), row['rating'])
+                   for _, row in df.iterrows()]
+    loader = DataLoader(tensorArray, batch_size=batchSize, shuffle=shuffle,
+                        num_workers=os.cpu_count() or 0,  # os.cpu_count() may return None
+                        pin_memory=config.DEVICE == "cuda")
+    return (tensorArray, loader)
+
+
+def pil_loader(path):
+    """Open an image file and return it converted to RGB."""
+    with open(path, 'rb') as f:
+        img = Image.open(f)
+        return img.convert('RGB')
\ No newline at end of file
diff --git a/src/autophotographer/model.py b/src/autophotographer/model.py
new file mode 100644
index 0000000..26f25dc
--- /dev/null
+++ b/src/autophotographer/model.py
@@ -0,0 +1,154 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchvision.models import resnet50
+from torchvision import transforms
+from torch.utils.data import DataLoader
+
+from tqdm import tqdm
+import time
+import os
+from os.path import abspath
+import matplotlib.pyplot as plt
+
+import config
+import dataset
+
+# projectRoot = "/src/"
+script_directory = os.path.dirname(__file__)
+projectRoot = abspath(os.path.join(script_directory, "../.."))
+print("Project root: " + projectRoot)
+INITIAL_PLOT_PATH = projectRoot + "/src/output/plot.png"
+INTIIAL_MODEL_PATH = projectRoot + "/src/output/model.pth"
+
+# define transformations
+trainTransform = transforms.Compose([
+    transforms.RandomResizedCrop(config.IMAGE_SIZE),
+    transforms.RandomHorizontalFlip(),
+    transforms.RandomRotation(90),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=config.MEAN, std=config.STD)
+])
+valTransform = transforms.Compose([
+    transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=config.MEAN, std=config.STD)
+])
+
+valSetLen = int(len(dataset.df) * config.VAL_SPLIT)
+trainSetLen = len(dataset.df) - valSetLen
+trainSet = dataset.df[:trainSetLen]
+valSet = dataset.df[trainSetLen:]
+
+print("Using " + config.DEVICE + "...")
+# create data loaders
+print("Getting dataloaders...")
+#(trainDataset, trainLoader) = dataset.get_dataloader(trainSet,
+    #transforms=trainTransform, batchSize=config.FEATURE_EXTRACTION_BATCH_SIZE)
+#torch.save(trainDataset, 'trainDataset.pt')
+#(valDataset, valLoader) = dataset.get_dataloader(valSet,
+    #transforms=valTransform, batchSize=config.FEATURE_EXTRACTION_BATCH_SIZE, shuffle=False)
+#torch.save(valDataset, 'valDataset.pt')
+
+# The cached datasets are looked up next to this script instead of under a
+# hard-coded absolute path, so the script also runs outside of /src.
+# NOTE: torch.load unpickles the files; only load caches you trust.
+numWorkers = os.cpu_count() or 0  # os.cpu_count() may return None
+pinMemory = config.DEVICE == "cuda"
+valDataset = torch.load(os.path.join(script_directory, "valDataset.pt"))
+valLoader = DataLoader(valDataset, batch_size=config.FEATURE_EXTRACTION_BATCH_SIZE,
+    shuffle=False, num_workers=numWorkers, pin_memory=pinMemory)
+trainDataset = torch.load(os.path.join(script_directory, "trainDataset.pt"))
+trainLoader = DataLoader(trainDataset, batch_size=config.FEATURE_EXTRACTION_BATCH_SIZE,
+    shuffle=True, num_workers=numWorkers, pin_memory=pinMemory)
+
+# Load the resnet model
+model = resnet50(pretrained=True)
+
+# Freeze all existing layers
+for parameter in model.parameters():
+    parameter.requires_grad = False
+
+# Replace the final layer with a trainable single-output regression head
+modelOutputFeatures = model.fc.in_features
+model.fc = nn.Linear(modelOutputFeatures, 1)
+model = model.to(config.DEVICE)
+
+# initialize loss function and optimizer
+lossFunction = nn.L1Loss()
+optimizer = torch.optim.Adam(model.fc.parameters(), lr=config.LR)
+
+# number of batches per epoch; len() of a DataLoader also counts the final
+# partial batch, so the per-epoch averages are taken over every batch
+trainSteps = len(trainLoader)
+valSteps = len(valLoader)
+# the optimizer is stepped once every ACCUMULATION_STEPS batches
+ACCUMULATION_STEPS = 2
+
+# initialize a dictionary to store training data
+H = {"train_loss": [], "val_loss": []}
+
+# loop over epochs
+print("Starting training...")
+startTime = time.time()
+for epoch in tqdm(range(config.EPOCHS)):
+    model.train()
+    optimizer.zero_grad()
+
+    totalTrainLoss = 0.0
+    totalValLoss = 0.0
+
+    for (i, (x, y)) in enumerate(trainLoader):
+        x = x.to(config.DEVICE)
+        # shape the targets like the (N, 1) predictions; ratings may be float64
+        y = y.to(config.DEVICE).float().view(-1, 1)
+        pred = model(x)
+        loss = lossFunction(pred, y)
+        loss.backward()
+
+        # also step on the last batch so no gradient leaks into the next epoch
+        if (i + 1) % ACCUMULATION_STEPS == 0 or (i + 1) == trainSteps:
+            optimizer.step()
+            optimizer.zero_grad()
+
+        # .item() keeps a plain float instead of a tensor tied to the graph
+        totalTrainLoss += loss.item()
+
+    model.eval()
+    with torch.no_grad():
+        for (x, y) in valLoader:
+            x = x.to(config.DEVICE)
+            y = y.to(config.DEVICE).float().view(-1, 1)
+            pred = model(x)
+            totalValLoss += lossFunction(pred, y).item()
+
+    # calculate the average training and validation loss
+    avgTrainLoss = totalTrainLoss / max(trainSteps, 1)
+    avgValLoss = totalValLoss / max(valSteps, 1)
+    # update our training history
+    H["train_loss"].append(avgTrainLoss)
+    H["val_loss"].append(avgValLoss)
+    # print the model training and validation information
+    print("[INFO] EPOCH: {}/{}".format(epoch + 1, config.EPOCHS))
+    print("Train loss: {:.6f}, Val loss: {:.6f}".format(
+        avgTrainLoss, avgValLoss))
+
+# display the total time needed to perform the training
+endTime = time.time()
+print("[INFO] total time taken to train the model: {:.2f}s".format(
+    endTime - startTime))
+# plot the training loss
+plt.style.use("ggplot")
+plt.figure()
+plt.plot(H["train_loss"], label="train_loss")
+plt.plot(H["val_loss"], label="val_loss")
+plt.title("Training Loss on Dataset")
+plt.xlabel("Epoch #")
+plt.ylabel("Loss")
+plt.legend(loc="lower left")
+# create the output directories first so saving cannot fail on a fresh checkout
+os.makedirs(os.path.dirname(INITIAL_PLOT_PATH), exist_ok=True)
+os.makedirs(os.path.dirname(INTIIAL_MODEL_PATH), exist_ok=True)
+plt.savefig(INITIAL_PLOT_PATH)
+# serialize the model to disk
+torch.save(model, INTIIAL_MODEL_PATH)
\ No newline at end of file
diff --git a/src/autophotographer/tensorImages.pt b/src/autophotographer/tensorImages.pt
new file mode 100644
index 0000000..01c1745
--- /dev/null
+++ b/src/autophotographer/tensorImages.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:169231ec08c6a4dea43a6d6c430926fc65df5a2c86100463f7a0505a41f5ba90
+size 784921323
diff --git a/src/autophotographer/tensorRatings.pt b/src/autophotographer/tensorRatings.pt
new file mode 100644
index 0000000..6ad5756
--- /dev/null
+++ b/src/autophotographer/tensorRatings.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c29c0fecae3177397bb478e7d542f9acd0f956088805eca001e1f0657cbebe8
+size 2044779