Added CNN code and tensors
This commit is contained in:
parent
b2f8b7606a
commit
123a5d3dde
7 changed files with 383 additions and 0 deletions
4
.gitattributes
vendored
Normal file
4
.gitattributes
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.webm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jpg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
0
src/autophotographer/__init__.py
Normal file
0
src/autophotographer/__init__.py
Normal file
24
src/autophotographer/config.py
Normal file
24
src/autophotographer/config.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
import torch
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Channel statistics used to normalise input images. Values are taken from the
# torchvision ResNet hub page: https://pytorch.org/hub/pytorch_vision_resnet/
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# Side length (pixels) that images are resized/cropped to.
# NOTE(review): this module used to assign IMAGE_SIZE twice (224 first, then 32).
# Only the last assignment was ever visible to importers, so the effective
# value 32 is kept and the dead first assignment was dropped. The hub page
# referenced above uses 224 - confirm which size is actually intended.
IMAGE_SIZE = 32

# Fraction of the dataset held out for validation.
VAL_SPLIT = 0.1
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Batch sizes for the different stages.
FEATURE_EXTRACTION_BATCH_SIZE = 256
FINETUNE_BATCH_SIZE = 64
PRED_BATCH_SIZE = 4
# Training schedule.
EPOCHS = 20
LR = 0.001
LR_FINETUNE = 0.0005  # TODO: flagged "REMOVE" by the original author

# Output artefacts.
WARMUP_PLOT = os.path.join("output", "plot.png")
# NOTE(review): the model file is named "plot.pth", which looks like a
# copy/paste of the plot name - confirm before renaming.
WARMUP_MODEL = os.path.join("output", "plot.pth")

# File names of the serialised tensors.
TENSOR_IMAGES_PATH = "tensorImages.pt"
TENSOR_RATINGS_PATH = "tensorRatings.pt"
|
195
src/autophotographer/dataset.py
Normal file
195
src/autophotographer/dataset.py
Normal file
|
@ -0,0 +1,195 @@
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib.image as mpimg
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import cv2
|
||||||
|
from pathlib import Path
|
||||||
|
import config
|
||||||
|
from torchvision import transforms
|
||||||
|
import torch
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
import os
|
||||||
|
from os.path import abspath
|
||||||
|
from PIL import Image, ImageFile
|
||||||
|
# Pillow module-level switch: presumably enabled so that truncated image files
# in the dataset are loaded instead of aborting the run - confirm intent.
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||||
|
|
||||||
|
# Directory that contains the AVA dataset. When set to "" the AVA paths below
# are not defined at all.
datasetDir = "/datasets/"
#datasetDir = ""
script_directory = os.path.dirname(__file__)
# The project root is two levels above the directory holding this file.
projectRoot = abspath(os.path.join(script_directory, "../.."))
#projectRoot = "/src/"
print(projectRoot)

# Serialised tensors and the pre-computed ratings table live inside the project.
tensorImagesPath = os.path.join(projectRoot, "src/autophotographer/tensorImages.pt")
tensorRatingsPath = os.path.join(projectRoot, "src/autophotographer/tensorRatings.pt")
tensorArrayPath = os.path.join(projectRoot, "src/autophotographer/tensorArray.pt")
filePathRatings = os.path.join(projectRoot, "data/ratings.txt")

if datasetDir != "":
    # os.path.join gives the same result as plain concatenation for a
    # datasetDir ending in "/", and also works when the slash is missing.
    filePathStyle = os.path.join(datasetDir, "AVA/style_image_lists/test.multilab")
    filePathIds = os.path.join(datasetDir, "AVA/style_image_lists/test.jpgl")
    filePathInfoAVA = os.path.join(datasetDir, "AVA/AVA.txt")
    imgPath = os.path.join(datasetDir, "AVA/images/images")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_aesthetic_attributes(filePathStyle, filePathIds):
    """Load the style flags and image IDs and combine them into one table.

    Both files are read as space-separated text without a header row. The
    style file supplies one column per style name below; the ID file supplies
    a single "ID" column. Rows are paired through the default row index.

    Returns:
        A DataFrame whose first column is "ID", followed by the style columns.
    """
    styleNames = [
        "Complementary Colors", "Duotones", "HDR", "Image Grain",
        "Light On White", "Long Exposure", "Macro", "Motion Blur",
        "Negative Image", "Rule of Thirds", "Shallow DOF", "Silhouettes",
        "Soft Focus", "Vanishing Point",
    ]
    styleFrame = pd.read_csv(filePathStyle, sep=" ", header=None, names=styleNames)
    idFrame = pd.read_csv(filePathIds, sep=" ", header=None, names=["ID"])
    return idFrame.join(styleFrame)
|
||||||
|
|
||||||
|
def get_style_info(imageIndex):
    """Return the styles flagged for an image as a ", "-separated string.

    The style table is loaded from the module-level filePathStyle and
    filePathIds, and the first row whose "ID" equals imageIndex is inspected.

    Returns:
        Names of all style columns whose value is 1, joined by ", "
        (an empty string when no style is set).

    Raises:
        IndexError: if no row matches imageIndex.
    """
    df = load_aesthetic_attributes(filePathStyle, filePathIds)
    imageInfo = df.loc[df['ID'] == imageIndex]
    firstMatch = imageInfo.iloc[0]
    # Skip the "ID" column: it is not a style flag, and an image whose ID is 1
    # would otherwise be reported as having the style "ID".
    activeStyles = [
        column
        for column in imageInfo.columns
        if column != "ID" and firstMatch[column] == 1
    ]
    return ", ".join(activeStyles)
|
||||||
|
|
||||||
|
def display_image_with_styles(imageIndex):
    """Show an image with its ID and rating as title and its styles as text.

    The image is read from "<imgPath>/<imageIndex>.jpg"; the style text comes
    from get_style_info and the rating from calculate_image_rating. The
    matplotlib window is opened with plt.show().
    """
    styleInfo = get_style_info(imageIndex)
    imageFile = imgPath + "/" + str(imageIndex) + ".jpg"
    pixels = mpimg.imread(imageFile)
    plt.imshow(pixels)
    rating = calculate_image_rating(imageIndex)
    plt.title(f"ID: {imageIndex}, Rating: {rating}")
    plt.text(0, 0, styleInfo, ha="center")
    plt.axis("off")
    plt.show()
|
||||||
|
|
||||||
|
def get_random_image_index():
    """Return one image ID picked at random from the style table's "ID" column."""
    candidateIds = load_aesthetic_attributes(filePathStyle, filePathIds)["ID"].values
    return random.choice(candidateIds)
|
||||||
|
|
||||||
|
def calculate_image_rating(imageIndex, df=None):
    """Return the mean score of an image computed from its vote histogram.

    Columns 2..11 of the AVA table are read as the number of votes for the
    scores 1..10 (column c counts votes for score c - 1). The result is the
    vote-weighted average score of the first row whose column 1 equals
    imageIndex.

    Args:
        imageIndex: image ID to look up in column 1.
        df: optional, already loaded AVA table (read with header=None so the
            columns are labelled by position). When omitted the table is read
            from filePathInfoAVA. Pass it in when rating many images so the
            file is not parsed again on every call.

    Returns:
        The weighted average score. NOTE(review): a row with zero votes is
        not special-cased; the division is left to produce whatever the
        underlying numeric types yield.

    Raises:
        IndexError: if no row matches imageIndex.
    """
    if df is None:
        df = pd.read_csv(filePathInfoAVA, sep=" ", header=None)
    imageInfo = df.loc[df[1] == imageIndex]

    # Vote counts for scores 1..10, taken from the first matching row.
    voteCounts = [imageInfo[column].values[0] for column in range(2, 12)]
    numOfRatings = sum(voteCounts)
    score = sum(count * value for value, count in enumerate(voteCounts, start=1))
    return score / numOfRatings
|
||||||
|
|
||||||
|
def get_all_image_ratings(filePath=None):
    """Compute the mean score of every image listed in the AVA table.

    The table is parsed once and all ratings are computed in a single
    vectorised pass. The previous implementation re-read the whole file for
    every image, so its per-1000-images progress printing is no longer needed
    and has been dropped.

    Args:
        filePath: optional path of the AVA table; defaults to the module-level
            filePathInfoAVA.

    Returns:
        A DataFrame with column 0 holding the image ID and column 1 the
        vote-weighted average score (columns 2..11 are the vote counts for
        scores 1..10). When an ID occurs more than once, only its first row
        is used, in order of first appearance.
    """
    if filePath is None:
        filePath = filePathInfoAVA
    df = pd.read_csv(filePath, sep=" ", header=None, index_col=0)

    votes = df[list(range(2, 12))].to_numpy()
    scoreValues = np.arange(1, 11)
    # Weighted sum of votes divided by the number of votes, row by row.
    ratings = (votes * scoreValues).sum(axis=1) / votes.sum(axis=1)

    result = pd.DataFrame({0: df[1].to_numpy(), 1: ratings})
    return result.drop_duplicates(subset=[0], keep="first").reset_index(drop=True)
|
||||||
|
|
||||||
|
def load_image_ratings():
    """Read the ratings file into a DataFrame with columns "id" and "rating".

    The file at the module-level filePathRatings is parsed as space-separated
    text without a header row.
    """
    return pd.read_csv(filePathRatings, header=None, sep=" ", names=["id", "rating"])
|
||||||
|
|
||||||
|
def process_image_ratings(df, train, test):
    """Min-max scale the "rating" column of the train and test splits.

    The scaler is fitted on the training ratings only and then applied to the
    test ratings, so both splits share one scale and no test information is
    used for fitting.

    Args:
        df: unused; kept so existing callers keep working.
        train: DataFrame with a "rating" column used to fit the scaler.
        test: DataFrame with a "rating" column scaled with the fitted scaler.

    Returns:
        A tuple (trainContinuous, testContinuous) of the scaler outputs, each
        with one column (the input is passed as a single-column frame).
    """
    cs = MinMaxScaler()
    # Select with a list so the scaler gets 2-D input; a 1-D Series is rejected.
    trainContinuous = cs.fit_transform(train[["rating"]])
    # transform (not fit_transform): reuse the range learned from the train split.
    testContinuous = cs.transform(test[["rating"]])

    return (trainContinuous, testContinuous)
|
||||||
|
|
||||||
|
def load_images(df):
    """Read every image listed in df.path, resize it to 32x32 and stack them.

    Returns:
        A numpy array built from the list of resized images, in row order.
    """
    resizedImages = [
        cv2.resize(cv2.imread(imageFile), (32, 32))
        for imageFile in df.path.values
    ]
    return np.array(resizedImages)
|
||||||
|
|
||||||
|
def remove_entries_for_missing_images(df, imgPath):
    """Keep only the rows of df whose image file exists in imgPath.

    Image IDs are taken from the names of the "*.jpg" files directly inside
    imgPath (file name without the extension, parsed as an integer).

    Args:
        df: DataFrame with an "id" column.
        imgPath: directory that is searched for "<id>.jpg" files.

    Returns:
        The rows of df whose "id" has a matching file.
    """
    availableIds = set()
    for path in Path(imgPath).glob('*.jpg'):
        try:
            availableIds.add(int(path.stem))
        except ValueError:
            # A .jpg whose name is not a number cannot belong to any ID;
            # ignore it instead of aborting the whole scan.
            continue
    return df[df.id.isin(availableIds)]
|
||||||
|
|
||||||
|
def build_dataframe(df, imgPath):
    """Add a "path" column holding "<imgPath>/<id>.jpg" for every row.

    The column is written onto the df that was passed in, and that same
    object is returned.
    """
    df['path'] = [
        imgPath + "/" + str(imageId) + ".jpg"
        for imageId in df.id.values
    ]
    return df
|
||||||
|
|
||||||
|
# Module-level ratings table, built when this module is imported: load the
# ratings file, drop rows whose image is missing from imgPath, then add the
# "path" column pointing at each image file.
df = build_dataframe(remove_entries_for_missing_images(load_image_ratings(), imgPath), imgPath)
|
||||||
|
|
||||||
|
def create_tensor_array():
    """Build a list of (image, rating) tuples from the module-level df.

    Each image is read from the row's "path" and resized to 32x32.
    """
    samples = []
    for _, record in df.iterrows():
        rating = record['rating']
        pixels = cv2.resize(cv2.imread(record['path']), (32, 32))
        samples.append((pixels, rating))
    return samples
|
||||||
|
|
||||||
|
def load_in_tensors():
    """Load the three serialised tensor files and return their contents.

    Returns:
        A tuple (tensorImages, tensorRatings, tensorArray) loaded from
        tensorImagesPath, tensorRatingsPath and tensorArrayPath.
    """
    # Show every file that is about to be read (previously the image path was
    # printed three times, which looked like a copy/paste slip).
    print(tensorImagesPath)
    print(tensorRatingsPath)
    print(tensorArrayPath)
    # NOTE: torch.load unpickles the file contents - only load trusted files.
    tensorImages = torch.load(tensorImagesPath)
    tensorRatings = torch.load(tensorRatingsPath)
    tensorArray = torch.load(tensorArrayPath)
    return (tensorImages, tensorRatings, tensorArray)
|
||||||
|
|
||||||
|
def get_dataloader(df, transforms, batchSize, shuffle=True):
    """Transform every image listed in df and wrap the result in a DataLoader.

    Args:
        df: DataFrame with "path" and "rating" columns.
        transforms: callable applied to each RGB image returned by pil_loader.
        batchSize: batch size handed to the DataLoader.
        shuffle: whether the DataLoader shuffles the samples.

    Returns:
        A tuple (samples, loader): the list of (transformed image, rating)
        tuples and the DataLoader built on top of that list.
    """
    # All images are loaded and transformed up front, one tuple per row.
    samples = [
        (transforms(pil_loader(record['path'])), record['rating'])
        for _, record in df.iterrows()
    ]

    # Pinned host memory is only requested when running on CUDA.
    usePinnedMemory = config.DEVICE == "cuda"
    loader = DataLoader(
        samples,
        batch_size=batchSize,
        shuffle=shuffle,
        num_workers=os.cpu_count(),
        pin_memory=usePinnedMemory,
    )

    return (samples, loader)
|
||||||
|
|
||||||
|
def pil_loader(path):
    """Open the image file at path and return it converted to RGB."""
    with open(path, 'rb') as imageFile:
        return Image.open(imageFile).convert('RGB')
|
154
src/autophotographer/model.py
Normal file
154
src/autophotographer/model.py
Normal file
|
@ -0,0 +1,154 @@
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torchvision.models import resnet50
|
||||||
|
from torchvision import transforms
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from os.path import abspath
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
import config
|
||||||
|
import dataset
|
||||||
|
|
||||||
|
# projectRoot = "/src/"
# Resolve the project root as the directory two levels above this script.
script_directory = os.path.dirname(__file__)
projectRoot = abspath(os.path.join(script_directory, "../.."))
print(f"Project root: {projectRoot}")
# Where the loss plot and the serialised model are written after training.
INITIAL_PLOT_PATH = f"{projectRoot}/src/output/plot.png"
INTIIAL_MODEL_PATH = f"{projectRoot}/src/output/model.pth"
|
||||||
|
|
||||||
|
# Pre-processing pipelines. Both end with the same two steps: conversion to a
# tensor followed by normalisation with the statistics from config.
_sharedFinalSteps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=config.MEAN, std=config.STD),
]

# Training: random crop/flip/rotation for augmentation, then the shared steps.
trainTransform = transforms.Compose([
    transforms.RandomResizedCrop(config.IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(90),
    *_sharedFinalSteps,
])

# Validation: plain resize to a square image, then the shared steps.
valTransform = transforms.Compose([
    transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
    *_sharedFinalSteps,
])
|
||||||
|
|
||||||
|
# Split the ratings table: the last VAL_SPLIT fraction of rows is validation.
valSetLen = int(len(dataset.df) * config.VAL_SPLIT)
trainSetLen = len(dataset.df) - valSetLen
trainSet = dataset.df[:trainSetLen]
valSet = dataset.df[trainSetLen:]

print("Using " + config.DEVICE + "...")
# create data loaders
print("Getting dataloaders...")
# The cached datasets loaded below were produced once with the following code:
#(trainDataset, trainLoader) = dataset.get_dataloader(trainSet,
#transforms=trainTransform, batchSize=config.FEATURE_EXTRACTION_BATCH_SIZE)
#torch.save(trainDataset, 'trainDataset.pt')
#(valDataset, valLoader) = dataset.get_dataloader(valSet,
#transforms=valTransform, batchSize=config.FEATURE_EXTRACTION_BATCH_SIZE, shuffle=False)
#torch.save(valDataset, 'valDataset.pt')

# Look for the cached datasets next to this script instead of the hard-coded
# "/src/src/autophotographer" location, so the script also runs when the
# project is checked out elsewhere (identical when the project root is /src).
cachedDatasetDir = os.path.dirname(os.path.abspath(__file__))
# Loader settings shared by both splits; pinned memory only matters on CUDA.
loaderOptions = {
    "batch_size": config.FEATURE_EXTRACTION_BATCH_SIZE,
    "num_workers": os.cpu_count(),
    "pin_memory": config.DEVICE == "cuda",
}

# NOTE: torch.load unpickles the file contents - only load trusted files.
valDataset = torch.load(os.path.join(cachedDatasetDir, "valDataset.pt"))
valLoader = DataLoader(valDataset, shuffle=False, **loaderOptions)
trainDataset = torch.load(os.path.join(cachedDatasetDir, "trainDataset.pt"))
trainLoader = DataLoader(trainDataset, shuffle=True, **loaderOptions)
|
||||||
|
|
||||||
|
# Load the resnet model
model = resnet50(pretrained=True)

# Freeze all existing layers
for parameter in model.parameters():
    parameter.requires_grad = False

# Replace the classification head with a single-output layer. The new layer
# is created after the freeze, so it is the only part that is trained.
modelOutputFeatures = model.fc.in_features
model.fc = nn.Linear(modelOutputFeatures, 1)
model = model.to(config.DEVICE)

# initialize loss function and optimizer (only the new head is optimised)
lossFunction = nn.L1Loss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=config.LR)

# Number of batches per epoch for the training and validation set. Taken from
# the loaders themselves: floor-dividing the dataset size by the batch size
# ignores the final partial batch and yields 0 for a dataset smaller than one
# batch, which breaks the per-epoch loss average.
trainSteps = len(trainLoader)
valSteps = len(valLoader)

# initialize a dictionary to store training data
H = {"train_loss": [], "val_loss": []}
|
||||||
|
|
||||||
|
# Number of consecutive batches whose gradients are summed before the
# optimizer is stepped.
GRAD_ACCUMULATION_STEPS = 2

# loop over epochs
print("Starting training...")
startTime = time.time()
for epoch in tqdm(range(config.EPOCHS)):
    # NOTE(review): train mode also affects layers of the frozen backbone
    # whose behaviour depends on the mode - confirm this is intended.
    model.train()

    # Losses are accumulated as plain floats so no autograd graph is kept
    # alive across batches.
    totalTrainLoss = 0.0
    totalValLoss = 0.0

    numTrainBatches = len(trainLoader)
    numValBatches = len(valLoader)

    # Start every epoch without gradients left over from the previous one.
    optimizer.zero_grad()

    for (i, (x, y)) in enumerate(trainLoader):
        x = x.to(config.DEVICE)
        pred = model(x)
        # Give the targets the same (batch, 1) shape and dtype as the predictions.
        y = y.to(config.DEVICE).view(len(y), 1).to(pred.dtype)
        loss = lossFunction(pred, y)

        loss.backward()

        # Step after every GRAD_ACCUMULATION_STEPS batches, and after the last
        # batch so a trailing odd batch is not lost. (The old "(i + 2) % 2"
        # test fired on the very first batch and leaked the final batch's
        # gradients into the next epoch.)
        isLastBatch = (i + 1) == numTrainBatches
        if (i + 1) % GRAD_ACCUMULATION_STEPS == 0 or isLastBatch:
            optimizer.step()
            optimizer.zero_grad()

        totalTrainLoss += loss.item()

    model.eval()
    with torch.no_grad():
        for (x, y) in valLoader:
            x = x.to(config.DEVICE)
            pred = model(x)
            y = y.to(config.DEVICE).view(len(y), 1).to(pred.dtype)
            totalValLoss += lossFunction(pred, y).item()

    # calculate the average training and validation loss per batch
    # (max(..., 1) avoids a division by zero for an empty loader)
    avgTrainLoss = totalTrainLoss / max(numTrainBatches, 1)
    avgValLoss = totalValLoss / max(numValBatches, 1)
    # update our training history
    H["train_loss"].append(avgTrainLoss)
    H["val_loss"].append(avgValLoss)
    # print the model training and validation information
    print("[INFO] EPOCH: {}/{}".format(epoch + 1, config.EPOCHS))
    print("Train loss: {:.6f}, Val loss: {:.6f}".format(
        avgTrainLoss, avgValLoss))

# display the total time needed to perform the training
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))
|
||||||
|
# plot the training and validation loss per epoch
plt.style.use("ggplot")
plt.figure()
plt.plot(H["train_loss"], label="train_loss")
plt.plot(H["val_loss"], label="val_loss")
plt.title("Training Loss on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")

# Make sure the output directories exist, so a missing folder cannot make the
# run fail after training has already finished.
for outputPath in (INITIAL_PLOT_PATH, INTIIAL_MODEL_PATH):
    outputDir = os.path.dirname(outputPath)
    if outputDir:
        os.makedirs(outputDir, exist_ok=True)

plt.savefig(INITIAL_PLOT_PATH)
# serialize the model to disk (the whole model object is saved, not just its weights)
torch.save(model, INTIIAL_MODEL_PATH)
|
BIN
src/autophotographer/tensorImages.pt
(Stored with Git LFS)
Normal file
BIN
src/autophotographer/tensorImages.pt
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
src/autophotographer/tensorRatings.pt
(Stored with Git LFS)
Normal file
BIN
src/autophotographer/tensorRatings.pt
(Stored with Git LFS)
Normal file
Binary file not shown.
Reference in a new issue