mirror of
https://github.com/lovell/sharp.git
synced 2025-12-19 07:15:08 +01:00
Add experimental 'attention' crop strategy
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vips/vips8>
|
||||
|
||||
#include "common.h"
|
||||
@@ -289,69 +290,104 @@ namespace sharp {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate the Shannon entropy
|
||||
*/
|
||||
double EntropyStrategy::operator()(VImage image) {
|
||||
return image.hist_find().hist_entropy();
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate the intensity of edges, skin tone and saturation
|
||||
*/
|
||||
double AttentionStrategy::operator()(VImage image) {
|
||||
// Convert to LAB colourspace
|
||||
VImage lab = image.colourspace(VIPS_INTERPRETATION_LAB);
|
||||
VImage l = lab[0];
|
||||
VImage a = lab[1];
|
||||
VImage b = lab[2];
|
||||
// Edge detect luminosity with the Sobel operator
|
||||
VImage sobel = vips::VImage::new_matrixv(3, 3,
|
||||
-1.0, 0.0, 1.0,
|
||||
-2.0, 0.0, 2.0,
|
||||
-1.0, 0.0, 1.0);
|
||||
VImage edges = l.conv(sobel).abs() + l.conv(sobel.rot90()).abs();
|
||||
// Skin tone chroma thresholds trained with http://humanae.tumblr.com/
|
||||
VImage skin = (a >= 3) & (a <= 22) & (b >= 4) & (b <= 31);
|
||||
// Chroma >~50% saturation
|
||||
VImage lch = lab.colourspace(VIPS_INTERPRETATION_LCH);
|
||||
VImage c = lch[1];
|
||||
VImage saturation = c > 60;
|
||||
// Find maximum in combined saliency mask
|
||||
VImage mask = edges + skin + saturation;
|
||||
return mask.max();
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate crop area based on image entropy
|
||||
*/
|
||||
std::tuple<int, int> EntropyCrop(VImage image, int const outWidth, int const outHeight) {
|
||||
std::tuple<int, int> Crop(
|
||||
VImage image, int const outWidth, int const outHeight, std::function<double(VImage)> strategy
|
||||
) {
|
||||
int left = 0;
|
||||
int top = 0;
|
||||
int const inWidth = image.width();
|
||||
int const inHeight = image.height();
|
||||
if (inWidth > outWidth) {
|
||||
// Reduce width by repeated removing slices from edge with lowest entropy
|
||||
// Reduce width by repeated removing slices from edge with lowest score
|
||||
int width = inWidth;
|
||||
double leftEntropy = 0.0;
|
||||
double rightEntropy = 0.0;
|
||||
double leftScore = 0.0;
|
||||
double rightScore = 0.0;
|
||||
// Max width of each slice
|
||||
int const maxSliceWidth = static_cast<int>(ceil((inWidth - outWidth) / 8.0));
|
||||
while (width > outWidth) {
|
||||
// Width of current slice
|
||||
int const slice = std::min(width - outWidth, maxSliceWidth);
|
||||
if (leftEntropy == 0.0) {
|
||||
// Update entropy of left slice
|
||||
leftEntropy = Entropy(image.extract_area(left, 0, slice, inHeight));
|
||||
if (leftScore == 0.0) {
|
||||
// Update score of left slice
|
||||
leftScore = strategy(image.extract_area(left, 0, slice, inHeight));
|
||||
}
|
||||
if (rightEntropy == 0.0) {
|
||||
// Update entropy of right slice
|
||||
rightEntropy = Entropy(image.extract_area(width - slice - 1, 0, slice, inHeight));
|
||||
if (rightScore == 0.0) {
|
||||
// Update score of right slice
|
||||
rightScore = strategy(image.extract_area(width - slice - 1, 0, slice, inHeight));
|
||||
}
|
||||
// Keep slice with highest entropy
|
||||
if (leftEntropy >= rightEntropy) {
|
||||
// Keep slice with highest score
|
||||
if (leftScore >= rightScore) {
|
||||
// Discard right slice
|
||||
rightEntropy = 0.0;
|
||||
rightScore = 0.0;
|
||||
} else {
|
||||
// Discard left slice
|
||||
leftEntropy = 0.0;
|
||||
leftScore = 0.0;
|
||||
left = left + slice;
|
||||
}
|
||||
width = width - slice;
|
||||
}
|
||||
}
|
||||
if (inHeight > outHeight) {
|
||||
// Reduce height by repeated removing slices from edge with lowest entropy
|
||||
// Reduce height by repeated removing slices from edge with lowest score
|
||||
int height = inHeight;
|
||||
double topEntropy = 0.0;
|
||||
double bottomEntropy = 0.0;
|
||||
double topScore = 0.0;
|
||||
double bottomScore = 0.0;
|
||||
// Max height of each slice
|
||||
int const maxSliceHeight = static_cast<int>(ceil((inHeight - outHeight) / 8.0));
|
||||
while (height > outHeight) {
|
||||
// Height of current slice
|
||||
int const slice = std::min(height - outHeight, maxSliceHeight);
|
||||
if (topEntropy == 0.0) {
|
||||
// Update entropy of top slice
|
||||
topEntropy = Entropy(image.extract_area(0, top, inWidth, slice));
|
||||
if (topScore == 0.0) {
|
||||
// Update score of top slice
|
||||
topScore = strategy(image.extract_area(0, top, inWidth, slice));
|
||||
}
|
||||
if (bottomEntropy == 0.0) {
|
||||
// Update entropy of bottom slice
|
||||
bottomEntropy = Entropy(image.extract_area(0, height - slice - 1, inWidth, slice));
|
||||
if (bottomScore == 0.0) {
|
||||
// Update score of bottom slice
|
||||
bottomScore = strategy(image.extract_area(0, height - slice - 1, inWidth, slice));
|
||||
}
|
||||
// Keep slice with highest entropy
|
||||
if (topEntropy >= bottomEntropy) {
|
||||
// Keep slice with highest score
|
||||
if (topScore >= bottomScore) {
|
||||
// Discard bottom slice
|
||||
bottomEntropy = 0.0;
|
||||
bottomScore = 0.0;
|
||||
} else {
|
||||
// Discard top slice
|
||||
topEntropy = 0.0;
|
||||
topScore = 0.0;
|
||||
top = top + slice;
|
||||
}
|
||||
height = height - slice;
|
||||
@@ -360,13 +396,6 @@ namespace sharp {
|
||||
return std::make_tuple(left, top);
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate the Shannon entropy for an image
|
||||
*/
|
||||
double Entropy(VImage image) {
|
||||
return image.hist_find().hist_entropy();
|
||||
}
|
||||
|
||||
/*
|
||||
Insert a tile cache to prevent over-computation of any previous operations in the pipeline
|
||||
*/
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
#ifndef SRC_OPERATIONS_H_
|
||||
#define SRC_OPERATIONS_H_
|
||||
|
||||
#include <tuple>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vips/vips8>
|
||||
|
||||
using vips::VImage;
|
||||
@@ -63,14 +65,21 @@ namespace sharp {
|
||||
VImage Sharpen(VImage image, double const sigma, double const flat, double const jagged);
|
||||
|
||||
/*
|
||||
Calculate crop area based on image entropy
|
||||
Crop strategy functors
|
||||
*/
|
||||
std::tuple<int, int> EntropyCrop(VImage image, int const outWidth, int const outHeight);
|
||||
struct EntropyStrategy {
|
||||
double operator()(VImage image);
|
||||
};
|
||||
struct AttentionStrategy {
|
||||
double operator()(VImage image);
|
||||
};
|
||||
|
||||
/*
|
||||
Calculate the Shannon entropy for an image
|
||||
Calculate crop area based on given strategy (Entropy, Attention)
|
||||
*/
|
||||
double Entropy(VImage image);
|
||||
// Takes the source image, the requested output dimensions and a scoring
// callable (double(VImage)); returns a tuple of two ints.
std::tuple<int, int> Crop(
  VImage image,
  int const outWidth,
  int const outHeight,
  std::function<double(VImage)> strategy
);
|
||||
|
||||
/*
|
||||
Insert a tile cache to prevent over-computation of any previous operations in the pipeline
|
||||
|
||||
@@ -488,13 +488,18 @@ class PipelineWorker : public Nan::AsyncWorker {
|
||||
std::tie(left, top) = sharp::CalculateCrop(
|
||||
image.width(), image.height(), baton->width, baton->height, baton->crop
|
||||
);
|
||||
} else {
|
||||
} else if (baton->crop == 16) {
|
||||
// Entropy-based crop
|
||||
std::tie(left, top) = sharp::EntropyCrop(image, baton->width, baton->height);
|
||||
std::tie(left, top) = sharp::Crop(image, baton->width, baton->height, sharp::EntropyStrategy());
|
||||
} else {
|
||||
// Attention-based crop
|
||||
std::tie(left, top) = sharp::Crop(image, baton->width, baton->height, sharp::AttentionStrategy());
|
||||
}
|
||||
int width = std::min(image.width(), baton->width);
|
||||
int height = std::min(image.height(), baton->height);
|
||||
image = image.extract_area(left, top, width, height);
|
||||
baton->cropCalcLeft = left;
|
||||
baton->cropCalcTop = top;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -890,6 +895,10 @@ class PipelineWorker : public Nan::AsyncWorker {
|
||||
Set(info, New("width").ToLocalChecked(), New<v8::Uint32>(static_cast<uint32_t>(width)));
|
||||
Set(info, New("height").ToLocalChecked(), New<v8::Uint32>(static_cast<uint32_t>(height)));
|
||||
Set(info, New("channels").ToLocalChecked(), New<v8::Uint32>(static_cast<uint32_t>(baton->channels)));
|
||||
// Report the calculated crop offsets only when BOTH have been set.
// cropCalcLeft and cropCalcTop are initialised to -1 in PipelineBaton and are
// assigned together after a strategy-based crop, so each one must be checked
// before it is cast to an unsigned value.
if (baton->cropCalcLeft != -1 && baton->cropCalcTop != -1) {
  Set(info, New("cropCalcLeft").ToLocalChecked(), New<v8::Uint32>(static_cast<uint32_t>(baton->cropCalcLeft)));
  Set(info, New("cropCalcTop").ToLocalChecked(), New<v8::Uint32>(static_cast<uint32_t>(baton->cropCalcTop)));
}
|
||||
|
||||
if (baton->bufferOutLength > 0) {
|
||||
// Pass ownership of output data to Buffer instance
|
||||
|
||||
@@ -46,6 +46,8 @@ struct PipelineBaton {
|
||||
int channels;
|
||||
Canvas canvas;
|
||||
int crop;
|
||||
int cropCalcLeft;
|
||||
int cropCalcTop;
|
||||
std::string kernel;
|
||||
std::string interpolator;
|
||||
double background[4];
|
||||
@@ -112,6 +114,8 @@ struct PipelineBaton {
|
||||
channels(0),
|
||||
canvas(Canvas::CROP),
|
||||
crop(0),
|
||||
cropCalcLeft(-1),
|
||||
cropCalcTop(-1),
|
||||
flatten(false),
|
||||
negate(false),
|
||||
blurSigma(0.0),
|
||||
|
||||
Reference in New Issue
Block a user