#include <jni.h>

#include <cinttypes>
#include <memory>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgproc/types_c.h>

// Project-local headers (the RGA wrapper, Chromium-style base utilities,
// libyuv, logging, Android HAL transform constants) are assumed to be
// included elsewhere in this translation unit.
using namespace cv;
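// Face detection for Android camera frames: each frame is converted and
// downscaled to packed BGR888 on the Rockchip RGA 2D engine, then run
// through an OpenCV DNN Caffe SSD face detector (the 104/177/123 means
// below match the common res10 SSD face model). media::RGAInterface,
// gfx::Size, base::*, libyuv, LOG and DISALLOW_COPY_AND_ASSIGN are
// project-local or third-party facilities assumed to be provided by the
// surrounding build; they are not part of OpenCV.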
class AndroidFaceDetect {
public:
explicit AndroidFaceDetect(JNIEnv *env,
const std::string &prototxt,
const std::string &caffe_model,
int image_fixed_width)
: rga_buffer_length_(0),
prototxt_(prototxt),
caffe_model_(caffe_model),
image_fixed_width_(image_fixed_width),
in_scale_factor_(1.0),
mean_val_(104.0, 177.0, 123.0) {
}
~AndroidFaceDetect() {
UnInit();
}
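// Lazily creates the RGA wrapper and loads the Caffe model; safe to call
// more than once.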
bool Init() {
  if (!rga_) {
    std::unique_ptr<media::RGAInterface> rga(new media::RGAInterface());
    if (rga->Init()) {
      rga_ = std::move(rga);
    }
  }
  if (net_.empty()) {
    net_ = dnn::readNetFromCaffe(prototxt_, caffe_model_);
  }
  // Detect() needs both the RGA wrapper and the network, so report
  // failure if either one is missing.
  return rga_ && !net_.empty();
}
void UnInit() {
  if (rga_) {
    rga_->UnInit();
    rga_.reset();
  }
  // Assign a fresh Net instead of calling net_.~Net(): invoking the
  // destructor explicitly would destroy the member a second time when
  // this object itself is destroyed.
  if (!net_.empty()) {
    net_ = dnn::Net();
  }
}
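// Runs detection on a frame passed as a Java byte[]. The pixels are
// borrowed via GetByteArrayElements and released with JNI_ABORT because
// they are never modified. On success, appends 5 floats per detection to
// |output|: confidence, x1, y1, x2, y2 in source-frame pixels.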
bool Detect(JNIEnv *env,
jbyteArray data,
jint src_w,
jint src_h,
jint src_format,
jint transform,
std::vector<float> *output) {
if (!rga_ || net_.empty()) {
  return false;
}
jbyte *src_bytes = env->GetByteArrayElements(data, nullptr);
if (!src_bytes) {
LOG(ERROR) << "failed to GetByteArrayElements";
return false;
}
bool ret = DetectFromImage(src_w, src_h, src_format, transform, src_bytes, output);
env->ReleaseByteArrayElements(data, src_bytes, JNI_ABORT);
return ret;
}
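// Same as Detect(), but reads pixels from a direct ByteBuffer, which
// avoids the copy a byte[] can incur.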
bool DetectBuffer(JNIEnv *env,
jobject data,
jint src_w,
jint src_h,
jint src_format,
jint transform,
std::vector<float> *output) {
if (!rga_ || net_.empty()) {
  return false;
}
void *src_bytes = env->GetDirectBufferAddress(data);
if (!src_bytes) {
LOG(ERROR) << "failed to GetDirectBufferAddress(data)";
return false;
}
return DetectFromImage(src_w, src_h, src_format, transform, src_bytes, output);
}
private:
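// Grows the two scratch buffers to hold |size| pixels. Four bytes per
// pixel leaves headroom even though BGR888 only needs three.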
void EnsureCapacity(const gfx::Size &size) {
  size_t buffer_len = static_cast<size_t>(size.GetArea()) * 4;
  if (rga_buffer_length_ < buffer_len) {
    rga_buffer_.reset(new uint8_t[buffer_len]);
    mat_buffer_.reset(new uint8_t[buffer_len]);
    rga_buffer_length_ = buffer_len;
  }
}
bool DetectFromImage(jint src_w,
jint src_h,
jint src_format,
jint transform,
void *src_bytes,
std::vector<float> *output) {
if (transform == HAL_TRANSFORM_ROT_90 || transform == HAL_TRANSFORM_ROT_270) {
  LOG(ERROR) << "90/270-degree transforms are not supported";
  return false;
}
base::TimeTicks t1 = base::TimeTicks::Now();
gfx::Size image_size(src_w, src_h);
int mat_width = image_fixed_width_;
// Scale to the fixed width while preserving the source aspect ratio.
int mat_height = base::saturated_cast<int>(
    static_cast<double>(src_h) / (static_cast<double>(src_w) / image_fixed_width_));
gfx::Size mat_size(mat_width, mat_height);
gfx::Size rga_size(RoundUp(mat_width, kAlignment), RoundUp(mat_height, kAlignment));
EnsureCapacity(rga_size);
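// One RGA pass performs the pixel-format conversion to BGR888, the
// requested flip/mirror transform, and the downscale to the aligned
// working size.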
bool ret = rga_->Blit(src_bytes,
0,
0,
src_w,
src_h,
src_w,
src_h,
src_format,
transform,
rga_size.width(),
rga_size.height(),
rga_size.width(),
rga_size.height(),
RK_FORMAT_BGR_888,
rga_buffer_.get());
if (!ret) {
LOG(ERROR) << "rga blit operation failed";
return false;
}
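// Strip the alignment padding: copy mat_size.width() * 3 bytes from each
// of the first mat_size.height() rows of the wider RGA output into a
// tightly packed buffer.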
libyuv::CopyPlane(rga_buffer_.get(), rga_size.width() * 3,
mat_buffer_.get(), mat_size.width() * 3,
mat_size.width() * 3, mat_size.height());
ret = DoDetectFaces(image_size, mat_size, mat_buffer_.get(), output);
LOG(INFO) << "detect duration:" << (base::TimeTicks::Now() - t1).InMicroseconds();
return ret;
}
bool DoDetectFaces(const gfx::Size &image_size,
const gfx::Size &mat_size,
uint8_t *data,
std::vector<float> *output) {
// Wrap the packed BGR buffer in a Mat header; no pixel data is copied.
Mat mat(mat_size.height(), mat_size.width(), CV_8UC3, data);
#if defined(WRITE_CONVERT_IMAGE)
// Debug aid: dump the converted frame to disk. PRId64 keeps the format
// string correct on both 32- and 64-bit Android ABIs.
int64_t now = base::TimeTicks::Now().ToInternalValue();
std::string str = base::StringPrintf("/sdcard/data/%" PRId64 ".jpg", now);
cv::imwrite(str, mat);
#endif
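// Pack the BGR frame into an NCHW float blob; the per-channel means are
// subtracted and swapRB stays false because the buffer is already BGR.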
Mat inputBlob = dnn::blobFromImage(mat,
in_scale_factor_,
Size(mat_size.width(), mat_size.height()),
mean_val_,
false,
false);
net_.setInput(inputBlob);
Mat detection = net_.forward();
// The output blob has shape [1, 1, N, 7]; view the N x 7 detection table
// directly. reshape() is avoided because rows/cols are undefined for a
// 4-D Mat, so detection.size().width does not give the row count.
Mat detection_mat(detection.size[2], detection.size[3], CV_32F,
                  detection.ptr<float>());
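// Each row is [image_id, label, confidence, x1, y1, x2, y2] with box
// coordinates normalized to [0, 1]; scaling by the original frame size
// maps them back to source pixels. No confidence threshold is applied
// here, so callers should filter on result[0].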
float result[5] = {0};
for (int i = 0; i < detection_mat.rows; ++i) {
result[0] = detection_mat.at<float>(i, 2);
result[1] = detection_mat.at<float>(i, 3) * image_size.width();
result[2] = detection_mat.at<float>(i, 4) * image_size.height();
result[3] = detection_mat.at<float>(i, 5) * image_size.width();
result[4] = detection_mat.at<float>(i, 6) * image_size.height();
LOG(INFO)
<< "confidence:(" << result[0] << "),rectangle:[" << result[1] << "," << result[2] << ","
<< result[3] << "," << result[4] << "]";
output->insert(output->end(), result, result + 5);
}
return !output->empty();
}
std::unique_ptr<media::RGAInterface> rga_;
size_t rga_buffer_length_;
std::unique_ptr<uint8_t[]> rga_buffer_;
std::unique_ptr<uint8_t[]> mat_buffer_;
std::string prototxt_;
std::string caffe_model_;
int image_fixed_width_;
double in_scale_factor_;
Scalar mean_val_;
dnn::Net net_;
DISALLOW_COPY_AND_ASSIGN(AndroidFaceDetect);
};
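
// A minimal JNI bridge sketch showing how the class might be driven from
// Java. The class name "com_example_FaceDetector", the model paths, and
// the 300-pixel input width are hypothetical placeholders, not part of
// the original code; adapt them to the actual Java-side declaration.
extern "C" JNIEXPORT jlong JNICALL
Java_com_example_FaceDetector_nativeCreate(JNIEnv *env, jobject /*thiz*/) {
  auto *detector = new AndroidFaceDetect(
      env,
      "/sdcard/model/deploy.prototxt",      // hypothetical path
      "/sdcard/model/face_ssd.caffemodel",  // hypothetical path
      300);
  if (!detector->Init()) {
    delete detector;
    return 0;
  }
  return reinterpret_cast<jlong>(detector);
}

extern "C" JNIEXPORT jfloatArray JNICALL
Java_com_example_FaceDetector_nativeDetect(JNIEnv *env, jobject /*thiz*/,
                                           jlong handle, jbyteArray frame,
                                           jint w, jint h, jint format,
                                           jint transform) {
  auto *detector = reinterpret_cast<AndroidFaceDetect *>(handle);
  std::vector<float> result;
  if (!detector ||
      !detector->Detect(env, frame, w, h, format, transform, &result)) {
    return nullptr;
  }
  // Marshal the flat [confidence, x1, y1, x2, y2] tuples back to Java.
  jfloatArray out = env->NewFloatArray(static_cast<jsize>(result.size()));
  if (out) {
    env->SetFloatArrayRegion(out, 0, static_cast<jsize>(result.size()),
                             result.data());
  }
  return out;
}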