00001 #include <cmath>
00002 #include <iostream>
00003 #include <algorithm>
00004 #include <map>
00005 #include <vector>
00006 #include <string.h>
00007 #ifdef FCAM_ARCH_ARM
00008 #include <arm_neon.h>
00009 #endif
00010
00011 #include <FCam/processing/Demosaic.h>
00012 #include <FCam/Sensor.h>
00013 #include <FCam/Time.h>
00014
00015
00016 namespace FCam {
00017
00018 void makeLUT(const Frame &f, float contrast, int blackLevel, float gamma, unsigned char *lut) {
00019 unsigned short minRaw = f.minRawValue()+blackLevel;
00020 unsigned short maxRaw = f.maxRawValue();
00021
00022 for (int i = 0; i <= minRaw; i++) {
00023 lut[i] = 0;
00024 }
00025
00026 float invRange = 1.0f/(maxRaw - minRaw);
00027 float b = 2 - powf(2.0f, contrast/100.0f);
00028 float a = 2 - 2*b;
00029 for (int i = minRaw+1; i <= maxRaw; i++) {
00030
00031 float y = (i-minRaw)*invRange;
00032
00033 y = powf(y, 1.0f/gamma);
00034
00035 if (y > 0.5) {
00036 y = 1-y;
00037 y = a*y*y + b*y;
00038 y = 1-y;
00039 } else {
00040 y = a*y*y + b*y;
00041 }
00042
00043 y = std::floor(y * 255 + 0.5f);
00044 if (y < 0) y = 0;
00045 if (y > 255) y = 255;
00046 lut[i] = (unsigned char)y;
00047 }
00048
00049
00050 for (int i = maxRaw+1; i < 4096; i++) {
00051 lut[i] = 255;
00052 }
00053 }
00054
00055 Image demosaic(Frame src, float contrast, bool denoise, int blackLevel, float gamma) {
00056 if (!src.image().valid()) {
00057 std::cerr << "Cannot demosaic an invalid image" << std::endl;
00058 Image nullImage;
00059 return nullImage;
00060 }
00061 if (src.image().bytesPerRow() % 2 == 1) {
00062 std::cerr << "Cannot demosaic an image with bytesPerRow not divisible by 2" << std::endl;
00063 Image nullImage;
00064 return nullImage;
00065 }
00066
00067 #ifdef FCAM_ARCH_ARM
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139 #define BLOCK_WIDTH 40
00140 #define BLOCK_HEIGHT 24
00141
00142 Image input = src.image();
00143
00144
00145 switch((int)src.bayerPattern()) {
00146 case GRBG:
00147 break;
00148 case RGGB:
00149 input = input.subImage(1, 0, Size(input.width()-2, input.height()));
00150 break;
00151 case BGGR:
00152 input = input.subImage(0, 1, Size(input.width(), input.height()-2));
00153 break;
00154 case GBRG:
00155 input = input.subImage(1, 1, Size(input.width()-2, input.height()-2));
00156 default:
00157 printf("Can't demosaic from a non-bayer sensor\n");
00158 }
00159
00160 int rawWidth = input.width();
00161 int rawHeight = input.height();
00162
00163 #define VEC_WIDTH ((BLOCK_WIDTH + 8)/8)
00164 #define VEC_HEIGHT ((BLOCK_HEIGHT + 8)/2)
00165
00166 int rawPixelsPerRow = input.bytesPerRow()/2 ;
00167
00168 int outWidth = rawWidth-8;
00169 int outHeight = rawHeight-8;
00170 outWidth /= BLOCK_WIDTH;
00171 outWidth *= BLOCK_WIDTH;
00172 outHeight /= BLOCK_HEIGHT;
00173 outHeight *= BLOCK_HEIGHT;
00174
00175 Image out(outWidth, outHeight, RGB24);
00176
00177
00178 if (((input.width() - 8) != (unsigned)outWidth) ||
00179 ((input.height() - 8) != (unsigned)outHeight)) {
00180 int offX = (input.width() - 8 - outWidth)/2;
00181 int offY = (input.height() - 8 - outHeight)/2;
00182 offX -= offX&1;
00183 offY -= offY&1;
00184
00185 if (offX || offY) {
00186 input = input.subImage(offX, offY, Size(outWidth+8, outHeight+8));
00187 }
00188 }
00189
00190 Time startTime = Time::now();
00191
00192
00193 float colorMatrix_f[12];
00194
00195 src.rawToRGBColorMatrix((float *)colorMatrix_f);
00196
00197 int16x4_t colorMatrix[3];
00198 for (int i = 0; i < 3; i++) {
00199 int16_t val = (int16_t)(colorMatrix_f[i*4+0] * 256 + 0.5);
00200 colorMatrix[i] = vld1_lane_s16(&val, colorMatrix[i], 0);
00201 val = (int16_t)(colorMatrix_f[i*4+1] * 256 + 0.5);
00202 colorMatrix[i] = vld1_lane_s16(&val, colorMatrix[i], 1);
00203 val = (int16_t)(colorMatrix_f[i*4+2] * 256 + 0.5);
00204 colorMatrix[i] = vld1_lane_s16(&val, colorMatrix[i], 2);
00205 val = (int16_t)(colorMatrix_f[i*4+3] * 256 + 0.5);
00206 colorMatrix[i] = vld1_lane_s16(&val, colorMatrix[i], 3);
00207 }
00208
00209
00210
00211 uint16_t out16[BLOCK_WIDTH*BLOCK_HEIGHT*3];
00212
00213
00214
00215 int16_t scratch[VEC_WIDTH*VEC_HEIGHT*4*12];
00216
00217 #define R_R_OFF (VEC_WIDTH*VEC_HEIGHT*4*0)
00218 #define R_GR_OFF (VEC_WIDTH*VEC_HEIGHT*4*1)
00219 #define R_GB_OFF (VEC_WIDTH*VEC_HEIGHT*4*2)
00220 #define R_B_OFF (VEC_WIDTH*VEC_HEIGHT*4*3)
00221
00222 #define G_R_OFF (VEC_WIDTH*VEC_HEIGHT*4*4)
00223 #define G_GR_OFF (VEC_WIDTH*VEC_HEIGHT*4*5)
00224 #define G_GB_OFF (VEC_WIDTH*VEC_HEIGHT*4*6)
00225 #define G_B_OFF (VEC_WIDTH*VEC_HEIGHT*4*7)
00226
00227 #define B_R_OFF (VEC_WIDTH*VEC_HEIGHT*4*8)
00228 #define B_GR_OFF (VEC_WIDTH*VEC_HEIGHT*4*9)
00229 #define B_GB_OFF (VEC_WIDTH*VEC_HEIGHT*4*10)
00230 #define B_B_OFF (VEC_WIDTH*VEC_HEIGHT*4*11)
00231
00232 #define R_R(i) (scratch+(i)+R_R_OFF)
00233 #define R_GR(i) (scratch+(i)+R_GR_OFF)
00234 #define R_GB(i) (scratch+(i)+R_GB_OFF)
00235 #define R_B(i) (scratch+(i)+R_B_OFF)
00236
00237 #define G_R(i) (scratch+(i)+G_R_OFF)
00238 #define G_GR(i) (scratch+(i)+G_GR_OFF)
00239 #define G_GB(i) (scratch+(i)+G_GB_OFF)
00240 #define G_B(i) (scratch+(i)+G_B_OFF)
00241
00242 #define B_R(i) (scratch+(i)+B_R_OFF)
00243 #define B_GR(i) (scratch+(i)+B_GR_OFF)
00244 #define B_GB(i) (scratch+(i)+B_GB_OFF)
00245 #define B_B(i) (scratch+(i)+B_B_OFF)
00246
00247
00248 #define G_GR_NOISY B_GR
00249 #define B_B_NOISY G_B
00250 #define R_R_NOISY G_R
00251 #define G_GB_NOISY B_GB
00252
00253
00254 unsigned char lut[4096];
00255 makeLUT(src, contrast, blackLevel, gamma, lut);
00256
00257
00258 for (int by = 0; by < rawHeight-8-BLOCK_HEIGHT+1; by += BLOCK_HEIGHT) {
00259 const short * __restrict__ blockPtr = (const short *)input(0,by);
00260 unsigned char * __restrict__ outBlockPtr = out(0, by);
00261 for (int bx = 0; bx < rawWidth-8-BLOCK_WIDTH+1; bx += BLOCK_WIDTH) {
00262
00263
00264
00265 if (1) {
00266 register const int16_t * __restrict__ rawPtr = blockPtr;
00267 register const int16_t * __restrict__ rawPtr2 = blockPtr + rawPixelsPerRow;
00268
00269 register const int rawJump = rawPixelsPerRow*2 - VEC_WIDTH*8;
00270
00271 register int16_t * __restrict__ g_gr_ptr = denoise ? G_GR_NOISY(0) : G_GR(0);
00272 register int16_t * __restrict__ r_r_ptr = denoise ? R_R_NOISY(0) : R_R(0);
00273 register int16_t * __restrict__ b_b_ptr = denoise ? B_B_NOISY(0) : B_B(0);
00274 register int16_t * __restrict__ g_gb_ptr = denoise ? G_GB_NOISY(0) : G_GB(0);
00275
00276 for (int y = 0; y < VEC_HEIGHT; y++) {
00277 for (int x = 0; x < VEC_WIDTH/2; x++) {
00278
00279 asm volatile ("# Stage 1) Demux\n");
00280
00281
00282
00283
00284 asm volatile (
00285 "vld2.16 {d6-d9}, [%[rawPtr]]! \n\t"
00286 "vld2.16 {d10-d13}, [%[rawPtr2]]! \n\t"
00287 "vst1.16 {d6-d7}, [%[g_gr_ptr]]! \n\t"
00288 "vst1.16 {d8-d9}, [%[r_r_ptr]]! \n\t"
00289 "vst1.16 {d10-d11}, [%[b_b_ptr]]! \n\t"
00290 "vst1.16 {d12-d13}, [%[g_gb_ptr]]! \n\t" :
00291 [rawPtr]"+r"(rawPtr),
00292 [rawPtr2]"+r"(rawPtr2),
00293 [g_gr_ptr]"+r"(g_gr_ptr),
00294 [r_r_ptr]"+r"(r_r_ptr),
00295 [b_b_ptr]"+r"(b_b_ptr),
00296 [g_gb_ptr]"+r"(g_gb_ptr) ::
00297 "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "memory");
00298
00299 }
00300
00301 rawPtr += rawJump;
00302 rawPtr2 += rawJump;
00303 }
00304 }
00305
00306
00307
00308
00309
00310 if (denoise) {
00311 register int16_t * __restrict__ ptr_in = NULL;
00312 register int16_t * __restrict__ ptr_out = NULL;
00313 asm volatile("#Stage 1.5: Denoise\n\t");
00314 for (int b=0; b<4; b++) {
00315 if (b==0) ptr_in = G_GR_NOISY(0);
00316 if (b==1) ptr_in = R_R_NOISY(0);
00317 if (b==2) ptr_in = B_B_NOISY(0);
00318 if (b==3) ptr_in = G_GB_NOISY(0);
00319 if (b==0) ptr_out = G_GR(0);
00320 if (b==1) ptr_out = R_R(0);
00321 if (b==2) ptr_out = B_B(0);
00322 if (b==3) ptr_out = G_GB(0);
00323
00324
00325 for (int x = 0; x < (BLOCK_WIDTH+8); x+=8) {
00326 int16x8_t in = vld1q_s16(ptr_in);
00327 vst1q_s16(ptr_out, in);
00328 ptr_in+=8;
00329 ptr_out+=8;
00330 }
00331
00332 for (int y = 1; y < VEC_HEIGHT - 1; y++) {
00333 for (int x = 0; x < VEC_WIDTH/2; x++) {
00334 int16x8_t here = vld1q_s16(ptr_in);
00335 int16x8_t above = vld1q_s16(ptr_in + VEC_WIDTH*4);
00336 int16x8_t under = vld1q_s16(ptr_in - VEC_WIDTH*4);
00337 int16x8_t right = vld1q_s16(ptr_in + 1);
00338 int16x8_t left = vld1q_s16(ptr_in - 1);
00339 int16x8_t max, min;
00340
00341
00342 max = vmaxq_s16(left, right);
00343 max = vmaxq_s16(above, max);
00344 max = vmaxq_s16(under, max);
00345
00346 min = vminq_s16(left, right);
00347 min = vminq_s16(above, min);
00348 min = vminq_s16(under, min);
00349
00350
00351 here = vminq_s16(max, here);
00352 here = vmaxq_s16(min, here);
00353
00354 vst1q_s16(ptr_out, here);
00355 ptr_in += 8;
00356 ptr_out += 8;
00357 }
00358 }
00359
00360
00361 for (int x = 0; x < (BLOCK_WIDTH+8); x+=8) {
00362 int16x8_t in = vld1q_s16(ptr_in);
00363 vst1q_s16(ptr_out, in);
00364 ptr_in+=8;
00365 ptr_out+=8;
00366 }
00367 }
00368 }
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386 if (1) {
00387
00388 int i = VEC_WIDTH*4;
00389
00390 register int16_t *g_gb_up_ptr = G_GB(i) - VEC_WIDTH*4;
00391 register int16_t *g_gb_here_ptr = G_GB(i);
00392 register int16_t *g_gb_left_ptr = G_GB(i) - 1;
00393 register int16_t *g_gr_down_ptr = G_GR(i) + VEC_WIDTH*4;
00394 register int16_t *g_gr_here_ptr = G_GR(i);
00395 register int16_t *g_gr_right_ptr = G_GR(i) + 1;
00396 register int16_t *g_r_ptr = G_R(i);
00397 register int16_t *g_b_ptr = G_B(i);
00398
00399 for (int y = 1; y < VEC_HEIGHT-1; y++) {
00400 for (int x = 0; x < VEC_WIDTH/2; x++) {
00401
00402 asm volatile ("#Stage 2) Green interpolation\n");
00403
00404
00405
00406 int16x8_t gb_up = vld1q_s16(g_gb_up_ptr);
00407 g_gb_up_ptr+=8;
00408 int16x8_t gb_here = vld1q_s16(g_gb_here_ptr);
00409 g_gb_here_ptr+=8;
00410 int16x8_t gb_left = vld1q_s16(g_gb_left_ptr);
00411 g_gb_left_ptr+=8;
00412 int16x8_t gr_down = vld1q_s16(g_gr_down_ptr);
00413 g_gr_down_ptr+=8;
00414 int16x8_t gr_here = vld1q_s16(g_gr_here_ptr);
00415 g_gr_here_ptr+=8;
00416 int16x8_t gr_right = vld1q_s16(g_gr_right_ptr);
00417 g_gr_right_ptr+=8;
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449 int16x8_t gv_r = vhaddq_s16(gb_up, gb_here);
00450 int16x8_t gvd_r = vabdq_s16(gb_up, gb_here);
00451 int16x8_t gh_r = vhaddq_s16(gr_right, gr_here);
00452 int16x8_t ghd_r = vabdq_s16(gr_here, gr_right);
00453 int16x8_t g_r = vbslq_s16(vcltq_s16(ghd_r, gvd_r), gh_r, gv_r);
00454
00455 int16x8_t gv_b = vhaddq_s16(gr_down, gr_here);
00456 int16x8_t gvd_b = vabdq_s16(gr_down, gr_here);
00457 int16x8_t gh_b = vhaddq_s16(gb_left, gb_here);
00458 int16x8_t ghd_b = vabdq_s16(gb_left, gb_here);
00459 int16x8_t g_b = vbslq_s16(vcltq_s16(ghd_b, gvd_b), gh_b, gv_b);
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489 vst1q_s16(g_r_ptr, g_r);
00490 g_r_ptr+=8;
00491 vst1q_s16(g_b_ptr, g_b);
00492 g_b_ptr+=8;
00493 }
00494 }
00495 }
00496 asm volatile ("#End of stage 2 (green interpolation)\n");
00497
00498
00499 if (1) {
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520 int i = 2*VEC_WIDTH*4;
00521
00522 for (int y = 2; y < VEC_HEIGHT-2; y++) {
00523 for (int x = 0; x < VEC_WIDTH; x++) {
00524
00525 asm volatile ("#Stage 4) r/b interpolation\n");
00526
00527
00528 int16x4_t r_here = vld1_s16(R_R(i));
00529 int16x4_t r_left = vld1_s16(R_R(i) - 1);
00530 int16x4_t r_down = vld1_s16(R_R(i) + VEC_WIDTH*4);
00531
00532 int16x4_t g_r_left = vld1_s16(G_R(i) - 1);
00533 int16x4_t g_r_here = vld1_s16(G_R(i));
00534 int16x4_t g_r_down = vld1_s16(G_R(i) + VEC_WIDTH*4);
00535
00536 int16x4_t b_up = vld1_s16(B_B(i) - VEC_WIDTH*4);
00537 int16x4_t b_here = vld1_s16(B_B(i));
00538 int16x4_t b_right = vld1_s16(B_B(i) + 1);
00539
00540 int16x4_t g_b_up = vld1_s16(G_B(i) - VEC_WIDTH*4);
00541 int16x4_t g_b_here = vld1_s16(G_B(i));
00542 int16x4_t g_b_right = vld1_s16(G_B(i) + 1);
00543
00544
00545 int16x4_t gr_here = vld1_s16(G_GR(i));
00546 int16x4_t gb_here = vld1_s16(G_GB(i));
00547
00548 {
00549 int16x4_t r_gr = vadd_s16(vhadd_s16(r_left, r_here),
00550 vsub_s16(gr_here,
00551 vhadd_s16(g_r_left, g_r_here)));
00552 int16x4_t r_gb = vadd_s16(vhadd_s16(r_here, r_down),
00553 vsub_s16(gb_here,
00554 vhadd_s16(g_r_down, g_r_here)));
00555 vst1_s16(R_GR(i), r_gr);
00556 vst1_s16(R_GB(i), r_gb);
00557 }
00558
00559 {
00560 int16x4_t r_downleft = vld1_s16(R_R(i) + VEC_WIDTH*4 - 1);
00561 int16x4_t g_r_downleft = vld1_s16(G_R(i) + VEC_WIDTH*4 - 1);
00562
00563 int16x4_t rp_b = vadd_s16(vhadd_s16(r_downleft, r_here),
00564 vsub_s16(g_b_here,
00565 vhadd_s16(g_r_downleft, g_r_here)));
00566 int16x4_t rn_b = vadd_s16(vhadd_s16(r_left, r_down),
00567 vsub_s16(g_b_here,
00568 vhadd_s16(g_r_left, g_r_down)));
00569 int16x4_t rpd_b = vabd_s16(r_downleft, r_here);
00570 int16x4_t rnd_b = vabd_s16(r_left, r_down);
00571 int16x4_t r_b = vbsl_s16(vclt_s16(rpd_b, rnd_b), rp_b, rn_b);
00572 vst1_s16(R_B(i), r_b);
00573 }
00574
00575 {
00576 int16x4_t b_gr = vadd_s16(vhadd_s16(b_up, b_here),
00577 vsub_s16(gr_here,
00578 vhadd_s16(g_b_up, g_b_here)));
00579 int16x4_t b_gb = vadd_s16(vhadd_s16(b_here, b_right),
00580 vsub_s16(gb_here,
00581 vhadd_s16(g_b_right, g_b_here)));
00582 vst1_s16(B_GR(i), b_gr);
00583 vst1_s16(B_GB(i), b_gb);
00584 }
00585
00586 {
00587 int16x4_t b_upright = vld1_s16(B_B(i) - VEC_WIDTH*4 + 1);
00588 int16x4_t g_b_upright = vld1_s16(G_B(i) - VEC_WIDTH*4 + 1);
00589
00590 int16x4_t bp_r = vadd_s16(vhadd_s16(b_upright, b_here),
00591 vsub_s16(g_r_here,
00592 vhadd_s16(g_b_upright, g_b_here)));
00593 int16x4_t bn_r = vadd_s16(vhadd_s16(b_right, b_up),
00594 vsub_s16(g_r_here,
00595 vhadd_s16(g_b_right, g_b_up)));
00596 int16x4_t bpd_r = vabd_s16(b_upright, b_here);
00597 int16x4_t bnd_r = vabd_s16(b_right, b_up);
00598 int16x4_t b_r = vbsl_s16(vclt_s16(bpd_r, bnd_r), bp_r, bn_r);
00599 vst1_s16(B_R(i), b_r);
00600 }
00601
00602
00603 i += 4;
00604 }
00605 }
00606 asm volatile ("#End of stage 4 - what_ever\n\t");
00607 }
00608
00609
00610 if (1) {
00611
00612
00613 asm volatile ("#Stage 10) Color Correction\n");
00614
00615 uint16_t * __restrict__ out16Ptr = out16;
00616
00617 int i = 2*VEC_WIDTH*4;
00618
00619 for (int y = 2; y < VEC_HEIGHT-2; y++) {
00620
00621
00622
00623 int16x4x2_t r0 = vzip_s16(vld1_s16(R_GR(i)), vld1_s16(R_R(i)));
00624 int16x4x2_t g0 = vzip_s16(vld1_s16(G_GR(i)), vld1_s16(G_R(i)));
00625 int16x4x2_t b0 = vzip_s16(vld1_s16(B_GR(i)), vld1_s16(B_R(i)));
00626 i += 4;
00627
00628 for (int x = 1; x < VEC_WIDTH; x++) {
00629
00630 int16x4x2_t r1 = vzip_s16(vld1_s16(R_GR(i)), vld1_s16(R_R(i)));
00631 int16x4x2_t g1 = vzip_s16(vld1_s16(G_GR(i)), vld1_s16(G_R(i)));
00632 int16x4x2_t b1 = vzip_s16(vld1_s16(B_GR(i)), vld1_s16(B_R(i)));
00633
00634
00635 int32x4_t rout = vmovl_s16(vdup_lane_s16(colorMatrix[0], 3));
00636 rout = vmlal_lane_s16(rout, r0.val[1], colorMatrix[0], 0);
00637 rout = vmlal_lane_s16(rout, g0.val[1], colorMatrix[0], 1);
00638 rout = vmlal_lane_s16(rout, b0.val[1], colorMatrix[0], 2);
00639
00640 int32x4_t gout = vmovl_s16(vdup_lane_s16(colorMatrix[1], 3));
00641 gout = vmlal_lane_s16(gout, r0.val[1], colorMatrix[1], 0);
00642 gout = vmlal_lane_s16(gout, g0.val[1], colorMatrix[1], 1);
00643 gout = vmlal_lane_s16(gout, b0.val[1], colorMatrix[1], 2);
00644
00645 int32x4_t bout = vmovl_s16(vdup_lane_s16(colorMatrix[2], 3));
00646 bout = vmlal_lane_s16(bout, r0.val[1], colorMatrix[2], 0);
00647 bout = vmlal_lane_s16(bout, g0.val[1], colorMatrix[2], 1);
00648 bout = vmlal_lane_s16(bout, b0.val[1], colorMatrix[2], 2);
00649
00650 uint16x4x3_t col16;
00651 col16.val[0] = vqrshrun_n_s32(rout, 8);
00652 col16.val[1] = vqrshrun_n_s32(gout, 8);
00653 col16.val[2] = vqrshrun_n_s32(bout, 8);
00654 vst3_u16(out16Ptr, col16);
00655 out16Ptr += 12;
00656
00657 rout = vmovl_s16(vdup_lane_s16(colorMatrix[0], 3));
00658 rout = vmlal_lane_s16(rout, r1.val[0], colorMatrix[0], 0);
00659 rout = vmlal_lane_s16(rout, g1.val[0], colorMatrix[0], 1);
00660 rout = vmlal_lane_s16(rout, b1.val[0], colorMatrix[0], 2);
00661
00662 gout = vmovl_s16(vdup_lane_s16(colorMatrix[1], 3));
00663 gout = vmlal_lane_s16(gout, r1.val[0], colorMatrix[1], 0);
00664 gout = vmlal_lane_s16(gout, g1.val[0], colorMatrix[1], 1);
00665 gout = vmlal_lane_s16(gout, b1.val[0], colorMatrix[1], 2);
00666
00667 bout = vmovl_s16(vdup_lane_s16(colorMatrix[2], 3));
00668 bout = vmlal_lane_s16(bout, r1.val[0], colorMatrix[2], 0);
00669 bout = vmlal_lane_s16(bout, g1.val[0], colorMatrix[2], 1);
00670 bout = vmlal_lane_s16(bout, b1.val[0], colorMatrix[2], 2);
00671
00672 col16.val[0] = vqrshrun_n_s32(rout, 8);
00673 col16.val[1] = vqrshrun_n_s32(gout, 8);
00674 col16.val[2] = vqrshrun_n_s32(bout, 8);
00675 vst3_u16(out16Ptr, col16);
00676 out16Ptr += 12;
00677
00678 r0 = r1;
00679 g0 = g1;
00680 b0 = b1;
00681
00682 i += 4;
00683 }
00684
00685
00686 i -= VEC_WIDTH*4;
00687
00688 r0 = vzip_s16(vld1_s16(R_B(i)), vld1_s16(R_GB(i)));
00689 g0 = vzip_s16(vld1_s16(G_B(i)), vld1_s16(G_GB(i)));
00690 b0 = vzip_s16(vld1_s16(B_B(i)), vld1_s16(B_GB(i)));
00691 i += 4;
00692
00693 for (int x = 1; x < VEC_WIDTH; x++) {
00694 int16x4x2_t r1 = vzip_s16(vld1_s16(R_B(i)), vld1_s16(R_GB(i)));
00695 int16x4x2_t g1 = vzip_s16(vld1_s16(G_B(i)), vld1_s16(G_GB(i)));
00696 int16x4x2_t b1 = vzip_s16(vld1_s16(B_B(i)), vld1_s16(B_GB(i)));
00697
00698
00699 int32x4_t rout = vmovl_s16(vdup_lane_s16(colorMatrix[0], 3));
00700 rout = vmlal_lane_s16(rout, r0.val[1], colorMatrix[0], 0);
00701 rout = vmlal_lane_s16(rout, g0.val[1], colorMatrix[0], 1);
00702 rout = vmlal_lane_s16(rout, b0.val[1], colorMatrix[0], 2);
00703
00704 int32x4_t gout = vmovl_s16(vdup_lane_s16(colorMatrix[1], 3));
00705 gout = vmlal_lane_s16(gout, r0.val[1], colorMatrix[1], 0);
00706 gout = vmlal_lane_s16(gout, g0.val[1], colorMatrix[1], 1);
00707 gout = vmlal_lane_s16(gout, b0.val[1], colorMatrix[1], 2);
00708
00709 int32x4_t bout = vmovl_s16(vdup_lane_s16(colorMatrix[2], 3));
00710 bout = vmlal_lane_s16(bout, r0.val[1], colorMatrix[2], 0);
00711 bout = vmlal_lane_s16(bout, g0.val[1], colorMatrix[2], 1);
00712 bout = vmlal_lane_s16(bout, b0.val[1], colorMatrix[2], 2);
00713
00714 uint16x4x3_t col16;
00715 col16.val[0] = vqrshrun_n_s32(rout, 8);
00716 col16.val[1] = vqrshrun_n_s32(gout, 8);
00717 col16.val[2] = vqrshrun_n_s32(bout, 8);
00718 vst3_u16(out16Ptr, col16);
00719 out16Ptr += 12;
00720
00721 rout = vmovl_s16(vdup_lane_s16(colorMatrix[0], 3));
00722 rout = vmlal_lane_s16(rout, r1.val[0], colorMatrix[0], 0);
00723 rout = vmlal_lane_s16(rout, g1.val[0], colorMatrix[0], 1);
00724 rout = vmlal_lane_s16(rout, b1.val[0], colorMatrix[0], 2);
00725
00726 gout = vmovl_s16(vdup_lane_s16(colorMatrix[1], 3));
00727 gout = vmlal_lane_s16(gout, r1.val[0], colorMatrix[1], 0);
00728 gout = vmlal_lane_s16(gout, g1.val[0], colorMatrix[1], 1);
00729 gout = vmlal_lane_s16(gout, b1.val[0], colorMatrix[1], 2);
00730
00731 bout = vmovl_s16(vdup_lane_s16(colorMatrix[2], 3));
00732 bout = vmlal_lane_s16(bout, r1.val[0], colorMatrix[2], 0);
00733 bout = vmlal_lane_s16(bout, g1.val[0], colorMatrix[2], 1);
00734 bout = vmlal_lane_s16(bout, b1.val[0], colorMatrix[2], 2);
00735
00736 col16.val[0] = vqrshrun_n_s32(rout, 8);
00737 col16.val[1] = vqrshrun_n_s32(gout, 8);
00738 col16.val[2] = vqrshrun_n_s32(bout, 8);
00739 vst3_u16(out16Ptr, col16);
00740 out16Ptr += 12;
00741
00742 r0 = r1;
00743 g0 = g1;
00744 b0 = b1;
00745
00746 i += 4;
00747 }
00748 }
00749 asm volatile("#End of stage 10) - color correction\n\t");
00750 }
00751
00752
00753 if (1) {
00754
00755 asm volatile("#Gamma Correction\n");
00756
00757 const uint16_t * __restrict__ out16Ptr = out16;
00758
00759 for (int y = 0; y < BLOCK_HEIGHT; y++) {
00760 unsigned int * __restrict__ outPtr32 = (unsigned int *)(outBlockPtr + y * outWidth * 3);
00761 for (int x = 0; x < (BLOCK_WIDTH*3)/4; x++) {
00762 unsigned val = ((lut[out16Ptr[0]] << 0) |
00763 (lut[out16Ptr[1]] << 8) |
00764 (lut[out16Ptr[2]] << 16) |
00765 (lut[out16Ptr[3]] << 24));
00766 *outPtr32++ = val;
00767 out16Ptr += 4;
00768
00769 }
00770 }
00771 asm volatile("#end of Gamma Correction\n");
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783 }
00784
00785
00786 blockPtr += BLOCK_WIDTH;
00787 outBlockPtr += BLOCK_WIDTH*3;
00788 }
00789 }
00790
00791
00792 #else
00793 Image out;
00794 #endif
00795
00796 return out;
00797 }
00798
#ifdef __arm__
/** Hand-written NEON fast path producing a 640x480 RGB24 thumbnail.
 *
 * The geometry is fixed: a 2560x1920 window starting at (16, 24) - the
 * centre of a 2592x1968 frame - is reduced 4:1 in each direction. Every
 * thumbnail pixel is built from one 4x4 raw tile: the same-color samples
 * in the tile are summed, the color matrix is applied in 8.8 fixed point,
 * the result is clamped to [0, 1023] and mapped through the LUT built by
 * makeLUT.
 *
 * The assembly assigns the even columns of the first row to green and the
 * odd columns to red, i.e. it is written for a GRBG mosaic. The only
 * caller in this file checks for that pattern (and for the exact frame
 * and thumbnail size) before dispatching here.
 *
 * @param src        Frame holding the raw image.
 * @param contrast   Passed through to makeLUT.
 * @param blackLevel Passed through to makeLUT.
 * @param gamma      Passed through to makeLUT.
 * @return The thumbnail, or a default-constructed Image if the source
 *         image is invalid or too small to contain the sampled window.
 */
Image makeThumbnailRAW_N900(Frame src, float contrast, int blackLevel, float gamma) {
    const unsigned int w = 2592, tw = 640;
    const unsigned int h = 1968, th = 480;
    const unsigned int scale = 4;           // fixed by the assembly below
    const unsigned int cw = tw*scale;
    const unsigned int ch = th*scale;
    const unsigned int startX = (w-cw)/2;
    const unsigned int startY = (h-ch)/2;

    // Refuse frames that do not contain the whole window; the assembly
    // reads it unconditionally.
    Image raw = src.image();
    if (!raw.valid() || raw.width() < startX + cw || raw.height() < startY + ch) {
        return Image();
    }

    Image thumb(640, 480, RGB24);
    const unsigned int bytesPerRow = raw.bytesPerRow();

    // Tone curve.
    unsigned char lut[4096];
    makeLUT(src, contrast, blackLevel, gamma, lut);

    unsigned char *row = raw(startX, startY);

    float colorMatrix_f[12];
    src.rawToRGBColorMatrix(colorMatrix_f);

    // These variables are pinned to specific NEON registers because the
    // assembly templates refer to d0-d2 and d20-d22 by name rather than
    // through operands; listing them as operands keeps the compiler from
    // reusing those registers between the two asm statements.
    register int16x4_t colorMatrix0 asm ("d0");
    register int16x4_t colorMatrix1 asm ("d1");
    register int16x4_t colorMatrix2 asm ("d2");
    register int16x4_t wCoord asm ("d20");
    register int16x4_t maxValue asm ("d21");
    register int16x4_t minValue asm ("d22");

    asm volatile(
        // Load the 12 floats, convert to fixed point with 8 fractional
        // bits, and narrow each matrix row to four int16 lanes.
        "vldm %[colorMatrix_f], {q2,q3,q4}  \n\t"
        "vcvt.s32.f32 q2, q2, #8  \n\t"
        "vcvt.s32.f32 q3, q3, #8  \n\t"
        "vcvt.s32.f32 q4, q4, #8  \n\t"
        "vmovn.i32 d0, q2         \n\t"
        "vmovn.i32 d1, q3         \n\t"
        "vmovn.i32 d2, q4         \n\t"
        // d20 = 4: multiplier for the matrix offset column, matching the
        // four-sample sums fed to the other three columns.
        "vmov.i16  d20, #0x4   \n\t"
        // d21 = 0x03FF (upper clamp), d22 = 0 (lower clamp).
        "vmov.i16  d21, #0x00FF   \n\t"
        "vorr.i16  d21, #0x0300   \n\t"
        "vmov.i16  d22, #0x0   \n\t"
        : [colorMatrix0] "=w" (colorMatrix0),
          [colorMatrix1] "=w" (colorMatrix1),
          [colorMatrix2] "=w" (colorMatrix2),
          [wCoord] "=w" (wCoord),
          [maxValue] "=w" (maxValue),
          [minValue] "=w" (minValue)
        :  [colorMatrix_f] "r" (colorMatrix_f)
        : "memory",
          "d3", "d4", "d5", "d6", "d7", "d8", "d9");

    // Each outer iteration consumes four raw rows and produces one
    // thumbnail row; each inner iteration consumes 16 raw columns and
    // produces four thumbnail pixels (12 bytes).
    for (unsigned int ty = 0; ty < th; ty++, row += 4*bytesPerRow) {
        unsigned short *px0 = (unsigned short *)row;
        unsigned short *px1 = (unsigned short *)(row+1*bytesPerRow);
        unsigned short *px2 = (unsigned short *)(row+2*bytesPerRow);
        unsigned short *px3 = (unsigned short *)(row+3*bytesPerRow);

        unsigned char *dst = thumb(0,ty);
        for (unsigned int tx = 0; tx < tw; tx += scale) {

            asm volatile(
                // Load 16 pixels from each of the four rows, split into
                // even and odd columns:
                //   d4-d5  = row 0 even (G)   d6-d7   = row 0 odd (R)
                //   d8-d9  = row 1 even (B)   d10-d11 = row 1 odd (G)
                //   d12-d13= row 2 even (G)   d14-d15 = row 2 odd (R)
                //   d16-d17= row 3 even (B)   d18-d19 = row 3 odd (G)
                "vld2.16 {d4-d7}, [%[px0]]!   \n\t"
                "vld2.16 {d8-d11}, [%[px1]]!  \n\t"
                "vld2.16 {d12-d15}, [%[px2]]! \n\t"
                "vld2.16 {d16-d19}, [%[px3]]! \n\t"

                // Pairwise add: each lane now holds the sum of the two
                // same-color samples that one row contributes to a tile.
                "vpadd.u16 d4, d4, d5     \n\t"
                "vpadd.u16 d5, d6, d7     \n\t"
                "vpadd.u16 d6, d8, d9     \n\t"
                "vpadd.u16 d7, d10, d11   \n\t"
                "vpadd.u16 d8, d12, d13   \n\t"
                "vpadd.u16 d9, d14, d15   \n\t"
                "vpadd.u16 d10, d16, d17  \n\t"
                "vpadd.u16 d11, d18, d19  \n\t"

                // Combine rows. Green has eight samples per tile, so its
                // two partial sums are halving-added to put it on the
                // same four-sample scale as red (d5) and blue (d6).
                "vadd.u16 d7, d8   \n\t"
                "vadd.u16 d4, d11  \n\t"
                "vhadd.u16 d4, d7  \n\t"
                "vadd.u16 d5, d9   \n\t"
                "vadd.u16 d6, d10  \n\t"

                // Color matrix: q4 = red out, q5 = green out, q6 = blue
                // out, each = R*m[0] + G*m[1] + B*m[2] + 4*m[3].
                "vmull.s16 q4, d5, d0[0]  \n\t"
                "vmlal.s16 q4, d4, d0[1]  \n\t"
                "vmlal.s16 q4, d6, d0[2]  \n\t"
                "vmlal.s16 q4, d20, d0[3] \n\t"

                "vmull.s16 q5, d5, d1[0]  \n\t"
                "vmlal.s16 q5, d4, d1[1]  \n\t"
                "vmlal.s16 q5, d6, d1[2]  \n\t"
                "vmlal.s16 q5, d20, d1[3] \n\t"

                "vmull.s16 q6, d5, d2[0]  \n\t"
                "vmlal.s16 q6, d4, d2[1]  \n\t"
                "vmlal.s16 q6, d6, d2[2]  \n\t"
                "vmlal.s16 q6, d20, d2[3] \n\t"

                // Shift out the 8 fractional bits of the matrix plus the
                // factor of 4 from summing, then clamp to [0, 0x3FF].
                "vrshrn.s32 d3, q4, #10 \n\t"
                "vrshrn.s32 d4, q5, #10 \n\t"
                "vrshrn.s32 d5, q6, #10 \n\t"
                "vmin.s16 d3, d3, d21   \n\t"
                "vmin.s16 d4, d4, d21   \n\t"
                "vmin.s16 d5, d5, d21   \n\t"
                "vmax.s16 d3, d3, d22   \n\t"
                "vmax.s16 d4, d4, d22   \n\t"
                "vmax.s16 d5, d5, d22   \n\t"

                // Gamma lookup and packing. r4, r5, r6 accumulate the 12
                // output bytes R0 G0 B0 R1 | G1 B1 R2 G2 | B2 R3 G3 B3.

                // Red channel of the four pixels.
                "vmov r0,r1, d3 \n\t"

                "uxth r2, r0  \n\t"
                "ldrb r4, [%[gammaTable], r2] \n\t"

                "uxth r2, r0, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r4, r4, r3, LSL #24 \n\t"

                "uxth r2, r1 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "mov  r5, r3, LSL #16 \n\t"

                "uxth r2, r1, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "mov  r6, r3, LSL #8 \n\t"

                // Green channel.
                "vmov r0,r1, d4 \n\t"

                "uxth r2, r0  \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r4, r4, r3, LSL #8 \n\t"

                "uxth r2, r0, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r5, r5, r3 \n\t"

                "uxth r2, r1 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r5, r5, r3, LSL #24 \n\t"

                "uxth r2, r1, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r6, r6, r3, LSL #16 \n\t"

                // Blue channel.
                "vmov r0,r1, d5 \n\t"

                "uxth r2, r0  \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r4, r4, r3, LSL #16 \n\t"

                "uxth r2, r0, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r5, r5, r3, LSL #8 \n\t"

                "uxth r2, r1 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r6, r6, r3 \n\t"

                "uxth r2, r1, ROR #16 \n\t"
                "ldrb r3, [%[gammaTable], r2] \n\t"
                "orr  r6, r6, r3, LSL #24 \n\t"

                // Store the four packed pixels and advance dst.
                "stm %[dst]!, {r4,r5,r6} \n\t"
                : [px0] "+&r" (px0),
                  [px1] "+&r" (px1),
                  [px2] "+&r" (px2),
                  [px3] "+&r" (px3),
                  [dst] "+&r" (dst)
                : [gammaTable] "r" (lut),
                  [colorMatrix0] "w" (colorMatrix0),
                  [colorMatrix1] "w" (colorMatrix1),
                  [colorMatrix2] "w" (colorMatrix2),
                  [wCoord] "w" (wCoord),
                  [maxValue] "w" (maxValue),
                  [minValue] "w" (minValue)
                : "memory",
                  "r0", "r1", "r2", "r3", "r4", "r5", "r6",
                  "d3", "d4", "d5", "d6",
                  "d7", "d8", "d9", "d10",
                  "d11", "d12", "d13", "d14",
                  "d15", "d16", "d17", "d18", "d19"
                );

        }
    }

    return thumb;
}
#endif
01055
01056
01057 Image makeThumbnailRAW(Frame src, const Size &thumbSize, float contrast, int blackLevel, float gamma) {
01058
01059
01060 #ifdef __arm__
01061 if (src.image().width() == 2592 &&
01062 src.image().height() == 1968 &&
01063 thumbSize.width == 640 &&
01064 thumbSize.height == 480 &&
01065 src.bayerPattern() == GRBG) {
01066 return makeThumbnailRAW_N900(src, contrast, blackLevel, gamma);
01067 }
01068 #endif
01069
01070
01071 unsigned char lut[4096];
01072 makeLUT(src, contrast, blackLevel, gamma, lut);
01073
01074 Image thumb;
01076 bool redRowEven, blueRowGreenPixelEven;
01077 switch (src.bayerPattern()) {
01078 case RGGB:
01079 redRowEven = true;
01080 blueRowGreenPixelEven = true;
01081 break;
01082 case BGGR:
01083 redRowEven = false;
01084 blueRowGreenPixelEven = false;
01085 break;
01086 case GRBG:
01087 redRowEven = true;
01088 blueRowGreenPixelEven = false;
01089 break;
01090 case GBRG:
01091 redRowEven = false;
01092 blueRowGreenPixelEven = true;
01093 break;
01094 default:
01095 return thumb;
01096 break;
01097 }
01098
01099 Time startTime = Time::now();
01100
01101 thumb = Image(thumbSize, RGB24);
01102
01103 unsigned int w = src.image().width();
01104 unsigned int h = src.image().height();
01105 unsigned int tw = thumbSize.width;
01106 unsigned int th = thumbSize.height;
01107 short maxPixel = 1023;
01108 short minPixel = 0;
01109 unsigned int scaleX = (int)std::floor((float)w / tw);
01110 unsigned int scaleY = (int)std::floor((float)h / th);
01111 unsigned int scale = std::min(scaleX, scaleY);
01112
01113 int cropX = (w-scale*tw)/2;
01114 if (cropX % 2 == 1) cropX--;
01115 int cropY = (h-scale*th)/2;
01116 if (cropY % 2 == 1) cropY--;
01117
01118 float colorMatrix[12];
01119 src.rawToRGBColorMatrix(colorMatrix);
01120
01121
01122
01123
01124
01125
01126 for (unsigned int ty=0; ty < th; ty++) {
01127 unsigned char *tpix = thumb(0,ty);
01128 for (unsigned int tx=0; tx < tw; tx++) {
01129 unsigned int r=0,g=0,b=0;
01130 unsigned int rc=0,gc=0,bc=0;
01131
01132 unsigned char *rowStart = src.image()(cropX + tx*scale, cropY + ty*scale);
01133
01134 bool isRedRow = ((ty*scale % 2 == 0) == redRowEven);
01135
01136 for(unsigned int i=0; i<scale; i++, rowStart+=src.image().bytesPerRow(), isRedRow = !isRedRow) {
01137 unsigned short *px = (unsigned short *)rowStart;
01138 if (isRedRow) {
01139 bool isRed=((tx*scale)%2==0) == blueRowGreenPixelEven;
01140 for (unsigned int j=0; j < scale; j++, isRed=!isRed) {
01141 if (isRed) {
01142 r += *(px++);
01143 rc++;
01144 } else {
01145 g += *(px++);
01146 gc++;
01147 }
01148 }
01149 } else {
01150 bool isGreen=((tx*scale)%2==0) == blueRowGreenPixelEven;
01151 for (unsigned int j=0; j < scale; j++, isGreen=!isGreen) {
01152 if (isGreen) {
01153 g += *(px++);
01154 gc++;
01155 } else {
01156 b += *(px++);
01157 bc++;
01158 }
01159
01160 }
01161 }
01162 }
01163 float r_sensor = ((float)r / rc);
01164 float g_sensor = ((float)g / gc);
01165 float b_sensor = ((float)b / bc);
01166 float r_srgb = (r_sensor * colorMatrix[0] +
01167 g_sensor * colorMatrix[1] +
01168 b_sensor * colorMatrix[2] +
01169 colorMatrix[3]);
01170 float g_srgb = (r_sensor * colorMatrix[4] +
01171 g_sensor * colorMatrix[5] +
01172 b_sensor * colorMatrix[6] +
01173 colorMatrix[7]);
01174 float b_srgb = (r_sensor * colorMatrix[8] +
01175 g_sensor * colorMatrix[9] +
01176 b_sensor * colorMatrix[10] +
01177 colorMatrix[11]);
01178 unsigned short r_linear = std::min(maxPixel,std::max(minPixel,(short int)std::floor(r_srgb+0.5)));
01179 unsigned short g_linear = std::min(maxPixel,std::max(minPixel,(short int)std::floor(g_srgb+0.5)));
01180 unsigned short b_linear = std::min(maxPixel,std::max(minPixel,(short int)std::floor(b_srgb+0.5)));
01181 *(tpix++) = lut[r_linear];
01182 *(tpix++) = lut[g_linear];
01183 *(tpix++) = lut[b_linear];
01184 }
01185 }
01186
01187
01188
01189 return thumb;
01190
01191 }
01192
01193 Image makeThumbnail(Frame src, const Size &thumbSize, float contrast, int blackLevel, float gamma) {
01194 Image thumb;
01195
01196
01197 if (not src.image().valid()) return thumb;
01198 if (thumbSize.width == 0 or thumbSize.height == 0) return thumb;
01199
01200 switch (src.image().type()) {
01201 case RAW:
01202 thumb = makeThumbnailRAW(src, thumbSize, contrast, blackLevel, gamma);
01203 break;
01204 case RGB24:
01205 case RGB16:
01206 case UYVY:
01207 case YUV24:
01208 case UNKNOWN:
01209 default:
01211
01212 break;
01213 }
01214 return thumb;
01215 }
01216
01217 }