• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

減色プログラム


Commit MetaInfo

Revisión: 4986e10d08106c1adcd49e5dc918735fa3ca7736 (tree)
Tiempo: 2011-05-22 07:22:26
Autor: berupon <berupon@gmai...>
Committer: berupon

Log Message

optimized

Cambiar Resumen

Diferencia incremental

--- a/Color4d.h
+++ b/Color4d.h
@@ -38,11 +38,18 @@ struct Color4d
3838
3939 double dot_product(const Color4d& rhs) {
4040 // http://www.icnet.ne.jp/~nsystem/simd_tobira/dpps.html
41+#if 1
42+ Color4d result = (*this) * rhs;
43+ __m128d v = _mm_add_pd(result.v[0], result.v[1]);
44+ v = _mm_hadd_pd(v, v);
45+ return v.m128d_f64[0];
46+#else
4147 double result = 0;
4248 for (int i=0; i<3; i++) {
4349 result += (*this)[i] * rhs[i];
4450 }
4551 return result;
52+#endif
4653 }
4754
4855 Color4d& operator += (const Color4d& rhs) {
@@ -51,16 +58,16 @@ struct Color4d
5158 return *this;
5259 }
5360
61+/*
5462 Color4d& operator += (const Color4f& rhs) {
5563 v[0] = _mm_add_pd(v[0], _mm_cvtps_pd(rhs.v));
5664 v[1] = _mm_add_pd(v[1], _mm_cvtps_pd(_mm_movehl_ps(rhs.v, rhs.v)));
5765 return *this;
5866 }
67+*/
5968
6069 Color4d operator + (const Color4d& rhs) {
61- Color4d result(*this);
62- result += rhs;
63- return result;
70+ return Color4d(*this) += rhs;
6471 }
6572
6673 Color4d& operator -= (const Color4d& rhs) {
@@ -70,9 +77,7 @@ struct Color4d
7077 }
7178
7279 Color4d operator - (const Color4d& rhs) {
73- Color4d result(*this);
74- result -= rhs;
75- return result;
80+ return Color4d(*this) -= rhs;
7681 }
7782
7883 Color4d& operator *= (const Color4d& rhs) {
@@ -82,9 +87,7 @@ struct Color4d
8287 }
8388
8489 Color4d operator * (const Color4d& rhs) {
85- Color4d result(*this);
86- result *= rhs;
87- return result;
90+ return Color4d(*this) *= rhs;
8891 }
8992
9093 Color4d& operator *= (double scalar) {
@@ -95,11 +98,9 @@ struct Color4d
9598 }
9699
97100 Color4d operator * (double scalar) {
98- Color4d result(*this);
99- result *= scalar;
100- return result;
101+ return Color4d(*this) *= scalar;
101102 }
102-
103+
103104 double& operator[] (int idx) {
104105 return ((double*)&v)[idx];
105106 }
@@ -108,13 +109,22 @@ struct Color4d
108109 }
109110
110111 double norm_squared() {
112+#if 1
113+ __m128d t = _mm_add_pd(
114+ _mm_mul_pd(v[0], v[0]),
115+ _mm_mul_pd(v[1], v[1])
116+ );
117+ t = _mm_hadd_pd(t, t);
118+ return t.m128d_f64[0];
119+#else
111120 double result = 0;
112121 for (int i=0; i<3; i++) {
113122 result += (*this)[i] * (*this)[i];
114123 }
115124 return result;
125+#endif
116126 }
117-
127+
118128 void zero() {
119129 v[0] = _mm_setzero_pd();
120130 v[1] = _mm_setzero_pd();
@@ -122,7 +132,10 @@ struct Color4d
122132 };
123133
124134 inline Color4d operator * (double scalar, const Color4d& c) {
125- Color4d tmp = c;
126- return tmp * scalar;
135+ return Color4d(c) *= scalar;
136+}
137+
138+inline Color4d operator * (const Color4d& c, double scalar) {
139+ return Color4d(c) *= scalar;
127140 }
128141
--- a/Color4f.h
+++ b/Color4f.h
@@ -1,6 +1,6 @@
11 #pragma once
22
3-#include <emmintrin.h>
3+#include <intrin.h>
44
55 // http://www2.kobe-u.ac.jp/~lerl2/l_cc_p_10.1.008/doc/main_cls/mergedProjects/intref_cls/
66
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -293,18 +293,17 @@ void compute_initial_s(
293293 )
294294 {
295295 size_t palette_size = s.width_;
296- int coarse_width = coarse_variables.width_;
297- int coarse_height = coarse_variables.height_;
298- int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
299-
300- Color center_b = b_value(b,0,0,0,0);
301296 Color zero_vector;
302297 zero_vector.zero();
303298 for (size_t v=0; v<palette_size; ++v) {
304- for (size_t alpha=v; alpha<palette_size; ++alpha) {
305- s[alpha][v] = zero_vector;
299+ for (size_t v2=0; v2<v+1; ++v2) {
300+ s[v][v2] = zero_vector;
306301 }
307302 }
303+ const int coarse_width = coarse_variables.width_;
304+ const int coarse_height = coarse_variables.height_;
305+ const int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
306+ const Color center_b = b_value(b,0,0,0,0);
308307 for (int i_y=0; i_y<coarse_height; ++i_y) {
309308 for (int i_x=0; i_x<coarse_width; ++i_x) {
310309 const double* p_icv = &coarse_variables(i_x, i_y, 0);
@@ -313,15 +312,20 @@ void compute_initial_s(
313312 for (size_t j_y=max<int>(0, i_y - center_y); j_y<max_j_y; ++j_y) {
314313 for (int j_x=max<int>(0, i_x - center_x); j_x<max_j_x; ++j_x) {
315314 if (i_x == j_x && i_y == j_y) continue;
316- Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
315+ const Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
316+ const double* p_jcv = &coarse_variables(j_x, j_y, 0);
317317 for (size_t v=0; v<palette_size; ++v) {
318- Color b_ij2 = b_ij * p_icv[v];
319- const double* p_jcv = &coarse_variables(j_x, j_y, v);
318+ const Color b_ij2 = b_ij * p_icv[v];
319+ const double* p_jcv2 = p_jcv++;
320+ double jcv = *p_jcv2;
320321 Color* ps = s.pBuff_ + v * palette_size + v;
321322 // TODO: 変更画像、縦方向ではなく横方向に操作する。後で転置。
322323 for (size_t alpha=v; alpha<palette_size; ++alpha) {
323- *ps += (*p_jcv++) * b_ij2;
324+ ++p_jcv2;
325+ double njcv = *p_jcv2;
326+ *ps += jcv * b_ij2;
324327 ps += palette_size;
328+ jcv = njcv;
325329 }
326330 }
327331 }
@@ -343,27 +347,34 @@ void update_s(
343347 const double delta
344348 )
345349 {
346- const size_t palette_size = s.width_;
347- const int coarse_width = coarse_variables.width_;
348- const int coarse_height = coarse_variables.height_;
350+ const size_t palette_size = s.width_;
349351 const int center_x = (b.width_-1) / 2;
350352 const int center_y = (b.height_-1) / 2;
351- const int max_i_x = min(coarse_width, j_x + center_x + 1);
352- const int max_i_y = min<int>(coarse_height, j_y + center_y + 1);
353+ const size_t max_i_x = min<int>(coarse_variables.width_, j_x + center_x + 1);
354+ const size_t max_i_y = min<int>(coarse_variables.height_, j_y + center_y + 1);
353355 for (size_t i_y=max(0, j_y - center_y); i_y<max_i_y; ++i_y) {
354356 for (size_t i_x=max(0, j_x - center_x); i_x<max_i_x; ++i_x) {
355- const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
356357 if (i_x == j_x && i_y == j_y) continue;
358+ const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
357359 Color* ps = s[alpha];
358360 const double* p_cv = &coarse_variables(i_x, i_y, 0);
361+ double cv = *p_cv;
359362 for (size_t v=0; v<=alpha; ++v) {
360- ps[v] += (*p_cv++) * delta_b_ij;
363+ ++p_cv;
364+ double ncv = *p_cv;
365+ *ps += cv * delta_b_ij;
366+ ++ps;
367+ cv = ncv;
361368 }
362369 --p_cv;
363- ps += alpha;
370+ --ps;
371+ cv = *p_cv;
364372 for (size_t v=alpha; v<palette_size; ++v) {
365- *ps += (*p_cv++) * delta_b_ij;
373+ ++p_cv;
374+ double ncv = *p_cv;
375+ *ps += cv * delta_b_ij;
366376 ps += palette_size;
377+ cv = ncv;
367378 }
368379 }
369380 }
@@ -539,6 +550,8 @@ void spatial_color_quant(
539550
540551 // Compute 2*sum(j in extended neighborhood of i, j != i) b_ij
541552
553+ const int radius_width = (b.width_ - 1)/2;
554+ const int radius_height = (b.height_ - 1)/2;
542555 while (!visit_queue.empty()) {
543556 // If we get to 10% above initial size, just revisit them all
544557 if (visit_queue.size() > coarse_variables.width_*coarse_variables.height_*11.0/10) {
@@ -552,16 +565,45 @@ void spatial_color_quant(
552565 // Compute (25)
553566 Color p_i;
554567 p_i.zero();
555- for (int y=0; y<b.height_; ++y) {
556- int j_y = y - center_y + i_y;
557- if (j_y < 0 || j_y >= coarse_variables.height_) continue;
558- for (int x=0; x<b.width_; ++x) {
559- int j_x = x - center_x + i_x;
560- if (i_x == j_x && i_y == j_y) continue;
561- if (j_x < 0 || j_x >= coarse_variables.width_) continue;
562- Color b_ij = b_value(b, i_x, i_y, j_x, j_y);
563- Color j_pal = (*j_palette_sum)[j_y][j_x];
564- p_i += b_ij * j_pal;
568+ int sy = i_y - center_y;
569+ int ey = sy + b.height_;
570+ sy = max(sy, 0);
571+ ey = min<int>(ey, coarse_variables.height_);
572+ int ky = sy - i_y + radius_height;
573+ if (ky < 0) {
574+ sy += -ky;
575+ }
576+ ky = ey - 1 - i_y + radius_height;
577+ if (ky >= b.height_) {
578+ ey -= (ky - b.height_) + 1;
579+ }
580+ int sx = i_x - center_x;
581+ int ex = sx + b.width_;
582+ sx = max(sx, 0);
583+ ex = min<int>(ex, coarse_variables.width_);
584+ int kx = sx - i_x + radius_width;
585+ if (kx < 0) {
586+ sx += -kx;
587+ }
588+ kx = ex - 1 - i_x + radius_width;
589+ if (kx >= b.width_) {
590+ ex -= (kx - b.width_) + 1;
591+ }
592+ for (int y=sy; y<ey; ++y) {
593+ int k_y = y - i_y + radius_height;
594+ Color* cb = b[k_y] + sx - i_x + radius_width;
595+ Color* cp = (*j_palette_sum)[y]+sx;
596+ if (i_y == y) {
597+ for (int x=sx; x<ex; ++x) {
598+ Color c = (*cb++) * (*cp++);
599+ if (i_x != x) {
600+ p_i += c;
601+ }
602+ }
603+ }else {
604+ for (int x=sx; x<ex; ++x) {
605+ p_i += (*cb++) * (*cp++);
606+ }
565607 }
566608 }
567609 p_i *= 2.0;
@@ -569,13 +611,13 @@ void spatial_color_quant(
569611
570612 double max_meanfield_log = -numeric_limits<double>::infinity();
571613 double meanfield_sum = 0.0;
614+ double minus_inv_temperature = -1.0 / temperature;
572615 for (size_t v=0; v<num_colors; ++v) {
573616 // Update m_{pi(i)v}^I according to (23)
574617 // We can subtract an arbitrary factor to prevent overflow,
575618 // since only the weight relative to the sum matters, so we
576619 // will choose a value that makes the maximum e^100.
577- Color p_i2; p_i2 = p_i;
578- double m = -(palette[v].dot_product(p_i2 + middle_b.direct_product(palette[v]))) / temperature;
620+ double m = palette[v].dot_product(p_i + middle_b.direct_product(palette[v])) * minus_inv_temperature;
579621 meanfield_logs[v] = m;
580622 if (m > max_meanfield_log) {
581623 max_meanfield_log = m;