color quantizer
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revisión	4986e10d08106c1adcd49e5dc918735fa3ca7736 (tree)
Tiempo	2011-05-22 07:22:26
Autor	berupon <berupon@gmai...>
Commiter	berupon

Log Message

optimized

Cambiar Resumen

modified: Color4d.h (diff)
modified: Color4f.h (diff)
modified: quantize.cpp (diff)

Diferencia incremental

--- a/Color4d.h

+++ b/Color4d.h

		@@ -38,11 +38,18 @@ struct Color4d
38	38
39	39	double dot_product(const Color4d& rhs) {
40	40	// http://www.icnet.ne.jp/~nsystem/simd_tobira/dpps.html
	41	+#if 1
	42	+ Color4d result = (this) rhs;
	43	+ __m128d v = _mm_add_pd(result.v[0], result.v[1]);
	44	+ v = _mm_hadd_pd(v, v);
	45	+ return v.m128d_f64[0];
	46	+#else
41	47	double result = 0;
42	48	for (int i=0; i<3; i++) {
43	49	result += (this)[i] rhs[i];
44	50	}
45	51	return result;
	52	+#endif
46	53	}
47	54
48	55	Color4d& operator += (const Color4d& rhs) {

		@@ -51,16 +58,16 @@ struct Color4d
51	58	return *this;
52	59	}
53	60
	61	+/*
54	62	Color4d& operator += (const Color4f& rhs) {
55	63	v[0] = _mm_add_pd(v[0], _mm_cvtps_pd(rhs.v));
56	64	v[1] = _mm_add_pd(v[1], _mm_cvtps_pd(_mm_movehl_ps(rhs.v, rhs.v)));
57	65	return *this;
58	66	}
	67	+*/
59	68
60	69	Color4d operator + (const Color4d& rhs) {
61		- Color4d result(*this);
62		- result += rhs;
63		- return result;
	70	+ return Color4d(*this) += rhs;
64	71	}
65	72
66	73	Color4d& operator -= (const Color4d& rhs) {

		@@ -70,9 +77,7 @@ struct Color4d
70	77	}
71	78
72	79	Color4d operator - (const Color4d& rhs) {
73		- Color4d result(*this);
74		- result -= rhs;
75		- return result;
	80	+ return Color4d(*this) -= rhs;
76	81	}
77	82
78	83	Color4d& operator *= (const Color4d& rhs) {

		@@ -82,9 +87,7 @@ struct Color4d
82	87	}
83	88
84	89	Color4d operator * (const Color4d& rhs) {
85		- Color4d result(*this);
86		- result *= rhs;
87		- return result;
	90	+ return Color4d(this) = rhs;
88	91	}
89	92
90	93	Color4d& operator *= (double scalar) {

		@@ -95,11 +98,9 @@ struct Color4d
95	98	}
96	99
97	100	Color4d operator * (double scalar) {
98		- Color4d result(*this);
99		- result *= scalar;
100		- return result;
	101	+ return Color4d(this) = scalar;
101	102	}
102		-
	103	+
103	104	double& operator[] (int idx) {
104	105	return ((double*)&v)[idx];
105	106	}

		@@ -108,13 +109,22 @@ struct Color4d
108	109	}
109	110
110	111	double norm_squared() {
	112	+#if 1
	113	+ __m128d t = _mm_add_pd(
	114	+ _mm_mul_pd(v[0], v[0]),
	115	+ _mm_mul_pd(v[1], v[1])
	116	+ );
	117	+ t = _mm_hadd_pd(t, t);
	118	+ return t.m128d_f64[0];
	119	+#else
111	120	double result = 0;
112	121	for (int i=0; i<3; i++) {
113	122	result += (this)[i] (*this)[i];
114	123	}
115	124	return result;
	125	+#endif
116	126	}
117		-
	127	+
118	128	void zero() {
119	129	v[0] = _mm_setzero_pd();
120	130	v[1] = _mm_setzero_pd();

		@@ -122,7 +132,10 @@ struct Color4d
122	132	};
123	133
124	134	inline Color4d operator * (double scalar, const Color4d& c) {
125		- Color4d tmp = c;
126		- return tmp * scalar;
	135	+ return Color4d(c) *= scalar;
	136	+}
	137	+
	138	+inline Color4d operator * (const Color4d& c, double scalar) {
	139	+ return Color4d(c) *= scalar;
127	140	}
128	141

--- a/Color4f.h

+++ b/Color4f.h

		@@ -1,6 +1,6 @@
1	1	#pragma once
2	2
3		-#include <emmintrin.h>
	3	+#include <intrin.h>
4	4
5	5	// http://www2.kobe-u.ac.jp/~lerl2/l_cc_p_10.1.008/doc/main_cls/mergedProjects/intref_cls/
6	6

--- a/quantize.cpp

+++ b/quantize.cpp

		@@ -293,18 +293,17 @@ void compute_initial_s(
293	293	)
294	294	{
295	295	size_t palette_size = s.width_;
296		- int coarse_width = coarse_variables.width_;
297		- int coarse_height = coarse_variables.height_;
298		- int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
299		-
300		- Color center_b = b_value(b,0,0,0,0);
301	296	Color zero_vector;
302	297	zero_vector.zero();
303	298	for (size_t v=0; v<palette_size; ++v) {
304		- for (size_t alpha=v; alpha<palette_size; ++alpha) {
305		- s[alpha][v] = zero_vector;
	299	+ for (size_t v2=0; v2<v+1; ++v2) {
	300	+ s[v][v2] = zero_vector;
306	301	}
307	302	}
	303	+ const int coarse_width = coarse_variables.width_;
	304	+ const int coarse_height = coarse_variables.height_;
	305	+ const int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
	306	+ const Color center_b = b_value(b,0,0,0,0);
308	307	for (int i_y=0; i_y<coarse_height; ++i_y) {
309	308	for (int i_x=0; i_x<coarse_width; ++i_x) {
310	309	const double* p_icv = &coarse_variables(i_x, i_y, 0);

		@@ -313,15 +312,20 @@ void compute_initial_s(
313	312	for (size_t j_y=max<int>(0, i_y - center_y); j_y<max_j_y; ++j_y) {
314	313	for (int j_x=max<int>(0, i_x - center_x); j_x<max_j_x; ++j_x) {
315	314	if (i_x == j_x && i_y == j_y) continue;
316		- Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
	315	+ const Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
	316	+ const double* p_jcv = &coarse_variables(j_x, j_y, 0);
317	317	for (size_t v=0; v<palette_size; ++v) {
318		- Color b_ij2 = b_ij * p_icv[v];
319		- const double* p_jcv = &coarse_variables(j_x, j_y, v);
	318	+ const Color b_ij2 = b_ij * p_icv[v];
	319	+ const double* p_jcv2 = p_jcv++;
	320	+ double jcv = *p_jcv2;
320	321	Color* ps = s.pBuff_ + v * palette_size + v;
321	322	// TODO: 変更画像、縦方向ではなく横方向に操作する。後で転置。
322	323	for (size_t alpha=v; alpha<palette_size; ++alpha) {
323		- ps += (p_jcv++) * b_ij2;
	324	+ ++p_jcv2;
	325	+ double njcv = *p_jcv2;
	326	+ ps += jcv b_ij2;
324	327	ps += palette_size;
	328	+ jcv = njcv;
325	329	}
326	330	}
327	331	}

		@@ -343,27 +347,34 @@ void update_s(
343	347	const double delta
344	348	)
345	349	{
346		- const size_t palette_size = s.width_;
347		- const int coarse_width = coarse_variables.width_;
348		- const int coarse_height = coarse_variables.height_;
	350	+ const size_t palette_size = s.width_;
349	351	const int center_x = (b.width_-1) / 2;
350	352	const int center_y = (b.height_-1) / 2;
351		- const int max_i_x = min(coarse_width, j_x + center_x + 1);
352		- const int max_i_y = min<int>(coarse_height, j_y + center_y + 1);
	353	+ const size_t max_i_x = min<int>(coarse_variables.width_, j_x + center_x + 1);
	354	+ const size_t max_i_y = min<int>(coarse_variables.height_, j_y + center_y + 1);
353	355	for (size_t i_y=max(0, j_y - center_y); i_y<max_i_y; ++i_y) {
354	356	for (size_t i_x=max(0, j_x - center_x); i_x<max_i_x; ++i_x) {
355		- const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
356	357	if (i_x == j_x && i_y == j_y) continue;
	358	+ const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
357	359	Color* ps = s[alpha];
358	360	const double* p_cv = &coarse_variables(i_x, i_y, 0);
	361	+ double cv = *p_cv;
359	362	for (size_t v=0; v<=alpha; ++v) {
360		- ps[v] += (p_cv++) delta_b_ij;
	363	+ ++p_cv;
	364	+ double ncv = *p_cv;
	365	+ ps += cv delta_b_ij;
	366	+ ++ps;
	367	+ cv = ncv;
361	368	}
362	369	--p_cv;
363		- ps += alpha;
	370	+ --ps;
	371	+ cv = *p_cv;
364	372	for (size_t v=alpha; v<palette_size; ++v) {
365		- ps += (p_cv++) * delta_b_ij;
	373	+ ++p_cv;
	374	+ double ncv = *p_cv;
	375	+ ps += cv delta_b_ij;
366	376	ps += palette_size;
	377	+ cv = ncv;
367	378	}
368	379	}
369	380	}

		@@ -539,6 +550,8 @@ void spatial_color_quant(
539	550
540	551	// Compute 2*sum(j in extended neighborhood of i, j != i) b_ij
541	552
	553	+ const int radius_width = (b.width_ - 1)/2;
	554	+ const int radius_height = (b.height_ - 1)/2;
542	555	while (!visit_queue.empty()) {
543	556	// If we get to 10% above initial size, just revisit them all
544	557	if (visit_queue.size() > coarse_variables.width_coarse_variables.height_11.0/10) {

		@@ -552,16 +565,45 @@ void spatial_color_quant(
552	565	// Compute (25)
553	566	Color p_i;
554	567	p_i.zero();
555		- for (int y=0; y<b.height_; ++y) {
556		- int j_y = y - center_y + i_y;
557		- if (j_y < 0 \|\| j_y >= coarse_variables.height_) continue;
558		- for (int x=0; x<b.width_; ++x) {
559		- int j_x = x - center_x + i_x;
560		- if (i_x == j_x && i_y == j_y) continue;
561		- if (j_x < 0 \|\| j_x >= coarse_variables.width_) continue;
562		- Color b_ij = b_value(b, i_x, i_y, j_x, j_y);
563		- Color j_pal = (*j_palette_sum)[j_y][j_x];
564		- p_i += b_ij * j_pal;
	568	+ int sy = i_y - center_y;
	569	+ int ey = sy + b.height_;
	570	+ sy = max(sy, 0);
	571	+ ey = min<int>(ey, coarse_variables.height_);
	572	+ int ky = sy - i_y + radius_height;
	573	+ if (ky < 0) {
	574	+ sy += -ky;
	575	+ }
	576	+ ky = ey - 1 - i_y + radius_height;
	577	+ if (ky >= b.height_) {
	578	+ ey -= (ky - b.height_) + 1;
	579	+ }
	580	+ int sx = i_x - center_x;
	581	+ int ex = sx + b.width_;
	582	+ sx = max(sx, 0);
	583	+ ex = min<int>(ex, coarse_variables.width_);
	584	+ int kx = sx - i_x + radius_width;
	585	+ if (kx < 0) {
	586	+ sx += -kx;
	587	+ }
	588	+ kx = ex - 1 - i_x + radius_width;
	589	+ if (kx >= b.width_) {
	590	+ ex -= (kx - b.width_) + 1;
	591	+ }
	592	+ for (int y=sy; y<ey; ++y) {
	593	+ int k_y = y - i_y + radius_height;
	594	+ Color* cb = b[k_y] + sx - i_x + radius_width;
	595	+ Color* cp = (*j_palette_sum)[y]+sx;
	596	+ if (i_y == y) {
	597	+ for (int x=sx; x<ex; ++x) {
	598	+ Color c = (cb++) (*cp++);
	599	+ if (i_x != x) {
	600	+ p_i += c;
	601	+ }
	602	+ }
	603	+ }else {
	604	+ for (int x=sx; x<ex; ++x) {
	605	+ p_i += (cb++) (*cp++);
	606	+ }
565	607	}
566	608	}
567	609	p_i *= 2.0;

		@@ -569,13 +611,13 @@ void spatial_color_quant(
569	611
570	612	double max_meanfield_log = -numeric_limits<double>::infinity();
571	613	double meanfield_sum = 0.0;
	614	+ double minus_inv_temperature = -1.0 / temperature;
572	615	for (size_t v=0; v<num_colors; ++v) {
573	616	// Update m_{pi(i)v}^I according to (23)
574	617	// We can subtract an arbitrary factor to prevent overflow,
575	618	// since only the weight relative to the sum matters, so we
576	619	// will choose a value that makes the maximum e^100.
577		- Color p_i2; p_i2 = p_i;
578		- double m = -(palette[v].dot_product(p_i2 + middle_b.direct_product(palette[v]))) / temperature;
	620	+ double m = palette[v].dot_product(p_i + middle_b.direct_product(palette[v])) * minus_inv_temperature;
579	621	meanfield_logs[v] = m;
580	622	if (m > max_meanfield_log) {
581	623	max_meanfield_log = m;

color quantizer Fork

Commit

Tags

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Cambiar Resumen

Diferencia incremental

color quantizer
Fork