system/corennnnn
Revisión | 42361542c8aa1edc8c1892e1d194ac8d0894ca5a (tree) |
---|---|
Tiempo | 2016-12-09 16:05:38 |
Autor | Chih-Wei Huang <cwhuang@linu...> |
Committer | Chih-Wei Huang |
Software-accelerated Pixel Flinger
Typical graphics workloads on VirtualBox are improved 3X.
Supports both x86 and x86_64.
Similar change by HazouPH <jgrharbers@gmail.com>:
http://review.cyanogenmod.org/#/c/70896/
And by Quanganh pham <quanganh2627@gmail.com>:
http://review.cyanogenmod.org/#/c/97125/
@@ -7,9 +7,16 @@ include $(CLEAR_VARS) | ||
7 | 7 | |
8 | 8 | include $(CLEAR_VARS) |
9 | 9 | PIXELFLINGER_SRC_FILES:= \ |
10 | + codeflinger/CodeCache.cpp \ | |
11 | + format.cpp \ | |
12 | + clear.cpp \ | |
13 | + raster.cpp \ | |
14 | + buffer.cpp | |
15 | + | |
16 | +ifeq ($(filter x86%,$(TARGET_ARCH)),) | |
17 | +PIXELFLINGER_SRC_FILES += \ | |
10 | 18 | codeflinger/ARMAssemblerInterface.cpp \ |
11 | 19 | codeflinger/ARMAssemblerProxy.cpp \ |
12 | - codeflinger/CodeCache.cpp \ | |
13 | 20 | codeflinger/GGLAssembler.cpp \ |
14 | 21 | codeflinger/load_store.cpp \ |
15 | 22 | codeflinger/blending.cpp \ |
@@ -19,10 +26,8 @@ PIXELFLINGER_SRC_FILES:= \ | ||
19 | 26 | pixelflinger.cpp.arm \ |
20 | 27 | trap.cpp.arm \ |
21 | 28 | scanline.cpp.arm \ |
22 | - format.cpp \ | |
23 | - clear.cpp \ | |
24 | - raster.cpp \ | |
25 | - buffer.cpp | |
29 | + | |
30 | +endif | |
26 | 31 | |
27 | 32 | PIXELFLINGER_CFLAGS := -fstrict-aliasing -fomit-frame-pointer |
28 | 33 |
@@ -43,6 +48,18 @@ PIXELFLINGER_SRC_FILES_arm64 := \ | ||
43 | 48 | arch-arm64/col32cb16blend.S \ |
44 | 49 | arch-arm64/t32cb16blend.S \ |
45 | 50 | |
51 | +PIXELFLINGER_SRC_FILES_x86 := \ | |
52 | + codeflinger/x86/X86Assembler.cpp \ | |
53 | + codeflinger/x86/GGLX86Assembler.cpp \ | |
54 | + codeflinger/x86/load_store.cpp \ | |
55 | + codeflinger/x86/blending.cpp \ | |
56 | + codeflinger/x86/texturing.cpp \ | |
57 | + fixed.cpp \ | |
58 | + picker.cpp \ | |
59 | + pixelflinger.cpp \ | |
60 | + trap.cpp \ | |
61 | + scanline.cpp | |
62 | + | |
46 | 63 | ifndef ARCH_MIPS_REV6 |
47 | 64 | PIXELFLINGER_SRC_FILES_mips := \ |
48 | 65 | codeflinger/MIPSAssembler.cpp \ |
@@ -66,6 +83,8 @@ LOCAL_MODULE:= libpixelflinger | ||
66 | 83 | LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES) |
67 | 84 | LOCAL_SRC_FILES_arm := $(PIXELFLINGER_SRC_FILES_arm) |
68 | 85 | LOCAL_SRC_FILES_arm64 := $(PIXELFLINGER_SRC_FILES_arm64) |
86 | +LOCAL_SRC_FILES_x86 := $(PIXELFLINGER_SRC_FILES_x86) | |
87 | +LOCAL_SRC_FILES_x86_64 := $(PIXELFLINGER_SRC_FILES_x86) | |
69 | 88 | LOCAL_SRC_FILES_mips := $(PIXELFLINGER_SRC_FILES_mips) |
70 | 89 | LOCAL_SRC_FILES_mips64 := $(PIXELFLINGER_SRC_FILES_mips64) |
71 | 90 | LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS) |
@@ -73,6 +92,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include | ||
73 | 92 | LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) \ |
74 | 93 | external/safe-iop/include |
75 | 94 | LOCAL_SHARED_LIBRARIES := libcutils liblog libutils |
95 | +LOCAL_WHOLE_STATIC_LIBRARIES_x86 := libenc | |
96 | +LOCAL_WHOLE_STATIC_LIBRARIES_x86_64 := libenc | |
76 | 97 | |
77 | 98 | # Really this should go away entirely or at least not depend on |
78 | 99 | # libhardware, but this at least gets us built. |
@@ -0,0 +1,3 @@ | ||
1 | +ifneq ($(filter x86%,$(TARGET_ARCH)),) | |
2 | +include $(call all-named-subdir-makefiles,x86/libenc) | |
3 | +endif |
@@ -0,0 +1,1507 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.cpp | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#define LOG_TAG "GGLX86Assembler" | |
19 | + | |
20 | +#include <assert.h> | |
21 | +#include <stdint.h> | |
22 | +#include <stdlib.h> | |
23 | +#include <stdio.h> | |
24 | +#include <sys/types.h> | |
25 | +#include <cutils/log.h> | |
26 | + | |
27 | +#include "codeflinger/x86/GGLX86Assembler.h" | |
28 | + | |
29 | +namespace android { | |
30 | + | |
31 | +// ---------------------------------------------------------------------------- | |
32 | +/* Constructor: passes assembly to X86Assembler, default-constructs X86RegisterAllocator and initialises mOptLevel to 7. */ | |
33 | +GGLX86Assembler::GGLX86Assembler(const sp<Assembly>& assembly) | |
34 | + : X86Assembler(assembly), X86RegisterAllocator(), mOptLevel(7) | |
35 | +{ | |
36 | +} | |
37 | +/* Destructor: the body is empty; nothing is released explicitly here. */ | |
38 | +GGLX86Assembler::~GGLX86Assembler() | |
39 | +{ | |
40 | +} | |
41 | +/* reset: calls reset() on both base classes (X86Assembler, X86RegisterAllocator) and stores opt_level in mOptLevel. */ | |
42 | +void GGLX86Assembler::reset(int opt_level) | |
43 | +{ | |
44 | + X86Assembler::reset(); | |
45 | + X86RegisterAllocator::reset(); | |
46 | + mOptLevel = opt_level; | |
47 | +} | |
48 | + | |
49 | +// --------------------------------------------------------------------------- | |
50 | +/* scanline: runs scanline_core() for needs/c, builds a descriptive name for the routine and returns generate(name); if scanline_core() reported an error it logs, calls disassemble(name) and returns -1. */ | |
51 | +int GGLX86Assembler::scanline(const needs_t& needs, context_t const* c) | |
52 | +{ | |
53 | + int err = 0; | |
54 | + err = scanline_core(needs, c); | |
55 | + if (err != 0) | |
56 | + ALOGE("scanline_core failed probably due to running out of the registers: %d\n", err); | |
57 | + | |
58 | + // XXX: in theory, pcForLabel is not valid before generate(). NOTE(review): these lookups also run when scanline_core() returned an error, possibly before either label was emitted - confirm pcForLabel copes with unknown labels | |
59 | + char* fragment_start_pc = pcForLabel("fragment_loop"); | |
60 | + char* fragment_end_pc = pcForLabel("fragment_end"); | |
61 | + const int per_fragment_ins_size = int(fragment_end_pc - fragment_start_pc); | |
62 | + | |
63 | + // build a name for our pipeline (the four needs words plus the fragment_loop..fragment_end distance); NOTE(review): sprintf into name[128] has no length limit, snprintf would be safer | |
64 | + char name[128]; | |
65 | + sprintf(name, | |
66 | + "scanline__%08X:%08X_%08X_%08X [%3d ipp ins size]", | |
67 | + needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ins_size); | |
68 | + | |
69 | + if (err) { | |
70 | + ALOGE("Error while generating ""%s""\n", name); | |
71 | + disassemble(name); | |
72 | + return -1; | |
73 | + } | |
74 | + | |
75 | + return generate(name); | |
76 | +} | |
77 | +/* scanline_core: decodes needs into member state (blend factors, per-component info, masking), emits the prologue and scanline preparation, then the per-fragment code between labels fragment_loop and fragment_end, followed by the discard paths; returns registerFile().status(). NOTE(review): duration is assigned but never read in this function. */ | |
78 | +int GGLX86Assembler::scanline_core(const needs_t& needs, context_t const* c) | |
79 | +{ | |
80 | + int64_t duration = ggl_system_time(); | |
81 | + | |
82 | + mBlendFactorCached = 0; | |
83 | + mBlending = 0; | |
84 | + mMasking = 0; | |
85 | + mAA = GGL_READ_NEEDS(P_AA, needs.p); | |
86 | + mDithering = GGL_READ_NEEDS(P_DITHER, needs.p); | |
87 | + mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER; | |
88 | + mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER; | |
89 | + mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0; | |
90 | + mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0; | |
91 | + mBuilderContext.needs = needs; | |
92 | + mBuilderContext.c = c; | |
93 | + mBuilderContext.Rctx = obtainReg(); //dynamically obtain if used and then immediately recycle it if not used | |
94 | + mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ]; | |
95 | + | |
96 | + // ------------------------------------------------------------------------ | |
97 | + | |
98 | + decodeLogicOpNeeds(needs); | |
99 | + | |
100 | + decodeTMUNeeds(needs, c); | |
101 | + | |
102 | + mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n)); | |
103 | + mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n)); | |
104 | + mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n)); | |
105 | + mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n)); | |
106 | + | |
107 | + if (!mCbFormat.c[GGLFormat::ALPHA].h) { | |
108 | + if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) || | |
109 | + (mBlendSrc == GGL_DST_ALPHA)) { | |
110 | + mBlendSrc = GGL_ONE; | |
111 | + } | |
112 | + if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) || | |
113 | + (mBlendSrcA == GGL_DST_ALPHA)) { | |
114 | + mBlendSrcA = GGL_ONE; | |
115 | + } | |
116 | + if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) || | |
117 | + (mBlendDst == GGL_DST_ALPHA)) { | |
118 | + mBlendDst = GGL_ONE; | |
119 | + } | |
120 | + if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) || | |
121 | + (mBlendDstA == GGL_DST_ALPHA)) { | |
122 | + mBlendDstA = GGL_ONE; | |
123 | + } | |
124 | + } | |
125 | + | |
126 | + // combined blending codes for colour and alpha; tested further down to decide whether the framebuffer pixel must be fetched | |
127 | + const int blending = blending_codes(mBlendSrc, mBlendDst) | | |
128 | + blending_codes(mBlendSrcA, mBlendDstA); | |
129 | + | |
130 | + // XXX: handle special cases, destination not modified... (both branches below are currently empty placeholders) | |
131 | + if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && | |
132 | + (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) { | |
133 | + // Destination unmodified (beware of logic ops) | |
134 | + } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && | |
135 | + (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) { | |
136 | + // Destination is zero (beware of logic ops) | |
137 | + } | |
138 | + | |
139 | + int fbComponents = 0; | |
140 | + const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n); | |
141 | + for (int i=0 ; i<4 ; i++) { | |
142 | + const int mask = 1<<i; | |
143 | + component_info_t& info = mInfo[i]; | |
144 | + int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; | |
145 | + int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; | |
146 | + if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA) | |
147 | + fs = GGL_ONE; | |
148 | + info.masked = !!(masking & mask); | |
149 | + info.inDest = !info.masked && mCbFormat.c[i].h && | |
150 | + ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp)); | |
151 | + if (mCbFormat.components >= GGL_LUMINANCE && | |
152 | + (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) { | |
153 | + info.inDest = false; | |
154 | + } | |
155 | + info.needed = (i==GGLFormat::ALPHA) && | |
156 | + (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS); | |
157 | + info.replaced = !!(mTextureMachine.replaced & mask); | |
158 | + info.iterated = (!info.replaced && (info.inDest || info.needed)); | |
159 | + info.smooth = mSmooth && info.iterated; | |
160 | + info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA); | |
161 | + info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); | |
162 | + | |
163 | + mBlending |= (info.blend ? mask : 0); | |
164 | + mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0; | |
165 | + fbComponents |= mCbFormat.c[i].h ? mask : 0; | |
166 | + } | |
167 | + | |
168 | + mAllMasked = (mMasking == fbComponents); | |
169 | + if (mAllMasked) { | |
170 | + mDithering = 0; | |
171 | + } | |
172 | + | |
173 | + fragment_parts_t parts; | |
174 | + | |
175 | + // ------------------------------------------------------------------------ | |
176 | + callee_work(); | |
177 | + // ------------------------------------------------------------------------ | |
178 | + | |
179 | + mCurSp = -12; // %ebx, %edi, %esi | |
180 | + prepare_esp(0); | |
181 | + build_scanline_preparation(parts, needs); | |
182 | + recycleReg(mBuilderContext.Rctx); | |
183 | + | |
184 | + if (registerFile().status()) | |
185 | + return registerFile().status(); | |
186 | + | |
187 | + // ------------------------------------------------------------------------ | |
188 | + label("fragment_loop"); | |
189 | + // ------------------------------------------------------------------------ | |
190 | + { | |
191 | + Scratch regs(registerFile()); | |
192 | + int temp_reg = -1; | |
193 | + | |
194 | + if (mDithering) { | |
195 | + // update the dither index. | |
196 | + temp_reg = regs.obtain(); | |
197 | + // Loading into a register and computing there should be faster than operating on memory | |
198 | + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg); | |
199 | + ROR(GGL_DITHER_ORDER_SHIFT, temp_reg); | |
200 | + ADD_IMM_TO_REG(1 << (32 - GGL_DITHER_ORDER_SHIFT), temp_reg); | |
201 | + ROR(32 - GGL_DITHER_ORDER_SHIFT, temp_reg); | |
202 | + MOV_REG_TO_MEM(temp_reg, parts.count.offset_ebp, PhysicalReg_EBP); | |
203 | + regs.recycle(temp_reg); | |
204 | + | |
205 | + } | |
206 | + | |
207 | + // XXX: could we do an early alpha-test here in some cases? | |
208 | + // It would probably be used only with smooth-alpha and no texture | |
209 | + // (or no alpha component in the texture). | |
210 | + | |
211 | + // Early z-test | |
212 | + if (mAlphaTest==GGL_ALWAYS) { | |
213 | + build_depth_test(parts, Z_TEST|Z_WRITE); | |
214 | + } else { | |
215 | + // we cannot do the z-write here, because | |
216 | + // it might be killed by the alpha-test later | |
217 | + build_depth_test(parts, Z_TEST); | |
218 | + } | |
219 | + | |
220 | + { // texture coordinates | |
221 | + Scratch scratches(registerFile()); | |
222 | + | |
223 | + // texel generation | |
224 | + build_textures(parts, regs); | |
225 | + | |
226 | + } | |
227 | + | |
228 | + if ((blending & (FACTOR_DST|BLEND_DST)) || | |
229 | + (mMasking && !mAllMasked) || | |
230 | + (mLogicOp & LOGIC_OP_DST)) | |
231 | + { | |
232 | + // blending / logic_op / masking need the framebuffer | |
233 | + mDstPixel.setTo(regs.obtain(), &mCbFormat); | |
234 | + | |
235 | + // load the framebuffer pixel, then spill it to a fresh EBP-relative slot so both registers can be released | |
236 | + comment("fetch color-buffer"); | |
237 | + parts.cbPtr.reg = regs.obtain(); | |
238 | + MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, PhysicalReg_EBP, parts.cbPtr.reg); | |
239 | + load(parts.cbPtr, mDstPixel); | |
240 | + mCurSp = mCurSp - 4; | |
241 | + mDstPixel.offset_ebp = mCurSp; | |
242 | + MOV_REG_TO_MEM(mDstPixel.reg, mDstPixel.offset_ebp, EBP); | |
243 | + regs.recycle(mDstPixel.reg); | |
244 | + regs.recycle(parts.cbPtr.reg); | |
245 | + mDstPixel.reg = -1; | |
246 | + } | |
247 | + | |
248 | + if (registerFile().status()) | |
249 | + return registerFile().status(); | |
250 | + | |
251 | + pixel_t pixel; | |
252 | + int directTex = mTextureMachine.directTexture; | |
253 | + if (directTex | parts.packed) { | |
254 | + // note: we can't have both here | |
255 | + // iterated color or direct texture | |
256 | + if(directTex) { | |
257 | + pixel.offset_ebp = parts.texel[directTex-1].offset_ebp; | |
258 | + } | |
259 | + else | |
260 | + pixel.offset_ebp = parts.iterated.offset_ebp; | |
261 | + pixel.reg = regs.obtain(); | |
262 | + MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg); | |
263 | + //pixel = directTex ? parts.texel[directTex-1] : parts.iterated; | |
264 | + pixel.flags &= ~CORRUPTIBLE; | |
265 | + } else { | |
266 | + if (mDithering) { | |
267 | + mBuilderContext.Rctx = regs.obtain(); | |
268 | + temp_reg = regs.obtain(); | |
269 | + const int ctxtReg = mBuilderContext.Rctx; | |
270 | + MOV_MEM_TO_REG(8, EBP, ctxtReg); | |
271 | + const int mask = GGL_DITHER_SIZE-1; | |
272 | + parts.dither = reg_t(regs.obtain()); | |
273 | + MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, parts.dither.reg); | |
274 | + AND_IMM_TO_REG(mask, parts.dither.reg); | |
275 | + ADD_REG_TO_REG(ctxtReg, parts.dither.reg); | |
276 | + MOVZX_MEM_TO_REG(OpndSize_8, parts.dither.reg, GGL_OFFSETOF(ditherMatrix), temp_reg); | |
277 | + MOV_REG_TO_REG(temp_reg, parts.dither.reg); | |
278 | + mCurSp = mCurSp - 4; | |
279 | + parts.dither.offset_ebp = mCurSp; | |
280 | + MOV_REG_TO_MEM(parts.dither.reg, parts.dither.offset_ebp, EBP); | |
281 | + regs.recycle(parts.dither.reg); | |
282 | + regs.recycle(temp_reg); | |
283 | + regs.recycle(mBuilderContext.Rctx); | |
284 | + | |
285 | + } | |
286 | + | |
287 | + // allocate a register for the resulting pixel | |
288 | + pixel.setTo(regs.obtain(), &mCbFormat, FIRST); | |
289 | + | |
290 | + build_component(pixel, parts, GGLFormat::ALPHA, regs); | |
291 | + | |
292 | + if (mAlphaTest!=GGL_ALWAYS) { | |
293 | + // only handle the z-write part here. We know z-test | |
294 | + // was successful, as well as alpha-test. | |
295 | + build_depth_test(parts, Z_WRITE); | |
296 | + } | |
297 | + | |
298 | + build_component(pixel, parts, GGLFormat::RED, regs); | |
299 | + build_component(pixel, parts, GGLFormat::GREEN, regs); | |
300 | + build_component(pixel, parts, GGLFormat::BLUE, regs); | |
301 | + | |
302 | + pixel.flags |= CORRUPTIBLE; | |
303 | + } | |
304 | + | |
305 | + if (registerFile().status()) { | |
306 | + return registerFile().status(); | |
307 | + } | |
308 | + | |
309 | + if (pixel.reg == -1) { | |
310 | + // be defensive here. if we're here it's probably | |
311 | + // that this whole fragment is a no-op. | |
312 | + pixel = mDstPixel; | |
313 | + } | |
314 | + | |
315 | + if (!mAllMasked) { | |
316 | + // logic operation | |
317 | + build_logic_op(pixel, regs); | |
318 | + | |
319 | + // masking | |
320 | + build_masking(pixel, regs); | |
321 | + | |
322 | + comment("store"); | |
323 | + parts.cbPtr.reg = regs.obtain(); | |
324 | + MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, EBP, parts.cbPtr.reg); | |
325 | + store(parts.cbPtr, pixel, WRITE_BACK); | |
326 | + MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP); | |
327 | + regs.recycle(parts.cbPtr.reg); | |
328 | + regs.recycle(pixel.reg); | |
329 | + } | |
330 | + } | |
331 | + | |
332 | + if (registerFile().status()) | |
333 | + return registerFile().status(); | |
334 | + | |
335 | + // update the iterated color... | |
336 | + if (parts.reload != 3) { | |
337 | + build_smooth_shade(parts); | |
338 | + } | |
339 | + | |
340 | + // update iterated z | |
341 | + build_iterate_z(parts); | |
342 | + | |
343 | + // update iterated fog | |
344 | + build_iterate_f(parts); | |
345 | + | |
346 | + //SUB_IMM_TO_REG(1<<16, parts.count.reg); | |
347 | + SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP); | |
348 | + | |
349 | + JCC(Mnemonic_JNS, "fragment_loop"); | |
350 | + label("fragment_end"); | |
351 | + int update_esp_offset, shrink_esp_offset; | |
352 | + update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi | |
353 | + update_esp(update_esp_offset); | |
354 | + shrink_esp(shrink_esp_offset); | |
355 | + return_work(); | |
356 | + | |
357 | + if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) { | |
358 | + if (mDepthTest!=GGL_ALWAYS) { | |
359 | + label("discard_before_textures"); | |
360 | + build_iterate_texture_coordinates(parts); | |
361 | + } | |
362 | + label("discard_after_textures"); | |
363 | + build_smooth_shade(parts); | |
364 | + build_iterate_z(parts); | |
365 | + build_iterate_f(parts); | |
366 | + if (!mAllMasked) { | |
367 | + //ADD_IMM_TO_REG(parts.cbPtr.size>>3, parts.cbPtr.reg); | |
368 | + ADD_IMM_TO_MEM(parts.cbPtr.size>>3, parts.cbPtr.offset_ebp, EBP); | |
369 | + } | |
370 | + SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP); | |
371 | + //SUB_IMM_TO_REG(1<<16, parts.count.reg); | |
372 | + JCC(Mnemonic_JNS, "fragment_loop"); | |
373 | + update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi | |
374 | + update_esp(update_esp_offset); | |
375 | + shrink_esp(shrink_esp_offset); | |
376 | + return_work(); | |
377 | + } | |
378 | + | |
379 | + return registerFile().status(); | |
380 | +} | |
381 | + | |
382 | +// --------------------------------------------------------------------------- | |
383 | +/* build_scanline_preparation: emits the per-scanline setup. It computes the packed count word (with the dither offset when dithering), the colour-buffer pointer, the initial fog and Z values and zbase, initialises texture coordinates and the iterated colour, and sets up the coverage pointer when mAA is set. Values kept for the loop are stored in EBP-relative slots allocated by decrementing mCurSp. */ | |
384 | +void GGLX86Assembler::build_scanline_preparation( | |
385 | + fragment_parts_t& parts, const needs_t& needs) | |
386 | +{ | |
387 | + Scratch scratches(registerFile()); | |
388 | + | |
389 | + // compute count | |
390 | + comment("compute ct (# of pixels to process)"); | |
391 | + int temp_reg; | |
392 | + parts.count.setTo(obtainReg()); | |
393 | + int Rx = scratches.obtain(); | |
394 | + int Ry = scratches.obtain(); | |
395 | + // the only argument is +8 bytes relative to the current EBP | |
396 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
397 | + CONTEXT_LOAD(Rx, iterators.xl); | |
398 | + CONTEXT_LOAD(parts.count.reg, iterators.xr); | |
399 | + CONTEXT_LOAD(Ry, iterators.y); | |
400 | + | |
401 | + // parts.count = iterators.xr - Rx, then minus one (count-1 is what gets packed below) | |
402 | + SUB_REG_TO_REG(Rx, parts.count.reg); | |
403 | + SUB_IMM_TO_REG(1, parts.count.reg); | |
404 | + | |
405 | + if (mDithering) { | |
406 | + // parts.count.reg = 0xNNNNXXDD | |
407 | + // NNNN = count-1 | |
408 | + // DD = dither offset | |
409 | + // XX = 0xxxxxxx (x = garbage) | |
410 | + Scratch scratches(registerFile()); | |
411 | + int tx = scratches.obtain(); | |
412 | + int ty = scratches.obtain(); | |
413 | + | |
414 | + MOV_REG_TO_REG(Rx,tx); | |
415 | + AND_IMM_TO_REG(GGL_DITHER_MASK, tx); | |
416 | + MOV_REG_TO_REG(Ry,ty); | |
417 | + AND_IMM_TO_REG(GGL_DITHER_MASK, ty); | |
418 | + SHL(GGL_DITHER_ORDER_SHIFT, ty); | |
419 | + ADD_REG_TO_REG(ty, tx); | |
420 | + SHL(16, parts.count.reg); | |
421 | + OR_REG_TO_REG(tx, parts.count.reg); | |
422 | + scratches.recycle(tx); | |
423 | + scratches.recycle(ty); | |
424 | + } else { | |
425 | + // parts.count.reg = 0xNNNN0000 | |
426 | + // NNNN = count-1 | |
427 | + SHL(16, parts.count.reg); | |
428 | + } | |
429 | + mCurSp = mCurSp - 4; | |
430 | + parts.count.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg | |
431 | + MOV_REG_TO_MEM(parts.count.reg, parts.count.offset_ebp, EBP); | |
432 | + //PUSH(parts.count.reg); | |
433 | + recycleReg(parts.count.reg); | |
434 | + parts.count.reg=-1; | |
435 | + if (!mAllMasked) { | |
436 | + // compute dst ptr | |
437 | + comment("compute color-buffer pointer"); | |
438 | + const int cb_bits = mCbFormat.size*8; | |
439 | + int Rs = scratches.obtain(); | |
440 | + temp_reg = scratches.obtain(); | |
441 | + CONTEXT_LOAD(Rs, state.buffers.color.stride); | |
442 | + MOVSX_REG_TO_REG(OpndSize_16, Ry, temp_reg); | |
443 | + MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs); | |
444 | + IMUL(temp_reg, Rs); | |
445 | + scratches.recycle(temp_reg); | |
446 | + ADD_REG_TO_REG(Rx, Rs); | |
447 | + | |
448 | + parts.cbPtr.setTo(obtainReg(), cb_bits); | |
449 | + CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data); | |
450 | + reg_t temp_reg_t; | |
451 | + temp_reg_t.setTo(Rs); | |
452 | + base_offset(parts.cbPtr, parts.cbPtr, temp_reg_t); | |
453 | + | |
454 | + mCurSp = mCurSp - 4; | |
455 | + parts.cbPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg | |
456 | + MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP); | |
457 | + //PUSH(parts.cbPtr.reg); | |
458 | + recycleReg(parts.cbPtr.reg); | |
459 | + parts.cbPtr.reg=-1; | |
460 | + scratches.recycle(Rs); | |
461 | + } | |
462 | + | |
463 | + // init fog | |
464 | + const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p); | |
465 | + if (need_fog) { | |
466 | + comment("compute initial fog coordinate"); | |
467 | + Scratch scratches(registerFile()); | |
468 | + int ydfdy = scratches.obtain(); | |
469 | + int dfdx = scratches.obtain(); | |
470 | + CONTEXT_LOAD(dfdx, generated_vars.dfdx); | |
471 | + IMUL(Rx, dfdx); | |
472 | + CONTEXT_LOAD(ydfdy, iterators.ydfdy); | |
473 | + ADD_REG_TO_REG(ydfdy, dfdx); // Rx * dfdx + ydfdy | |
474 | + CONTEXT_STORE(dfdx, generated_vars.f); | |
475 | + scratches.recycle(dfdx); | |
476 | + scratches.recycle(ydfdy); | |
477 | + } | |
478 | + | |
479 | + // init Z coordinate | |
480 | + if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { | |
481 | + parts.z = reg_t(obtainReg()); | |
482 | + comment("compute initial Z coordinate"); | |
483 | + Scratch scratches(registerFile()); | |
484 | + int dzdx = scratches.obtain(); | |
485 | + int ydzdy = parts.z.reg; | |
486 | + CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point | |
487 | + IMUL(Rx, dzdx); | |
488 | + CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point | |
489 | + ADD_REG_TO_REG(dzdx, ydzdy); // parts.z.reg = Rx * dzdx + ydzdy | |
490 | + | |
491 | + mCurSp = mCurSp - 4; | |
492 | + parts.z.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg | |
493 | + MOV_REG_TO_MEM(ydzdy, parts.z.offset_ebp, EBP); | |
494 | + //PUSH(ydzdy); | |
495 | + recycleReg(ydzdy); | |
496 | + parts.z.reg=-1; | |
497 | + | |
498 | + // we're going to index zbase of parts.count | |
499 | + // zbase = base + (xl-count + stride*y)*2 by arm | |
500 | + // !!! Actually, zbase = base + (xl + stride*y)*2. NOTE(review): the code below also adds (stored count word >> 16) before doubling - confirm which formula is intended | |
501 | + int Rs = dzdx; | |
502 | + int zbase = scratches.obtain(); | |
503 | + temp_reg = zbase; | |
504 | + CONTEXT_LOAD(Rs, state.buffers.depth.stride); | |
505 | + MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs); | |
506 | + MOV_REG_TO_REG(Ry, temp_reg); | |
507 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg); | |
508 | + IMUL(temp_reg, Rs); | |
509 | + ADD_REG_TO_REG(Rx, Rs); | |
510 | + // reload the count word that was stored in its EBP-relative slot above | |
511 | + MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, temp_reg); | |
512 | + SHR(16, temp_reg); | |
513 | + ADD_REG_TO_REG(temp_reg, Rs); | |
514 | + SHL(1, Rs); | |
515 | + CONTEXT_LOAD(zbase, state.buffers.depth.data); | |
516 | + ADD_REG_TO_REG(Rs, zbase); | |
517 | + CONTEXT_STORE(zbase, generated_vars.zbase); | |
518 | + scratches.recycle(zbase); | |
519 | + scratches.recycle(dzdx); | |
520 | + } | |
521 | + // the registers are all used up | |
522 | + | |
523 | + // init texture coordinates | |
524 | + init_textures(parts.coords, reg_t(Rx), reg_t(Ry)); | |
525 | + scratches.recycle(Ry); | |
526 | + | |
527 | + // iterated color | |
528 | + init_iterated_color(parts, reg_t(Rx)); | |
529 | + | |
530 | + // init coverage factor application (anti-aliasing) | |
531 | + if (mAA) { | |
532 | + parts.covPtr.setTo(obtainReg(), 16); | |
533 | + CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage); | |
534 | + SHL(1, Rx); | |
535 | + ADD_REG_TO_REG(Rx, parts.covPtr.reg); | |
536 | + | |
537 | + mCurSp = mCurSp - 4; | |
538 | + parts.covPtr.offset_ebp = mCurSp; | |
539 | + MOV_REG_TO_MEM(parts.covPtr.reg, parts.covPtr.offset_ebp, EBP); | |
540 | + //PUSH(parts.covPtr.reg); | |
541 | + recycleReg(parts.covPtr.reg); | |
542 | + parts.covPtr.reg=-1; | |
543 | + } | |
544 | + scratches.recycle(Rx); | |
545 | +} | |
546 | + | |
547 | +// --------------------------------------------------------------------------- | |
548 | +/* build_component: emits the code for one component (alpha, red, green or blue) of the output pixel. It obtains the incoming value through build_incoming_component() and, when mInfo[component].inDest is set, blends it against mDstPixel and hands it to downshift() together with parts.dither. */ | |
549 | +void GGLX86Assembler::build_component( pixel_t& pixel, | |
550 | + fragment_parts_t& parts, | |
551 | + int component, | |
552 | + Scratch& regs) | |
553 | +{ | |
554 | + static char const * comments[] = {"alpha", "red", "green", "blue"}; | |
555 | + comment(comments[component]); | |
556 | + | |
557 | + // local register file (regs, the caller's Scratch, is passed on separately as the global one) | |
558 | + Scratch scratches(registerFile()); | |
559 | + const int dst_component_size = pixel.component_size(component); | |
560 | + | |
561 | + component_t temp(-1); | |
562 | + build_incoming_component( temp, dst_component_size, | |
563 | + parts, component, scratches, regs); | |
564 | + | |
565 | + if (mInfo[component].inDest) { | |
566 | + // blending... | |
567 | + build_blending( temp, mDstPixel, component, scratches ); | |
568 | + | |
569 | + // downshift component and rebuild pixel... | |
570 | + downshift(pixel, component, temp, parts.dither); | |
571 | + } | |
572 | +} | |
573 | +/* build_incoming_component: decides whether the component must be extracted into its own value. If so, it builds it from the iterated colour and texture environment, stores it in a new EBP-relative slot, runs the alpha-only steps (coverage, alpha test, mAlphaSource setup) and fog, and returns it in temp. Otherwise, for components that go to the destination, temp is selected directly from parts.iterated or a texel. */ | |
574 | +void GGLX86Assembler::build_incoming_component( | |
575 | + component_t& temp, | |
576 | + int dst_size, | |
577 | + fragment_parts_t& parts, | |
578 | + int component, | |
579 | + Scratch& scratches, | |
580 | + Scratch& global_regs) | |
581 | +{ | |
582 | + const uint32_t component_mask = 1<<component; | |
583 | + | |
584 | + // Figure out what we need for the blending stage... | |
585 | + int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; | |
586 | + int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; | |
587 | + if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) { | |
588 | + fs = GGL_ONE; | |
589 | + } | |
590 | + | |
591 | + // Figure out what we need to extract and for what reason | |
592 | + const int blending = blending_codes(fs, fd); | |
593 | + | |
594 | + // Are we actually going to blend? | |
595 | + const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); | |
596 | + | |
597 | + // expand the source if the destination has more bits. NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 while the replace path further down scans all units - confirm the bound is intended | |
598 | + int need_expander = false; | |
599 | + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) { | |
600 | + texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
601 | + if ((tmu.format_idx) && | |
602 | + (parts.texel[i].component_size(component) < dst_size)) { | |
603 | + need_expander = true; | |
604 | + } | |
605 | + } | |
606 | + | |
607 | + // do we need to extract this component? | |
608 | + const bool multiTexture = mTextureMachine.activeUnits > 1; | |
609 | + const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) && | |
610 | + (isAlphaSourceNeeded()); | |
611 | + int need_extract = mInfo[component].needed; | |
612 | + if (mInfo[component].inDest) | |
613 | + { | |
614 | + need_extract |= ((need_blending ? | |
615 | + (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander)); | |
616 | + need_extract |= (mTextureMachine.mask != mTextureMachine.replaced); | |
617 | + need_extract |= mInfo[component].smooth; | |
618 | + need_extract |= mInfo[component].fog; | |
619 | + need_extract |= mDithering; | |
620 | + need_extract |= multiTexture; | |
621 | + } | |
622 | + | |
623 | + if (need_extract) { | |
624 | + Scratch& regs = blend_needs_alpha_source ? global_regs : scratches; | |
625 | + component_t fragment; | |
626 | + | |
627 | + // iterated color | |
628 | + fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE); | |
629 | + build_iterated_color(fragment, parts, component, regs); | |
630 | + | |
631 | + // texture environment (decal, modulate, replace) | |
632 | + build_texture_environment(fragment, parts, component, regs); | |
633 | + | |
634 | + // expand the source if the destination has more bits | |
635 | + if (need_expander && (fragment.size() < dst_size)) { | |
636 | + // we're here only if we fetched a texel | |
637 | + // (so we know for sure fragment is CORRUPTIBLE) | |
638 | + //fragment is stored on the stack | |
639 | + expand(fragment, fragment, dst_size); | |
640 | + } | |
641 | + | |
642 | + mCurSp = mCurSp - 4; | |
643 | + fragment.offset_ebp = mCurSp; | |
644 | + MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP); | |
645 | + regs.recycle(fragment.reg); | |
646 | + | |
647 | + // We have a few specific things to do for the alpha-channel | |
648 | + if ((component==GGLFormat::ALPHA) && | |
649 | + (mInfo[component].needed || fragment.size()<dst_size)) | |
650 | + { | |
651 | + // convert to integer_t first and make sure | |
652 | + // we don't corrupt a needed register | |
653 | + if (fragment.l) { | |
654 | + //component_t incoming(fragment); | |
655 | + // actually fragment is not corruptible | |
656 | + //modify(fragment, regs); | |
657 | + //MOV_REG_TO_REG(incoming.reg, fragment.reg); | |
658 | + SHR(fragment.l, fragment.offset_ebp, EBP); | |
659 | + fragment.h -= fragment.l; | |
660 | + fragment.l = 0; | |
661 | + } | |
662 | + | |
663 | + // I haven't found any case to trigger coverage and the following alpha test (mAlphaTest != GGL_ALWAYS) | |
664 | + fragment.reg = regs.obtain(); | |
665 | + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, fragment.reg); | |
666 | + | |
667 | + // coverage factor application | |
668 | + build_coverage_application(fragment, parts, regs); | |
669 | + // alpha-test | |
670 | + build_alpha_test(fragment, parts); | |
671 | + | |
672 | + MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP); | |
673 | + regs.recycle(fragment.reg); | |
674 | + | |
675 | + if (blend_needs_alpha_source) { | |
676 | + // We keep only 8 bits for the blending stage | |
677 | + const int shift = fragment.h <= 8 ? 0 : fragment.h-8; | |
678 | + | |
679 | + if (fragment.flags & CORRUPTIBLE) { | |
680 | + fragment.flags &= ~CORRUPTIBLE; | |
681 | + mAlphaSource.setTo(fragment.reg, | |
682 | + fragment.size(), fragment.flags, fragment.offset_ebp); | |
683 | + //mCurSp = mCurSp - 4; | |
684 | + //mAlphaSource.offset_ebp = mCurSp; | |
685 | + if (shift) { | |
686 | + SHR(shift, mAlphaSource.offset_ebp, EBP); | |
687 | + } | |
688 | + } else { | |
689 | + // XXX: it would be better to do this in build_blend_factor() | |
690 | + // so we can avoid the extra MOV below. | |
691 | + mAlphaSource.setTo(regs.obtain(), | |
692 | + fragment.size(), CORRUPTIBLE); | |
693 | + mCurSp = mCurSp - 4; | |
694 | + mAlphaSource.offset_ebp = mCurSp; | |
695 | + if (shift) { | |
696 | + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg); | |
697 | + SHR(shift, mAlphaSource.reg); | |
698 | + } else { | |
699 | + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg); | |
700 | + } | |
701 | + MOV_REG_TO_MEM(mAlphaSource.reg, mAlphaSource.offset_ebp, EBP); | |
702 | + regs.recycle(mAlphaSource.reg); | |
703 | + } | |
704 | + mAlphaSource.s -= shift; | |
705 | + | |
706 | + } | |
707 | + } | |
708 | + | |
709 | + // fog... | |
710 | + build_fog( fragment, component, regs ); | |
711 | + | |
712 | + temp = fragment; | |
713 | + } else { | |
714 | + if (mInfo[component].inDest) { | |
715 | + // extraction not needed and replace | |
716 | + // we just select the right component | |
717 | + if ((mTextureMachine.replaced & component_mask) == 0) { | |
718 | + // component wasn't replaced, so use it! | |
719 | + temp = component_t(parts.iterated, component); | |
720 | + } | |
721 | + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { | |
722 | + const texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
723 | + if ((tmu.mask & component_mask) && | |
724 | + ((tmu.replaced & component_mask) == 0)) { | |
725 | + temp = component_t(parts.texel[i], component); | |
726 | + } | |
727 | + } | |
728 | + } | |
729 | + } | |
730 | +} | |
731 | + | |
732 | +bool GGLX86Assembler::isAlphaSourceNeeded() const | |
733 | +{ | |
734 | + // XXX: also needed for alpha-test | |
735 | + const int bs = mBlendSrc; | |
736 | + const int bd = mBlendDst; | |
737 | + return bs==GGL_SRC_ALPHA_SATURATE || | |
738 | + bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA || | |
739 | + bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ; | |
740 | +} | |
741 | + | |
742 | +// --------------------------------------------------------------------------- | |
743 | + | |
744 | +void GGLX86Assembler::build_smooth_shade(fragment_parts_t& parts) | |
745 | +{ | |
746 | + if (mSmooth && !parts.iterated_packed) { | |
747 | + // update the iterated color in a pipelined way... | |
748 | + comment("update iterated color"); | |
749 | + Scratch scratches(registerFile()); | |
750 | + mBuilderContext.Rctx = scratches.obtain(); | |
751 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
752 | + | |
753 | + const int reload = parts.reload; | |
754 | + for (int i=0 ; i<4 ; i++) { | |
755 | + if (!mInfo[i].iterated) | |
756 | + continue; | |
757 | + | |
758 | + int dx = parts.argb_dx[i].reg; | |
759 | + int c = parts.argb[i].reg; | |
760 | + dx = scratches.obtain(); | |
761 | + c = scratches.obtain(); | |
762 | + CONTEXT_LOAD(dx, generated_vars.argb[i].dx); | |
763 | + CONTEXT_LOAD(c, generated_vars.argb[i].c); | |
764 | + | |
765 | + //if (reload & 1) { | |
766 | + // c = scratches.obtain(); | |
767 | + // CONTEXT_LOAD(c, generated_vars.argb[i].c); | |
768 | + //} | |
769 | + //if (reload & 2) { | |
770 | + // dx = scratches.obtain(); | |
771 | + // CONTEXT_LOAD(dx, generated_vars.argb[i].dx); | |
772 | + //} | |
773 | + | |
774 | + if (mSmooth) { | |
775 | + ADD_REG_TO_REG(dx, c); | |
776 | + } | |
777 | + | |
778 | + CONTEXT_STORE(c, generated_vars.argb[i].c); | |
779 | + scratches.recycle(c); | |
780 | + scratches.recycle(dx); | |
781 | + //if (reload & 1) { | |
782 | + // CONTEXT_STORE(c, generated_vars.argb[i].c); | |
783 | + // scratches.recycle(c); | |
784 | + //} | |
785 | + //if (reload & 2) { | |
786 | + // scratches.recycle(dx); | |
787 | + //} | |
788 | + } | |
789 | + scratches.recycle(mBuilderContext.Rctx); | |
790 | + } | |
791 | +} | |
792 | + | |
793 | +// --------------------------------------------------------------------------- | |
794 | + | |
795 | +void GGLX86Assembler::build_coverage_application(component_t& fragment, | |
796 | + fragment_parts_t& parts, Scratch& regs) | |
797 | +{ | |
798 | + // here fragment.l is guarenteed to be 0 | |
799 | + if (mAA) { | |
800 | + // coverages are 1.15 fixed-point numbers | |
801 | + comment("coverage application"); | |
802 | + | |
803 | + component_t incoming(fragment); | |
804 | + modify(fragment, regs); | |
805 | + | |
806 | + Scratch scratches(registerFile()); | |
807 | + int cf = scratches.obtain(); | |
808 | + parts.covPtr.reg = scratches.obtain(); | |
809 | + MOV_MEM_TO_REG(parts.covPtr.offset_ebp, EBP, parts.covPtr.reg); | |
810 | + MOVZX_MEM_TO_REG(OpndSize_16, parts.covPtr.reg, 2, cf); // refer to LDRH definition | |
811 | + scratches.recycle(parts.covPtr.reg); | |
812 | + if (fragment.h > 31) { | |
813 | + fragment.h--; | |
814 | + | |
815 | + int flag_push_edx = 0; | |
816 | + int flag_reserve_edx = 0; | |
817 | + int temp_reg2 = -1; | |
818 | + int edx_offset_ebp = 0; | |
819 | + if(scratches.isUsed(EDX) == 1) { | |
820 | + if(incoming.reg != EDX && cf != EDX) { | |
821 | + flag_push_edx = 1; | |
822 | + mCurSp = mCurSp - 4; | |
823 | + edx_offset_ebp = mCurSp; | |
824 | + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); | |
825 | + } | |
826 | + } | |
827 | + else { | |
828 | + flag_reserve_edx = 1; | |
829 | + scratches.reserve(EDX); | |
830 | + } | |
831 | + if(scratches.isUsed(EAX)) { | |
832 | + if( cf == EAX || incoming.reg == EAX) { | |
833 | + MOVSX_REG_TO_REG(OpndSize_16, cf, cf); | |
834 | + if(cf == EAX) | |
835 | + IMUL(incoming.reg); | |
836 | + else | |
837 | + IMUL(cf); | |
838 | + SHL(16, EDX); | |
839 | + SHR(16, EAX); | |
840 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
841 | + MOV_REG_TO_REG(EDX, incoming.reg); | |
842 | + } | |
843 | + else { | |
844 | + int eax_offset_ebp = 0; | |
845 | + if(scratches.countFreeRegs() > 0) { | |
846 | + temp_reg2 = scratches.obtain(); | |
847 | + MOV_REG_TO_REG(EAX, temp_reg2); | |
848 | + } | |
849 | + else { | |
850 | + mCurSp = mCurSp - 4; | |
851 | + eax_offset_ebp = mCurSp; | |
852 | + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); | |
853 | + } | |
854 | + MOV_REG_TO_REG(cf, EAX); | |
855 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
856 | + IMUL(incoming.reg); | |
857 | + SHL(16, EDX); | |
858 | + SHR(16, EAX); | |
859 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
860 | + MOV_REG_TO_REG(EDX, incoming.reg); | |
861 | + if(temp_reg2 > -1) { | |
862 | + MOV_REG_TO_REG(temp_reg2, EAX); | |
863 | + scratches.recycle(temp_reg2); | |
864 | + } | |
865 | + else { | |
866 | + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); | |
867 | + } | |
868 | + } | |
869 | + } | |
870 | + else { | |
871 | + MOV_REG_TO_REG(cf, EAX); | |
872 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
873 | + IMUL(incoming.reg); | |
874 | + SHL(16, EDX); | |
875 | + SHR(16, EAX); | |
876 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
877 | + MOV_REG_TO_REG(EDX, incoming.reg); | |
878 | + } | |
879 | + if(flag_push_edx == 1) { | |
880 | + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); | |
881 | + } | |
882 | + if(flag_reserve_edx ==1) | |
883 | + scratches.recycle(EDX); | |
884 | + | |
885 | + MOV_REG_TO_REG(incoming.reg, fragment.reg); | |
886 | + | |
887 | + //IMUL(cf, incoming.reg); | |
888 | + } else { | |
889 | + MOV_REG_TO_REG(incoming.reg, fragment.reg); | |
890 | + SHL(1, fragment.reg); | |
891 | + | |
892 | + int flag_push_edx = 0; | |
893 | + int flag_reserve_edx = 0; | |
894 | + int temp_reg2 = -1; | |
895 | + int edx_offset_ebp = 0; | |
896 | + if(scratches.isUsed(EDX) == 1) { | |
897 | + if(fragment.reg != EDX && cf != EDX) { | |
898 | + flag_push_edx = 1; | |
899 | + mCurSp = mCurSp - 4; | |
900 | + edx_offset_ebp = mCurSp; | |
901 | + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); | |
902 | + } | |
903 | + } | |
904 | + else { | |
905 | + flag_reserve_edx = 1; | |
906 | + scratches.reserve(EDX); | |
907 | + } | |
908 | + if(scratches.isUsed(EAX)) { | |
909 | + if( cf == EAX || fragment.reg == EAX) { | |
910 | + MOVSX_REG_TO_REG(OpndSize_16, cf, cf); | |
911 | + if(cf == EAX) | |
912 | + IMUL(fragment.reg); | |
913 | + else | |
914 | + IMUL(cf); | |
915 | + SHL(16, EDX); | |
916 | + SHR(16, EAX); | |
917 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
918 | + MOV_REG_TO_REG(EDX, fragment.reg); | |
919 | + } | |
920 | + else { | |
921 | + int eax_offset_ebp = 0; | |
922 | + if(scratches.countFreeRegs() > 0) { | |
923 | + temp_reg2 = scratches.obtain(); | |
924 | + MOV_REG_TO_REG(EAX, temp_reg2); | |
925 | + } | |
926 | + else { | |
927 | + mCurSp = mCurSp - 4; | |
928 | + eax_offset_ebp = mCurSp; | |
929 | + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); | |
930 | + } | |
931 | + MOV_REG_TO_REG(cf, EAX); | |
932 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
933 | + IMUL(fragment.reg); | |
934 | + SHL(16, EDX); | |
935 | + SHR(16, EAX); | |
936 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
937 | + MOV_REG_TO_REG(EDX, fragment.reg); | |
938 | + if(temp_reg2 > -1) { | |
939 | + MOV_REG_TO_REG(temp_reg2, EAX); | |
940 | + scratches.recycle(temp_reg2); | |
941 | + } | |
942 | + else { | |
943 | + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); | |
944 | + } | |
945 | + } | |
946 | + } | |
947 | + else { | |
948 | + MOV_REG_TO_REG(cf, EAX); | |
949 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
950 | + IMUL(fragment.reg); | |
951 | + SHL(16, EDX); | |
952 | + SHR(16, EAX); | |
953 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
954 | + MOV_REG_TO_REG(EDX, fragment.reg); | |
955 | + } | |
956 | + if(flag_push_edx == 1) { | |
957 | + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); | |
958 | + } | |
959 | + if(flag_reserve_edx ==1) | |
960 | + scratches.recycle(EDX); | |
961 | + | |
962 | + //IMUL(cf, fragment.reg); | |
963 | + } | |
964 | + scratches.recycle(cf); | |
965 | + } | |
966 | +} | |
967 | + | |
968 | +// --------------------------------------------------------------------------- | |
969 | + | |
970 | +void GGLX86Assembler::build_alpha_test(component_t& fragment, | |
971 | + const fragment_parts_t& parts) | |
972 | +{ | |
973 | + if (mAlphaTest != GGL_ALWAYS) { | |
974 | + comment("Alpha Test"); | |
975 | + Scratch scratches(registerFile()); | |
976 | + int ref = scratches.obtain(); | |
977 | + mBuilderContext.Rctx = scratches.obtain(); | |
978 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
979 | + const int shift = GGL_COLOR_BITS-fragment.size(); | |
980 | + CONTEXT_LOAD(ref, state.alpha_test.ref); | |
981 | + scratches.recycle(mBuilderContext.Rctx); | |
982 | + if (shift) { | |
983 | + SHR(shift, ref); | |
984 | + CMP_REG_TO_REG(ref, fragment.reg); | |
985 | + } else CMP_REG_TO_REG(ref, fragment.reg); | |
986 | + Mnemonic cc = Mnemonic_NULL; | |
987 | + //int cc = NV; | |
988 | + switch (mAlphaTest) { | |
989 | + case GGL_NEVER: | |
990 | + JMP("discard_after_textures"); | |
991 | + return; | |
992 | + break; | |
993 | + case GGL_LESS: | |
994 | + cc = Mnemonic_JNL; | |
995 | + break; | |
996 | + case GGL_EQUAL: | |
997 | + cc = Mnemonic_JNE; | |
998 | + break; | |
999 | + case GGL_LEQUAL: | |
1000 | + cc = Mnemonic_JB; | |
1001 | + break; | |
1002 | + case GGL_GREATER: | |
1003 | + cc = Mnemonic_JLE; | |
1004 | + break; | |
1005 | + case GGL_NOTEQUAL: | |
1006 | + cc = Mnemonic_JE; | |
1007 | + break; | |
1008 | + case GGL_GEQUAL: | |
1009 | + cc = Mnemonic_JNC; | |
1010 | + break; | |
1011 | + } | |
1012 | + JCC(cc, "discard_after_textures"); | |
1013 | + //B(cc^1, "discard_after_textures"); | |
1014 | + } | |
1015 | +} | |
1016 | + | |
1017 | +// --------------------------------------------------------------------------- | |
1018 | + | |
1019 | +void GGLX86Assembler::build_depth_test( | |
1020 | + const fragment_parts_t& parts, uint32_t mask) | |
1021 | +{ | |
1022 | + mask &= Z_TEST|Z_WRITE; | |
1023 | + int store_flag = 0; | |
1024 | + const needs_t& needs = mBuilderContext.needs; | |
1025 | + const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); | |
1026 | + Scratch scratches(registerFile()); | |
1027 | + | |
1028 | + if (mDepthTest != GGL_ALWAYS || zmask) { | |
1029 | + Mnemonic ic = Mnemonic_NULL; | |
1030 | + switch (mDepthTest) { | |
1031 | + case GGL_LESS: | |
1032 | + ic = Mnemonic_JBE; | |
1033 | + break; | |
1034 | + case GGL_EQUAL: | |
1035 | + ic = Mnemonic_JNE; | |
1036 | + break; | |
1037 | + case GGL_LEQUAL: | |
1038 | + ic = Mnemonic_JB; | |
1039 | + break; | |
1040 | + case GGL_GREATER: | |
1041 | + ic = Mnemonic_JGE; | |
1042 | + break; | |
1043 | + case GGL_NOTEQUAL: | |
1044 | + ic = Mnemonic_JE; | |
1045 | + break; | |
1046 | + case GGL_GEQUAL: | |
1047 | + ic = Mnemonic_JA; | |
1048 | + break; | |
1049 | + case GGL_NEVER: | |
1050 | + // this never happens, because it's taken care of when | |
1051 | + // computing the needs. but we keep it for completness. | |
1052 | + comment("Depth Test (NEVER)"); | |
1053 | + JMP("discard_before_textures"); | |
1054 | + return; | |
1055 | + case GGL_ALWAYS: | |
1056 | + // we're here because zmask is enabled | |
1057 | + mask &= ~Z_TEST; // test always passes. | |
1058 | + break; | |
1059 | + } | |
1060 | + | |
1061 | + | |
1062 | + if ((mask & Z_WRITE) && !zmask) { | |
1063 | + mask &= ~Z_WRITE; | |
1064 | + } | |
1065 | + | |
1066 | + if (!mask) | |
1067 | + return; | |
1068 | + | |
1069 | + comment("Depth Test"); | |
1070 | + | |
1071 | + int zbase = scratches.obtain(); | |
1072 | + mBuilderContext.Rctx = scratches.obtain(); | |
1073 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
1074 | + CONTEXT_LOAD(zbase, generated_vars.zbase); // stall | |
1075 | + scratches.recycle(mBuilderContext.Rctx); | |
1076 | + | |
1077 | + int temp_reg1 = scratches.obtain(); | |
1078 | + int depth = scratches.obtain(); | |
1079 | + int z = parts.z.reg; | |
1080 | + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg1); | |
1081 | + SHR(15, temp_reg1); | |
1082 | + SUB_REG_TO_REG(temp_reg1, zbase); | |
1083 | + | |
1084 | + // above does zbase = zbase + ((count >> 16) << 1) | |
1085 | + | |
1086 | + if (mask & Z_TEST) { | |
1087 | + MOVZX_MEM_TO_REG(OpndSize_16, zbase, 0, depth); | |
1088 | + MOV_MEM_TO_REG(parts.z.offset_ebp, PhysicalReg_EBP, temp_reg1); | |
1089 | + SHR(16, temp_reg1); | |
1090 | + CMP_REG_TO_REG(temp_reg1, depth); | |
1091 | + JCC(ic, "discard_before_textures"); | |
1092 | + | |
1093 | + } | |
1094 | + if (mask & Z_WRITE) { | |
1095 | + if (mask == Z_WRITE) { | |
1096 | + // only z-write asked, cc is meaningless | |
1097 | + store_flag = 1; | |
1098 | + } | |
1099 | + // actually it must be stored since the above branch is not taken | |
1100 | + MOV_REG_TO_MEM(temp_reg1, 0, zbase, OpndSize_16); | |
1101 | + } | |
1102 | + scratches.recycle(temp_reg1); | |
1103 | + scratches.recycle(zbase); | |
1104 | + scratches.recycle(depth); | |
1105 | + } | |
1106 | +} | |
1107 | + | |
1108 | +void GGLX86Assembler::build_iterate_z(const fragment_parts_t& parts) | |
1109 | +{ | |
1110 | + const needs_t& needs = mBuilderContext.needs; | |
1111 | + if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { | |
1112 | + Scratch scratches(registerFile()); | |
1113 | + int dzdx = scratches.obtain(); | |
1114 | + mBuilderContext.Rctx = scratches.obtain(); | |
1115 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
1116 | + CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall | |
1117 | + scratches.recycle(mBuilderContext.Rctx); | |
1118 | + ADD_REG_TO_MEM(dzdx, EBP, parts.z.offset_ebp); | |
1119 | + scratches.recycle(dzdx); | |
1120 | + } | |
1121 | +} | |
1122 | + | |
1123 | +void GGLX86Assembler::build_iterate_f(const fragment_parts_t& parts) | |
1124 | +{ | |
1125 | + const needs_t& needs = mBuilderContext.needs; | |
1126 | + if (GGL_READ_NEEDS(P_FOG, needs.p)) { | |
1127 | + Scratch scratches(registerFile()); | |
1128 | + int dfdx = scratches.obtain(); | |
1129 | + int f = scratches.obtain(); | |
1130 | + mBuilderContext.Rctx = scratches.obtain(); | |
1131 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
1132 | + CONTEXT_LOAD(f, generated_vars.f); | |
1133 | + CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall | |
1134 | + ADD_REG_TO_REG(dfdx, f); | |
1135 | + CONTEXT_STORE(f, generated_vars.f); | |
1136 | + scratches.recycle(mBuilderContext.Rctx); | |
1137 | + scratches.recycle(dfdx); | |
1138 | + scratches.recycle(f); | |
1139 | + } | |
1140 | +} | |
1141 | + | |
1142 | +// --------------------------------------------------------------------------- | |
1143 | + | |
1144 | +void GGLX86Assembler::build_logic_op(pixel_t& pixel, Scratch& regs) | |
1145 | +{ | |
1146 | + const needs_t& needs = mBuilderContext.needs; | |
1147 | + const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; | |
1148 | + if (opcode == GGL_COPY) | |
1149 | + return; | |
1150 | + | |
1151 | + comment("logic operation"); | |
1152 | + | |
1153 | + pixel_t s(pixel); | |
1154 | + if (!(pixel.flags & CORRUPTIBLE)) { | |
1155 | + pixel.reg = regs.obtain(); | |
1156 | + pixel.flags |= CORRUPTIBLE; | |
1157 | + } | |
1158 | + | |
1159 | + pixel_t d(mDstPixel); | |
1160 | + d.reg = regs.obtain(); | |
1161 | + MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, d.reg); | |
1162 | + switch(opcode) { | |
1163 | + case GGL_CLEAR: | |
1164 | + MOV_IMM_TO_REG(0, pixel.reg); | |
1165 | + break; | |
1166 | + case GGL_AND: | |
1167 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1168 | + AND_REG_TO_REG(s.reg, pixel.reg); | |
1169 | + break; | |
1170 | + case GGL_AND_REVERSE: | |
1171 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1172 | + NOT(pixel.reg); | |
1173 | + AND_REG_TO_REG(s.reg, pixel.reg); | |
1174 | + break; | |
1175 | + case GGL_COPY: | |
1176 | + break; | |
1177 | + case GGL_AND_INVERTED: | |
1178 | + MOV_REG_TO_REG(s.reg, pixel.reg); | |
1179 | + NOT(pixel.reg); | |
1180 | + AND_REG_TO_REG(d.reg, pixel.reg); | |
1181 | + break; | |
1182 | + case GGL_NOOP: | |
1183 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1184 | + break; | |
1185 | + case GGL_XOR: | |
1186 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1187 | + XOR(s.reg, pixel.reg); | |
1188 | + break; | |
1189 | + case GGL_OR: | |
1190 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1191 | + OR_REG_TO_REG(s.reg, pixel.reg); | |
1192 | + break; | |
1193 | + case GGL_NOR: | |
1194 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1195 | + OR_REG_TO_REG(s.reg, pixel.reg); | |
1196 | + NOT(pixel.reg); | |
1197 | + break; | |
1198 | + case GGL_EQUIV: | |
1199 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1200 | + XOR(s.reg, pixel.reg); | |
1201 | + NOT(pixel.reg); | |
1202 | + break; | |
1203 | + case GGL_INVERT: | |
1204 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1205 | + NOT(pixel.reg); | |
1206 | + break; | |
1207 | + case GGL_OR_REVERSE: // s | ~d == ~(~s & d) | |
1208 | + MOV_REG_TO_REG(s.reg, pixel.reg); | |
1209 | + NOT(pixel.reg); | |
1210 | + AND_REG_TO_REG(d.reg, pixel.reg); | |
1211 | + NOT(pixel.reg); | |
1212 | + break; | |
1213 | + case GGL_COPY_INVERTED: | |
1214 | + MOV_REG_TO_REG(s.reg, pixel.reg); | |
1215 | + NOT(pixel.reg); | |
1216 | + break; | |
1217 | + case GGL_OR_INVERTED: // ~s | d == ~(s & ~d) | |
1218 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1219 | + NOT(pixel.reg); | |
1220 | + AND_REG_TO_REG(s.reg, pixel.reg); | |
1221 | + NOT(pixel.reg); | |
1222 | + break; | |
1223 | + case GGL_NAND: | |
1224 | + MOV_REG_TO_REG(d.reg, pixel.reg); | |
1225 | + AND_REG_TO_REG(s.reg, pixel.reg); | |
1226 | + NOT(pixel.reg); | |
1227 | + break; | |
1228 | + case GGL_SET: | |
1229 | + MOV_IMM_TO_REG(0, pixel.reg); | |
1230 | + NOT(pixel.reg); | |
1231 | + break; | |
1232 | + }; | |
1233 | + regs.recycle(d.reg); | |
1234 | +} | |
1235 | + | |
1236 | +// --------------------------------------------------------------------------- | |
1237 | + | |
1238 | + | |
1239 | +void GGLX86Assembler::build_and_immediate(int d, int s, uint32_t mask, int bits) | |
1240 | +{ | |
1241 | + uint32_t rot; | |
1242 | + uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; | |
1243 | + mask &= size; | |
1244 | + | |
1245 | + if (mask == size) { | |
1246 | + if (d != s) | |
1247 | + MOV_REG_TO_REG(s, d); | |
1248 | + return; | |
1249 | + } | |
1250 | + | |
1251 | + MOV_REG_TO_REG(s, d); | |
1252 | + AND_IMM_TO_REG(mask, d); | |
1253 | +} | |
1254 | + | |
1255 | +void GGLX86Assembler::build_masking(pixel_t& pixel, Scratch& regs) | |
1256 | +{ | |
1257 | + if (!mMasking || mAllMasked) { | |
1258 | + return; | |
1259 | + } | |
1260 | + | |
1261 | + comment("color mask"); | |
1262 | + | |
1263 | + pixel_t fb(mDstPixel); | |
1264 | + fb.reg = regs.obtain(); | |
1265 | + MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, fb.reg); | |
1266 | + pixel_t s(pixel); | |
1267 | + if (!(pixel.flags & CORRUPTIBLE)) { | |
1268 | + pixel.reg = regs.obtain(); | |
1269 | + pixel.flags |= CORRUPTIBLE; | |
1270 | + } | |
1271 | + | |
1272 | + int mask = 0; | |
1273 | + for (int i=0 ; i<4 ; i++) { | |
1274 | + const int component_mask = 1<<i; | |
1275 | + const int h = fb.format.c[i].h; | |
1276 | + const int l = fb.format.c[i].l; | |
1277 | + if (h && (!(mMasking & component_mask))) { | |
1278 | + mask |= ((1<<(h-l))-1) << l; | |
1279 | + } | |
1280 | + } | |
1281 | + | |
1282 | + // There is no need to clear the masked components of the source | |
1283 | + // (unless we applied a logic op), because they're already zeroed | |
1284 | + // by construction (masked components are not computed) | |
1285 | + | |
1286 | + if (mLogicOp) { | |
1287 | + const needs_t& needs = mBuilderContext.needs; | |
1288 | + const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; | |
1289 | + if (opcode != GGL_CLEAR) { | |
1290 | + // clear masked component of source | |
1291 | + build_and_immediate(pixel.reg, s.reg, mask, fb.size()); | |
1292 | + s = pixel; | |
1293 | + } | |
1294 | + } | |
1295 | + | |
1296 | + // clear non masked components of destination | |
1297 | + build_and_immediate(fb.reg, fb.reg, ~mask, fb.size()); | |
1298 | + | |
1299 | + // or back the channels that were masked | |
1300 | + if (s.reg == fb.reg) { | |
1301 | + // this is in fact a MOV | |
1302 | + if (s.reg == pixel.reg) { | |
1303 | + // ugh. this in in fact a nop | |
1304 | + } else { | |
1305 | + MOV_REG_TO_REG(fb.reg, pixel.reg); | |
1306 | + } | |
1307 | + } else { | |
1308 | + MOV_REG_TO_REG(fb.reg, pixel.reg); | |
1309 | + OR_REG_TO_REG(s.reg, pixel.reg); | |
1310 | + } | |
1311 | + MOV_REG_TO_MEM(fb.reg, mDstPixel.offset_ebp, EBP); | |
1312 | +} | |
1313 | + | |
1314 | +// --------------------------------------------------------------------------- | |
1315 | + | |
1316 | +void GGLX86Assembler::base_offset(pointer_t& d, pointer_t& b, const reg_t& o) | |
1317 | +{ | |
1318 | +// d and b are the same reference | |
1319 | + Scratch scratches(registerFile()); | |
1320 | + int temp_reg = scratches.obtain(); | |
1321 | + switch (b.size) { | |
1322 | + case 32: | |
1323 | + MOV_REG_TO_REG(b.reg, temp_reg); | |
1324 | + MOV_REG_TO_REG(o.reg, d.reg); | |
1325 | + SHL(2,d.reg); | |
1326 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
1327 | + break; | |
1328 | + case 24: | |
1329 | + if (d.reg == b.reg) { | |
1330 | + MOV_REG_TO_REG(b.reg, temp_reg); | |
1331 | + MOV_REG_TO_REG(o.reg, d.reg); | |
1332 | + SHL(1,d.reg); | |
1333 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
1334 | + ADD_REG_TO_REG(o.reg, d.reg); | |
1335 | + } else { | |
1336 | + MOV_REG_TO_REG(o.reg, temp_reg); | |
1337 | + SHL(1,temp_reg); | |
1338 | + MOV_REG_TO_REG(temp_reg, d.reg); | |
1339 | + ADD_REG_TO_REG(o.reg, d.reg); | |
1340 | + ADD_REG_TO_REG(b.reg, d.reg); | |
1341 | + } | |
1342 | + break; | |
1343 | + case 16: | |
1344 | + MOV_REG_TO_REG(b.reg, temp_reg); | |
1345 | + MOV_REG_TO_REG(o.reg, d.reg); | |
1346 | + SHL(1,d.reg); | |
1347 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
1348 | + break; | |
1349 | + case 8: | |
1350 | + MOV_REG_TO_REG(b.reg, temp_reg); | |
1351 | + MOV_REG_TO_REG(o.reg, d.reg); | |
1352 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
1353 | + break; | |
1354 | + } | |
1355 | + scratches.recycle(temp_reg); | |
1356 | +} | |
1357 | + | |
1358 | +// ---------------------------------------------------------------------------- | |
1359 | +// cheezy register allocator... | |
1360 | +// ---------------------------------------------------------------------------- | |
1361 | + | |
1362 | +void X86RegisterAllocator::reset() | |
1363 | +{ | |
1364 | + mRegs.reset(); | |
1365 | +} | |
1366 | + | |
1367 | +int X86RegisterAllocator::reserveReg(int reg) | |
1368 | +{ | |
1369 | + return mRegs.reserve(reg); | |
1370 | +} | |
1371 | + | |
1372 | +int X86RegisterAllocator::obtainReg() | |
1373 | +{ | |
1374 | + return mRegs.obtain(); | |
1375 | +} | |
1376 | + | |
1377 | +void X86RegisterAllocator::recycleReg(int reg) | |
1378 | +{ | |
1379 | + mRegs.recycle(reg); | |
1380 | +} | |
1381 | + | |
1382 | +X86RegisterAllocator::RegisterFile& X86RegisterAllocator::registerFile() | |
1383 | +{ | |
1384 | + return mRegs; | |
1385 | +} | |
1386 | + | |
1387 | +// ---------------------------------------------------------------------------- | |
1388 | + | |
1389 | +X86RegisterAllocator::RegisterFile::RegisterFile() | |
1390 | + : mRegs(0), mTouched(0), mStatus(0) | |
1391 | +{ | |
1392 | + //reserve(PhysicalReg_EBP); | |
1393 | + //reserve(PhysicalReg_ESP); | |
1394 | +} | |
1395 | + | |
1396 | +X86RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) | |
1397 | + : mRegs(rhs.mRegs), mTouched(rhs.mTouched) | |
1398 | +{ | |
1399 | +} | |
1400 | + | |
// Destructor: the body is empty, nothing is released here.
1401 | +X86RegisterAllocator::RegisterFile::~RegisterFile() | |
1402 | +{ | |
1403 | +} | |
1404 | + | |
1405 | +bool X86RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const | |
1406 | +{ | |
1407 | + return (mRegs == rhs.mRegs); | |
1408 | +} | |
1409 | + | |
1410 | +void X86RegisterAllocator::RegisterFile::reset() | |
1411 | +{ | |
1412 | + mRegs = mTouched = mStatus = 0; | |
1413 | +} | |
1414 | + | |
1415 | +int X86RegisterAllocator::RegisterFile::reserve(int reg) | |
1416 | +{ | |
1417 | + LOG_ALWAYS_FATAL_IF(isUsed(reg), | |
1418 | + "reserving register %d, but already in use", | |
1419 | + reg); | |
1420 | + if(isUsed(reg)) return -1; | |
1421 | + mRegs |= (1<<reg); | |
1422 | + mTouched |= mRegs; | |
1423 | + return reg; | |
1424 | +} | |
1425 | + | |
1426 | +void X86RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) | |
1427 | +{ | |
1428 | + mRegs |= regMask; | |
1429 | + mTouched |= regMask; | |
1430 | +} | |
1431 | + | |
1432 | +int X86RegisterAllocator::RegisterFile::isUsed(int reg) const | |
1433 | +{ | |
1434 | + LOG_ALWAYS_FATAL_IF(reg>=6, "invalid register %d", reg); | |
1435 | + return mRegs & (1<<reg); | |
1436 | +} | |
1437 | + | |
1438 | +int X86RegisterAllocator::RegisterFile::obtain() | |
1439 | +{ | |
1440 | +//multiplication result is in edx:eax | |
1441 | +//ebx, ecx, edi, esi, eax, edx | |
1442 | + const char priorityList[6] = { PhysicalReg_EBX, PhysicalReg_ECX,PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_EAX, PhysicalReg_EDX }; | |
1443 | + | |
1444 | + const int nbreg = sizeof(priorityList); | |
1445 | + int i, r; | |
1446 | + for (i=0 ; i<nbreg ; i++) { | |
1447 | + r = priorityList[i]; | |
1448 | + if (!isUsed(r)) { | |
1449 | + break; | |
1450 | + } | |
1451 | + } | |
1452 | + // this is not an error anymore because, we'll try again with | |
1453 | + // a lower optimization level. | |
1454 | + ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); | |
1455 | + if (i >= nbreg) { | |
1456 | + mStatus |= OUT_OF_REGISTERS; | |
1457 | + // we return SP so we can more easily debug things | |
1458 | + // the code will never be run anyway. | |
1459 | + printf("pixelflinger ran out of registers\n"); | |
1460 | + return PhysicalReg_ESP; | |
1461 | + //return -1; | |
1462 | + } | |
1463 | + reserve(r); | |
1464 | + return r; | |
1465 | +} | |
1466 | + | |
1467 | +bool X86RegisterAllocator::RegisterFile::hasFreeRegs() const | |
1468 | +{ | |
1469 | + return ((mRegs & 0x3F) == 0x3F) ? false : true; | |
1470 | +} | |
1471 | + | |
1472 | +int X86RegisterAllocator::RegisterFile::countFreeRegs() const | |
1473 | +{ | |
1474 | + int f = ~mRegs & 0x3F; | |
1475 | + // now count number of 1 | |
1476 | + f = (f & 0x5555) + ((f>>1) & 0x5555); | |
1477 | + f = (f & 0x3333) + ((f>>2) & 0x3333); | |
1478 | + f = (f & 0x0F0F) + ((f>>4) & 0x0F0F); | |
1479 | + f = (f & 0x00FF) + ((f>>8) & 0x00FF); | |
1480 | + return f; | |
1481 | +} | |
1482 | + | |
1483 | +void X86RegisterAllocator::RegisterFile::recycle(int reg) | |
1484 | +{ | |
1485 | + LOG_FATAL_IF(!isUsed(reg), | |
1486 | + "recycling unallocated register %d", | |
1487 | + reg); | |
1488 | + mRegs &= ~(1<<reg); | |
1489 | +} | |
1490 | + | |
1491 | +void X86RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) | |
1492 | +{ | |
1493 | + LOG_FATAL_IF((mRegs & regMask)!=regMask, | |
1494 | + "recycling unallocated registers " | |
1495 | + "(recycle=%08x, allocated=%08x, unallocated=%08x)", | |
1496 | + regMask, mRegs, mRegs®Mask); | |
1497 | + mRegs &= ~regMask; | |
1498 | +} | |
1499 | + | |
1500 | +uint32_t X86RegisterAllocator::RegisterFile::touched() const | |
1501 | +{ | |
1502 | + return mTouched; | |
1503 | +} | |
1504 | + | |
1505 | +// ---------------------------------------------------------------------------- | |
1506 | + | |
1507 | +}; // namespace android |
@@ -0,0 +1,563 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.h | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | + | |
19 | +#ifndef ANDROID_GGLX86ASSEMBLER_H | |
20 | +#define ANDROID_GGLX86ASSEMBLER_H | |
21 | + | |
22 | +#include <stdint.h> | |
23 | +#include <sys/types.h> | |
24 | + | |
25 | +#include <private/pixelflinger/ggl_context.h> | |
26 | + | |
27 | +#include "codeflinger/x86/X86Assembler.h" | |
28 | + | |
29 | + | |
30 | +namespace android { | |
31 | + | |
32 | +// ---------------------------------------------------------------------------- | |
33 | + | |
// Expands to MOV_MEM_TO_REG with displacement GGL_OFFSETOF(FIELD), base
// register mBuilderContext.Rctx and destination REG.
// (GGL_OFFSETOF is presumably the byte offset of FIELD inside the GGL
// context structure -- confirm against ggl_context.h.)
#define CONTEXT_LOAD(REG, FIELD) \
    MOV_MEM_TO_REG(GGL_OFFSETOF(FIELD), mBuilderContext.Rctx, REG)

// Store counterpart of CONTEXT_LOAD: expands to MOV_REG_TO_MEM with source
// REG, displacement GGL_OFFSETOF(FIELD) and base register mBuilderContext.Rctx.
#define CONTEXT_STORE(REG, FIELD) \
    MOV_REG_TO_MEM(REG, GGL_OFFSETOF(FIELD), mBuilderContext.Rctx)
39 | + | |
// Register bookkeeping for the x86 code generator. The allocator owns one
// RegisterFile (mRegs) and exposes reserve/obtain/recycle entry points; the
// nested Scratch helper releases whatever it handed out when it goes out of
// scope.
class X86RegisterAllocator
{
public:
    class RegisterFile;

    RegisterFile& registerFile();
    int reserveReg(int reg);
    int obtainReg();
    void recycleReg(int reg);
    void reset();

    // Bit-mask based record of register usage. One bit per register number
    // is kept in mRegs; mTouched and mStatus are additional masks exposed
    // through touched() and status().
    class RegisterFile
    {
    public:
        RegisterFile();
        RegisterFile(const RegisterFile& rhs);
        ~RegisterFile();

        void reset();

        bool operator == (const RegisterFile& rhs) const;
        bool operator != (const RegisterFile& rhs) const {
            return !operator == (rhs);
        }

        // Single-register and mask-based reservation.
        int reserve(int reg);
        void reserveSeveral(uint32_t regMask);

        // Single-register and mask-based release.
        void recycle(int reg);
        void recycleSeveral(uint32_t regMask);

        int obtain();
        inline int isUsed(int reg) const;

        bool hasFreeRegs() const;
        int countFreeRegs() const;

        uint32_t touched() const;
        inline uint32_t status() const { return mStatus; }

        // Bits that can appear in status().
        enum {
            OUT_OF_REGISTERS = 0x1
        };

    private:
        uint32_t mRegs;
        uint32_t mTouched;
        uint32_t mStatus;
    };

    // Scoped holder for temporary registers. Every register obtained or
    // reserved through a Scratch gets its bit recorded in mScratch, and the
    // destructor hands the whole mask back via RegisterFile::recycleSeveral.
    class Scratch
    {
    public:
        Scratch(RegisterFile& regFile)
            : mRegFile(regFile), mScratch(0) {
        }
        ~Scratch() {
            mRegFile.recycleSeveral(mScratch);
        }
        // Obtains a register from the underlying file and remembers it.
        int obtain() {
            int reg = mRegFile.obtain();
            mScratch |= 1<<reg;
            return reg;
        }
        // Reserves a specific register in the underlying file and remembers it.
        void reserve(int reg) {
            mRegFile.reserve(reg);
            mScratch |= 1<<reg;
        }
        // Releases a register early and forgets it, so the destructor does
        // not recycle it a second time.
        void recycle(int reg) {
            mRegFile.recycle(reg);
            mScratch &= ~(1<<reg);
        }
        // True when `reg` is currently recorded in this Scratch's own mask
        // (not the RegisterFile's).
        bool isUsed(int reg) {
            return (mScratch & (1<<reg));
        }
        int countFreeRegs() {
            return mRegFile.countFreeRegs();
        }
    private:
        RegisterFile& mRegFile;
        uint32_t mScratch;
    };

/*
// currently we don't use it

    class Spill
    {
    public:
        Spill(RegisterFile& regFile, X86Assembler& gen, uint32_t reglist)
            : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0)
        {
            if (reglist) {
                int count = 0;
                while (reglist) {
                    count++;
                    reglist &= ~(1 << (31 - __builtin_clz(reglist)));
                }
                if (count == 1) {
                    int reg = 31 - __builtin_clz(mRegList);
                    // move to the stack
                } else {
                    // move to the stack
                }
                mRegFile.recycleSeveral(mRegList);
                mCount = count;
            }
        }
        ~Spill() {
            if (mRegList) {
                if (mCount == 1) {
                    int reg = 31 - __builtin_clz(mRegList);
                    // move to the stack
                } else {
                }
                mRegFile.reserveSeveral(mRegList);
            }
        }
    private:
        RegisterFile& mRegFile;
        X86Assembler& mGen;
        uint32_t mRegList;
        int mCount;
    };
*/

private:
    RegisterFile mRegs;
};
169 | + | |
170 | +// ---------------------------------------------------------------------------- | |
171 | + | |
172 | +class GGLX86Assembler : public X86Assembler, public X86RegisterAllocator | |
173 | +{ | |
174 | +public: | |
175 | + | |
176 | + GGLX86Assembler(const sp<Assembly>& assembly); | |
177 | + ~GGLX86Assembler(); | |
178 | + | |
179 | + char* base() const { return 0; } // XXX | |
180 | + char* pc() const { return 0; } // XXX | |
181 | + | |
182 | + void reset(int opt_level); | |
183 | + | |
184 | + | |
185 | + // generate scanline code for given needs | |
186 | + int scanline(const needs_t& needs, context_t const* c); | |
187 | + int scanline_core(const needs_t& needs, context_t const* c); | |
188 | + | |
189 | + enum { | |
190 | + CLEAR_LO = 0x0001, | |
191 | + CLEAR_HI = 0x0002, | |
192 | + CORRUPTIBLE = 0x0004, | |
193 | + FIRST = 0x0008 | |
194 | + }; | |
195 | + | |
196 | + enum { //load/store flags | |
197 | + WRITE_BACK = 0x0001 | |
198 | + }; | |
199 | + | |
200 | + struct reg_t { | |
201 | + reg_t() : reg(-1), flags(0), offset_ebp(0) { | |
202 | + } | |
203 | + reg_t(int r, int f=0, int offset=0) | |
204 | + : reg(r), flags(f), offset_ebp(offset) { | |
205 | + } | |
206 | + void setTo(int r, int f=0, int offset=0) { | |
207 | + reg=r; flags=f; offset_ebp=offset; | |
208 | + } | |
209 | + int reg; | |
210 | + uint16_t flags; | |
211 | + int offset_ebp; | |
212 | + }; | |
213 | + | |
214 | + struct integer_t : public reg_t { | |
215 | + integer_t() : reg_t(), s(0) { | |
216 | + } | |
217 | + integer_t(int r, int sz=32, int f=0, int offset=0) | |
218 | + : reg_t(r, f, offset), s(sz) { | |
219 | + } | |
220 | + void setTo(int r, int sz=32, int f=0, int offset=0) { | |
221 | + reg_t::setTo(r, f, offset); s=sz; | |
222 | + } | |
223 | + int8_t s; | |
224 | + inline int size() const { return s; } | |
225 | + }; | |
226 | + | |
227 | + struct pixel_t : public reg_t { | |
228 | + pixel_t() : reg_t() { | |
229 | + memset(&format, 0, sizeof(GGLFormat)); | |
230 | + } | |
231 | + pixel_t(int r, const GGLFormat* fmt, int f=0, int offset=0) | |
232 | + : reg_t(r, f, offset), format(*fmt) { | |
233 | + } | |
234 | + void setTo(int r, const GGLFormat* fmt, int f=0, int offset=0) { | |
235 | + reg_t::setTo(r, f, offset); format = *fmt; | |
236 | + } | |
237 | + GGLFormat format; | |
238 | + inline int hi(int c) const { return format.c[c].h; } | |
239 | + inline int low(int c) const { return format.c[c].l; } | |
240 | + inline int mask(int c) const { return ((1<<size(c))-1) << low(c); } | |
241 | + inline int size() const { return format.size*8; } | |
242 | + inline int size(int c) const { return component_size(c); } | |
243 | + inline int component_size(int c) const { return hi(c) - low(c); } | |
244 | + }; | |
245 | + | |
246 | + struct component_t : public reg_t { | |
247 | + component_t() : reg_t(), h(0), l(0) { | |
248 | + } | |
249 | + component_t(int r, int f=0, int offset=0) | |
250 | + : reg_t(r, f, offset), h(0), l(0) { | |
251 | + } | |
252 | + component_t(int r, int lo, int hi, int f=0, int offset=0) | |
253 | + : reg_t(r, f, offset), h(hi), l(lo) { | |
254 | + } | |
255 | + explicit component_t(const integer_t& rhs) | |
256 | + : reg_t(rhs.reg, rhs.flags, rhs.offset_ebp), h(rhs.s), l(0) { | |
257 | + } | |
258 | + explicit component_t(const pixel_t& rhs, int component) { | |
259 | + setTo( rhs.reg, | |
260 | + rhs.format.c[component].l, | |
261 | + rhs.format.c[component].h, | |
262 | + rhs.flags|CLEAR_LO|CLEAR_HI, rhs.offset_ebp); | |
263 | + } | |
264 | + void setTo(int r, int lo=0, int hi=0, int f=0, int offset=0) { | |
265 | + reg_t::setTo(r, f, offset); h=hi; l=lo; | |
266 | + } | |
267 | + int8_t h; | |
268 | + int8_t l; | |
269 | + inline int size() const { return h-l; } | |
270 | + }; | |
271 | + | |
272 | + struct pointer_t : public reg_t { | |
273 | + pointer_t() : reg_t(), size(0) { | |
274 | + } | |
275 | + pointer_t(int r, int s, int f=0, int offset=0) | |
276 | + : reg_t(r, f, offset), size(s) { | |
277 | + } | |
278 | + void setTo(int r, int s, int f=0, int offset=0) { | |
279 | + reg_t::setTo(r, f, offset); size=s; | |
280 | + } | |
281 | + int8_t size; | |
282 | + }; | |
283 | + | |
284 | + | |
285 | +private: | |
286 | + struct tex_coord_t { | |
287 | + reg_t s; | |
288 | + reg_t t; | |
289 | + pointer_t ptr; | |
290 | + }; | |
291 | + | |
292 | + struct fragment_parts_t { | |
293 | + uint32_t packed : 1; | |
294 | + uint32_t reload : 2; | |
295 | + uint32_t iterated_packed : 1; | |
296 | + pixel_t iterated; | |
297 | + pointer_t cbPtr; | |
298 | + pointer_t covPtr; | |
299 | + reg_t count; | |
300 | + reg_t argb[4]; | |
301 | + reg_t argb_dx[4]; | |
302 | + reg_t z; | |
303 | + reg_t dither; | |
304 | + pixel_t texel[GGL_TEXTURE_UNIT_COUNT]; | |
305 | + tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT]; | |
306 | + }; | |
307 | + | |
308 | + struct texture_unit_t { | |
309 | + int format_idx; | |
310 | + GGLFormat format; | |
311 | + int bits; | |
312 | + int swrap; | |
313 | + int twrap; | |
314 | + int env; | |
315 | + int pot; | |
316 | + int linear; | |
317 | + uint8_t mask; | |
318 | + uint8_t replaced; | |
319 | + }; | |
320 | + | |
321 | + struct texture_machine_t { | |
322 | + texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT]; | |
323 | + uint8_t mask; | |
324 | + uint8_t replaced; | |
325 | + uint8_t directTexture; | |
326 | + uint8_t activeUnits; | |
327 | + }; | |
328 | + | |
329 | + struct component_info_t { | |
330 | + bool masked : 1; | |
331 | + bool inDest : 1; | |
332 | + bool needed : 1; | |
333 | + bool replaced : 1; | |
334 | + bool iterated : 1; | |
335 | + bool smooth : 1; | |
336 | + bool blend : 1; | |
337 | + bool fog : 1; | |
338 | + }; | |
339 | + | |
340 | + struct builder_context_t { | |
341 | + context_t const* c; | |
342 | + needs_t needs; | |
343 | + int Rctx; | |
344 | + }; | |
345 | + | |
346 | + template <typename T> | |
347 | + void modify(T& r, Scratch& regs) | |
348 | + { | |
349 | + if (!(r.flags & CORRUPTIBLE)) { | |
350 | + r.reg = regs.obtain(); | |
351 | + r.flags |= CORRUPTIBLE; | |
352 | + } | |
353 | + } | |
354 | + | |
355 | + // helpers | |
356 | + void base_offset(pointer_t& d, pointer_t& b, const reg_t& o); | |
357 | + | |
358 | + // texture environement | |
359 | + void modulate( component_t& dest, | |
360 | + const component_t& incoming, | |
361 | + const pixel_t& texel, int component); | |
362 | + | |
363 | + void decal( component_t& dest, | |
364 | + const component_t& incoming, | |
365 | + const pixel_t& texel, int component); | |
366 | + | |
367 | + void blend( component_t& dest, | |
368 | + const component_t& incoming, | |
369 | + const pixel_t& texel, int component, int tmu); | |
370 | + | |
371 | + void add( component_t& dest, | |
372 | + const component_t& incoming, | |
373 | + const pixel_t& texel, int component); | |
374 | + | |
375 | + // load/store stuff | |
376 | + void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0); | |
377 | + void load(pointer_t& addr, const pixel_t& dest, uint32_t flags=0); | |
378 | + | |
379 | + void extract(integer_t& d, const pixel_t& s, int component); | |
380 | + void extract(component_t& d, const pixel_t& s, int component); | |
381 | + void extract(integer_t& d, int s, int h, int l, int bits=32); | |
382 | + void expand(integer_t& d, const integer_t& s, int dbits); | |
383 | + void expand(integer_t& d, const component_t& s, int dbits); | |
384 | + void expand(component_t& d, const component_t& s, int dbits); | |
385 | + void downshift(pixel_t& d, int component, component_t s, reg_t& dither); | |
386 | + | |
387 | + | |
388 | + void mul_factor( component_t& d, | |
389 | + const integer_t& v, | |
390 | + const integer_t& f, Scratch& scratches); | |
391 | + | |
392 | + void mul_factor_add( component_t& d, | |
393 | + const integer_t& v, | |
394 | + const integer_t& f, | |
395 | + const component_t& a); | |
396 | + | |
397 | + void component_add( component_t& d, | |
398 | + const integer_t& dst, | |
399 | + const integer_t& src); | |
400 | + | |
401 | + void component_sat( const component_t& v, const int temp_reg); | |
402 | + | |
403 | + | |
404 | + void build_scanline_preparation(fragment_parts_t& parts, | |
405 | + const needs_t& needs); | |
406 | + | |
407 | + void build_smooth_shade(fragment_parts_t& parts); | |
408 | + | |
409 | + void build_component( pixel_t& pixel, | |
410 | + fragment_parts_t& parts, | |
411 | + int component, | |
412 | + Scratch& global_scratches); | |
413 | + | |
414 | + void build_incoming_component( | |
415 | + component_t& temp, | |
416 | + int dst_size, | |
417 | + fragment_parts_t& parts, | |
418 | + int component, | |
419 | + Scratch& scratches, | |
420 | + Scratch& global_scratches); | |
421 | + | |
422 | + void init_iterated_color(fragment_parts_t& parts, const reg_t& x); | |
423 | + | |
424 | + void build_iterated_color( component_t& fragment, | |
425 | + fragment_parts_t& parts, | |
426 | + int component, | |
427 | + Scratch& regs); | |
428 | + | |
429 | + void decodeLogicOpNeeds(const needs_t& needs); | |
430 | + | |
431 | + void decodeTMUNeeds(const needs_t& needs, context_t const* c); | |
432 | + | |
433 | + void init_textures( tex_coord_t* coords, | |
434 | + const reg_t& x, | |
435 | + const reg_t& y); | |
436 | + | |
437 | + void build_textures( fragment_parts_t& parts, | |
438 | + Scratch& regs); | |
439 | + | |
440 | + void filter8( const fragment_parts_t& parts, | |
441 | + pixel_t& texel, const texture_unit_t& tmu, | |
442 | + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, | |
443 | + int FRAC_BITS, Scratch& scratches); | |
444 | + | |
445 | + void filter16( const fragment_parts_t& parts, | |
446 | + pixel_t& texel, const texture_unit_t& tmu, | |
447 | + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, | |
448 | + int FRAC_BITS, Scratch& scratches); | |
449 | + | |
450 | + void filter24( const fragment_parts_t& parts, | |
451 | + pixel_t& texel, const texture_unit_t& tmu, | |
452 | + int U, int V, pointer_t& txPtr, | |
453 | + int FRAC_BITS); | |
454 | + | |
455 | + void filter32( const fragment_parts_t& parts, | |
456 | + pixel_t& texel, const texture_unit_t& tmu, | |
457 | + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, | |
458 | + int FRAC_BITS, Scratch& scratches); | |
459 | + | |
460 | + void build_texture_environment( component_t& fragment, | |
461 | + fragment_parts_t& parts, | |
462 | + int component, | |
463 | + Scratch& regs); | |
464 | + | |
465 | + void wrapping( int d, | |
466 | + int coord, int size, | |
467 | + int tx_wrap, int tx_linear, Scratch& scratches); | |
468 | + | |
469 | + void build_fog( component_t& temp, | |
470 | + int component, | |
471 | + Scratch& parent_scratches); | |
472 | + | |
473 | + void build_blending( component_t& in_out, | |
474 | + pixel_t& pixel, | |
475 | + int component, | |
476 | + Scratch& parent_scratches); | |
477 | + | |
478 | + void build_blend_factor( | |
479 | + integer_t& factor, int f, int component, | |
480 | + const pixel_t& dst_pixel, | |
481 | + integer_t& fragment, | |
482 | + integer_t& fb, | |
483 | + Scratch& scratches); | |
484 | + | |
485 | + void build_blendFOneMinusF( component_t& temp, | |
486 | + const integer_t& factor, | |
487 | + const integer_t& fragment, | |
488 | + const integer_t& fb); | |
489 | + | |
490 | + void build_blendOneMinusFF( component_t& temp, | |
491 | + const integer_t& factor, | |
492 | + const integer_t& fragment, | |
493 | + const integer_t& fb); | |
494 | + | |
495 | + void build_coverage_application(component_t& fragment, | |
496 | + fragment_parts_t& parts, | |
497 | + Scratch& regs); | |
498 | + | |
499 | + void build_alpha_test(component_t& fragment, const fragment_parts_t& parts); | |
500 | + | |
501 | + enum { Z_TEST=1, Z_WRITE=2 }; | |
502 | + void build_depth_test(const fragment_parts_t& parts, uint32_t mask); | |
503 | + void build_iterate_z(const fragment_parts_t& parts); | |
504 | + void build_iterate_f(const fragment_parts_t& parts); | |
505 | + void build_iterate_texture_coordinates(const fragment_parts_t& parts); | |
506 | + | |
507 | + void build_logic_op(pixel_t& pixel, Scratch& regs); | |
508 | + | |
509 | + void build_masking(pixel_t& pixel, Scratch& regs); | |
510 | + | |
511 | + void build_and_immediate(int d, int s, uint32_t mask, int bits); | |
512 | + | |
513 | + bool isAlphaSourceNeeded() const; | |
514 | + | |
515 | + enum { | |
516 | + FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8 | |
517 | + }; | |
518 | + | |
519 | + enum { | |
520 | + LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4 | |
521 | + }; | |
522 | + | |
523 | + static int blending_codes(int fs, int fd); | |
524 | + | |
525 | + builder_context_t mBuilderContext; | |
526 | + texture_machine_t mTextureMachine; | |
527 | + component_info_t mInfo[4]; | |
528 | + int mBlending; | |
529 | + int mMasking; | |
530 | + int mAllMasked; | |
531 | + int mLogicOp; | |
532 | + int mAlphaTest; | |
533 | + int mAA; | |
534 | + int mDithering; | |
535 | + int mDepthTest; | |
536 | + | |
537 | + int mSmooth; | |
538 | + int mFog; | |
539 | + pixel_t mDstPixel; | |
540 | + | |
541 | + GGLFormat mCbFormat; | |
542 | + | |
543 | + int mBlendFactorCached; | |
544 | + integer_t mAlphaSource; | |
545 | + | |
546 | + int mBaseRegister; | |
547 | + | |
548 | + int mBlendSrc; | |
549 | + int mBlendDst; | |
550 | + int mBlendSrcA; | |
551 | + int mBlendDstA; | |
552 | + | |
553 | + int mOptLevel; | |
554 | + | |
555 | + // to stretch esp and shrink esp | |
556 | + int mCurSp; | |
557 | +}; | |
558 | + | |
559 | +// ---------------------------------------------------------------------------- | |
560 | + | |
561 | +}; // namespace android | |
562 | + | |
563 | +#endif // ANDROID_GGLX86ASSEMBLER_H |
@@ -0,0 +1,618 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/X86Assembler.cpp | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#define LOG_TAG "X86Assembler" | |
19 | + | |
20 | +#include <stdio.h> | |
21 | +#include <stdlib.h> | |
22 | +#include <cutils/log.h> | |
23 | +#include <cutils/properties.h> | |
24 | +#include <string.h> | |
25 | + | |
26 | +#if defined(WITH_LIB_HARDWARE) | |
27 | +#include <hardware_legacy/qemu_tracing.h> | |
28 | +#endif | |
29 | + | |
30 | +#include <private/pixelflinger/ggl_context.h> | |
31 | + | |
32 | +#include "codeflinger/CodeCache.h" | |
33 | +#include "codeflinger/x86/X86Assembler.h" | |
34 | + | |
35 | +// ---------------------------------------------------------------------------- | |
36 | + | |
37 | +namespace android { | |
38 | + | |
39 | +// ---------------------------------------------------------------------------- | |
40 | + | |
41 | +X86Assembler::X86Assembler(const sp<Assembly>& assembly) | |
42 | + : mAssembly(assembly) | |
43 | +{ | |
44 | + mBase = mStream = (char *)assembly->base(); | |
45 | + mDuration = ggl_system_time(); | |
46 | +#if defined(WITH_LIB_HARDWARE) | |
47 | + mQemuTracing = true; | |
48 | +#endif | |
49 | +} | |
50 | + | |
51 | +X86Assembler::~X86Assembler() | |
52 | +{ | |
53 | +} | |
54 | + | |
55 | +char* X86Assembler::pc() const | |
56 | +{ | |
57 | + return mStream; | |
58 | +} | |
59 | + | |
60 | +char* X86Assembler::base() const | |
61 | +{ | |
62 | + return mBase; | |
63 | +} | |
64 | + | |
65 | +void X86Assembler::reset() | |
66 | +{ | |
67 | + mBase = mStream = (char *)mAssembly->base(); | |
68 | + mBranchTargets.clear(); | |
69 | + mLabels.clear(); | |
70 | + mLabelsInverseMapping.clear(); | |
71 | + mComments.clear(); | |
72 | +} | |
73 | + | |
74 | +// ---------------------------------------------------------------------------- | |
75 | + | |
76 | +void X86Assembler::disassemble(const char* name) | |
77 | +{ | |
78 | + if (name) { | |
79 | + printf("%s:\n", name); | |
80 | + } | |
81 | + size_t count = pc()-base(); | |
82 | + unsigned insLength; | |
83 | + unsigned insSize; | |
84 | + char* curStream = (char*)base(); | |
85 | + while (count>0) { | |
86 | + ssize_t label = mLabelsInverseMapping.indexOfKey(curStream); | |
87 | + if (label >= 0) { | |
88 | + printf("%s:\n", mLabelsInverseMapping.valueAt(label)); | |
89 | + } | |
90 | + ssize_t comment = mComments.indexOfKey(curStream); | |
91 | + if (comment >= 0) { | |
92 | + printf("; %s\n", mComments.valueAt(comment)); | |
93 | + } | |
94 | + insLength = decodeThenPrint(curStream); | |
95 | + curStream = curStream + insLength; | |
96 | + count = count - insLength; | |
97 | + } | |
98 | +} | |
99 | + | |
100 | +void X86Assembler::comment(const char* string) | |
101 | +{ | |
102 | + mComments.add(mStream, string); | |
103 | +} | |
104 | + | |
105 | +void X86Assembler::label(const char* theLabel) | |
106 | +{ | |
107 | + mLabels.add(theLabel, mStream); | |
108 | + mLabelsInverseMapping.add(mStream, theLabel); | |
109 | +} | |
110 | + | |
111 | +//the conditional jump | |
112 | +void X86Assembler::JCC(Mnemonic cc, const char* label) { | |
113 | + switch (cc) { | |
114 | + case Mnemonic_JO: | |
115 | + encoder_imm(Mnemonic_JO, OpndSize_32, 0/*imm*/, mStream); | |
116 | + break; | |
117 | + case Mnemonic_JNO: | |
118 | + encoder_imm(Mnemonic_JNO, OpndSize_32, 0/*imm*/, mStream); | |
119 | + break; | |
120 | + case Mnemonic_JB: | |
121 | + encoder_imm(Mnemonic_JB, OpndSize_32, 0/*imm*/, mStream); | |
122 | + break; | |
123 | + case Mnemonic_JNB: | |
124 | + encoder_imm(Mnemonic_JNB, OpndSize_32, 0/*imm*/, mStream); | |
125 | + break; | |
126 | + case Mnemonic_JZ: | |
127 | + encoder_imm(Mnemonic_JZ, OpndSize_32, 0/*imm*/, mStream); | |
128 | + break; | |
129 | + case Mnemonic_JNZ: | |
130 | + encoder_imm(Mnemonic_JNZ, OpndSize_32, 0/*imm*/, mStream); | |
131 | + break; | |
132 | + case Mnemonic_JBE: | |
133 | + encoder_imm(Mnemonic_JBE, OpndSize_32, 0/*imm*/, mStream); | |
134 | + break; | |
135 | + case Mnemonic_JNBE: | |
136 | + encoder_imm(Mnemonic_JNBE, OpndSize_32, 0/*imm*/, mStream); | |
137 | + break; | |
138 | + case Mnemonic_JS: | |
139 | + encoder_imm(Mnemonic_JS, OpndSize_32, 0/*imm*/, mStream); | |
140 | + break; | |
141 | + case Mnemonic_JNS: | |
142 | + encoder_imm(Mnemonic_JNS, OpndSize_32, 0/*imm*/, mStream); | |
143 | + break; | |
144 | + case Mnemonic_JP: | |
145 | + encoder_imm(Mnemonic_JP, OpndSize_32, 0/*imm*/, mStream); | |
146 | + break; | |
147 | + case Mnemonic_JNP: | |
148 | + encoder_imm(Mnemonic_JNP, OpndSize_32, 0/*imm*/, mStream); | |
149 | + break; | |
150 | + case Mnemonic_JL: | |
151 | + encoder_imm(Mnemonic_JL, OpndSize_32, 0/*imm*/, mStream); | |
152 | + break; | |
153 | + case Mnemonic_JNL: | |
154 | + encoder_imm(Mnemonic_JNL, OpndSize_32, 0/*imm*/, mStream); | |
155 | + break; | |
156 | + case Mnemonic_JLE: | |
157 | + encoder_imm(Mnemonic_JLE, OpndSize_32, 0/*imm*/, mStream); | |
158 | + break; | |
159 | + case Mnemonic_JNLE: | |
160 | + encoder_imm(Mnemonic_JNLE, OpndSize_32, 0/*imm*/, mStream); | |
161 | + break; | |
162 | + default : | |
163 | + printf("the condition is not supported.\n"); | |
164 | + return; | |
165 | + } | |
166 | + mStreamNext = mStream + encoder_get_inst_size(mStream); | |
167 | + //the offset is relative to the next instruction of the current PC | |
168 | + mBranchTargets.add(branch_target_t(label, mStream, mStreamNext)); | |
169 | + mStream = mStreamNext; | |
170 | +} | |
171 | + | |
172 | +void X86Assembler::JMP(const char* label) { | |
173 | + encoder_imm(Mnemonic_JMP, OpndSize_32, 0/*imm*/, mStream); | |
174 | + mStreamNext = mStream + encoder_get_inst_size(mStream); | |
175 | + mBranchTargets.add(branch_target_t(label, mStream, mStreamNext)); | |
176 | + mStream = mStreamNext; | |
177 | +} | |
178 | + | |
179 | +void X86Assembler::prepare_esp(int old_offset) | |
180 | +{ | |
181 | + mStreamUpdate = mStream; | |
182 | + SUB_IMM_TO_REG(old_offset, ESP); | |
183 | +} | |
184 | + | |
185 | +void X86Assembler::update_esp(int new_offset) | |
186 | +{ | |
187 | + encoder_update_imm_rm(new_offset, mStreamUpdate); | |
188 | +} | |
189 | + | |
190 | +void X86Assembler::shrink_esp(int shrink_offset) | |
191 | +{ | |
192 | + ADD_IMM_TO_REG(shrink_offset, ESP); | |
193 | +} | |
194 | + | |
195 | +void X86Assembler::callee_work() | |
196 | +{ | |
197 | + //push EBX, ESI, EDI which need to be done in callee | |
198 | + /* | |
199 | + push %ebp | |
200 | + mov %esp,%ebp | |
201 | + push %ebx | |
202 | + push %esi | |
203 | + push %edi | |
204 | + */ | |
205 | + PUSH(EBP); | |
206 | + MOV_REG_TO_REG(ESP, EBP); | |
207 | + PUSH(EBX); | |
208 | + PUSH(ESI); | |
209 | + PUSH(EDI); | |
210 | +} | |
211 | + | |
212 | +void X86Assembler::return_work() | |
213 | +{ | |
214 | +// pop %esi | |
215 | +// pop %edi | |
216 | +// pop %ebx | |
217 | +// movl %ebp,%esp | |
218 | +// pop %ebp | |
219 | +// ret | |
220 | +// ret is equivalent to below | |
221 | +// pop %eax // the return address | |
222 | +// jmp *%eax | |
223 | + POP(EDI); | |
224 | + POP(ESI); | |
225 | + POP(EBX); | |
226 | + POP(EBP); | |
227 | + encoder_return(mStream); | |
228 | + mStream = mStream + encoder_get_inst_size(mStream); | |
229 | +} | |
230 | + | |
231 | +int X86Assembler::generate(const char* name) | |
232 | +{ | |
233 | + // fixup all the branches | |
234 | + size_t count = mBranchTargets.size(); | |
235 | + while (count--) { | |
236 | + const branch_target_t& bt = mBranchTargets[count]; | |
237 | + char* target_pc = mLabels.valueFor(bt.label); | |
238 | + LOG_ALWAYS_FATAL_IF(!target_pc, | |
239 | + "error resolving branch targets, target_pc is null"); | |
240 | + //the offset is relative to the next instruction of the current PC | |
241 | + int32_t offset = int32_t(target_pc - bt.next_pc); | |
242 | + encoder_update_imm(offset, bt.pc); | |
243 | + } | |
244 | + | |
245 | + mAssembly->resize((int)(pc()-base())); | |
246 | + | |
247 | + // the instruction cache is flushed by CodeCache | |
248 | + const int64_t duration = ggl_system_time() - mDuration; | |
249 | + const char * const format = "generated %s (%d ins size) at [%p:%p] in %lld ns\n"; | |
250 | + ALOGI(format, name, int(pc()-base()), base(), pc(), duration); | |
251 | + | |
252 | +#if defined(WITH_LIB_HARDWARE) | |
253 | + if (__builtin_expect(mQemuTracing, 0)) { | |
254 | + int err = qemu_add_mapping(uintptr_t(base()), name); | |
255 | + mQemuTracing = (err >= 0); | |
256 | + } | |
257 | +#endif | |
258 | + | |
259 | + char value[PROPERTY_VALUE_MAX]; | |
260 | + property_get("debug.pf.disasm", value, "0"); | |
261 | + if (atoi(value) != 0) { | |
262 | + printf(format, name, int(pc()-base()), base(), pc(), duration); | |
263 | + disassemble(name); | |
264 | + } | |
265 | + | |
266 | + return NO_ERROR; | |
267 | +} | |
268 | + | |
269 | +char* X86Assembler::pcForLabel(const char* label) | |
270 | +{ | |
271 | + return mLabels.valueFor(label); | |
272 | +} | |
273 | + | |
274 | +// ---------------------------------------------------------------------------- | |
275 | + | |
276 | +void X86Assembler::PUSH(int reg) { | |
277 | + encoder_reg(Mnemonic_PUSH, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
278 | + mStream = mStream + encoder_get_inst_size(mStream); | |
279 | +} | |
280 | + | |
281 | +void X86Assembler::POP(int reg) { | |
282 | + encoder_reg(Mnemonic_POP, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
283 | + mStream = mStream + encoder_get_inst_size(mStream); | |
284 | +} | |
285 | + | |
286 | +//arithmetic | |
287 | +void X86Assembler::ADD_REG_TO_REG(int src, int dst) { | |
288 | + encoder_reg_reg(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
289 | + mStream = mStream + encoder_get_inst_size(mStream); | |
290 | +} | |
291 | + | |
292 | +void X86Assembler::ADD_IMM_TO_REG(int imm, int dst) { | |
293 | + encoder_imm_reg(Mnemonic_ADD, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
294 | + mStream = mStream + encoder_get_inst_size(mStream); | |
295 | +} | |
296 | + | |
297 | +void X86Assembler::ADD_IMM_TO_MEM(int imm, int disp, int dst) { | |
298 | + encoder_imm_mem(Mnemonic_ADD, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); | |
299 | + mStream = mStream + encoder_get_inst_size(mStream); | |
300 | +} | |
301 | + | |
302 | +void X86Assembler::ADD_MEM_TO_REG(int base_reg, int disp, int dst) { | |
303 | + encoder_mem_reg(Mnemonic_ADD, OpndSize_32, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/,LowOpndRegType_gp, mStream); | |
304 | + mStream = mStream + encoder_get_inst_size(mStream); | |
305 | +} | |
306 | + | |
307 | +void X86Assembler::ADD_REG_TO_MEM(int src, int base_reg, int disp) { | |
308 | + encoder_reg_mem(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); | |
309 | + mStream = mStream + encoder_get_inst_size(mStream); | |
310 | +} | |
311 | + | |
312 | +void X86Assembler::SUB_REG_TO_REG(int src, int dst) { | |
313 | + encoder_reg_reg(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
314 | + mStream = mStream + encoder_get_inst_size(mStream); | |
315 | +} | |
316 | + | |
317 | +void X86Assembler::SUB_IMM_TO_REG(int imm, int dst) { | |
318 | + encoder_imm_reg(Mnemonic_SUB, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
319 | + mStream = mStream + encoder_get_inst_size(mStream); | |
320 | +} | |
321 | + | |
322 | +void X86Assembler::SUB_IMM_TO_MEM(int imm, int disp, int dst) { | |
323 | + encoder_imm_mem(Mnemonic_SUB, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); | |
324 | + mStream = mStream + encoder_get_inst_size(mStream); | |
325 | +} | |
326 | + | |
327 | +void X86Assembler::SUB_REG_TO_MEM(int src, int base_reg, int disp) { | |
328 | + encoder_reg_mem(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); | |
329 | + mStream = mStream + encoder_get_inst_size(mStream); | |
330 | +} | |
331 | + | |
332 | +//test | |
333 | +void X86Assembler::TEST_REG_TO_REG(int src, int dst, OpndSize size) { | |
334 | + encoder_reg_reg(Mnemonic_TEST, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
335 | + mStream = mStream + encoder_get_inst_size(mStream); | |
336 | +} | |
337 | + | |
338 | +//compare | |
339 | +void X86Assembler::CMP_REG_TO_REG(int src, int dst, OpndSize size) { | |
340 | + encoder_reg_reg(Mnemonic_CMP, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
341 | + mStream = mStream + encoder_get_inst_size(mStream); | |
342 | +} | |
343 | + | |
344 | +void X86Assembler::CMP_IMM_TO_REG(int imm, int dst) { | |
345 | + encoder_imm_reg(Mnemonic_CMP, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
346 | + mStream = mStream + encoder_get_inst_size(mStream); | |
347 | +} | |
348 | + | |
349 | +void X86Assembler::CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size) { | |
350 | + encoder_mem_reg(Mnemonic_CMP, size, disp, base_reg, 0/*isBasePhysical*/, | |
351 | + dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
352 | + mStream = mStream + encoder_get_inst_size(mStream); | |
353 | +} | |
354 | + | |
355 | +void X86Assembler::CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size) | |
356 | +{ | |
357 | + encoder_reg_mem(Mnemonic_CMP, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); | |
358 | + mStream = mStream + encoder_get_inst_size(mStream); | |
359 | +} | |
360 | + | |
361 | +//logical | |
362 | +void X86Assembler::AND_REG_TO_REG(int src, int dst) { | |
363 | + encoder_reg_reg(Mnemonic_AND, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
364 | + mStream = mStream + encoder_get_inst_size(mStream); | |
365 | +} | |
366 | + | |
367 | +void X86Assembler::AND_IMM_TO_REG(int imm, int dst) { | |
368 | + encoder_imm_reg(Mnemonic_AND, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
369 | + mStream = mStream + encoder_get_inst_size(mStream); | |
370 | +} | |
371 | + | |
372 | +void X86Assembler::OR_REG_TO_REG(int src, int dst) { | |
373 | + encoder_reg_reg(Mnemonic_OR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
374 | + mStream = mStream + encoder_get_inst_size(mStream); | |
375 | +} | |
376 | + | |
377 | +void X86Assembler::XOR(int src, int dst) { | |
378 | + encoder_reg_reg(Mnemonic_XOR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
379 | + mStream = mStream + encoder_get_inst_size(mStream); | |
380 | +} | |
381 | + | |
382 | +void X86Assembler::OR_IMM_TO_REG(int imm, int dst) { | |
383 | + encoder_imm_reg(Mnemonic_OR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
384 | + mStream = mStream + encoder_get_inst_size(mStream); | |
385 | +} | |
386 | + | |
387 | +void X86Assembler::NOT(int dst) { | |
388 | + encoder_reg(Mnemonic_NOT, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
389 | + mStream = mStream + encoder_get_inst_size(mStream); | |
390 | +} | |
391 | + | |
392 | +void X86Assembler::NEG(int dst) { | |
393 | + encoder_reg(Mnemonic_NEG, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
394 | + mStream = mStream + encoder_get_inst_size(mStream); | |
395 | +} | |
396 | +//shift | |
397 | +void X86Assembler::SHL(int imm, int dst) { | |
398 | + encoder_imm_reg(Mnemonic_SHL, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
399 | + mStream = mStream + encoder_get_inst_size(mStream); | |
400 | +} | |
401 | + | |
402 | +void X86Assembler::SHL(int imm, int disp, int dst) { | |
403 | + encoder_imm_mem(Mnemonic_SHL, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); | |
404 | + mStream = mStream + encoder_get_inst_size(mStream); | |
405 | +} | |
406 | + | |
407 | +void X86Assembler::SHR(int imm, int dst) { | |
408 | + encoder_imm_reg(Mnemonic_SHR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
409 | + mStream = mStream + encoder_get_inst_size(mStream); | |
410 | +} | |
411 | + | |
412 | +void X86Assembler::SHR(int imm, int disp, int dst) { | |
413 | + encoder_imm_mem(Mnemonic_SHR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); | |
414 | + mStream = mStream + encoder_get_inst_size(mStream); | |
415 | +} | |
416 | + | |
417 | +void X86Assembler::SAR(int imm, int dst) { | |
418 | + encoder_imm_reg(Mnemonic_SAR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
419 | + mStream = mStream + encoder_get_inst_size(mStream); | |
420 | +} | |
421 | + | |
422 | +void X86Assembler::ROR(const int imm, int dst) { | |
423 | + encoder_imm_reg(Mnemonic_ROR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
424 | + mStream = mStream + encoder_get_inst_size(mStream); | |
425 | +} | |
426 | + | |
427 | +void X86Assembler::ROR(int imm, int disp, int dst) { | |
428 | + encoder_imm_mem(Mnemonic_ROR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); | |
429 | + mStream = mStream + encoder_get_inst_size(mStream); | |
430 | +} | |
431 | +//signed extension | |
432 | +void X86Assembler::MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) { | |
433 | + encoder_moves_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream); | |
434 | + mStream = mStream + encoder_get_inst_size(mStream); | |
435 | +} | |
436 | + | |
437 | +void X86Assembler::MOVSX_REG_TO_REG(OpndSize size, int src, int dst) { | |
438 | + encoder_moves_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
439 | + mStream = mStream + encoder_get_inst_size(mStream); | |
440 | +} | |
441 | +//zero extension | |
442 | +void X86Assembler::MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) { | |
443 | + encoder_movez_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream); | |
444 | + mStream = mStream + encoder_get_inst_size(mStream); | |
445 | +} | |
446 | + | |
447 | +void X86Assembler::MOVZX_REG_TO_REG(OpndSize size, int src, int dst) { | |
448 | + encoder_movez_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
449 | + mStream = mStream + encoder_get_inst_size(mStream); | |
450 | +} | |
451 | + | |
452 | +// multiply... | |
453 | +// the first source operand is placed in EAX | |
454 | +void X86Assembler::IMUL(int reg) { | |
455 | + encoder_reg(Mnemonic_IMUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
456 | + mStream = mStream + encoder_get_inst_size(mStream); | |
457 | +} | |
458 | + | |
459 | +void X86Assembler::IMUL(int src, int dst) { | |
460 | + encoder_reg_reg(Mnemonic_IMUL, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); | |
461 | + mStream = mStream + encoder_get_inst_size(mStream); | |
462 | +} | |
463 | + | |
464 | +void X86Assembler::MUL(int reg) { | |
465 | + encoder_reg(Mnemonic_MUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
466 | + mStream = mStream + encoder_get_inst_size(mStream); | |
467 | +} | |
468 | + | |
469 | + | |
470 | +// data transfer... | |
471 | +void X86Assembler::MOV_IMM_TO_REG(int32_t imm, int dst) { | |
472 | + encoder_imm_reg(Mnemonic_MOV, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
473 | + mStream = mStream + encoder_get_inst_size(mStream); | |
474 | +} | |
475 | + | |
476 | +void X86Assembler::MOV_REG_TO_REG(int src, int dst, OpndSize size) | |
477 | +{ | |
478 | + if(src == dst) return; | |
479 | + encoder_reg_reg(Mnemonic_MOV, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
480 | + mStream = mStream + encoder_get_inst_size(mStream); | |
481 | +} | |
482 | + | |
483 | +void X86Assembler::MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size) | |
484 | +{ | |
485 | + encoder_reg_mem(Mnemonic_MOV, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); | |
486 | + mStream = mStream + encoder_get_inst_size(mStream); | |
487 | +} | |
488 | + | |
489 | +void X86Assembler::MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size) | |
490 | +{ | |
491 | + encoder_mem_reg(Mnemonic_MOV, size, disp, base_reg, 0/*isBasePhysical*/, | |
492 | + reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
493 | + mStream = mStream + encoder_get_inst_size(mStream); | |
494 | +} | |
495 | + | |
496 | +void X86Assembler::MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size) | |
497 | +{ | |
498 | + encoder_mem_scale_reg(Mnemonic_MOV, size, base_reg, 0/*isBasePhysical*/, index_reg, 0/*isIndexPhysical*/, scale, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
499 | + mStream = mStream + encoder_get_inst_size(mStream); | |
500 | +} | |
501 | +// the conditional move | |
502 | +void X86Assembler::CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size) | |
503 | +{ | |
504 | + switch (cc) { | |
505 | + case Mnemonic_CMOVO: | |
506 | + encoder_reg_reg(Mnemonic_CMOVO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
507 | + break; | |
508 | + case Mnemonic_CMOVNO: | |
509 | + encoder_reg_reg(Mnemonic_CMOVNO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
510 | + break; | |
511 | + case Mnemonic_CMOVB: | |
512 | + encoder_reg_reg(Mnemonic_CMOVB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
513 | + break; | |
514 | + case Mnemonic_CMOVNB: | |
515 | + encoder_reg_reg(Mnemonic_CMOVNB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
516 | + break; | |
517 | + case Mnemonic_CMOVZ: | |
518 | + encoder_reg_reg(Mnemonic_CMOVZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
519 | + break; | |
520 | + case Mnemonic_CMOVNZ: | |
521 | + encoder_reg_reg(Mnemonic_CMOVNZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
522 | + break; | |
523 | + case Mnemonic_CMOVBE: | |
524 | + encoder_reg_reg(Mnemonic_CMOVBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
525 | + break; | |
526 | + case Mnemonic_CMOVNBE: | |
527 | + encoder_reg_reg(Mnemonic_CMOVNBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
528 | + break; | |
529 | + case Mnemonic_CMOVS: | |
530 | + encoder_reg_reg(Mnemonic_CMOVS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
531 | + break; | |
532 | + case Mnemonic_CMOVNS: | |
533 | + encoder_reg_reg(Mnemonic_CMOVNS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
534 | + break; | |
535 | + case Mnemonic_CMOVP: | |
536 | + encoder_reg_reg(Mnemonic_CMOVP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
537 | + break; | |
538 | + case Mnemonic_CMOVNP: | |
539 | + encoder_reg_reg(Mnemonic_CMOVNP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
540 | + break; | |
541 | + case Mnemonic_CMOVL: | |
542 | + encoder_reg_reg(Mnemonic_CMOVL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
543 | + break; | |
544 | + case Mnemonic_CMOVNL: | |
545 | + encoder_reg_reg(Mnemonic_CMOVNL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
546 | + break; | |
547 | + case Mnemonic_CMOVLE: | |
548 | + encoder_reg_reg(Mnemonic_CMOVLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
549 | + break; | |
550 | + case Mnemonic_CMOVNLE: | |
551 | + encoder_reg_reg(Mnemonic_CMOVNLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); | |
552 | + break; | |
553 | + default : | |
554 | + printf("the condition is not supported.\n"); | |
555 | + return; | |
556 | + } | |
557 | + mStream = mStream + encoder_get_inst_size(mStream); | |
558 | +} | |
559 | + | |
560 | +void X86Assembler::CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size) | |
561 | +{ | |
562 | + switch (cc) { | |
563 | + case Mnemonic_CMOVO: | |
564 | + encoder_mem_reg(Mnemonic_CMOVO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
565 | + break; | |
566 | + case Mnemonic_CMOVNO: | |
567 | + encoder_mem_reg(Mnemonic_CMOVNO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
568 | + break; | |
569 | + case Mnemonic_CMOVB: | |
570 | + encoder_mem_reg(Mnemonic_CMOVB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
571 | + break; | |
572 | + case Mnemonic_CMOVNB: | |
573 | + encoder_mem_reg(Mnemonic_CMOVNB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
574 | + break; | |
575 | + case Mnemonic_CMOVZ: | |
576 | + encoder_mem_reg(Mnemonic_CMOVZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
577 | + break; | |
578 | + case Mnemonic_CMOVNZ: | |
579 | + encoder_mem_reg(Mnemonic_CMOVNZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
580 | + break; | |
581 | + case Mnemonic_CMOVBE: | |
582 | + encoder_mem_reg(Mnemonic_CMOVBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
583 | + break; | |
584 | + case Mnemonic_CMOVNBE: | |
585 | + encoder_mem_reg(Mnemonic_CMOVNBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
586 | + break; | |
587 | + case Mnemonic_CMOVS: | |
588 | + encoder_mem_reg(Mnemonic_CMOVS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
589 | + break; | |
590 | + case Mnemonic_CMOVNS: | |
591 | + encoder_mem_reg(Mnemonic_CMOVNS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
592 | + break; | |
593 | + case Mnemonic_CMOVP: | |
594 | + encoder_mem_reg(Mnemonic_CMOVP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
595 | + break; | |
596 | + case Mnemonic_CMOVNP: | |
597 | + encoder_mem_reg(Mnemonic_CMOVNP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
598 | + break; | |
599 | + case Mnemonic_CMOVL: | |
600 | + encoder_mem_reg(Mnemonic_CMOVL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
601 | + break; | |
602 | + case Mnemonic_CMOVNL: | |
603 | + encoder_mem_reg(Mnemonic_CMOVNL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
604 | + break; | |
605 | + case Mnemonic_CMOVLE: | |
606 | + encoder_mem_reg(Mnemonic_CMOVLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
607 | + break; | |
608 | + case Mnemonic_CMOVNLE: | |
609 | + encoder_mem_reg(Mnemonic_CMOVNLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); | |
610 | + break; | |
611 | + default : | |
612 | + printf("the condition is not supported.\n"); | |
613 | + return; | |
614 | + } | |
615 | + mStream = mStream + encoder_get_inst_size(mStream); | |
616 | +} | |
617 | + | |
618 | +}; // namespace android |
@@ -0,0 +1,163 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/X86Assembler.h | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#ifndef ANDROID_X86ASSEMBLER_H | |
19 | +#define ANDROID_X86ASSEMBLER_H | |
20 | + | |
21 | +#include <stdint.h> | |
22 | +#include <sys/types.h> | |
23 | + | |
24 | +#include <utils/Vector.h> | |
25 | +#include <utils/KeyedVector.h> | |
26 | + | |
27 | +#include "codeflinger/tinyutils/smartpointer.h" | |
28 | +#include "codeflinger/CodeCache.h" | |
29 | +#include "enc_wrapper.h" | |
30 | + | |
31 | +namespace android { | |
32 | + | |
33 | +// ---------------------------------------------------------------------------- | |
34 | + | |
35 | +class X86Assembler | |
36 | +{ | |
37 | +public: | |
38 | + | |
39 | + enum { | |
40 | + EAX = PhysicalReg_EAX, EBX = PhysicalReg_EBX, ECX = PhysicalReg_ECX, | |
41 | + EDX = PhysicalReg_EDX, EDI = PhysicalReg_EDI, ESI = PhysicalReg_ESI, | |
42 | + ESP = PhysicalReg_ESP, EBP = PhysicalReg_EBP | |
43 | + }; | |
44 | + | |
45 | + X86Assembler(const sp<Assembly>& assembly); | |
46 | + ~X86Assembler(); | |
47 | + | |
48 | + char* base() const; | |
49 | + char* pc() const; | |
50 | + | |
51 | + | |
52 | + void disassemble(const char* name); | |
53 | + | |
54 | + // ------------------------------------------------------------------------ | |
55 | + // X86AssemblerInterface... | |
56 | + // ------------------------------------------------------------------------ | |
57 | + | |
58 | + void reset(); | |
59 | + | |
60 | + int generate(const char* name); | |
61 | + | |
62 | + void comment(const char* string); | |
63 | + | |
64 | + void label(const char* theLabel); | |
65 | + | |
66 | + void JCC(Mnemonic cc, const char* label); | |
67 | + | |
68 | + void JMP(const char* label); | |
69 | + | |
70 | + void prepare_esp(int old_offset); | |
71 | + | |
72 | + void update_esp(int new_offset); | |
73 | + | |
74 | + void shrink_esp(int shrink_offset); | |
75 | + | |
76 | + void callee_work(); | |
77 | + | |
78 | + void return_work(); | |
79 | + | |
80 | + char* pcForLabel(const char* label); | |
81 | + | |
82 | + void PUSH(int reg); | |
83 | + | |
84 | + void POP(int reg); | |
85 | + | |
86 | + void ADD_REG_TO_REG(int src, int dst); | |
87 | + void ADD_IMM_TO_REG(int imm, int dst); | |
88 | + void ADD_IMM_TO_MEM(int imm, int disp, int dst); | |
89 | + void ADD_MEM_TO_REG(int base_reg, int disp, int dst); | |
90 | + void ADD_REG_TO_MEM(int src, int base_reg, int disp); | |
91 | + void SUB_REG_TO_REG(int src, int dst); | |
92 | + void SUB_IMM_TO_REG(int imm, int dst); | |
93 | + void SUB_IMM_TO_MEM(int imm, int disp, int dst); | |
94 | + void SUB_REG_TO_MEM(int src, int base_reg, int disp); | |
95 | + | |
96 | + void TEST_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32); | |
97 | + void CMP_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32); | |
98 | + void CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size=OpndSize_32); | |
99 | + void CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32); | |
100 | + void CMP_IMM_TO_REG(int imm, int dst); | |
101 | + | |
102 | + void AND_REG_TO_REG(int src, int dst); | |
103 | + void AND_IMM_TO_REG(int imm, int dst); | |
104 | + void OR_REG_TO_REG(int src, int dst); | |
105 | + void XOR(int src, int dst); | |
106 | + void OR_IMM_TO_REG(int imm, int dst); | |
107 | + void NOT(int dst); | |
108 | + void NEG(int dst); | |
109 | + void SHL(int imm, int dst); | |
110 | + void SHL(int imm, int disp, int dst); | |
111 | + void SHR(int imm, int dst); | |
112 | + void SHR(int imm, int disp, int dst); | |
113 | + void SAR(int imm, int dst); | |
114 | + void ROR(const int imm, int dst); | |
115 | + void ROR(int imm, int disp, int dst); | |
116 | + void IMUL(int reg); | |
117 | + void IMUL(int src, int dst); | |
118 | + void MUL(int reg); | |
119 | + | |
120 | + void MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst); | |
121 | + void MOVSX_REG_TO_REG(OpndSize size, int src, int dst); | |
122 | + void MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst); | |
123 | + void MOVZX_REG_TO_REG(OpndSize size, int src, int dst); | |
124 | + void MOV_IMM_TO_REG(int32_t imm, int dst); | |
125 | + void MOV_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32); | |
126 | + void MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size=OpndSize_32); | |
127 | + void MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32); | |
128 | + void MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size=OpndSize_32); | |
129 | + void CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size=OpndSize_32); | |
130 | + void CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size=OpndSize_32); | |
131 | + | |
132 | + | |
133 | + sp<Assembly> mAssembly; | |
134 | + char* mBase; | |
135 | + char* mStream; | |
136 | + //branch target offset is relative to the next instruction | |
137 | + char* mStreamNext; | |
138 | + //updating esp after iterating the loop | |
139 | + char* mStreamUpdate; | |
140 | + | |
141 | + int64_t mDuration; | |
142 | +#if defined(WITH_LIB_HARDWARE) | |
143 | + bool mQemuTracing; | |
144 | +#endif | |
145 | + | |
146 | + struct branch_target_t { | |
147 | + inline branch_target_t() : label(0), pc(0), next_pc(0) { } | |
148 | + inline branch_target_t(const char* l, char* p, char* next_p) | |
149 | + : label(l), pc(p), next_pc(next_p) { } | |
150 | + const char* label; | |
151 | + char* pc; | |
152 | + char* next_pc; | |
153 | + }; | |
154 | + | |
155 | + Vector<branch_target_t> mBranchTargets; | |
156 | + KeyedVector< const char*, char* > mLabels; | |
157 | + KeyedVector< char*, const char* > mLabelsInverseMapping; | |
158 | + KeyedVector< char*, const char* > mComments; | |
159 | +}; | |
160 | + | |
161 | +}; // namespace android | |
162 | + | |
163 | +#endif //ANDROID_X86ASSEMBLER_H |
@@ -0,0 +1,974 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/blending.cpp | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#include <assert.h> | |
19 | +#include <stdint.h> | |
20 | +#include <stdlib.h> | |
21 | +#include <stdio.h> | |
22 | +#include <sys/types.h> | |
23 | + | |
24 | +#include <cutils/log.h> | |
25 | + | |
26 | +#include "codeflinger/x86/GGLX86Assembler.h" | |
27 | + | |
28 | + | |
29 | +namespace android { | |
30 | + | |
31 | +void GGLX86Assembler::build_fog( | |
32 | + component_t& temp, // incomming fragment / output | |
33 | + int component, | |
34 | + Scratch& regs) | |
35 | +{ | |
36 | + if (mInfo[component].fog) { | |
37 | + Scratch scratches(registerFile()); | |
38 | + comment("fog"); | |
39 | + | |
40 | + temp.reg = scratches.obtain(); | |
41 | + MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg); | |
42 | + integer_t fragment(temp.reg, temp.h, temp.flags, temp.offset_ebp); | |
43 | + if (!(temp.flags & CORRUPTIBLE)) { | |
44 | + temp.reg = regs.obtain(); | |
45 | + temp.flags |= CORRUPTIBLE; | |
46 | + } | |
47 | + | |
48 | + integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE); | |
49 | + mBuilderContext.Rctx = scratches.obtain(); | |
50 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
51 | + MOVZX_MEM_TO_REG(OpndSize_8, mBuilderContext.Rctx, GGL_OFFSETOF(state.fog.color[component]), fogColor.reg); | |
52 | + | |
53 | + integer_t factor(scratches.obtain(), 16, CORRUPTIBLE); | |
54 | + CONTEXT_LOAD(factor.reg, generated_vars.f); | |
55 | + scratches.recycle(mBuilderContext.Rctx); | |
56 | + | |
57 | + // clamp fog factor (TODO: see if there is a way to guarantee | |
58 | + // we won't overflow, when setting the iterators) | |
59 | + int temp_reg = scratches.obtain(); | |
60 | + MOV_REG_TO_REG(factor.reg, temp_reg); | |
61 | + SAR(31, temp_reg); | |
62 | + NOT(temp_reg); | |
63 | + AND_REG_TO_REG(temp_reg, factor.reg); | |
64 | + MOV_IMM_TO_REG(0x10000, temp_reg); | |
65 | + CMP_IMM_TO_REG(0x10000, factor.reg); | |
66 | + CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, factor.reg); | |
67 | + scratches.recycle(temp_reg); | |
68 | + | |
69 | + //we will resue factor.reg | |
70 | + build_blendFOneMinusF(temp, factor, fragment, fogColor); | |
71 | + MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP); | |
72 | + scratches.recycle(temp.reg); | |
73 | + } | |
74 | +} | |
75 | + | |
76 | +void GGLX86Assembler::build_blending( | |
77 | + component_t& temp, // incomming fragment / output | |
78 | + pixel_t& pixel, // framebuffer | |
79 | + int component, | |
80 | + Scratch& regs) | |
81 | +{ | |
82 | + if (!mInfo[component].blend) | |
83 | + return; | |
84 | + | |
85 | + int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; | |
86 | + int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; | |
87 | + if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) | |
88 | + fs = GGL_ONE; | |
89 | + const int blending = blending_codes(fs, fd); | |
90 | + if (!temp.size()) { | |
91 | + // here, blending will produce something which doesn't depend on | |
92 | + // that component (eg: GL_ZERO:GL_*), so the register has not been | |
93 | + // allocated yet. Will never be used as a source. | |
94 | + //temp = component_t(regs.obtain(), CORRUPTIBLE, temp_offset_ebp); | |
95 | + temp.reg = regs.obtain(); | |
96 | + temp.flags = CORRUPTIBLE; | |
97 | + temp.h = temp.l = 0; | |
98 | + } else { | |
99 | + temp.reg = regs.obtain(); | |
100 | + } | |
101 | + MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg); | |
102 | + // we are doing real blending... | |
103 | + // fb: extracted dst | |
104 | + // fragment: extracted src | |
105 | + // temp: component_t(fragment) and result | |
106 | + | |
107 | + // scoped register allocator | |
108 | + Scratch scratches(registerFile()); | |
109 | + comment("blending"); | |
110 | + | |
111 | + // we can optimize these cases a bit... | |
112 | + // (1) saturation is not needed | |
113 | + // (2) we can use only one multiply instead of 2 | |
114 | + // (3) we can reduce the register pressure | |
115 | + // R = S*f + D*(1-f) = (S-D)*f + D | |
116 | + // R = S*(1-f) + D*f = (D-S)*f + S | |
117 | + | |
118 | + const bool same_factor_opt1 = | |
119 | + (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) || | |
120 | + (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) || | |
121 | + (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) || | |
122 | + (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA); | |
123 | + | |
124 | + const bool same_factor_opt2 = | |
125 | + (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) || | |
126 | + (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) || | |
127 | + (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) || | |
128 | + (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA); | |
129 | + | |
130 | + | |
131 | + // XXX: we could also optimize these cases: | |
132 | + // R = S*f + D*f = (S+D)*f | |
133 | + // R = S*(1-f) + D*(1-f) = (S+D)*(1-f) | |
134 | + // R = S*D + D*S = 2*S*D | |
135 | + | |
136 | + | |
137 | + pixel.reg = scratches.obtain(); | |
138 | + MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg); | |
139 | + // see if we need to extract 'component' from the destination (fb) | |
140 | + integer_t fb; | |
141 | + if (blending & (BLEND_DST|FACTOR_DST)) { | |
142 | + fb.setTo(scratches.obtain(), 32); | |
143 | + extract(fb, pixel, component); | |
144 | + if (mDithering) { | |
145 | + // XXX: maybe what we should do instead, is simply | |
146 | + // expand fb -or- fragment to the larger of the two | |
147 | + if (fb.size() < temp.size()) { | |
148 | + // for now we expand 'fb' to min(fragment, 8) | |
149 | + int new_size = temp.size() < 8 ? temp.size() : 8; | |
150 | + expand(fb, fb, new_size); | |
151 | + } | |
152 | + } | |
153 | + } | |
154 | + | |
155 | + // convert input fragment to integer_t | |
156 | + if (temp.l && (temp.flags & CORRUPTIBLE)) { | |
157 | + SHR(temp.l, temp.reg); | |
158 | + temp.h -= temp.l; | |
159 | + temp.l = 0; | |
160 | + } | |
161 | + integer_t fragment(temp.reg, temp.size(), temp.flags, temp.offset_ebp); | |
162 | + | |
163 | + // if not done yet, convert input fragment to integer_t | |
164 | + if (temp.l) { | |
165 | + // here we know temp is not CORRUPTIBLE | |
166 | + fragment.reg = scratches.obtain(); | |
167 | + MOV_REG_TO_REG(temp.reg, fragment.reg); | |
168 | + SHR(temp.l, fragment.reg); | |
169 | + fragment.flags |= CORRUPTIBLE; | |
170 | + } | |
171 | + | |
172 | + if (!(temp.flags & CORRUPTIBLE)) { | |
173 | + // temp is not corruptible, but since it's the destination it | |
174 | + // will be modified, so we need to allocate a new register. | |
175 | + temp.reg = regs.obtain(); | |
176 | + temp.flags &= ~CORRUPTIBLE; | |
177 | + fragment.flags &= ~CORRUPTIBLE; | |
178 | + } | |
179 | + | |
180 | + if ((blending & BLEND_SRC) && !same_factor_opt1) { | |
181 | + // source (fragment) is needed for the blending stage | |
182 | + // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1) | |
183 | + fragment.flags &= ~CORRUPTIBLE; | |
184 | + } | |
185 | + | |
186 | + | |
187 | + if (same_factor_opt1) { | |
188 | + // R = S*f + D*(1-f) = (S-D)*f + D | |
189 | + integer_t factor; | |
190 | + build_blend_factor(factor, fs, | |
191 | + component, pixel, fragment, fb, scratches); | |
192 | + // fb is always corruptible from this point | |
193 | + fb.flags |= CORRUPTIBLE; | |
194 | + //we will reuse factor in mul_factor_add of build_blendFOneMinusF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor | |
195 | + if(factor.reg == fragment.reg || factor.reg == fb.reg) | |
196 | + MOV_REG_TO_REG(factor.reg, pixel.reg); | |
197 | + else | |
198 | + scratches.recycle(pixel.reg); | |
199 | + build_blendFOneMinusF(temp, factor, fragment, fb); | |
200 | + if(factor.reg == fragment.reg || factor.reg == fb.reg) { | |
201 | + MOV_REG_TO_REG(pixel.reg, factor.reg); | |
202 | + scratches.recycle(pixel.reg); | |
203 | + } | |
204 | + scratches.recycle(fb.reg); | |
205 | + //scratches.recycle(factor.reg); | |
206 | + } else if (same_factor_opt2) { | |
207 | + // R = S*(1-f) + D*f = (D-S)*f + S | |
208 | + integer_t factor; | |
209 | + // fb is always corrruptible here | |
210 | + fb.flags |= CORRUPTIBLE; | |
211 | + build_blend_factor(factor, fd, | |
212 | + component, pixel, fragment, fb, scratches); | |
213 | + //we will reuse factor in mul_factor_add of build_blendFOneMinusFF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor | |
214 | + if(factor.reg == fragment.reg || factor.reg == fb.reg) | |
215 | + MOV_REG_TO_REG(factor.reg, pixel.reg); | |
216 | + else | |
217 | + scratches.recycle(pixel.reg); | |
218 | + build_blendOneMinusFF(temp, factor, fragment, fb); | |
219 | + if(factor.reg == fragment.reg || factor.reg == fb.reg) { | |
220 | + MOV_REG_TO_REG(pixel.reg, factor.reg); | |
221 | + scratches.recycle(pixel.reg); | |
222 | + } | |
223 | + scratches.recycle(fb.reg); | |
224 | + } else { | |
225 | + integer_t src_factor; | |
226 | + integer_t dst_factor; | |
227 | + | |
228 | + // if destination (fb) is not needed for the blending stage, | |
229 | + // then it can be marked as CORRUPTIBLE | |
230 | + if (!(blending & BLEND_DST)) { | |
231 | + fb.flags |= CORRUPTIBLE; | |
232 | + } | |
233 | + | |
234 | + // XXX: try to mark some registers as CORRUPTIBLE | |
235 | + // in most case we could make those corruptible | |
236 | + // when we're processing the last component | |
237 | + // but not always, for instance | |
238 | + // when fragment is constant and not reloaded | |
239 | + // when fb is needed for logic-ops or masking | |
240 | + // when a register is aliased (for instance with mAlphaSource) | |
241 | + | |
242 | + // blend away... | |
243 | + if (fs==GGL_ZERO) { | |
244 | + if (fd==GGL_ZERO) { // R = 0 | |
245 | + // already taken care of | |
246 | + } else if (fd==GGL_ONE) { // R = D | |
247 | + // already taken care of | |
248 | + } else { // R = D*fd | |
249 | + // compute fd | |
250 | + build_blend_factor(dst_factor, fd, | |
251 | + component, pixel, fragment, fb, scratches); | |
252 | + scratches.recycle(pixel.reg); | |
253 | + mul_factor(temp, fb, dst_factor, regs); | |
254 | + scratches.recycle(fb.reg); | |
255 | + } | |
256 | + } else if (fs==GGL_ONE) { | |
257 | + int temp_reg; | |
258 | + if (fd==GGL_ZERO) { // R = S | |
259 | + // NOP, taken care of | |
260 | + } else if (fd==GGL_ONE) { // R = S + D | |
261 | + component_add(temp, fb, fragment); // args order matters | |
262 | + temp_reg = scratches.obtain(); | |
263 | + component_sat(temp, temp_reg); | |
264 | + scratches.recycle(temp_reg); | |
265 | + } else { // R = S + D*fd | |
266 | + // compute fd | |
267 | + build_blend_factor(dst_factor, fd, | |
268 | + component, pixel, fragment, fb, scratches); | |
269 | + //we will probably change src_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor | |
270 | + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) | |
271 | + MOV_REG_TO_REG(dst_factor.reg, pixel.reg); | |
272 | + else | |
273 | + scratches.recycle(pixel.reg); | |
274 | + mul_factor_add(temp, fb, dst_factor, component_t(fragment)); | |
275 | + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) { | |
276 | + MOV_REG_TO_REG(pixel.reg, dst_factor.reg); | |
277 | + scratches.recycle(pixel.reg); | |
278 | + } | |
279 | + temp_reg = fb.reg; | |
280 | + component_sat(temp, temp_reg); | |
281 | + scratches.recycle(fb.reg); | |
282 | + } | |
283 | + } else { | |
284 | + // compute fs | |
285 | + int temp_reg; | |
286 | + build_blend_factor(src_factor, fs, | |
287 | + component, pixel, fragment, fb, scratches); | |
288 | + if (fd==GGL_ZERO) { // R = S*fs | |
289 | + mul_factor(temp, fragment, src_factor, regs); | |
290 | + if (scratches.isUsed(src_factor.reg)) | |
291 | + scratches.recycle(src_factor.reg); | |
292 | + } else if (fd==GGL_ONE) { // R = S*fs + D | |
293 | + //we will probably change src_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor | |
294 | + if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) | |
295 | + MOV_REG_TO_REG(src_factor.reg, pixel.reg); | |
296 | + else | |
297 | + scratches.recycle(pixel.reg); | |
298 | + mul_factor_add(temp, fragment, src_factor, component_t(fb)); | |
299 | + if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) { | |
300 | + MOV_REG_TO_REG(pixel.reg, src_factor.reg); | |
301 | + scratches.recycle(pixel.reg); | |
302 | + } | |
303 | + temp_reg = fb.reg; | |
304 | + component_sat(temp, temp_reg); | |
305 | + scratches.recycle(fb.reg); | |
306 | + } else { // R = S*fs + D*fd | |
307 | + mul_factor(temp, fragment, src_factor, regs); | |
308 | + if (scratches.isUsed(src_factor.reg)) | |
309 | + scratches.recycle(src_factor.reg); | |
310 | + // compute fd | |
311 | + build_blend_factor(dst_factor, fd, | |
312 | + component, pixel, fragment, fb, scratches); | |
313 | + //we will probably change dst_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg | |
314 | + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) | |
315 | + MOV_REG_TO_REG(dst_factor.reg, pixel.reg); | |
316 | + else | |
317 | + scratches.recycle(pixel.reg); | |
318 | + mul_factor_add(temp, fb, dst_factor, temp); | |
319 | + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) { | |
320 | + MOV_REG_TO_REG(pixel.reg, dst_factor.reg); | |
321 | + scratches.recycle(pixel.reg); | |
322 | + } | |
323 | + if (!same_factor_opt1 && !same_factor_opt2) { | |
324 | + temp_reg = fb.reg; | |
325 | + component_sat(temp, temp_reg); | |
326 | + } | |
327 | + scratches.recycle(fb.reg); | |
328 | + } | |
329 | + if(scratches.isUsed(pixel.reg)) | |
330 | + scratches.recycle(pixel.reg); | |
331 | + } | |
332 | + } | |
333 | + // temp is modified, but it will be used immediately in downshift | |
334 | + //printf("temp.offset_ebp: %d \n", temp.offset_ebp); | |
335 | + //below will be triggered on CDK for surfaceflinger | |
336 | + if(temp.offset_ebp == mAlphaSource.offset_ebp) { | |
337 | + mCurSp = mCurSp - 4; | |
338 | + temp.offset_ebp = mCurSp; | |
339 | + } | |
340 | + // the r, g, b value must be stored, otherwise the color of globaltime is incorrect. | |
341 | + MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP); | |
342 | + regs.recycle(temp.reg); | |
343 | + | |
344 | + // now we can be corrupted (it's the dest) | |
345 | + temp.flags |= CORRUPTIBLE; | |
346 | +} | |
347 | + | |
348 | +void GGLX86Assembler::build_blend_factor( | |
349 | + integer_t& factor, int f, int component, | |
350 | + const pixel_t& dst_pixel, | |
351 | + integer_t& fragment, | |
352 | + integer_t& fb, | |
353 | + Scratch& scratches) | |
354 | +{ | |
355 | + integer_t src_alpha(fragment); | |
356 | + | |
357 | + // src_factor/dst_factor won't be used after blending, | |
358 | + // so it's fine to mark them as CORRUPTIBLE (if not aliased) | |
359 | + factor.flags |= CORRUPTIBLE; | |
360 | + int temp_reg; | |
361 | + switch(f) { | |
362 | + case GGL_ONE_MINUS_SRC_ALPHA: | |
363 | + case GGL_SRC_ALPHA: | |
364 | + if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) { | |
365 | + // we're processing alpha, so we already have | |
366 | + // src-alpha in fragment, and we need src-alpha just this time. | |
367 | + } else { | |
368 | + // alpha-src will be needed for other components | |
369 | + factor = mAlphaSource; | |
370 | + factor.flags &= ~CORRUPTIBLE; | |
371 | + factor.reg = scratches.obtain(); | |
372 | + //printf("mAlphaSource.offset_ebp: %d \n", mAlphaSource.offset_ebp); | |
373 | + //printf("fragment.offset_ebp: %d \n", fragment.offset_ebp); | |
374 | + //printf("factor.offset_ebp: %d \n", factor.offset_ebp); | |
375 | + MOV_MEM_TO_REG(mAlphaSource.offset_ebp, EBP, factor.reg); | |
376 | + if (!mBlendFactorCached || mBlendFactorCached==f) { | |
377 | + src_alpha = mAlphaSource; | |
378 | + // we already computed the blend factor before, nothing to do. | |
379 | + if (mBlendFactorCached) | |
380 | + return; | |
381 | + // this is the first time, make sure to compute the blend | |
382 | + // factor properly. | |
383 | + mBlendFactorCached = f; | |
384 | + break; | |
385 | + } else { | |
386 | + // we have a cached alpha blend factor, but we want another one, | |
387 | + // this should really not happen because by construction, | |
388 | + // we cannot have BOTH source and destination | |
389 | + // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because | |
390 | + // the blending stage uses the f/(1-f) optimization | |
391 | + | |
392 | + // for completeness, we handle this case though. Since there | |
393 | + // are only 2 choices, this meens we want "the other one" | |
394 | + // (1-factor) | |
395 | + //factor = mAlphaSource; | |
396 | + //factor.flags &= ~CORRUPTIBLE; | |
397 | + NEG(factor.reg); | |
398 | + ADD_IMM_TO_REG((1<<factor.s), factor.reg); | |
399 | + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); | |
400 | + mBlendFactorCached = f; | |
401 | + return; | |
402 | + } | |
403 | + } | |
404 | + // fall-through... | |
405 | + case GGL_ONE_MINUS_DST_COLOR: | |
406 | + case GGL_DST_COLOR: | |
407 | + case GGL_ONE_MINUS_SRC_COLOR: | |
408 | + case GGL_SRC_COLOR: | |
409 | + case GGL_ONE_MINUS_DST_ALPHA: | |
410 | + case GGL_DST_ALPHA: | |
411 | + case GGL_SRC_ALPHA_SATURATE: | |
412 | + // help us find out what register we can use for the blend-factor | |
413 | + // CORRUPTIBLE registers are chosen first, or a new one is allocated. | |
414 | + if (fragment.flags & CORRUPTIBLE) { | |
415 | + factor.setTo(fragment.reg, 32, CORRUPTIBLE, fragment.offset_ebp); | |
416 | + fragment.flags &= ~CORRUPTIBLE; | |
417 | + } else if (fb.flags & CORRUPTIBLE) { | |
418 | + factor.setTo(fb.reg, 32, CORRUPTIBLE, fb.offset_ebp); | |
419 | + fb.flags &= ~CORRUPTIBLE; | |
420 | + } else { | |
421 | + factor.setTo(scratches.obtain(), 32, CORRUPTIBLE); | |
422 | + mCurSp = mCurSp - 4; | |
423 | + factor.offset_ebp = mCurSp; | |
424 | + } | |
425 | + break; | |
426 | + } | |
427 | + | |
428 | + // XXX: doesn't work if size==1 | |
429 | + | |
430 | + switch(f) { | |
431 | + case GGL_ONE_MINUS_DST_COLOR: | |
432 | + case GGL_DST_COLOR: | |
433 | + factor.s = fb.s; | |
434 | + MOV_REG_TO_REG(fb.reg, factor.reg); | |
435 | + SHR(fb.s-1, factor.reg); | |
436 | + ADD_REG_TO_REG(fb.reg, factor.reg); | |
437 | + break; | |
438 | + case GGL_ONE_MINUS_SRC_COLOR: | |
439 | + case GGL_SRC_COLOR: | |
440 | + factor.s = fragment.s; | |
441 | + temp_reg = scratches.obtain(); | |
442 | + MOV_REG_TO_REG(fragment.reg, temp_reg); | |
443 | + SHR(fragment.s-1, fragment.reg); | |
444 | + ADD_REG_TO_REG(temp_reg, fragment.reg); | |
445 | + scratches.recycle(temp_reg); | |
446 | + break; | |
447 | + case GGL_ONE_MINUS_SRC_ALPHA: | |
448 | + case GGL_SRC_ALPHA: | |
449 | + factor.s = src_alpha.s; | |
450 | + if (mBlendFactorCached == f) { | |
451 | + //src_alpha == factor == mAlphaSource, we need a temp reg | |
452 | + if(scratches.countFreeRegs()) { | |
453 | + temp_reg = scratches.obtain(); | |
454 | + MOV_REG_TO_REG(factor.reg, temp_reg); | |
455 | + SHR(src_alpha.s-1, factor.reg); | |
456 | + ADD_REG_TO_REG(temp_reg, factor.reg); | |
457 | + scratches.recycle(temp_reg); | |
458 | + } | |
459 | + else { | |
460 | + SHR(src_alpha.s-1, factor.offset_ebp, EBP); | |
461 | + ADD_MEM_TO_REG(EBP, factor.offset_ebp, factor.reg); | |
462 | + } | |
463 | + } | |
464 | + else | |
465 | + { | |
466 | + MOV_REG_TO_REG(src_alpha.reg, factor.reg); | |
467 | + SHR(src_alpha.s-1, factor.reg); | |
468 | + ADD_REG_TO_REG(src_alpha.reg, factor.reg); | |
469 | + } | |
470 | + // we will store factor in the next switch for GGL_ONE_MINUS_SRC_ALPHA | |
471 | + if(f == GGL_SRC_ALPHA) | |
472 | + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); | |
473 | + break; | |
474 | + case GGL_ONE_MINUS_DST_ALPHA: | |
475 | + case GGL_DST_ALPHA: | |
476 | + // XXX: should be precomputed | |
477 | + extract(factor, dst_pixel, GGLFormat::ALPHA); | |
478 | + temp_reg = scratches.obtain(); | |
479 | + MOV_REG_TO_REG(factor.reg, temp_reg); | |
480 | + SHR(factor.s-1, factor.reg); | |
481 | + ADD_REG_TO_REG(temp_reg, factor.reg); | |
482 | + scratches.recycle(temp_reg); | |
483 | + break; | |
484 | + case GGL_SRC_ALPHA_SATURATE: | |
485 | + // XXX: should be precomputed | |
486 | + // XXX: f = min(As, 1-Ad) | |
487 | + // btw, we're guaranteed that Ad's size is <= 8, because | |
488 | + // it's extracted from the framebuffer | |
489 | + break; | |
490 | + } | |
491 | + | |
492 | + switch(f) { | |
493 | + case GGL_ONE_MINUS_DST_COLOR: | |
494 | + case GGL_ONE_MINUS_SRC_COLOR: | |
495 | + case GGL_ONE_MINUS_DST_ALPHA: | |
496 | + case GGL_ONE_MINUS_SRC_ALPHA: | |
497 | + NEG(factor.reg); | |
498 | + ADD_IMM_TO_REG(1<<factor.s, factor.reg); | |
499 | + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); | |
500 | + } | |
501 | + | |
502 | + // don't need more than 8-bits for the blend factor | |
503 | + // and this will prevent overflows in the multiplies later | |
504 | + if (factor.s > 8) { | |
505 | + SHR(factor.s-8, factor.reg); | |
506 | + factor.s = 8; | |
507 | + if(f == GGL_ONE_MINUS_SRC_ALPHA || f == GGL_SRC_ALPHA) | |
508 | + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); | |
509 | + } | |
510 | + //below will be triggered on CDK for surfaceflinger | |
511 | + if(fragment.offset_ebp == mAlphaSource.offset_ebp) | |
512 | + MOV_REG_TO_REG(factor.reg, fragment.reg); | |
513 | +} | |
514 | + | |
515 | +int GGLX86Assembler::blending_codes(int fs, int fd) | |
516 | +{ | |
517 | + int blending = 0; | |
518 | + switch(fs) { | |
519 | + case GGL_ONE: | |
520 | + blending |= BLEND_SRC; | |
521 | + break; | |
522 | + | |
523 | + case GGL_ONE_MINUS_DST_COLOR: | |
524 | + case GGL_DST_COLOR: | |
525 | + blending |= FACTOR_DST|BLEND_SRC; | |
526 | + break; | |
527 | + case GGL_ONE_MINUS_DST_ALPHA: | |
528 | + case GGL_DST_ALPHA: | |
529 | + // no need to extract 'component' from the destination | |
530 | + // for the blend factor, because we need ALPHA only. | |
531 | + blending |= BLEND_SRC; | |
532 | + break; | |
533 | + | |
534 | + case GGL_ONE_MINUS_SRC_COLOR: | |
535 | + case GGL_SRC_COLOR: | |
536 | + blending |= FACTOR_SRC|BLEND_SRC; | |
537 | + break; | |
538 | + case GGL_ONE_MINUS_SRC_ALPHA: | |
539 | + case GGL_SRC_ALPHA: | |
540 | + case GGL_SRC_ALPHA_SATURATE: | |
541 | + blending |= FACTOR_SRC|BLEND_SRC; | |
542 | + break; | |
543 | + } | |
544 | + switch(fd) { | |
545 | + case GGL_ONE: | |
546 | + blending |= BLEND_DST; | |
547 | + break; | |
548 | + | |
549 | + case GGL_ONE_MINUS_DST_COLOR: | |
550 | + case GGL_DST_COLOR: | |
551 | + blending |= FACTOR_DST|BLEND_DST; | |
552 | + break; | |
553 | + case GGL_ONE_MINUS_DST_ALPHA: | |
554 | + case GGL_DST_ALPHA: | |
555 | + blending |= FACTOR_DST|BLEND_DST; | |
556 | + break; | |
557 | + | |
558 | + case GGL_ONE_MINUS_SRC_COLOR: | |
559 | + case GGL_SRC_COLOR: | |
560 | + blending |= FACTOR_SRC|BLEND_DST; | |
561 | + break; | |
562 | + case GGL_ONE_MINUS_SRC_ALPHA: | |
563 | + case GGL_SRC_ALPHA: | |
564 | + // no need to extract 'component' from the source | |
565 | + // for the blend factor, because we need ALPHA only. | |
566 | + blending |= BLEND_DST; | |
567 | + break; | |
568 | + } | |
569 | + return blending; | |
570 | +} | |
571 | + | |
572 | +// --------------------------------------------------------------------------- | |
573 | + | |
574 | +void GGLX86Assembler::build_blendFOneMinusF( | |
575 | + component_t& temp, | |
576 | + const integer_t& factor, | |
577 | + const integer_t& fragment, | |
578 | + const integer_t& fb) | |
579 | +{ | |
580 | + // R = S*f + D*(1-f) = (S-D)*f + D | |
581 | + // compute S-D | |
582 | + Scratch scratches(registerFile()); | |
583 | + integer_t diff(fragment.flags & CORRUPTIBLE ? | |
584 | + fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); | |
585 | + const int shift = fragment.size() - fb.size(); | |
586 | + if (shift>0) { | |
587 | + MOV_REG_TO_REG(fragment.reg, diff.reg); | |
588 | + SHR(shift, diff.reg); | |
589 | + SUB_REG_TO_REG(fb.reg, diff.reg); | |
590 | + } else if (shift<0) { | |
591 | + MOV_REG_TO_REG(fragment.reg, diff.reg); | |
592 | + SHL(-shift, diff.reg); | |
593 | + SUB_REG_TO_REG(fb.reg, diff.reg); | |
594 | + } else { | |
595 | + MOV_REG_TO_REG(fragment.reg, diff.reg); | |
596 | + SUB_REG_TO_REG(fb.reg, diff.reg); | |
597 | + } | |
598 | + mul_factor_add(temp, diff, factor, component_t(fb)); | |
599 | + if(!(fragment.flags & CORRUPTIBLE)) | |
600 | + scratches.recycle(diff.reg); | |
601 | +} | |
602 | + | |
603 | +void GGLX86Assembler::build_blendOneMinusFF( | |
604 | + component_t& temp, | |
605 | + const integer_t& factor, | |
606 | + const integer_t& fragment, | |
607 | + const integer_t& fb) | |
608 | +{ | |
609 | + // R = S*f + D*(1-f) = (S-D)*f + D | |
610 | + Scratch scratches(registerFile()); | |
611 | + // compute D-S | |
612 | + integer_t diff(fb.flags & CORRUPTIBLE ? | |
613 | + fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); | |
614 | + const int shift = fragment.size() - fb.size(); | |
615 | + if (shift>0) { | |
616 | + SHR(shift, fragment.reg); | |
617 | + MOV_REG_TO_REG(fb.reg, diff.reg); | |
618 | + SUB_REG_TO_REG(fragment.reg, diff.reg); | |
619 | + } | |
620 | + else if (shift<0) { | |
621 | + SHR(-shift, fragment.reg); | |
622 | + MOV_REG_TO_REG(fb.reg, diff.reg); | |
623 | + SUB_REG_TO_REG(fragment.reg, diff.reg); | |
624 | + } | |
625 | + else { | |
626 | + MOV_REG_TO_REG(fb.reg, diff.reg); | |
627 | + SUB_REG_TO_REG(fragment.reg, diff.reg); | |
628 | + } | |
629 | + | |
630 | + mul_factor_add(temp, diff, factor, component_t(fragment)); | |
631 | + if(!(fragment.flags & CORRUPTIBLE)) | |
632 | + scratches.recycle(diff.reg); | |
633 | +} | |
634 | + | |
635 | +// --------------------------------------------------------------------------- | |
636 | + | |
637 | +void GGLX86Assembler::mul_factor( component_t& d, | |
638 | + const integer_t& v, | |
639 | + const integer_t& f, Scratch& scratches) | |
640 | +{ | |
641 | + // f can be changed | |
642 | + // | |
643 | + int vs = v.size(); | |
644 | + int fs = f.size(); | |
645 | + int ms = vs+fs; | |
646 | + | |
647 | + // XXX: we could have special cases for 1 bit mul | |
648 | + | |
649 | + // all this code below to use the best multiply instruction | |
650 | + // wrt the parameters size. We take advantage of the fact | |
651 | + // that the 16-bits multiplies allow a 16-bit shift | |
652 | + // The trick is that we just make sure that we have at least 8-bits | |
653 | + // per component (which is enough for a 8 bits display). | |
654 | + | |
655 | + int xy = -1; | |
656 | + int vshift = 0; | |
657 | + int fshift = 0; | |
658 | + int smulw = 0; | |
659 | + | |
660 | + int xyBB = 0; | |
661 | + int xyTB = 1; | |
662 | + int xyTT = 2; | |
663 | + int xyBT = 3; | |
664 | + if (vs<16) { | |
665 | + if (fs<16) { | |
666 | + xy = xyBB; | |
667 | + } else if (GGL_BETWEEN(fs, 24, 31)) { | |
668 | + ms -= 16; | |
669 | + xy = xyTB; | |
670 | + } else { | |
671 | + // eg: 15 * 18 -> 15 * 15 | |
672 | + fshift = fs - 15; | |
673 | + ms -= fshift; | |
674 | + xy = xyBB; | |
675 | + } | |
676 | + } else if (GGL_BETWEEN(vs, 24, 31)) { | |
677 | + if (fs<16) { | |
678 | + ms -= 16; | |
679 | + xy = xyTB; | |
680 | + } else if (GGL_BETWEEN(fs, 24, 31)) { | |
681 | + ms -= 32; | |
682 | + xy = xyTT; | |
683 | + } else { | |
684 | + // eg: 24 * 18 -> 8 * 18 | |
685 | + fshift = fs - 15; | |
686 | + ms -= 16 + fshift; | |
687 | + xy = xyTB; | |
688 | + } | |
689 | + } else { | |
690 | + if (fs<16) { | |
691 | + // eg: 18 * 15 -> 15 * 15 | |
692 | + vshift = vs - 15; | |
693 | + ms -= vshift; | |
694 | + xy = xyBB; | |
695 | + } else if (GGL_BETWEEN(fs, 24, 31)) { | |
696 | + // eg: 18 * 24 -> 15 * 8 | |
697 | + vshift = vs - 15; | |
698 | + ms -= 16 + vshift; | |
699 | + xy = xyBT; | |
700 | + } else { | |
701 | + // eg: 18 * 18 -> (15 * 18)>>16 | |
702 | + fshift = fs - 15; | |
703 | + ms -= 16 + fshift; | |
704 | + //xy = yB; //XXX SMULWB | |
705 | + smulw = 1; | |
706 | + } | |
707 | + } | |
708 | + | |
709 | + ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs); | |
710 | + | |
711 | + int vreg = v.reg; | |
712 | + int freg = f.reg; | |
713 | + if (vshift) { | |
714 | + MOV_REG_TO_REG(vreg, d.reg); | |
715 | + SHR(vshift, d.reg); | |
716 | + vreg = d.reg; | |
717 | + } | |
718 | + if (fshift) { | |
719 | + MOV_REG_TO_REG(vreg, d.reg); | |
720 | + SHR(fshift, d.reg); | |
721 | + freg = d.reg; | |
722 | + } | |
723 | + MOV_REG_TO_REG(vreg, d.reg); | |
724 | + if (smulw) { | |
725 | + int flag_push_edx = 0; | |
726 | + int flag_reserve_edx = 0; | |
727 | + int temp_reg2 = -1; | |
728 | + int edx_offset_ebp = 0; | |
729 | + if(scratches.isUsed(EDX) == 1) { | |
730 | + if(d.reg != EDX) { | |
731 | + flag_push_edx = 1; | |
732 | + mCurSp = mCurSp - 4; | |
733 | + edx_offset_ebp = mCurSp; | |
734 | + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); | |
735 | + //PUSH(EDX); | |
736 | + } | |
737 | + } | |
738 | + else { | |
739 | + flag_reserve_edx = 1; | |
740 | + scratches.reserve(EDX); | |
741 | + } | |
742 | + if(scratches.isUsed(EAX)) { | |
743 | + if( freg == EAX || d.reg == EAX) { | |
744 | + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); | |
745 | + if(freg == EAX) | |
746 | + IMUL(d.reg); | |
747 | + else | |
748 | + IMUL(freg); | |
749 | + SHL(16, EDX); | |
750 | + SHR(16, EAX); | |
751 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
752 | + MOV_REG_TO_REG(EDX, d.reg); | |
753 | + } | |
754 | + else { | |
755 | + int eax_offset_ebp = 0; | |
756 | + if(scratches.countFreeRegs() > 0) { | |
757 | + temp_reg2 = scratches.obtain(); | |
758 | + MOV_REG_TO_REG(EAX, temp_reg2); | |
759 | + } | |
760 | + else { | |
761 | + mCurSp = mCurSp - 4; | |
762 | + eax_offset_ebp = mCurSp; | |
763 | + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); | |
764 | + //PUSH(EAX); | |
765 | + } | |
766 | + MOV_REG_TO_REG(freg, EAX); | |
767 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
768 | + IMUL(d.reg); | |
769 | + SHL(16, EDX); | |
770 | + SHR(16, EAX); | |
771 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
772 | + MOV_REG_TO_REG(EDX, d.reg); | |
773 | + if(temp_reg2 > -1) { | |
774 | + MOV_REG_TO_REG(temp_reg2, EAX); | |
775 | + scratches.recycle(temp_reg2); | |
776 | + } | |
777 | + else { | |
778 | + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); | |
779 | + //POP(EAX); | |
780 | + } | |
781 | + } | |
782 | + } | |
783 | + else { | |
784 | + MOV_REG_TO_REG(freg, EAX); | |
785 | + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); | |
786 | + IMUL(d.reg); | |
787 | + SHL(16, EDX); | |
788 | + SHR(16, EAX); | |
789 | + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); | |
790 | + MOV_REG_TO_REG(EDX, d.reg); | |
791 | + } | |
792 | + if(flag_push_edx == 1) { | |
793 | + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); | |
794 | + //POP(EDX); | |
795 | + } | |
796 | + if(flag_reserve_edx ==1) | |
797 | + scratches.recycle(EDX); | |
798 | + } | |
799 | + else { | |
800 | + if(xy == xyBB) { | |
801 | + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); | |
802 | + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); | |
803 | + IMUL(freg, d.reg); | |
804 | + } | |
805 | + else if(xy == xyTB) { | |
806 | + SHR(16, d.reg); | |
807 | + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); | |
808 | + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); | |
809 | + IMUL(freg, d.reg); | |
810 | + } | |
811 | + else if(xy == xyBT) { | |
812 | + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); | |
813 | + SHR(16, freg); | |
814 | + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); | |
815 | + IMUL(freg, d.reg); | |
816 | + } | |
817 | + else if(xy == xyTT) { | |
818 | + SHR(16, d.reg); | |
819 | + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); | |
820 | + SHR(16, freg); | |
821 | + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); | |
822 | + IMUL(freg, d.reg); | |
823 | + } | |
824 | + } | |
825 | + | |
826 | + | |
827 | + d.h = ms; | |
828 | + if (mDithering) { | |
829 | + d.l = 0; | |
830 | + } else { | |
831 | + d.l = fs; | |
832 | + d.flags |= CLEAR_LO; | |
833 | + } | |
834 | +} | |
835 | + | |
836 | +void GGLX86Assembler::mul_factor_add( component_t& d, | |
837 | + const integer_t& v, | |
838 | + const integer_t& f, | |
839 | + const component_t& a) | |
840 | +{ | |
841 | + // XXX: we could have special cases for 1 bit mul | |
842 | + Scratch scratches(registerFile()); | |
843 | + | |
844 | + int vs = v.size(); | |
845 | + int fs = f.size(); | |
846 | + int as = a.h; | |
847 | + int ms = vs+fs; | |
848 | + | |
849 | + ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as); | |
850 | + | |
851 | + integer_t add(a.reg, a.h, a.flags, a.offset_ebp); | |
852 | + | |
853 | + | |
854 | + // 'a' is a component_t but it is guaranteed to have | |
855 | + // its high bits set to 0. However in the dithering case, | |
856 | + // we can't get away with truncating the potentially bad bits | |
857 | + // so extraction is needed. | |
858 | + | |
859 | + if ((mDithering) && (a.size() < ms)) { | |
860 | + // we need to expand a | |
861 | + if (!(a.flags & CORRUPTIBLE)) { | |
862 | + // ... but it's not corruptible, so we need to pick a | |
863 | + // temporary register. | |
864 | + // Try to uses the destination register first (it's likely | |
865 | + // to be usable, unless it aliases an input). | |
866 | + if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) { | |
867 | + add.reg = d.reg; | |
868 | + } else { | |
869 | + add.reg = scratches.obtain(); | |
870 | + } | |
871 | + } | |
872 | + expand(add, a, ms); // extracts and expands | |
873 | + as = ms; | |
874 | + } | |
875 | + | |
876 | + if (ms == as) { | |
877 | + MOV_REG_TO_REG(v.reg, d.reg); | |
878 | + if (vs<16 && fs<16) { | |
879 | + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); | |
880 | + MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg); | |
881 | + IMUL(f.reg, d.reg); | |
882 | + } | |
883 | + else | |
884 | + IMUL(f.reg, d.reg); | |
885 | + ADD_REG_TO_REG(add.reg, d.reg); | |
886 | + } else { | |
887 | + //int temp = d.reg; | |
888 | + //if (temp == add.reg) { | |
889 | + // // the mul will modify add.reg, we need an intermediary reg | |
890 | + // if (v.flags & CORRUPTIBLE) temp = v.reg; | |
891 | + // else if (f.flags & CORRUPTIBLE) temp = f.reg; | |
892 | + // else temp = scratches.obtain(); | |
893 | + //} | |
894 | + | |
895 | + // below d.reg may override "temp" result, so we use a new register | |
896 | + int temp_reg; | |
897 | + int v_offset_ebp = 0; | |
898 | + if(scratches.countFreeRegs() == 0) { | |
899 | + temp_reg = v.reg; | |
900 | + mCurSp = mCurSp - 4; | |
901 | + v_offset_ebp = mCurSp; | |
902 | + MOV_REG_TO_MEM(v.reg, v_offset_ebp, EBP); | |
903 | + } | |
904 | + else { | |
905 | + temp_reg = scratches.obtain(); | |
906 | + MOV_REG_TO_REG(v.reg, temp_reg); | |
907 | + } | |
908 | + if (vs<16 && fs<16) { | |
909 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg); | |
910 | + MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg); | |
911 | + IMUL(f.reg, temp_reg); | |
912 | + } | |
913 | + else | |
914 | + IMUL(f.reg, temp_reg); | |
915 | + | |
916 | + if (ms>as) { | |
917 | + MOV_REG_TO_REG(add.reg, d.reg); | |
918 | + SHL(ms-as, d.reg); | |
919 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
920 | + } else if (ms<as) { | |
921 | + // not sure if we should expand the mul instead? | |
922 | + MOV_REG_TO_REG(add.reg, d.reg); | |
923 | + SHL(as-ms, d.reg); | |
924 | + ADD_REG_TO_REG(temp_reg, d.reg); | |
925 | + } | |
926 | + if(temp_reg == v.reg) | |
927 | + MOV_MEM_TO_REG(v_offset_ebp, EBP, v.reg); | |
928 | + else | |
929 | + scratches.recycle(temp_reg); | |
930 | + } | |
931 | + | |
932 | + d.h = ms; | |
933 | + if (mDithering) { | |
934 | + d.l = a.l; | |
935 | + } else { | |
936 | + d.l = fs>a.l ? fs : a.l; | |
937 | + d.flags |= CLEAR_LO; | |
938 | + } | |
939 | +} | |
940 | + | |
941 | +void GGLX86Assembler::component_add(component_t& d, | |
942 | + const integer_t& dst, const integer_t& src) | |
943 | +{ | |
944 | + // here we're guaranteed that fragment.size() >= fb.size() | |
945 | + const int shift = src.size() - dst.size(); | |
946 | + if (!shift) { | |
947 | + MOV_REG_TO_REG(src.reg, d.reg); | |
948 | + ADD_REG_TO_REG(dst.reg, d.reg); | |
949 | + } else { | |
950 | + MOV_REG_TO_REG(dst.reg, d.reg); | |
951 | + SHL(shift, d.reg); | |
952 | + ADD_REG_TO_REG(src.reg, d.reg); | |
953 | + } | |
954 | + | |
955 | + d.h = src.size(); | |
956 | + if (mDithering) { | |
957 | + d.l = 0; | |
958 | + } else { | |
959 | + d.l = shift; | |
960 | + d.flags |= CLEAR_LO; | |
961 | + } | |
962 | +} | |
963 | + | |
964 | +void GGLX86Assembler::component_sat(const component_t& v, const int temp_reg) | |
965 | +{ | |
966 | + const int32_t one = ((1<<v.size())-1)<<v.l; | |
967 | + MOV_IMM_TO_REG(one, temp_reg); | |
968 | + CMP_IMM_TO_REG(1<<v.h, v.reg); | |
969 | + CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, v.reg); | |
970 | +} | |
971 | + | |
972 | +// ---------------------------------------------------------------------------- | |
973 | + | |
974 | +}; // namespace android |
@@ -0,0 +1,30 @@ | ||
1 | +# | |
2 | +# Copyright (C) 2015 The Android-x86 Open Source Project | |
3 | +# | |
4 | +# Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | +# you may not use this file except in compliance with the License. | |
6 | +# You may obtain a copy of the License at | |
7 | +# | |
8 | +# http://www.apache.org/licenses/LICENSE-2.0 | |
9 | +# | |
10 | +# Unless required by applicable law or agreed to in writing, software | |
11 | +# distributed under the License is distributed on an "AS IS" BASIS, | |
12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | +# See the License for the specific language governing permissions and | |
14 | +# limitations under the License. | |
15 | +# | |
16 | + | |
LOCAL_PATH := $(call my-dir)

# Sources compiled into the libenc static library.
enc_src_files := \
    dec_base.cpp \
    enc_base.cpp \
    enc_tabl.cpp \
    enc_wrapper.cpp

include $(CLEAR_VARS)

# Module identity
LOCAL_MODULE := libenc
LOCAL_MODULE_TAGS := optional

# Inputs, and the include directory exported to modules that link libenc
LOCAL_SRC_FILES := $(enc_src_files)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)

include $(BUILD_STATIC_LIBRARY)
@@ -0,0 +1,21 @@ | ||
1 | +Original source from Apache Harmony 5.0M15 (r991518 from 2010-09-01) at | |
2 | +http://harmony.apache.org/. | |
3 | + | |
4 | +The following files are from drlvm/vm/port/src/encoder/ia32_em64t. | |
5 | + | |
6 | + dec_base.cpp | |
7 | + dec_base.h | |
8 | + enc_base.cpp | |
9 | + enc_base.h | |
10 | + enc_defs.h | |
11 | + enc_prvt.h | |
12 | + enc_tabl.cpp | |
13 | + encoder.cpp | |
14 | + encoder.h | |
15 | + encoder.inl | |
16 | + | |
17 | +The following files are derived partially from the original Apache | |
18 | +Harmony files. | |
19 | + | |
20 | + enc_defs_ext.h -- derived from enc_defs.h | |
21 | + enc_wrapper.h -- derived from encoder.h |
@@ -0,0 +1,541 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | + | |
21 | +/** | |
22 | + * @file | |
23 | + * @brief Main decoding (disassembling) routines implementation. | |
24 | + */ | |
25 | + | |
26 | +#include "dec_base.h" | |
27 | +#include "enc_prvt.h" | |
28 | +#include <stdio.h> | |
29 | +//#include "open/common.h" | |
30 | + | |
31 | +bool DecoderBase::is_prefix(const unsigned char * bytes) | |
32 | +{ | |
33 | + unsigned char b0 = *bytes; | |
34 | + unsigned char b1 = *(bytes+1); | |
35 | + if (b0 == 0xF0) { // LOCK | |
36 | + return true; | |
37 | + } | |
38 | + if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes | |
39 | + if (b1 == 0x0F) { // .... but may be a part of SIMD opcode | |
40 | + return false; | |
41 | + } | |
42 | + return true; | |
43 | + } | |
44 | + if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) { | |
45 | + // branch hints, segment prefixes | |
46 | + return true; | |
47 | + } | |
48 | + if (b0==0x66) { // operand-size prefix | |
49 | + if (b1 == 0x0F) { // .... but may be a part of SIMD opcode | |
50 | + return false; | |
51 | + } | |
52 | + return false; //XXX - currently considered as part of opcode//true; | |
53 | + } | |
54 | + if (b0==0x67) { // address size prefix | |
55 | + return true; | |
56 | + } | |
57 | + return false; | |
58 | +} | |
59 | + | |
60 | +// Returns prefix count from 0 to 4, or ((unsigned int)-1) on error | |
61 | +unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst) | |
62 | +{ | |
63 | + const unsigned char * my_bytes = bytes; | |
64 | + | |
65 | + while( 1 ) | |
66 | + { | |
67 | + unsigned char by1 = *my_bytes; | |
68 | + unsigned char by2 = *(my_bytes + 1); | |
69 | + Inst::PrefGroups where; | |
70 | + | |
71 | + switch( by1 ) | |
72 | + { | |
73 | + case InstPrefix_REPNE: | |
74 | + case InstPrefix_REP: | |
75 | + { | |
76 | + if( 0x0F == by2) | |
77 | + { | |
78 | + return pinst->prefc; | |
79 | + } | |
80 | + } | |
81 | + case InstPrefix_LOCK: | |
82 | + { | |
83 | + where = Inst::Group1; | |
84 | + break; | |
85 | + } | |
86 | + case InstPrefix_CS: | |
87 | + case InstPrefix_SS: | |
88 | + case InstPrefix_DS: | |
89 | + case InstPrefix_ES: | |
90 | + case InstPrefix_FS: | |
91 | + case InstPrefix_GS: | |
92 | +// case InstPrefix_HintTaken: the same as CS override | |
93 | +// case InstPrefix_HintNotTaken: the same as DS override | |
94 | + { | |
95 | + where = Inst::Group2; | |
96 | + break; | |
97 | + } | |
98 | + case InstPrefix_OpndSize: | |
99 | + { | |
100 | +//NOTE: prefix does not work for JMP Sz16, the opcode is 0x66 0xe9 | |
101 | +// here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9 | |
102 | +// it will match JMP Sz32 ... | |
103 | +//HACK: assume it is the last prefix, return any way | |
104 | + if( 0x0F == by2) | |
105 | + { | |
106 | + return pinst->prefc; | |
107 | + } | |
108 | + return pinst->prefc; | |
109 | + where = Inst::Group3; | |
110 | + break; | |
111 | + } | |
112 | + case InstPrefix_AddrSize: | |
113 | + { | |
114 | + where = Inst::Group4; | |
115 | + break; | |
116 | + } | |
117 | + default: | |
118 | + { | |
119 | + return pinst->prefc; | |
120 | + } | |
121 | + } | |
122 | + // Assertions are not allowed here. | |
123 | + // Error situations should result in returning error status | |
124 | + if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group | |
125 | + return (unsigned int)-1; | |
126 | + | |
127 | + pinst->pref[where] = (InstPrefix)by1; | |
128 | + | |
129 | + if (pinst->prefc >= 4) //no more than 4 prefixes | |
130 | + return (unsigned int)-1; | |
131 | + | |
132 | + pinst->prefc++; | |
133 | + ++my_bytes; | |
134 | + } | |
135 | +} | |
136 | + | |
137 | + | |
138 | + | |
139 | +unsigned DecoderBase::decode(const void * addr, Inst * pinst) | |
140 | +{ | |
141 | + Inst tmp; | |
142 | + | |
143 | + //assert( *(unsigned char*)addr != 0x66); | |
144 | + | |
145 | + const unsigned char * bytes = (unsigned char*)addr; | |
146 | + | |
147 | + // Load up to 4 prefixes | |
148 | + // for each Mnemonic | |
149 | + unsigned int pref_count = fill_prefs(bytes, &tmp); | |
150 | + | |
151 | + if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes | |
152 | + return 0; // Error | |
153 | + | |
154 | + bytes += pref_count; | |
155 | + | |
156 | + // for each opcodedesc | |
157 | + // if (raw_len == 0) memcmp(, raw_len) | |
158 | + // else check the mixed state which is one of the following: | |
159 | + // /digit /i /rw /rd /rb | |
160 | + | |
161 | + bool found = false; | |
162 | + const unsigned char * saveBytes = bytes; | |
163 | + for (unsigned mn=1; mn<Mnemonic_Count; mn++) { | |
164 | + bytes = saveBytes; | |
165 | + found=try_mn((Mnemonic)mn, &bytes, &tmp); | |
166 | + if (found) { | |
167 | + tmp.mn = (Mnemonic)mn; | |
168 | + break; | |
169 | + } | |
170 | + } | |
171 | + if (!found) { | |
172 | + // Unknown opcode | |
173 | + return 0; | |
174 | + } | |
175 | + tmp.size = (unsigned)(bytes-(const unsigned char*)addr); | |
176 | + if (pinst) { | |
177 | + *pinst = tmp; | |
178 | + } | |
179 | + return tmp.size; | |
180 | +} | |
181 | + | |
// EXTEND_REG(reg, flag): register index, extended by a REX bit where applicable.
// With _EM64T_ the macro reads a variable named `rex` (a Rex pointer) from the
// scope where it is expanded: if rex is non-NULL and rex->flag is set, 8 is
// added to reg. Without _EM64T_ the index is returned unchanged.
#ifdef _EM64T_
#define EXTEND_REG(reg, flag) \
    ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8))
#else
#define EXTEND_REG(reg, flag) (reg)
#endif
188 | + | |
/**
 * Decodes one auxiliary item (aux0/aux1) of an opcode descriptor.
 *
 * The kind bits of @p aux select what is expected at *pbuf: a ModR/M byte
 * (/r or /digit), a register encoded in the opcode byte (+rb/+rw/+rd, +i),
 * a relative offset (cb/cw/cd) or an immediate (ib/iw/id/io). Decoded
 * operands are stored at pinst->operands[pinst->argc] and argc is advanced;
 * *pbuf is advanced past the bytes consumed.
 *
 * @param odesc opcode descriptor being matched.
 * @param aux   auxiliary item: kind in OpcodeByteKind_KindMask, payload byte
 *              in OpcodeByteKind_OpcodeMask.
 * @param pbuf  in/out cursor into the instruction bytes.
 * @param pinst instruction record being filled.
 * @param rex   (only with _EM64T_) REX prefix of the instruction or NULL;
 *              used only through EXTEND_REG.
 * @return true on success; false if the bytes do not fit this item
 *         (register id above 7, /digit mismatch) or the kind is not handled.
 */
bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
    const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
    , const Rex UNREF *rex
#endif
    )
{
    OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
    unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
    unsigned data_byte = **pbuf;
    // Operand slot and operand description for the current argument index.
    EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];

    switch (kind) {
    case OpcodeByteKind_SlashR:
        {
            // /r: ModR/M carries both a register operand (reg field) and an
            // r/m operand. Two operands are produced, in descriptor order.
            RegName reg;
            OpndKind okind;
            // Bound before decodeModRM() advances *pbuf, so modrm keeps
            // referring to the ModR/M byte itself.
            const ModRM& modrm = *(ModRM*)*pbuf;
            if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
#ifdef _EM64T_
                decodeModRM(odesc, pbuf, pinst, rex);
#else
                decodeModRM(odesc, pbuf, pinst);
#endif
                ++pinst->argc;
                const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
                // 64-bit operands are treated as XMM registers here.
                okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
                EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
                reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
                regOpnd = EncoderBase::Operand(reg);
            } else {                            // 2nd operand is memory
                // 64-bit operands are treated as XMM registers here.
                okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
                EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
                reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
                regOpnd = EncoderBase::Operand(reg);
                ++pinst->argc;
#ifdef _EM64T_
                decodeModRM(odesc, pbuf, pinst, rex);
#else
                decodeModRM(odesc, pbuf, pinst);
#endif
            }
            ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_rb:
    case OpcodeByteKind_rw:
    case OpcodeByteKind_rd:
        {
            // +rb/+rw/+rd: the register id is the difference between the
            // actual opcode byte and the base byte from the descriptor.
            // Gregory -
            // Here we don't parse register because for current needs
            // disassembler doesn't require to parse all operands
            unsigned regid = data_byte - byte;
            if (regid>7) {
                // Also catches data_byte < byte (unsigned wrap-around).
                return false;
            }
            OpndSize opnd_size;
            switch(kind)
            {
            case OpcodeByteKind_rb:
                {
                    opnd_size = OpndSize_8;
                    break;
                }
            case OpcodeByteKind_rw:
                {
                    opnd_size = OpndSize_16;
                    break;
                }
            case OpcodeByteKind_rd:
                {
                    opnd_size = OpndSize_32;
                    break;
                }
            default:
                opnd_size = OpndSize_32; // so there is no compiler warning
                assert( false );
            }
            opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );

            ++pinst->argc;
            ++*pbuf;
            return true;
        }
    case OpcodeByteKind_cb:
        {
            // 1-byte relative offset.
            char offset = *(char*)*pbuf;
            *pbuf += 1;
            opnd = EncoderBase::Operand(offset);
            ++pinst->argc;
            //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
        }
        return true;
    case OpcodeByteKind_cw:
        // not an error, but not expected in current env
        // Android x86
        {
            // 2-byte relative offset.
            short offset = *(short*)*pbuf;
            *pbuf += 2;
            opnd = EncoderBase::Operand(offset);
            ++pinst->argc;
        }
        return true;
        //return false;
    case OpcodeByteKind_cd:
        {
            // 4-byte relative offset.
            int offset = *(int*)*pbuf;
            *pbuf += 4;
            opnd = EncoderBase::Operand(offset);
            ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_SlashNum:
        {
            // /digit: the reg field of ModR/M must equal the digit from the
            // descriptor; only the r/m operand is decoded.
            const ModRM& modrm = *(ModRM*)*pbuf;
            if (modrm.reg != byte) {
                return false;
            }
            decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                        , rex
#endif
                        );
            ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_ib:
        {
            // 1-byte immediate.
            char ival = *(char*)*pbuf;
            opnd = EncoderBase::Operand(ival);
            ++pinst->argc;
            *pbuf += 1;
        }
        return true;
    case OpcodeByteKind_iw:
        {
            // 2-byte immediate.
            short ival = *(short*)*pbuf;
            opnd = EncoderBase::Operand(ival);
            ++pinst->argc;
            *pbuf += 2;
        }
        return true;
    case OpcodeByteKind_id:
        {
            // 4-byte immediate.
            int ival = *(int*)*pbuf;
            opnd = EncoderBase::Operand(ival);
            ++pinst->argc;
            *pbuf += 4;
        }
        return true;
#ifdef _EM64T_
    case OpcodeByteKind_io:
        {
            // 8-byte immediate.
            long long int ival = *(long long int*)*pbuf;
            opnd = EncoderBase::Operand(OpndSize_64, ival);
            ++pinst->argc;
            *pbuf += 8;
        }
        return true;
#endif
    case OpcodeByteKind_plus_i:
        {
            // +i: only validates the register id and consumes the byte;
            // no operand is recorded and argc is left unchanged.
            unsigned regid = data_byte - byte;
            if (regid>7) {
                return false;
            }
            ++*pbuf;
            return true;
        }
    case OpcodeByteKind_ZeroOpcodeByte: // cant be here
        return false;
    default:
        // unknown kind ? how comes ?
        break;
    }
    return false;
}
368 | + | |
369 | +bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) { | |
370 | + const unsigned char * save_pbuf = *pbuf; | |
371 | + EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn]; | |
372 | + | |
373 | + for (unsigned i=0; !opcodes[i].last; i++) { | |
374 | + const EncoderBase::OpcodeDesc& odesc = opcodes[i]; | |
375 | + char *opcode_ptr = const_cast<char *>(odesc.opcode); | |
376 | + int opcode_len = odesc.opcode_len; | |
377 | +#ifdef _EM64T_ | |
378 | + Rex *prex = NULL; | |
379 | + Rex rex; | |
380 | +#endif | |
381 | + | |
382 | + *pbuf = save_pbuf; | |
383 | +#ifdef _EM64T_ | |
384 | + // Match REX prefixes | |
385 | + unsigned char rex_byte = (*pbuf)[0]; | |
386 | + if ((rex_byte & 0xf0) == 0x40) | |
387 | + { | |
388 | + if ((rex_byte & 0x08) != 0) | |
389 | + { | |
390 | + // Have REX.W | |
391 | + if (opcode_len > 0 && opcode_ptr[0] == 0x48) | |
392 | + { | |
393 | + // Have REX.W in opcode. All mnemonics that allow | |
394 | + // REX.W have to have specified it in opcode, | |
395 | + // otherwise it is not allowed | |
396 | + rex = *(Rex *)*pbuf; | |
397 | + prex = &rex; | |
398 | + (*pbuf)++; | |
399 | + opcode_ptr++; | |
400 | + opcode_len--; | |
401 | + } | |
402 | + } | |
403 | + else | |
404 | + { | |
405 | + // No REX.W, so it doesn't have to be in opcode. We | |
406 | + // have REX.B, REX.X, REX.R or their combination, but | |
407 | + // not in opcode, they may extend any part of the | |
408 | + // instruction | |
409 | + rex = *(Rex *)*pbuf; | |
410 | + prex = &rex; | |
411 | + (*pbuf)++; | |
412 | + } | |
413 | + } | |
414 | +#endif | |
415 | + if (opcode_len != 0) { | |
416 | + if (memcmp(*pbuf, opcode_ptr, opcode_len)) { | |
417 | + continue; | |
418 | + } | |
419 | + *pbuf += opcode_len; | |
420 | + } | |
421 | + if (odesc.aux0 != 0) { | |
422 | + | |
423 | + if (!decode_aux(odesc, odesc.aux0, pbuf, pinst | |
424 | +#ifdef _EM64T_ | |
425 | + , prex | |
426 | +#endif | |
427 | + )) { | |
428 | + continue; | |
429 | + } | |
430 | + if (odesc.aux1 != 0) { | |
431 | + if (!decode_aux(odesc, odesc.aux1, pbuf, pinst | |
432 | +#ifdef _EM64T_ | |
433 | + , prex | |
434 | +#endif | |
435 | + )) { | |
436 | + continue; | |
437 | + } | |
438 | + } | |
439 | + pinst->odesc = &opcodes[i]; | |
440 | + return true; | |
441 | + } | |
442 | + else { | |
443 | + // Can't have empty opcode | |
444 | + assert(opcode_len != 0); | |
445 | + pinst->odesc = &opcodes[i]; | |
446 | + return true; | |
447 | + } | |
448 | + } | |
449 | + return false; | |
450 | +} | |
451 | + | |
452 | +bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc, | |
453 | + const unsigned char ** pbuf, Inst * pinst | |
454 | +#ifdef _EM64T_ | |
455 | + , const Rex *rex | |
456 | +#endif | |
457 | + ) | |
458 | +{ | |
459 | + EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; | |
460 | + const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; | |
461 | + | |
462 | + //XXX debug ///assert(0x66 != *(*pbuf-2)); | |
463 | + const ModRM& modrm = *(ModRM*)*pbuf; | |
464 | + *pbuf += 1; | |
465 | + | |
466 | + RegName base = RegName_Null; | |
467 | + RegName index = RegName_Null; | |
468 | + int disp = 0; | |
469 | + unsigned scale = 0; | |
470 | + | |
471 | + // On x86_64 all mnemonics that allow REX.W have REX.W in opcode. | |
472 | + // Therefore REX.W is simply ignored, and opndDesc.size is used | |
473 | + | |
474 | + if (modrm.mod == 3) { | |
475 | + // we have only modrm. no sib, no disp. | |
476 | + // Android x86: Use XMMReg for 64b operand. | |
477 | + OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; | |
478 | + RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b)); | |
479 | + opnd = EncoderBase::Operand(reg); | |
480 | + return true; | |
481 | + } | |
482 | + //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory | |
483 | + //base and index should be 32 bits!!! | |
484 | + const SIB& sib = *(SIB*)*pbuf; | |
485 | + // check whether we have a sib | |
486 | + if (modrm.rm == 4) { | |
487 | + // yes, we have SIB | |
488 | + *pbuf += 1; | |
489 | + if (sib.index != 4) { | |
490 | + index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size | |
491 | + } else { | |
492 | + // (sib.index == 4) => no index | |
493 | + //%esp can't be sib.index | |
494 | + } | |
495 | + | |
496 | + // scale = sib.scale == 0 ? 0 : (1<<sib.scale); | |
497 | + // scale = (1<<sib.scale); | |
498 | + scale = (index == RegName_Null) ? 0 : (1<<sib.scale); | |
499 | + | |
500 | + if (sib.base != 5 || modrm.mod != 0) { | |
501 | + base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size | |
502 | + } else { | |
503 | + // (sib.base == 5 && modrm.mod == 0) => no base | |
504 | + } | |
505 | + } | |
506 | + else { | |
507 | + if (modrm.mod != 0 || modrm.rm != 5) { | |
508 | + base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size | |
509 | + } | |
510 | + else { | |
511 | + // mod=0 && rm == 5 => only disp32 | |
512 | + } | |
513 | + } | |
514 | + | |
515 | + //update disp and pbuf | |
516 | + if (modrm.mod == 2) { | |
517 | + // have disp32 | |
518 | + disp = *(int*)*pbuf; | |
519 | + *pbuf += 4; | |
520 | + } | |
521 | + else if (modrm.mod == 1) { | |
522 | + // have disp8 | |
523 | + disp = *(char*)*pbuf; | |
524 | + *pbuf += 1; | |
525 | + } | |
526 | + else { | |
527 | + assert(modrm.mod == 0); | |
528 | + if (modrm.rm == 5) { | |
529 | + // have disp32 w/o sib | |
530 | + disp = *(int*)*pbuf; | |
531 | + *pbuf += 4; | |
532 | + } | |
533 | + else if (modrm.rm == 4 && sib.base == 5) { | |
534 | + // have disp32 with SI in sib | |
535 | + disp = *(int*)*pbuf; | |
536 | + *pbuf += 4; | |
537 | + } | |
538 | + } | |
539 | + opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp); | |
540 | + return true; | |
541 | +} |
@@ -0,0 +1,135 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | + | |
21 | +/** | |
22 | + * @file | |
23 | + * @brief Main decoding (disassembling) routines and structures. | |
24 | + * | |
25 | + * @note Quick and rough implementation, subject for a change. | |
26 | + */ | |
27 | + | |
28 | +#ifndef __DEC_BASE_H_INCLUDED__ | |
29 | +#define __DEC_BASE_H_INCLUDED__ | |
30 | + | |
31 | + | |
32 | +#include "enc_base.h" | |
33 | +#include "enc_prvt.h" | |
34 | + | |
35 | +#ifdef ENCODER_ISOLATE | |
36 | +using namespace enc_ia32; | |
37 | +#endif | |
38 | + | |
39 | +#define IF_CONDITIONAL (0x00000000) | |
40 | +#define IF_SYMMETRIC (0x00000000) | |
41 | +#define IF_BRANCH (0x00000000) | |
42 | + | |
43 | +struct Inst { | |
44 | + Inst() { | |
45 | + mn = Mnemonic_Null; | |
46 | + prefc = 0; | |
47 | + size = 0; | |
48 | + flags = 0; | |
49 | + //offset = 0; | |
50 | + //direct_addr = NULL; | |
51 | + argc = 0; | |
52 | + for(int i = 0; i < 4; ++i) | |
53 | + { | |
54 | + pref[i] = InstPrefix_Null; | |
55 | + } | |
56 | + } | |
57 | + /** | |
58 | + * Mnemonic of the instruction.s | |
59 | + */ | |
60 | + Mnemonic mn; | |
61 | + /** | |
62 | + * Enumerating of indexes in the pref array. | |
63 | + */ | |
64 | + enum PrefGroups | |
65 | + { | |
66 | + Group1 = 0, | |
67 | + Group2, | |
68 | + Group3, | |
69 | + Group4 | |
70 | + }; | |
71 | + /** | |
72 | + * Number of prefixes (1 byte each). | |
73 | + */ | |
74 | + unsigned int prefc; | |
75 | + /** | |
76 | + * Instruction prefixes. Prefix should be placed here according to its group. | |
77 | + */ | |
78 | + InstPrefix pref[4]; | |
79 | + /** | |
80 | + * Size, in bytes, of the instruction. | |
81 | + */ | |
82 | + unsigned size; | |
83 | + /** | |
84 | + * Flags of the instruction. | |
85 | + * @see MF_ | |
86 | + */ | |
87 | + unsigned flags; | |
88 | + /** | |
89 | + * An offset of target address, in case of 'CALL offset', | |
90 | + * 'JMP/Jcc offset'. | |
91 | + */ | |
92 | + //int offset; | |
93 | + /** | |
94 | + * Direct address of the target (on Intel64/IA-32 is 'instruction IP' + | |
95 | + * 'instruction length' + offset). | |
96 | + */ | |
97 | + //void * direct_addr; | |
98 | + /** | |
99 | + * Number of arguments of the instruction. | |
100 | + */ | |
101 | + unsigned argc; | |
102 | + // | |
103 | + EncoderBase::Operand operands[3]; | |
104 | + // | |
105 | + const EncoderBase::OpcodeDesc * odesc; | |
106 | +}; | |
107 | + | |
108 | +inline bool is_jcc(Mnemonic mn) | |
109 | +{ | |
110 | + return Mnemonic_JO <= mn && mn<=Mnemonic_JG; | |
111 | +} | |
112 | + | |
/**
 * Instruction decoder (disassembler). All members are static; decode() is
 * the only public entry point.
 */
class DecoderBase {
public:
    /**
     * Decodes the instruction at addr. Returns its size in bytes, or 0 on
     * failure; on success the result is copied to *pinst if pinst is non-null.
     */
    static unsigned decode(const void * addr, Inst * pinst);
private:
    /** Decodes the ModR/M (+SIB, +displacement) operand at *pbuf. */
    static bool decodeModRM(const EncoderBase::OpcodeDesc& odesc,
        const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
        , const Rex *rex
#endif
        );
    /** Decodes a single auxiliary item (aux0/aux1) of an opcode descriptor. */
    static bool decode_aux(const EncoderBase::OpcodeDesc& odesc,
        unsigned aux, const unsigned char ** pbuf,
        Inst * pinst
#ifdef _EM64T_
        , const Rex *rex
#endif
        );
    /** Tries to match the bytes at *pbuf against the opcodes of mnemonic mn. */
    static bool try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst);
    /** Collects prefixes into pinst; returns their count or (unsigned)-1 on error. */
    static unsigned int fill_prefs( const unsigned char * bytes, Inst * pinst);
    /** Tells whether the byte at bytes is treated as an instruction prefix. */
    static bool is_prefix(const unsigned char * bytes);
};
134 | + | |
135 | +#endif // ~ __DEC_BASE_H_INCLUDED__ |
@@ -0,0 +1,1137 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | +#include "enc_base.h" | |
21 | +//#include <climits> | |
22 | +#include <string.h> | |
23 | +#define USE_ENCODER_DEFINES | |
24 | +#include "enc_prvt.h" | |
25 | +#include <stdio.h> | |
26 | + | |
27 | +//#define JET_PROTO | |
28 | + | |
29 | +#ifdef JET_PROTO | |
30 | +#include "dec_base.h" | |
31 | +#include "jvmti_dasm.h" | |
32 | +#endif | |
33 | + | |
34 | +ENCODER_NAMESPACE_START | |
35 | + | |
36 | +/** | |
37 | + * @file | |
38 | + * @brief Main encoding routines and structures. | |
39 | + */ | |
40 | + | |
41 | +#ifndef _WIN32 | |
42 | + #define strcmpi strcasecmp | |
43 | +#endif | |
44 | + | |
// Initialized with the result of buildTable(), so buildTable() runs during
// static initialization of this translation unit.
int EncoderBase::dummy = EncoderBase::buildTable();

// Lookup table indexed by OpndSize value. The four defined sizes map to the
// codes 3 (8 bit), 2 (16 bit), 1 (32 bit) and 0 (64 bit); every other index
// holds 0xFF.
// NOTE(review): presumably 0xFF marks "no code for this size" - confirm at
// the use sites.
const unsigned char EncoderBase::size_hash[OpndSize_64+1] = {
    //
    0xFF,   // OpndSize_Null = 0,
    3,      // OpndSize_8 = 0x1,
    2,      // OpndSize_16 = 0x2,
    0xFF,   // 0x3
    1,      // OpndSize_32 = 0x4,
    0xFF,   // 0x5
    0xFF,   // 0x6
    0xFF,   // 0x7
    0,      // OpndSize_64 = 0x8,
    //
};
60 | + | |
// Lookup table indexed by OpndKind value (0 .. OpndKind_Mem, 65 entries).
// Each defined kind maps to a 3-bit code, listed below, shifted left by 2;
// every other index holds 0xFF.
const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = {
    //
    //gp reg        -> 000 = 0
    //memory        -> 001 = 1
    //immediate     -> 010 = 2
    //xmm reg       -> 011 = 3
    //segment regs  -> 100 = 4
    //fp reg        -> 101 = 5
    //mmx reg       -> 110 = 6
    //
    0xFF,                           // 0    OpndKind_Null=0,
    0<<2,                           // 1    OpndKind_GPReg =
                                    //      OpndKind_MinRegKind=0x1,
    4<<2,                           // 2    OpndKind_SReg=0x2,

    // Index 3 carries the mmx reg code (6) only when MMX support is built in.
#ifdef _HAVE_MMX_
    6<<2,                           // 3
#else
    0xFF,                           // 3
#endif

    5<<2,                           // 4    OpndKind_FPReg=0x4,
    0xFF, 0xFF, 0xFF,               // 5, 6, 7
    3<<2,                           //      OpndKind_XMMReg=0x8,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD,
                                              // 0xE, 0xF
    0xFF,                           // OpndKind_MaxRegKind =
                                    // OpndKind_StatusReg =
                                    // OpndKind_OtherReg=0x10,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,       // 0x19-0x1F
    2<<2,                           // OpndKind_Immediate=0x20,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,       // 0x39-0x3F
    1<<2,                           // OpndKind_Memory=0x40
};

// For each operand index, the stream address at which encode_aux() last
// wrote that operand's immediate or relative-offset bytes.
char * EncoderBase::curRelOpnd[3];
101 | + | |
/**
 * Encodes one auxiliary item (aux0/aux1) of an opcode descriptor into the
 * output stream.
 *
 * Depending on the kind bits of @p aux this emits a ModR/M byte (/r, /digit),
 * an opcode byte with a register number added (+i, +rb/+rw/+rd), or an
 * immediate / relative offset (ib/iw/id/io, cb/cw/cd).
 *
 * @param stream      output position.
 * @param aux         auxiliary item: kind bits plus a payload byte.
 * @param opnds       operands of the instruction being encoded.
 * @param odesc       opcode descriptor chosen for the instruction.
 * @param pargsCount  in/out index of the next operand to consume; advanced
 *                    by the number of operands this item used.
 * @param prex        REX prefix to update; written only under _EM64T_ in
 *                    this function, and forwarded to encodeModRM().
 * @return output position after the bytes written. On a failed sanity check
 *         (asserts compiled out) the stream is returned without that item.
 */
char* EncoderBase::encode_aux(char* stream, unsigned aux,
                              const Operands& opnds, const OpcodeDesc * odesc,
                              unsigned * pargsCount, Rex * prex)
{
    const unsigned byte = aux;
    OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask);
    // No operand left to encode for this item.
    if (*pargsCount >= COUNTOF(opnds)) {
        assert(false);
        return stream;
    }
    // The '>>' here is to force the switch to be table-based instead of
    // set of CMP+Jcc.
    switch(kind>>8) {
    case OpcodeByteKind_SlashR>>8:
        // /r - Indicates that the ModR/M byte of the instruction contains
        // both a register operand and an r/m operand.
        {
            assert(opnds.count() > 1);
            // not true anymore for MOVQ xmm<->r
            //assert((odesc->opnds[0].kind & OpndKind_Mem) ||
            //       (odesc->opnds[1].kind & OpndKind_Mem));
            // The operand whose descriptor allows memory goes to r/m, the
            // other one goes to the reg field.
            unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1;
            unsigned regidx = memidx == 0 ? 1 : 0;
            memidx += *pargsCount;
            regidx += *pargsCount;
            // Bound before stream is advanced, so modrm keeps referring to
            // the ModR/M byte while the reg field is filled in below.
            ModRM& modrm = *(ModRM*)stream;
            if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) {
                assert(false);
                break;
            }
            if (opnds[memidx].is_mem()) {
                stream = encodeModRM(stream, opnds, memidx, odesc, prex);
            }
            else {
                modrm.mod = 3; // 11
                modrm.rm = getHWRegIndex(opnds[memidx].reg());
#ifdef _EM64T_
                if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) {
                    prex->b = 1;
                }
#endif
                ++stream;
            }
            modrm.reg = getHWRegIndex(opnds[regidx].reg());
#ifdef _EM64T_
            if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) {
                prex->r = 1;
            }
#endif
            *pargsCount += 2;
        }
        break;
    case OpcodeByteKind_SlashNum>>8:
        //  /digit - A digit between 0 and 7 indicates that the
        //  ModR/M byte of the instruction uses only the r/m
        //  (register or memory) operand. The reg field contains
        //  the digit that provides an extension to the instruction's
        //  opcode.
        {
            const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
            assert(lowByte <= 7);
            // Bound before stream is advanced (see /r above).
            ModRM& modrm = *(ModRM*)stream;
            unsigned idx = *pargsCount;
            assert(opnds[idx].is_mem() || opnds[idx].is_reg());
            if (opnds[idx].is_mem()) {
                stream = encodeModRM(stream, opnds, idx, odesc, prex);
            }
            else {
                modrm.mod = 3; // 11
                modrm.rm = getHWRegIndex(opnds[idx].reg());
#ifdef _EM64T_
                if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) {
                    prex->b = 1;
                }
#endif
                ++stream;
            }
            modrm.reg = (char)lowByte;
            *pargsCount += 1;
        }
        break;
    case OpcodeByteKind_plus_i>>8:
        //  +i - A number used in floating-point instructions when one
        //  of the operands is ST(i) from the FPU register stack. The
        //  number i (which can range from 0 to 7) is added to the
        //  hexadecimal byte given at the left of the plus sign to form
        //  a single opcode byte.
        {
            unsigned idx = *pargsCount;
            const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
            *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg());
            ++stream;
            *pargsCount += 1;
        }
        break;
    case OpcodeByteKind_ib>>8:
    case OpcodeByteKind_iw>>8:
    case OpcodeByteKind_id>>8:
#ifdef _EM64T_
    case OpcodeByteKind_io>>8:
#endif //_EM64T_
        //  ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id)
        //  immediate operand to the instruction that follows the
        //  opcode, ModR/M bytes or scale-indexing bytes. The opcode
        //  determines if the operand is a signed value. All words
        //  and double words are given with the low-order byte first.
        {
            unsigned idx = *pargsCount;
            *pargsCount += 1;
            assert(opnds[idx].is_imm());
            // In every branch the address of the emitted immediate is
            // recorded in curRelOpnd[idx].
            if (kind == OpcodeByteKind_ib) {
                *(unsigned char*)stream = (unsigned char)opnds[idx].imm();
                curRelOpnd[idx] = stream;
                stream += 1;
            }
            else if (kind == OpcodeByteKind_iw) {
                *(unsigned short*)stream = (unsigned short)opnds[idx].imm();
                curRelOpnd[idx] = stream;
                stream += 2;
            }
            else if (kind == OpcodeByteKind_id) {
                *(unsigned*)stream = (unsigned)opnds[idx].imm();
                curRelOpnd[idx] = stream;
                stream += 4;
            }
#ifdef _EM64T_
            else {
                assert(kind == OpcodeByteKind_io);
                *(long long*)stream = (long long)opnds[idx].imm();
                curRelOpnd[idx] = stream;
                stream += 8;
            }
#else
            else {
                assert(false);
            }
#endif
        }
        break;
    case OpcodeByteKind_cb>>8:
        // 1-byte relative offset; its address is recorded in curRelOpnd.
        assert(opnds[*pargsCount].is_imm());
        *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm();
        curRelOpnd[*pargsCount]= stream;
        stream += 1;
        *pargsCount += 1;
        break;
    case OpcodeByteKind_cw>>8:
        // 2-byte relative offset; its address is recorded in curRelOpnd.
        assert(opnds[*pargsCount].is_imm());
        *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm();
        curRelOpnd[*pargsCount]= stream;
        stream += 2;
        *pargsCount += 1;
        break;
    case OpcodeByteKind_cd>>8:
        // 4-byte relative offset; its address is recorded in curRelOpnd.
        assert(opnds[*pargsCount].is_imm());
        *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm();
        curRelOpnd[*pargsCount]= stream;
        stream += 4;
        *pargsCount += 1;
        break;
    //OpcodeByteKind_cp = 0x0B00,
    //OpcodeByteKind_co = 0x0C00,
    //OpcodeByteKind_ct = 0x0D00,
    case OpcodeByteKind_rb>>8:
    case OpcodeByteKind_rw>>8:
    case OpcodeByteKind_rd>>8:
        //  +rb, +rw, +rd - A register code, from 0 through 7,
        //  added to the hexadecimal byte given at the left of
        //  the plus sign to form a single opcode byte.
        assert(opnds.count() > 0);
        assert(opnds[*pargsCount].is_reg());
        {
            const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
            *(unsigned char*)stream = (unsigned char)lowByte +
                                   getHWRegIndex(opnds[*pargsCount].reg());
#ifdef _EM64T_
        if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) {
            prex->b = 1;
        }
#endif
            ++stream;
            *pargsCount += 1;
        }
        break;
    default:
        // Unknown kind: nothing is emitted.
        assert(false);
        break;
    }
    return stream;
}
292 | + | |
293 | +char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds) | |
294 | +{ | |
295 | +#ifdef _DEBUG | |
296 | + if (opnds.count() > 0) { | |
297 | + if (opnds[0].is_mem()) { | |
298 | + assert(getRegKind(opnds[0].base()) != OpndKind_SReg); | |
299 | + } | |
300 | + else if (opnds.count() >1 && opnds[1].is_mem()) { | |
301 | + assert(getRegKind(opnds[1].base()) != OpndKind_SReg); | |
302 | + } | |
303 | + } | |
304 | +#endif | |
305 | + | |
306 | +#ifdef JET_PROTO | |
307 | + char* saveStream = stream; | |
308 | +#endif | |
309 | + | |
310 | + const OpcodeDesc * odesc = lookup(mn, opnds); | |
311 | +#if !defined(_EM64T_) | |
312 | + bool copy_opcode = true; | |
313 | + Rex *prex = NULL; | |
314 | +#else | |
315 | + // We need rex if | |
316 | + // either of registers used as operand or address form is new extended register | |
317 | + // it's explicitly specified by opcode | |
318 | + // So, if we don't have REX in opcode but need_rex, then set rex here | |
319 | + // otherwise, wait until opcode is set, and then update REX | |
320 | + | |
321 | + bool copy_opcode = true; | |
322 | + unsigned char _1st = odesc->opcode[0]; | |
323 | + | |
324 | + Rex *prex = (Rex*)stream; | |
325 | + if (opnds.need_rex() && | |
326 | + ((_1st == 0x66) || (_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F)) { | |
327 | + // Special processing | |
328 | + // | |
329 | + copy_opcode = false; | |
330 | + // | |
331 | + *(unsigned char*)stream = _1st; | |
332 | + ++stream; | |
333 | + // | |
334 | + prex = (Rex*)stream; | |
335 | + prex->dummy = 4; | |
336 | + prex->w = 0; | |
337 | + prex->b = 0; | |
338 | + prex->x = 0; | |
339 | + prex->r = 0; | |
340 | + ++stream; | |
341 | + // | |
342 | + memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1); | |
343 | + stream += odesc->opcode_len-1; | |
344 | + } | |
345 | + else if (_1st != 0x48 && opnds.need_rex()) { | |
346 | + prex = (Rex*)stream; | |
347 | + prex->dummy = 4; | |
348 | + prex->w = 0; | |
349 | + prex->b = 0; | |
350 | + prex->x = 0; | |
351 | + prex->r = 0; | |
352 | + ++stream; | |
353 | + } | |
354 | +#endif // ifndef EM64T | |
355 | + | |
356 | + if (copy_opcode) { | |
357 | + if (odesc->opcode_len==1) { | |
358 | + unsigned char *dest = (unsigned char *) (stream); | |
359 | + unsigned char *src = (unsigned char *) (& (odesc->opcode)); | |
360 | + *dest = *src; | |
361 | + } | |
362 | + else if (odesc->opcode_len==2) { | |
363 | + short *dest = (short *) (stream); | |
364 | + void *ptr = (void *) (& (odesc->opcode)); | |
365 | + short *src = (short *) (ptr); | |
366 | + *dest = *src; | |
367 | + } | |
368 | + else if (odesc->opcode_len==3) { | |
369 | + unsigned short *dest = (unsigned short *) (stream); | |
370 | + void *ptr = (void *) (& (odesc->opcode)); | |
371 | + unsigned short *src = (unsigned short *) (ptr); | |
372 | + *dest = *src; | |
373 | + | |
374 | + //Now handle the last part | |
375 | + unsigned char *dest2 = (unsigned char *) (stream + 2); | |
376 | + *dest2 = odesc->opcode[2]; | |
377 | + } | |
378 | + else if (odesc->opcode_len==4) { | |
379 | + unsigned int *dest = (unsigned int *) (stream); | |
380 | + void *ptr = (void *) (& (odesc->opcode)); | |
381 | + unsigned int *src = (unsigned int *) (ptr); | |
382 | + *dest = *src; | |
383 | + } | |
384 | + stream += odesc->opcode_len; | |
385 | + } | |
386 | + | |
387 | + unsigned argsCount = odesc->first_opnd; | |
388 | + | |
389 | + if (odesc->aux0) { | |
390 | + stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex); | |
391 | + if (odesc->aux1) { | |
392 | + stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex); | |
393 | + } | |
394 | + } | |
395 | +#ifdef JET_PROTO | |
396 | + //saveStream | |
397 | + Inst inst; | |
398 | + unsigned len = DecoderBase::decode(saveStream, &inst); | |
399 | + assert(inst.mn == mn); | |
400 | + assert(len == (unsigned)(stream-saveStream)); | |
401 | + if (mn == Mnemonic_CALL || mn == Mnemonic_JMP || | |
402 | + Mnemonic_RET == mn || | |
403 | + (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) { | |
404 | + assert(inst.argc == opnds.count()); | |
405 | + | |
406 | + InstructionDisassembler idi(saveStream); | |
407 | + | |
408 | + for (unsigned i=0; i<inst.argc; i++) { | |
409 | + const EncoderBase::Operand& original = opnds[i]; | |
410 | + const EncoderBase::Operand& decoded = inst.operands[i]; | |
411 | + assert(original.kind() == decoded.kind()); | |
412 | + assert(original.size() == decoded.size()); | |
413 | + if (original.is_imm()) { | |
414 | + assert(original.imm() == decoded.imm()); | |
415 | + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Imm); | |
416 | + if (mn == Mnemonic_CALL) { | |
417 | + assert(idi.get_type() == InstructionDisassembler::RELATIVE_CALL); | |
418 | + } | |
419 | + else if (mn == Mnemonic_JMP) { | |
420 | + assert(idi.get_type() == InstructionDisassembler::RELATIVE_JUMP); | |
421 | + } | |
422 | + else if (mn == Mnemonic_RET) { | |
423 | + assert(idi.get_type() == InstructionDisassembler::RET); | |
424 | + } | |
425 | + else { | |
426 | + assert(idi.get_type() == InstructionDisassembler::RELATIVE_COND_JUMP); | |
427 | + } | |
428 | + } | |
429 | + else if (original.is_mem()) { | |
430 | + assert(original.base() == decoded.base()); | |
431 | + assert(original.index() == decoded.index()); | |
432 | + assert(original.scale() == decoded.scale()); | |
433 | + assert(original.disp() == decoded.disp()); | |
434 | + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Mem); | |
435 | + if (mn == Mnemonic_CALL) { | |
436 | + assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL); | |
437 | + } | |
438 | + else if (mn == Mnemonic_JMP) { | |
439 | + assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP); | |
440 | + } | |
441 | + else { | |
442 | + assert(false); | |
443 | + } | |
444 | + } | |
445 | + else { | |
446 | + assert(original.is_reg()); | |
447 | + assert(original.reg() == decoded.reg()); | |
448 | + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Reg); | |
449 | + if (mn == Mnemonic_CALL) { | |
450 | + assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL); | |
451 | + } | |
452 | + else if (mn == Mnemonic_JMP) { | |
453 | + assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP); | |
454 | + } | |
455 | + else { | |
456 | + assert(false); | |
457 | + } | |
458 | + } | |
459 | + } | |
460 | + | |
461 | + Inst inst2; | |
462 | + len = DecoderBase::decode(saveStream, &inst2); | |
463 | + } | |
464 | + | |
465 | + // if(idi.get_length_with_prefix() != (int)len) { | |
466 | + //__asm { int 3 }; | |
467 | + // } | |
468 | +#endif | |
469 | + | |
470 | + return stream; | |
471 | +} | |
472 | + | |
473 | +char* EncoderBase::encodeModRM(char* stream, const Operands& opnds, | |
474 | + unsigned idx, const OpcodeDesc * odesc, | |
475 | + Rex * prex) | |
476 | +{ | |
477 | + const Operand& op = opnds[idx]; | |
478 | + assert(op.is_mem()); | |
479 | + assert(idx < COUNTOF(curRelOpnd)); | |
480 | + ModRM& modrm = *(ModRM*)stream; | |
481 | + ++stream; | |
482 | + SIB& sib = *(SIB*)stream; | |
483 | + | |
484 | + // we need SIB if | |
485 | + // we have index & scale (nb: having index w/o base and w/o scale | |
486 | + // treated as error) | |
487 | + // the base is EBP w/o disp, BUT let's use a fake disp8 | |
488 | + // the base is ESP (nb: cant have ESP as index) | |
489 | + | |
490 | + RegName base = op.base(); | |
491 | + // only disp ?.. | |
492 | + if (base == RegName_Null && op.index() == RegName_Null) { | |
493 | + assert(op.scale() == 0); // 'scale!=0' has no meaning without index | |
494 | + // ... yes - only have disp | |
495 | + // On EM64T, the simply [disp] addressing means 'RIP-based' one - | |
496 | + // must have to use SIB to encode 'DS: based' | |
497 | +#ifdef _EM64T_ | |
498 | + modrm.mod = 0; // 00 - .. | |
499 | + modrm.rm = 4; // 100 - have SIB | |
500 | + | |
501 | + sib.base = 5; // 101 - none | |
502 | + sib.index = 4; // 100 - none | |
503 | + sib.scale = 0; // | |
504 | + ++stream; // bypass SIB | |
505 | +#else | |
506 | + // ignore disp_fits8, always use disp32. | |
507 | + modrm.mod = 0; | |
508 | + modrm.rm = 5; | |
509 | +#endif | |
510 | + *(unsigned*)stream = (unsigned)op.disp(); | |
511 | + curRelOpnd[idx]= stream; | |
512 | + stream += 4; | |
513 | + return stream; | |
514 | + } | |
515 | + | |
516 | + //climits: error when targeting compal | |
517 | +#define CHAR_MIN -127 | |
518 | +#define CHAR_MAX 127 | |
519 | + const bool disp_fits8 = CHAR_MIN <= op.disp() && op.disp() <= CHAR_MAX; | |
520 | + /*&& op.base() != RegName_Null - just checked above*/ | |
521 | + if (op.index() == RegName_Null && getHWRegIndex(op.base()) != getHWRegIndex(REG_STACK)) { | |
522 | + assert(op.scale() == 0); // 'scale!=0' has no meaning without index | |
523 | + // ... luckily no SIB, only base and may be a disp | |
524 | + | |
525 | + // EBP base is a special case. Need to use [EBP] + disp8 form | |
526 | + if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) { | |
527 | + modrm.mod = 0; // mod=00, no disp et all | |
528 | + } | |
529 | + else if (disp_fits8) { | |
530 | + modrm.mod = 1; // mod=01, use disp8 | |
531 | + *(unsigned char*)stream = (unsigned char)op.disp(); | |
532 | + curRelOpnd[idx]= stream; | |
533 | + ++stream; | |
534 | + } | |
535 | + else { | |
536 | + modrm.mod = 2; // mod=10, use disp32 | |
537 | + *(unsigned*)stream = (unsigned)op.disp(); | |
538 | + curRelOpnd[idx]= stream; | |
539 | + stream += 4; | |
540 | + } | |
541 | + modrm.rm = getHWRegIndex(op.base()); | |
542 | + if (is_em64t_extra_reg(op.base())) { | |
543 | + prex->b = 1; | |
544 | + } | |
545 | + return stream; | |
546 | + } | |
547 | + | |
548 | + // cool, we do have SIB. | |
549 | + ++stream; // bypass SIB in stream | |
550 | + | |
551 | + // {E|R}SP cannot be scaled index, however, R12 which has the same index in modrm - can | |
552 | + assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK)); | |
553 | + | |
554 | + // Only GPRegs can be encoded in the SIB | |
555 | + assert(op.base() == RegName_Null || | |
556 | + getRegKind(op.base()) == OpndKind_GPReg); | |
557 | + assert(op.index() == RegName_Null || | |
558 | + getRegKind(op.index()) == OpndKind_GPReg); | |
559 | + | |
560 | + modrm.rm = 4; // r/m = 100, means 'we have SIB here' | |
561 | + if (op.base() == RegName_Null) { | |
562 | + // no base. | |
563 | + // already checked above if | |
564 | + // the first if() //assert(op.index() != RegName_Null); | |
565 | + | |
566 | + modrm.mod = 0; // mod=00 - here it means 'no base, but disp32' | |
567 | + sib.base = 5; // 101 with mod=00 ^^^ | |
568 | + | |
569 | + // encode at least fake disp32 to avoid having [base=ebp] | |
570 | + *(unsigned*)stream = op.disp(); | |
571 | + curRelOpnd[idx]= stream; | |
572 | + stream += 4; | |
573 | + | |
574 | + unsigned sc = op.scale(); | |
575 | + if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 | |
576 | + else if (sc == 2) { sib.scale = 1; } // SS=01 | |
577 | + else if (sc == 4) { sib.scale = 2; } // SS=10 | |
578 | + else if (sc == 8) { sib.scale = 3; } // SS=11 | |
579 | + sib.index = getHWRegIndex(op.index()); | |
580 | + if (is_em64t_extra_reg(op.index())) { | |
581 | + prex->x = 1; | |
582 | + } | |
583 | + | |
584 | + return stream; | |
585 | + } | |
586 | + | |
587 | + if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) { | |
588 | + modrm.mod = 0; // mod=00, no disp | |
589 | + } | |
590 | + else if (disp_fits8) { | |
591 | + modrm.mod = 1; // mod=01, use disp8 | |
592 | + *(unsigned char*)stream = (unsigned char)op.disp(); | |
593 | + curRelOpnd[idx]= stream; | |
594 | + stream += 1; | |
595 | + } | |
596 | + else { | |
597 | + modrm.mod = 2; // mod=10, use disp32 | |
598 | + *(unsigned*)stream = (unsigned)op.disp(); | |
599 | + curRelOpnd[idx]= stream; | |
600 | + stream += 4; | |
601 | + } | |
602 | + | |
603 | + if (op.index() == RegName_Null) { | |
604 | + assert(op.scale() == 0); // 'scale!=0' has no meaning without index | |
605 | + // the only reason we're here without index, is that we have {E|R}SP | |
606 | + // or R12 as a base. Another possible reason - EBP without a disp - | |
607 | + // is handled above by adding a fake disp8 | |
608 | +#ifdef _EM64T_ | |
609 | + assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) || | |
610 | + equals(op.base(), RegName_R12))); | |
611 | +#else // _EM64T_ | |
612 | + assert(op.base() != RegName_Null && equals(op.base(), REG_STACK)); | |
613 | +#endif //_EM64T_ | |
614 | + sib.scale = 0; // SS = 00 | |
615 | + sib.index = 4; // SS + index=100 means 'no index' | |
616 | + } | |
617 | + else { | |
618 | + unsigned sc = op.scale(); | |
619 | + if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 | |
620 | + else if (sc == 2) { sib.scale = 1; } // SS=01 | |
621 | + else if (sc == 4) { sib.scale = 2; } // SS=10 | |
622 | + else if (sc == 8) { sib.scale = 3; } // SS=11 | |
623 | + sib.index = getHWRegIndex(op.index()); | |
624 | + if (is_em64t_extra_reg(op.index())) { | |
625 | + prex->x = 1; | |
626 | + } | |
627 | + // not an error by itself, but the usage of [index*1] instead | |
628 | + // of [base] is discouraged | |
629 | + assert(op.base() != RegName_Null || op.scale() != 1); | |
630 | + } | |
631 | + sib.base = getHWRegIndex(op.base()); | |
632 | + if (is_em64t_extra_reg(op.base())) { | |
633 | + prex->b = 1; | |
634 | + } | |
635 | + return stream; | |
636 | +} | |
637 | + | |
638 | +char * EncoderBase::nops(char * stream, unsigned howMany) | |
639 | +{ | |
640 | + // Recommended multi-byte NOPs from the Intel architecture manual | |
641 | + static const unsigned char nops[10][9] = { | |
642 | + { 0, }, // 0, this line is dummy and not used in the loop below | |
643 | + { 0x90, }, // 1-byte NOP | |
644 | + { 0x66, 0x90, }, // 2 | |
645 | + { 0x0F, 0x1F, 0x00, }, // 3 | |
646 | + { 0x0F, 0x1F, 0x40, 0x00, }, // 4 | |
647 | + { 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 5 | |
648 | + { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 6 | |
649 | + { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, }, // 7 | |
650 | + { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, }, // 8 | |
651 | + { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 9-byte NOP | |
652 | + }; | |
653 | + | |
654 | + // Start from delivering the longest possible NOPs, then proceed with shorter ones | |
655 | + for (unsigned nopSize=9; nopSize!=0; nopSize--) { | |
656 | + while(howMany>=nopSize) { | |
657 | + const unsigned char* nopBytes = nops[nopSize]; | |
658 | + for (unsigned i=0; i<nopSize; i++) { | |
659 | + stream[i] = nopBytes[i]; | |
660 | + } | |
661 | + stream += nopSize; | |
662 | + howMany -= nopSize; | |
663 | + } | |
664 | + } | |
665 | + char* end = stream + howMany; | |
666 | + return end; | |
667 | +} | |
668 | + | |
669 | +char * EncoderBase::prefix(char* stream, InstPrefix pref) | |
670 | +{ | |
671 | + if (pref== InstPrefix_Null) { | |
672 | + // nothing to do | |
673 | + return stream; | |
674 | + } | |
675 | + *stream = (char)pref; | |
676 | + return stream + 1; | |
677 | +} | |
678 | + | |
679 | + | |
680 | +/** | |
681 | + * | |
682 | + */ | |
683 | +bool EncoderBase::extAllowed(OpndExt opndExt, OpndExt instExt) { | |
684 | + if (instExt == opndExt || instExt == OpndExt_Any || opndExt == OpndExt_Any) { | |
685 | + return true; | |
686 | + } | |
687 | +//asm("int3"); | |
688 | +assert(0); | |
689 | + return false; | |
690 | +} | |
691 | + | |
692 | +static bool try_match(const EncoderBase::OpcodeDesc& odesc, | |
693 | + const EncoderBase::Operands& opnds, bool strict) { | |
694 | + | |
695 | + assert(odesc.roles.count == opnds.count()); | |
696 | + | |
697 | + for(unsigned j=0; j<odesc.roles.count; j++) { | |
698 | + // - the location must match exactly | |
699 | + if ((odesc.opnds[j].kind & opnds[j].kind()) != opnds[j].kind()) { | |
700 | + return false; | |
701 | + } | |
702 | + if (strict) { | |
703 | + // the size must match exactly | |
704 | + if (odesc.opnds[j].size != opnds[j].size()) { | |
705 | + return false; | |
706 | + } | |
707 | + } | |
708 | + else { | |
709 | + // must match only for def operands, and dont care about use ones | |
710 | + // situations like 'mov r8, imm32/mov r32, imm8' so the | |
711 | + // destination operand defines the overall size | |
712 | + if (EncoderBase::getOpndRoles(odesc.roles, j) & OpndRole_Def) { | |
713 | + if (odesc.opnds[j].size != opnds[j].size()) { | |
714 | + return false; | |
715 | + } | |
716 | + } | |
717 | + } | |
718 | + } | |
719 | + return true; | |
720 | +} | |
721 | + | |
//
// Subhash implementation - may be useful in case of many misses during fast
// opcode lookup.
//
// Each slot packs one cached lookup result into a single unsigned:
//      bits  0..15 - operands hash
//      bits 16..23 - index of the opcode description
//      bits 24..31 - mnemonic
//

#ifdef ENCODER_USE_SUBHASH
static unsigned subHash[32];

/**
 * Looks for a cached opcode index for the given mnemonic and operands hash.
 *
 * @return the cached index, or EncoderBase::NOHASH when the slot holds
 *         data for another hash or another mnemonic
 */
static unsigned find(Mnemonic mn, unsigned hash)
{
    unsigned key = hash % COUNTOF(subHash);
    unsigned pack = subHash[key];
    unsigned _hash = pack & 0xFFFF;
    if (_hash != hash) {
        stat.miss(mn);
        return EncoderBase::NOHASH;
    }
    unsigned _mn = (pack >> 24)&0xFF;
    // Compare against the requested mnemonic. This used to be '_mn != _mn',
    // which is never true, so an entry cached for one mnemonic was returned
    // for any other mnemonic having the same operands hash.
    if (_mn != (unsigned)mn) {
        stat.miss(mn);
        return EncoderBase::NOHASH;
    }
    unsigned idx = (pack >> 16) & 0xFF;
    stat.hit(mn);
    return idx;
}

/**
 * Caches the opcode index found for the given mnemonic and operands hash.
 * Values that do not fit into their bit fields are not cached, otherwise
 * they would corrupt the neighbouring fields of the packed entry.
 */
static void put(Mnemonic mn, unsigned hash, unsigned idx)
{
    if (hash > 0xFFFF || idx > 0xFF || (unsigned)mn > 0xFF) {
        return;
    }
    unsigned pack = hash | (idx<<16) | ((unsigned)mn << 24);
    unsigned key = hash % COUNTOF(subHash);
    subHash[key] = pack;
}
#endif
756 | + | |
757 | +const EncoderBase::OpcodeDesc * | |
758 | +EncoderBase::lookup(Mnemonic mn, const Operands& opnds) | |
759 | +{ | |
760 | + const unsigned hash = opnds.hash(); | |
761 | + unsigned opcodeIndex = opcodesHashMap[mn][hash]; | |
762 | +#ifdef ENCODER_USE_SUBHASH | |
763 | + if (opcodeIndex == NOHASH) { | |
764 | + opcodeIndex = find(mn, hash); | |
765 | + } | |
766 | +#endif | |
767 | + | |
768 | + if (opcodeIndex == NOHASH) { | |
769 | + // fast-path did no work. try to lookup sequentially | |
770 | + const OpcodeDesc * odesc = opcodes[mn]; | |
771 | + int idx = -1; | |
772 | + bool found = false; | |
773 | + for (idx=0; !odesc[idx].last; idx++) { | |
774 | + const OpcodeDesc& opcode = odesc[idx]; | |
775 | + if (opcode.platf == OpcodeInfo::decoder) { | |
776 | + continue; | |
777 | + } | |
778 | + if (opcode.roles.count != opnds.count()) { | |
779 | + continue; | |
780 | + } | |
781 | + if (try_match(opcode, opnds, true)) { | |
782 | + found = true; | |
783 | + break; | |
784 | + } | |
785 | + } | |
786 | + if (!found) { | |
787 | + for (idx=0; !odesc[idx].last; idx++) { | |
788 | + const OpcodeDesc& opcode = odesc[idx]; | |
789 | + if (opcode.platf == OpcodeInfo::decoder) { | |
790 | + continue; | |
791 | + } | |
792 | + if (opcode.roles.count != opnds.count()) { | |
793 | + continue; | |
794 | + } | |
795 | + if (try_match(opcode, opnds, false)) { | |
796 | + found = true; | |
797 | + break; | |
798 | + } | |
799 | + } | |
800 | + } | |
801 | + assert(found); | |
802 | + opcodeIndex = idx; | |
803 | +#ifdef ENCODER_USE_SUBHASH | |
804 | + put(mn, hash, opcodeIndex); | |
805 | +#endif | |
806 | + } | |
807 | + assert(opcodeIndex != NOHASH); | |
808 | + const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex]; | |
809 | + assert(!odesc->last); | |
810 | + assert(odesc->roles.count == opnds.count()); | |
811 | + assert(odesc->platf != OpcodeInfo::decoder); | |
812 | +#if !defined(_EM64T_) | |
813 | + // tuning was done for IA32 only, so no size restriction on EM64T | |
814 | + //assert(sizeof(OpcodeDesc)==128); | |
815 | +#endif | |
816 | + return odesc; | |
817 | +} | |
818 | + | |
819 | +char* EncoderBase::getOpndLocation(int index) { | |
820 | + assert(index < 3); | |
821 | + return curRelOpnd[index]; | |
822 | +} | |
823 | + | |
824 | + | |
825 | +Mnemonic EncoderBase::str2mnemonic(const char * mn_name) | |
826 | +{ | |
827 | + for (unsigned m = 1; m<Mnemonic_Count; m++) { | |
828 | + if (!strcmpi(mnemonics[m].name, mn_name)) { | |
829 | + return (Mnemonic)m; | |
830 | + } | |
831 | + } | |
832 | + return Mnemonic_Null; | |
833 | +} | |
834 | + | |
835 | +static const char * conditionStrings[ConditionMnemonic_Count] = { | |
836 | + "O", | |
837 | + "NO", | |
838 | + "B", | |
839 | + "AE", | |
840 | + "Z", | |
841 | + "NZ", | |
842 | + "BE", | |
843 | + "A", | |
844 | + | |
845 | + "S", | |
846 | + "NS", | |
847 | + "P", | |
848 | + "NP", | |
849 | + "L", | |
850 | + "GE", | |
851 | + "LE", | |
852 | + "G", | |
853 | +}; | |
854 | + | |
855 | +const char * getConditionString(ConditionMnemonic cm) { | |
856 | + return conditionStrings[cm]; | |
857 | +} | |
858 | + | |
859 | +static const struct { | |
860 | + char sizeString[12]; | |
861 | + OpndSize size; | |
862 | +} | |
863 | +sizes[] = { | |
864 | + { "Sz8", OpndSize_8 }, | |
865 | + { "Sz16", OpndSize_16 }, | |
866 | + { "Sz32", OpndSize_32 }, | |
867 | + { "Sz64", OpndSize_64 }, | |
868 | +#if !defined(TESTING_ENCODER) | |
869 | + { "Sz80", OpndSize_80 }, | |
870 | + { "Sz128", OpndSize_128 }, | |
871 | +#endif | |
872 | + { "SzAny", OpndSize_Any }, | |
873 | +}; | |
874 | + | |
875 | + | |
876 | +OpndSize getOpndSize(const char * sizeString) | |
877 | +{ | |
878 | + assert(sizeString); | |
879 | + for (unsigned i = 0; i<COUNTOF(sizes); i++) { | |
880 | + if (!strcmpi(sizeString, sizes[i].sizeString)) { | |
881 | + return sizes[i].size; | |
882 | + } | |
883 | + } | |
884 | + return OpndSize_Null; | |
885 | +} | |
886 | + | |
887 | +const char * getOpndSizeString(OpndSize size) { | |
888 | + for( unsigned i = 0; i<COUNTOF(sizes); i++ ) { | |
889 | + if( sizes[i].size==size ) { | |
890 | + return sizes[i].sizeString; | |
891 | + } | |
892 | + } | |
893 | + return NULL; | |
894 | +} | |
895 | + | |
896 | +static const struct { | |
897 | + char kindString[16]; | |
898 | + OpndKind kind; | |
899 | +} | |
900 | +kinds[] = { | |
901 | + { "Null", OpndKind_Null }, | |
902 | + { "GPReg", OpndKind_GPReg }, | |
903 | + { "SReg", OpndKind_SReg }, | |
904 | + { "FPReg", OpndKind_FPReg }, | |
905 | + { "XMMReg", OpndKind_XMMReg }, | |
906 | +#ifdef _HAVE_MMX_ | |
907 | + { "MMXReg", OpndKind_MMXReg }, | |
908 | +#endif | |
909 | + { "StatusReg", OpndKind_StatusReg }, | |
910 | + { "Reg", OpndKind_Reg }, | |
911 | + { "Imm", OpndKind_Imm }, | |
912 | + { "Mem", OpndKind_Mem }, | |
913 | + { "Any", OpndKind_Any }, | |
914 | +}; | |
915 | + | |
916 | +const char * getOpndKindString(OpndKind kind) | |
917 | +{ | |
918 | + for (unsigned i = 0; i<COUNTOF(kinds); i++) { | |
919 | + if (kinds[i].kind==kind) { | |
920 | + return kinds[i].kindString; | |
921 | + } | |
922 | + } | |
923 | + return NULL; | |
924 | +} | |
925 | + | |
926 | +OpndKind getOpndKind(const char * kindString) | |
927 | +{ | |
928 | + assert(kindString); | |
929 | + for (unsigned i = 0; i<COUNTOF(kinds); i++) { | |
930 | + if (!strcmpi(kindString, kinds[i].kindString)) { | |
931 | + return kinds[i].kind; | |
932 | + } | |
933 | + } | |
934 | + return OpndKind_Null; | |
935 | +} | |
936 | + | |
937 | +/** | |
938 | + * A mapping between register string representation and its RegName constant. | |
939 | + */ | |
940 | +static const struct { | |
941 | + char regstring[7]; | |
942 | + RegName regname; | |
943 | +} | |
944 | + | |
945 | +registers[] = { | |
946 | +#ifdef _EM64T_ | |
947 | + {"RAX", RegName_RAX}, | |
948 | + {"RBX", RegName_RBX}, | |
949 | + {"RCX", RegName_RCX}, | |
950 | + {"RDX", RegName_RDX}, | |
951 | + {"RBP", RegName_RBP}, | |
952 | + {"RSI", RegName_RSI}, | |
953 | + {"RDI", RegName_RDI}, | |
954 | + {"RSP", RegName_RSP}, | |
955 | + {"R8", RegName_R8}, | |
956 | + {"R9", RegName_R9}, | |
957 | + {"R10", RegName_R10}, | |
958 | + {"R11", RegName_R11}, | |
959 | + {"R12", RegName_R12}, | |
960 | + {"R13", RegName_R13}, | |
961 | + {"R14", RegName_R14}, | |
962 | + {"R15", RegName_R15}, | |
963 | +#endif | |
964 | + | |
965 | + {"EAX", RegName_EAX}, | |
966 | + {"ECX", RegName_ECX}, | |
967 | + {"EDX", RegName_EDX}, | |
968 | + {"EBX", RegName_EBX}, | |
969 | + {"ESP", RegName_ESP}, | |
970 | + {"EBP", RegName_EBP}, | |
971 | + {"ESI", RegName_ESI}, | |
972 | + {"EDI", RegName_EDI}, | |
973 | +#ifdef _EM64T_ | |
974 | + {"R8D", RegName_R8D}, | |
975 | + {"R9D", RegName_R9D}, | |
976 | + {"R10D", RegName_R10D}, | |
977 | + {"R11D", RegName_R11D}, | |
978 | + {"R12D", RegName_R12D}, | |
979 | + {"R13D", RegName_R13D}, | |
980 | + {"R14D", RegName_R14D}, | |
981 | + {"R15D", RegName_R15D}, | |
982 | +#endif | |
983 | + | |
984 | + {"AX", RegName_AX}, | |
985 | + {"CX", RegName_CX}, | |
986 | + {"DX", RegName_DX}, | |
987 | + {"BX", RegName_BX}, | |
988 | + {"SP", RegName_SP}, | |
989 | + {"BP", RegName_BP}, | |
990 | + {"SI", RegName_SI}, | |
991 | + {"DI", RegName_DI}, | |
992 | + | |
993 | + {"AL", RegName_AL}, | |
994 | + {"CL", RegName_CL}, | |
995 | + {"DL", RegName_DL}, | |
996 | + {"BL", RegName_BL}, | |
997 | +#if !defined(_EM64T_) | |
998 | + {"AH", RegName_AH}, | |
999 | + {"CH", RegName_CH}, | |
1000 | + {"DH", RegName_DH}, | |
1001 | + {"BH", RegName_BH}, | |
1002 | +#else | |
1003 | + {"SPL", RegName_SPL}, | |
1004 | + {"BPL", RegName_BPL}, | |
1005 | + {"SIL", RegName_SIL}, | |
1006 | + {"DIL", RegName_DIL}, | |
1007 | + {"R8L", RegName_R8L}, | |
1008 | + {"R9L", RegName_R9L}, | |
1009 | + {"R10L", RegName_R10L}, | |
1010 | + {"R11L", RegName_R11L}, | |
1011 | + {"R12L", RegName_R12L}, | |
1012 | + {"R13L", RegName_R13L}, | |
1013 | + {"R14L", RegName_R14L}, | |
1014 | + {"R15L", RegName_R15L}, | |
1015 | +#endif | |
1016 | + {"ES", RegName_ES}, | |
1017 | + {"CS", RegName_CS}, | |
1018 | + {"SS", RegName_SS}, | |
1019 | + {"DS", RegName_DS}, | |
1020 | + {"FS", RegName_FS}, | |
1021 | + {"GS", RegName_GS}, | |
1022 | + | |
1023 | + {"FP0", RegName_FP0}, | |
1024 | +/* | |
1025 | + {"FP1", RegName_FP1}, | |
1026 | + {"FP2", RegName_FP2}, | |
1027 | + {"FP3", RegName_FP3}, | |
1028 | + {"FP4", RegName_FP4}, | |
1029 | + {"FP5", RegName_FP5}, | |
1030 | + {"FP6", RegName_FP6}, | |
1031 | + {"FP7", RegName_FP7}, | |
1032 | +*/ | |
1033 | + {"FP0S", RegName_FP0S}, | |
1034 | + {"FP1S", RegName_FP1S}, | |
1035 | + {"FP2S", RegName_FP2S}, | |
1036 | + {"FP3S", RegName_FP3S}, | |
1037 | + {"FP4S", RegName_FP4S}, | |
1038 | + {"FP5S", RegName_FP5S}, | |
1039 | + {"FP6S", RegName_FP6S}, | |
1040 | + {"FP7S", RegName_FP7S}, | |
1041 | + | |
1042 | + {"FP0D", RegName_FP0D}, | |
1043 | + {"FP1D", RegName_FP1D}, | |
1044 | + {"FP2D", RegName_FP2D}, | |
1045 | + {"FP3D", RegName_FP3D}, | |
1046 | + {"FP4D", RegName_FP4D}, | |
1047 | + {"FP5D", RegName_FP5D}, | |
1048 | + {"FP6D", RegName_FP6D}, | |
1049 | + {"FP7D", RegName_FP7D}, | |
1050 | + | |
1051 | + {"XMM0", RegName_XMM0}, | |
1052 | + {"XMM1", RegName_XMM1}, | |
1053 | + {"XMM2", RegName_XMM2}, | |
1054 | + {"XMM3", RegName_XMM3}, | |
1055 | + {"XMM4", RegName_XMM4}, | |
1056 | + {"XMM5", RegName_XMM5}, | |
1057 | + {"XMM6", RegName_XMM6}, | |
1058 | + {"XMM7", RegName_XMM7}, | |
1059 | +#ifdef _EM64T_ | |
1060 | + {"XMM8", RegName_XMM8}, | |
1061 | + {"XMM9", RegName_XMM9}, | |
1062 | + {"XMM10", RegName_XMM10}, | |
1063 | + {"XMM11", RegName_XMM11}, | |
1064 | + {"XMM12", RegName_XMM12}, | |
1065 | + {"XMM13", RegName_XMM13}, | |
1066 | + {"XMM14", RegName_XMM14}, | |
1067 | + {"XMM15", RegName_XMM15}, | |
1068 | +#endif | |
1069 | + | |
1070 | + | |
1071 | + {"XMM0S", RegName_XMM0S}, | |
1072 | + {"XMM1S", RegName_XMM1S}, | |
1073 | + {"XMM2S", RegName_XMM2S}, | |
1074 | + {"XMM3S", RegName_XMM3S}, | |
1075 | + {"XMM4S", RegName_XMM4S}, | |
1076 | + {"XMM5S", RegName_XMM5S}, | |
1077 | + {"XMM6S", RegName_XMM6S}, | |
1078 | + {"XMM7S", RegName_XMM7S}, | |
1079 | +#ifdef _EM64T_ | |
1080 | + {"XMM8S", RegName_XMM8S}, | |
1081 | + {"XMM9S", RegName_XMM9S}, | |
1082 | + {"XMM10S", RegName_XMM10S}, | |
1083 | + {"XMM11S", RegName_XMM11S}, | |
1084 | + {"XMM12S", RegName_XMM12S}, | |
1085 | + {"XMM13S", RegName_XMM13S}, | |
1086 | + {"XMM14S", RegName_XMM14S}, | |
1087 | + {"XMM15S", RegName_XMM15S}, | |
1088 | +#endif | |
1089 | + | |
1090 | + {"XMM0D", RegName_XMM0D}, | |
1091 | + {"XMM1D", RegName_XMM1D}, | |
1092 | + {"XMM2D", RegName_XMM2D}, | |
1093 | + {"XMM3D", RegName_XMM3D}, | |
1094 | + {"XMM4D", RegName_XMM4D}, | |
1095 | + {"XMM5D", RegName_XMM5D}, | |
1096 | + {"XMM6D", RegName_XMM6D}, | |
1097 | + {"XMM7D", RegName_XMM7D}, | |
1098 | +#ifdef _EM64T_ | |
1099 | + {"XMM8D", RegName_XMM8D}, | |
1100 | + {"XMM9D", RegName_XMM9D}, | |
1101 | + {"XMM10D", RegName_XMM10D}, | |
1102 | + {"XMM11D", RegName_XMM11D}, | |
1103 | + {"XMM12D", RegName_XMM12D}, | |
1104 | + {"XMM13D", RegName_XMM13D}, | |
1105 | + {"XMM14D", RegName_XMM14D}, | |
1106 | + {"XMM15D", RegName_XMM15D}, | |
1107 | +#endif | |
1108 | + | |
1109 | + {"EFLGS", RegName_EFLAGS}, | |
1110 | +}; | |
1111 | + | |
1112 | + | |
1113 | +const char * getRegNameString(RegName reg) | |
1114 | +{ | |
1115 | + for (unsigned i = 0; i<COUNTOF(registers); i++) { | |
1116 | + if (registers[i].regname == reg) { | |
1117 | + return registers[i].regstring; | |
1118 | + } | |
1119 | + } | |
1120 | + return "(null)"; | |
1121 | +} | |
1122 | + | |
1123 | +RegName getRegName(const char * regname) | |
1124 | +{ | |
1125 | + if (NULL == regname) { | |
1126 | + return RegName_Null; | |
1127 | + } | |
1128 | + | |
1129 | + for (unsigned i = 0; i<COUNTOF(registers); i++) { | |
1130 | + if (!strcmpi(regname,registers[i].regstring)) { | |
1131 | + return registers[i].regname; | |
1132 | + } | |
1133 | + } | |
1134 | + return RegName_Null; | |
1135 | +} | |
1136 | + | |
1137 | +ENCODER_NAMESPACE_END |
@@ -0,0 +1,748 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | + | |
21 | +/** | |
22 | + * @file | |
23 | + * @brief Main encoding routines and structures. | |
24 | + */ | |
25 | + | |
26 | +#ifndef __ENC_BASE_H_INCLUDED__ | |
27 | +#define __ENC_BASE_H_INCLUDED__ | |
28 | + | |
29 | +#include "enc_defs.h" | |
30 | + | |
31 | + | |
32 | +#include <stdlib.h> | |
33 | +#include <assert.h> | |
34 | +#include <memory.h> | |
35 | + | |
36 | +ENCODER_NAMESPACE_START | |
37 | +struct MnemonicInfo; | |
38 | +struct OpcodeInfo; | |
39 | +struct Rex; | |
40 | + | |
41 | +/** | |
42 | + * @brief Basic facilities for generation of processor's instructions. | |
43 | + * | |
44 | + * The class EncoderBase represents the basic facilities for the encoding of | |
45 | + * processor's instructions on IA32 and EM64T platforms. | |
46 | + * | |
47 | + * The class provides general interface to generate the instructions as well | |
48 | + * as to retrieve some static data about instructions (number of arguments, | |
49 | + * their roles, etc). | |
50 | + * | |
51 | + * Currently, the EncoderBase class is used for both LIL and Jitrino code | |
52 | + * generators. Each of these code generators has its own wrapper to adapt | |
53 | + * this general interface for specific needs - see encoder.h for LIL wrappers | |
54 | + * and Ia32Encoder.h for Jitrino's adapter. | |
55 | + * | |
56 | + * Interface is provided through static methods, no instances of EncoderBase | |
57 | + * to be created. | |
58 | + * | |
59 | + * @todo RIP-based addressing on EM64T - it's not yet supported currently. | |
60 | + */ | |
61 | +class EncoderBase { | |
62 | +public: | |
63 | + class Operands; | |
64 | + struct MnemonicDesc; | |
65 | + /** | |
66 | + * @brief Generates processor's instruction. | |
67 | + * | |
68 | + * @param stream - a buffer to generate into | |
69 | + * @param mn - \link Mnemonic mnemonic \endlink of the instruction | |
70 | + * @param opnds - operands for the instruction | |
71 | + * @returns (stream + length of the just generated instruction) | |
72 | + */ | |
73 | + static char * encode(char * stream, Mnemonic mn, const Operands& opnds); | |
74 | + static char * getOpndLocation(int index); | |
75 | + | |
76 | + /** | |
77 | + * @brief Generates the smallest possible number of NOP-s. | |
78 | + * | |
79 | + * Effectively generates the smallest possible number of instructions, | |
80 | + * which are NOP-s for CPU. Normally used to make a code alignment. | |
81 | + * | |
82 | + * The method inserts exactly the number of bytes specified. It's a caller's | |
83 | + * responsibility to make sure the buffer is big enough. | |
84 | + * | |
85 | + * @param stream - buffer where to generate code into, can not be NULL | |
86 | + * @param howMany - how many bytes to fill with NOP-s | |
87 | + * @return \c (stream+howMany) | |
88 | + */ | |
89 | + static char * nops(char * stream, unsigned howMany); | |
90 | + | |
91 | + /** | |
92 | + * @brief Inserts a prefix into the code buffer. | |
93 | + * | |
94 | + * The method writes no more than one byte into the buffer. This is a | |
95 | + * caller's responsibility to make sure the buffer is big enough. | |
96 | + * | |
97 | + * @param stream - buffer where to insert the prefix | |
98 | + * @param pref - prefix to be inserted. If it's InstPrefix_Null, then | |
99 | + * no action performed and return value is \c stream. | |
100 | + * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream | |
101 | + * otherwise | |
102 | + */ | |
103 | + static char * prefix(char* stream, InstPrefix pref); | |
104 | + | |
105 | + /** | |
106 | + * @brief Determines if operand with opndExt suits the position with instExt. | |
107 | + */ | |
108 | + static bool extAllowed(OpndExt opndExt, OpndExt instExt); | |
109 | + | |
110 | + /** | |
111 | + * @brief Returns MnemonicDesc by the given Mnemonic. | |
112 | + */ | |
113 | + static const MnemonicDesc * getMnemonicDesc(Mnemonic mn) | |
114 | + { | |
115 | + assert(mn < Mnemonic_Count); | |
116 | + return mnemonics + mn; | |
117 | + } | |
118 | + | |
119 | + /** | |
120 | + * @brief Returns a Mnemonic for the given name. | |
121 | + * | |
122 | + * The lookup is case insensitive, if no mnemonic found for the given | |
123 | + * string, then Mnemonic_Null returned. | |
124 | + */ | |
125 | + static Mnemonic str2mnemonic(const char * mn_name); | |
126 | + | |
127 | + /** | |
128 | + * @brief Returns a string representation of the given Mnemonic. | |
129 | + * | |
130 | + * If invalid mnemonic passed, then the behavior is unpredictable. | |
131 | + */ | |
132 | + static const char * getMnemonicString(Mnemonic mn) | |
133 | + { | |
134 | + return getMnemonicDesc(mn)->name; | |
135 | + } | |
136 | + | |
137 | + static const char * toStr(Mnemonic mn) | |
138 | + { | |
139 | + return getMnemonicDesc(mn)->name; | |
140 | + } | |
141 | + | |
142 | + | |
143 | + /** | |
144 | + * @brief Description of operand. | |
145 | + * | |
146 | + * Description of an operand in opcode - its kind, size or RegName if | |
147 | + * operand must be a particular register. | |
148 | + */ | |
149 | + struct OpndDesc { | |
150 | + /** | |
151 | + * @brief Location of the operand. | |
152 | + * | |
153 | + * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem. | |
154 | + */ | |
155 | + OpndKind kind; | |
156 | + /** | |
157 | + * @brief Size of the operand. | |
158 | + */ | |
159 | + OpndSize size; | |
160 | + /** | |
161 | + * @brief Extension of the operand. | |
162 | + */ | |
163 | + OpndExt ext; | |
164 | + /** | |
165 | + * @brief Appropriate RegName if operand must reside on a particular | |
166 | + * register (i.e. CWD/CDQ instructions), RegName_Null | |
167 | + * otherwise. | |
168 | + */ | |
169 | + RegName reg; | |
170 | + }; | |
171 | + | |
172 | + /** | |
173 | + * @brief Description of operands' roles in instruction. | |
174 | + */ | |
175 | + struct OpndRolesDesc { | |
176 | + /** | |
177 | + * @brief Total number of operands in the operation. | |
178 | + */ | |
179 | + unsigned count; | |
180 | + /** | |
181 | + * @brief Number of defs in the operation. | |
182 | + */ | |
183 | + unsigned defCount; | |
184 | + /** | |
185 | + * @brief Number of uses in the operation. | |
186 | + */ | |
187 | + unsigned useCount; | |
188 | + /** | |
189 | + * @brief Operand roles, bit-packed. | |
190 | + * | |
191 | + * Bit-packed info about operands' roles. Each operand's role is | |
192 | + * described by two bits. As decoded by getOpndRoles(), the last operand | |
193 | + * occupies the least significant bits (0,1) and operand#0 the highest pair. | |
194 | + * | |
195 | + * The mask is built by ORing #OpndRole_Def and #OpndRole_Use | |
196 | + * appropriately and shifting left, i.e. for a 3-operand operation | |
197 | + * - operand#0's role would be '(OpndRole_Def|OpndRole_Use)<<4' | |
198 | + * - operand#1's role would be 'OpndRole_Use<<2' | |
199 | + * - and operand#2's role would be, say, 'OpndRole_Def'. | |
200 | + */ | |
201 | + unsigned roles; | |
202 | + }; | |
203 | + | |
204 | + /** | |
205 | + * @brief Extracts appropriate OpndRole for a given operand. | |
206 | + * | |
207 | + * The order of operands is left-to-right, i.e. for MOV, it | |
208 | + * would be 'MOV op0, op1' | |
209 | + */ | |
210 | + static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx) | |
211 | + { | |
212 | + assert(idx < ord.count); | |
213 | + return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3); | |
214 | + } | |
215 | + | |
216 | + /** | |
217 | + * @brief Defines the maximum number of operands for an opcode. | |
218 | + * | |
219 | + * The 3 mostly comes from IDIV/IMUL which both may have up to | |
220 | + * 3 operands. | |
221 | + */ | |
222 | + static const unsigned int MAX_NUM_OPCODE_OPERANDS = 3; | |
223 | + | |
224 | + /** | |
225 | + * @brief Info about single opcode - its opcode bytes, operands, | |
226 | + * operands' roles. | |
227 | + */ | |
228 | + union OpcodeDesc { | |
229 | + char dummy[128]; // To make total size a power of 2 | |
230 | + | |
231 | + struct { | |
232 | + /** | |
233 | + * @brief Raw opcode bytes. | |
234 | + * | |
235 | + * 'Raw' opcode bytes which do not require any analysis and are | |
236 | + * independent from arguments/sizes/etc (may include opcode size | |
237 | + * prefix). | |
238 | + */ | |
239 | + char opcode[5]; | |
240 | + unsigned opcode_len; | |
241 | + unsigned aux0; | |
242 | + unsigned aux1; | |
243 | + /** | |
244 | + * @brief Info about opcode's operands. | |
245 | + */ | |
246 | + OpndDesc opnds[MAX_NUM_OPCODE_OPERANDS]; | |
247 | + unsigned first_opnd; | |
248 | + /** | |
249 | + * @brief Info about operands - total number, number of uses/defs, | |
250 | + * operands' roles. | |
251 | + */ | |
252 | + OpndRolesDesc roles; | |
253 | + /** | |
254 | + * @brief If not zero, then this is final OpcodeDesc structure in | |
255 | + * the list of opcodes for a given mnemonic. | |
256 | + */ | |
257 | + char last; | |
258 | + char platf; | |
259 | + }; | |
260 | + }; | |
261 | +public: | |
262 | + /** | |
263 | + * @brief General info about mnemonic. | |
264 | + */ | |
265 | + struct MnemonicDesc { | |
266 | + /** | |
267 | + * @brief The mnemonic itself. | |
268 | + */ | |
269 | + Mnemonic mn; | |
270 | + /** | |
271 | + * Various characteristics of mnemonic. | |
272 | + * @see MF_ | |
273 | + */ | |
274 | + unsigned flags; | |
275 | + /** | |
276 | + * @brief Operation's operand's count and roles. | |
277 | + * | |
278 | + * For the operations whose opcodes may use different number of | |
279 | + * operands (i.e. IMUL/SHL) either most common value used, or empty | |
280 | + * value left. | |
281 | + */ | |
282 | + OpndRolesDesc roles; | |
283 | + /** | |
284 | + * @brief Print name of the mnemonic. | |
285 | + */ | |
286 | + const char * name; | |
287 | + }; | |
288 | + | |
289 | + | |
290 | + /** | |
291 | + * @brief Magic number, shows a maximum value a hash code can take. | |
292 | + * | |
293 | + * For meaning and arithmetics see enc_tabl.cpp. | |
294 | + * | |
295 | + * The value was increased from '5155' to '8192' to make it aligned | |
296 | + * for faster access in EncoderBase::lookup(). | |
297 | + * | |
298 | + * It was further increased to 16384 as support for 3 operand opcodes | |
299 | + * with XMM registers was added. | |
300 | + */ | |
301 | + static const unsigned int HASH_MAX = 16384; //5155; | |
302 | + /** | |
303 | + * @brief Empty value, used in hash-to-opcode map to show an empty slot. | |
304 | + */ | |
305 | + static const unsigned char NOHASH = 0xFF; | |
306 | + /** | |
307 | + * @brief The name says it all. | |
308 | + */ | |
309 | + static const unsigned char HASH_BITS_PER_OPERAND = 5; | |
310 | + | |
311 | + /** | |
312 | + * @brief Contains info about a single instruction's operand - its | |
313 | + * location, size and a value for immediate or RegName for | |
314 | + * register operands. | |
315 | + */ | |
316 | + class Operand { | |
317 | + public: | |
318 | + /** | |
319 | + * @brief Initializes the instance with empty size and kind. | |
320 | + */ | |
321 | + Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {} | |
322 | + /** | |
323 | + * @brief Creates register operand from given RegName. | |
324 | + */ | |
325 | + Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)), | |
326 | + m_size(getRegSize(reg)), | |
327 | + m_ext(ext), m_reg(reg) | |
328 | + { | |
329 | + hash_it(); | |
330 | + } | |
331 | + /** | |
332 | + * @brief Creates register operand from given RegName and with the | |
333 | + * specified size and kind. | |
334 | + * | |
335 | + * Used to speedup Operand creation as there is no need to extract | |
336 | + * size and kind from the RegName. | |
337 | + * The provided size and kind must match the RegName's ones though. | |
338 | + */ | |
339 | + Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) : | |
340 | + m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg) | |
341 | + { | |
342 | + assert(m_size == getRegSize(reg)); | |
343 | + assert(m_kind == getRegKind(reg)); | |
344 | + hash_it(); | |
345 | + } | |
346 | + /** | |
347 | + * @brief Creates immediate operand with the given size and value. | |
348 | + */ | |
349 | + Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) : | |
350 | + m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival) | |
351 | + { | |
352 | + hash_it(); | |
353 | + } | |
354 | + /** | |
355 | + * @brief Creates immediate operand of OpndSize_32. | |
356 | + */ | |
357 | + Operand(int ival, OpndExt ext = OpndExt_None) : | |
358 | + m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival) | |
359 | + { | |
360 | + hash_it(); | |
361 | + } | |
362 | + /** | |
363 | + * @brief Creates immediate operand of OpndSize_16. | |
364 | + */ | |
365 | + Operand(short ival, OpndExt ext = OpndExt_None) : | |
366 | + m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival) | |
367 | + { | |
368 | + hash_it(); | |
369 | + } | |
370 | + | |
371 | + /** | |
372 | + * @brief Creates immediate operand of OpndSize_8. | |
373 | + */ | |
374 | + Operand(char ival, OpndExt ext = OpndExt_None) : | |
375 | + m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival) | |
376 | + { | |
377 | + hash_it(); | |
378 | + } | |
379 | + | |
380 | + /** | |
381 | + * @brief Creates memory operand. | |
382 | + */ | |
383 | + Operand(OpndSize size, RegName base, RegName index, unsigned scale, | |
384 | + int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext) | |
385 | + { | |
386 | + m_base = base; | |
387 | + m_index = index; | |
388 | + m_scale = scale; | |
389 | + m_disp = disp; | |
390 | + hash_it(); | |
391 | + } | |
392 | + | |
393 | + /** | |
394 | + * @brief Creates memory operand with only base and displacement. | |
395 | + */ | |
396 | + Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) : | |
397 | + m_kind(OpndKind_Mem), m_size(size), m_ext(ext) | |
398 | + { | |
399 | + m_base = base; | |
400 | + m_index = RegName_Null; | |
401 | + m_scale = 0; | |
402 | + m_disp = disp; | |
403 | + hash_it(); | |
404 | + } | |
405 | + // | |
406 | + // general info | |
407 | + // | |
408 | + /** | |
409 | + * @brief Returns kind of the operand. | |
410 | + */ | |
411 | + OpndKind kind(void) const { return m_kind; } | |
412 | + /** | |
413 | + * @brief Returns size of the operand. | |
414 | + */ | |
415 | + OpndSize size(void) const { return m_size; } | |
416 | + /** | |
417 | + * @brief Returns extension of the operand. | |
418 | + */ | |
419 | + OpndExt ext(void) const { return m_ext; } | |
420 | + /** | |
421 | + * @brief Returns hash of the operand. | |
422 | + */ | |
423 | + unsigned hash(void) const { return m_hash; } | |
424 | + // | |
425 | +#ifdef _EM64T_ | |
426 | + bool need_rex(void) const { return m_need_rex; } | |
427 | +#else | |
428 | + bool need_rex(void) const { return false; } | |
429 | +#endif | |
430 | + /** | |
431 | + * @brief Tests whether operand is memory operand. | |
432 | + */ | |
433 | + bool is_mem(void) const { return is_placed_in(OpndKind_Mem); } | |
434 | + /** | |
435 | + * @brief Tests whether operand is immediate operand. | |
436 | + */ | |
437 | + bool is_imm(void) const { return is_placed_in(OpndKind_Imm); } | |
438 | + /** | |
439 | + * @brief Tests whether operand is register operand. | |
440 | + */ | |
441 | + bool is_reg(void) const { return is_placed_in(OpndKind_Reg); } | |
442 | + /** | |
443 | + * @brief Tests whether operand is general-purpose register operand. | |
444 | + */ | |
445 | + bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); } | |
446 | + /** | |
447 | + * @brief Tests whether operand is floating-point pseudo-register operand. | |
448 | + */ | |
449 | + bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); } | |
450 | + /** | |
451 | + * @brief Tests whether operand is XMM register operand. | |
452 | + */ | |
453 | + bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); } | |
454 | +#ifdef _HAVE_MMX_ | |
455 | + /** | |
456 | + * @brief Tests whether operand is MMX register operand. | |
457 | + */ | |
458 | + bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); } | |
459 | +#endif | |
460 | + /** | |
461 | + * @brief Tests whether operand is signed immediate operand. | |
462 | + */ | |
463 | + //bool is_signed(void) const { assert(is_imm()); return m_is_signed; } | |
464 | + | |
465 | + /** | |
466 | + * @brief Returns base of memory operand (RegName_Null if not memory). | |
467 | + */ | |
468 | + RegName base(void) const { return is_mem() ? m_base : RegName_Null; } | |
469 | + /** | |
470 | + * @brief Returns index of memory operand (RegName_Null if not memory). | |
471 | + */ | |
472 | + RegName index(void) const { return is_mem() ? m_index : RegName_Null; } | |
473 | + /** | |
474 | + * @brief Returns scale of memory operand (0 if not memory). | |
475 | + */ | |
476 | + unsigned scale(void) const { return is_mem() ? m_scale : 0; } | |
477 | + /** | |
478 | + * @brief Returns displacement of memory operand (0 if not memory). | |
479 | + */ | |
480 | + int disp(void) const { return is_mem() ? m_disp : 0; } | |
481 | + /** | |
482 | + * @brief Returns RegName of register operand (RegName_Null if not | |
483 | + * register). | |
484 | + */ | |
485 | + RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; } | |
486 | + /** | |
487 | + * @brief Returns value of immediate operand (0 if not immediate). | |
488 | + */ | |
489 | + long long imm(void) const { return is_imm() ? m_imm64 : 0; } | |
490 | + private: | |
491 | + bool is_placed_in(OpndKind kd) const | |
492 | + { | |
493 | + return kd == OpndKind_Reg ? | |
494 | + m_kind == OpndKind_GPReg || | |
495 | +#ifdef _HAVE_MMX_ | |
496 | + m_kind == OpndKind_MMXReg || | |
497 | +#endif | |
498 | + m_kind == OpndKind_FPReg || | |
499 | + m_kind == OpndKind_XMMReg | |
500 | + : kd == m_kind; | |
501 | + } | |
502 | +        void hash_it(void) // derives m_hash and m_need_rex from the fields already set | |
503 | +        { | |
504 | +            m_hash = get_size_hash(m_size) | get_kind_hash(m_kind); | |
505 | +            // Reset unconditionally: Operands::add() reads m_need_rex on every | |
506 | +            // platform, and the non-default constructors rely on this call to set it. | |
507 | +            m_need_rex = false; | |
508 | +#ifdef _EM64T_ | |
509 | +            if (is_reg()) { | |
510 | +                m_need_rex = is_em64t_extra_reg(m_reg); | |
511 | +            } else if (is_mem()) { | |
512 | +                m_need_rex = is_em64t_extra_reg(m_base) || is_em64t_extra_reg(m_index); | |
513 | +            } | |
514 | +#endif | |
515 | +        } | |
516 | + // general info | |
517 | + OpndKind m_kind; | |
518 | + OpndSize m_size; | |
519 | + OpndExt m_ext; | |
520 | + // complex address form support | |
521 | + RegName m_base; | |
522 | + RegName m_index; | |
523 | + unsigned m_scale; | |
524 | + union { | |
525 | + int m_disp; | |
526 | + RegName m_reg; | |
527 | + long long m_imm64; | |
528 | + }; | |
529 | + unsigned m_hash; | |
530 | + bool m_need_rex; | |
531 | + friend class EncoderBase::Operands; | |
532 | + }; | |
533 | + /** | |
534 | + * @brief Simple container for up to 3 Operand-s. | |
535 | + */ | |
536 | + class Operands { | |
537 | + public: | |
538 | + Operands(void) | |
539 | + { | |
540 | + clear(); | |
541 | + } | |
542 | + Operands(const Operand& op0) | |
543 | + { | |
544 | + clear(); | |
545 | + add(op0); | |
546 | + } | |
547 | + | |
548 | + Operands(const Operand& op0, const Operand& op1) | |
549 | + { | |
550 | + clear(); | |
551 | + add(op0); add(op1); | |
552 | + } | |
553 | + | |
554 | + Operands(const Operand& op0, const Operand& op1, const Operand& op2) | |
555 | + { | |
556 | + clear(); | |
557 | + add(op0); add(op1); add(op2); | |
558 | + } | |
559 | + | |
560 | + unsigned count(void) const { return m_count; } | |
561 | + unsigned hash(void) const { return m_hash; } | |
562 | + const Operand& operator[](unsigned idx) const | |
563 | + { | |
564 | + assert(idx<m_count); | |
565 | + return m_operands[idx]; | |
566 | + } | |
567 | + | |
568 | + void add(const Operand& op) | |
569 | + { | |
570 | + assert(m_count < COUNTOF(m_operands)); | |
571 | + m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash(); | |
572 | + m_operands[m_count++] = op; | |
573 | + m_need_rex = m_need_rex || op.m_need_rex; | |
574 | + } | |
575 | +#ifdef _EM64T_ | |
576 | + bool need_rex(void) const { return m_need_rex; } | |
577 | +#else | |
578 | + bool need_rex(void) const { return false; } | |
579 | +#endif | |
580 | + void clear(void) | |
581 | + { | |
582 | + m_count = 0; m_hash = 0; m_need_rex = false; | |
583 | + } | |
584 | + private: | |
585 | + unsigned m_count; | |
586 | + Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )]; | |
587 | + unsigned m_hash; | |
588 | + bool m_need_rex; | |
589 | + }; | |
590 | +public: | |
591 | +#ifdef _DEBUG | |
592 | + /** | |
593 | + * Verifies some presumptions about encoding data table. | |
594 | + * Called automatically during statics initialization. | |
595 | + */ | |
596 | + static int verify(void); | |
597 | +#endif | |
598 | + | |
599 | +private: | |
600 | + /** | |
601 | + * @brief Returns found OpcodeDesc by the given Mnemonic and operands. | |
602 | + */ | |
603 | + static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds); | |
604 | + /** | |
605 | + * @brief Encodes mod/rm byte. | |
606 | + */ | |
607 | + static char* encodeModRM(char* stream, const Operands& opnds, | |
608 | + unsigned idx, const OpcodeDesc * odesc, Rex * prex); | |
609 | + /** | |
610 | + * @brief Encodes special things of opcode description - '/r', 'ib', etc. | |
611 | + */ | |
612 | + static char* encode_aux(char* stream, unsigned aux, | |
613 | + const Operands& opnds, const OpcodeDesc * odesc, | |
614 | + unsigned * pargsCount, Rex* prex); | |
615 | +#ifdef _EM64T_ | |
616 | + /** | |
617 | + * @brief Returns true if the 'reg' argument represents one of the new | |
618 | + * EM64T registers - R8(D)-R15(D). | |
619 | + * | |
620 | + * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not | |
621 | + * considered as 'extra'. | |
622 | + */ | |
623 | + static bool is_em64t_extra_reg(const RegName reg) | |
624 | + { | |
625 | + if (needs_rex_r(reg)) { | |
626 | + return true; | |
627 | + } | |
628 | + if (RegName_SPL <= reg && reg <= RegName_R15L) { | |
629 | + return true; | |
630 | + } | |
631 | + return false; | |
632 | + } | |
633 | + static bool needs_rex_r(const RegName reg) | |
634 | + { | |
635 | + if (RegName_R8 <= reg && reg <= RegName_R15) { | |
636 | + return true; | |
637 | + } | |
638 | + if (RegName_R8D <= reg && reg <= RegName_R15D) { | |
639 | + return true; | |
640 | + } | |
641 | + if (RegName_R8S <= reg && reg <= RegName_R15S) { | |
642 | + return true; | |
643 | + } | |
644 | + if (RegName_R8L <= reg && reg <= RegName_R15L) { | |
645 | + return true; | |
646 | + } | |
647 | + if (RegName_XMM8 <= reg && reg <= RegName_XMM15) { | |
648 | + return true; | |
649 | + } | |
650 | + if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) { | |
651 | + return true; | |
652 | + } | |
653 | + if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) { | |
654 | + return true; | |
655 | + } | |
656 | + return false; | |
657 | + } | |
658 | + /** | |
659 | + * @brief Returns a 'processor's index' of the register - the index | |
660 | + * used to encode the register in ModRM/SIB bytes. | |
661 | + * | |
662 | + * For the new EM64T registers the 'HW index' differs from the index | |
663 | + * encoded in RegName. For old-fashion registers it's effectively the | |
664 | + * same as ::getRegIndex(RegName). | |
665 | + */ | |
666 | + static unsigned char getHWRegIndex(const RegName reg) | |
667 | + { | |
668 | + if (getRegKind(reg) != OpndKind_GPReg) { | |
669 | + return getRegIndex(reg); | |
670 | + } | |
671 | + if (RegName_SPL <= reg && reg<=RegName_DIL) { | |
672 | + return getRegIndex(reg); | |
673 | + } | |
674 | + if (RegName_R8L<= reg && reg<=RegName_R15L) { | |
675 | + return getRegIndex(reg) - getRegIndex(RegName_R8L); | |
676 | + } | |
677 | + return is_em64t_extra_reg(reg) ? | |
678 | + getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg); | |
679 | + } | |
680 | +#else | |
681 | + static unsigned char getHWRegIndex(const RegName reg) | |
682 | + { | |
683 | + return getRegIndex(reg); | |
684 | + } | |
685 | + static bool is_em64t_extra_reg(const RegName reg) | |
686 | + { | |
687 | + return false; | |
688 | + } | |
689 | +#endif | |
690 | +public: | |
691 | + static unsigned char get_size_hash(OpndSize size) { | |
692 | + return (size <= OpndSize_64) ? size_hash[size] : 0xFF; | |
693 | + } | |
694 | + static unsigned char get_kind_hash(OpndKind kind) { | |
695 | + return (kind <= OpndKind_Mem) ? kind_hash[kind] : 0xFF; | |
696 | + } | |
697 | + | |
698 | + /** | |
699 | + * @brief A table used for the fast computation of hash value. | |
700 | + * | |
701 | + * A change must be strictly balanced with hash-related functions and data | |
702 | + * in enc_base.h/.cpp. | |
703 | + */ | |
704 | + static const unsigned char size_hash[OpndSize_64+1]; | |
705 | + /** | |
706 | + * @brief A table used for the fast computation of hash value. | |
707 | + * | |
708 | + * A change must be strictly balanced with hash-related functions and data | |
709 | + * in enc_base.h/.cpp. | |
710 | + */ | |
711 | + static const unsigned char kind_hash[OpndKind_Mem+1]; | |
712 | + /** | |
713 | + * @brief Maximum number of opcodes used for a single mnemonic. | |
714 | + * | |
715 | + * No arithmetics behind the number, simply estimated. | |
716 | + */ | |
717 | + static const unsigned int MAX_OPCODES = 32; //20; | |
718 | + /** | |
719 | + * @brief Mapping between operands hash code and operands. | |
720 | + */ | |
721 | + static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX]; | |
722 | + /** | |
723 | + * @brief Array of mnemonics. | |
724 | + */ | |
725 | + static MnemonicDesc mnemonics[Mnemonic_Count]; | |
726 | + /** | |
727 | + * @brief Array of available opcodes. | |
728 | + */ | |
729 | + static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES]; | |
730 | + | |
731 | + static int buildTable(void); | |
732 | + static void buildMnemonicDesc(const MnemonicInfo * minfo); | |
733 | + /** | |
734 | + * @brief Computes hash value for the given operands. | |
735 | + */ | |
736 | + static unsigned short getHash(const OpcodeInfo* odesc); | |
737 | + /** | |
738 | + * @brief Dummy variable, for automatic invocation of buildTable() at | |
739 | + * startup. | |
740 | + */ | |
741 | + static int dummy; | |
742 | + | |
743 | + static char * curRelOpnd[3]; | |
744 | +}; | |
745 | + | |
746 | +ENCODER_NAMESPACE_END | |
747 | + | |
748 | +#endif // ifndef __ENC_BASE_H_INCLUDED__ |
@@ -0,0 +1,786 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | +#ifndef _ENCODER_DEFS_H_ | |
21 | +#define _ENCODER_DEFS_H_ | |
22 | + | |
23 | + | |
24 | +// Isolates an experimental encoder (or one that is being tuned) in a separate | |
25 | +// namespace so that it can coexist with a stable one in the same bundle. | |
26 | +#ifdef ENCODER_ISOLATE | |
27 | + #define ENCODER_NAMESPACE_START namespace enc_ia32 { | |
28 | + #define ENCODER_NAMESPACE_END } | |
29 | +#else | |
30 | + #define ENCODER_NAMESPACE_START | |
31 | + #define ENCODER_NAMESPACE_END | |
32 | +#endif | |
33 | + | |
34 | +#include <assert.h> | |
35 | +#include "enc_defs_ext.h" | |
36 | + | |
37 | +#ifndef COUNTOF | |
38 | + /** | |
39 | + * Number of items in an array (the argument must be an array, not a pointer). | |
40 | + */ | |
41 | + #define COUNTOF(a) (sizeof(a)/sizeof((a)[0])) | |
42 | +#endif | |
43 | + | |
44 | +#ifdef _EM64T_ | |
45 | + /** | |
46 | + * The stack pointer register of the platform's default size. | |
47 | + */ | |
48 | + #define REG_STACK RegName_RSP | |
49 | + /** | |
50 | + * The GP register with the highest index number. | |
51 | + */ | |
52 | + #define REG_MAX RegName_R15 | |
53 | + /** | |
54 | + * Total number of GP registers including stack pointer. NOTE(review): RAX..R15 is 16 registers - confirm that 15 is intended. | |
55 | + */ | |
56 | + #define MAX_REGS 15 | |
57 | +#else | |
58 | + #define REG_STACK RegName_ESP | |
59 | + #define REG_MAX RegName_EDI | |
60 | + #define MAX_REGS 8 | |
61 | +#endif | |
62 | + | |
63 | +ENCODER_NAMESPACE_START | |
64 | + | |
65 | +/** | |
66 | + * Number of bytes 'eaten' by an ordinary PUSH/POP; defined as the size of a pointer. | |
67 | + */ | |
68 | +#define STACK_SLOT_SIZE (sizeof(void*)) | |
69 | + | |
70 | + | |
71 | +/** | |
72 | + * The alignment recommended by the Intel Architecture Manual for instructions | |
73 | + * that are targets of jumps (the macro name keeps its historical spelling). | |
74 | + */ | |
75 | +#define JMP_TARGET_ALIGMENT (16) | |
76 | +/** | |
77 | + * The maximum possible size of a single native instruction. | |
78 | + */ | |
79 | +#define MAX_NATIVE_INST_SIZE (15) | |
80 | +/** | |
81 | + * The enum OpndKind describes an operand's location: memory, an immediate or a register. | |
82 | + * Most values occupy a distinct bit, so it can be used as a bit mask. | |
83 | + */ | |
84 | +typedef enum OpndKind { | |
85 | + /** | |
86 | + * Any change must be kept in sync with at least the following places: | |
87 | + * Ia32::Constraint-s use the OpndKind as a mask | |
88 | + * encoder.cpp & encoder_master_info.cpp use OpndKind as an index for hashing | |
89 | + * - there are probably many more places | |
90 | + * | |
91 | + * NOTE: the MMXReg kind is incompatible with the current constraints framework, | |
92 | + * as it is not encoded as a mask (0x03 overlaps GPReg|SReg). | |
93 | + */ | |
94 | + OpndKind_Null=0, | |
95 | + OpndKind_GPReg = 0x01, OpndKind_MinRegKind = OpndKind_GPReg, | |
96 | + OpndKind_SReg = 0x02, | |
97 | +#ifdef _HAVE_MMX_ | |
98 | + OpndKind_MMXReg = 0x03, | |
99 | +#endif | |
100 | + OpndKind_FPReg = 0x04, | |
101 | + OpndKind_XMMReg = 0x08, | |
102 | + OpndKind_OtherReg = 0x10, | |
103 | + OpndKind_StatusReg = OpndKind_OtherReg, | |
104 | + OpndKind_MaxRegKind = OpndKind_StatusReg, // the largest existing register kind | |
105 | + OpndKind_MaxReg, // OpndKind_MaxRegKind + 1 (0x11), to be used in array definitions | |
106 | + // Non-register operand kinds. | |
107 | + OpndKind_Immediate = 0x20, OpndKind_Imm=OpndKind_Immediate, | |
108 | + OpndKind_Memory = 0x40, OpndKind_Mem=OpndKind_Memory, | |
109 | + // Unions: 0x1F covers every register kind bit, 0x7F covers every kind bit. | |
110 | + OpndKind_Reg = 0x1F, | |
111 | + OpndKind_Any = 0x7F, | |
112 | + // Synthetic constants. Normally not used anywhere in code, but they give a | |
113 | + // human-readable value when shown under the debugger. | |
114 | + OpndKind_GPReg_Mem = OpndKind_GPReg|OpndKind_Mem, | |
115 | +#ifdef _HAVE_MMX_ | |
116 | + OpndKind_MMXReg_Mem = OpndKind_MMXReg|OpndKind_Mem, | |
117 | +#endif | |
118 | + OpndKind_XMMReg_Mem = OpndKind_XMMReg|OpndKind_Mem, | |
119 | +} OpndKind; | |
120 | + | |
121 | +/** | |
122 | + * Defines the type of extension allowed for a particular operand. | |
123 | + * For example, imul r32,r_m32,imm8 sign-extends imm8 before performing the multiplication. | |
124 | + * To satisfy the instruction constraints, the immediate operand should be either OpndExt_Signed | |
125 | + * or OpndExt_Any. OpndExt_Any (0x3) is the union of the Signed (0x1) and Zero (0x2) bits. | |
126 | + */ | |
127 | +typedef enum OpndExt { | |
128 | + OpndExt_None = 0x0, | |
129 | + OpndExt_Signed = 0x1, | |
130 | + OpndExt_Zero = 0x2, | |
131 | + OpndExt_Any = 0x3, | |
132 | +}OpndExt; | |
133 | + | |
134 | +/** | |
135 | + * The enum OpndRole defines the role of an operand in an instruction. | |
136 | + * It can be used as a mask to combine def and use. The complete def+use | |
137 | + * info fits in 2 bits, which is relied upon by, say, Encoder::OpndRole. | |
138 | + */ | |
139 | +//TODO: this duplicates a Role used in Ia32::Inst. That duplicate enum should be removed. | |
140 | +typedef enum OpndRole { | |
141 | + OpndRole_Null=0, | |
142 | + OpndRole_Use=0x1, | |
143 | + OpndRole_Def=0x2, | |
144 | + OpndRole_UseDef=OpndRole_Use|OpndRole_Def, | |
145 | + OpndRole_All=0xffff, | |
146 | +} OpndRole; | |
147 | + | |
148 | +// Packs an operand kind (bits 24 and up), an operand size (bits 16 and up) and a register index (low 8 bits) into one value. | |
149 | +#define REGNAME(k,s,i) ( (((k) & OpndKind_Any)<<24) | (((s) & OpndSize_Any)<<16) | ((i)&0xFF) ) | |
150 | + | |
151 | +// Gregory - | |
152 | +// It is critical that all register indexes (3rd number) inside of the | |
153 | +// following table go in ascending order. That is R8 goes after | |
154 | +// RDI. It is necessary for decoder when extending registers from RAX-RDI | |
155 | +// to R8-R15 by simply adding 8 to the index on EM64T architecture | |
156 | +typedef enum RegName { | |
157 | + | |
158 | + RegName_Null = 0, | |
159 | + | |
160 | +#ifdef _EM64T_ | |
161 | + /* | |
162 | + The index part of the RegName-s for RAX-RDI, EAX-EDI, AX-DI and AL-BH is | |
163 | + the same as the index used during instruction encoding. The same rule | |
164 | + applies to the XMM registers on IA32. | |
165 | + For the new EM64T registers (both GP and XMM) the index needs to be corrected to | |
166 | + obtain the index used in the processor's instructions. | |
167 | + */ | |
168 | + RegName_RAX = REGNAME(OpndKind_GPReg,OpndSize_64,0), | |
169 | + RegName_RCX = REGNAME(OpndKind_GPReg,OpndSize_64,1), | |
170 | + RegName_RDX = REGNAME(OpndKind_GPReg,OpndSize_64,2), | |
171 | + RegName_RBX = REGNAME(OpndKind_GPReg,OpndSize_64,3), | |
172 | + RegName_RSP = REGNAME(OpndKind_GPReg,OpndSize_64,4), | |
173 | + RegName_RBP = REGNAME(OpndKind_GPReg,OpndSize_64,5), | |
174 | + RegName_RSI = REGNAME(OpndKind_GPReg,OpndSize_64,6), | |
175 | + RegName_RDI = REGNAME(OpndKind_GPReg,OpndSize_64,7), | |
176 | + | |
177 | + RegName_R8 = REGNAME(OpndKind_GPReg,OpndSize_64,8), | |
178 | + RegName_R9 = REGNAME(OpndKind_GPReg,OpndSize_64,9), | |
179 | + RegName_R10 = REGNAME(OpndKind_GPReg,OpndSize_64,10), | |
180 | + RegName_R11 = REGNAME(OpndKind_GPReg,OpndSize_64,11), | |
181 | + RegName_R12 = REGNAME(OpndKind_GPReg,OpndSize_64,12), | |
182 | + RegName_R13 = REGNAME(OpndKind_GPReg,OpndSize_64,13), | |
183 | + RegName_R14 = REGNAME(OpndKind_GPReg,OpndSize_64,14), | |
184 | + RegName_R15 = REGNAME(OpndKind_GPReg,OpndSize_64,15), | |
185 | +#endif //~_EM64T_ | |
186 | + | |
187 | + RegName_EAX=REGNAME(OpndKind_GPReg,OpndSize_32,0), | |
188 | + RegName_ECX=REGNAME(OpndKind_GPReg,OpndSize_32,1), | |
189 | + RegName_EDX=REGNAME(OpndKind_GPReg,OpndSize_32,2), | |
190 | + RegName_EBX=REGNAME(OpndKind_GPReg,OpndSize_32,3), | |
191 | + RegName_ESP=REGNAME(OpndKind_GPReg,OpndSize_32,4), | |
192 | + RegName_EBP=REGNAME(OpndKind_GPReg,OpndSize_32,5), | |
193 | + RegName_ESI=REGNAME(OpndKind_GPReg,OpndSize_32,6), | |
194 | + RegName_EDI=REGNAME(OpndKind_GPReg,OpndSize_32,7), | |
195 | + | |
196 | +#ifdef _EM64T_ | |
197 | + RegName_R8D = REGNAME(OpndKind_GPReg,OpndSize_32,8), | |
198 | + RegName_R9D = REGNAME(OpndKind_GPReg,OpndSize_32,9), | |
199 | + RegName_R10D = REGNAME(OpndKind_GPReg,OpndSize_32,10), | |
200 | + RegName_R11D = REGNAME(OpndKind_GPReg,OpndSize_32,11), | |
201 | + RegName_R12D = REGNAME(OpndKind_GPReg,OpndSize_32,12), | |
202 | + RegName_R13D = REGNAME(OpndKind_GPReg,OpndSize_32,13), | |
203 | + RegName_R14D = REGNAME(OpndKind_GPReg,OpndSize_32,14), | |
204 | + RegName_R15D = REGNAME(OpndKind_GPReg,OpndSize_32,15), | |
205 | +#endif //~_EM64T_ | |
206 | + | |
207 | + RegName_AX=REGNAME(OpndKind_GPReg,OpndSize_16,0), | |
208 | + RegName_CX=REGNAME(OpndKind_GPReg,OpndSize_16,1), | |
209 | + RegName_DX=REGNAME(OpndKind_GPReg,OpndSize_16,2), | |
210 | + RegName_BX=REGNAME(OpndKind_GPReg,OpndSize_16,3), | |
211 | + RegName_SP=REGNAME(OpndKind_GPReg,OpndSize_16,4), | |
212 | + RegName_BP=REGNAME(OpndKind_GPReg,OpndSize_16,5), | |
213 | + RegName_SI=REGNAME(OpndKind_GPReg,OpndSize_16,6), | |
214 | + RegName_DI=REGNAME(OpndKind_GPReg,OpndSize_16,7), | |
215 | + | |
216 | +#ifdef _EM64T_ | |
217 | + RegName_R8S = REGNAME(OpndKind_GPReg,OpndSize_16,8), | |
218 | + RegName_R9S = REGNAME(OpndKind_GPReg,OpndSize_16,9), | |
219 | + RegName_R10S = REGNAME(OpndKind_GPReg,OpndSize_16,10), | |
220 | + RegName_R11S = REGNAME(OpndKind_GPReg,OpndSize_16,11), | |
221 | + RegName_R12S = REGNAME(OpndKind_GPReg,OpndSize_16,12), | |
222 | + RegName_R13S = REGNAME(OpndKind_GPReg,OpndSize_16,13), | |
223 | + RegName_R14S = REGNAME(OpndKind_GPReg,OpndSize_16,14), | |
224 | + RegName_R15S = REGNAME(OpndKind_GPReg,OpndSize_16,15), | |
225 | +#endif //~_EM64T_ | |
226 | + | |
227 | + RegName_AL=REGNAME(OpndKind_GPReg,OpndSize_8,0), | |
228 | + RegName_CL=REGNAME(OpndKind_GPReg,OpndSize_8,1), | |
229 | + RegName_DL=REGNAME(OpndKind_GPReg,OpndSize_8,2), | |
230 | + RegName_BL=REGNAME(OpndKind_GPReg,OpndSize_8,3), | |
231 | + // FIXME: Used in enc_tabl.cpp | |
232 | + // AH is not accessible on EM64T, instead encoded register is SPL, so decoded | |
233 | + // register will return incorrect enum | |
234 | + RegName_AH=REGNAME(OpndKind_GPReg,OpndSize_8,4), | |
235 | +#if !defined(_EM64T_) | |
236 | + RegName_CH=REGNAME(OpndKind_GPReg,OpndSize_8,5), | |
237 | + RegName_DH=REGNAME(OpndKind_GPReg,OpndSize_8,6), | |
238 | + RegName_BH=REGNAME(OpndKind_GPReg,OpndSize_8,7), | |
239 | +#else | |
240 | + RegName_SPL=REGNAME(OpndKind_GPReg,OpndSize_8,4), | |
241 | + RegName_BPL=REGNAME(OpndKind_GPReg,OpndSize_8,5), | |
242 | + RegName_SIL=REGNAME(OpndKind_GPReg,OpndSize_8,6), | |
243 | + RegName_DIL=REGNAME(OpndKind_GPReg,OpndSize_8,7), | |
244 | + RegName_R8L=REGNAME(OpndKind_GPReg,OpndSize_8,8), | |
245 | + RegName_R9L=REGNAME(OpndKind_GPReg,OpndSize_8,9), | |
246 | + RegName_R10L=REGNAME(OpndKind_GPReg,OpndSize_8,10), | |
247 | + RegName_R11L=REGNAME(OpndKind_GPReg,OpndSize_8,11), | |
248 | + RegName_R12L=REGNAME(OpndKind_GPReg,OpndSize_8,12), | |
249 | + RegName_R13L=REGNAME(OpndKind_GPReg,OpndSize_8,13), | |
250 | + RegName_R14L=REGNAME(OpndKind_GPReg,OpndSize_8,14), | |
251 | + RegName_R15L=REGNAME(OpndKind_GPReg,OpndSize_8,15), | |
252 | +#endif | |
253 | + | |
254 | + RegName_ES=REGNAME(OpndKind_SReg,OpndSize_16,0), | |
255 | + RegName_CS=REGNAME(OpndKind_SReg,OpndSize_16,1), | |
256 | + RegName_SS=REGNAME(OpndKind_SReg,OpndSize_16,2), | |
257 | + RegName_DS=REGNAME(OpndKind_SReg,OpndSize_16,3), | |
258 | + RegName_FS=REGNAME(OpndKind_SReg,OpndSize_16,4), | |
259 | + RegName_GS=REGNAME(OpndKind_SReg,OpndSize_16,5), | |
260 | + | |
261 | + RegName_EFLAGS=REGNAME(OpndKind_StatusReg,OpndSize_32,0), | |
262 | + | |
263 | +#if !defined(TESTING_ENCODER) | |
264 | + RegName_FP0=REGNAME(OpndKind_FPReg,OpndSize_80,0), | |
265 | + RegName_FP1=REGNAME(OpndKind_FPReg,OpndSize_80,1), | |
266 | + RegName_FP2=REGNAME(OpndKind_FPReg,OpndSize_80,2), | |
267 | + RegName_FP3=REGNAME(OpndKind_FPReg,OpndSize_80,3), | |
268 | + RegName_FP4=REGNAME(OpndKind_FPReg,OpndSize_80,4), | |
269 | + RegName_FP5=REGNAME(OpndKind_FPReg,OpndSize_80,5), | |
270 | + RegName_FP6=REGNAME(OpndKind_FPReg,OpndSize_80,6), | |
271 | + RegName_FP7=REGNAME(OpndKind_FPReg,OpndSize_80,7), | |
272 | +#endif | |
273 | + RegName_FP0S=REGNAME(OpndKind_FPReg,OpndSize_32,0), | |
274 | + RegName_FP1S=REGNAME(OpndKind_FPReg,OpndSize_32,1), | |
275 | + RegName_FP2S=REGNAME(OpndKind_FPReg,OpndSize_32,2), | |
276 | + RegName_FP3S=REGNAME(OpndKind_FPReg,OpndSize_32,3), | |
277 | + RegName_FP4S=REGNAME(OpndKind_FPReg,OpndSize_32,4), | |
278 | + RegName_FP5S=REGNAME(OpndKind_FPReg,OpndSize_32,5), | |
279 | + RegName_FP6S=REGNAME(OpndKind_FPReg,OpndSize_32,6), | |
280 | + RegName_FP7S=REGNAME(OpndKind_FPReg,OpndSize_32,7), | |
281 | + | |
282 | + RegName_FP0D=REGNAME(OpndKind_FPReg,OpndSize_64,0), | |
283 | + RegName_FP1D=REGNAME(OpndKind_FPReg,OpndSize_64,1), | |
284 | + RegName_FP2D=REGNAME(OpndKind_FPReg,OpndSize_64,2), | |
285 | + RegName_FP3D=REGNAME(OpndKind_FPReg,OpndSize_64,3), | |
286 | + RegName_FP4D=REGNAME(OpndKind_FPReg,OpndSize_64,4), | |
287 | + RegName_FP5D=REGNAME(OpndKind_FPReg,OpndSize_64,5), | |
288 | + RegName_FP6D=REGNAME(OpndKind_FPReg,OpndSize_64,6), | |
289 | + RegName_FP7D=REGNAME(OpndKind_FPReg,OpndSize_64,7), | |
290 | + | |
291 | +#if !defined(TESTING_ENCODER) | |
292 | + RegName_XMM0=REGNAME(OpndKind_XMMReg,OpndSize_128,0), | |
293 | + RegName_XMM1=REGNAME(OpndKind_XMMReg,OpndSize_128,1), | |
294 | + RegName_XMM2=REGNAME(OpndKind_XMMReg,OpndSize_128,2), | |
295 | + RegName_XMM3=REGNAME(OpndKind_XMMReg,OpndSize_128,3), | |
296 | + RegName_XMM4=REGNAME(OpndKind_XMMReg,OpndSize_128,4), | |
297 | + RegName_XMM5=REGNAME(OpndKind_XMMReg,OpndSize_128,5), | |
298 | + RegName_XMM6=REGNAME(OpndKind_XMMReg,OpndSize_128,6), | |
299 | + RegName_XMM7=REGNAME(OpndKind_XMMReg,OpndSize_128,7), | |
300 | + // EM64T-only 128-bit registers: indexes continue at 8, like XMM8S/XMM8D and R8-R15. | |
301 | +#ifdef _EM64T_ | |
302 | + RegName_XMM8 = REGNAME(OpndKind_XMMReg,OpndSize_128,8), | |
303 | + RegName_XMM9 = REGNAME(OpndKind_XMMReg,OpndSize_128,9), | |
304 | + RegName_XMM10 = REGNAME(OpndKind_XMMReg,OpndSize_128,10), | |
305 | + RegName_XMM11 = REGNAME(OpndKind_XMMReg,OpndSize_128,11), | |
306 | + RegName_XMM12 = REGNAME(OpndKind_XMMReg,OpndSize_128,12), | |
307 | + RegName_XMM13 = REGNAME(OpndKind_XMMReg,OpndSize_128,13), | |
308 | + RegName_XMM14 = REGNAME(OpndKind_XMMReg,OpndSize_128,14), | |
309 | + RegName_XMM15 = REGNAME(OpndKind_XMMReg,OpndSize_128,15), | |
310 | +#endif //~_EM64T_ | |
311 | + | |
312 | +#endif // ~TESTING_ENCODER | |
313 | + | |
314 | + RegName_XMM0S=REGNAME(OpndKind_XMMReg,OpndSize_32,0), | |
315 | + RegName_XMM1S=REGNAME(OpndKind_XMMReg,OpndSize_32,1), | |
316 | + RegName_XMM2S=REGNAME(OpndKind_XMMReg,OpndSize_32,2), | |
317 | + RegName_XMM3S=REGNAME(OpndKind_XMMReg,OpndSize_32,3), | |
318 | + RegName_XMM4S=REGNAME(OpndKind_XMMReg,OpndSize_32,4), | |
319 | + RegName_XMM5S=REGNAME(OpndKind_XMMReg,OpndSize_32,5), | |
320 | + RegName_XMM6S=REGNAME(OpndKind_XMMReg,OpndSize_32,6), | |
321 | + RegName_XMM7S=REGNAME(OpndKind_XMMReg,OpndSize_32,7), | |
322 | +#ifdef _EM64T_ | |
323 | + RegName_XMM8S=REGNAME(OpndKind_XMMReg,OpndSize_32,8), | |
324 | + RegName_XMM9S=REGNAME(OpndKind_XMMReg,OpndSize_32,9), | |
325 | + RegName_XMM10S=REGNAME(OpndKind_XMMReg,OpndSize_32,10), | |
326 | + RegName_XMM11S=REGNAME(OpndKind_XMMReg,OpndSize_32,11), | |
327 | + RegName_XMM12S=REGNAME(OpndKind_XMMReg,OpndSize_32,12), | |
328 | + RegName_XMM13S=REGNAME(OpndKind_XMMReg,OpndSize_32,13), | |
329 | + RegName_XMM14S=REGNAME(OpndKind_XMMReg,OpndSize_32,14), | |
330 | + RegName_XMM15S=REGNAME(OpndKind_XMMReg,OpndSize_32,15), | |
331 | +#endif // ifdef _EM64T_ | |
332 | + RegName_XMM0D=REGNAME(OpndKind_XMMReg,OpndSize_64,0), | |
333 | + RegName_XMM1D=REGNAME(OpndKind_XMMReg,OpndSize_64,1), | |
334 | + RegName_XMM2D=REGNAME(OpndKind_XMMReg,OpndSize_64,2), | |
335 | + RegName_XMM3D=REGNAME(OpndKind_XMMReg,OpndSize_64,3), | |
336 | + RegName_XMM4D=REGNAME(OpndKind_XMMReg,OpndSize_64,4), | |
337 | + RegName_XMM5D=REGNAME(OpndKind_XMMReg,OpndSize_64,5), | |
338 | + RegName_XMM6D=REGNAME(OpndKind_XMMReg,OpndSize_64,6), | |
339 | + RegName_XMM7D=REGNAME(OpndKind_XMMReg,OpndSize_64,7), | |
340 | +#ifdef _EM64T_ | |
341 | + RegName_XMM8D=REGNAME(OpndKind_XMMReg,OpndSize_64,8), | |
342 | + RegName_XMM9D=REGNAME(OpndKind_XMMReg,OpndSize_64,9), | |
343 | + RegName_XMM10D=REGNAME(OpndKind_XMMReg,OpndSize_64,10), | |
344 | + RegName_XMM11D=REGNAME(OpndKind_XMMReg,OpndSize_64,11), | |
345 | + RegName_XMM12D=REGNAME(OpndKind_XMMReg,OpndSize_64,12), | |
346 | + RegName_XMM13D=REGNAME(OpndKind_XMMReg,OpndSize_64,13), | |
347 | + RegName_XMM14D=REGNAME(OpndKind_XMMReg,OpndSize_64,14), | |
348 | + RegName_XMM15D=REGNAME(OpndKind_XMMReg,OpndSize_64,15), | |
349 | +#endif // ifdef _EM64T_ | |
350 | +#ifdef _HAVE_MMX_ | |
351 | + RegName_MMX0=REGNAME(OpndKind_MMXReg,OpndSize_64,0), | |
352 | + RegName_MMX1=REGNAME(OpndKind_MMXReg,OpndSize_64,1), | |
353 | + RegName_MMX2=REGNAME(OpndKind_MMXReg,OpndSize_64,2), | |
354 | + RegName_MMX3=REGNAME(OpndKind_MMXReg,OpndSize_64,3), | |
355 | + RegName_MMX4=REGNAME(OpndKind_MMXReg,OpndSize_64,4), | |
356 | + RegName_MMX5=REGNAME(OpndKind_MMXReg,OpndSize_64,5), | |
357 | + RegName_MMX6=REGNAME(OpndKind_MMXReg,OpndSize_64,6), | |
358 | + RegName_MMX7=REGNAME(OpndKind_MMXReg,OpndSize_64,7), | |
359 | +#endif // _HAVE_MMX_ | |
360 | +} RegName; | |
361 | + | |
362 | +#if 0 // Android x86: use mnemonics defined in enc_defs_ext.h | |
363 | +/** | |
364 | + * Conditional mnemonics. | |
365 | + * The values match the 'real' (==processor's) values of the appropriate | |
366 | + * condition values used in the opcodes. | |
367 | + */ | |
368 | +enum ConditionMnemonic { | |
369 | + | |
370 | + ConditionMnemonic_O=0, | |
371 | + ConditionMnemonic_NO=1, | |
372 | + ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B, | |
373 | + ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB, | |
374 | + ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z, | |
375 | + ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ, | |
376 | + ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE, | |
377 | + ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE, | |
378 | + | |
379 | + ConditionMnemonic_S=8, | |
380 | + ConditionMnemonic_NS=9, | |
381 | + ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P, | |
382 | + ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP, | |
383 | + ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L, | |
384 | + ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL, | |
385 | + ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE, | |
386 | + ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE, | |
387 | + ConditionMnemonic_Count=16 | |
388 | +}; | |
389 | + | |
390 | + | |
391 | +#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond | |
392 | + | |
393 | +//========================================================================================================= | |
394 | +enum Mnemonic { | |
395 | + | |
396 | +Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL, | |
397 | +Mnemonic_ADC, // Add with Carry | |
398 | +Mnemonic_ADD, // Add | |
399 | +Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values | |
400 | +Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values | |
401 | +Mnemonic_AND, // Logical AND | |
402 | + | |
403 | +Mnemonic_BSF, // Bit scan forward | |
404 | +Mnemonic_BSR, // Bit scan reverse | |
405 | + | |
406 | +Mnemonic_CALL, // Call Procedure | |
407 | +Mnemonic_CMC, // Complement Carry Flag | |
408 | +Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword | |
409 | +Mnemonic_CMOVcc, // Conditional Move | |
410 | + CCM(CMOV,O), | |
411 | + CCM(CMOV,NO), | |
412 | + CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C), | |
413 | + CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC), | |
414 | + CCM(CMOV,Z), CCM(CMOV,E), | |
415 | + CCM(CMOV,NZ), CCM(CMOV,NE), | |
416 | + CCM(CMOV,BE), CCM(CMOV,NA), | |
417 | + CCM(CMOV,NBE), CCM(CMOV,A), | |
418 | + | |
419 | + CCM(CMOV,S), | |
420 | + CCM(CMOV,NS), | |
421 | + CCM(CMOV,P), CCM(CMOV,PE), | |
422 | + CCM(CMOV,NP), CCM(CMOV,PO), | |
423 | + CCM(CMOV,L), CCM(CMOV,NGE), | |
424 | + CCM(CMOV,NL), CCM(CMOV,GE), | |
425 | + CCM(CMOV,LE), CCM(CMOV,NG), | |
426 | + CCM(CMOV,NLE), CCM(CMOV,G), | |
427 | + | |
428 | +Mnemonic_CMP, // Compare Two Operands | |
429 | +Mnemonic_CMPXCHG, // Compare and exchange | |
430 | +Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes | |
431 | +Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI | |
432 | +Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI | |
433 | +Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI | |
434 | +// | |
435 | +// double -> float | |
436 | +Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value | |
437 | +// double -> I_32 | |
438 | +Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer | |
439 | +// double [truncated] -> I_32 | |
440 | +Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer | |
441 | +// | |
442 | +// float -> double | |
443 | +Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value | |
444 | +// float -> I_32 | |
445 | +Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer | |
446 | +// float [truncated] -> I_32 | |
447 | +Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer | |
448 | +// | |
449 | +// I_32 -> double | |
450 | +Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value | |
451 | +// I_32 -> float | |
452 | +Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value | |
453 | + | |
454 | +Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS | |
455 | +Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS | |
456 | +Mnemonic_DEC, // Decrement by 1 | |
457 | +//Mnemonic_DIV, // Unsigned Divide | |
458 | +Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values | |
459 | +Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values | |
460 | + | |
461 | +#ifdef _HAVE_MMX_ | |
462 | +Mnemonic_EMMS, // Empty MMX Technology State | |
463 | +#endif | |
464 | + | |
465 | +Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters | |
466 | +Mnemonic_FLDCW, // Load FPU control word | |
467 | +Mnemonic_FADDP, | |
468 | +Mnemonic_FLDZ, | |
469 | +Mnemonic_FADD, | |
470 | +Mnemonic_FSUBP, | |
471 | +Mnemonic_FSUB, | |
472 | +Mnemonic_FISUB, | |
473 | +Mnemonic_FMUL, | |
474 | +Mnemonic_FMULP, | |
475 | +Mnemonic_FDIVP, | |
476 | +Mnemonic_FDIV, | |
477 | +Mnemonic_FUCOMPP, | |
478 | +Mnemonic_FRNDINT, | |
479 | +Mnemonic_FNSTCW, // Store FPU control word | |
480 | +Mnemonic_FSTSW, // Store FPU status word | |
481 | +Mnemonic_FNSTSW, // Store FPU status word | |
482 | +//Mnemonic_FDECSTP, // Decrement Stack-Top Pointer | |
483 | +Mnemonic_FILD, // Load Integer | |
484 | +Mnemonic_FLD, // Load Floating Point Value | |
485 | +Mnemonic_FLDLG2, | |
486 | +Mnemonic_FLDLN2, | |
487 | +Mnemonic_FLD1, | |
488 | + | |
489 | +Mnemonic_FCLEX, // Clear Exceptions | |
490 | +Mnemonic_FCHS, // Change sign of ST0 | |
491 | +Mnemonic_FNCLEX, // Clear Exceptions | |
492 | + | |
493 | +//Mnemonic_FINCSTP, // Increment Stack-Top Pointer | |
494 | +Mnemonic_FIST, // Store Integer | |
495 | +Mnemonic_FISTP, // Store Integer, pop FPU stack | |
496 | +Mnemonic_FISTTP, // Store Integer with Truncation | |
497 | +Mnemonic_FPREM, // Partial Remainder | |
498 | +Mnemonic_FPREM1, // Partial Remainder | |
499 | +Mnemonic_FST, // Store Floating Point Value | |
500 | +Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack | |
501 | +Mnemonic_FSQRT, //Computes the square root of ST(0) and stores the result in ST(0) | |
502 | +Mnemonic_FABS, //Computes the absolute value of ST(0) and stores the result in ST(0) | |
503 | +Mnemonic_FSIN, //Computes the sine of ST(0) and stores the result in ST(0) | |
504 | +Mnemonic_FCOS, //Computes the cosine of ST(0) and stores the result in ST(0) | |
505 | +Mnemonic_FPTAN, //Computes the tangent of ST(0), stores it in ST(0) and pushes 1.0 onto the FP stack | |
506 | +Mnemonic_FYL2X, | |
507 | +Mnemonic_FYL2XP1, | |
508 | +Mnemonic_F2XM1, | |
509 | +Mnemonic_FPATAN, | |
510 | +Mnemonic_FXCH, | |
511 | +Mnemonic_FSCALE, | |
512 | + | |
513 | +Mnemonic_XCHG, | |
514 | +Mnemonic_DIV, // Unsigned Divide | |
515 | +Mnemonic_IDIV, // Signed Divide | |
516 | +Mnemonic_MUL, // Unsigned Multiply | |
517 | +Mnemonic_IMUL, // Signed Multiply | |
518 | +Mnemonic_INC, // Increment by 1 | |
519 | +Mnemonic_INT3, // Call break point | |
520 | +Mnemonic_Jcc, // Jump if Condition Is Met | |
521 | + CCM(J,O), | |
522 | + CCM(J,NO), | |
523 | + CCM(J,B), CCM(J,NAE), CCM(J,C), | |
524 | + CCM(J,NB), CCM(J,AE), CCM(J,NC), | |
525 | + CCM(J,Z), CCM(J,E), | |
526 | + CCM(J,NZ), CCM(J,NE), | |
527 | + CCM(J,BE), CCM(J,NA), | |
528 | + CCM(J,NBE), CCM(J,A), | |
529 | + CCM(J,S), | |
530 | + CCM(J,NS), | |
531 | + CCM(J,P), CCM(J,PE), | |
532 | + CCM(J,NP), CCM(J,PO), | |
533 | + CCM(J,L), CCM(J,NGE), | |
534 | + CCM(J,NL), CCM(J,GE), | |
535 | + CCM(J,LE), CCM(J,NG), | |
536 | + CCM(J,NLE), CCM(J,G), | |
537 | +Mnemonic_JMP, // Jump | |
538 | +Mnemonic_LEA, // Load Effective Address | |
539 | +Mnemonic_LEAVE, // High Level Procedure Exit | |
540 | +Mnemonic_LOOP, // Loop according to ECX counter | |
541 | +Mnemonic_LOOPE, // Loop according to ECX counter | |
542 | +Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX | |
543 | +Mnemonic_LAHF, // Load Flags into AH | |
544 | +Mnemonic_MOV, // Move | |
545 | +Mnemonic_MOVD, // Move Double word | |
546 | +Mnemonic_MOVQ, // Move Quadword | |
547 | +/*Mnemonic_MOVS, // Move Data from String to String*/ | |
548 | +// MOVS is a special case: see encoding table for more details, | |
549 | +Mnemonic_MOVS8, Mnemonic_MOVS16, Mnemonic_MOVS32, Mnemonic_MOVS64, | |
550 | +// | |
551 | +Mnemonic_MOVAPD, // Move Scalar Double-Precision Floating-Point Value | |
552 | +Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value | |
553 | +Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values | |
554 | +Mnemonic_MOVSX, // Move with Sign-Extension | |
555 | +Mnemonic_MOVZX, // Move with Zero-Extend | |
556 | +//Mnemonic_MUL, // Unsigned Multiply | |
557 | +Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values | |
558 | +Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values | |
559 | +Mnemonic_NEG, // Two's Complement Negation | |
560 | +Mnemonic_NOP, // No Operation | |
561 | +Mnemonic_NOT, // One's Complement Negation | |
562 | +Mnemonic_OR, // Logical Inclusive OR | |
563 | +Mnemonic_PREFETCH, // prefetch | |
564 | + | |
565 | +#ifdef _HAVE_MMX_ | |
566 | + Mnemonic_PADDQ, // Add Packed Quadword Integers | |
567 | + Mnemonic_PAND, // Logical AND | |
568 | + Mnemonic_POR, // Bitwise Logical OR | |
569 | + Mnemonic_PSUBQ, // Subtract Packed Quadword Integers | |
570 | +#endif | |
571 | + | |
572 | +Mnemonic_PXOR, // Logical Exclusive OR | |
573 | +Mnemonic_POP, // Pop a Value from the Stack | |
574 | +Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack | |
575 | +Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack | |
576 | +Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack | |
577 | +Mnemonic_RET, // Return from Procedure | |
578 | + | |
579 | +Mnemonic_SETcc, // Set Byte on Condition | |
580 | + CCM(SET,O), | |
581 | + CCM(SET,NO), | |
582 | + CCM(SET,B), CCM(SET,NAE), CCM(SET,C), | |
583 | + CCM(SET,NB), CCM(SET,AE), CCM(SET,NC), | |
584 | + CCM(SET,Z), CCM(SET,E), | |
585 | + CCM(SET,NZ), CCM(SET,NE), | |
586 | + CCM(SET,BE), CCM(SET,NA), | |
587 | + CCM(SET,NBE), CCM(SET,A), | |
588 | + CCM(SET,S), | |
589 | + CCM(SET,NS), | |
590 | + CCM(SET,P), CCM(SET,PE), | |
591 | + CCM(SET,NP), CCM(SET,PO), | |
592 | + CCM(SET,L), CCM(SET,NGE), | |
593 | + CCM(SET,NL), CCM(SET,GE), | |
594 | + CCM(SET,LE), CCM(SET,NG), | |
595 | + CCM(SET,NLE), CCM(SET,G), | |
596 | + | |
597 | +Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left | |
598 | +Mnemonic_SAR, // Shift right | |
599 | +Mnemonic_ROR, // Rotate right | |
600 | +Mnemonic_RCR, // Rotate right through CARRY flag | |
601 | +Mnemonic_ROL, // Rotate left | |
602 | +Mnemonic_RCL, // Rotate left through CARRY flag | |
603 | +Mnemonic_SHR, // Unsigned shift right | |
604 | +Mnemonic_SHRD, // Double Precision Shift Right | |
605 | +Mnemonic_SHLD, // Double Precision Shift Left | |
606 | + | |
607 | +Mnemonic_SBB, // Integer Subtraction with Borrow | |
608 | +Mnemonic_SUB, // Subtract | |
609 | +Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values | |
610 | +Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values | |
611 | + | |
612 | +Mnemonic_TEST, // Logical Compare | |
613 | + | |
614 | +Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS | |
615 | +Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS | |
616 | + | |
617 | +Mnemonic_XOR, // Logical Exclusive OR | |
618 | +// | |
619 | +// packed things, | |
620 | +// | |
621 | +Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values | |
622 | +Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values | |
623 | + | |
624 | +Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values | |
625 | +Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers | |
626 | + | |
627 | +Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values | |
628 | +Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers | |
629 | +// | |
630 | +// String operations | |
631 | +// | |
632 | +Mnemonic_STD, // Set direction flag | |
633 | +Mnemonic_CLD, // Clear direction flag | |
634 | +Mnemonic_SCAS, // Scan string | |
635 | +Mnemonic_STOS, // Store string | |
636 | + | |
637 | +// | |
638 | +Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions | |
639 | +// | |
640 | +Mnemonic_Count | |
641 | +}; | |
642 | + | |
643 | +#undef CCM | |
644 | +#endif | |
645 | + | |
646 | +/** | |
647 | + * @brief Instruction prefix byte values, grouped as in the architecture manual. | |
648 | + */ | |
649 | +typedef enum InstPrefix { | |
650 | + InstPrefix_Null = 0, | |
651 | + // Group 1: lock and repeat prefixes | |
652 | + InstPrefix_LOCK = 0xF0, | |
653 | + InstPrefix_REPNE = 0xF2, | |
654 | + InstPrefix_REPNZ = InstPrefix_REPNE, | |
655 | + InstPrefix_REP = 0xF3, InstPrefix_REPZ = InstPrefix_REP, | |
656 | + // Group 2: segment override prefixes | |
657 | + InstPrefix_CS = 0x2E, | |
658 | + InstPrefix_SS = 0x36, | |
659 | + InstPrefix_DS = 0x3E, | |
660 | + InstPrefix_ES = 0x26, | |
661 | + InstPrefix_FS = 0x64, | |
662 | + InstPrefix_GS = 0x65, | |
663 | + // Branch hints; they share their values with InstPrefix_DS (0x3E) and InstPrefix_CS (0x2E) | |
664 | + InstPrefix_HintTaken = 0x3E, | |
665 | + InstPrefix_HintNotTaken = 0x2E, | |
666 | + // Group 3: operand-size override | |
667 | + InstPrefix_OpndSize = 0x66, | |
668 | + // Group 4: address-size override | |
669 | + InstPrefix_AddrSize = 0x67 | |
670 | +} InstPrefix; | |
671 | + | |
672 | +inline unsigned getSizeBytes(OpndSize sz) | |
673 | +{ | |
674 | + // Map the 8/16/32/64-bit operand sizes to their widths in bytes; 0 marks "no match". | |
675 | + const unsigned bytes = (sz==OpndSize_8) ? 1 : (sz==OpndSize_16) ? 2 : | |
676 | + (sz==OpndSize_32) ? 4 : (sz==OpndSize_64) ? 8 : 0; | |
677 | + // Any other size is not supported here: trip the assert, then fall back to returning 0. | |
678 | + if (bytes==0) { assert(false); } | |
679 | + return bytes; | |
680 | +} | |
681 | + | |
682 | +inline bool isRegKind(OpndKind kind) | |
683 | +{ | |
684 | + return !(kind<OpndKind_GPReg || OpndKind_MaxRegKind<kind); // true iff kind lies within [OpndKind_GPReg, OpndKind_MaxRegKind] | |
685 | +} | |
686 | + | |
687 | +/** | |
688 | + * @brief Returns RegName for a given name. | |
689 | + * | |
690 | + * Name is case-insensitive. | |
691 | + * @param regname - string name of a register | |
692 | + * @return RegName for the given name, or RegName_Null if name is invalid | |
693 | + */ | |
694 | +RegName getRegName(const char * regname); | |
695 | +/** | |
696 | + * Packs the given OpndKind, OpndSize and register index into a RegName via REGNAME; the result is not validated. | |
697 | + */ | |
698 | +inline RegName getRegName(OpndKind k, OpndSize s, int idx) | |
699 | +{ | |
700 | + return static_cast<RegName>(REGNAME(k, s, idx)); | |
701 | +} | |
702 | +/** | |
703 | + * Builds a bit mask with a single bit set at the position of the register's index (the low 8 bits of the RegName). The shift is done on an unsigned 1 to match the return type, so index 31 does not shift into the sign bit of an int; the index must be smaller than the bit width of unsigned. | |
704 | + */ | |
705 | +inline unsigned getRegMask(RegName reg) | |
706 | +{ | |
707 | + return 1u<<(reg&0xff); | |
708 | +} | |
709 | +/** | |
710 | + * @brief Returns the OpndKind stored in the bits of the RegName above bit 23. | |
711 | + */ | |
712 | +inline OpndKind getRegKind(RegName reg) | |
713 | +{ | |
714 | + return static_cast<OpndKind>(reg >> 24); | |
715 | +} | |
716 | +/** | |
717 | + * @brief Returns the OpndSize stored in bits 16..23 of the RegName. | |
718 | + */ | |
719 | +inline OpndSize getRegSize(RegName reg) | |
720 | +{ | |
721 | + return static_cast<OpndSize>((reg >> 16) & 0xFF); | |
722 | +} | |
723 | +/** | |
724 | + * Returns the register index stored in the low 8 bits of the given RegName. | |
725 | + */ | |
726 | +inline unsigned char getRegIndex(RegName reg) | |
727 | +{ | |
728 | + return static_cast<unsigned char>(reg & 0xFF); | |
729 | +} | |
730 | +/** | |
731 | + * Returns a string name of the given RegName. The name returned is in upper-case. | |
732 | + * Returns NULL if invalid RegName specified. | |
733 | + */ | |
734 | +const char * getRegNameString(RegName reg); | |
735 | +/** | |
736 | + * Returns string name of a given OpndSize. | |
737 | + * Returns NULL if invalid OpndSize passed. | |
738 | + */ | |
739 | +const char * getOpndSizeString(OpndSize size); | |
740 | +/** | |
741 | + * Returns OpndSize passed by its string representation (case insensitive). | |
742 | + * Returns OpndSize_Null if invalid string specified. | |
743 | + * The 'sizeString' can not be NULL. | |
744 | + */ | |
745 | +OpndSize getOpndSize(const char * sizeString); | |
746 | +/** | |
747 | + * Returns string name of a given OpndKind. | |
748 | + * Returns NULL if the passed kind is invalid. | |
749 | + */ | |
750 | +const char * getOpndKindString(OpndKind kind); | |
751 | +/** | |
752 | + * Returns OpndKind found by its string representation (case insensitive). | |
753 | + * Returns OpndKind_Null if the name is invalid. | |
754 | + * The 'kindString' can not be NULL. | |
755 | + */ | |
756 | +OpndKind getOpndKind(const char * kindString); | |
757 | +/** | |
758 | + * Returns a string for the given ConditionMnemonic. | |
759 | + */ | |
760 | +const char * getConditionString(ConditionMnemonic cm); | |
761 | + | |
762 | +/** | |
763 | + * Builds a RegName that keeps the kind and index of the given RegName but takes the requested size | |
764 | + * (e.g. getAliasReg(EAX, OpndSize_16) => AX; getAliasReg(BL, OpndSize_32) => EBX). | |
765 | + * The result is not validated in any way and thus may not name a real register. | |
766 | + * Note that the aliasing does not work for at least the AH, BH, CH, DH, ESI, EDI, ESP and EBP regs. | |
767 | + */ | |
768 | +inline RegName getAliasReg(RegName reg, OpndSize sz) | |
769 | +{ | |
770 | + return static_cast<RegName>(REGNAME(getRegKind(reg), sz, getRegIndex(reg))); | |
771 | +} | |
772 | + | |
773 | +/** | |
774 | + * @brief Tests whether two RegName-s have the same kind and the same index. | |
775 | + * The size field is not compared, so RegName-s that differ only in size compare equal. | |
776 | + * @note Does work for 8 bit general purpose registers (AH, AL, BH, BL, etc). | |
777 | + */ | |
778 | +inline bool equals(RegName r0, RegName r1) | |
779 | +{ | |
780 | + const bool sameIndex = getRegIndex(r0) == getRegIndex(r1); | |
781 | + return sameIndex && getRegKind(r0) == getRegKind(r1); | |
782 | +} | |
783 | + | |
784 | +ENCODER_NAMESPACE_END | |
785 | + | |
786 | +#endif // ifndef _ENCODER_DEFS_H_ |
@@ -0,0 +1,365 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2012 The Android Open Source Project | |
3 | + * | |
4 | + * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | + * you may not use this file except in compliance with the License. | |
6 | + * You may obtain a copy of the License at | |
7 | + * | |
8 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | + * | |
10 | + * Unless required by applicable law or agreed to in writing, software | |
11 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | + * See the License for the specific language governing permissions and | |
14 | + * limitations under the License. | |
15 | + */ | |
16 | + | |
17 | +#ifndef _ENCODER_DEFS_EXT_H_ | |
18 | +#define _ENCODER_DEFS_EXT_H_ | |
19 | + | |
20 | + | |
21 | +// Used to isolate an experimental (or still being tuned) encoder in a separate | |
22 | +// namespace so that it can coexist with a stable one in the same bundle. | |
23 | +#ifdef ENCODER_ISOLATE | |
24 | + #define ENCODER_NAMESPACE_START namespace enc_ia32 { | |
25 | + #define ENCODER_NAMESPACE_END }; | |
26 | +#else | |
27 | + #define ENCODER_NAMESPACE_START | |
28 | + #define ENCODER_NAMESPACE_END | |
29 | +#endif | |
30 | + | |
31 | +ENCODER_NAMESPACE_START | |
32 | +typedef enum OpndSize { | |
33 | + /** | |
34 | + * Operand sizes, one distinct bit per size. A change must be balanced with at least the following places: | |
35 | + * Ia32IRConstants.h :: getByteSize() uses some presumptions about OpndSize_ values | |
36 | + * Ia32::Constraint-s use the OpndSize as a mask | |
37 | + * encoder.cpp & encoder_master_info.cpp use OpndSize as an index for hashing | |
38 | + * - perhaps there are many more places | |
39 | + */ | |
40 | + OpndSize_Null = 0, | |
41 | + OpndSize_8 = 0x01, | |
42 | + OpndSize_16 = 0x02, | |
43 | + OpndSize_32 = 0x04, | |
44 | + OpndSize_64 = 0x08, | |
45 | +#if !defined(TESTING_ENCODER) | |
46 | + OpndSize_80 = 0x10, | |
47 | + OpndSize_128 = 0x20, | |
48 | +#endif | |
49 | + OpndSize_Max, /* implicit value: one more than the enumerator before it (0x21, or 0x09 when TESTING_ENCODER is defined) */ | |
50 | + OpndSize_Any = 0x3F, | |
51 | + OpndSize_Default = OpndSize_Any | |
52 | +} OpndSize; | |
53 | + | |
54 | +/** | |
55 | + * Conditional mnemonics. Synonymous condition names (e.g. B/NAE/C) share one value. | |
56 | + * The values match the 'real' (==processor's) values of the appropriate | |
57 | + * condition codes used in the opcodes; ConditionMnemonic_Count (16) is the number of distinct codes. | |
58 | + */ | |
59 | +typedef enum ConditionMnemonic { | |
60 | + | |
61 | + ConditionMnemonic_O=0, | |
62 | + ConditionMnemonic_NO=1, | |
63 | + ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B, | |
64 | + ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB, | |
65 | + ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z, | |
66 | + ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ, | |
67 | + ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE, | |
68 | + ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE, | |
69 | + | |
70 | + ConditionMnemonic_S=8, | |
71 | + ConditionMnemonic_NS=9, | |
72 | + ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P, | |
73 | + ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP, | |
74 | + ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L, | |
75 | + ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL, | |
76 | + ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE, | |
77 | + ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE, | |
78 | + ConditionMnemonic_Count=16 | |
79 | +} ConditionMnemonic; | |
80 | + | |
81 | + | |
82 | +#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond | |
83 | + | |
84 | +//========================================================================================================= | |
85 | +typedef enum Mnemonic { | |
86 | + | |
87 | +Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL, | |
88 | +Mnemonic_JMP, // Jump | |
89 | +Mnemonic_MOV, // Move | |
90 | +Mnemonic_Jcc, // Jump if Condition Is Met; each CCM(J,x) entry below equals Mnemonic_Jcc + ConditionMnemonic_x | |
91 | + CCM(J,O), | |
92 | + CCM(J,NO), | |
93 | + CCM(J,B), CCM(J,NAE), CCM(J,C), | |
94 | + CCM(J,NB), CCM(J,AE), CCM(J,NC), | |
95 | + CCM(J,Z), CCM(J,E), | |
96 | + CCM(J,NZ), CCM(J,NE), | |
97 | + CCM(J,BE), CCM(J,NA), | |
98 | + CCM(J,NBE), CCM(J,A), | |
99 | + CCM(J,S), | |
100 | + CCM(J,NS), | |
101 | + CCM(J,P), CCM(J,PE), | |
102 | + CCM(J,NP), CCM(J,PO), | |
103 | + CCM(J,L), CCM(J,NGE), | |
104 | + CCM(J,NL), CCM(J,GE), | |
105 | + CCM(J,LE), CCM(J,NG), | |
106 | + CCM(J,NLE), CCM(J,G), | |
107 | +Mnemonic_CALL, // Call Procedure | |
108 | + | |
109 | +Mnemonic_ADC, // Add with Carry | |
110 | +Mnemonic_ADD, // Add | |
111 | +Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values | |
112 | +Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values | |
113 | +Mnemonic_AND, // Logical AND | |
114 | + | |
115 | +Mnemonic_BSF, // Bit scan forward | |
116 | +Mnemonic_BSR, // Bit scan reverse | |
117 | + | |
118 | +Mnemonic_CMC, // Complement Carry Flag | |
119 | +Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword | |
120 | +Mnemonic_CMOVcc, // Conditional Move; each CCM(CMOV,x) entry below equals Mnemonic_CMOVcc + ConditionMnemonic_x | |
121 | + CCM(CMOV,O), | |
122 | + CCM(CMOV,NO), | |
123 | + CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C), | |
124 | + CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC), | |
125 | + CCM(CMOV,Z), CCM(CMOV,E), | |
126 | + CCM(CMOV,NZ), CCM(CMOV,NE), | |
127 | + CCM(CMOV,BE), CCM(CMOV,NA), | |
128 | + CCM(CMOV,NBE), CCM(CMOV,A), | |
129 | + | |
130 | + CCM(CMOV,S), | |
131 | + CCM(CMOV,NS), | |
132 | + CCM(CMOV,P), CCM(CMOV,PE), | |
133 | + CCM(CMOV,NP), CCM(CMOV,PO), | |
134 | + CCM(CMOV,L), CCM(CMOV,NGE), | |
135 | + CCM(CMOV,NL), CCM(CMOV,GE), | |
136 | + CCM(CMOV,LE), CCM(CMOV,NG), | |
137 | + CCM(CMOV,NLE), CCM(CMOV,G), | |
138 | + | |
139 | +Mnemonic_CMP, // Compare Two Operands | |
140 | +Mnemonic_CMPXCHG, // Compare and exchange | |
141 | +Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes | |
142 | +Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI | |
143 | +Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI | |
144 | +Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI | |
145 | +// | |
146 | +// double -> float | |
147 | +Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value | |
148 | +// double -> I_32 | |
149 | +Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer | |
150 | +// double [truncated] -> I_32 | |
151 | +Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer | |
152 | +// | |
153 | +// float -> double | |
154 | +Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value | |
155 | +// float -> I_32 | |
156 | +Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer | |
157 | +// float [truncated] -> I_32 | |
158 | +Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer | |
159 | +// | |
160 | +// I_32 -> double | |
161 | +Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value | |
162 | +// I_32 -> float | |
163 | +Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value | |
164 | + | |
165 | +Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS | |
166 | +Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS | |
167 | +Mnemonic_DEC, // Decrement by 1 | |
168 | +Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values | |
169 | +Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values | |
170 | +Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters | |
171 | +Mnemonic_FLDCW, // Load FPU control word | |
172 | +Mnemonic_FADDP, | |
173 | +Mnemonic_FLDZ, | |
174 | +Mnemonic_FADD, | |
175 | +Mnemonic_FSUBP, | |
176 | +Mnemonic_FSUB, | |
177 | +Mnemonic_FISUB, | |
178 | +Mnemonic_FMUL, | |
179 | +Mnemonic_FMULP, | |
180 | +Mnemonic_FDIVP, | |
181 | +Mnemonic_FDIV, | |
182 | +Mnemonic_FUCOM, | |
183 | +Mnemonic_FUCOMI, | |
184 | +Mnemonic_FUCOMP, | |
185 | +Mnemonic_FUCOMIP, | |
186 | +Mnemonic_FUCOMPP, | |
187 | +Mnemonic_FRNDINT, | |
188 | +Mnemonic_FNSTCW, // Store FPU control word | |
189 | +Mnemonic_FSTSW, // Store FPU status word | |
190 | +Mnemonic_FNSTSW, // Store FPU status word | |
191 | +Mnemonic_FILD, // Load Integer | |
192 | +Mnemonic_FLD, // Load Floating Point Value | |
193 | +Mnemonic_FLDLG2, | |
194 | +Mnemonic_FLDLN2, | |
195 | +Mnemonic_FLD1, | |
196 | + | |
197 | +Mnemonic_FCLEX, // Clear Exceptions | |
198 | +Mnemonic_FCHS, // Change sign of ST0 | |
199 | +Mnemonic_FNCLEX, // Clear Exceptions | |
200 | +Mnemonic_FIST, // Store Integer | |
201 | +Mnemonic_FISTP, // Store Integer, pop FPU stack | |
202 | +Mnemonic_FISTTP, // Store Integer with Truncation | |
203 | +Mnemonic_FPREM, // Partial Remainder | |
204 | +Mnemonic_FPREM1, // Partial Remainder | |
205 | +Mnemonic_FST, // Store Floating Point Value | |
206 | +Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack | |
207 | +Mnemonic_FSQRT, //Computes the square root of ST(0) and stores the result in ST(0); the FP stack is not popped | |
208 | +Mnemonic_FABS, //Replaces ST(0) with its absolute value; the FP stack is not popped | |
209 | +Mnemonic_FSIN, //Replaces ST(0) with its sine; the FP stack is not popped | |
210 | +Mnemonic_FCOS, //Replaces ST(0) with its cosine; the FP stack is not popped | |
211 | +Mnemonic_FPTAN, //Replaces ST(0) with its tangent and then pushes 1.0 onto the FP stack | |
212 | +Mnemonic_FYL2X, | |
213 | +Mnemonic_FYL2XP1, | |
214 | +Mnemonic_F2XM1, | |
215 | +Mnemonic_FPATAN, | |
216 | +Mnemonic_FXCH, | |
217 | +Mnemonic_FSCALE, | |
218 | + | |
219 | +Mnemonic_XCHG, | |
220 | +Mnemonic_DIV, // Unsigned Divide | |
221 | +Mnemonic_IDIV, // Signed Divide | |
222 | +Mnemonic_MUL, // Unsigned Multiply | |
223 | +Mnemonic_IMUL, // Signed Multiply | |
224 | +Mnemonic_INC, // Increment by 1 | |
225 | +Mnemonic_INT3, // Call break point | |
226 | + | |
227 | +Mnemonic_LEA, // Load Effective Address | |
228 | +Mnemonic_LEAVE, // High Level Procedure Exit | |
229 | +Mnemonic_LOOP, // Loop according to ECX counter | |
230 | +Mnemonic_LOOPE, // Loop according to ECX counter while equal (ZF=1) | |
231 | +Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX counter while not equal (ZF=0) | |
232 | +Mnemonic_LAHF, // Load Flags into AH | |
233 | +Mnemonic_MOVD, // Move Double word | |
234 | +Mnemonic_MOVQ, // Move Quadword | |
235 | +Mnemonic_MOVS8, | |
236 | +Mnemonic_MOVS16, | |
237 | +Mnemonic_MOVS32, | |
238 | +Mnemonic_MOVS64, | |
239 | +Mnemonic_MOVAPD, // Move Aligned Packed Double-Precision Floating-Point Values | |
240 | +Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value | |
241 | +Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values | |
242 | +Mnemonic_MOVSX, // Move with Sign-Extension | |
243 | +Mnemonic_MOVZX, // Move with Zero-Extend | |
244 | +Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values | |
245 | +Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values | |
246 | +Mnemonic_NEG, // Two's Complement Negation | |
247 | +Mnemonic_NOP, // No Operation | |
248 | +Mnemonic_NOT, // One's Complement Negation | |
249 | +Mnemonic_OR, // Logical Inclusive OR | |
250 | +Mnemonic_PREFETCH, // prefetch | |
251 | +Mnemonic_PADDQ, // Add Packed Quadword Integers | |
252 | +Mnemonic_PAND, // Logical AND | |
253 | +Mnemonic_POR, // Bitwise Logical OR | |
254 | +Mnemonic_PSUBQ, // Subtract Packed Quadword Integers | |
255 | +Mnemonic_PANDN, | |
256 | +Mnemonic_PSLLQ, | |
257 | +Mnemonic_PSRLQ, | |
258 | +Mnemonic_PXOR, // Logical Exclusive OR | |
259 | +Mnemonic_POP, // Pop a Value from the Stack | |
260 | +Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack | |
261 | +Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack | |
262 | +Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack | |
263 | +Mnemonic_RET, // Return from Procedure | |
264 | + | |
265 | +Mnemonic_SETcc, // Set Byte on Condition; each CCM(SET,x) entry below equals Mnemonic_SETcc + ConditionMnemonic_x | |
266 | + CCM(SET,O), | |
267 | + CCM(SET,NO), | |
268 | + CCM(SET,B), CCM(SET,NAE), CCM(SET,C), | |
269 | + CCM(SET,NB), CCM(SET,AE), CCM(SET,NC), | |
270 | + CCM(SET,Z), CCM(SET,E), | |
271 | + CCM(SET,NZ), CCM(SET,NE), | |
272 | + CCM(SET,BE), CCM(SET,NA), | |
273 | + CCM(SET,NBE), CCM(SET,A), | |
274 | + CCM(SET,S), | |
275 | + CCM(SET,NS), | |
276 | + CCM(SET,P), CCM(SET,PE), | |
277 | + CCM(SET,NP), CCM(SET,PO), | |
278 | + CCM(SET,L), CCM(SET,NGE), | |
279 | + CCM(SET,NL), CCM(SET,GE), | |
280 | + CCM(SET,LE), CCM(SET,NG), | |
281 | + CCM(SET,NLE), CCM(SET,G), | |
282 | + | |
283 | +Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left | |
284 | +Mnemonic_SAR, // Signed (arithmetic) shift right | |
285 | +Mnemonic_ROR, // Rotate right | |
286 | +Mnemonic_RCR, // Rotate right through CARRY flag | |
287 | +Mnemonic_ROL, // Rotate left | |
288 | +Mnemonic_RCL, // Rotate left through CARRY flag | |
289 | +Mnemonic_SHR, // Unsigned (logical) shift right | |
290 | +Mnemonic_SHRD, // Double Precision Shift Right | |
291 | +Mnemonic_SHLD, // Double Precision Shift Left | |
292 | + | |
293 | +Mnemonic_SBB, // Integer Subtraction with Borrow | |
294 | +Mnemonic_SUB, // Subtract | |
295 | +Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values | |
296 | +Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values | |
297 | + | |
298 | +Mnemonic_TEST, // Logical Compare | |
299 | + | |
300 | +Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS | |
301 | +Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS | |
302 | + | |
303 | +Mnemonic_XOR, // Logical Exclusive OR | |
304 | +// | |
305 | +// packed things, | |
306 | +// | |
307 | +Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values | |
308 | +Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values | |
309 | + | |
310 | +Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values | |
311 | +Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers | |
312 | + | |
313 | +Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values | |
314 | +Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers | |
315 | +// | |
316 | +// String operations | |
317 | +// | |
318 | +Mnemonic_STD, // Set direction flag | |
319 | +Mnemonic_CLD, // Clear direction flag | |
320 | +Mnemonic_SCAS, // Scan string | |
321 | +Mnemonic_STOS, // Store string | |
322 | + | |
323 | +// | |
324 | +Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions | |
325 | +Mnemonic_PADDB, //!< Add packed byte integers | |
326 | +Mnemonic_PADDW, //!< Add packed word integers | |
327 | +Mnemonic_PADDD, //!< Add packed doubleword integers | |
328 | +Mnemonic_PSUBB, //!< Subtract packed byte integers | |
329 | +Mnemonic_PSUBW, //!< Subtract packed word integers | |
330 | +Mnemonic_PSUBD, //!< Subtract packed doubleword integers | |
331 | +Mnemonic_PMULLW, //!< Multiply packed word integers | |
332 | +Mnemonic_PMULLD, //!< Multiply packed doubleword integers | |
333 | +Mnemonic_PSLLW, //!< Shift words left and shift in 0s | |
334 | +Mnemonic_PSLLD, //!< Shift doublewords left and shift in 0s | |
335 | +Mnemonic_PSRAW, //!< Shift words right and shift in sign bits | |
336 | +Mnemonic_PSRAD, //!< Shift doublewords right and shift in sign bits | |
337 | +Mnemonic_PSRLW, //!< Shift words right and shift in 0s | |
338 | +Mnemonic_PSRLD, //!< Shift doublewords right and shift in 0s | |
339 | +Mnemonic_PMOVSXBW, //!< Sign extend 8 packed signed 8-bit integers in the low 8 bytes to 8 packed signed 16-bit integers | |
340 | +Mnemonic_PSHUFB, //!< Shuffle bytes | |
341 | +Mnemonic_PSHUFD, //!< Shuffle doublewords | |
342 | +Mnemonic_PSHUFLW, //!< Shuffle packed low words | |
343 | +Mnemonic_PSHUFHW, //!< Shuffle packed high words | |
344 | +Mnemonic_PHADDSW, //!< Add 16-bit signed integers horizontally, then pack saturated integers | |
345 | +Mnemonic_PHADDW, //!< Add 16-bit signed integers horizontally, then pack | |
346 | +Mnemonic_PHADDD, //!< Add 32-bit signed integers horizontally, then pack | |
347 | +Mnemonic_PHSUBSW, //!< Subtract 16-bit signed integers horizontally, then pack saturated integers | |
348 | +Mnemonic_PHSUBW, //!< Subtract 16-bit signed integers horizontally, then pack | |
349 | +Mnemonic_PHSUBD, //!< Subtract 32-bit signed integers horizontally, then pack | |
350 | +Mnemonic_PEXTRB, //!< Extract a byte integer value from xmm | |
351 | +Mnemonic_PEXTRW, //!< Extract a word integer value from xmm | |
352 | +Mnemonic_PEXTRD, //!< Extract a doubleword integer value from xmm | |
353 | +Mnemonic_MOVDQA, //!< Move aligned double quadword | |
354 | +Mnemonic_SHUFPS, //!< Shuffle packed single-precision floating-point values | |
355 | +Mnemonic_MOVAPS, //!< Move aligned packed single-precision floating-point values | |
356 | + | |
357 | +// | |
358 | +Mnemonic_Count | |
359 | +} Mnemonic; | |
360 | + | |
361 | +#undef CCM | |
362 | + | |
363 | +ENCODER_NAMESPACE_END | |
364 | + | |
365 | +#endif // ifndef _ENCODER_DEFS_EXT_H_ |
@@ -0,0 +1,382 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | +#ifndef __ENC_PRVT_H_INCLUDED__ | |
21 | +#define __ENC_PRVT_H_INCLUDED__ | |
22 | + | |
23 | +#include "enc_base.h" | |
24 | + | |
25 | +ENCODER_NAMESPACE_START | |
26 | +/* | |
27 | + * @file | |
28 | + * @brief Contains some definitions/constants and other stuff used by the | |
29 | + * Encoder internally. | |
30 | + */ | |
31 | + | |
32 | +enum OpcodeByteKind { | |
33 | + //OpcodeByteKind_Opcode = 0x0000, | |
34 | + OpcodeByteKind_ZeroOpcodeByte = 0x0100, | |
35 | + // | |
36 | + // The names _SlashR, _SlashNum, _ib, _iw, etc | |
37 | + // represent the appropriate abbreviations used | |
38 | + // in the mnemonic descriptions in Intel's arch manual. | |
39 | + // | |
40 | + OpcodeByteKind_SlashR = 0x0200, | |
41 | + OpcodeByteKind_SlashNum = 0x0300, | |
42 | + OpcodeByteKind_ib = 0x0400, | |
43 | + OpcodeByteKind_iw = 0x0500, | |
44 | + OpcodeByteKind_id = 0x0600, | |
45 | +#ifdef _EM64T_ | |
46 | + OpcodeByteKind_io = 0x0700, | |
47 | +#endif | |
48 | + OpcodeByteKind_cb = 0x0800, | |
49 | + OpcodeByteKind_cw = 0x0900, | |
50 | + OpcodeByteKind_cd = 0x0A00, | |
51 | + //OpcodeByteKind_cp = 0x0B00, | |
52 | + //OpcodeByteKind_co = 0x0C00, | |
53 | + //OpcodeByteKind_ct = 0x0D00, | |
54 | + | |
55 | + OpcodeByteKind_rb = 0x0E00, | |
56 | + OpcodeByteKind_rw = 0x0F00, | |
57 | + OpcodeByteKind_rd = 0x1000, | |
58 | +#ifdef _EM64T_ | |
59 | + OpcodeByteKind_ro = 0x1100, | |
60 | + //OpcodeByteKind_REX = 0x1200, | |
61 | + OpcodeByteKind_REX_W = 0x1300, | |
62 | +#endif | |
63 | + OpcodeByteKind_plus_i = 0x1400, | |
64 | + /** | |
65 | + * a special marker, means 'no opcode on the given position' | |
66 | + * used in opcodes array, to specify the empty slot, say | |
67 | + * to fill an em64t-specific opcode on ia32. | |
68 | + * last 'e' made lowercase to avoid a mess with 'F' in | |
69 | + * OpcodeByteKind_LAST . | |
70 | + */ | |
71 | + OpcodeByteKind_EMPTY = 0xFFFE, | |
72 | + /** | |
73 | + * a special marker, means 'no more opcodes in the array' | |
74 | + * used in the opcodes array to show that there are no more | |
75 | + * opcodes in the array for a given mnemonic. | |
76 | + */ | |
77 | + OpcodeByteKind_LAST = 0xFFFF, | |
78 | + /** | |
79 | + * a mask to extract the OpcodeByteKind (the high byte of a 16-bit entry) | |
80 | + */ | |
81 | + OpcodeByteKind_KindMask = 0xFF00, | |
82 | + /** | |
83 | + * a mask to extract the opcode byte (the low byte), when present | |
84 | + */ | |
85 | + OpcodeByteKind_OpcodeMask = 0x00FF | |
86 | +}; | |
87 | + | |
88 | +#ifdef USE_ENCODER_DEFINES | |
89 | + | |
90 | +#define N {0, 0, 0, 0 } /* Operand-role initializers. As the entries below show, the four fields are: operand count, number of operands with a Def role, number of operands with a Use role, and the roles packed 2 bits per operand with the first operand in the highest bits. */ | |
91 | +#define U {1, 0, 1, OpndRole_Use } | |
92 | +#define D {1, 1, 0, OpndRole_Def } | |
93 | +#define DU {1, 1, 1, OpndRole_Def|OpndRole_Use } | |
94 | + | |
95 | +#define U_U {2, 0, 2, OpndRole_Use<<2 | OpndRole_Use } | |
96 | +#define D_U {2, 1, 1, OpndRole_Def<<2 | OpndRole_Use } | |
97 | +#define D_DU {2, 2, 1, OpndRole_Def<<2 | (OpndRole_Def|OpndRole_Use) } | |
98 | +#define DU_U {2, 1, 2, ((OpndRole_Def|OpndRole_Use)<<2 | OpndRole_Use) } | |
99 | +#define DU_DU {2, 2, 2, ((OpndRole_Def|OpndRole_Use)<<2 | (OpndRole_Def|OpndRole_Use)) } | |
100 | + | |
101 | +#define DU_DU_DU {3, 3, 3, ((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | (OpndRole_Def|OpndRole_Use) } | |
102 | +#define DU_DU_U {3, 2, 3, (((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) } | |
103 | +#define D_DU_U {3, 2, 2, (((OpndRole_Def)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) } | |
104 | +#define D_U_U {3, 1, 2, (((OpndRole_Def)<<4) | ((OpndRole_Use)<<2) | OpndRole_Use) } | |
105 | + | |
106 | +// Special encoding of 0x00 opcode byte. Note: it's all O-s, not zeros. | |
107 | +#define OxOO OpcodeByteKind_ZeroOpcodeByte | |
108 | + | |
109 | +#define Size16 InstPrefix_OpndSize | |
110 | + | |
111 | +#define _r OpcodeByteKind_SlashR | |
112 | + | |
113 | +#define _0 (OpcodeByteKind_SlashNum|0) // _0.._7: the SlashNum kind OR-ed with the digit 0..7; parenthesized so each expansion stays a single operand inside larger expressions | |
114 | +#define _1 (OpcodeByteKind_SlashNum|1) | |
115 | +#define _2 (OpcodeByteKind_SlashNum|2) | |
116 | +#define _3 (OpcodeByteKind_SlashNum|3) | |
117 | +#define _4 (OpcodeByteKind_SlashNum|4) | |
118 | +#define _5 (OpcodeByteKind_SlashNum|5) | |
119 | +#define _6 (OpcodeByteKind_SlashNum|6) | |
120 | +#define _7 (OpcodeByteKind_SlashNum|7) | |
121 | + | |
122 | +// '+i' for floating-point instructions | |
123 | +#define _i OpcodeByteKind_plus_i | |
124 | + | |
125 | + | |
126 | +#define ib OpcodeByteKind_ib | |
127 | +#define iw OpcodeByteKind_iw | |
128 | +#define id OpcodeByteKind_id | |
129 | + | |
130 | +#define cb OpcodeByteKind_cb | |
131 | +#define cw OpcodeByteKind_cw | |
132 | +#define cd OpcodeByteKind_cd | |
133 | + | |
134 | +#define rb OpcodeByteKind_rb | |
135 | +#define rw OpcodeByteKind_rw | |
136 | +#define rd OpcodeByteKind_rd | |
137 | + | |
138 | +#define AL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AL} | |
139 | +#define AH {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AH} | |
140 | +#define AX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_AX} | |
141 | +#define EAX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EAX} | |
142 | +#ifdef _EM64T_ | |
143 | + #define RAX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RAX } | |
144 | +#endif | |
145 | + | |
146 | +#define CL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_CL} | |
147 | +#define ECX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ECX} | |
148 | +#ifdef _EM64T_ | |
149 | + #define RCX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RCX} | |
150 | +#endif | |
151 | + | |
152 | +#define DX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_DX} | |
153 | +#define EDX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDX} | |
154 | +#ifdef _EM64T_ | |
155 | + #define RDX { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDX } | |
156 | +#endif | |
157 | + | |
158 | +#define ESI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ESI} | |
159 | +#ifdef _EM64T_ | |
160 | + #define RSI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RSI } | |
161 | +#endif | |
162 | + | |
163 | +#define EDI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDI} | |
164 | +#ifdef _EM64T_ | |
165 | + #define RDI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDI } | |
166 | +#endif | |
167 | + | |
168 | +#define r8 {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_Null} | |
169 | +#define r16 {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_Null} | |
170 | +#define r32 {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_Null} | |
171 | +#ifdef _EM64T_ | |
172 | + #define r64 { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_Null } | |
173 | +#endif | |
174 | + | |
175 | +#define r_m8 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Any, RegName_Null} | |
176 | +#define r_m16 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Any, RegName_Null} | |
177 | +#define r_m32 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null} | |
178 | + | |
179 | +#define r_m8s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Signed, RegName_Null} | |
180 | +#define r_m16s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Signed, RegName_Null} | |
181 | +#define r_m32s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Signed, RegName_Null} | |
182 | + | |
183 | +#define r_m8u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Zero, RegName_Null} | |
184 | +#define r_m16u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Zero, RegName_Null} | |
185 | +#define r_m32u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Zero, RegName_Null} | |
186 | + | |
187 | +//'m' was only used in LEA mnemonic, but is replaced with | |
188 | +// set of exact sizes. See more comments for LEA instruction in TheTable. | |
189 | +//#define m {OpndKind_Mem, OpndSize_Null, RegName_Null} | |
190 | +#define m8 {OpndKind_Mem, OpndSize_8, OpndExt_Any, RegName_Null} | |
191 | +#define m16 {OpndKind_Mem, OpndSize_16, OpndExt_Any, RegName_Null} | |
192 | +#define m32 {OpndKind_Mem, OpndSize_32, OpndExt_Any, RegName_Null} | |
193 | +#define m64 {OpndKind_Mem, OpndSize_64, OpndExt_Any, RegName_Null} | |
194 | +#ifdef _EM64T_ | |
195 | + #define r_m64 { (OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null } | |
196 | +#endif | |
197 | + | |
198 | +#define imm8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null} | |
199 | +#define imm16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null} | |
200 | +#define imm32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} | |
201 | + | |
202 | +#define imm8s {OpndKind_Imm, OpndSize_8, OpndExt_Signed, RegName_Null} | |
203 | +#define imm16s {OpndKind_Imm, OpndSize_16, OpndExt_Signed, RegName_Null} | |
204 | +#define imm32s {OpndKind_Imm, OpndSize_32, OpndExt_Signed, RegName_Null} | |
205 | + | |
206 | +#define imm8u {OpndKind_Imm, OpndSize_8, OpndExt_Zero, RegName_Null} | |
207 | +#define imm16u {OpndKind_Imm, OpndSize_16, OpndExt_Zero, RegName_Null} | |
208 | +#define imm32u {OpndKind_Imm, OpndSize_32, OpndExt_Zero, RegName_Null} | |
209 | + | |
210 | +#ifdef _EM64T_ | |
211 | + #define imm64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null } | |
212 | +#endif | |
213 | + | |
214 | +//FIXME: moff-s are in fact memory refs, but presented as immediate. | |
215 | +// Need to specify this in OpndDesc. | |
216 | +#define moff8 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} | |
217 | +#define moff16 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} | |
218 | +#define moff32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} | |
219 | +#ifdef _EM64T_ | |
220 | + #define moff64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null} | |
221 | +#endif | |
222 | + | |
223 | + | |
224 | +#define rel8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null} | |
225 | +#define rel16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null} | |
226 | +#define rel32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} | |
227 | + | |
228 | +#define mm64 {OpndKind_MMXReg, OpndSize_64, OpndExt_Any, RegName_Null} | |
229 | +#define mm_m64 {(OpndKind)(OpndKind_MMXReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null} | |
230 | + | |
231 | +#define xmm64 {OpndKind_XMMReg, OpndSize_64, OpndExt_Any, RegName_Null} | |
232 | +#define xmm_m64 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null} | |
233 | + | |
234 | +#define xmm32 {OpndKind_XMMReg, OpndSize_32, OpndExt_Any, RegName_Null} | |
235 | +#define xmm_m32 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null} | |
236 | + | |
237 | +#define FP0S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP0S} | |
238 | +#define FP0D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP0D} | |
239 | +#define FP1S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP1S} | |
240 | +#define FP1D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP1D} | |
241 | +#define fp32 {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_Null} | |
242 | +#define fp64 {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_Null} | |
243 | + | |
244 | +#ifdef _EM64T_ | |
245 | + #define io OpcodeByteKind_io | |
246 | + #define REX_W OpcodeByteKind_REX_W | |
247 | + | |
248 | +#endif | |
249 | + | |
250 | +#endif // USE_ENCODER_DEFINES | |
251 | + | |
252 | +/** | |
253 | + * @brief Represents the REX part of an instruction as bit-fields: b, x, r, w (1 bit each) and a 4-bit fixed field. | |
254 | + */ | |
255 | +struct Rex { | |
256 | + unsigned char b : 1; | |
257 | + unsigned char x : 1; | |
258 | + unsigned char r : 1; | |
259 | + unsigned char w : 1; | |
260 | + unsigned char dummy : 4; // must be '0100'b | |
261 | + unsigned int :24; | |
262 | +}; | |
263 | + | |
264 | +/** | |
265 | + * @brief Describes the SIB (scale, index, base) byte: 3-bit base, 3-bit index, 2-bit scale, then 24 padding bits. | |
266 | + */ | |
267 | +struct SIB { | |
268 | + unsigned char base:3; | |
269 | + unsigned char index:3; | |
270 | + unsigned char scale:2; | |
271 | + unsigned int padding:24; | |
272 | +}; | |
273 | +/** | |
274 | + * @brief Describes the ModRM byte: 3-bit rm, 3-bit reg, 2-bit mod, then 24 padding bits. | |
275 | + */ | |
276 | +struct ModRM | |
277 | +{ | |
278 | + unsigned char rm:3; | |
279 | + unsigned char reg:3; | |
280 | + unsigned char mod:2; | |
281 | + unsigned int padding:24; | |
282 | +}; | |
283 | + | |
284 | + | |
285 | + | |
286 | +/** | |
287 | +* Exactly the same as EncoderBase::OpcodeDesc, but also holds info about the | |
288 | +* platform on which the opcode is applicable (see the 'platform' enum). | |
289 | +*/ | |
290 | +struct OpcodeInfo { | |
291 | + enum platform { | |
292 | + /// opcode is valid on all platforms | |
293 | + all, | |
294 | + /// opcode is valid on Intel64 (EM64T) only | |
295 | + em64t, | |
296 | + /// opcode is valid on IA-32 only | |
297 | + ia32, | |
298 | + /// opcode is added for the sake of disassembling, should not be used in encoding | |
299 | + decoder, | |
300 | + /// only appears in master table, replaced with 'decoder' in hashed version | |
301 | + decoder32, | |
302 | + /// only appears in master table, replaced with 'decoder' in hashed version | |
303 | + decoder64, | |
304 | + }; | |
305 | + platform platf; | |
306 | + unsigned opcode[4+1+1]; | |
307 | + EncoderBase::OpndDesc opnds[EncoderBase::MAX_NUM_OPCODE_OPERANDS]; | |
308 | + EncoderBase::OpndRolesDesc roles; | |
309 | +}; | |
310 | + | |
311 | +/** | |
312 | + * @defgroup MF_ Mnemonic flags | |
313 | +*/ | |
314 | + | |
315 | + /** | |
316 | + * Operation has no special properties. | |
317 | + */ | |
318 | +#define MF_NONE (0x00000000) | |
319 | + /** | |
320 | + * Operation affects flags. | |
321 | + */ | |
322 | +#define MF_AFFECTS_FLAGS (0x00000001) | |
323 | + /** | |
324 | + * Operation uses flags - conditional operations, ADC/SBB/etc. | |
325 | + */ | |
326 | +#define MF_USES_FLAGS (0x00000002) | |
327 | + /** | |
328 | + * Operation is conditional - CMOVcc/SETcc/Jcc/etc. | |
329 | + */ | |
330 | +#define MF_CONDITIONAL (0x00000004) | |
331 | +/** | |
332 | + * Operation is symmetric - its args can be swapped (ADD/MUL/etc). | |
333 | + */ | |
334 | +#define MF_SYMMETRIC (0x00000008) | |
335 | +/** | |
336 | + * Operation is XOR-like (XOR, SUB): applied to 'arg,arg' it is a pure def, | |
337 | + * without use. | |
338 | + */ | |
339 | +#define MF_SAME_ARG_NO_USE (0x00000010) | |
340 | + | |
341 | +///@} // ~MF_ | |
342 | + | |
343 | +/** | |
344 | + * @see same structure as EncoderBase::MnemonicDesc, but carries | |
345 | + * OpcodeInfo[] instead of OpcodeDesc[]. | |
346 | + * Only used during prebuilding the encoding tables, thus it's hidden under | |
347 | + * the appropriate define. | |
348 | + */ | |
349 | +struct MnemonicInfo { | |
350 | + /** | |
351 | + * The mnemonic itself | |
352 | + */ | |
353 | + Mnemonic mn; | |
354 | + /** | |
355 | + * Various characteristics of the mnemonic (a combination of MF_ flags). | |
356 | + * @see MF_ | |
357 | + */ | |
358 | + unsigned flags; | |
359 | + /** | |
360 | + * Number of args/defs/uses/roles for the operation. For the operations | |
361 | + * which may use a different number of operands (i.e. IMUL/SHL) use the | |
362 | + * most common value, or leave '0' if you are sure this info is not | |
363 | + * required. | |
364 | + */ | |
365 | + EncoderBase::OpndRolesDesc roles; | |
366 | + /** | |
367 | + * Print name of the mnemonic | |
368 | + */ | |
369 | + const char * name; | |
370 | + /** | |
371 | + * Array of opcodes. | |
372 | + * The terminating opcode description always has OpcodeByteKind_LAST | |
373 | + * at the opcodes[i].opcode[0]. | |
374 | + * The size of '25' has nothing behind it, just counted the max | |
375 | + * number of opcodes currently used (MOV instruction). | |
376 | + */ | |
377 | + OpcodeInfo opcodes[25]; | |
378 | +}; | |
379 | + | |
380 | +ENCODER_NAMESPACE_END | |
381 | + | |
382 | +#endif // ~__ENC_PRVT_H_INCLUDED__ |
@@ -0,0 +1,2164 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | + | |
21 | + | |
22 | +#include <assert.h> | |
23 | +#include <stdio.h> | |
24 | +#include <stdlib.h> //qsort | |
25 | +#include <string.h> | |
26 | +#include <memory.h> | |
27 | +#include <errno.h> | |
28 | +#include <stdlib.h> | |
29 | + | |
30 | + | |
31 | +// need to use EM64T-specifics - new registers, defines from enc_prvt, etc... | |
32 | +#if !defined(_EM64T_) | |
33 | + #define UNDEF_EM64T | |
34 | + #define _EM64T_ | |
35 | +#endif | |
36 | + | |
37 | +#define USE_ENCODER_DEFINES | |
38 | +#include "enc_prvt.h" | |
39 | +#include "enc_defs.h" | |
40 | + | |
41 | +#ifdef UNDEF_EM64T | |
42 | + #undef _EM64T_ | |
43 | +#endif | |
44 | + | |
45 | +//Android x86 | |
46 | +#if 0 //!defined(_HAVE_MMX_) | |
47 | + #define Mnemonic_PADDQ Mnemonic_Null | |
48 | + #define Mnemonic_PAND Mnemonic_Null | |
49 | + #define Mnemonic_POR Mnemonic_Null | |
50 | + #define Mnemonic_PSUBQ Mnemonic_Null | |
51 | +#endif | |
52 | + | |
53 | +ENCODER_NAMESPACE_START | |
54 | + | |
55 | + | |
56 | +EncoderBase::MnemonicDesc EncoderBase::mnemonics[Mnemonic_Count]; | |
57 | +EncoderBase::OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES]; | |
58 | +unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX]; | |
59 | + | |
60 | + | |
61 | +/** | |
62 | + * @file | |
63 | + * @brief 'Master' copy of encoding data. | |
64 | + */ | |
65 | + | |
66 | +/* | |
67 | +This file contains a 'master copy' of encoding table - this is the info used | |
68 | +by both generator of native instructions (EncoderBase class) and by | |
69 | +disassembling routines. The first one uses an info how to encode the | |
70 | +instruction, and the second does an opposite - several separate tables are | |
71 | +built at runtime from this main table. | |
72 | + | |
73 | +============================================================================= | |
74 | + | |
75 | +The table was designed for easy support and maintenance. Thus, it was made as | |
76 | +much close as possible to the Intel's IA32 Architecture Manual descriptions. | |
77 | +The info is based on the latest (at the moment of writing) revision which is | |
78 | +June 2005, order number 253666-016. | |
79 | + | |
80 | +Normally, almost all of opcodes in the 'master' table represented exactly as | |
81 | +they are shown in the Intel's Architecture manual (well, with slashes | |
82 | +replaced with underscore). There are several exclusions especially marked. | |
83 | + | |
84 | +Normally, to add an opcode/instruction, one only need to copy the whole | |
85 | +string from the manual, and simply replace '/' with '_'. | |
86 | + | |
87 | +I.e., TheManual reads for DEC: | |
88 | + (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1. | |
89 | + (2) REX + FE /1 DEC r/m8* Valid N.E. Decrement r/m8 by 1. | |
90 | + (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1. | |
91 | + | |
92 | +1. Note, that there is no need to explicitly specify REX-based opcodes for | |
93 | + instruction to handle additional registers on EM64T: | |
94 | + | |
95 | + (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1. | |
96 | + (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1. | |
97 | + | |
98 | +2. Copy the string, strip off the text comments, replace '/'=>'_'. Note, that | |
99 | + the second line is for EM64T only | |
100 | + | |
101 | + (1) FE /1 DEC r/m8 | |
102 | + (3) REX.W + FF /1 DEC r/m64 | |
103 | + | |
104 | +3. Fill out the mnemonic, opcode parameters parts | |
105 | + | |
106 | + BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU) | |
107 | + BEGIN_OPCODES() | |
108 | + {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU }, | |
109 | + {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU }, | |
110 | + | |
111 | + DU here - one argument, it's used and defined | |
112 | + | |
113 | +4. That's it, that simple ! | |
114 | + | |
115 | +The operand roles (DU here) are used by Jitrino's optimizing engine to | |
116 | +perform data flow analysis. They are also used to store/obtain the number of operands. | |
117 | + | |
118 | +Special cases are (see the table for details): | |
119 | +LEA | |
120 | +Some FPU operations (i.e. FSTP) | |
121 | +packed things (XORPD, XORPS, CVTDQ2PD, CVTTPD2DQ) | |
122 | + | |
123 | +Also, the Jitrino's needs require to specify all operands - including | |
124 | +implicit ones (see IMUL). | |
125 | + | |
126 | +The master table itself does not need to be ordered - it gets sorted before | |
127 | +processing. It's recommended (though it's not a law) to group similar | |
128 | +instructions together - i.e. FPU instructions, MMX, etc. | |
129 | + | |
130 | +============================================================================= | |
131 | + | |
132 | +The encoding engine builds several tables based on the 'master' one (here | |
133 | +'mnemonic' is a kind of synonym for 'instruction'): | |
134 | + | |
135 | +- list of mnemonics which holds general info about instructions | |
136 | + (EncoderBase::mnemonics) | |
137 | +- an array of opcode descriptions (EncoderBase::opcodes) | |
138 | +- a mapping between a hash value and an opcode description record for a given | |
139 | + mnemonic (EncoderBase::opcodesHashMap) | |
140 | + | |
141 | +The EncoderBase::mnemonics holds general info about instructions. | |
142 | +The EncoderBase::opcodesHashMap is used for fast opcode selection basing on | |
143 | +a hash value. | |
144 | +The EncoderBase::opcodes is used for the encoding itself. | |
145 | + | |
146 | +============================================================================= | |
147 | +The hash value is calculated and used as follows: | |
148 | + | |
149 | +JIT-ted code uses the following operand sizes: 8-, 16-, 32- and 64-bits and | |
150 | +size for an operand can be encoded in just 2 bits. | |
151 | + | |
152 | +The following operand locations are available: one of registers - GP, FP, | |
153 | +MMX, XMM (not taking segment registers), a memory and an immediate, which | |
154 | +gives us 6 variants and can be enumerated in 3 bits. | |
155 | + | |
156 | +As a grand total, the whole operand's info needed for opcode selection | |
157 | +can be packed in 5 bits. Taking into account the IMUL mnemonic with its 3 | |
158 | +operands (including implicit ones), we're getting 15 bits per instruction and | |
159 | +the complete table is about 32768 items per single instruction. | |
160 | + | |
161 | +Seems too many, but luckily, the 15 bit limit will never be reached: the | |
162 | +worst case is IMUL with its 3 operands: | |
163 | +(IMUL r64, r/m64, imm32)/(IMUL r32, r/m32, imm32). | |
164 | +So, assigning lowest value to GP register, the max value of hash can be | |
165 | +reduced. | |
166 | + | |
167 | +The hash values to use are: | |
168 | +sizes: | |
169 | + 8 -> 11 | |
170 | + 16 -> 10 | |
171 | + 32 -> 01 | |
172 | + 64 -> 00 | |
173 | +locations: | |
174 | + gp reg -> 000 | |
175 | + memory -> 001 | |
176 | + fp reg -> 010 | |
177 | + mmx reg -> 011 | |
178 | + xmm reg -> 100 | |
179 | + immediate -> 101 | |
180 | +and the grand total for the worst case would be | |
181 | +[ GP 32] [GP 32] [Imm 32] | |
182 | +[000-01] [000-01] [101 01] = 1077 | |
183 | + | |
184 | +However, the implicit operands add additional value, and the worst case | |
185 | +is 'SHLD r_m32, r32, CL=r8'. This gives us the maximum number of: | |
186 | + | |
187 | +[mem 32] [GP 32] [GP 8b] | |
188 | +[001-01] [000-01] [000-11] = 5155. | |
189 | + | |
190 | +The max number is pretty big and the hash function is quite sparse, thus it | |
191 | +is not reasonable to use direct addressing, i.e. | |
192 | +OpcodeDesc[mnemonic][hash_code] - there would be a huge waste of space. | |
193 | + | |
194 | +Instead, we use a kind of mapping: the opcodes info is stored in a packed | |
195 | +(here: non-sparse) array. The max number of opcodes will not exceed 255 for | |
196 | +each instruction. And we have an index array in which we store a mapping | |
197 | +between a hash code value and opcode position for each given instruction. | |
198 | + | |
199 | +Sounds a bit sophisticated, but in real is simple, the opcode gets selected | |
200 | +in 2 simple steps: | |
201 | + | |
202 | +1. Select [hash,mnemonic] => 'n'. | |
203 | + | |
204 | +The array is pretty sparse - many cells contain 0xFF which | |
205 | +means 'invalid hash - no opcode with given characteristics' | |
206 | + | |
207 | +unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX] = | |
208 | + | |
209 | ++----+----+----+----+----+----+ | |
210 | +| 00 | 05 | FF | FF | 03 | 12 | ... | |
211 | +|---------+-------------------+ | |
212 | +| 12 | FF | FF | n | 04 | 25 | ... <- Mnemonic | |
213 | +|-----------------------------+ | |
214 | +| FF | 11 | FF | 10 | 13 | .. | ... | |
215 | ++-----------------------------+ | |
216 | + ... ^ | |
217 | + | | |
218 | + hash | |
219 | + | |
220 | +2. Select [n,mnemonic] => 'opcode_desc11' | |
221 | + | |
222 | +OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES] = | |
223 | + | |
224 | ++---------------+---------------+---------------+---------------+ | |
225 | +| opcode_desc00 | opcode_desc01 | opcode_desc02 | last_opcode | ... | |
226 | ++---------------+---------------+---------------+---------------+ | |
227 | +| opcode_desc10 | opcode_desc11 | last_opcode | xxx | <- Mnemonic | |
228 | ++---------------+---------------+---------------+---------------+ | |
229 | +| opcode_desc20 | opcode_desc21 | opcode_desc22 | opcode_desc23 | ... | |
230 | ++---------------+---------------+---------------+---------------+ | |
231 | + ... | |
232 | + ^ | |
233 | + | | |
234 | + n | |
235 | + | |
236 | +Now, use 'opcode_desc11'. | |
237 | + | |
238 | +============================================================================= | |
239 | +The array of opcode descriptions (EncoderBase::opcodes) is specially prepared | |
240 | +to maximize performance - the EncoderBase::encode() is quite hot on client | |
241 | +applications for the Jitrino/Jitrino.JET. | |
242 | +The preparation is that opcode descriptions from the 'master' encoding table | |
243 | +are preprocessed and a special set of OpcodeDesc prepared: | |
244 | +First, the 'raw' opcode bytes are extracted. Here, 'raw' means the bytes that | |
245 | +do not depend on any operand values, do not require any analysis and can be | |
246 | +simply copied into the output buffer during encoding. Also, number of these | |
247 | +'raw' bytes is counted. The fields are OpcodeDesc::opcode and | |
248 | +OpcodeDesc::opcode_len. | |
249 | + | |
250 | +Then the first non-implicit operand is found and its index is stored in | |
251 | +OpcodeDesc::first_opnd. | |
252 | + | |
253 | +The bytes that require processing and analysis ('/r', '+i', etc) are | |
254 | +extracted and stored in OpcodeDesc::aux0 and OpcodeDesc::aux1 fields. | |
255 | + | |
256 | +Here, a special trick is performed: | |
257 | + Some opcodes have register/memory operand, but this is not reflected in | |
258 | + opcode column - for example, (MOVQ xmm64, xmm_m64). In this case, a fake | |
259 | + '_r' added to OpcodeDesc::aux field. | |
260 | + Some other opcodes have immediate operands, but this is again not | |
261 | + reflected in opcode column - for example, CALL cd or PUSH imm32. | |
262 | + In this case, a fake '/cd' or fake '/id' added to appropriate | |
263 | + OpcodeDesc::aux field. | |
264 | + | |
265 | +The OpcodeDesc::last is non-zero for the final OpcodeDesc record (which does | |
266 | +not have valid data itself). | |
267 | +*/ | |
268 | + | |
269 | +// TODO: To extend flexibility, replace bool fields in MnemonicDesc & | |
270 | +// MnemonicInfo with a set of flags packed into integer field. | |
271 | + | |
272 | +unsigned short EncoderBase::getHash(const OpcodeInfo* odesc) | |
273 | +{ | |
274 | + /* Builds the lookup hash from the kind and size of the first roles.count (at most 3) operands. | |
275 | + NOTE: any changes in the hash computation must be strictly balanced with | |
276 | + EncoderBase::Operand::hash_it and EncoderBase::Operands() | |
277 | + */ | |
278 | + unsigned short hash = 0; | |
279 | + // The hash computation uses a fast way - table selection instead of if-s. | |
280 | + if (odesc->roles.count > 0) { | |
281 | + OpndKind kind = odesc->opnds[0].kind; | |
282 | + OpndSize size = odesc->opnds[0].size; | |
283 | + assert(kind<COUNTOF(kind_hash)); | |
284 | + assert(size<COUNTOF(size_hash)); | |
285 | + hash = get_kind_hash(kind) | get_size_hash(size); | |
286 | + } | |
287 | + | |
288 | + if (odesc->roles.count > 1) { | |
289 | + OpndKind kind = odesc->opnds[1].kind; | |
290 | + OpndSize size = odesc->opnds[1].size; | |
291 | + assert(kind<COUNTOF(kind_hash)); | |
292 | + assert(size<COUNTOF(size_hash)); | |
293 | + hash = (hash<<HASH_BITS_PER_OPERAND) | | |
294 | + (get_kind_hash(kind) | get_size_hash(size)); | |
295 | + } | |
296 | + | |
297 | + if (odesc->roles.count > 2) { | |
298 | + OpndKind kind = odesc->opnds[2].kind; | |
299 | + OpndSize size = odesc->opnds[2].size; | |
300 | + assert(kind<COUNTOF(kind_hash)); | |
301 | + assert(size<COUNTOF(size_hash)); | |
302 | + hash = (hash<<HASH_BITS_PER_OPERAND) | | |
303 | + (get_kind_hash(kind) | get_size_hash(size)); | |
304 | + } | |
305 | + assert(hash <= HASH_MAX); | |
306 | + return hash; | |
307 | +} | |
308 | + | |
309 | + | |
310 | +#define BEGIN_MNEMONIC(mn, flags, roles) \ | |
311 | + { Mnemonic_##mn, flags, roles, #mn, | |
312 | +#define END_MNEMONIC() }, | |
313 | +#define BEGIN_OPCODES() { | |
314 | +#define END_OPCODES() { OpcodeInfo::all, {OpcodeByteKind_LAST}, {}, {0, 0, 0, 0}}} | |
315 | + | |
316 | + | |
317 | +static MnemonicInfo masterEncodingTable[] = { | |
318 | +// | |
319 | +// Null | |
320 | +// | |
321 | +BEGIN_MNEMONIC(Null, MF_NONE, N) | |
322 | +BEGIN_OPCODES() | |
323 | +END_OPCODES() | |
324 | +END_MNEMONIC() | |
325 | + | |
326 | +BEGIN_MNEMONIC(LAHF, MF_USES_FLAGS, D) | |
327 | +BEGIN_OPCODES() | |
328 | +// TheManual says it's not always supported in em64t mode, thus excluding it | |
329 | + {OpcodeInfo::ia32, {0x9F}, {EAX}, D }, | |
330 | +END_OPCODES() | |
331 | +END_MNEMONIC() | |
332 | +// | |
333 | +// ALU mnemonics - add, adc, or, xor, and, cmp, sub, sbb | |
334 | +// as they differ only in the opcode extension (/digit) number and | |
335 | +// in which number the opcodes start from, the opcode definitions | |
336 | +// for those instructions are packed together | |
337 | +// | |
338 | +// The 'opcode_starts_from' and 'opc_ext' in DEFINE_ALU_OPCODES() | |
339 | +// are enough to define OpcodeInfo::all opcodes and the 'first_opcode' | |
340 | +// parameter is only due to ADD instruction, which requires a zero opcode | |
341 | +// byte which, in turn, is coded especially in the current coding scheme. | |
342 | +// | |
343 | + | |
344 | +#define DEFINE_ALU_OPCODES( opc_ext, opcode_starts_from, first_opcode, def_use ) \ | |
345 | +\ | |
346 | + {OpcodeInfo::decoder, {opcode_starts_from + 4, ib}, {AL, imm8}, DU_U },\ | |
347 | + {OpcodeInfo::decoder, {Size16, opcode_starts_from + 5, iw}, {AX, imm16}, DU_U },\ | |
348 | + {OpcodeInfo::decoder, {opcode_starts_from + 5, id}, {EAX, imm32}, DU_U },\ | |
349 | + {OpcodeInfo::decoder64, {REX_W, opcode_starts_from+5, id}, {RAX, imm32s},DU_U },\ | |
350 | +\ | |
351 | + {OpcodeInfo::all, {0x80, opc_ext, ib}, {r_m8, imm8}, def_use },\ | |
352 | + {OpcodeInfo::all, {Size16, 0x81, opc_ext, iw}, {r_m16, imm16}, def_use },\ | |
353 | + {OpcodeInfo::all, {0x81, opc_ext, id}, {r_m32, imm32}, def_use },\ | |
354 | + {OpcodeInfo::em64t, {REX_W, 0x81, opc_ext, id}, {r_m64, imm32s}, def_use },\ | |
355 | +\ | |
356 | + {OpcodeInfo::all, {Size16, 0x83, opc_ext, ib}, {r_m16, imm8s}, def_use },\ | |
357 | + {OpcodeInfo::all, {0x83, opc_ext, ib}, {r_m32, imm8s}, def_use },\ | |
358 | + {OpcodeInfo::em64t, {REX_W, 0x83, opc_ext, ib}, {r_m64, imm8s}, def_use },\ | |
359 | +\ | |
360 | + {OpcodeInfo::all, {first_opcode, _r}, {r_m8, r8}, def_use },\ | |
361 | +\ | |
362 | + {OpcodeInfo::all, {Size16, opcode_starts_from+1, _r}, {r_m16, r16}, def_use },\ | |
363 | + {OpcodeInfo::all, {opcode_starts_from+1, _r}, {r_m32, r32}, def_use },\ | |
364 | + {OpcodeInfo::em64t, {REX_W, opcode_starts_from+1, _r}, {r_m64, r64}, def_use },\ | |
365 | +\ | |
366 | + {OpcodeInfo::all, {opcode_starts_from+2, _r}, {r8, r_m8}, def_use },\ | |
367 | +\ | |
368 | + {OpcodeInfo::all, {Size16, opcode_starts_from+3, _r}, {r16, r_m16}, def_use },\ | |
369 | + {OpcodeInfo::all, {opcode_starts_from+3, _r}, {r32, r_m32}, def_use },\ | |
370 | + {OpcodeInfo::em64t, {REX_W, opcode_starts_from+3, _r}, {r64, r_m64}, def_use }, | |
371 | + | |
372 | +BEGIN_MNEMONIC(ADD, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) | |
373 | +BEGIN_OPCODES() | |
374 | + DEFINE_ALU_OPCODES(_0, 0x00, OxOO, DU_U ) | |
375 | +END_OPCODES() | |
376 | +END_MNEMONIC() | |
377 | + | |
378 | +BEGIN_MNEMONIC(OR, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) | |
379 | +BEGIN_OPCODES() | |
380 | + DEFINE_ALU_OPCODES(_1, 0x08, 0x08, DU_U ) | |
381 | +END_OPCODES() | |
382 | +END_MNEMONIC() | |
383 | + | |
384 | +BEGIN_MNEMONIC(ADC, MF_AFFECTS_FLAGS|MF_USES_FLAGS|MF_SYMMETRIC, DU_U) | |
385 | +BEGIN_OPCODES() | |
386 | + DEFINE_ALU_OPCODES(_2, 0x10, 0x10, DU_U ) | |
387 | +END_OPCODES() | |
388 | +END_MNEMONIC() | |
389 | + | |
390 | +BEGIN_MNEMONIC(SBB, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) | |
391 | +BEGIN_OPCODES() | |
392 | + DEFINE_ALU_OPCODES(_3, 0x18, 0x18, DU_U ) | |
393 | +END_OPCODES() | |
394 | +END_MNEMONIC() | |
395 | + | |
396 | +BEGIN_MNEMONIC(AND, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) | |
397 | +BEGIN_OPCODES() | |
398 | + DEFINE_ALU_OPCODES(_4, 0x20, 0x20, DU_U ) | |
399 | +END_OPCODES() | |
400 | +END_MNEMONIC() | |
401 | + | |
402 | + | |
403 | +BEGIN_MNEMONIC(SUB, MF_AFFECTS_FLAGS|MF_SAME_ARG_NO_USE, DU_U) | |
404 | +BEGIN_OPCODES() | |
405 | + DEFINE_ALU_OPCODES(_5, 0x28, 0x28, DU_U ) | |
406 | +END_OPCODES() | |
407 | +END_MNEMONIC() | |
408 | + | |
409 | + | |
410 | +BEGIN_MNEMONIC(XOR, MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE, DU_U) | |
411 | +BEGIN_OPCODES() | |
412 | + DEFINE_ALU_OPCODES( _6, 0x30, 0x30, DU_U ) | |
413 | +END_OPCODES() | |
414 | +END_MNEMONIC() | |
415 | + | |
416 | +BEGIN_MNEMONIC(CMP, MF_AFFECTS_FLAGS, U_U) | |
417 | +BEGIN_OPCODES() | |
418 | + DEFINE_ALU_OPCODES( _7, 0x38, 0x38, U_U ) | |
419 | +END_OPCODES() | |
420 | +END_MNEMONIC() | |
421 | + | |
422 | +BEGIN_MNEMONIC(CMPXCHG, MF_AFFECTS_FLAGS, N) | |
423 | +BEGIN_OPCODES() | |
424 | + {OpcodeInfo::all, {0x0F, 0xB0, _r}, {r_m8, r8, AL}, DU_DU_DU }, | |
425 | + {OpcodeInfo::all, {Size16, 0x0F, 0xB1, _r}, {r_m16, r16, AX}, DU_DU_DU }, | |
426 | + {OpcodeInfo::all, {0x0F, 0xB1, _r}, {r_m32, r32, EAX}, DU_DU_DU}, | |
427 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB1, _r}, {r_m64, r64, RAX}, DU_DU_DU }, | |
428 | +END_OPCODES() | |
429 | +END_MNEMONIC() | |
430 | + | |
431 | +BEGIN_MNEMONIC(CMPXCHG8B, MF_AFFECTS_FLAGS, D) | |
432 | +BEGIN_OPCODES() | |
433 | + {OpcodeInfo::all, {0x0F, 0xC7, _1}, {m64}, DU }, | |
434 | +END_OPCODES() | |
435 | +END_MNEMONIC() | |
436 | + | |
437 | +#undef DEFINE_ALU_OPCODES | |
438 | +// | |
439 | +// | |
440 | +// | |
441 | +BEGIN_MNEMONIC(ADDSD, MF_NONE, DU_U) | |
442 | +BEGIN_OPCODES() | |
443 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x58, _r}, {xmm64, xmm_m64}, DU_U}, | |
444 | +END_OPCODES() | |
445 | +END_MNEMONIC() | |
446 | + | |
447 | +BEGIN_MNEMONIC(ADDSS, MF_NONE, DU_U) | |
448 | +BEGIN_OPCODES() | |
449 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x58, _r}, {xmm32, xmm_m32}, DU_U}, | |
450 | +END_OPCODES() | |
451 | +END_MNEMONIC() | |
452 | + | |
453 | + | |
454 | +BEGIN_MNEMONIC(BSF, MF_AFFECTS_FLAGS, N) | |
455 | +BEGIN_OPCODES() | |
456 | + {OpcodeInfo::all, {0x0F, 0xBC}, {r32, r_m32}, D_U}, | |
457 | +END_OPCODES() | |
458 | +END_MNEMONIC() | |
459 | + | |
460 | +BEGIN_MNEMONIC(BSR, MF_AFFECTS_FLAGS, N) | |
461 | +BEGIN_OPCODES() | |
462 | + {OpcodeInfo::all, {0x0F, 0xBD}, {r32, r_m32}, D_U}, | |
463 | +END_OPCODES() | |
464 | +END_MNEMONIC() | |
465 | + | |
466 | + | |
467 | +BEGIN_MNEMONIC(CALL, MF_NONE, U ) | |
468 | +BEGIN_OPCODES() | |
469 | + {OpcodeInfo::all, {0xE8, cd}, {rel32}, U }, | |
470 | + {OpcodeInfo::ia32, {Size16, 0xE8, cw}, {rel16}, U }, | |
471 | + {OpcodeInfo::ia32, {0xFF, _2}, {r_m32}, U }, | |
472 | + {OpcodeInfo::em64t, {0xFF, _2}, {r_m64}, U }, | |
473 | +END_OPCODES() | |
474 | +END_MNEMONIC() | |
475 | + | |
476 | +BEGIN_MNEMONIC(CMC, MF_USES_FLAGS|MF_AFFECTS_FLAGS, N) | |
477 | +BEGIN_OPCODES() | |
478 | + {OpcodeInfo::decoder, {0xF5}, {}, N }, | |
479 | +END_OPCODES() | |
480 | +END_MNEMONIC() | |
481 | + | |
482 | +//TODO: Workaround. Actually, it's D_DU, but Jitrino's CG thinks it's D_U | |
483 | +BEGIN_MNEMONIC(CDQ, MF_NONE, D_U ) | |
484 | +BEGIN_OPCODES() | |
485 | + {OpcodeInfo::all, {0x99}, {DX, AX}, D_U }, | |
486 | + {OpcodeInfo::all, {0x99}, {EDX, EAX}, D_U }, | |
487 | + {OpcodeInfo::em64t, {REX_W, 0x99}, {RDX, RAX}, D_U }, | |
488 | +END_OPCODES() | |
489 | +END_MNEMONIC() | |
490 | + | |
491 | +#define DEFINE_CMOVcc_MNEMONIC( cc ) \ | |
492 | + BEGIN_MNEMONIC(CMOV##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU_U ) \ | |
493 | +BEGIN_OPCODES() \ | |
494 | + {OpcodeInfo::all, {Size16, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r16, r_m16}, DU_U }, \ | |
495 | + {OpcodeInfo::all, {0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r32, r_m32}, DU_U }, \ | |
496 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r64, r_m64}, DU_U }, \ | |
497 | +END_OPCODES() \ | |
498 | +END_MNEMONIC() | |
499 | + | |
500 | +DEFINE_CMOVcc_MNEMONIC(O) | |
501 | +DEFINE_CMOVcc_MNEMONIC(NO) | |
502 | +DEFINE_CMOVcc_MNEMONIC(B) | |
503 | +DEFINE_CMOVcc_MNEMONIC(NB) | |
504 | +DEFINE_CMOVcc_MNEMONIC(Z) | |
505 | +DEFINE_CMOVcc_MNEMONIC(NZ) | |
506 | +DEFINE_CMOVcc_MNEMONIC(BE) | |
507 | +DEFINE_CMOVcc_MNEMONIC(NBE) | |
508 | +DEFINE_CMOVcc_MNEMONIC(S) | |
509 | +DEFINE_CMOVcc_MNEMONIC(NS) | |
510 | +DEFINE_CMOVcc_MNEMONIC(P) | |
511 | +DEFINE_CMOVcc_MNEMONIC(NP) | |
512 | +DEFINE_CMOVcc_MNEMONIC(L) | |
513 | +DEFINE_CMOVcc_MNEMONIC(NL) | |
514 | +DEFINE_CMOVcc_MNEMONIC(LE) | |
515 | +DEFINE_CMOVcc_MNEMONIC(NLE) | |
516 | + | |
517 | +#undef DEFINE_CMOVcc_MNEMONIC | |
518 | + | |
519 | +/***************************************************************************** | |
520 | + ***** SSE conversion routines ***** | |
521 | +*****************************************************************************/ | |
522 | +// | |
523 | +// double -> float | |
524 | +BEGIN_MNEMONIC(CVTSD2SS, MF_NONE, D_U ) | |
525 | +BEGIN_OPCODES() | |
526 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x5A, _r}, {xmm32, xmm_m64}, D_U }, | |
527 | +END_OPCODES() | |
528 | +END_MNEMONIC() | |
529 | + | |
530 | +// double -> I_32 | |
531 | +BEGIN_MNEMONIC(CVTSD2SI, MF_NONE, D_U ) | |
532 | +BEGIN_OPCODES() | |
533 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x2D, _r}, {r32, xmm_m64}, D_U }, | |
534 | + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2D, _r}, {r64, xmm_m64}, D_U }, | |
535 | +END_OPCODES() | |
536 | +END_MNEMONIC() | |
537 | + | |
538 | +// double [truncated] -> I_32 | |
539 | +BEGIN_MNEMONIC(CVTTSD2SI, MF_NONE, D_U ) | |
540 | +BEGIN_OPCODES() | |
541 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x2C, _r}, {r32, xmm_m64}, D_U }, | |
542 | + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2C, _r}, {r64, xmm_m64}, D_U }, | |
543 | +END_OPCODES() | |
544 | +END_MNEMONIC() | |
545 | + | |
546 | +// float -> double | |
547 | +BEGIN_MNEMONIC(CVTSS2SD, MF_NONE, D_U ) | |
548 | +BEGIN_OPCODES() | |
549 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x5A, _r}, {xmm64, xmm_m32}, D_U }, | |
550 | +END_OPCODES() | |
551 | +END_MNEMONIC() | |
552 | + | |
553 | +// float -> I_32 | |
554 | +BEGIN_MNEMONIC(CVTSS2SI, MF_NONE, D_U ) | |
555 | +BEGIN_OPCODES() | |
556 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x2D, _r}, {r32, xmm_m32}, D_U}, | |
557 | + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2D, _r}, {r64, xmm_m32}, D_U}, | |
558 | +END_OPCODES() | |
559 | +END_MNEMONIC() | |
560 | + | |
561 | +// float [truncated] -> I_32 | |
562 | +BEGIN_MNEMONIC(CVTTSS2SI, MF_NONE, D_U ) | |
563 | +BEGIN_OPCODES() | |
564 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x2C, _r}, {r32, xmm_m32}, D_U}, | |
565 | + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2C, _r}, {r64, xmm_m32}, D_U}, | |
566 | +END_OPCODES() | |
567 | +END_MNEMONIC() | |
568 | + | |
569 | +// I_32 -> double | |
570 | +BEGIN_MNEMONIC(CVTSI2SD, MF_NONE, D_U ) | |
571 | +BEGIN_OPCODES() | |
572 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m32}, D_U}, | |
573 | + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m64}, D_U}, | |
574 | +END_OPCODES() | |
575 | +END_MNEMONIC() | |
576 | + | |
577 | +// I_32 -> float | |
578 | +BEGIN_MNEMONIC(CVTSI2SS, MF_NONE, D_U ) | |
579 | +BEGIN_OPCODES() | |
580 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m32}, D_U}, | |
581 | + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m64}, D_U}, | |
582 | +END_OPCODES() | |
583 | +END_MNEMONIC() | |
584 | + | |
585 | +// | |
586 | +// ~ SSE conversions | |
587 | +// | |
588 | + | |
589 | +BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU ) | |
590 | +BEGIN_OPCODES() | |
591 | + {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU }, | |
592 | + | |
593 | + {OpcodeInfo::all, {Size16, 0xFF, _1}, {r_m16}, DU }, | |
594 | + {OpcodeInfo::all, {0xFF, _1}, {r_m32}, DU }, | |
595 | + {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU }, | |
596 | + | |
597 | + {OpcodeInfo::ia32, {Size16, 0x48|rw}, {r16}, DU }, | |
598 | + {OpcodeInfo::ia32, {0x48|rd}, {r32}, DU }, | |
599 | +END_OPCODES() | |
600 | +END_MNEMONIC() | |
601 | + | |
602 | + | |
603 | +BEGIN_MNEMONIC(DIVSD, MF_NONE, DU_U) | |
604 | +BEGIN_OPCODES() | |
605 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x5E, _r}, {xmm64, xmm_m64}, DU_U }, | |
606 | +END_OPCODES() | |
607 | +END_MNEMONIC() | |
608 | + | |
609 | + | |
610 | +BEGIN_MNEMONIC(DIVSS, MF_NONE, DU_U) | |
611 | +BEGIN_OPCODES() | |
612 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x5E, _r}, {xmm32, xmm_m32}, DU_U }, | |
613 | +END_OPCODES() | |
614 | +END_MNEMONIC() | |
615 | + | |
616 | +/**************************************************************************** | |
617 | + ***** FPU operations ***** | |
618 | +****************************************************************************/ | |
619 | + | |
620 | +BEGIN_MNEMONIC(FADDP, MF_NONE, DU ) | |
621 | +BEGIN_OPCODES() | |
622 | + {OpcodeInfo::all, {0xDE, 0xC1}, {FP0D}, DU }, | |
623 | + {OpcodeInfo::all, {0xDE, 0xC1}, {FP0S}, DU }, | |
624 | +END_OPCODES() | |
625 | +END_MNEMONIC() | |
626 | + | |
627 | +BEGIN_MNEMONIC(FLDZ, MF_NONE, U ) | |
628 | +BEGIN_OPCODES() | |
629 | + {OpcodeInfo::all, {0xD9, 0xEE}, {FP0D}, D }, | |
630 | + {OpcodeInfo::all, {0xD9, 0xEE}, {FP0S}, D }, | |
631 | +END_OPCODES() | |
632 | +END_MNEMONIC() | |
633 | + | |
634 | +BEGIN_MNEMONIC(FADD, MF_NONE, U ) | |
635 | +BEGIN_OPCODES() | |
636 | + {OpcodeInfo::all, {0xDC, _0}, {FP0D, m64}, DU_U }, | |
637 | + {OpcodeInfo::all, {0xD8, _0}, {FP0S, m32}, DU_U }, | |
638 | +END_OPCODES() | |
639 | +END_MNEMONIC() | |
640 | + | |
641 | +BEGIN_MNEMONIC(FSUBP, MF_NONE, DU ) | |
642 | +BEGIN_OPCODES() | |
643 | + {OpcodeInfo::all, {0xDE, 0xE9}, {FP0D}, DU }, | |
644 | + {OpcodeInfo::all, {0xDE, 0xE9}, {FP0S}, DU }, | |
645 | +END_OPCODES() | |
646 | +END_MNEMONIC() | |
647 | + | |
648 | +BEGIN_MNEMONIC(FSUB, MF_NONE, U ) | |
649 | +BEGIN_OPCODES() | |
650 | + {OpcodeInfo::all, {0xDC, _4}, {FP0D, m64}, DU_U }, | |
651 | + {OpcodeInfo::all, {0xD8, _4}, {FP0S, m32}, DU_U }, | |
652 | +END_OPCODES() | |
653 | +END_MNEMONIC() | |
654 | + | |
655 | +BEGIN_MNEMONIC(FISUB, MF_NONE, U ) | |
656 | +BEGIN_OPCODES() | |
657 | + {OpcodeInfo::all, {0xDA, _4}, {FP0S, m32}, DU_U }, | |
658 | +// {OpcodeInfo::all, {0xDE, _4}, {FP0S, m16}, DU_U }, | |
659 | +END_OPCODES() | |
660 | +END_MNEMONIC() | |
661 | + | |
662 | + | |
663 | + | |
664 | +BEGIN_MNEMONIC(FMUL, MF_NONE, DU_U ) | |
665 | +BEGIN_OPCODES() | |
666 | + {OpcodeInfo::all, {0xD8, _1}, {FP0S, m32}, DU_U }, | |
667 | + {OpcodeInfo::all, {0xDC, _1}, {FP0D, m64}, DU_U }, | |
668 | +END_OPCODES() | |
669 | +END_MNEMONIC() | |
670 | + | |
671 | +BEGIN_MNEMONIC(FMULP, MF_NONE, DU ) | |
672 | +BEGIN_OPCODES() | |
673 | + {OpcodeInfo::all, {0xDE, 0xC9}, {FP0D}, DU }, | |
674 | + {OpcodeInfo::all, {0xDE, 0xC9}, {FP0S}, DU }, | |
675 | +END_OPCODES() | |
676 | +END_MNEMONIC() | |
677 | + | |
678 | +BEGIN_MNEMONIC(FDIVP, MF_NONE, DU ) | |
679 | +BEGIN_OPCODES() | |
680 | + {OpcodeInfo::all, {0xDE, 0xF9}, {FP0D}, DU }, | |
681 | + {OpcodeInfo::all, {0xDE, 0xF9}, {FP0S}, DU }, | |
682 | +END_OPCODES() | |
683 | +END_MNEMONIC() | |
684 | + | |
685 | +BEGIN_MNEMONIC(FDIV, MF_NONE, U ) | |
686 | +BEGIN_OPCODES() | |
687 | + {OpcodeInfo::all, {0xDC, _6}, {FP0D, m64}, DU_U }, | |
688 | + {OpcodeInfo::all, {0xD8, _6}, {FP0S, m32}, DU_U }, | |
689 | +END_OPCODES() | |
690 | +END_MNEMONIC() | |
691 | + | |
692 | + | |
693 | +BEGIN_MNEMONIC(FUCOM, MF_NONE, D_U ) | |
694 | +BEGIN_OPCODES() | |
695 | + {OpcodeInfo::all, {0xDD, 0xE1}, {FP0D, FP1D}, DU_U }, | |
696 | + {OpcodeInfo::all, {0xDD, 0xE1}, {FP0S, FP1S}, DU_U }, | |
697 | + // A little trick: actually, these 2 opcodes take only the index of the | |
698 | + // needed register. To make things similar to other instructions, | |
699 | + // we encode them here as if they took FPREG. | |
700 | + {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp32}, DU }, | |
701 | + {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp64}, DU }, | |
702 | +END_OPCODES() | |
703 | +END_MNEMONIC() | |
704 | + | |
705 | +BEGIN_MNEMONIC(FUCOMI, MF_NONE, D_U ) | |
706 | +BEGIN_OPCODES() | |
707 | + // A little trick: actually, these 2 opcodes take only the index of the | |
708 | + // needed register. To make things similar to other instructions, | |
709 | + // we encode them here as if they took FPREG. | |
710 | + {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp32}, DU }, | |
711 | + {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp64}, DU }, | |
712 | +END_OPCODES() | |
713 | +END_MNEMONIC() | |
714 | + | |
715 | +BEGIN_MNEMONIC(FUCOMP, MF_NONE, D_U ) | |
716 | +BEGIN_OPCODES() | |
717 | + {OpcodeInfo::all, {0xDD, 0xE9}, {FP0D, FP1D}, DU_U }, | |
718 | + {OpcodeInfo::all, {0xDD, 0xE9}, {FP0S, FP1S}, DU_U }, | |
719 | + // A little trick: actually, these 2 opcodes take only the index of the | |
720 | + // needed register. To make things similar to other instructions, | |
721 | + // we encode them here as if they took FPREG. | |
722 | + {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp32}, DU }, | |
723 | + {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp64}, DU }, | |
724 | +END_OPCODES() | |
725 | +END_MNEMONIC() | |
726 | + | |
727 | +BEGIN_MNEMONIC(FUCOMIP, MF_NONE, D_U ) | |
728 | +BEGIN_OPCODES() | |
729 | + // A little trick: actually, these 2 opcodes take only the index of the | |
730 | + // needed register. To make things similar to other instructions, | |
731 | + // we encode them here as if they took FPREG. | |
732 | + {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp32}, DU }, | |
733 | + {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp64}, DU }, | |
734 | +END_OPCODES() | |
735 | +END_MNEMONIC() | |
736 | + | |
737 | +BEGIN_MNEMONIC(FUCOMPP, MF_NONE, U ) | |
738 | +BEGIN_OPCODES() | |
739 | + {OpcodeInfo::all, {0xDA, 0xE9}, {FP0D, FP1D}, DU_U }, | |
740 | + {OpcodeInfo::all, {0xDA, 0xE9}, {FP0S, FP1S}, DU_U }, | |
741 | +END_OPCODES() | |
742 | +END_MNEMONIC() | |
743 | + | |
744 | +BEGIN_MNEMONIC(FLDCW, MF_NONE, U ) | |
745 | +BEGIN_OPCODES() | |
746 | + {OpcodeInfo::all, {0xD9, _5}, {m16}, U }, | |
747 | +END_OPCODES() | |
748 | +END_MNEMONIC() | |
749 | + | |
750 | +BEGIN_MNEMONIC(FNSTCW, MF_NONE, D) | |
751 | +BEGIN_OPCODES() | |
752 | + {OpcodeInfo::all, {0xD9, _7}, {m16}, D }, | |
753 | +END_OPCODES() | |
754 | +END_MNEMONIC() | |
755 | + | |
756 | +BEGIN_MNEMONIC(FSTSW, MF_NONE, D) | |
757 | +BEGIN_OPCODES() | |
758 | + {OpcodeInfo::all, {0x9B, 0xDF, 0xE0}, {EAX}, D }, | |
759 | +END_OPCODES() | |
760 | +END_MNEMONIC() | |
761 | + | |
762 | +BEGIN_MNEMONIC(FNSTSW, MF_NONE, D) | |
763 | +BEGIN_OPCODES() | |
764 | + {OpcodeInfo::all, {0xDF, 0xE0}, {EAX}, D }, | |
765 | +END_OPCODES() | |
766 | +END_MNEMONIC() | |
767 | + | |
768 | +BEGIN_MNEMONIC(FCHS, MF_NONE, DU ) | |
769 | +BEGIN_OPCODES() | |
770 | + {OpcodeInfo::all, {0xD9, 0xE0}, {FP0D}, DU }, | |
771 | + {OpcodeInfo::all, {0xD9, 0xE0}, {FP0S}, DU }, | |
772 | +END_OPCODES() | |
773 | +END_MNEMONIC() | |
774 | + | |
775 | +BEGIN_MNEMONIC(FCLEX, MF_NONE, N) | |
776 | +BEGIN_OPCODES() | |
777 | + {OpcodeInfo::all, {0x9B, 0xDB, 0xE2}, {}, N }, | |
778 | +END_OPCODES() | |
779 | +END_MNEMONIC() | |
780 | + | |
781 | +BEGIN_MNEMONIC(FNCLEX, MF_NONE, N) | |
782 | +BEGIN_OPCODES() | |
783 | + {OpcodeInfo::all, {0xDB, 0xE2}, {}, N }, | |
784 | +END_OPCODES() | |
785 | +END_MNEMONIC() | |
786 | + | |
787 | +//BEGIN_MNEMONIC(FDECSTP, MF_NONE, N) | |
788 | +// BEGIN_OPCODES() | |
789 | +// {OpcodeInfo::all, {0xD9, 0xF6}, {}, N }, | |
790 | +// END_OPCODES() | |
791 | +//END_MNEMONIC() | |
792 | + | |
793 | +BEGIN_MNEMONIC(FILD, MF_NONE, D_U ) | |
794 | +BEGIN_OPCODES() | |
795 | + {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U }, | |
796 | + {OpcodeInfo::all, {0xDF, _5}, {FP0D, m64}, D_U }, | |
797 | + {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U }, | |
798 | +END_OPCODES() | |
799 | +END_MNEMONIC() | |
800 | + | |
801 | +//BEGIN_MNEMONIC(FINCSTP, MF_NONE, N) | |
802 | +// BEGIN_OPCODES() | |
803 | +// {OpcodeInfo::all, {0xD9, 0xF7}, {}, N }, | |
804 | +// END_OPCODES() | |
805 | +//END_MNEMONIC() | |
806 | + | |
807 | +BEGIN_MNEMONIC(FIST, MF_NONE, D_U ) | |
808 | +BEGIN_OPCODES() | |
809 | + {OpcodeInfo::all, {0xDB, _2}, {m32, FP0S}, D_U }, | |
810 | +END_OPCODES() | |
811 | +END_MNEMONIC() | |
812 | + | |
813 | +BEGIN_MNEMONIC(FISTP, MF_NONE, D_U ) | |
814 | +BEGIN_OPCODES() | |
815 | + {OpcodeInfo::all, {0xDB, _3}, {m32, FP0S}, D_U }, | |
816 | + {OpcodeInfo::all, {0xDF, _7}, {m64, FP0D}, D_U }, | |
817 | +END_OPCODES() | |
818 | +END_MNEMONIC() | |
819 | + | |
820 | +BEGIN_MNEMONIC(FISTTP, MF_NONE, D_U ) | |
821 | +BEGIN_OPCODES() | |
822 | + {OpcodeInfo::all, {0xDD, _1}, {m64, FP0D}, D_U }, | |
823 | + {OpcodeInfo::all, {0xDB, _1}, {m32, FP0S}, D_U }, | |
824 | +END_OPCODES() | |
825 | +END_MNEMONIC() | |
826 | + | |
827 | +BEGIN_MNEMONIC(FRNDINT, MF_NONE, DU ) | |
828 | +BEGIN_OPCODES() | |
829 | + {OpcodeInfo::all, {0xD9, 0xFC}, {FP0S}, DU }, | |
830 | + {OpcodeInfo::all, {0xD9, 0xFC}, {FP0D}, DU }, | |
831 | +END_OPCODES() | |
832 | +END_MNEMONIC() | |
833 | + | |
834 | +BEGIN_MNEMONIC(FLD, MF_NONE, D_U ) | |
835 | +BEGIN_OPCODES() | |
836 | + {OpcodeInfo::all, {0xD9, _0}, {FP0S, m32}, D_U }, | |
837 | + {OpcodeInfo::all, {0xDD, _0}, {FP0D, m64}, D_U }, | |
838 | +END_OPCODES() | |
839 | +END_MNEMONIC() | |
840 | + | |
841 | +BEGIN_MNEMONIC(FLDLG2, MF_NONE, U ) | |
842 | +BEGIN_OPCODES() | |
843 | + {OpcodeInfo::all, {0xD9, 0xEC}, {FP0S}, D }, | |
844 | + {OpcodeInfo::all, {0xD9, 0xEC}, {FP0D}, D }, | |
845 | +END_OPCODES() | |
846 | +END_MNEMONIC() | |
847 | + | |
848 | +BEGIN_MNEMONIC(FLDLN2, MF_NONE, U ) | |
849 | +BEGIN_OPCODES() | |
850 | + {OpcodeInfo::all, {0xD9, 0xED}, {FP0S}, D }, | |
851 | + {OpcodeInfo::all, {0xD9, 0xED}, {FP0D}, D }, | |
852 | +END_OPCODES() | |
853 | +END_MNEMONIC() | |
854 | + | |
855 | +BEGIN_MNEMONIC(FLD1, MF_NONE, U ) | |
856 | +BEGIN_OPCODES() | |
857 | + {OpcodeInfo::all, {0xD9, 0xE8}, {FP0S}, D }, | |
858 | + {OpcodeInfo::all, {0xD9, 0xE8}, {FP0D}, D }, | |
859 | +END_OPCODES() | |
860 | +END_MNEMONIC() | |
861 | + | |
862 | + | |
863 | +BEGIN_MNEMONIC(FPREM, MF_NONE, N) | |
864 | + BEGIN_OPCODES() | |
865 | + {OpcodeInfo::all, {0xD9, 0xF8}, {}, N }, | |
866 | + END_OPCODES() | |
867 | +END_MNEMONIC() | |
868 | + | |
869 | +BEGIN_MNEMONIC(FPREM1, MF_NONE, N) | |
870 | +BEGIN_OPCODES() | |
871 | + {OpcodeInfo::all, {0xD9, 0xF5}, {}, N }, | |
872 | +END_OPCODES() | |
873 | +END_MNEMONIC() | |
874 | + | |
875 | +BEGIN_MNEMONIC(FST, MF_NONE, D_U ) | |
876 | +BEGIN_OPCODES() | |
877 | + {OpcodeInfo::all, {0xD9, _2}, {m32, FP0S}, D_U }, | |
878 | + {OpcodeInfo::all, {0xDD, _2}, {m64, FP0D}, D_U }, | |
879 | + // A little trick: actually, these 2 opcodes take only the index of the | |
880 | + // needed register. To make things similar to other instructions, | |
881 | + // we encode them here as if they took FPREG. | |
882 | + {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp32}, D }, | |
883 | + {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp64}, D }, | |
884 | +END_OPCODES() | |
885 | +END_MNEMONIC() | |
886 | + | |
887 | +BEGIN_MNEMONIC(FSTP, MF_NONE, D_U ) | |
888 | +BEGIN_OPCODES() | |
889 | + {OpcodeInfo::all, {0xD9, _3}, {m32, FP0S}, D_U }, | |
890 | + {OpcodeInfo::all, {0xDD, _3}, {m64, FP0D}, D_U }, | |
891 | + // A little trick: actually, these 2 opcodes take only the index of the | |
892 | + // needed register. To make things similar to other instructions, | |
893 | + // we encode them here as if they took FPREG. | |
894 | + {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp32}, D }, | |
895 | + {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp64}, D }, | |
896 | +END_OPCODES() | |
897 | +END_MNEMONIC() | |
898 | + | |
899 | +BEGIN_MNEMONIC(FSQRT, MF_NONE, DU) | |
900 | + BEGIN_OPCODES() | |
901 | + {OpcodeInfo::all, {0xD9, 0xFA}, {FP0S}, DU }, | |
902 | + {OpcodeInfo::all, {0xD9, 0xFA}, {FP0D}, DU }, | |
903 | + END_OPCODES() | |
904 | +END_MNEMONIC() | |
905 | + | |
906 | + | |
907 | +BEGIN_MNEMONIC(FYL2X, MF_NONE, DU) | |
908 | + BEGIN_OPCODES() | |
909 | + {OpcodeInfo::all, {0xD9, 0xF1}, {FP0S}, DU }, | |
910 | + {OpcodeInfo::all, {0xD9, 0xF1}, {FP0D}, DU }, | |
911 | + END_OPCODES() | |
912 | +END_MNEMONIC() | |
913 | + | |
914 | + | |
915 | +BEGIN_MNEMONIC(FYL2XP1, MF_NONE, DU) | |
916 | + BEGIN_OPCODES() | |
917 | + {OpcodeInfo::all, {0xD9, 0xF9}, {FP0S}, DU }, | |
918 | + {OpcodeInfo::all, {0xD9, 0xF9}, {FP0D}, DU }, | |
919 | + END_OPCODES() | |
920 | +END_MNEMONIC() | |
921 | + | |
922 | +BEGIN_MNEMONIC(F2XM1, MF_NONE, DU) | |
923 | + BEGIN_OPCODES() | |
924 | + {OpcodeInfo::all, {0xD9, 0xF0}, {FP0S}, DU }, | |
925 | + {OpcodeInfo::all, {0xD9, 0xF0}, {FP0D}, DU }, | |
926 | + END_OPCODES() | |
927 | +END_MNEMONIC() | |
928 | + | |
929 | +BEGIN_MNEMONIC(FPATAN, MF_NONE, DU) | |
930 | + BEGIN_OPCODES() | |
931 | + {OpcodeInfo::all, {0xD9, 0xF3}, {FP0S}, DU }, | |
932 | + {OpcodeInfo::all, {0xD9, 0xF3}, {FP0D}, DU }, | |
933 | + END_OPCODES() | |
934 | +END_MNEMONIC() | |
935 | + | |
936 | +BEGIN_MNEMONIC(FXCH, MF_NONE, DU) | |
937 | + BEGIN_OPCODES() | |
938 | + {OpcodeInfo::all, {0xD9, 0xC9}, {FP0S}, DU }, | |
939 | + {OpcodeInfo::all, {0xD9, 0xC9}, {FP0D}, DU }, | |
940 | + END_OPCODES() | |
941 | +END_MNEMONIC() | |
942 | + | |
943 | +BEGIN_MNEMONIC(FSCALE, MF_NONE, DU) | |
944 | + BEGIN_OPCODES() | |
945 | + {OpcodeInfo::all, {0xD9, 0xFD}, {FP0S}, DU }, | |
946 | + {OpcodeInfo::all, {0xD9, 0xFD}, {FP0D}, DU }, | |
947 | + END_OPCODES() | |
948 | +END_MNEMONIC() | |
949 | + | |
950 | +BEGIN_MNEMONIC(FABS, MF_NONE, DU) | |
951 | + BEGIN_OPCODES() | |
952 | + {OpcodeInfo::all, {0xD9, 0xE1}, {FP0S}, DU }, | |
953 | + {OpcodeInfo::all, {0xD9, 0xE1}, {FP0D}, DU }, | |
954 | + END_OPCODES() | |
955 | +END_MNEMONIC() | |
956 | + | |
957 | +BEGIN_MNEMONIC(FSIN, MF_NONE, DU) | |
958 | + BEGIN_OPCODES() | |
959 | + {OpcodeInfo::all, {0xD9, 0xFE}, {FP0S}, DU }, | |
960 | + {OpcodeInfo::all, {0xD9, 0xFE}, {FP0D}, DU }, | |
961 | + END_OPCODES() | |
962 | +END_MNEMONIC() | |
963 | + | |
964 | +BEGIN_MNEMONIC(FCOS, MF_NONE, DU) | |
965 | + BEGIN_OPCODES() | |
966 | + {OpcodeInfo::all, {0xD9, 0xFF}, {FP0S}, DU }, | |
967 | + {OpcodeInfo::all, {0xD9, 0xFF}, {FP0D}, DU }, | |
968 | + END_OPCODES() | |
969 | +END_MNEMONIC() | |
970 | + | |
971 | +BEGIN_MNEMONIC(FPTAN, MF_NONE, DU) | |
972 | + BEGIN_OPCODES() | |
973 | + {OpcodeInfo::all, {0xD9, 0xF2}, {FP0S}, DU }, | |
974 | + {OpcodeInfo::all, {0xD9, 0xF2}, {FP0D}, DU }, | |
975 | + END_OPCODES() | |
976 | +END_MNEMONIC() | |
977 | + | |
978 | +// | |
979 | +// ~ FPU | |
980 | +// | |
981 | + | |
982 | +BEGIN_MNEMONIC(DIV, MF_AFFECTS_FLAGS, DU_DU_U) | |
983 | +BEGIN_OPCODES() | |
984 | +#if !defined(_EM64T_) | |
985 | + {OpcodeInfo::all, {0xF6, _6}, {AH, AL, r_m8}, DU_DU_U }, | |
986 | + {OpcodeInfo::all, {Size16, 0xF7, _6}, {DX, AX, r_m16}, DU_DU_U }, | |
987 | +#endif | |
988 | + {OpcodeInfo::all, {0xF7, _6}, {EDX, EAX, r_m32}, DU_DU_U }, | |
989 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _6}, {RDX, RAX, r_m64}, DU_DU_U }, | |
990 | +END_OPCODES() | |
991 | +END_MNEMONIC() | |
992 | + | |
993 | +BEGIN_MNEMONIC(IDIV, MF_AFFECTS_FLAGS, DU_DU_U) | |
994 | +BEGIN_OPCODES() | |
995 | +#if !defined(_EM64T_) | |
996 | + {OpcodeInfo::all, {0xF6, _7}, {AH, AL, r_m8}, DU_DU_U }, | |
997 | + {OpcodeInfo::all, {Size16, 0xF7, _7}, {DX, AX, r_m16}, DU_DU_U }, | |
998 | +#endif | |
999 | + {OpcodeInfo::all, {0xF7, _7}, {EDX, EAX, r_m32}, DU_DU_U }, | |
1000 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _7}, {RDX, RAX, r_m64}, DU_DU_U }, | |
1001 | +END_OPCODES() | |
1002 | +END_MNEMONIC() | |
1003 | + | |
1004 | + | |
1005 | +BEGIN_MNEMONIC(IMUL, MF_AFFECTS_FLAGS, D_DU_U) | |
1006 | +BEGIN_OPCODES() | |
1007 | + /*{OpcodeInfo::all, {0xF6, _5}, {AH, AL, r_m8}, D_DU_U }, | |
1008 | + {OpcodeInfo::all, {Size16, 0xF7, _5}, {DX, AX, r_m16}, D_DU_U }, | |
1009 | + */ | |
1010 | + // | |
1011 | + {OpcodeInfo::all, {0xF7, _5}, {EDX, EAX, r_m32}, D_DU_U }, | |
1012 | + //todo: this opcode's hash conflicts with IMUL r64,r_m64 - they're both 0. | |
1013 | + // this particular opcode is not currently used, so we may safely drop it, but we need to | |
1014 | + // revisit the hash implementation | |
1015 | + // {OpcodeInfo::em64t, {REX_W, 0xF7, _5}, {RDX, RAX, r_m64}, D_DU_U }, | |
1016 | + // | |
1017 | + {OpcodeInfo::all, {Size16, 0x0F, 0xAF, _r}, {r16,r_m16}, DU_U }, | |
1018 | + {OpcodeInfo::all, {0x0F, 0xAF, _r}, {r32,r_m32}, DU_U }, | |
1019 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xAF, _r}, {r64,r_m64}, DU_U }, | |
1020 | + {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,r_m16,imm8s}, D_DU_U }, | |
1021 | + {OpcodeInfo::all, {0x6B, _r, ib}, {r32,r_m32,imm8s}, D_DU_U }, | |
1022 | + {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,r_m64,imm8s}, D_DU_U }, | |
1023 | + {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,imm8s}, DU_U }, | |
1024 | + {OpcodeInfo::all, {0x6B, _r, ib}, {r32,imm8s}, DU_U }, | |
1025 | + {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,imm8s}, DU_U }, | |
1026 | + {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,r_m16,imm16}, D_U_U }, | |
1027 | + {OpcodeInfo::all, {0x69, _r, id}, {r32,r_m32,imm32}, D_U_U }, | |
1028 | + {OpcodeInfo::em64t, {REX_W, 0x69, _r, id}, {r64,r_m64,imm32s}, D_U_U }, | |
1029 | + {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,imm16}, DU_U }, | |
1030 | + {OpcodeInfo::all, {0x69, _r, id}, {r32,imm32}, DU_U }, | |
1031 | +END_OPCODES() | |
1032 | +END_MNEMONIC() | |
1033 | + | |
1034 | +BEGIN_MNEMONIC(MUL, MF_AFFECTS_FLAGS, U ) | |
1035 | +BEGIN_OPCODES() | |
1036 | + {OpcodeInfo::all, {0xF6, _4}, {AX, AL, r_m8}, D_DU_U }, | |
1037 | + {OpcodeInfo::all, {Size16, 0xF7, _4}, {DX, AX, r_m16}, D_DU_U }, | |
1038 | + {OpcodeInfo::all, {0xF7, _4}, {EDX, EAX, r_m32}, D_DU_U }, | |
1039 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _4}, {RDX, RAX, r_m64}, D_DU_U }, | |
1040 | +END_OPCODES() | |
1041 | +END_MNEMONIC() | |
1042 | + | |
1043 | +BEGIN_MNEMONIC(INC, MF_AFFECTS_FLAGS, DU ) | |
1044 | +BEGIN_OPCODES() | |
1045 | + {OpcodeInfo::all, {0xFE, _0}, {r_m8}, DU }, | |
1046 | + {OpcodeInfo::all, {Size16, 0xFF, _0}, {r_m16}, DU }, | |
1047 | + {OpcodeInfo::all, {0xFF, _0}, {r_m32}, DU }, | |
1048 | + {OpcodeInfo::em64t, {REX_W, 0xFF, _0}, {r_m64}, DU }, | |
1049 | + {OpcodeInfo::ia32, {Size16, 0x40|rw}, {r16}, DU }, | |
1050 | + {OpcodeInfo::ia32, {0x40|rd}, {r32}, DU }, | |
1051 | +END_OPCODES() | |
1052 | +END_MNEMONIC() | |
1053 | + | |
1054 | +BEGIN_MNEMONIC(INT3, MF_NONE, N) | |
1055 | +BEGIN_OPCODES() | |
1056 | + {OpcodeInfo::all, {0xCC}, {}, N }, | |
1057 | +END_OPCODES() | |
1058 | +END_MNEMONIC() | |
1059 | + | |
1060 | +#define DEFINE_Jcc_MNEMONIC( cc ) \ | |
1061 | + BEGIN_MNEMONIC(J##cc, MF_USES_FLAGS|MF_CONDITIONAL, U ) \ | |
1062 | +BEGIN_OPCODES() \ | |
1063 | + {OpcodeInfo::all, {0x70 + ConditionMnemonic_##cc, cb }, { rel8 }, U }, \ | |
1064 | + {OpcodeInfo::ia32, {Size16, 0x0F, 0x80 + ConditionMnemonic_##cc, cw}, { rel16 }, U }, \ | |
1065 | + {OpcodeInfo::all, {0x0F, 0x80 + ConditionMnemonic_##cc, cd}, { rel32 }, U }, \ | |
1066 | +END_OPCODES() \ | |
1067 | +END_MNEMONIC() | |
1068 | + | |
1069 | + | |
1070 | +DEFINE_Jcc_MNEMONIC(O) | |
1071 | +DEFINE_Jcc_MNEMONIC(NO) | |
1072 | +DEFINE_Jcc_MNEMONIC(B) | |
1073 | +DEFINE_Jcc_MNEMONIC(NB) | |
1074 | +DEFINE_Jcc_MNEMONIC(Z) | |
1075 | +DEFINE_Jcc_MNEMONIC(NZ) | |
1076 | +DEFINE_Jcc_MNEMONIC(BE) | |
1077 | +DEFINE_Jcc_MNEMONIC(NBE) | |
1078 | + | |
1079 | +DEFINE_Jcc_MNEMONIC(S) | |
1080 | +DEFINE_Jcc_MNEMONIC(NS) | |
1081 | +DEFINE_Jcc_MNEMONIC(P) | |
1082 | +DEFINE_Jcc_MNEMONIC(NP) | |
1083 | +DEFINE_Jcc_MNEMONIC(L) | |
1084 | +DEFINE_Jcc_MNEMONIC(NL) | |
1085 | +DEFINE_Jcc_MNEMONIC(LE) | |
1086 | +DEFINE_Jcc_MNEMONIC(NLE) | |
1087 | + | |
1088 | +#undef DEFINE_Jcc_MNEMONIC | |
1089 | + | |
1090 | +BEGIN_MNEMONIC(JMP, MF_NONE, U ) | |
1091 | +BEGIN_OPCODES() | |
1092 | + {OpcodeInfo::all, {0xEB, cb}, {rel8}, U }, | |
1093 | + {OpcodeInfo::ia32, {Size16, 0xE9, cw}, {rel16}, U }, | |
1094 | + {OpcodeInfo::all, {0xE9, cd}, {rel32}, U }, | |
1095 | + {OpcodeInfo::ia32, {Size16, 0xFF, _4}, {r_m16}, U }, | |
1096 | + {OpcodeInfo::ia32, {0xFF, _4}, {r_m32}, U }, | |
1097 | + {OpcodeInfo::em64t, {0xFF, _4}, {r_m64}, U }, | |
1098 | +END_OPCODES() | |
1099 | +END_MNEMONIC() | |
1100 | + | |
1101 | +BEGIN_MNEMONIC(LEA, MF_NONE, D_U ) | |
1102 | +BEGIN_OPCODES() | |
1103 | + /* | |
1104 | + A special case: the LEA instruction itself does not care about the size of | |
1105 | + its second operand. It is obvious why, and thus The Manual uses a | |
1106 | + simple 'm' without a size. | |
1107 | + However, in Jitrino's instructions we'll have an operand with a size. | |
1108 | + Also, the hashing scheme is not supposed to handle OpndSize_Null, and | |
1109 | + making it do so would lead to unnecessary complication of the hashing | |
1110 | + scheme. Thus, instead of handling it as a special case, we simply make | |
1111 | + copies of the opcodes with sizes set. | |
1112 | + {OpcodeInfo::all, {0x8D, _r}, {r32, m}, D_U }, | |
1113 | + {OpcodeInfo::em64t, {0x8D, _r}, {r64, m}, D_U }, | |
1114 | + */ | |
1115 | + //Android x86: for r32, keep only the m32/m64 forms (the m8/m16 forms are disabled below); otherwise there will be decoding errors | |
1116 | + //{OpcodeInfo::all, {0x8D, _r}, {r32, m8}, D_U }, | |
1117 | + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m8}, D_U }, | |
1118 | + //{OpcodeInfo::all, {0x8D, _r}, {r32, m16}, D_U }, | |
1119 | + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m16}, D_U }, | |
1120 | + {OpcodeInfo::all, {0x8D, _r}, {r32, m32}, D_U }, | |
1121 | + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m32}, D_U }, | |
1122 | + {OpcodeInfo::all, {0x8D, _r}, {r32, m64}, D_U }, | |
1123 | + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m64}, D_U }, | |
1124 | +END_OPCODES() | |
1125 | +END_MNEMONIC() | |
1126 | + | |
1127 | +BEGIN_MNEMONIC(LOOP, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) | |
1128 | +BEGIN_OPCODES() | |
1129 | + {OpcodeInfo::all, {0xE2, cb}, {ECX, rel8}, DU_U }, | |
1130 | +END_OPCODES() | |
1131 | +END_MNEMONIC() | |
1132 | + | |
1133 | +BEGIN_MNEMONIC(LOOPE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) | |
1134 | +BEGIN_OPCODES() | |
1135 | + {OpcodeInfo::all, {0xE1, cb}, {ECX, rel8}, DU_U }, | |
1136 | +END_OPCODES() | |
1137 | +END_MNEMONIC() | |
1138 | + | |
1139 | +BEGIN_MNEMONIC(LOOPNE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) | |
1140 | +BEGIN_OPCODES() | |
1141 | + {OpcodeInfo::all, {0xE0, cb}, {ECX, rel8}, DU_U }, | |
1142 | +END_OPCODES() | |
1143 | +END_MNEMONIC() | |
1144 | + | |
1145 | +BEGIN_MNEMONIC(MOV, MF_NONE, D_U) | |
1146 | +BEGIN_OPCODES() | |
1147 | + {OpcodeInfo::all, {0x88, _r}, {r_m8,r8}, D_U }, | |
1148 | + | |
1149 | + {OpcodeInfo::all, {Size16, 0x89, _r}, {r_m16,r16}, D_U }, | |
1150 | + {OpcodeInfo::all, {0x89, _r}, {r_m32,r32}, D_U }, | |
1151 | + {OpcodeInfo::em64t, {REX_W, 0x89, _r}, {r_m64,r64}, D_U }, | |
1152 | + {OpcodeInfo::all, {0x8A, _r}, {r8,r_m8}, D_U }, | |
1153 | + | |
1154 | + {OpcodeInfo::all, {Size16, 0x8B, _r}, {r16,r_m16}, D_U }, | |
1155 | + {OpcodeInfo::all, {0x8B, _r}, {r32,r_m32}, D_U }, | |
1156 | + {OpcodeInfo::em64t, {REX_W, 0x8B, _r}, {r64,r_m64}, D_U }, | |
1157 | + | |
1158 | + {OpcodeInfo::all, {0xB0|rb}, {r8,imm8}, D_U }, | |
1159 | + | |
1160 | + {OpcodeInfo::all, {Size16, 0xB8|rw}, {r16,imm16}, D_U }, | |
1161 | + {OpcodeInfo::all, {0xB8|rd}, {r32,imm32}, D_U }, | |
1162 | + {OpcodeInfo::em64t, {REX_W, 0xB8|rd}, {r64,imm64}, D_U }, | |
1163 | + {OpcodeInfo::all, {0xC6, _0}, {r_m8,imm8}, D_U }, | |
1164 | + | |
1165 | + {OpcodeInfo::all, {Size16, 0xC7, _0}, {r_m16,imm16}, D_U }, | |
1166 | + {OpcodeInfo::all, {0xC7, _0}, {r_m32,imm32}, D_U }, | |
1167 | + {OpcodeInfo::em64t, {REX_W, 0xC7, _0}, {r_m64,imm32s}, D_U }, | |
1168 | + | |
1169 | + {OpcodeInfo::decoder, {0xA0}, {AL, moff8}, D_U }, | |
1170 | + {OpcodeInfo::decoder, {Size16, 0xA1}, {AX, moff16}, D_U }, | |
1171 | + {OpcodeInfo::decoder, {0xA1}, {EAX, moff32}, D_U }, | |
1172 | + //{OpcodeInfo::decoder64, {REX_W, 0xA1}, {RAX, moff64}, D_U }, | |
1173 | + | |
1174 | + {OpcodeInfo::decoder, {0xA2}, {moff8, AL}, D_U }, | |
1175 | + {OpcodeInfo::decoder, {Size16, 0xA3}, {moff16, AX}, D_U }, | |
1176 | + {OpcodeInfo::decoder, {0xA3}, {moff32, EAX}, D_U }, | |
1177 | + //{OpcodeInfo::decoder64, {REX_W, 0xA3}, {moff64, RAX}, D_U }, | |
1178 | +END_OPCODES() | |
1179 | +END_MNEMONIC() | |
1180 | + | |
1181 | + | |
1182 | + | |
1183 | +BEGIN_MNEMONIC(XCHG, MF_NONE, DU_DU ) | |
1184 | +BEGIN_OPCODES() | |
1185 | + {OpcodeInfo::all, {0x87, _r}, {r_m32,r32}, DU_DU }, | |
1186 | +END_OPCODES() | |
1187 | +END_MNEMONIC() | |
1188 | + | |
1189 | + | |
1190 | +BEGIN_MNEMONIC(MOVQ, MF_NONE, D_U ) | |
1191 | +BEGIN_OPCODES() | |
1192 | +#ifdef _HAVE_MMX_ | |
1193 | + {OpcodeInfo::all, {0x0F, 0x6F, _r}, {mm64, mm_m64}, D_U }, | |
1194 | + {OpcodeInfo::all, {0x0F, 0x7F, _r}, {mm_m64, mm64}, D_U }, | |
1195 | +#endif | |
1196 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x7E }, {xmm64, xmm_m64}, D_U }, | |
1197 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD6 }, {xmm_m64, xmm64}, D_U }, | |
1198 | +// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r_m64}, D_U }, | |
1199 | +// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r_m64, xmm64}, D_U }, | |
1200 | + {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r64}, D_U }, | |
1201 | + {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r64, xmm64}, D_U }, | |
1202 | +END_OPCODES() | |
1203 | +END_MNEMONIC() | |
1204 | + | |
1205 | + | |
1206 | +BEGIN_MNEMONIC(MOVD, MF_NONE, D_U ) | |
1207 | +BEGIN_OPCODES() | |
1208 | + {OpcodeInfo::all, {0x66, 0x0F, 0x6E, _r}, {xmm32, r_m32}, D_U }, | |
1209 | + {OpcodeInfo::all, {0x66, 0x0F, 0x7E, _r}, {r_m32, xmm32}, D_U }, | |
1210 | +END_OPCODES() | |
1211 | +END_MNEMONIC() | |
1212 | + | |
1213 | +// | |
1214 | +// A bunch of MMX instructions | |
1215 | +// | |
1216 | +#ifdef _HAVE_MMX_ | |
1217 | + | |
1218 | +BEGIN_MNEMONIC(EMMS, MF_NONE, N) | |
1219 | +BEGIN_OPCODES() | |
1220 | + {OpcodeInfo::all, {0x0F, 0x77}, {}, N }, | |
1221 | +END_OPCODES() | |
1222 | +END_MNEMONIC() | |
1223 | + | |
1224 | +#endif | |
1225 | + | |
1226 | +BEGIN_MNEMONIC(PADDQ, MF_NONE, DU_U) | |
1227 | +BEGIN_OPCODES() | |
1228 | +#ifdef _HAVE_MMX_ | |
1229 | + {OpcodeInfo::all, {0x0F, 0xD4, _r}, {mm64, mm_m64}, DU_U }, | |
1230 | +#endif | |
1231 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD4, _r}, {xmm64, xmm_m64}, DU_U }, | |
1232 | +END_OPCODES() | |
1233 | +END_MNEMONIC() | |
1234 | + | |
1235 | +BEGIN_MNEMONIC(PAND, MF_NONE, DU_U) | |
1236 | +BEGIN_OPCODES() | |
1237 | +#ifdef _HAVE_MMX_ | |
1238 | + {OpcodeInfo::all, {0x0F, 0xDB, _r}, {mm64, mm_m64}, DU_U }, | |
1239 | +#endif | |
1240 | + {OpcodeInfo::all, {0x66, 0x0F, 0xDB, _r}, {xmm64, xmm_m64}, DU_U }, | |
1241 | +END_OPCODES() | |
1242 | +END_MNEMONIC() | |
1243 | + | |
1244 | +BEGIN_MNEMONIC(POR, MF_NONE, DU_U) | |
1245 | +BEGIN_OPCODES() | |
1246 | +#ifdef _HAVE_MMX_ | |
1247 | + {OpcodeInfo::all, {0x0F, 0xEB, _r}, {mm64, mm_m64}, DU_U }, | |
1248 | +#endif | |
1249 | + {OpcodeInfo::all, {0x66, 0x0F, 0xEB, _r}, {xmm64, xmm_m64}, DU_U }, | |
1250 | +END_OPCODES() | |
1251 | +END_MNEMONIC() | |
1252 | + | |
1253 | +BEGIN_MNEMONIC(PSUBQ, MF_NONE, DU_U) | |
1254 | +BEGIN_OPCODES() | |
1255 | +#ifdef _HAVE_MMX_ | |
1256 | + {OpcodeInfo::all, {0x0F, 0xFB, _r}, {mm64, mm_m64}, DU_U }, | |
1257 | +#endif | |
1258 | + {OpcodeInfo::all, {0x66, 0x0F, 0xFB, _r}, {xmm64, xmm_m64}, DU_U }, | |
1259 | +END_OPCODES() | |
1260 | +END_MNEMONIC() | |
1261 | + | |
1262 | +BEGIN_MNEMONIC(PANDN, MF_NONE, DU_U) | |
1263 | +BEGIN_OPCODES() | |
1264 | +#ifdef _HAVE_MMX_ | |
1265 | + {OpcodeInfo::all, {0x0F, 0xDF, _r}, {mm64, mm_m64}, DU_U }, | |
1266 | +#endif | |
1267 | + {OpcodeInfo::all, {0x66, 0x0F, 0xDF, _r}, {xmm64, xmm_m64}, DU_U }, | |
1268 | +END_OPCODES() | |
1269 | +END_MNEMONIC() | |
1270 | +BEGIN_MNEMONIC(PSLLQ, MF_NONE, DU_U) | |
1271 | +BEGIN_OPCODES() | |
1272 | +#ifdef _HAVE_MMX_ | |
1273 | + {OpcodeInfo::all, {0x0F, 0xF3, _r}, {mm64, mm_m64}, DU_U }, | |
1274 | +#endif | |
1275 | + {OpcodeInfo::all, {0x66, 0x0F, 0xF3, _r}, {xmm64, xmm_m64}, DU_U }, | |
1276 | + {OpcodeInfo::all, {0x66, 0x0F, 0x73, _6, ib}, {xmm64, imm8}, DU_U }, | |
1277 | +END_OPCODES() | |
1278 | +END_MNEMONIC() | |
1279 | +BEGIN_MNEMONIC(PSRLQ, MF_NONE, DU_U) | |
1280 | +BEGIN_OPCODES() | |
1281 | +#ifdef _HAVE_MMX_ | |
1282 | + {OpcodeInfo::all, {0x0F, 0xD3, _r}, {mm64, mm_m64}, DU_U }, | |
1283 | +#endif | |
1284 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD3, _r}, {xmm64, xmm_m64}, DU_U }, | |
1285 | + {OpcodeInfo::all, {0x66, 0x0F, 0x73, _2, ib}, {xmm64, imm8}, DU_U }, | |
1286 | +END_OPCODES() | |
1287 | +END_MNEMONIC() | |
1288 | + | |
1289 | +BEGIN_MNEMONIC(PXOR, MF_NONE, DU_U) | |
1290 | +BEGIN_OPCODES() | |
1291 | +#ifdef _HAVE_MMX_ | |
1292 | + {OpcodeInfo::all, {0x0F, 0xEF, _r}, {mm64, mm_m64}, DU_U }, | |
1293 | +#endif | |
1294 | + {OpcodeInfo::all, {0x66, 0x0F, 0xEF, _r}, {xmm64, xmm_m64}, DU_U }, | |
1295 | +END_OPCODES() | |
1296 | +END_MNEMONIC() | |
1297 | + | |
1298 | + | |
1299 | +BEGIN_MNEMONIC(MOVAPD, MF_NONE, D_U ) | |
1300 | +BEGIN_OPCODES() | |
1301 | + {OpcodeInfo::all, {0x66, 0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U }, | |
1302 | + {OpcodeInfo::all, {0x66, 0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U }, | |
1303 | +END_OPCODES() | |
1304 | +END_MNEMONIC() | |
1305 | + | |
1306 | +BEGIN_MNEMONIC(MOVAPS, MF_NONE, D_U ) | |
1307 | +BEGIN_OPCODES() | |
1308 | + {OpcodeInfo::all, {0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U }, | |
1309 | + {OpcodeInfo::all, {0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U }, | |
1310 | +END_OPCODES() | |
1311 | +END_MNEMONIC() | |
1312 | + | |
1313 | +BEGIN_MNEMONIC(SHUFPS, MF_NONE, D_U_U ) | |
1314 | +BEGIN_OPCODES() | |
1315 | + {OpcodeInfo::all, {0x0F, 0xC6, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, | |
1316 | +END_OPCODES() | |
1317 | +END_MNEMONIC() | |
1318 | + | |
1319 | + | |
1320 | +BEGIN_MNEMONIC(MOVSD, MF_NONE, D_U ) | |
1321 | +BEGIN_OPCODES() | |
1322 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x10, _r}, {xmm64, xmm_m64}, D_U }, | |
1323 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x11, _r}, {xmm_m64, xmm64}, D_U }, | |
1324 | +END_OPCODES() | |
1325 | +END_MNEMONIC() | |
1326 | + | |
1327 | +BEGIN_MNEMONIC(MOVSS, MF_NONE, D_U ) | |
1328 | +BEGIN_OPCODES() | |
1329 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x10, _r}, {xmm32, xmm_m32}, D_U }, | |
1330 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x11, _r}, {xmm_m32, xmm32}, D_U }, | |
1331 | +END_OPCODES() | |
1332 | +END_MNEMONIC() | |
1333 | + | |
1334 | +BEGIN_MNEMONIC(MOVSX, MF_NONE, D_U ) | |
1335 | +BEGIN_OPCODES() | |
1336 | + {OpcodeInfo::all, {Size16, 0x0F, 0xBE, _r}, {r16, r_m8s}, D_U }, | |
1337 | + {OpcodeInfo::all, {0x0F, 0xBE, _r}, {r32, r_m8s}, D_U }, | |
1338 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBE, _r}, {r64, r_m8s}, D_U }, | |
1339 | + | |
1340 | + {OpcodeInfo::all, {0x0F, 0xBF, _r}, {r32, r_m16s}, D_U }, | |
1341 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBF, _r}, {r64, r_m16s}, D_U }, | |
1342 | + | |
1343 | + {OpcodeInfo::em64t, {REX_W, 0x63, _r}, {r64, r_m32s}, D_U }, | |
1344 | +END_OPCODES() | |
1345 | +END_MNEMONIC() | |
1346 | + | |
1347 | +BEGIN_MNEMONIC(MOVZX, MF_NONE, D_U ) | |
1348 | +BEGIN_OPCODES() | |
1349 | + {OpcodeInfo::all, {Size16, 0x0F, 0xB6, _r}, {r16, r_m8u}, D_U }, | |
1350 | + {OpcodeInfo::all, {0x0F, 0xB6, _r}, {r32, r_m8u}, D_U }, | |
1351 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB6, _r}, {r64, r_m8u}, D_U }, | |
1352 | + | |
1353 | + {OpcodeInfo::all, {0x0F, 0xB7, _r}, {r32, r_m16u}, D_U }, | |
1354 | + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB7, _r}, {r64, r_m16u}, D_U }, | |
1355 | + //workaround to get r/rm32->r64 ZX mov functionality: | |
1356 | + //simple 32bit reg copying zeros high bits in 64bit reg | |
1357 | + {OpcodeInfo::em64t, {0x8B, _r}, {r64, r_m32u}, D_U }, | |
1358 | +END_OPCODES() | |
1359 | +END_MNEMONIC() | |
1360 | + | |
1361 | +BEGIN_MNEMONIC(MULSD, MF_NONE, DU_U) | |
1362 | +BEGIN_OPCODES() | |
1363 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x59, _r}, {xmm64, xmm_m64}, DU_U }, | |
1364 | +END_OPCODES() | |
1365 | +END_MNEMONIC() | |
1366 | + | |
1367 | +BEGIN_MNEMONIC(MULSS, MF_NONE, DU_U) | |
1368 | +BEGIN_OPCODES() | |
1369 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x59, _r}, {xmm32, xmm_m32}, DU_U }, | |
1370 | +END_OPCODES() | |
1371 | +END_MNEMONIC() | |
1372 | + | |
1373 | +BEGIN_MNEMONIC(NEG, MF_AFFECTS_FLAGS, DU ) | |
1374 | +BEGIN_OPCODES() | |
1375 | + {OpcodeInfo::all, {0xF6, _3}, {r_m8}, DU }, | |
1376 | + | |
1377 | + {OpcodeInfo::all, {Size16, 0xF7, _3}, {r_m16}, DU }, | |
1378 | + {OpcodeInfo::all, {0xF7, _3}, {r_m32}, DU }, | |
1379 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _3}, {r_m64}, DU }, | |
1380 | +END_OPCODES() | |
1381 | +END_MNEMONIC() | |
1382 | + | |
1383 | +BEGIN_MNEMONIC(NOP, MF_NONE, N) | |
1384 | +BEGIN_OPCODES() | |
1385 | + {OpcodeInfo::all, {0x90}, {}, N }, | |
1386 | +END_OPCODES() | |
1387 | +END_MNEMONIC() | |
1388 | + | |
1389 | +BEGIN_MNEMONIC(NOT, MF_AFFECTS_FLAGS, DU ) | |
1390 | +BEGIN_OPCODES() | |
1391 | + {OpcodeInfo::all, {0xF6, _2}, {r_m8}, DU }, | |
1392 | + {OpcodeInfo::all, {Size16, 0xF7, _2}, {r_m16}, DU }, | |
1393 | + {OpcodeInfo::all, {0xF7, _2}, {r_m32}, DU }, | |
1394 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _2}, {r_m64}, DU }, | |
1395 | +END_OPCODES() | |
1396 | +END_MNEMONIC() | |
1397 | + | |
1398 | +BEGIN_MNEMONIC(POP, MF_NONE, D) | |
1399 | +BEGIN_OPCODES() | |
1400 | + {OpcodeInfo::all, {Size16, 0x8F, _0}, {r_m16}, D }, | |
1401 | + {OpcodeInfo::ia32, {0x8F, _0}, {r_m32}, D }, | |
1402 | + {OpcodeInfo::em64t, {0x8F, _0}, {r_m64}, D }, | |
1403 | + | |
1404 | + {OpcodeInfo::all, {Size16, 0x58|rw }, {r16}, D }, | |
1405 | + {OpcodeInfo::ia32, {0x58|rd }, {r32}, D }, | |
1406 | + {OpcodeInfo::em64t, {0x58|rd }, {r64}, D }, | |
1407 | +END_OPCODES() | |
1408 | +END_MNEMONIC() | |
1409 | + | |
1410 | +BEGIN_MNEMONIC(POPFD, MF_AFFECTS_FLAGS, N) | |
1411 | +BEGIN_OPCODES() | |
1412 | + {OpcodeInfo::all, {0x9D}, {}, N }, | |
1413 | +END_OPCODES() | |
1414 | +END_MNEMONIC() | |
1415 | + | |
1416 | +BEGIN_MNEMONIC(PREFETCH, MF_NONE, U) | |
1417 | +BEGIN_OPCODES() | |
1418 | + {OpcodeInfo::all, {0x0F, 0x18, _0}, {m8}, U }, | |
1419 | +END_OPCODES() | |
1420 | +END_MNEMONIC() | |
1421 | + | |
1422 | +BEGIN_MNEMONIC(PUSH, MF_NONE, U ) | |
1423 | +BEGIN_OPCODES() | |
1424 | + {OpcodeInfo::all, {Size16, 0xFF, _6}, {r_m16}, U }, | |
1425 | + {OpcodeInfo::ia32, {0xFF, _6}, {r_m32}, U }, | |
1426 | + {OpcodeInfo::em64t, {0xFF, _6}, {r_m64}, U }, | |
1427 | + | |
1428 | + {OpcodeInfo::all, {Size16, 0x50|rw }, {r16}, U }, | |
1429 | + {OpcodeInfo::ia32, {0x50|rd }, {r32}, U }, | |
1430 | + {OpcodeInfo::em64t, {0x50|rd }, {r64}, U }, | |
1431 | + | |
1432 | + {OpcodeInfo::all, {0x6A}, {imm8}, U }, | |
1433 | + {OpcodeInfo::all, {Size16, 0x68}, {imm16}, U }, | |
1434 | + {OpcodeInfo::ia32, {0x68}, {imm32}, U }, | |
1435 | +// {OpcodeInfo::em64t, {0x68}, {imm64}, U }, | |
1436 | +END_OPCODES() | |
1437 | +END_MNEMONIC() | |
1438 | + | |
1439 | +BEGIN_MNEMONIC(PUSHFD, MF_USES_FLAGS, N) | |
1440 | +BEGIN_OPCODES() | |
1441 | + {OpcodeInfo::all, {0x9C}, {}, N }, | |
1442 | +END_OPCODES() | |
1443 | +END_MNEMONIC() | |
1444 | + | |
1445 | + | |
1446 | +BEGIN_MNEMONIC(RET, MF_NONE, N) | |
1447 | +BEGIN_OPCODES() | |
1448 | + {OpcodeInfo::all, {0xC3}, {}, N }, | |
1449 | + {OpcodeInfo::all, {0xC2, iw}, {imm16}, U }, | |
1450 | +END_OPCODES() | |
1451 | +END_MNEMONIC() | |
1452 | + | |
1453 | +#define DEFINE_SETcc_MNEMONIC( cc ) \ | |
1454 | + BEGIN_MNEMONIC(SET##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU) \ | |
1455 | +BEGIN_OPCODES() \ | |
1456 | + {OpcodeInfo::all, {0x0F, 0x90 + ConditionMnemonic_##cc}, {r_m8}, DU }, \ | |
1457 | +END_OPCODES() \ | |
1458 | +END_MNEMONIC() | |
1459 | + | |
1460 | +DEFINE_SETcc_MNEMONIC(O) | |
1461 | +DEFINE_SETcc_MNEMONIC(NO) | |
1462 | +DEFINE_SETcc_MNEMONIC(B) | |
1463 | +DEFINE_SETcc_MNEMONIC(NB) | |
1464 | +DEFINE_SETcc_MNEMONIC(Z) | |
1465 | +DEFINE_SETcc_MNEMONIC(NZ) | |
1466 | +DEFINE_SETcc_MNEMONIC(BE) | |
1467 | +DEFINE_SETcc_MNEMONIC(NBE) | |
1468 | + | |
1469 | +DEFINE_SETcc_MNEMONIC(S) | |
1470 | +DEFINE_SETcc_MNEMONIC(NS) | |
1471 | +DEFINE_SETcc_MNEMONIC(P) | |
1472 | +DEFINE_SETcc_MNEMONIC(NP) | |
1473 | +DEFINE_SETcc_MNEMONIC(L) | |
1474 | +DEFINE_SETcc_MNEMONIC(NL) | |
1475 | +DEFINE_SETcc_MNEMONIC(LE) | |
1476 | +DEFINE_SETcc_MNEMONIC(NLE) | |
1477 | + | |
1478 | +#undef DEFINE_SETcc_MNEMONIC | |
1479 | + | |
1480 | +#define DEFINE_SHIFT_MNEMONIC(nam, slash_num, flags) \ | |
1481 | +BEGIN_MNEMONIC(nam, flags, DU_U) \ | |
1482 | +BEGIN_OPCODES()\ | |
1483 | + /* D0 & D1 opcodes are added w/o 2nd operand (1) because */\ | |
1484 | + /* they are used for decoding only so only instruction length is needed */\ | |
1485 | + {OpcodeInfo::decoder, {0xD0, slash_num}, {r_m8/*,const_1*/}, DU },\ | |
1486 | + {OpcodeInfo::all, {0xD2, slash_num}, {r_m8, CL}, DU_U },\ | |
1487 | + {OpcodeInfo::all, {0xC0, slash_num, ib}, {r_m8, imm8}, DU_U },\ | |
1488 | +\ | |
1489 | + {OpcodeInfo::decoder, {Size16, 0xD1, slash_num}, {r_m16/*,const_1*/}, DU },\ | |
1490 | + {OpcodeInfo::all, {Size16, 0xD3, slash_num}, {r_m16, CL}, DU_U },\ | |
1491 | + {OpcodeInfo::all, {Size16, 0xC1, slash_num, ib}, {r_m16, imm8 }, DU_U },\ | |
1492 | +\ | |
1493 | + {OpcodeInfo::decoder, {0xD1, slash_num}, {r_m32/*,const_1*/}, DU },\ | |
1494 | + {OpcodeInfo::decoder64, {REX_W, 0xD1, slash_num}, {r_m64/*,const_1*/}, DU },\ | |
1495 | +\ | |
1496 | + {OpcodeInfo::all, {0xD3, slash_num}, {r_m32, CL}, DU_U },\ | |
1497 | + {OpcodeInfo::em64t, {REX_W, 0xD3, slash_num}, {r_m64, CL}, DU_U },\ | |
1498 | +\ | |
1499 | + {OpcodeInfo::all, {0xC1, slash_num, ib}, {r_m32, imm8}, DU_U },\ | |
1500 | + {OpcodeInfo::em64t, {REX_W, 0xC1, slash_num, ib}, {r_m64, imm8}, DU_U },\ | |
1501 | +END_OPCODES()\ | |
1502 | +END_MNEMONIC() | |
1503 | + | |
1504 | + | |
1505 | +DEFINE_SHIFT_MNEMONIC(ROL, _0, MF_AFFECTS_FLAGS) | |
1506 | +DEFINE_SHIFT_MNEMONIC(ROR, _1, MF_AFFECTS_FLAGS) | |
1507 | +DEFINE_SHIFT_MNEMONIC(RCL, _2, MF_AFFECTS_FLAGS|MF_USES_FLAGS) | |
1508 | +DEFINE_SHIFT_MNEMONIC(RCR, _3, MF_AFFECTS_FLAGS|MF_USES_FLAGS) | |
1509 | + | |
1510 | +DEFINE_SHIFT_MNEMONIC(SAL, _4, MF_AFFECTS_FLAGS) | |
1511 | +DEFINE_SHIFT_MNEMONIC(SHR, _5, MF_AFFECTS_FLAGS) | |
1512 | +DEFINE_SHIFT_MNEMONIC(SAR, _7, MF_AFFECTS_FLAGS) | |
1513 | + | |
1514 | +#undef DEFINE_SHIFT_MNEMONIC | |
1515 | + | |
1516 | +BEGIN_MNEMONIC(SHLD, MF_AFFECTS_FLAGS, N) | |
1517 | +BEGIN_OPCODES() | |
1518 | + {OpcodeInfo::all, {0x0F, 0xA5}, {r_m32, r32, CL}, DU_DU_U }, | |
1519 | + {OpcodeInfo::all, {0x0F, 0xA4}, {r_m32, r32, imm8}, DU_DU_U }, | |
1520 | +END_OPCODES() | |
1521 | +END_MNEMONIC() | |
1522 | + | |
1523 | +BEGIN_MNEMONIC(SHRD, MF_AFFECTS_FLAGS, N) | |
1524 | +// TODO: the def/use info is wrong | |
1525 | +BEGIN_OPCODES() | |
1526 | + {OpcodeInfo::all, {0x0F, 0xAD}, {r_m32, r32, CL}, DU_DU_U }, | |
1527 | +END_OPCODES() | |
1528 | +END_MNEMONIC() | |
1529 | + | |
1530 | + | |
1531 | +BEGIN_MNEMONIC(SUBSD, MF_NONE, DU_U) | |
1532 | +BEGIN_OPCODES() | |
1533 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x5C, _r}, {xmm64, xmm_m64}, DU_U }, | |
1534 | +END_OPCODES() | |
1535 | +END_MNEMONIC() | |
1536 | + | |
1537 | +BEGIN_MNEMONIC(SUBSS, MF_NONE, DU_U) | |
1538 | +BEGIN_OPCODES() | |
1539 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x5C, _r}, {xmm32, xmm_m32}, DU_U }, | |
1540 | +END_OPCODES() | |
1541 | +END_MNEMONIC() | |
1542 | + | |
1543 | +BEGIN_MNEMONIC(TEST, MF_AFFECTS_FLAGS, U_U) | |
1544 | +BEGIN_OPCODES() | |
1545 | + | |
1546 | + {OpcodeInfo::decoder, {0xA8, ib}, { AL, imm8}, U_U }, | |
1547 | + {OpcodeInfo::decoder, {0xA9, iw}, { AX, imm16}, U_U }, | |
1548 | + {OpcodeInfo::decoder, {0xA9, id}, { EAX, imm32}, U_U }, | |
1549 | + {OpcodeInfo::decoder64, {REX_W, 0xA9, id}, { RAX, imm32s}, U_U }, | |
1550 | + | |
1551 | + {OpcodeInfo::all, {0xF6, _0, ib}, {r_m8,imm8}, U_U }, | |
1552 | + | |
1553 | + {OpcodeInfo::all, {Size16, 0xF7, _0, iw}, {r_m16,imm16}, U_U }, | |
1554 | + {OpcodeInfo::all, {0xF7, _0, id}, {r_m32,imm32}, U_U }, | |
1555 | + {OpcodeInfo::em64t, {REX_W, 0xF7, _0, id}, {r_m64,imm32s}, U_U }, | |
1556 | + | |
1557 | + {OpcodeInfo::all, {0x84, _r}, {r_m8,r8}, U_U }, | |
1558 | + | |
1559 | + {OpcodeInfo::all, {Size16, 0x85, _r}, {r_m16,r16}, U_U }, | |
1560 | + {OpcodeInfo::all, {0x85, _r}, {r_m32,r32}, U_U }, | |
1561 | + {OpcodeInfo::em64t, {REX_W, 0x85, _r}, {r_m64,r64}, U_U }, | |
1562 | +END_OPCODES() | |
1563 | +END_MNEMONIC() | |
1564 | + | |
1565 | + | |
1566 | +BEGIN_MNEMONIC(UCOMISD, MF_AFFECTS_FLAGS, U_U) | |
1567 | +BEGIN_OPCODES() | |
1568 | + {OpcodeInfo::all, {0x66, 0x0F, 0x2E, _r}, {xmm64, xmm_m64}, U_U }, | |
1569 | +END_OPCODES() | |
1570 | +END_MNEMONIC() | |
1571 | + | |
1572 | +BEGIN_MNEMONIC(UCOMISS, MF_AFFECTS_FLAGS, U_U) | |
1573 | +BEGIN_OPCODES() | |
1574 | + {OpcodeInfo::all, {0x0F, 0x2E, _r}, {xmm32, xmm_m32}, U_U }, | |
1575 | +END_OPCODES() | |
1576 | +END_MNEMONIC() | |
1577 | + | |
1578 | +BEGIN_MNEMONIC(COMISD, MF_AFFECTS_FLAGS, U_U) | |
1579 | +BEGIN_OPCODES() | |
1580 | + {OpcodeInfo::all, {0x66, 0x0F, 0x2F, _r}, {xmm64, xmm_m64}, U_U }, | |
1581 | +END_OPCODES() | |
1582 | +END_MNEMONIC() | |
1583 | + | |
1584 | +BEGIN_MNEMONIC(COMISS, MF_AFFECTS_FLAGS, U_U) | |
1585 | +BEGIN_OPCODES() | |
1586 | + {OpcodeInfo::all, {0x0F, 0x2F, _r}, {xmm32, xmm_m32}, U_U }, | |
1587 | +END_OPCODES() | |
1588 | +END_MNEMONIC() | |
1589 | + | |
1590 | +BEGIN_MNEMONIC(XORPD, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U) | |
1591 | +BEGIN_OPCODES() | |
1592 | + //Note: they're actually 128 bits | |
1593 | + {OpcodeInfo::all, {0x66, 0x0F, 0x57, _r}, {xmm64, xmm_m64}, DU_U }, | |
1594 | +END_OPCODES() | |
1595 | +END_MNEMONIC() | |
1596 | + | |
1597 | +BEGIN_MNEMONIC(XORPS, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U) | |
1598 | +BEGIN_OPCODES() | |
1599 | + //Note: they're actually 128 bits | |
1600 | + {OpcodeInfo::all, {0x0F, 0x57, _r}, {xmm32, xmm_m32}, DU_U }, | |
1601 | +END_OPCODES() | |
1602 | +END_MNEMONIC() | |
1603 | + | |
1604 | +BEGIN_MNEMONIC(CVTDQ2PD, MF_NONE, D_U ) | |
1605 | +BEGIN_OPCODES() | |
1606 | + //Note: they're actually 128 bits | |
1607 | + {OpcodeInfo::all, {0xF3, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U }, | |
1608 | +END_OPCODES() | |
1609 | +END_MNEMONIC() | |
1610 | + | |
1611 | +BEGIN_MNEMONIC(CVTDQ2PS, MF_NONE, D_U ) | |
1612 | +BEGIN_OPCODES() | |
1613 | + //Note: they're actually 128 bits | |
1614 | + {OpcodeInfo::all, {0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U }, | |
1615 | +END_OPCODES() | |
1616 | +END_MNEMONIC() | |
1617 | + | |
1618 | +BEGIN_MNEMONIC(CVTTPD2DQ, MF_NONE, D_U ) | |
1619 | +BEGIN_OPCODES() | |
1620 | + //Note: they're actually 128 bits | |
1621 | + {OpcodeInfo::all, {0x66, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U }, | |
1622 | +END_OPCODES() | |
1623 | +END_MNEMONIC() | |
1624 | + | |
1625 | +BEGIN_MNEMONIC(CVTTPS2DQ, MF_NONE, D_U ) | |
1626 | +BEGIN_OPCODES() | |
1627 | + //Note: they're actually 128 bits | |
1628 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U }, | |
1629 | +END_OPCODES() | |
1630 | +END_MNEMONIC() | |
1631 | + | |
1632 | +// | |
1633 | +// String operations | |
1634 | +// | |
1635 | +BEGIN_MNEMONIC(STD, MF_AFFECTS_FLAGS, N) | |
1636 | +BEGIN_OPCODES() | |
1637 | + {OpcodeInfo::all, {0xFD}, {}, N }, | |
1638 | +END_OPCODES() | |
1639 | +END_MNEMONIC() | |
1640 | + | |
1641 | +BEGIN_MNEMONIC(CLD, MF_AFFECTS_FLAGS, N) | |
1642 | +BEGIN_OPCODES() | |
1643 | + {OpcodeInfo::all, {0xFC}, {}, N }, | |
1644 | +END_OPCODES() | |
1645 | +END_MNEMONIC() | |
1646 | + | |
1647 | +BEGIN_MNEMONIC(SCAS, MF_AFFECTS_FLAGS, N) | |
1648 | +// To be symmetric, this mnemonic must have either m32 or RegName_EAX, | |
1649 | +// but as long as Jitrino's CG does not use the mnemonic, we leave it | |
1650 | +// in its natural form. | |
1651 | +BEGIN_OPCODES() | |
1652 | + {OpcodeInfo::all, {0xAF}, {}, N }, | |
1653 | +END_OPCODES() | |
1654 | +END_MNEMONIC() | |
1655 | + | |
1656 | +BEGIN_MNEMONIC(STOS, MF_AFFECTS_FLAGS, DU_DU_U) | |
1657 | +BEGIN_OPCODES() | |
1658 | + {OpcodeInfo::all, {0xAB}, {EDI, ECX, EAX}, DU_DU_U }, | |
1659 | + {OpcodeInfo::all, {0xAA}, {EDI, ECX, AL}, DU_DU_U }, | |
1660 | + {OpcodeInfo::em64t, {REX_W, 0xAB}, {RDI, RCX, RAX}, DU_DU_U }, | |
1661 | +END_OPCODES() | |
1662 | +END_MNEMONIC() | |
1663 | + | |
1664 | +/* | |
1665 | +MOVS and CMPS are special cases. | |
1666 | +Most of the code in both the CG and the Encoder does not expect 2 memory operands. | |
1667 | +Also, they are not supposed to set up constraints on which register the | |
1668 | +memory reference must reside in - m8,m8 or m32,m32 is not an option. | |
1669 | +We can't use r8,r8 either - that would cause problems with 8bit EDI, ESI. | |
1670 | +So, as a workaround we use r32,r32 and specify the size of the operand through | |
1671 | +the specific mnemonic - the same is done in the codegen. | |
1672 | +*/ | |
1673 | +BEGIN_MNEMONIC(MOVS8, MF_NONE, DU_DU_DU) | |
1674 | +BEGIN_OPCODES() | |
1675 | + {OpcodeInfo::ia32, {0xA4}, {r32,r32,ECX}, DU_DU_DU }, | |
1676 | + {OpcodeInfo::em64t, {0xA4}, {r64,r64,RCX}, DU_DU_DU }, | |
1677 | +END_OPCODES() | |
1678 | +END_MNEMONIC() | |
1679 | + | |
1680 | +BEGIN_MNEMONIC(MOVS16, MF_NONE, DU_DU_DU) | |
1681 | +BEGIN_OPCODES() | |
1682 | + {OpcodeInfo::ia32, {Size16, 0xA5}, {r32,r32,ECX}, DU_DU_DU }, | |
1683 | + {OpcodeInfo::em64t, {Size16, 0xA5}, {r64,r64,RCX}, DU_DU_DU }, | |
1684 | +END_OPCODES() | |
1685 | +END_MNEMONIC() | |
1686 | + | |
1687 | +BEGIN_MNEMONIC(MOVS32, MF_NONE, DU_DU_DU) | |
1688 | +BEGIN_OPCODES() | |
1689 | + {OpcodeInfo::ia32, {0xA5}, {r32,r32,ECX}, DU_DU_DU }, | |
1690 | + {OpcodeInfo::em64t, {0xA5}, {r64,r64,RCX}, DU_DU_DU }, | |
1691 | +END_OPCODES() | |
1692 | +END_MNEMONIC() | |
1693 | + | |
1694 | +BEGIN_MNEMONIC(MOVS64, MF_NONE, DU_DU_DU) | |
1695 | +BEGIN_OPCODES() | |
1696 | + {OpcodeInfo::em64t, {REX_W,0xA5}, {r64,r64,RCX}, DU_DU_DU }, | |
1697 | +END_OPCODES() | |
1698 | +END_MNEMONIC() | |
1699 | + | |
1700 | +BEGIN_MNEMONIC(CMPSB, MF_AFFECTS_FLAGS, DU_DU_DU) | |
1701 | +BEGIN_OPCODES() | |
1702 | + {OpcodeInfo::ia32, {0xA6}, {ESI,EDI,ECX}, DU_DU_DU }, | |
1703 | + {OpcodeInfo::em64t, {0xA6}, {RSI,RDI,RCX}, DU_DU_DU }, | |
1704 | +END_OPCODES() | |
1705 | +END_MNEMONIC() | |
1706 | + | |
1707 | +BEGIN_MNEMONIC(CMPSW, MF_AFFECTS_FLAGS, DU_DU_DU) | |
1708 | +BEGIN_OPCODES() | |
1709 | + {OpcodeInfo::ia32, {Size16, 0xA7}, {ESI,EDI,ECX}, DU_DU_DU }, | |
1710 | + {OpcodeInfo::em64t, {Size16, 0xA7}, {RSI,RDI,RCX}, DU_DU_DU }, | |
1711 | +END_OPCODES() | |
1712 | +END_MNEMONIC() | |
1713 | + | |
1714 | +BEGIN_MNEMONIC(CMPSD, MF_AFFECTS_FLAGS, DU_DU_DU) | |
1715 | +BEGIN_OPCODES() | |
1716 | + {OpcodeInfo::ia32, {0xA7}, {ESI,EDI,ECX}, DU_DU_DU }, | |
1717 | + {OpcodeInfo::em64t, {0xA7}, {RSI,RDI,RCX}, DU_DU_DU }, | |
1718 | +END_OPCODES() | |
1719 | +END_MNEMONIC() | |
1720 | + | |
1721 | + | |
1722 | +BEGIN_MNEMONIC(WAIT, MF_AFFECTS_FLAGS, N) | |
1723 | +BEGIN_OPCODES() | |
1724 | + {OpcodeInfo::all, {0x9B}, {}, N }, | |
1725 | +END_OPCODES() | |
1726 | +END_MNEMONIC() | |
1727 | + | |
1728 | +// | |
1729 | +// ~String operations | |
1730 | +// | |
1731 | + | |
1732 | +// | |
1733 | +//Note: the instructions below were added for the sake of the disassembling routine. | |
1734 | +// Their flags, params and params usage still need to be defined more precisely. | |
1735 | +// | |
1736 | +BEGIN_MNEMONIC(LEAVE, MF_NONE, N) | |
1737 | +BEGIN_OPCODES() | |
1738 | + {OpcodeInfo::decoder, {0xC9}, {}, N }, | |
1739 | +END_OPCODES() | |
1740 | +END_MNEMONIC() | |
1741 | + | |
1742 | +BEGIN_MNEMONIC(ENTER, MF_NONE, N) | |
1743 | +BEGIN_OPCODES() | |
1744 | + {OpcodeInfo::decoder, {0xC8, iw, ib}, {imm16, imm8}, N }, | |
1745 | +END_OPCODES() | |
1746 | +END_MNEMONIC() | |
1747 | + | |
1748 | +BEGIN_MNEMONIC(PADDB, MF_NONE, DU_U) | |
1749 | +BEGIN_OPCODES() | |
1750 | + {OpcodeInfo::all, {0x66, 0x0F, 0xFC, _r}, {xmm64, xmm_m64}, DU_U }, | |
1751 | +END_OPCODES() | |
1752 | +END_MNEMONIC() | |
1753 | + | |
1754 | +BEGIN_MNEMONIC(PADDW, MF_NONE, DU_U) | |
1755 | +BEGIN_OPCODES() | |
1756 | + {OpcodeInfo::all, {0x66, 0x0F, 0xFD, _r}, {xmm64, xmm_m64}, DU_U }, | |
1757 | +END_OPCODES() | |
1758 | +END_MNEMONIC() | |
1759 | + | |
1760 | +BEGIN_MNEMONIC(PADDD, MF_NONE, DU_U) | |
1761 | +BEGIN_OPCODES() | |
1762 | + {OpcodeInfo::all, {0x66, 0x0F, 0xFE, _r}, {xmm64, xmm_m64}, DU_U }, | |
1763 | +END_OPCODES() | |
1764 | +END_MNEMONIC() | |
1765 | + | |
1766 | +BEGIN_MNEMONIC(PSUBB, MF_NONE, DU_U) | |
1767 | +BEGIN_OPCODES() | |
1768 | + {OpcodeInfo::all, {0x66, 0x0F, 0xF8, _r}, {xmm64, xmm_m64}, DU_U }, | |
1769 | +END_OPCODES() | |
1770 | +END_MNEMONIC() | |
1771 | + | |
1772 | +BEGIN_MNEMONIC(PSUBW, MF_NONE, DU_U) | |
1773 | +BEGIN_OPCODES() | |
1774 | + {OpcodeInfo::all, {0x66, 0x0F, 0xF9, _r}, {xmm64, xmm_m64}, DU_U }, | |
1775 | +END_OPCODES() | |
1776 | +END_MNEMONIC() | |
1777 | + | |
1778 | +BEGIN_MNEMONIC(PSUBD, MF_NONE, DU_U) | |
1779 | +BEGIN_OPCODES() | |
1780 | + {OpcodeInfo::all, {0x66, 0x0F, 0xFA, _r}, {xmm64, xmm_m64}, DU_U }, | |
1781 | +END_OPCODES() | |
1782 | +END_MNEMONIC() | |
1783 | + | |
1784 | +BEGIN_MNEMONIC(PMULLW, MF_NONE, DU_U) | |
1785 | +BEGIN_OPCODES() | |
1786 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD5, _r}, {xmm64, xmm_m64}, DU_U }, | |
1787 | +END_OPCODES() | |
1788 | +END_MNEMONIC() | |
1789 | + | |
1790 | +BEGIN_MNEMONIC(PMULLD, MF_NONE, DU_U) | |
1791 | +BEGIN_OPCODES() | |
1792 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x40, _r}, {xmm64, xmm_m64}, DU_U }, | |
1793 | +END_OPCODES() | |
1794 | +END_MNEMONIC() | |
1795 | + | |
1796 | +BEGIN_MNEMONIC(PSLLW, MF_NONE, DU_U) | |
1797 | +BEGIN_OPCODES() | |
1798 | + {OpcodeInfo::all, {0x66, 0x0F, 0xF1, _r}, {xmm64, xmm_m64}, DU_U }, | |
1799 | + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _6, ib}, {xmm64, imm8}, DU_U }, | |
1800 | +END_OPCODES() | |
1801 | +END_MNEMONIC() | |
1802 | + | |
1803 | +BEGIN_MNEMONIC(PSLLD, MF_NONE, DU_U) | |
1804 | +BEGIN_OPCODES() | |
1805 | + {OpcodeInfo::all, {0x66, 0x0F, 0xF2, _r}, {xmm64, xmm_m64}, DU_U }, | |
1806 | + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _6, ib}, {xmm64, imm8}, DU_U }, | |
1807 | +END_OPCODES() | |
1808 | +END_MNEMONIC() | |
1809 | + | |
1810 | +BEGIN_MNEMONIC(PSRAW, MF_NONE, DU_U) | |
1811 | +BEGIN_OPCODES() | |
1812 | + {OpcodeInfo::all, {0x66, 0x0F, 0xE1, _r}, {xmm64, xmm_m64}, DU_U }, | |
1813 | + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _4, ib}, {xmm64, imm8}, DU_U }, | |
1814 | +END_OPCODES() | |
1815 | +END_MNEMONIC() | |
1816 | + | |
1817 | +BEGIN_MNEMONIC(PSRAD, MF_NONE, DU_U) | |
1818 | +BEGIN_OPCODES() | |
1819 | + {OpcodeInfo::all, {0x66, 0x0F, 0xE2, _r}, {xmm64, xmm_m64}, DU_U }, | |
1820 | + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _4, ib}, {xmm64, imm8}, DU_U }, | |
1821 | +END_OPCODES() | |
1822 | +END_MNEMONIC() | |
1823 | + | |
1824 | +BEGIN_MNEMONIC(PSRLW, MF_NONE, DU_U) | |
1825 | +BEGIN_OPCODES() | |
1826 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD1, _r}, {xmm64, xmm_m64}, DU_U }, | |
1827 | + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _2, ib}, {xmm64, imm8}, DU_U }, | |
1828 | +END_OPCODES() | |
1829 | +END_MNEMONIC() | |
1830 | + | |
1831 | +BEGIN_MNEMONIC(PSRLD, MF_NONE, DU_U) | |
1832 | +BEGIN_OPCODES() | |
1833 | + {OpcodeInfo::all, {0x66, 0x0F, 0xD2, _r}, {xmm64, xmm_m64}, DU_U }, | |
1834 | + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _2, ib}, {xmm64, imm8}, DU_U }, | |
1835 | +END_OPCODES() | |
1836 | +END_MNEMONIC() | |
1837 | + | |
1838 | +BEGIN_MNEMONIC(PMOVSXBW, MF_NONE, DU_U) | |
1839 | +BEGIN_OPCODES() | |
1840 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x20, _r}, {xmm64, xmm_m64}, DU_U }, | |
1841 | +END_OPCODES() | |
1842 | +END_MNEMONIC() | |
1843 | + | |
1844 | +BEGIN_MNEMONIC(PSHUFB, MF_NONE, DU_U) | |
1845 | +BEGIN_OPCODES() | |
1846 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x00, _r}, {xmm64, xmm_m64}, DU_U }, | |
1847 | +END_OPCODES() | |
1848 | +END_MNEMONIC() | |
1849 | + | |
1850 | +BEGIN_MNEMONIC(PSHUFD, MF_NONE, D_U_U) | |
1851 | +BEGIN_OPCODES() | |
1852 | + {OpcodeInfo::all, {0x66, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, | |
1853 | +END_OPCODES() | |
1854 | +END_MNEMONIC() | |
1855 | + | |
1856 | +BEGIN_MNEMONIC(PSHUFLW, MF_NONE, D_U_U) | |
1857 | +BEGIN_OPCODES() | |
1858 | + {OpcodeInfo::all, {0xF2, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, | |
1859 | +END_OPCODES() | |
1860 | +END_MNEMONIC() | |
1861 | + | |
1862 | +BEGIN_MNEMONIC(PSHUFHW, MF_NONE, D_U_U) | |
1863 | +BEGIN_OPCODES() | |
1864 | + {OpcodeInfo::all, {0xF3, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, | |
1865 | +END_OPCODES() | |
1866 | +END_MNEMONIC() | |
1867 | + | |
1868 | +BEGIN_MNEMONIC(PHADDSW, MF_NONE, DU_U) | |
1869 | +BEGIN_OPCODES() | |
1870 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x03, _r}, {xmm64, xmm_m64}, DU_U }, | |
1871 | +END_OPCODES() | |
1872 | +END_MNEMONIC() | |
1873 | + | |
1874 | +BEGIN_MNEMONIC(PHADDW, MF_NONE, DU_U) | |
1875 | +BEGIN_OPCODES() | |
1876 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x01, _r}, {xmm64, xmm_m64}, DU_U }, | |
1877 | +END_OPCODES() | |
1878 | +END_MNEMONIC() | |
1879 | + | |
1880 | +BEGIN_MNEMONIC(PHADDD, MF_NONE, DU_U) | |
1881 | +BEGIN_OPCODES() | |
1882 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x02, _r}, {xmm64, xmm_m64}, DU_U }, | |
1883 | +END_OPCODES() | |
1884 | +END_MNEMONIC() | |
1885 | + | |
1886 | +BEGIN_MNEMONIC(PHSUBSW, MF_NONE, DU_U) | |
1887 | +BEGIN_OPCODES() | |
1888 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x07, _r}, {xmm64, xmm_m64}, DU_U }, | |
1889 | +END_OPCODES() | |
1890 | +END_MNEMONIC() | |
1891 | + | |
1892 | +BEGIN_MNEMONIC(PHSUBW, MF_NONE, DU_U) | |
1893 | +BEGIN_OPCODES() | |
1894 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x05, _r}, {xmm64, xmm_m64}, DU_U }, | |
1895 | +END_OPCODES() | |
1896 | +END_MNEMONIC() | |
1897 | + | |
1898 | +BEGIN_MNEMONIC(PHSUBD, MF_NONE, DU_U) | |
1899 | +BEGIN_OPCODES() | |
1900 | + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x06, _r}, {xmm64, xmm_m64}, DU_U }, | |
1901 | +END_OPCODES() | |
1902 | +END_MNEMONIC() | |
1903 | + | |
1904 | +BEGIN_MNEMONIC(PEXTRB, MF_NONE, D_U_U) | |
1905 | +BEGIN_OPCODES() | |
1906 | + {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x14, _r, ib}, {r32, xmm64, imm8}, D_U_U }, | |
1907 | +END_OPCODES() | |
1908 | +END_MNEMONIC() | |
1909 | + | |
1910 | +BEGIN_MNEMONIC(PEXTRW, MF_NONE, D_U_U) | |
1911 | +BEGIN_OPCODES() | |
1912 | + {OpcodeInfo::all, {0x66, 0x0F, 0xC5, _r, ib}, {r32, xmm64, imm8}, D_U_U }, | |
1913 | +END_OPCODES() | |
1914 | +END_MNEMONIC() | |
1915 | + | |
1916 | +BEGIN_MNEMONIC(PEXTRD, MF_NONE, D_U_U) | |
1917 | +BEGIN_OPCODES() | |
1918 | + {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x16, _r, ib}, {r_m32, xmm64, imm8}, D_U_U }, | |
1919 | +END_OPCODES() | |
1920 | +END_MNEMONIC() | |
1921 | + | |
1922 | +BEGIN_MNEMONIC(MOVDQA, MF_NONE|MF_SYMMETRIC, D_U) | |
1923 | +BEGIN_OPCODES() | |
1924 | + {OpcodeInfo::all, {0x66, 0x0F, 0x6F, _r}, {xmm64, xmm_m64}, D_U }, | |
1925 | + //The encoder cannot properly look up when operands are symmetric but opcode is not: | |
1926 | + //{OpcodeInfo::all, {0x66, 0x0F, 0x7F, _r}, {xmm_m128, xmm128}, D_U }, | |
1927 | +END_OPCODES() | |
1928 | +END_MNEMONIC() | |
1929 | + | |
1930 | +}; // ~masterEncodingTable[] | |
1931 | + | |
1932 | +ENCODER_NAMESPACE_END | |
1933 | + | |
1934 | +ENCODER_NAMESPACE_START | |
1935 | + | |
1936 | +static int compareMnemonicInfo(const void* info1, const void* info2) | |
1937 | +{ | |
1938 | + Mnemonic id1, id2; | |
1939 | + | |
1940 | + id1 = ((const MnemonicInfo*) info1)->mn; | |
1941 | + id2 = ((const MnemonicInfo*) info2)->mn; | |
1942 | + if (id1 < id2) | |
1943 | + return -1; | |
1944 | + if (id1 > id2) | |
1945 | + return 1; | |
1946 | + return 0; | |
1947 | +} | |
1948 | + | |
1949 | +int EncoderBase::buildTable(void) | |
1950 | +{ | |
1951 | + // A check: all mnemonics must be covered | |
1952 | + assert(COUNTOF(masterEncodingTable) == Mnemonic_Count); | |
1953 | + | |
1954 | + // sort out the mnemonics so the list become ordered | |
1955 | + qsort(masterEncodingTable, Mnemonic_Count, sizeof(MnemonicInfo), compareMnemonicInfo); | |
1956 | + | |
1957 | + // | |
1958 | + // clear the things | |
1959 | + // | |
1960 | + memset(opcodesHashMap, NOHASH, sizeof(opcodesHashMap)); | |
1961 | + memset(opcodes, 0, sizeof(opcodes)); | |
1962 | + // | |
1963 | + // and, finally, build it | |
1964 | + for (unsigned i=0; i<Mnemonic_Count; i++) { | |
1965 | + assert((Mnemonic)i == (masterEncodingTable + i)->mn); | |
1966 | + buildMnemonicDesc(masterEncodingTable+i); | |
1967 | + } | |
1968 | + return 0; | |
1969 | +} | |
1970 | + | |
/**
 * Expands one master-table entry into the run-time descriptors of its mnemonic.
 *
 * The mnemonic-level fields are copied into mnemonics[minfo->mn]. Then
 * minfo->opcodes is walked and every entry kept for the current platform is
 * converted into opcodes[minfo->mn][oindex]: its opcode bytes, the auxiliary
 * encoding markers aux0/aux1, the operand descriptions and roles. Unless the
 * entry is decoder-only, oindex is also recorded in opcodesHashMap[minfo->mn]
 * under the hash of the operand info; an operand that is a register-or-memory
 * mask is hashed twice, once as register and once as memory.
 *
 * @param minfo master-table entry to expand; it is only read.
 */
void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo)
{
    // Mnemonic-level data is copied verbatim.
    MnemonicDesc& mdesc = mnemonics[minfo->mn];
    mdesc.mn = minfo->mn;
    mdesc.flags = minfo->flags;
    mdesc.roles = minfo->roles;
    mdesc.name = minfo->name;

    //
    // fill the used opcodes
    //
    // 'i' walks the source entries; 'oindex' is the next free slot in
    // opcodes[minfo->mn] and only advances when an entry is actually stored,
    // so entries skipped for the other platform leave no gaps.
    for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) {

        const OpcodeInfo& oinfo = minfo->opcodes[i];
        OpcodeDesc& odesc = opcodes[minfo->mn][oindex];
        // last opcode ?
        if (oinfo.opcode[0] == OpcodeByteKind_LAST) {
            // mark the opcode 'last', exit
            odesc.opcode_len = 0;
            odesc.last = 1;
            break;
        }
        odesc.last = 0;
        // Drop entries that belong to the other platform; the slot at
        // 'oindex' is then reused by the next source entry.
#ifdef _EM64T_
        if (oinfo.platf == OpcodeInfo::ia32) { continue; }
        if (oinfo.platf == OpcodeInfo::decoder32) { continue; }
#else
        if (oinfo.platf == OpcodeInfo::em64t) { continue; }
        if (oinfo.platf == OpcodeInfo::decoder64) { continue; }
#endif
        // The platform-specific decoder kinds collapse into plain 'decoder'.
        if (oinfo.platf == OpcodeInfo::decoder64 ||
            oinfo.platf == OpcodeInfo::decoder32) {
            odesc.platf = OpcodeInfo::decoder;
        }
        else {
            odesc.platf = (char)oinfo.platf;
        }
        //
        // fill out opcodes
        //
        // Copy the leading plain opcode bytes. A REX_W marker is stored as
        // the byte 0x48; the first item of any other non-zero kind (except
        // ZeroOpcodeByte) ends the opcode bytes, as does a zero item.
        unsigned j = 0;
        odesc.opcode_len = 0;
        for(; oinfo.opcode[j]; j++) {
            unsigned opcod = oinfo.opcode[j];
            unsigned kind = opcod&OpcodeByteKind_KindMask;
            if (kind == OpcodeByteKind_REX_W) {
                odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48;
                continue;
            }
            else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) {
                break;
            }
            unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask);
            odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte;
        }
        // NOTE: the length is checked only after the bytes have been stored,
        // and only when assert() is enabled.
        assert(odesc.opcode_len<5);
        // Up to two items following the opcode bytes become aux0/aux1.
        // When the table gives none, one is inferred from the operands below.
        odesc.aux0 = odesc.aux1 = 0;
        if (oinfo.opcode[j] != 0) {
            odesc.aux0 = oinfo.opcode[j];
            assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0);
            ++j;
            if(oinfo.opcode[j] != 0) {
                odesc.aux1 = oinfo.opcode[j];
                assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0);
            }
        }
        else if (oinfo.roles.count>=2) {
            // One of the first two operands may be memory and the other is a
            // register kind.
            if (((oinfo.opnds[0].kind&OpndKind_Mem) &&
                 (isRegKind(oinfo.opnds[1].kind))) ||
                ((oinfo.opnds[1].kind&OpndKind_Mem) &&
                 (isRegKind(oinfo.opnds[0].kind)))) {
                // Example: MOVQ xmm1, xmm/m64 has only opcodes
                // same with SHRD
                // Adding fake /r
                odesc.aux0 = _r;
            }
        }
        else if (oinfo.roles.count==1) {
            if (oinfo.opnds[0].kind&OpndKind_Mem) {
                // Example: SETcc r/m8, adding fake /0
                odesc.aux0 = _0;
            }
        }
        // check imm
        if (oinfo.roles.count > 0 &&
            (oinfo.opnds[0].kind == OpndKind_Imm ||
             oinfo.opnds[oinfo.roles.count-1].kind == OpndKind_Imm)) {
            // Example: CALL cd, PUSH imm32 - they fit both opnds[0] and
            // opnds[oinfo.roles.count-1].
            // The A3 opcode fits only opnds[0] - it's currently have
            // MOV imm32, EAX. Looks ridiculous, but this is how the
            // moffset is currently implemented. Will need to fix together
            // with other usages of moff.
            // adding fake /cd or fake /id
            unsigned imm_opnd_index =
                oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1;
            OpndSize sz = oinfo.opnds[imm_opnd_index].size;
            // Pick the immediate marker and the matching code-offset marker
            // for the operand size; 0xCC stands in where no marker applies.
            unsigned imm_encode, coff_encode;
            if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; }
            else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;}
            else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; }
            else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; }
            else { assert(false); imm_encode=0xCC; coff_encode=0xCC; }
            // Put the marker into the first free aux slot, unless aux0
            // already is the immediate or code-offset marker.
            if (odesc.aux1 == 0) {
                if (odesc.aux0==0) {
                    odesc.aux0 = imm_encode;
                }
                else {
                    if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) {
                        odesc.aux1 = imm_encode;
                    }
                }
            }
            else {
                // Both slots came from the table: aux1 must be the immediate.
                assert(odesc.aux1==imm_encode);
            }

        }

        // Operand descriptions and roles are copied as-is.
        assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds));
        memcpy(odesc.opnds, oinfo.opnds,
               sizeof(EncoderBase::OpndDesc)
               * EncoderBase::MAX_NUM_OPCODE_OPERANDS);
        odesc.roles = oinfo.roles;
        // first_opnd = number of leading operands (0, 1 or 2) that name a
        // specific register, i.e. whose reg is not RegName_Null.
        odesc.first_opnd = 0;
        if (odesc.opnds[0].reg != RegName_Null) {
            ++odesc.first_opnd;
            if (odesc.opnds[1].reg != RegName_Null) {
                ++odesc.first_opnd;
            }
        }

        if (odesc.platf == OpcodeInfo::decoder) {
            // if the opcode is only for decoding info, then do not hash it.
            ++oindex;
            continue;
        }

        //
        // check whether the operand info is a mask (i.e. r_m*).
        // in this case, split the info to have separate entries for 'r'
        // and for 'm'.
        // the good news is that there can be only one such operand.
        //
        int opnd2split = -1;
        for (unsigned k=0; k<oinfo.roles.count; k++) {
            // A mask has the Mem bit set together with at least one other bit.
            if ((oinfo.opnds[k].kind & OpndKind_Mem) &&
                (OpndKind_Mem != oinfo.opnds[k].kind)) {
                opnd2split = k;
                break;
            }
        };

        if (opnd2split == -1) {
            // not a mask, hash it, store it, continue.
            unsigned short hash = getHash(&oinfo);
            opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
            ++oindex;
            continue;
        };

        // Work on a copy so the master entry itself is left untouched.
        OpcodeInfo storeItem = oinfo;
        unsigned short hash;

        // remove the memory part of the mask, and store only 'r' part
        storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem);
        hash = getHash(&storeItem);
        if (opcodesHashMap[minfo->mn][hash] == NOHASH) {
            opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
        }
        // else {
        // do not overwrite if there is something there, just check that operands match
        // the reason is that for some instructions there are several possibilities:
        // say 'DEC r' may be encode as either '48+r' or 'FF /1', and I believe
        // the first one is better for 'dec r'.
        // as we're currently processing an opcode with memory part in operand,
        // leave already filled items intact, so if there is 'OP reg' there, this
        // better choice will be left in the table instead of 'OP r_m'
        // }

        // compute hash of memory-based operand, 'm' part in 'r_m'
        storeItem.opnds[opnd2split].kind = OpndKind_Mem;
        hash = getHash(&storeItem);
        // should not happen: for the r_m opcodes, there is a possibility
        // that hash value of 'r' part intersects with 'OP r' value, but it's
        // impossible for 'm' part.
        assert(opcodesHashMap[minfo->mn][hash] == NOHASH);
        opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;

        // The slot is now occupied; the next kept entry goes to the following one.
        ++oindex;
    }
}
2163 | + | |
2164 | +ENCODER_NAMESPACE_END |
@@ -0,0 +1,836 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2012 The Android Open Source Project | |
3 | + * | |
4 | + * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | + * you may not use this file except in compliance with the License. | |
6 | + * You may obtain a copy of the License at | |
7 | + * | |
8 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | + * | |
10 | + * Unless required by applicable law or agreed to in writing, software | |
11 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | + * See the License for the specific language governing permissions and | |
14 | + * limitations under the License. | |
15 | + */ | |
16 | + | |
17 | +#include <stdio.h> | |
18 | +#include <assert.h> | |
19 | +#include <limits.h> | |
20 | +#include "enc_base.h" | |
21 | +#include "enc_wrapper.h" | |
22 | +#include "dec_base.h" | |
23 | +#include "utils/Log.h" | |
24 | + | |
25 | +//#define PRINT_ENCODER_STREAM | |
26 | +bool dump_x86_inst = false; | |
27 | + | |
28 | +/** | |
29 | + * @brief Provides mapping between PhysicalReg and RegName used by encoder | |
30 | + * @param physicalReg The physical register | |
31 | + * @return Returns encoder's register name | |
32 | + */ | |
33 | +static RegName mapFromPhysicalReg (int physicalReg) | |
34 | +{ | |
35 | + RegName reg = RegName_Null; | |
36 | + | |
37 | + //Get mapping between PhysicalReg and RegName | |
38 | + switch (physicalReg) | |
39 | + { | |
40 | + case PhysicalReg_EAX: | |
41 | + reg = RegName_EAX; | |
42 | + break; | |
43 | + case PhysicalReg_EBX: | |
44 | + reg = RegName_EBX; | |
45 | + break; | |
46 | + case PhysicalReg_ECX: | |
47 | + reg = RegName_ECX; | |
48 | + break; | |
49 | + case PhysicalReg_EDX: | |
50 | + reg = RegName_EDX; | |
51 | + break; | |
52 | + case PhysicalReg_EDI: | |
53 | + reg = RegName_EDI; | |
54 | + break; | |
55 | + case PhysicalReg_ESI: | |
56 | + reg = RegName_ESI; | |
57 | + break; | |
58 | + case PhysicalReg_ESP: | |
59 | + reg = RegName_ESP; | |
60 | + break; | |
61 | + case PhysicalReg_EBP: | |
62 | + reg = RegName_EBP; | |
63 | + break; | |
64 | + case PhysicalReg_XMM0: | |
65 | + reg = RegName_XMM0; | |
66 | + break; | |
67 | + case PhysicalReg_XMM1: | |
68 | + reg = RegName_XMM1; | |
69 | + break; | |
70 | + case PhysicalReg_XMM2: | |
71 | + reg = RegName_XMM2; | |
72 | + break; | |
73 | + case PhysicalReg_XMM3: | |
74 | + reg = RegName_XMM3; | |
75 | + break; | |
76 | + case PhysicalReg_XMM4: | |
77 | + reg = RegName_XMM4; | |
78 | + break; | |
79 | + case PhysicalReg_XMM5: | |
80 | + reg = RegName_XMM5; | |
81 | + break; | |
82 | + case PhysicalReg_XMM6: | |
83 | + reg = RegName_XMM6; | |
84 | + break; | |
85 | + case PhysicalReg_XMM7: | |
86 | + reg = RegName_XMM7; | |
87 | + break; | |
88 | + default: | |
89 | + //We have no mapping | |
90 | + reg = RegName_Null; | |
91 | + break; | |
92 | + } | |
93 | + | |
94 | + return reg; | |
95 | +} | |
96 | + | |
97 | +//getRegSize, getAliasReg: | |
98 | +//OpndSize, RegName, OpndExt: enum enc_defs.h | |
99 | +inline void add_r(EncoderBase::Operands & args, int physicalReg, OpndSize sz, OpndExt ext = OpndExt_None) { | |
100 | + if (sz == OpndSize_128) | |
101 | + { | |
102 | + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined | |
103 | + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still | |
104 | + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. | |
105 | + sz = OpndSize_64; | |
106 | + } | |
107 | + | |
108 | + RegName reg = mapFromPhysicalReg (physicalReg); | |
109 | + if (sz != getRegSize(reg)) { | |
110 | + reg = getAliasReg(reg, sz); | |
111 | + } | |
112 | + args.add(EncoderBase::Operand(reg, ext)); | |
113 | +} | |
114 | +inline void add_m(EncoderBase::Operands & args, int baseReg, int disp, OpndSize sz, OpndExt ext = OpndExt_None) { | |
115 | + if (sz == OpndSize_128) | |
116 | + { | |
117 | + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined | |
118 | + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still | |
119 | + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. | |
120 | + sz = OpndSize_64; | |
121 | + } | |
122 | + | |
123 | + args.add(EncoderBase::Operand(sz, | |
124 | + mapFromPhysicalReg (baseReg), | |
125 | + RegName_Null, 0, | |
126 | + disp, ext)); | |
127 | +} | |
128 | +inline void add_m_scale(EncoderBase::Operands & args, int baseReg, int indexReg, int scale, | |
129 | + OpndSize sz, OpndExt ext = OpndExt_None) { | |
130 | + if (sz == OpndSize_128) | |
131 | + { | |
132 | + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined | |
133 | + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still | |
134 | + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. | |
135 | + sz = OpndSize_64; | |
136 | + } | |
137 | + | |
138 | + args.add(EncoderBase::Operand(sz, | |
139 | + mapFromPhysicalReg (baseReg), | |
140 | + mapFromPhysicalReg (indexReg), scale, | |
141 | + 0, ext)); | |
142 | +} | |
143 | +inline void add_m_disp_scale(EncoderBase::Operands & args, int baseReg, int disp, int indexReg, int scale, | |
144 | + OpndSize sz, OpndExt ext = OpndExt_None) { | |
145 | + if (sz == OpndSize_128) | |
146 | + { | |
147 | + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined | |
148 | + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still | |
149 | + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. | |
150 | + sz = OpndSize_64; | |
151 | + } | |
152 | + | |
153 | + args.add(EncoderBase::Operand(sz, | |
154 | + mapFromPhysicalReg (baseReg), | |
155 | + mapFromPhysicalReg (indexReg), scale, | |
156 | + disp, ext)); | |
157 | +} | |
158 | + | |
159 | +inline void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) { | |
160 | + return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i)); | |
161 | +} | |
162 | +inline void add_imm(EncoderBase::Operands & args, OpndSize sz, int value, bool is_signed) { | |
163 | + //assert(n_size != imm.get_size()); | |
164 | + args.add(EncoderBase::Operand(sz, value, | |
165 | + is_signed ? OpndExt_Signed : OpndExt_Zero)); | |
166 | +} | |
167 | + | |
168 | +#define MAX_DECODED_STRING_LEN 1024 | |
169 | +char tmpBuffer[MAX_DECODED_STRING_LEN]; | |
170 | + | |
171 | +void printOperand(const EncoderBase::Operand & opnd) { | |
172 | + unsigned int sz; | |
173 | + if(!dump_x86_inst) return; | |
174 | + sz = strlen(tmpBuffer); | |
175 | + if(opnd.size() != OpndSize_32) { | |
176 | + const char * opndSizeString = getOpndSizeString(opnd.size()); | |
177 | + | |
178 | + if (opndSizeString == NULL) { | |
179 | + // If the string that represents operand size is null it means that | |
180 | + // the operand size is an invalid value. Although this could be a | |
181 | + // problem if instruction is corrupted, technically failing to | |
182 | + // disassemble is not fatal. Thus, let's warn but proceed with using | |
183 | + // an empty string. | |
184 | + ALOGW("JIT-WARNING: Cannot decode instruction operand size."); | |
185 | + opndSizeString = ""; | |
186 | + } | |
187 | + | |
188 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN - sz, "%s ", | |
189 | + opndSizeString); | |
190 | + } | |
191 | + if(opnd.is_mem()) { | |
192 | + if(opnd.scale() != 0) { | |
193 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, | |
194 | + "%d(%s,%s,%d)", opnd.disp(), | |
195 | + getRegNameString(opnd.base()), | |
196 | + getRegNameString(opnd.index()), opnd.scale()); | |
197 | + } else { | |
198 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%d(%s)", | |
199 | + opnd.disp(), getRegNameString(opnd.base())); | |
200 | + } | |
201 | + } | |
202 | + if(opnd.is_imm()) { | |
203 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "#%x", | |
204 | + (int)opnd.imm()); | |
205 | + } | |
206 | + if(opnd.is_reg()) { | |
207 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s", | |
208 | + getRegNameString(opnd.reg())); | |
209 | + } | |
210 | +} | |
211 | +//TODO: the order of operands | |
212 | +//to make the printout have the same order as assembly in .S | |
213 | +//I reverse the order here | |
214 | +void printDecoderInst(Inst & decInst) { | |
215 | + unsigned int sz; | |
216 | + if(!dump_x86_inst) return; | |
217 | + sz = strlen(tmpBuffer); | |
218 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s ", | |
219 | + EncoderBase::toStr(decInst.mn)); | |
220 | + for(unsigned int k = 0; k < decInst.argc; k++) { | |
221 | + if(k > 0) { | |
222 | + sz = strlen(tmpBuffer); | |
223 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", "); | |
224 | + } | |
225 | + printOperand(decInst.operands[decInst.argc-1-k]); | |
226 | + } | |
227 | + ALOGE("%s", tmpBuffer); | |
228 | +} | |
229 | +void printOperands(EncoderBase::Operands& opnds) { | |
230 | + unsigned int sz; | |
231 | + if(!dump_x86_inst) return; | |
232 | + for(unsigned int k = 0; k < opnds.count(); k++) { | |
233 | + if(k > 0) { | |
234 | + sz = strlen(tmpBuffer); | |
235 | + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", "); | |
236 | + } | |
237 | + printOperand(opnds[opnds.count()-1-k]); | |
238 | + } | |
239 | +} | |
240 | +void printEncoderInst(Mnemonic m, EncoderBase::Operands& opnds) { | |
241 | + if(!dump_x86_inst) return; | |
242 | + snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- ENC %s ", | |
243 | + EncoderBase::toStr(m)); | |
244 | + printOperands(opnds); | |
245 | + ALOGE("%s", tmpBuffer); | |
246 | +} | |
247 | +int decodeThenPrint(char* stream_start) { | |
248 | + if(!dump_x86_inst) return 0; | |
249 | + snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- INST @ %p: ", | |
250 | + stream_start); | |
251 | + Inst decInst; | |
252 | + unsigned numBytes = DecoderBase::decode(stream_start, &decInst); | |
253 | + printDecoderInst(decInst); | |
254 | + return numBytes; | |
255 | +} | |
256 | + | |
257 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm(Mnemonic m, OpndSize size, int imm, char * stream) { | |
258 | + EncoderBase::Operands args; | |
259 | + //assert(imm.get_size() == size_32); | |
260 | + add_imm(args, size, imm, true/*is_signed*/); | |
261 | +#ifdef PRINT_ENCODER_STREAM | |
262 | + char* stream_start = stream; | |
263 | +#endif | |
264 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
265 | +#ifdef PRINT_ENCODER_STREAM | |
266 | + printEncoderInst(m, args); | |
267 | + decodeThenPrint(stream_start); | |
268 | +#endif | |
269 | + return stream; | |
270 | +} | |
271 | +extern "C" ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream) { | |
272 | + Inst decInst; | |
273 | + unsigned numBytes = DecoderBase::decode(stream, &decInst); | |
274 | + return numBytes; | |
275 | +} | |
276 | + | |
277 | +extern "C" ENCODER_DECLARE_EXPORT uintptr_t encoder_get_cur_operand_offset(int opnd_id) | |
278 | +{ | |
279 | + return (uintptr_t)EncoderBase::getOpndLocation(opnd_id); | |
280 | +} | |
281 | + | |
282 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm(int imm, char * stream) { | |
283 | + Inst decInst; | |
284 | + EncoderBase::Operands args; | |
285 | + | |
286 | + //Decode the instruction | |
287 | + DecoderBase::decode(stream, &decInst); | |
288 | + | |
289 | + add_imm(args, decInst.operands[0].size(), imm, true/*is_signed*/); | |
290 | + char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args); | |
291 | +#ifdef PRINT_ENCODER_STREAM | |
292 | + printEncoderInst(decInst.mn, args); | |
293 | + decodeThenPrint(stream); | |
294 | +#endif | |
295 | + return stream_next; | |
296 | +} | |
297 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem(Mnemonic m, OpndSize size, | |
298 | + int disp, int base_reg, bool isBasePhysical, char * stream) { | |
299 | + EncoderBase::Operands args; | |
300 | + add_m(args, base_reg, disp, size); | |
301 | +#ifdef PRINT_ENCODER_STREAM | |
302 | + char* stream_start = stream; | |
303 | +#endif | |
304 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
305 | +#ifdef PRINT_ENCODER_STREAM | |
306 | + printEncoderInst(m, args); | |
307 | + decodeThenPrint(stream_start); | |
308 | +#endif | |
309 | + return stream; | |
310 | +} | |
311 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg(Mnemonic m, OpndSize size, | |
312 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
313 | + EncoderBase::Operands args; | |
314 | + if(m == Mnemonic_DIV || m == Mnemonic_IDIV || m == Mnemonic_MUL || m == Mnemonic_IMUL) { | |
315 | + add_r(args, 0/*eax*/, size); | |
316 | + add_r(args, 3/*edx*/, size); | |
317 | + } | |
318 | + add_r(args, reg, size); | |
319 | +#ifdef PRINT_ENCODER_STREAM | |
320 | + char* stream_start = stream; | |
321 | +#endif | |
322 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
323 | +#ifdef PRINT_ENCODER_STREAM | |
324 | + printEncoderInst(m, args); | |
325 | + decodeThenPrint(stream_start); | |
326 | +#endif | |
327 | + return stream; | |
328 | +} | |
329 | +//! \brief Allows for different operand sizes | |
330 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg(Mnemonic m, OpndSize size, | |
331 | + int imm, int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
332 | + return encoder_imm_reg_diff_sizes(m, size, imm, size, reg, isPhysical, type, stream); | |
333 | +} | |
334 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize, | |
335 | + int reg, bool isPhysical, OpndSize destOpndSize, | |
336 | + int reg2, bool isPhysical2, LowOpndRegType type, char * stream) { | |
337 | + if((m == Mnemonic_MOV || m == Mnemonic_MOVQ || m == Mnemonic_MOVD) && reg == reg2) return stream; | |
338 | + EncoderBase::Operands args; | |
339 | + add_r(args, reg2, destOpndSize); //destination | |
340 | + if(m == Mnemonic_SAL || m == Mnemonic_SHR || m == Mnemonic_SHL || m == Mnemonic_SAR) | |
341 | + add_r(args, reg, OpndSize_8); | |
342 | + else | |
343 | + add_r(args, reg, srcOpndSize); | |
344 | +#ifdef PRINT_ENCODER_STREAM | |
345 | + char* stream_start = stream; | |
346 | +#endif | |
347 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
348 | +#ifdef PRINT_ENCODER_STREAM | |
349 | + printEncoderInst(m, args); | |
350 | + decodeThenPrint(stream_start); | |
351 | +#endif | |
352 | + return stream; | |
353 | +} | |
354 | +//both operands have same size | |
355 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg(Mnemonic m, OpndSize size, | |
356 | + int reg, bool isPhysical, | |
357 | + int reg2, bool isPhysical2, LowOpndRegType type, char * stream) { | |
358 | + return encoder_reg_reg_diff_sizes(m, size, reg, isPhysical, size, reg2, isPhysical2, type, stream); | |
359 | +} | |
360 | +//! \brief Allows for different operand sizes | |
361 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, | |
362 | + int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize, | |
363 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
364 | + EncoderBase::Operands args; | |
365 | + add_r(args, reg, regOpndSize); | |
366 | + add_m(args, base_reg, disp, memOpndSize); | |
367 | +#ifdef PRINT_ENCODER_STREAM | |
368 | + char* stream_start = stream; | |
369 | +#endif | |
370 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
371 | +#ifdef PRINT_ENCODER_STREAM | |
372 | + printEncoderInst(m, args); | |
373 | + decodeThenPrint(stream_start); | |
374 | +#endif | |
375 | + return stream; | |
376 | +} | |
377 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_reg(Mnemonic m, OpndSize size, | |
378 | + int disp, int base_reg, bool isBasePhysical, | |
379 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
380 | + return encoder_mem_to_reg_diff_sizes(m, size, disp, base_reg, isBasePhysical, size, reg, isPhysical, type, stream); | |
381 | +} | |
382 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_scale_reg(Mnemonic m, OpndSize size, | |
383 | + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, | |
384 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
385 | + EncoderBase::Operands args; | |
386 | + add_r(args, reg, size); | |
387 | + add_m_scale(args, base_reg, index_reg, scale, size); | |
388 | +#ifdef PRINT_ENCODER_STREAM | |
389 | + char* stream_start = stream; | |
390 | +#endif | |
391 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
392 | +#ifdef PRINT_ENCODER_STREAM | |
393 | + printEncoderInst(m, args); | |
394 | + decodeThenPrint(stream_start); | |
395 | +#endif | |
396 | + return stream; | |
397 | +} | |
398 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem_scale(Mnemonic m, OpndSize size, | |
399 | + int reg, bool isPhysical, | |
400 | + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, | |
401 | + LowOpndRegType type, char * stream) { | |
402 | + EncoderBase::Operands args; | |
403 | + add_m_scale(args, base_reg, index_reg, scale, size); | |
404 | + add_r(args, reg, size); | |
405 | +#ifdef PRINT_ENCODER_STREAM | |
406 | + char* stream_start = stream; | |
407 | +#endif | |
408 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
409 | +#ifdef PRINT_ENCODER_STREAM | |
410 | + printEncoderInst(m, args); | |
411 | + decodeThenPrint(stream_start); | |
412 | +#endif | |
413 | + return stream; | |
414 | +} | |
415 | +//! \brief Allows for different operand sizes | |
416 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, | |
417 | + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, | |
418 | + OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
419 | + EncoderBase::Operands args; | |
420 | + add_r(args, reg, regOpndSize); | |
421 | + add_m_disp_scale(args, base_reg, disp, index_reg, scale, memOpndSize); | |
422 | +#ifdef PRINT_ENCODER_STREAM | |
423 | + char* stream_start = stream; | |
424 | +#endif | |
425 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
426 | +#ifdef PRINT_ENCODER_STREAM | |
427 | + printEncoderInst(m, args); | |
428 | + decodeThenPrint(stream_start); | |
429 | +#endif | |
430 | + return stream; | |
431 | +} | |
432 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size, | |
433 | + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, | |
434 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
435 | + return encoder_mem_disp_scale_to_reg_diff_sizes(m, size, base_reg, isBasePhysical, | |
436 | + disp, index_reg, isIndexPhysical, scale, size, reg, isPhysical, | |
437 | + type, stream); | |
438 | +} | |
439 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size, | |
440 | + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, | |
441 | + int reg, bool isPhysical, LowOpndRegType type, char * stream) { | |
442 | + EncoderBase::Operands args; | |
443 | + add_r(args, reg, OpndSize_32); | |
444 | + add_m_disp_scale(args, base_reg, disp, index_reg, scale, size); | |
445 | +#ifdef PRINT_ENCODER_STREAM | |
446 | + char* stream_start = stream; | |
447 | +#endif | |
448 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
449 | +#ifdef PRINT_ENCODER_STREAM | |
450 | + printEncoderInst(m, args); | |
451 | + decodeThenPrint(stream_start); | |
452 | +#endif | |
453 | + return stream; | |
454 | +} | |
455 | +extern "C" ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size, | |
456 | + int reg, bool isPhysical, | |
457 | + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, | |
458 | + LowOpndRegType type, char* stream) { | |
459 | + EncoderBase::Operands args; | |
460 | + add_m_disp_scale(args, base_reg, disp, index_reg, scale, size); | |
461 | + add_r(args, reg, size); | |
462 | +#ifdef PRINT_ENCODER_STREAM | |
463 | + char* stream_start = stream; | |
464 | +#endif | |
465 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
466 | +#ifdef PRINT_ENCODER_STREAM | |
467 | + printEncoderInst(m, args); | |
468 | + decodeThenPrint(stream_start); | |
469 | +#endif | |
470 | + return stream; | |
471 | +} | |
472 | + | |
473 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem(Mnemonic m, OpndSize size, | |
474 | + int reg, bool isPhysical, | |
475 | + int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char * stream) { | |
476 | + EncoderBase::Operands args; | |
477 | + add_m(args, base_reg, disp, size); | |
478 | + add_r(args, reg, size); | |
479 | +#ifdef PRINT_ENCODER_STREAM | |
480 | + char* stream_start = stream; | |
481 | +#endif | |
482 | + if (m == Mnemonic_CMPXCHG ){ | |
483 | + //CMPXCHG require EAX as args | |
484 | + add_r(args,PhysicalReg_EAX,size); | |
485 | + //Add lock prefix for CMPXCHG, guarantee the atomic of CMPXCHG in multi-core platform | |
486 | + stream = (char *)EncoderBase::prefix(stream, InstPrefix_LOCK); | |
487 | + } | |
488 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
489 | +#ifdef PRINT_ENCODER_STREAM | |
490 | + printEncoderInst(m, args); | |
491 | + decodeThenPrint(stream_start); | |
492 | +#endif | |
493 | + return stream; | |
494 | +} | |
495 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes (Mnemonic m, OpndSize sizeImm, int imm, | |
496 | + OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream) | |
497 | +{ | |
498 | + //Create the operands | |
499 | + EncoderBase::Operands args; | |
500 | + //Add destination register | |
501 | + add_r (args, reg, sizeReg); | |
502 | + //For imul, we need to add implicit register explicitly | |
503 | + if (m == Mnemonic_IMUL) | |
504 | + { | |
505 | + add_r (args, reg, sizeReg); | |
506 | + } | |
507 | + //Finally add the immediate | |
508 | + add_imm (args, sizeImm, imm, true/*is_signed*/); | |
509 | + | |
510 | +#ifdef PRINT_ENCODER_STREAM | |
511 | + char* stream_start = stream; | |
512 | +#endif | |
513 | + | |
514 | + //Now do the encoding | |
515 | + stream = EncoderBase::encode (stream, m, args); | |
516 | + | |
517 | +#ifdef PRINT_ENCODER_STREAM | |
518 | + printEncoderInst(m, args); | |
519 | + decodeThenPrint(stream_start); | |
520 | +#endif | |
521 | + | |
522 | + return stream; | |
523 | +} | |
524 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream) { | |
525 | + Inst decInst; | |
526 | + EncoderBase::Operands args; | |
527 | + | |
528 | + //Decode the instruction | |
529 | + DecoderBase::decode(stream, &decInst); | |
530 | + | |
531 | + args.add(decInst.operands[0]); | |
532 | + add_imm(args, decInst.operands[1].size(), imm, true/*is_signed*/); | |
533 | + char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args); | |
534 | +#ifdef PRINT_ENCODER_STREAM | |
535 | + printEncoderInst(decInst.mn, args); | |
536 | + decodeThenPrint(stream); | |
537 | +#endif | |
538 | + return stream_next; | |
539 | +} | |
540 | + | |
541 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem(Mnemonic m, OpndSize size, | |
542 | + int imm, | |
543 | + int disp, int base_reg, bool isBasePhysical, char * stream) { | |
544 | + return encoder_imm_mem_diff_sizes(m, size, imm, size, disp, base_reg, isBasePhysical, stream); | |
545 | +} | |
546 | + | |
547 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm, | |
548 | + OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream) | |
549 | +{ | |
550 | + //Add operands | |
551 | + EncoderBase::Operands args; | |
552 | + add_m (args, baseRegister, disp, memOpndSize); | |
553 | + add_imm (args, immOpndSize, imm, true); | |
554 | + | |
555 | +#ifdef PRINT_ENCODER_STREAM | |
556 | + char* stream_start = stream; | |
557 | +#endif | |
558 | + | |
559 | + //Do the encoding | |
560 | + stream = EncoderBase::encode (stream, m, args); | |
561 | + | |
562 | +#ifdef PRINT_ENCODER_STREAM | |
563 | + printEncoderInst(m, args); | |
564 | + decodeThenPrint(stream_start); | |
565 | +#endif | |
566 | + | |
567 | + return stream; | |
568 | +} | |
569 | + | |
570 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_fp_mem(Mnemonic m, OpndSize size, int reg, | |
571 | + int disp, int base_reg, bool isBasePhysical, char * stream) { | |
572 | + EncoderBase::Operands args; | |
573 | + add_m(args, base_reg, disp, size); | |
574 | + // a fake FP register as operand | |
575 | + add_fp(args, reg, size == OpndSize_64/*is_double*/); | |
576 | +#ifdef PRINT_ENCODER_STREAM | |
577 | + char* stream_start = stream; | |
578 | +#endif | |
579 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
580 | +#ifdef PRINT_ENCODER_STREAM | |
581 | + printEncoderInst(m, args); | |
582 | + decodeThenPrint(stream_start); | |
583 | +#endif | |
584 | + return stream; | |
585 | +} | |
586 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_fp(Mnemonic m, OpndSize size, | |
587 | + int disp, int base_reg, bool isBasePhysical, | |
588 | + int reg, char * stream) { | |
589 | + EncoderBase::Operands args; | |
590 | + // a fake FP register as operand | |
591 | + add_fp(args, reg, size == OpndSize_64/*is_double*/); | |
592 | + add_m(args, base_reg, disp, size); | |
593 | +#ifdef PRINT_ENCODER_STREAM | |
594 | + char* stream_start = stream; | |
595 | +#endif | |
596 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
597 | +#ifdef PRINT_ENCODER_STREAM | |
598 | + printEncoderInst(m, args); | |
599 | + decodeThenPrint(stream_start); | |
600 | +#endif | |
601 | + return stream; | |
602 | +} | |
603 | + | |
604 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_return(char * stream) { | |
605 | + EncoderBase::Operands args; | |
606 | +#ifdef PRINT_ENCODER_STREAM | |
607 | + char* stream_start = stream; | |
608 | +#endif | |
609 | + stream = (char *)EncoderBase::encode(stream, Mnemonic_RET, args); | |
610 | +#ifdef PRINT_ENCODER_STREAM | |
611 | + printEncoderInst(Mnemonic_RET, args); | |
612 | + decodeThenPrint(stream_start); | |
613 | +#endif | |
614 | + return stream; | |
615 | +} | |
616 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char * stream) { | |
617 | + Mnemonic m = pop ? Mnemonic_FUCOMIP : Mnemonic_FUCOMI; | |
618 | + //a single operand or 2 operands? | |
619 | + //FST ST(i) has a single operand in encoder.inl? | |
620 | + EncoderBase::Operands args; | |
621 | + add_fp(args, reg, isDouble); | |
622 | +#ifdef PRINT_ENCODER_STREAM | |
623 | + char* stream_start = stream; | |
624 | +#endif | |
625 | + stream = (char *)EncoderBase::encode(stream, m, args); | |
626 | +#ifdef PRINT_ENCODER_STREAM | |
627 | + printEncoderInst(m, args); | |
628 | + decodeThenPrint(stream_start); | |
629 | +#endif | |
630 | + return stream; | |
631 | +} | |
632 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_mem_to_reg(OpndSize size, | |
633 | + int disp, int base_reg, bool isBasePhysical, | |
634 | + int reg, bool isPhysical, char * stream) { | |
635 | + EncoderBase::Operands args; | |
636 | + add_r(args, reg, OpndSize_32); | |
637 | + add_m(args, base_reg, disp, size); | |
638 | +#ifdef PRINT_ENCODER_STREAM | |
639 | + char* stream_start = stream; | |
640 | +#endif | |
641 | + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args); | |
642 | +#ifdef PRINT_ENCODER_STREAM | |
643 | + printEncoderInst(Mnemonic_MOVZX, args); | |
644 | + decodeThenPrint(stream_start); | |
645 | +#endif | |
646 | + return stream; | |
647 | +} | |
648 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_mem_to_reg(OpndSize size, | |
649 | + int disp, int base_reg, bool isBasePhysical, | |
650 | + int reg, bool isPhysical, char * stream) { | |
651 | + EncoderBase::Operands args; | |
652 | + add_r(args, reg, OpndSize_32); | |
653 | + add_m(args, base_reg, disp, size); | |
654 | +#ifdef PRINT_ENCODER_STREAM | |
655 | + char* stream_start = stream; | |
656 | +#endif | |
657 | + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args); | |
658 | +#ifdef PRINT_ENCODER_STREAM | |
659 | + printEncoderInst(Mnemonic_MOVSX, args); | |
660 | + decodeThenPrint(stream_start); | |
661 | +#endif | |
662 | + return stream; | |
663 | +} | |
664 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size, | |
665 | + int reg, bool isPhysical, int reg2, | |
666 | + bool isPhysical2, LowOpndRegType type, char * stream) { | |
667 | + EncoderBase::Operands args; | |
668 | + add_r(args, reg2, OpndSize_32); //destination | |
669 | + add_r(args, reg, size); | |
670 | +#ifdef PRINT_ENCODER_STREAM | |
671 | + char* stream_start = stream; | |
672 | +#endif | |
673 | + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args); | |
674 | +#ifdef PRINT_ENCODER_STREAM | |
675 | + printEncoderInst(Mnemonic_MOVZX, args); | |
676 | + decodeThenPrint(stream_start); | |
677 | +#endif | |
678 | + return stream; | |
679 | +} | |
680 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size, | |
681 | + int reg, bool isPhysical,int reg2, | |
682 | + bool isPhysical2, LowOpndRegType type, char * stream) { | |
683 | + EncoderBase::Operands args; | |
684 | + add_r(args, reg2, OpndSize_32); //destination | |
685 | + add_r(args, reg, size); | |
686 | +#ifdef PRINT_ENCODER_STREAM | |
687 | + char* stream_start = stream; | |
688 | +#endif | |
689 | + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args); | |
690 | +#ifdef PRINT_ENCODER_STREAM | |
691 | + printEncoderInst(Mnemonic_MOVSX, args); | |
692 | + decodeThenPrint(stream_start); | |
693 | +#endif | |
694 | + return stream; | |
695 | +} | |
696 | + | |
697 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize, | |
698 | + int sourceReg, OpndSize sourceRegSize, int destReg, OpndSize destRegSize, char * stream) | |
699 | +{ | |
700 | + EncoderBase::Operands args; | |
701 | + | |
702 | + //Add the source and destination registers | |
703 | + add_r (args, destReg, destRegSize); | |
704 | + add_r (args, sourceReg, sourceRegSize); | |
705 | + | |
706 | + //Now add the immediate. We expect in three operand situation that immediate is last argument | |
707 | + add_imm (args, immediateSize, imm, true/*is_signed*/); | |
708 | + | |
709 | +#ifdef PRINT_ENCODER_STREAM | |
710 | + char* stream_start = stream; | |
711 | +#endif | |
712 | + | |
713 | + //Do the actual encoding | |
714 | + stream = EncoderBase::encode (stream, m, args); | |
715 | + | |
716 | +#ifdef PRINT_ENCODER_STREAM | |
717 | + printEncoderInst (m, args); | |
718 | + decodeThenPrint (stream_start); | |
719 | +#endif | |
720 | + | |
721 | + //Return the updated stream pointer | |
722 | + return stream; | |
723 | +} | |
724 | + | |
725 | +/** | |
726 | + * @brief Generates variable sized nop instructions. | |
727 | + * @param numBytes Number of bytes for the nop instruction. If this value is | |
728 | + * larger than 9 bytes, more than one nop instruction will be generated. | |
729 | + * @param stream Instruction stream where to place the nops | |
730 | + * @return Updated instruction stream pointer after generating the nops | |
731 | + */ | |
732 | +extern "C" ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream) { | |
733 | + return EncoderBase::nops(stream, numBytes); | |
734 | +} | |
735 | + | |
736 | +// Disassemble the operand "opnd" and put the readable format in "strbuf" | |
737 | +// up to a string length of "len". | |
738 | +unsigned int DisassembleOperandToBuf(const EncoderBase::Operand& opnd, char* strbuf, unsigned int len) | |
739 | +{ | |
740 | + unsigned int sz = 0; | |
741 | + if(opnd.size() != OpndSize_32) { | |
742 | + const char * opndSizeString = getOpndSizeString(opnd.size()); | |
743 | + | |
744 | + if (opndSizeString == NULL) { | |
745 | + // If the string that represents operand size is null it means that | |
746 | + // the operand size is an invalid value. Although this could be a | |
747 | + // problem if instruction is corrupted, technically failing to | |
748 | + // disassemble is not fatal. Thus, let's warn but proceed with using | |
749 | + // an empty string. | |
750 | + ALOGW("JIT-WARNING: Cannot decode instruction operand size."); | |
751 | + opndSizeString = ""; | |
752 | + } | |
753 | + | |
754 | + sz += snprintf(&strbuf[sz], len-sz, "%s ", opndSizeString); | |
755 | + } | |
756 | + if(opnd.is_mem()) { | |
757 | + if(opnd.scale() != 0) { | |
758 | + sz += snprintf(&strbuf[sz], len-sz, "%d(%s,%s,%d)", opnd.disp(), | |
759 | + getRegNameString(opnd.base()), | |
760 | + getRegNameString(opnd.index()), opnd.scale()); | |
761 | + } else { | |
762 | + sz += snprintf(&strbuf[sz], len-sz, "%d(%s)", | |
763 | + opnd.disp(), getRegNameString(opnd.base())); | |
764 | + } | |
765 | + } else if(opnd.is_imm()) { | |
766 | + sz += snprintf(&strbuf[sz], len-sz, "#%x", (int)opnd.imm()); | |
767 | + } else if(opnd.is_reg()) { | |
768 | + sz += snprintf(&strbuf[sz], len-sz, "%s", | |
769 | + getRegNameString(opnd.reg())); | |
770 | + } | |
771 | + return sz; | |
772 | +} | |
773 | + | |
774 | +// Disassemble the instruction "decInst" and put the readable format | |
775 | +// in "strbuf" up to a string length of "len". | |
776 | +void DisassembleInstToBuf(Inst& decInst, char* strbuf, unsigned int len) | |
777 | +{ | |
778 | + unsigned int sz = 0; | |
779 | + int k; | |
780 | + sz += snprintf(&strbuf[sz], len-sz, "%s ", EncoderBase::toStr(decInst.mn)); | |
781 | + if (decInst.argc > 0) { | |
782 | + sz += DisassembleOperandToBuf(decInst.operands[decInst.argc-1], | |
783 | + &strbuf[sz], len-sz); | |
784 | + for(k = decInst.argc-2; k >= 0; k--) { | |
785 | + sz += snprintf(&strbuf[sz], len-sz, ", "); | |
786 | + sz += DisassembleOperandToBuf(decInst.operands[k], &strbuf[sz], len-sz); | |
787 | + } | |
788 | + } | |
789 | +} | |
790 | + | |
791 | +// Disassmble the x86 instruction pointed to by code pointer "stream." | |
792 | +// Put the disassemble text in the "strbuf" up to string length "len". | |
793 | +// Return the code pointer after the disassemble x86 instruction. | |
794 | +extern "C" ENCODER_DECLARE_EXPORT | |
795 | +char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len) | |
796 | +{ | |
797 | + Inst decInst; | |
798 | + unsigned numBytes = DecoderBase::decode(stream, &decInst); | |
799 | + DisassembleInstToBuf(decInst, strbuf, len); | |
800 | + return (stream + numBytes); | |
801 | +} | |
802 | + | |
803 | +/** | |
804 | + * @brief Physical register char* counterparts | |
805 | + */ | |
806 | +static const char * PhysicalRegString[] = { "eax", "ebx", "ecx", "edx", "edi", | |
807 | + "esi", "esp", "ebp", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", | |
808 | + "xmm6", "xmm7", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7", | |
809 | + "null" | |
810 | + }; | |
811 | + | |
812 | +/** | |
813 | + * @brief Scratch register char* counterparts | |
814 | + */ | |
815 | +static const char * ScratchRegString[] = { "scratch1", "scratch2", "scratch3", | |
816 | + "scratch4", "scratch5", "scratch6", "scratch7", "scratch8", "scratch9", | |
817 | + "scratch10" }; | |
818 | + | |
819 | +extern "C" ENCODER_DECLARE_EXPORT | |
820 | +/** | |
821 | + * @brief Transform a physical register into its char* counterpart | |
822 | + * @param reg the PhysicalReg we want to have a char* equivalent | |
823 | + * @return the register reg in char* form | |
824 | + */ | |
825 | +const char * physicalRegToString(PhysicalReg reg) | |
826 | +{ | |
827 | + if (reg < PhysicalReg_Null) { | |
828 | + return PhysicalRegString[reg]; | |
829 | + } else if (reg >= PhysicalReg_SCRATCH_1 && reg <= PhysicalReg_SCRATCH_10) { | |
830 | + return ScratchRegString[reg - PhysicalReg_SCRATCH_1]; | |
831 | + } else if (reg == PhysicalReg_Null) { | |
832 | + return "null"; | |
833 | + } else { | |
834 | + return "corrupted-data"; | |
835 | + } | |
836 | +} |
@@ -0,0 +1,283 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2012 The Android Open Source Project | |
3 | + * | |
4 | + * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | + * you may not use this file except in compliance with the License. | |
6 | + * You may obtain a copy of the License at | |
7 | + * | |
8 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | + * | |
10 | + * Unless required by applicable law or agreed to in writing, software | |
11 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | + * See the License for the specific language governing permissions and | |
14 | + * limitations under the License. | |
15 | + */ | |
16 | + | |
17 | +#ifndef _VM_ENC_WRAPPER_H_ | |
18 | +#define _VM_ENC_WRAPPER_H_ | |
19 | + | |
20 | +#include "enc_defs_ext.h" | |
21 | + | |
// Defined elsewhere; presumably toggles dumping of generated x86
// instructions - TODO confirm against the definition.
extern bool dump_x86_inst;

/**
 * Register identifiers: general purpose, XMM and x87 registers numbered
 * consecutively from 0, followed by PhysicalReg_Null and the scratch
 * registers which start at 100.
 */
typedef enum PhysicalReg {
    // General purpose registers.
    // StartOfGPMarker is 0 in order to match the register index in Reg_No.
    // Ideally PhysicalReg_Null would be 0 and the rest moved over.
    PhysicalReg_StartOfGPMarker = 0,
    PhysicalReg_EAX = PhysicalReg_StartOfGPMarker,
    PhysicalReg_EBX,
    PhysicalReg_ECX,
    PhysicalReg_EDX,
    PhysicalReg_EDI,
    PhysicalReg_ESI,
    PhysicalReg_ESP,
    PhysicalReg_EBP,
    PhysicalReg_EndOfGPMarker = PhysicalReg_EBP,

    // XMM registers
    PhysicalReg_StartOfXmmMarker,
    PhysicalReg_XMM0 = PhysicalReg_StartOfXmmMarker,
    PhysicalReg_XMM1,
    PhysicalReg_XMM2,
    PhysicalReg_XMM3,
    PhysicalReg_XMM4,
    PhysicalReg_XMM5,
    PhysicalReg_XMM6,
    PhysicalReg_XMM7,
    PhysicalReg_EndOfXmmMarker = PhysicalReg_XMM7,

    // x87 stack registers
    PhysicalReg_StartOfX87Marker,
    PhysicalReg_ST0 = PhysicalReg_StartOfX87Marker,
    PhysicalReg_ST1,
    PhysicalReg_ST2,
    PhysicalReg_ST3,
    PhysicalReg_ST4,
    PhysicalReg_ST5,
    PhysicalReg_ST6,
    PhysicalReg_ST7,
    PhysicalReg_EndOfX87Marker = PhysicalReg_ST7,

    PhysicalReg_Null,

    // Used as scratch logical registers in NCG O1. They should not overlap
    // with regular logical registers, hence they start from 100.
    PhysicalReg_SCRATCH_1 = 100,
    PhysicalReg_SCRATCH_2,
    PhysicalReg_SCRATCH_3,
    PhysicalReg_SCRATCH_4,
    PhysicalReg_SCRATCH_5,
    PhysicalReg_SCRATCH_6,
    PhysicalReg_SCRATCH_7,
    PhysicalReg_SCRATCH_8,
    PhysicalReg_SCRATCH_9,
    PhysicalReg_SCRATCH_10,

    // This should be the last entry
    PhysicalReg_Last = PhysicalReg_SCRATCH_10
} PhysicalReg;
55 | + | |
// Register numbers used by the encoder; the set depends on whether the
// EM64T (64-bit) register file is enabled.
typedef enum Reg_No {
#ifdef _EM64T_
    // 64-bit general purpose registers
    rax_reg = 0,
    rbx_reg,
    rcx_reg,
    rdx_reg,
    rdi_reg,
    rsi_reg,
    rsp_reg,
    rbp_reg,
    r8_reg,
    r9_reg,
    r10_reg,
    r11_reg,
    r12_reg,
    r13_reg,
    r14_reg,
    r15_reg,
    // XMM registers
    xmm0_reg,
    xmm1_reg,
    xmm2_reg,
    xmm3_reg,
    xmm4_reg,
    xmm5_reg,
    xmm6_reg,
    xmm7_reg,
    xmm8_reg,
    xmm9_reg,
    xmm10_reg,
    xmm11_reg,
    xmm12_reg,
    xmm13_reg,
    xmm14_reg,
    xmm15_reg,

#else // !defined(_EM64T_)

    // 32-bit general purpose registers
    eax_reg = 0,
    ebx_reg,
    ecx_reg,
    edx_reg,
    edi_reg,
    esi_reg,
    esp_reg,
    ebp_reg,
    // XMM registers
    xmm0_reg,
    xmm1_reg,
    xmm2_reg,
    xmm3_reg,
    xmm4_reg,
    xmm5_reg,
    xmm6_reg,
    xmm7_reg,
    fs_reg,
#endif
    /** @brief Total number of registers.*/
    n_reg
} Reg_No;
//
// Instruction operand sizes (8, 16, 32 and 64 bits). size_platf aliases
// the size selected by the _EM64T_ setting.
//
typedef enum Opnd_Size {
    size_8 = 0,
    size_16,
    size_32,
    size_64,
    n_size,     // number of distinct sizes
#ifdef _EM64T_
    size_platf = size_64
#else
    size_platf = size_32
#endif
} Opnd_Size;
93 | + | |
//
// Opcodes for ALU instructions, numbered consecutively from 0.
//
typedef enum ALU_Opcode {
    add_opc = 0,
    or_opc,
    adc_opc,
    sbb_opc,
    and_opc,
    sub_opc,
    xor_opc,
    cmp_opc,
    mul_opc,
    imul_opc,
    div_opc,
    idiv_opc,
    sll_opc,
    srl_opc,
    sra_opc,    // shift right arithmetic
    shl_opc,
    shr_opc,
    sal_opc,
    sar_opc,
    neg_opc,
    not_opc,
    andn_opc,
    n_alu       // number of ALU opcodes
} ALU_Opcode;
107 | + | |
// Condition codes 0..15. Each line lists one code followed by its aliases.
typedef enum ConditionCode {
    Condition_O   = 0,
    Condition_NO  = 1,
    Condition_B   = 2,  Condition_NAE = Condition_B,  Condition_C  = Condition_B,
    Condition_NB  = 3,  Condition_AE  = Condition_NB, Condition_NC = Condition_NB,
    Condition_Z   = 4,  Condition_E   = Condition_Z,
    Condition_NZ  = 5,  Condition_NE  = Condition_NZ,
    Condition_BE  = 6,  Condition_NA  = Condition_BE,
    Condition_NBE = 7,  Condition_A   = Condition_NBE,

    Condition_S   = 8,
    Condition_NS  = 9,
    Condition_P   = 10, Condition_PE  = Condition_P,
    Condition_NP  = 11, Condition_PO  = Condition_NP,
    Condition_L   = 12, Condition_NGE = Condition_L,
    Condition_NL  = 13, Condition_GE  = Condition_NL,
    Condition_LE  = 14, Condition_NG  = Condition_LE,
    Condition_NLE = 15, Condition_G   = Condition_NLE,

    // Number of distinct condition codes
    Condition_Count = 16
} ConditionCode;
142 | + | |
//
// Instruction prefix codes. Several names share one value.
//
typedef enum InstrPrefix {
    no_prefix = 0,

    lock_prefix = 0xF0,

    // Branch hints (same values as the cs/ds prefixes below)
    hint_branch_taken_prefix     = 0x2E,
    hint_branch_not_taken_prefix = 0x3E,

    // Repeat prefixes
    prefix_repne = 0xF2,
    prefix_repnz = prefix_repne,
    prefix_repe  = 0xF3,
    prefix_repz  = prefix_repe,
    prefix_rep   = 0xF3,

    // Segment prefixes
    prefix_cs = 0x2E,
    prefix_ss = 0x36,
    prefix_ds = 0x3E,
    prefix_es = 0x26,
    prefix_fs = 0x64,
    prefix_gs = 0x65
} InstrPrefix;
163 | + | |
// Register type of a low-level operand; values 0..4 plus an invalid marker.
enum LowOpndRegType
{
    LowOpndRegType_gp      = 0,
    LowOpndRegType_fs      = 1,
    LowOpndRegType_xmm     = 2,
    LowOpndRegType_fs_s    = 3,
    LowOpndRegType_ss      = 4,
    LowOpndRegType_invalid = 256,
};
173 | + | |
// Logical register kinds. Each valid kind is a distinct power of two
// (8, 16, 32, 64); 0 marks an invalid type.
enum LogicalRegType
{
    LogicalType_invalid    = 0,
    LowOpndRegType_scratch = 8,
    LowOpndRegType_temp    = 16,
    LowOpndRegType_hard    = 32,
    LowOpndRegType_virtual = 64,
};
182 | + | |
// If inlining, separate enc_wrapper.cpp into two files, one of which is the
// .inl included below. enc_wrapper.cpp needs to handle both cases.
// ENCODER_DECLARE_EXPORT expands to "inline" in the inline build and to
// nothing otherwise.
#ifdef ENCODER_INLINE
    #define ENCODER_DECLARE_EXPORT inline
    #include "enc_wrapper.inl"
#else
    #define ENCODER_DECLARE_EXPORT
#endif
191 | + | |
// Declarations of the encoder wrapper entry points. They are given C linkage
// when this header is compiled as C++.
#ifdef __cplusplus
extern "C"
{
#endif
// Immediate-only form, instruction size query and immediate patching
ENCODER_DECLARE_EXPORT char* encoder_imm(Mnemonic m, OpndSize size,
                  int imm, char* stream);
ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream);
ENCODER_DECLARE_EXPORT char* encoder_update_imm(int imm, char * stream);
// Single memory / register operand forms
ENCODER_DECLARE_EXPORT char* encoder_mem(Mnemonic m, OpndSize size,
               int disp, int base_reg, bool isBasePhysical, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_reg(Mnemonic m, OpndSize size,
               int reg, bool isPhysical, LowOpndRegType type, char* stream);
// Register/register forms
ENCODER_DECLARE_EXPORT char* encoder_reg_reg(Mnemonic m, OpndSize size,
                   int reg, bool isPhysical,
                   int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize,
                   int reg, bool isPhysical, OpndSize destOpndSize,
                   int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
// Memory/register forms (base + displacement, and base + scaled index)
ENCODER_DECLARE_EXPORT char* encoder_mem_reg(Mnemonic m, OpndSize size,
                   int disp, int base_reg, bool isBasePhysical,
                   int reg, bool isPhysical, LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
                   int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize,
                   int reg, bool isPhysical, LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_mem_scale_reg(Mnemonic m, OpndSize size,
                          int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
                          int reg, bool isPhysical, LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_reg_mem_scale(Mnemonic m, OpndSize size,
                          int reg, bool isPhysical,
                          int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
                          LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size,
                          int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
                          int reg, bool isPhysical, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
                          int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
                          OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size,
                          int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
                          int reg, bool isPhysical, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_2(Mnemonic m, OpndSize memOpndSize,
                          int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
                          OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size,
                          int reg, bool isPhysical,
                          int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
                          LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_reg_mem(Mnemonic m, OpndSize size,
                   int reg, bool isPhysical,
                   int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char* stream);
// Immediate with register or memory operand
ENCODER_DECLARE_EXPORT char* encoder_imm_reg(Mnemonic m, OpndSize size,
                   int imm, int reg, bool isPhysical, LowOpndRegType type, char* stream);
ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes(Mnemonic m, OpndSize sizeImm,
                   int imm, OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream);
ENCODER_DECLARE_EXPORT char* encoder_imm_mem(Mnemonic m, OpndSize size,
                   int imm,
                   int disp, int base_reg, bool isBasePhysical, char* stream);
ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm,
                    OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream);
// Floating-point stack forms and return
ENCODER_DECLARE_EXPORT char* encoder_fp_mem(Mnemonic m, OpndSize size, int reg,
                  int disp, int base_reg, bool isBasePhysical, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_mem_fp(Mnemonic m, OpndSize size,
                  int disp, int base_reg, bool isBasePhysical,
                  int reg, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_return(char* stream);
ENCODER_DECLARE_EXPORT char* encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char* stream);
// Moves with extension ("movez"/"moves"); presumably zero- and
// sign-extending respectively - TODO confirm against the definitions
ENCODER_DECLARE_EXPORT char* encoder_movez_mem_to_reg(OpndSize size,
                      int disp, int base_reg, bool isBasePhysical,
                      int reg, bool isPhysical, char* stream);
ENCODER_DECLARE_EXPORT char* encoder_moves_mem_to_reg(OpndSize size,
                      int disp, int base_reg, bool isBasePhysical,
                      int reg, bool isPhysical, char* stream);
ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size,
                      int reg, bool isPhysical, int reg2,
                      bool isPhysical2, LowOpndRegType type, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size,
                      int reg, bool isPhysical, int reg2,
                      bool isPhysical2, LowOpndRegType type, char * stream);
// Three-operand form, nop padding and disassembly helpers
ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize,
                      int sourceReg, OpndSize sourceRegSize, int destReg,
                      OpndSize destRegSize, char * stream);
ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream);
ENCODER_DECLARE_EXPORT int decodeThenPrint(char* stream_start);
ENCODER_DECLARE_EXPORT char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len);

//Provide a char* equivalent to a PhysicalReg type
ENCODER_DECLARE_EXPORT const char * physicalRegToString(PhysicalReg reg);
#ifdef __cplusplus
}
#endif
283 | +#endif // _VM_ENC_WRAPPER_H_ |
@@ -0,0 +1,717 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | +/** | |
21 | + * @file | |
22 | + * @brief Simple interface for generating processor instructions. | |
23 | + * | |
24 | + * The interface works for both IA32 and EM64T. By default, only IA32 | |
25 | + * capabilities are presented. To enable EM64T feature, the _EM64T_ macro | |
26 | + * must be defined (and, of course, a proper library version to be used). | |
27 | + * | |
28 | + * The interface is based on the original ia32.h encoder interface, | |
29 | + * with some simplifications and add-ons - EM64T-specific, SSE and SSE2. | |
30 | + * | |
31 | + * The interface is mostly intended for existing legacy code like LIL code | |
32 | + * generator. From the implementation point of view, it's just a wrapper | |
33 | + * around the EncoderBase functionality. | |
34 | + */ | |
35 | + | |
36 | +#ifndef _VM_ENCODER_H_ | |
37 | +#define _VM_ENCODER_H_ | |
38 | + | |
39 | +#include <limits.h> | |
40 | +#include "enc_base.h" | |
41 | +//#include "open/types.h" | |
42 | + | |
// Stack slot sizes and the number of registers available for inputs,
// selected by the _EM64T_ setting (and, on EM64T, by the target OS).
#ifdef _EM64T_
// size of general-purpose value on the stack in bytes
#define GR_STACK_SIZE 8
// size of floating-point value on the stack in bytes
#define FR_STACK_SIZE 8

#if defined(WIN32) || defined(_WIN64)
    // maximum number of GP registers for inputs
    const int MAX_GR = 4;
    // maximum number of FP registers for inputs
    const int MAX_FR = 4;
    // WIN64 reserves 4 words for shadow space
    const int SHADOW = 4 * GR_STACK_SIZE;
#else
    // maximum number of GP registers for inputs
    const int MAX_GR = 6;
    // maximum number of FP registers for inputs
    const int MAX_FR = 8;
    // Linux x64 doesn't reserve shadow space
    const int SHADOW = 0;
#endif

#else
// size of general-purpose value on the stack in bytes
#define GR_STACK_SIZE 4
// size of floating-point value on the stack in bytes
#define FR_STACK_SIZE 8

// maximum number of GP registers for inputs
const int MAX_GR = 0;
// maximum number of FP registers for inputs
const int MAX_FR = 0;
#endif
76 | + | |
// Encoder register numbers. The EM64T build exposes the 64-bit register
// file; otherwise the IA32 registers plus fs are listed.
typedef enum Reg_No {
#ifdef _EM64T_
    rax_reg = 0,
    rbx_reg,
    rcx_reg,
    rdx_reg,
    rdi_reg,
    rsi_reg,
    rsp_reg,
    rbp_reg,
    r8_reg,
    r9_reg,
    r10_reg,
    r11_reg,
    r12_reg,
    r13_reg,
    r14_reg,
    r15_reg,
    xmm0_reg,
    xmm1_reg,
    xmm2_reg,
    xmm3_reg,
    xmm4_reg,
    xmm5_reg,
    xmm6_reg,
    xmm7_reg,
    xmm8_reg,
    xmm9_reg,
    xmm10_reg,
    xmm11_reg,
    xmm12_reg,
    xmm13_reg,
    xmm14_reg,
    xmm15_reg,

#else // !defined(_EM64T_)

    eax_reg = 0,
    ebx_reg,
    ecx_reg,
    edx_reg,
    edi_reg,
    esi_reg,
    esp_reg,
    ebp_reg,
    xmm0_reg,
    xmm1_reg,
    xmm2_reg,
    xmm3_reg,
    xmm4_reg,
    xmm5_reg,
    xmm6_reg,
    xmm7_reg,
    fs_reg,
#endif
    /** @brief Total number of registers.*/
    n_reg
} Reg_No;
//
// Operand sizes of an instruction: 8, 16, 32 or 64 bits.
// size_platf names the size chosen by the _EM64T_ setting.
//
typedef enum Opnd_Size {
    size_8 = 0,
    size_16,
    size_32,
    size_64,
    n_size,     // count of the sizes above
#ifdef _EM64T_
    size_platf = size_64
#else
    size_platf = size_32
#endif
} Opnd_Size;
114 | + | |
//
// Opcodes for ALU instructions, numbered from 0.
//
typedef enum ALU_Opcode {
    add_opc = 0,
    or_opc,
    adc_opc,
    sbb_opc,
    and_opc,
    sub_opc,
    xor_opc,
    cmp_opc,
    n_alu       // number of ALU opcodes
} ALU_Opcode;
123 | + | |
//
// Opcodes for shift instructions. max_shift_opcode and n_shift are both
// fixed at 6, the number of opcodes listed before them.
//
typedef enum Shift_Opcode {
    shld_opc,
    shrd_opc,
    shl_opc,
    shr_opc,
    sar_opc,
    ror_opc,
    max_shift_opcode = 6,
    n_shift = 6
} Shift_Opcode;
131 | + | |
// Condition codes, values 0..15; aliases follow the code they share a value with.
typedef enum ConditionCode {
    Condition_O   = 0,
    Condition_NO  = 1,
    Condition_B   = 2,  Condition_NAE = Condition_B,  Condition_C  = Condition_B,
    Condition_NB  = 3,  Condition_AE  = Condition_NB, Condition_NC = Condition_NB,
    Condition_Z   = 4,  Condition_E   = Condition_Z,
    Condition_NZ  = 5,  Condition_NE  = Condition_NZ,
    Condition_BE  = 6,  Condition_NA  = Condition_BE,
    Condition_NBE = 7,  Condition_A   = Condition_NBE,

    Condition_S   = 8,
    Condition_NS  = 9,
    Condition_P   = 10, Condition_PE  = Condition_P,
    Condition_NP  = 11, Condition_PO  = Condition_NP,
    Condition_L   = 12, Condition_NGE = Condition_L,
    Condition_NL  = 13, Condition_GE  = Condition_NL,
    Condition_LE  = 14, Condition_NG  = Condition_LE,
    Condition_NLE = 15, Condition_G   = Condition_NLE,

    // Total number of condition codes
    Condition_Count = 16
} ConditionCode;
166 | + | |
//
// Prefix codes. Note that several names map to the same value.
//
typedef enum InstrPrefix {
    no_prefix = 0,

    lock_prefix = 0xF0,

    // Branch hints (same values as the cs/ds prefixes below)
    hint_branch_taken_prefix     = 0x2E,
    hint_branch_not_taken_prefix = 0x3E,

    // Repeat prefixes
    prefix_repne = 0xF2,
    prefix_repnz = prefix_repne,
    prefix_repe  = 0xF3,
    prefix_repz  = prefix_repe,
    prefix_rep   = 0xF3,

    // Segment prefixes
    prefix_cs = 0x2E,
    prefix_ss = 0x36,
    prefix_ds = 0x3E,
    prefix_es = 0x26,
    prefix_fs = 0x64,
    prefix_gs = 0x65
} InstrPrefix;
187 | + | |
188 | + | |
//
// Base class of an instruction operand. It only records which kind of
// operand a derived class represents.
//
class Opnd {

protected:
    // Kind of operand stored in the object
    enum Tag { SignedImm, UnsignedImm, Reg, Mem, FP, XMM };

    const Tag tag;

    // Only derived operand classes may construct an Opnd
    Opnd(Tag kind): tag(kind) {}

private:
    // Copying is disallowed: both operations are private and assert when reached
    Opnd(const Opnd &): tag(Mem) { assert(false); }
    Opnd& operator=(const Opnd &) { assert(false); return *this; }

public:
    // Placement form: the object is constructed in caller-supplied memory
    void * operator new(size_t, void * mem) {
        return mem;
    }

    // Nothing is allocated by the class, so both delete forms do nothing
    void operator delete(void *) {}

    void operator delete(void *, void *) {}
};
215 | +typedef int I_32; | |
216 | +class Imm_Opnd: public Opnd { | |
217 | + | |
218 | +protected: | |
219 | + union { | |
220 | +#ifdef _EM64T_ | |
221 | + int64 value; | |
222 | + unsigned char bytes[8]; | |
223 | +#else | |
224 | + I_32 value; | |
225 | + unsigned char bytes[4]; | |
226 | +#endif | |
227 | + }; | |
228 | + Opnd_Size size; | |
229 | + | |
230 | +public: | |
231 | + Imm_Opnd(I_32 val, bool isSigned = true): | |
232 | + Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(size_32) { | |
233 | + if (isSigned) { | |
234 | + if (CHAR_MIN <= val && val <= CHAR_MAX) { | |
235 | + size = size_8; | |
236 | + } else if (SHRT_MIN <= val && val <= SHRT_MAX) { | |
237 | + size = size_16; | |
238 | + } | |
239 | + } else { | |
240 | + assert(val >= 0); | |
241 | + if (val <= UCHAR_MAX) { | |
242 | + size = size_8; | |
243 | + } else if (val <= USHRT_MAX) { | |
244 | + size = size_16; | |
245 | + } | |
246 | + } | |
247 | + } | |
248 | + Imm_Opnd(const Imm_Opnd& that): Opnd(that.tag), value(that.value), size(that.size) {}; | |
249 | + | |
250 | +#ifdef _EM64T_ | |
251 | + Imm_Opnd(Opnd_Size sz, int64 val, bool isSigned = true): | |
252 | + Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) { | |
253 | +#ifndef NDEBUG | |
254 | + switch (size) { | |
255 | + case size_8: | |
256 | + assert(val == (int64)(I_8)val); | |
257 | + break; | |
258 | + case size_16: | |
259 | + assert(val == (int64)(int16)val); | |
260 | + break; | |
261 | + case size_32: | |
262 | + assert(val == (int64)(I_32)val); | |
263 | + break; | |
264 | + case size_64: | |
265 | + break; | |
266 | + case n_size: | |
267 | + assert(false); | |
268 | + break; | |
269 | + } | |
270 | +#endif // NDEBUG | |
271 | + } | |
272 | + | |
273 | + int64 get_value() const { return value; } | |
274 | + | |
275 | +#else | |
276 | + | |
277 | + Imm_Opnd(Opnd_Size sz, I_32 val, int isSigned = true): | |
278 | + Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) { | |
279 | +#ifndef NDEBUG | |
280 | + switch (size) { | |
281 | + case size_8: | |
282 | + assert((I_32)val == (I_32)(I_8)val); | |
283 | + break; | |
284 | + case size_16: | |
285 | + assert((I_32)val == (I_32)(int16)val); | |
286 | + break; | |
287 | + case size_32: | |
288 | + break; | |
289 | + case size_64: | |
290 | + case n_size: | |
291 | + assert(false); | |
292 | + break; | |
293 | + } | |
294 | +#endif // NDEBUG | |
295 | + } | |
296 | + | |
297 | + I_32 get_value() const { return value; } | |
298 | + | |
299 | +#endif | |
300 | + Opnd_Size get_size() const { return size; } | |
301 | + bool is_signed() const { return tag == SignedImm; } | |
302 | +}; | |
303 | + | |
304 | +class RM_Opnd: public Opnd { | |
305 | + | |
306 | +public: | |
307 | + bool is_reg() const { return tag != SignedImm && tag != UnsignedImm && tag != Mem; } | |
308 | + | |
309 | +protected: | |
310 | + RM_Opnd(Tag t): Opnd(t) {} | |
311 | + | |
312 | +private: | |
313 | + // disallow copying | |
314 | + RM_Opnd(const RM_Opnd &): Opnd(Reg) { assert(false); } | |
315 | +}; | |
316 | + | |
317 | +class R_Opnd: public RM_Opnd { | |
318 | + | |
319 | +protected: | |
320 | + Reg_No _reg_no; | |
321 | + | |
322 | +public: | |
323 | + R_Opnd(Reg_No r): RM_Opnd(Reg), _reg_no(r) {} | |
324 | + Reg_No reg_no() const { return _reg_no; } | |
325 | + | |
326 | +private: | |
327 | + // disallow copying | |
328 | + R_Opnd(const R_Opnd &): RM_Opnd(Reg) { assert(false); } | |
329 | +}; | |
330 | + | |
331 | +// | |
332 | +// a memory operand with displacement | |
333 | +// Can also serve as a full memory operand with base,index, displacement and scale. | |
334 | +// Use n_reg to specify 'no register', say, for index. | |
335 | +class M_Opnd: public RM_Opnd { | |
336 | + | |
337 | +protected: | |
338 | + Imm_Opnd m_disp; | |
339 | + Imm_Opnd m_scale; | |
340 | + R_Opnd m_index; | |
341 | + R_Opnd m_base; | |
342 | + | |
343 | +public: | |
344 | + //M_Opnd(Opnd_Size sz): RM_Opnd(Mem, K_M, sz), m_disp(0), m_scale(0), m_index(n_reg), m_base(n_reg) {} | |
345 | + M_Opnd(I_32 disp): | |
346 | + RM_Opnd(Mem), m_disp(disp), m_scale(0), m_index(n_reg), m_base(n_reg) {} | |
347 | + M_Opnd(Reg_No rbase, I_32 rdisp): | |
348 | + RM_Opnd(Mem), m_disp(rdisp), m_scale(0), m_index(n_reg), m_base(rbase) {} | |
349 | + M_Opnd(I_32 disp, Reg_No rbase, Reg_No rindex, unsigned scale): | |
350 | + RM_Opnd(Mem), m_disp(disp), m_scale(scale), m_index(rindex), m_base(rbase) {} | |
351 | + M_Opnd(const M_Opnd & that) : RM_Opnd(Mem), | |
352 | + m_disp((int)that.m_disp.get_value()), m_scale((int)that.m_scale.get_value()), | |
353 | + m_index(that.m_index.reg_no()), m_base(that.m_base.reg_no()) | |
354 | + {} | |
355 | + // | |
356 | + inline const R_Opnd & base(void) const { return m_base; } | |
357 | + inline const R_Opnd & index(void) const { return m_index; } | |
358 | + inline const Imm_Opnd & scale(void) const { return m_scale; } | |
359 | + inline const Imm_Opnd & disp(void) const { return m_disp; } | |
360 | +}; | |
361 | + | |
//
// a memory operand with base register and displacement
//
class M_Base_Opnd: public M_Opnd {

public:
    // Forwards to the full M_Opnd form with no index register (n_reg) and scale 0
    M_Base_Opnd(Reg_No base, I_32 disp) : M_Opnd(disp, base, n_reg, 0) {}

private:
    // disallow copying - but it leads to ICC errors #734 in encoder.inl
    // M_Base_Opnd(const M_Base_Opnd &): M_Opnd(0) { assert(false); }
};
374 | + | |
//
// a memory operand with base register, scaled index register
// and displacement.
//
class M_Index_Opnd : public M_Opnd {

public:
    // Forwards to the full M_Opnd form; note that the argument order differs
    // (M_Opnd takes the displacement first)
    M_Index_Opnd(Reg_No base, Reg_No index, I_32 disp, unsigned scale):
        M_Opnd(disp, base, index, scale) {}

private:
    // disallow copying - but it leads to ICC errors #734 in encoder.inl
    // M_Index_Opnd(const M_Index_Opnd &): M_Opnd(0) { assert(false); }
};
389 | + | |
390 | +class XMM_Opnd : public Opnd { | |
391 | + | |
392 | +protected: | |
393 | + unsigned m_idx; | |
394 | + | |
395 | +public: | |
396 | + XMM_Opnd(unsigned _idx): Opnd(XMM), m_idx(_idx) {}; | |
397 | + unsigned get_idx( void ) const { return m_idx; }; | |
398 | + | |
399 | +private: | |
400 | + // disallow copying | |
401 | + XMM_Opnd(const XMM_Opnd &): Opnd(XMM) { assert(false); } | |
402 | +}; | |
403 | + | |
404 | +// | |
405 | +// operand structures for ia32 registers | |
406 | +// | |
407 | +#ifdef _EM64T_ | |
408 | + | |
409 | +extern R_Opnd rax_opnd; | |
410 | +extern R_Opnd rcx_opnd; | |
411 | +extern R_Opnd rdx_opnd; | |
412 | +extern R_Opnd rbx_opnd; | |
413 | +extern R_Opnd rdi_opnd; | |
414 | +extern R_Opnd rsi_opnd; | |
415 | +extern R_Opnd rsp_opnd; | |
416 | +extern R_Opnd rbp_opnd; | |
417 | + | |
418 | +extern R_Opnd r8_opnd; | |
419 | +extern R_Opnd r9_opnd; | |
420 | +extern R_Opnd r10_opnd; | |
421 | +extern R_Opnd r11_opnd; | |
422 | +extern R_Opnd r12_opnd; | |
423 | +extern R_Opnd r13_opnd; | |
424 | +extern R_Opnd r14_opnd; | |
425 | +extern R_Opnd r15_opnd; | |
426 | + | |
427 | +extern XMM_Opnd xmm8_opnd; | |
428 | +extern XMM_Opnd xmm9_opnd; | |
429 | +extern XMM_Opnd xmm10_opnd; | |
430 | +extern XMM_Opnd xmm11_opnd; | |
431 | +extern XMM_Opnd xmm12_opnd; | |
432 | +extern XMM_Opnd xmm13_opnd; | |
433 | +extern XMM_Opnd xmm14_opnd; | |
434 | +extern XMM_Opnd xmm15_opnd; | |
435 | +#else | |
436 | + | |
437 | +extern R_Opnd eax_opnd; | |
438 | +extern R_Opnd ecx_opnd; | |
439 | +extern R_Opnd edx_opnd; | |
440 | +extern R_Opnd ebx_opnd; | |
441 | +extern R_Opnd esp_opnd; | |
442 | +extern R_Opnd ebp_opnd; | |
443 | +extern R_Opnd esi_opnd; | |
444 | +extern R_Opnd edi_opnd; | |
445 | + | |
446 | +#endif // _EM64T_ | |
447 | + | |
448 | +extern XMM_Opnd xmm0_opnd; | |
449 | +extern XMM_Opnd xmm1_opnd; | |
450 | +extern XMM_Opnd xmm2_opnd; | |
451 | +extern XMM_Opnd xmm3_opnd; | |
452 | +extern XMM_Opnd xmm4_opnd; | |
453 | +extern XMM_Opnd xmm5_opnd; | |
454 | +extern XMM_Opnd xmm6_opnd; | |
455 | +extern XMM_Opnd xmm7_opnd; | |
456 | + | |
457 | +#ifdef NO_ENCODER_INLINE | |
458 | + #define ENCODER_DECLARE_EXPORT | |
459 | +#else | |
460 | + #define ENCODER_DECLARE_EXPORT inline | |
461 | + #include "encoder.inl" | |
462 | +#endif | |
463 | + | |
464 | +// prefix | |
465 | +ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p); | |
466 | + | |
467 | +// stack push and pop instructions | |
468 | +ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
469 | +ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm); | |
470 | +ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
471 | + | |
472 | +// cmpxchg or xchg | |
473 | +ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); | |
474 | +ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); | |
475 | + | |
476 | +// inc(rement), dec(rement), not, neg(ate) instructions | |
477 | +ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
478 | +ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
479 | +ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
480 | +ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
481 | +ENCODER_DECLARE_EXPORT char * nop(char * stream); | |
482 | +ENCODER_DECLARE_EXPORT char * int3(char * stream); | |
483 | + | |
484 | +// alu instructions: add, or, adc, sbb, and, sub, xor, cmp | |
485 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
486 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf); | |
487 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
488 | + | |
489 | +// test instruction | |
490 | +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
491 | +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); | |
492 | + | |
493 | +// shift instructions: shl, shr, sar, shld, shrd, ror | |
494 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
495 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
496 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
497 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); | |
498 | + | |
499 | +// multiply instructions: mul, imul | |
500 | +ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
501 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
502 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
503 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, const Imm_Opnd& imm, Opnd_Size sz = size_platf); | |
504 | + | |
505 | +// divide instructions: div, idiv | |
506 | +ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
507 | +ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
508 | + | |
509 | +// data movement: mov | |
510 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf); | |
511 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
512 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); | |
513 | + | |
514 | +ENCODER_DECLARE_EXPORT char * movsx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
515 | +ENCODER_DECLARE_EXPORT char * movzx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
516 | + | |
517 | +ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm); | |
518 | +ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm); | |
519 | +ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm); | |
520 | +ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm); | |
521 | + | |
522 | +// sse mov | |
523 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
524 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl); | |
525 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
526 | + | |
527 | +// sse add, sub, mul, div | |
528 | +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
529 | +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
530 | + | |
531 | +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
532 | +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
533 | + | |
534 | +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
535 | +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
536 | + | |
537 | +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
538 | +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
539 | + | |
540 | +// xor, compare | |
541 | +ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); | |
542 | + | |
543 | +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
544 | +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl); | |
545 | + | |
546 | +// sse conversions | |
547 | +ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); | |
548 | +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl); | |
549 | +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl); | |
550 | +ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
551 | +ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); | |
552 | +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64); | |
553 | +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); | |
554 | +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32); | |
555 | +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); | |
556 | + | |
557 | +// condition operations | |
558 | +ENCODER_DECLARE_EXPORT char * cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
559 | +ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8); | |
560 | + | |
561 | +// load effective address: lea | |
562 | +ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz = size_platf); | |
563 | +ENCODER_DECLARE_EXPORT char * cdq(char * stream); | |
564 | +ENCODER_DECLARE_EXPORT char * wait(char * stream); | |
565 | + | |
566 | +// control-flow instructions | |
567 | +ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm); | |
568 | + | |
569 | +// jump with 8-bit relative | |
570 | +ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm); | |
571 | + | |
572 | +// jump with 32-bit relative | |
573 | +ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm); | |
574 | + | |
575 | +// register indirect jump | |
576 | +ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
577 | + | |
578 | +// jump to target address | |
579 | +ENCODER_DECLARE_EXPORT char *jump(char * stream, char *target); | |
580 | + | |
581 | +// jump with displacement | |
582 | +//char * jump(char * stream, I_32 disp); | |
583 | + | |
584 | +// conditional branch with 8-bit branch offset | |
585 | +ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix); | |
586 | + | |
587 | +// conditional branch with 32-bit branch offset | |
588 | +ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix); | |
589 | + | |
590 | +// conditional branch with target label address | |
591 | +//char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix = no_prefix); | |
592 | + | |
593 | +// conditional branch with displacement immediate | |
594 | +ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, I_32 disp, InstrPrefix prefix = no_prefix); | |
595 | + | |
596 | +// call with displacement | |
597 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm); | |
598 | + | |
599 | +// indirect call through register or memory location | |
600 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); | |
601 | + | |
602 | +// call target address | |
603 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target); | |
604 | + | |
605 | +// return instruction | |
606 | +ENCODER_DECLARE_EXPORT char * ret(char * stream); | |
607 | +ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop); | |
608 | +ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm); | |
609 | + | |
610 | +// string operations | |
611 | +ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set); | |
612 | +ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix); | |
613 | +ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix); | |
614 | + | |
615 | +// floating-point instructions | |
616 | + | |
617 | +// st(0) = st(0) fp_op m{32,64}real | |
618 | +//!char * fp_op_mem(char * stream, FP_Opcode opc,const M_Opnd& mem,int is_double); | |
619 | + | |
620 | +// st(0) = st(0) fp_op st(i) | |
621 | +//!char *fp_op(char * stream, FP_Opcode opc,unsigned i); | |
622 | + | |
623 | +// st(i) = st(i) fp_op st(0) ; optionally pop stack | |
624 | +//!char * fp_op(char * stream, FP_Opcode opc,unsigned i,unsigned pop_stk); | |
625 | + | |
626 | +// compare st(0),st(1) and pop stack twice | |
627 | +//!char * fcompp(char * stream); | |
628 | +ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem); | |
629 | +ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem); | |
630 | +ENCODER_DECLARE_EXPORT char * fnstsw(char * stream); | |
631 | +//!char * fchs(char * stream); | |
632 | +//!char * frem(char * stream); | |
633 | +//!char * fxch(char * stream,unsigned i); | |
634 | +//!char * fcomip(char * stream, unsigned i); | |
635 | + | |
636 | +// load from memory (as fp) into fp register stack | |
637 | +ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, bool is_double); | |
638 | +//!char *fld80(char * stream,const M_Opnd& mem); | |
639 | + | |
640 | +// load from memory (as int) into fp register stack | |
641 | +//!char * fild(char * stream,const M_Opnd& mem,int is_long); | |
642 | + | |
643 | +// push st(i) onto fp register stack | |
644 | +//!char * fld(char * stream,unsigned i); | |
645 | + | |
646 | +// push the constants 0.0 and 1.0 onto the fp register stack | |
647 | +//!char * fldz(char * stream); | |
648 | +//!char * fld1(char * stream); | |
649 | + | |
650 | +// store stack to memory (as int), optionally popping the stack | |
651 | +ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, bool is_long, bool pop_stk); | |
652 | +// store stack to memory (as fp), optionally popping the stack | |
653 | +ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, bool is_double, bool pop_stk); | |
654 | +// store ST(0) to ST(i), optionally popping the stack. Takes 1 clock | |
655 | +ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk); | |
656 | + | |
657 | +//!char * pushad(char * stream); | |
658 | +//!char * pushfd(char * stream); | |
659 | +//!char * popad(char * stream); | |
660 | +//!char * popfd(char * stream); | |
661 | + | |
662 | +// stack frame allocation instructions: enter & leave | |
663 | +// | |
664 | +// enter frame_size | |
665 | +// | |
666 | +// is equivalent to: | |
667 | +// | |
668 | +// push ebp | |
669 | +// mov ebp,esp | |
670 | +// sub esp,frame_size | |
671 | +// | |
672 | +//!char *enter(char * stream,const Imm_Opnd& imm); | |
673 | + | |
674 | +// leave | |
675 | +// is equivalent to: | |
676 | +// | |
677 | +// mov esp,ebp | |
678 | +// pop ebp | |
679 | +//!char *leave(char * stream); | |
680 | + | |
681 | +// sahf loads SF, ZF, AF, PF, and CF flags from AH | |
682 | +//!char *sahf(char * stream); | |
683 | + | |
684 | +// Intrinsic FP math functions | |
685 | + | |
686 | +//!char *math_fsin(char * stream); | |
687 | +//!char *math_fcos(char * stream); | |
688 | +//!char *math_fabs(char * stream); | |
689 | +//!char *math_fpatan(char * stream); | |
690 | +ENCODER_DECLARE_EXPORT char * fprem(char * stream); | |
691 | +ENCODER_DECLARE_EXPORT char * fprem1(char * stream); | |
692 | +//!char *math_frndint(char * stream); | |
693 | +//!char *math_fptan(char * stream); | |
694 | + | |
695 | +// | |
696 | +// Add 1-7 bytes padding, with as few instructions as possible, | |
697 | +// with no effect on the processor state (e.g., registers, flags) | |
698 | +// | |
699 | +//!char *padding(char * stream, unsigned num); | |
700 | + | |
701 | +// prolog and epilog code generation | |
702 | +//- char *prolog(char * stream,unsigned frame_size,unsigned reg_save_mask); | |
703 | +//- char *epilog(char * stream,unsigned reg_save_mask); | |
704 | + | |
705 | +//!extern R_Opnd reg_operand_array[]; | |
706 | + | |
707 | +// fsave and frstor | |
708 | +//!char *fsave(char * stream); | |
709 | +//!char *frstor(char * stream); | |
710 | + | |
711 | +// lahf : Load Status Flags into AH Register | |
712 | +//!char *lahf(char * stream); | |
713 | + | |
714 | +// mfence : Memory Fence | |
715 | +//!char *mfence(char * stream); | |
716 | + | |
717 | +#endif // _VM_ENCODER_H_ |
@@ -0,0 +1,863 @@ | ||
1 | +/* | |
2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | + * contributor license agreements. See the NOTICE file distributed with | |
4 | + * this work for additional information regarding copyright ownership. | |
5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | + * (the "License"); you may not use this file except in compliance with | |
7 | + * the License. You may obtain a copy of the License at | |
8 | + * | |
9 | + * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | + * | |
11 | + * Unless required by applicable law or agreed to in writing, software | |
12 | + * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | + * See the License for the specific language governing permissions and | |
15 | + * limitations under the License. | |
16 | + */ | |
17 | +/** | |
18 | + * @author Alexander V. Astapchuk | |
19 | + */ | |
20 | +#include <stdio.h> | |
21 | +#include <assert.h> | |
22 | +#include <limits.h> | |
23 | + | |
24 | +extern const RegName map_of_regno_2_regname[]; | |
25 | +extern const OpndSize map_of_EncoderOpndSize_2_RealOpndSize[]; | |
26 | +extern const Mnemonic map_of_alu_opcode_2_mnemonic[]; | |
27 | +extern const Mnemonic map_of_shift_opcode_2_mnemonic[]; | |
28 | + | |
29 | +// S_ stands for 'Signed' | |
30 | +extern const Mnemonic S_map_of_condition_code_2_branch_mnemonic[]; | |
31 | +// U_ stands for 'Unsigned' | |
32 | +extern const Mnemonic U_map_of_condition_code_2_branch_mnemonic[]; | |
33 | + | |
34 | +inline static RegName map_reg(Reg_No r) { | |
35 | + assert(r >= 0 && r <= n_reg); | |
36 | + return map_of_regno_2_regname[r]; | |
37 | +} | |
38 | + | |
39 | +inline static OpndSize map_size(Opnd_Size o_size) { | |
40 | + assert(o_size >= 0 && o_size <= n_size); | |
41 | + return map_of_EncoderOpndSize_2_RealOpndSize[o_size]; | |
42 | +} | |
43 | + | |
44 | +inline static Mnemonic map_alu(ALU_Opcode alu) { | |
45 | + assert(alu >= 0 && alu < n_alu); | |
46 | + return map_of_alu_opcode_2_mnemonic[alu]; | |
47 | +} | |
48 | + | |
49 | +inline static Mnemonic map_shift(Shift_Opcode shc) { | |
50 | + assert(shc >= 0 && shc < n_shift); | |
51 | + return map_of_shift_opcode_2_mnemonic[shc]; | |
52 | +} | |
53 | + | |
54 | +inline bool fit8(int64 val) { | |
55 | + return (CHAR_MIN <= val) && (val <= CHAR_MAX); | |
56 | +} | |
57 | + | |
58 | +inline bool fit32(int64 val) { | |
59 | + return (INT_MIN <= val) && (val <= INT_MAX); | |
60 | +} | |
61 | + | |
62 | +inline static void add_r(EncoderBase::Operands & args, const R_Opnd & r, Opnd_Size sz, OpndExt ext = OpndExt_None) { | |
63 | + RegName reg = map_reg(r.reg_no()); | |
64 | + if (sz != n_size) { | |
65 | + OpndSize size = map_size(sz); | |
66 | + if (size != getRegSize(reg)) { | |
67 | + reg = getAliasReg(reg, size); | |
68 | + } | |
69 | + } | |
70 | + args.add(EncoderBase::Operand(reg, ext)); | |
71 | +} | |
72 | + | |
73 | +inline static void add_m(EncoderBase::Operands & args, const M_Opnd & m, Opnd_Size sz, OpndExt ext = OpndExt_None) { | |
74 | + assert(n_size != sz); | |
75 | + args.add(EncoderBase::Operand(map_size(sz), | |
76 | + map_reg(m.base().reg_no()), map_reg(m.index().reg_no()), | |
77 | + (unsigned)m.scale().get_value(), (int)m.disp().get_value(), ext)); | |
78 | +} | |
79 | + | |
80 | +inline static void add_rm(EncoderBase::Operands & args, const RM_Opnd & rm, Opnd_Size sz, OpndExt ext = OpndExt_None) { | |
81 | + rm.is_reg() ? add_r(args, (R_Opnd &)rm, sz, ext) : add_m(args, (M_Opnd &)rm, sz, ext); | |
82 | +} | |
83 | + | |
84 | +inline static void add_xmm(EncoderBase::Operands & args, const XMM_Opnd & xmm, bool dbl) { | |
85 | + // Gregory - | |
86 | + // XMM registers indexes in Reg_No enum are shifted by xmm0_reg, their indexes | |
87 | + // don't start with 0, so it is necessary to subtract xmm0_reg index from | |
88 | + // xmm.get_idx() value | |
89 | + assert(xmm.get_idx() >= xmm0_reg); | |
90 | + return args.add((RegName)( (dbl ? RegName_XMM0D : RegName_XMM0S) + xmm.get_idx() - | |
91 | + xmm0_reg)); | |
92 | +} | |
93 | + | |
94 | +inline static void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) { | |
95 | + return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i)); | |
96 | +} | |
97 | + | |
98 | +inline static void add_imm(EncoderBase::Operands & args, const Imm_Opnd & imm) { | |
99 | + assert(n_size != imm.get_size()); | |
100 | + args.add(EncoderBase::Operand(map_size(imm.get_size()), imm.get_value(), | |
101 | + imm.is_signed() ? OpndExt_Signed : OpndExt_Zero)); | |
102 | +} | |
103 | + | |
104 | +ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p) { | |
105 | + *stream = (char)p; | |
106 | + return stream + 1; | |
107 | +} | |
108 | + | |
109 | +// stack push and pop instructions | |
110 | +ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
111 | + EncoderBase::Operands args; | |
112 | + add_rm(args, rm, sz); | |
113 | + return (char*)EncoderBase::encode(stream, Mnemonic_PUSH, args); | |
114 | +} | |
115 | + | |
116 | +ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm) { | |
117 | + EncoderBase::Operands args; | |
118 | +#ifdef _EM64T_ | |
119 | + add_imm(args, imm); | |
120 | +#else | |
121 | + // we need this workaround to be compatible with the former ia32 encoder implementation | |
122 | + add_imm(args, Imm_Opnd(size_32, imm.get_value())); | |
123 | +#endif | |
124 | + return EncoderBase::encode(stream, Mnemonic_PUSH, args); | |
125 | +} | |
126 | + | |
127 | +ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
128 | + EncoderBase::Operands args; | |
129 | + add_rm(args, rm, sz); | |
130 | + return (char*)EncoderBase::encode(stream, Mnemonic_POP, args); | |
131 | +} | |
132 | + | |
133 | +// cmpxchg or xchg | |
134 | +ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { | |
135 | + EncoderBase::Operands args; | |
136 | + add_rm(args, rm, sz); | |
137 | + add_r(args, r, sz); | |
138 | + RegName implicitReg = getAliasReg(RegName_EAX, map_size(sz)); | |
139 | + args.add(implicitReg); | |
140 | + return (char*)EncoderBase::encode(stream, Mnemonic_CMPXCHG, args); | |
141 | +} | |
142 | + | |
143 | +ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { | |
144 | + EncoderBase::Operands args; | |
145 | + add_rm(args, rm, sz); | |
146 | + add_r(args, r, sz); | |
147 | + return (char*)EncoderBase::encode(stream, Mnemonic_XCHG, args); | |
148 | +} | |
149 | + | |
150 | +// inc(rement), dec(rement), not, neg(ate) instructions | |
151 | +ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
152 | + EncoderBase::Operands args; | |
153 | + add_rm(args, rm, sz); | |
154 | + return (char*)EncoderBase::encode(stream, Mnemonic_INC, args); | |
155 | +} | |
156 | + | |
157 | +ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
158 | + EncoderBase::Operands args; | |
159 | + add_rm(args, rm, sz); | |
160 | + return (char*)EncoderBase::encode(stream, Mnemonic_DEC, args); | |
161 | +} | |
162 | + | |
163 | +ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
164 | + EncoderBase::Operands args; | |
165 | + add_rm(args, rm, sz); | |
166 | + return (char*)EncoderBase::encode(stream, Mnemonic_NOT, args); | |
167 | +} | |
168 | + | |
169 | +ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
170 | + EncoderBase::Operands args; | |
171 | + add_rm(args, rm, sz); | |
172 | + return (char*)EncoderBase::encode(stream, Mnemonic_NEG, args); | |
173 | +} | |
174 | + | |
175 | +ENCODER_DECLARE_EXPORT char * nop(char * stream) { | |
176 | + EncoderBase::Operands args; | |
177 | + return (char*)EncoderBase::encode(stream, Mnemonic_NOP, args); | |
178 | +} | |
179 | + | |
180 | +ENCODER_DECLARE_EXPORT char * int3(char * stream) { | |
181 | + EncoderBase::Operands args; | |
182 | + return (char*)EncoderBase::encode(stream, Mnemonic_INT3, args); | |
183 | +} | |
184 | + | |
185 | +// alu instructions: add, or, adc, sbb, and, sub, xor, cmp | |
186 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { | |
187 | + EncoderBase::Operands args; | |
188 | + add_rm(args, rm, sz); | |
189 | + add_imm(args, imm); | |
190 | + return (char*)EncoderBase::encode(stream, map_alu(opc), args); | |
191 | +}; | |
192 | + | |
193 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) { | |
194 | + EncoderBase::Operands args; | |
195 | + add_rm(args, m, sz); | |
196 | + add_rm(args, r, sz); | |
197 | + return (char*)EncoderBase::encode(stream, map_alu(opc), args); | |
198 | +} | |
199 | + | |
200 | +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
201 | + EncoderBase::Operands args; | |
202 | + add_rm(args, r, sz); | |
203 | + add_rm(args, rm, sz); | |
204 | + return (char*)EncoderBase::encode(stream, map_alu(opc), args); | |
205 | +} | |
206 | + | |
207 | +// test instruction | |
208 | +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { | |
209 | + EncoderBase::Operands args; | |
210 | + add_rm(args, rm, sz); | |
211 | + assert(imm.get_size() <= sz); | |
212 | + add_imm(args, imm); | |
213 | + return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args); | |
214 | +} | |
215 | + | |
216 | +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { | |
217 | + EncoderBase::Operands args; | |
218 | + add_rm(args, rm, sz); | |
219 | + add_r(args, r, sz); | |
220 | + return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args); | |
221 | +} | |
222 | + | |
223 | +// shift instructions: shl, shr, sar, shld, shrd | |
224 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { | |
225 | + EncoderBase::Operands args; | |
226 | + add_rm(args, rm, sz); | |
227 | + add_imm(args, imm); | |
228 | + return (char*)EncoderBase::encode(stream, map_shift(shc), args); | |
229 | +} | |
230 | + | |
231 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, Opnd_Size sz) { | |
232 | + EncoderBase::Operands args; | |
233 | + add_rm(args, rm, sz); | |
234 | + args.add(RegName_CL); | |
235 | + return (char*)EncoderBase::encode(stream, map_shift(shc), args); | |
236 | +} | |
237 | + | |
238 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, | |
239 | + const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) { | |
240 | + EncoderBase::Operands args; | |
241 | + assert(shc == shld_opc || shc == shrd_opc); | |
242 | + add_rm(args, rm, sz); | |
243 | + add_r(args, r, sz); | |
244 | + add_imm(args, imm); | |
245 | + return (char*)EncoderBase::encode(stream, map_shift(shc), args); | |
246 | +} | |
247 | + | |
248 | +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, | |
249 | + const R_Opnd & r, Opnd_Size sz) { | |
250 | + EncoderBase::Operands args; | |
251 | + assert(shc == shld_opc || shc == shrd_opc); | |
252 | + add_rm(args, rm, sz); | |
253 | + add_r(args, r, sz); | |
254 | + args.add(RegName_CL); | |
255 | + return (char*)EncoderBase::encode(stream, map_shift(shc), args); | |
256 | +} | |
257 | + | |
258 | +// multiply instructions: mul, imul | |
259 | +ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
260 | + EncoderBase::Operands args; | |
261 | + args.add(RegName_EDX); | |
262 | + args.add(RegName_EAX); | |
263 | + add_rm(args, rm, sz); | |
264 | + return (char*)EncoderBase::encode(stream, Mnemonic_MUL, args); | |
265 | +} | |
266 | + | |
267 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
268 | + EncoderBase::Operands args; | |
269 | + add_r(args, r, sz); | |
270 | + add_rm(args, rm, sz); | |
271 | + return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); | |
272 | +} | |
273 | + | |
274 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) { | |
275 | + EncoderBase::Operands args; | |
276 | + add_r(args, r, sz); | |
277 | + add_imm(args, imm); | |
278 | + return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); | |
279 | +} | |
280 | + | |
281 | +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, | |
282 | + const Imm_Opnd & imm, Opnd_Size sz) { | |
283 | + EncoderBase::Operands args; | |
284 | + add_r(args, r, sz); | |
285 | + add_rm(args, rm, sz); | |
286 | + add_imm(args, imm); | |
287 | + return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); | |
288 | +} | |
289 | + | |
290 | +// divide instructions: div, idiv | |
291 | +ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
292 | + EncoderBase::Operands args; | |
293 | +#ifdef _EM64T_ | |
294 | + add_r(args, rdx_opnd, sz); | |
295 | + add_r(args, rax_opnd, sz); | |
296 | +#else | |
297 | + add_r(args, edx_opnd, sz); | |
298 | + add_r(args, eax_opnd, sz); | |
299 | +#endif | |
300 | + add_rm(args, rm, sz); | |
301 | + return (char*)EncoderBase::encode(stream, Mnemonic_IDIV, args); | |
302 | +} | |
303 | + | |
304 | +ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
305 | + EncoderBase::Operands args; | |
306 | +#ifdef _EM64T_ | |
307 | + add_r(args, rdx_opnd, sz); | |
308 | + add_r(args, rax_opnd, sz); | |
309 | +#else | |
310 | + add_r(args, edx_opnd, sz); | |
311 | + add_r(args, eax_opnd, sz); | |
312 | +#endif | |
313 | + add_rm(args, rm, sz); | |
314 | + return (char*)EncoderBase::encode(stream, Mnemonic_DIV, args); | |
315 | +} | |
316 | + | |
317 | +// data movement: mov | |
318 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) { | |
319 | + EncoderBase::Operands args; | |
320 | + add_m(args, m, sz); | |
321 | + add_r(args, r, sz); | |
322 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); | |
323 | +} | |
324 | + | |
325 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
326 | + EncoderBase::Operands args; | |
327 | + add_r(args, r, sz); | |
328 | + add_rm(args, rm, sz); | |
329 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); | |
330 | +} | |
331 | + | |
332 | +ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { | |
333 | + EncoderBase::Operands args; | |
334 | + add_rm(args, rm, sz); | |
335 | + add_imm(args, imm); | |
336 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); | |
337 | +} | |
338 | + | |
339 | +ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) { | |
340 | + EncoderBase::Operands args; | |
341 | + add_rm(args, rm, size_32); | |
342 | + add_xmm(args, xmm, false); | |
343 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args); | |
344 | +} | |
345 | + | |
346 | +ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) { | |
347 | + EncoderBase::Operands args; | |
348 | + add_xmm(args, xmm, false); | |
349 | + add_rm(args, rm, size_32); | |
350 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args); | |
351 | +} | |
352 | + | |
353 | +ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) { | |
354 | + EncoderBase::Operands args; | |
355 | + add_rm(args, rm, size_64); | |
356 | + add_xmm(args, xmm, true); | |
357 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args); | |
358 | +} | |
359 | + | |
360 | +ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) { | |
361 | + EncoderBase::Operands args; | |
362 | + add_xmm(args, xmm, true); | |
363 | + add_rm(args, rm, size_64); | |
364 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args); | |
365 | +} | |
366 | + | |
367 | +ENCODER_DECLARE_EXPORT char * movsx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
368 | + EncoderBase::Operands args; | |
369 | + add_r(args, r, n_size); | |
370 | + add_rm(args, rm, sz, OpndExt_Signed); | |
371 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVSX, args); | |
372 | +} | |
373 | + | |
374 | +ENCODER_DECLARE_EXPORT char * movzx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
375 | + EncoderBase::Operands args; | |
376 | + add_r(args, r, n_size); | |
377 | + // movzx r64, r/m32 is not available on em64t | |
378 | + // mov r32, r/m32 should zero out upper bytes | |
379 | + assert(sz <= size_16); | |
380 | + add_rm(args, rm, sz, OpndExt_Zero); | |
381 | + return (char*)EncoderBase::encode(stream, Mnemonic_MOVZX, args); | |
382 | +} | |
383 | + | |
384 | +// sse mov | |
385 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
386 | + EncoderBase::Operands args; | |
387 | + add_xmm(args, xmm, dbl); | |
388 | + add_m(args, mem, dbl ? size_64 : size_32); | |
389 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args); | |
390 | +} | |
391 | + | |
392 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl) { | |
393 | + EncoderBase::Operands args; | |
394 | + add_m(args, mem, dbl ? size_64 : size_32); | |
395 | + add_xmm(args, xmm, dbl); | |
396 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args); | |
397 | +} | |
398 | + | |
399 | +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
400 | + EncoderBase::Operands args; | |
401 | + add_xmm(args, xmm0, dbl); | |
402 | + add_xmm(args, xmm1, dbl); | |
403 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args ); | |
404 | +} | |
405 | + | |
406 | +// sse add, sub, mul, div | |
407 | +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
408 | + EncoderBase::Operands args; | |
409 | + add_xmm(args, xmm, dbl); | |
410 | + add_m(args, mem, dbl ? size_64 : size_32); | |
411 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args); | |
412 | +} | |
413 | + | |
414 | +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
415 | + EncoderBase::Operands args; | |
416 | + add_xmm(args, xmm0, dbl); | |
417 | + add_xmm(args, xmm1, dbl); | |
418 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args); | |
419 | +} | |
420 | + | |
421 | +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
422 | + EncoderBase::Operands args; | |
423 | + add_xmm(args, xmm, dbl); | |
424 | + add_m(args, mem, dbl ? size_64 : size_32); | |
425 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args); | |
426 | +} | |
427 | + | |
428 | +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
429 | + EncoderBase::Operands args; | |
430 | + add_xmm(args, xmm0, dbl); | |
431 | + add_xmm(args, xmm1, dbl); | |
432 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args); | |
433 | +} | |
434 | + | |
435 | +ENCODER_DECLARE_EXPORT char * sse_mul( char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
436 | + EncoderBase::Operands args; | |
437 | + add_xmm(args, xmm, dbl); | |
438 | + add_m(args, mem, dbl ? size_64 : size_32); | |
439 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args); | |
440 | +} | |
441 | + | |
442 | +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd& xmm0, const XMM_Opnd& xmm1, bool dbl) { | |
443 | + EncoderBase::Operands args; | |
444 | + add_xmm(args, xmm0, dbl); | |
445 | + add_xmm(args, xmm1, dbl); | |
446 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args); | |
447 | +} | |
448 | + | |
449 | +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
450 | + EncoderBase::Operands args; | |
451 | + add_xmm(args, xmm, dbl); | |
452 | + add_m(args, mem, dbl ? size_64 : size_32); | |
453 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args); | |
454 | +} | |
455 | + | |
456 | +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
457 | + EncoderBase::Operands args; | |
458 | + add_xmm(args, xmm0, dbl); | |
459 | + add_xmm(args, xmm1, dbl); | |
460 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args); | |
461 | +} | |
462 | + | |
463 | +ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { | |
464 | + EncoderBase::Operands args; | |
465 | + add_xmm(args, xmm0, true); | |
466 | + add_xmm(args, xmm1, true); | |
467 | + return (char*)EncoderBase::encode(stream, Mnemonic_PXOR, args); | |
468 | +} | |
469 | + | |
470 | +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
471 | + EncoderBase::Operands args; | |
472 | + add_xmm(args, xmm0, true); | |
473 | + add_xmm(args, xmm1, true); | |
474 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args); | |
475 | +} | |
476 | + | |
477 | +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl) { | |
478 | + EncoderBase::Operands args; | |
479 | + add_xmm(args, xmm0, dbl); | |
480 | + add_m(args, mem, dbl ? size_64 : size_32); | |
481 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args); | |
482 | +} | |
483 | + | |
484 | +// sse conversions | |
485 | +ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { | |
486 | + EncoderBase::Operands args; | |
487 | + add_xmm(args, xmm, dbl); | |
488 | + add_m(args, mem, size_32); | |
489 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTSI2SD : Mnemonic_CVTSI2SS, args); | |
490 | +} | |
491 | + | |
492 | +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl) { | |
493 | + EncoderBase::Operands args; | |
494 | + add_rm(args, reg, size_32); | |
495 | + add_m(args, mem, dbl ? size_64 : size_32); | |
496 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args); | |
497 | +} | |
498 | + | |
499 | +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl) { | |
500 | + EncoderBase::Operands args; | |
501 | + add_rm(args, reg, size_32); | |
502 | + add_xmm(args, xmm, dbl); | |
503 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args); | |
504 | +} | |
505 | + | |
506 | +ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
507 | + EncoderBase::Operands args; | |
508 | + add_xmm(args, xmm0, dbl); | |
509 | + add_xmm(args, xmm1, dbl); | |
510 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTPD2DQ : Mnemonic_CVTTPS2DQ, args); | |
511 | +} | |
512 | + | |
513 | +ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { | |
514 | + EncoderBase::Operands args; | |
515 | + add_xmm(args, xmm0, dbl); | |
516 | + add_xmm(args, xmm1, dbl); | |
517 | + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTDQ2PD : Mnemonic_CVTDQ2PS, args); | |
518 | +} | |
519 | + | |
520 | +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64) { | |
521 | + EncoderBase::Operands args; | |
522 | + add_xmm(args, xmm0, false); | |
523 | + add_m(args, mem64, size_64); | |
524 | + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args); | |
525 | +} | |
526 | + | |
527 | +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { | |
528 | + EncoderBase::Operands args; | |
529 | + add_xmm(args, xmm0, false); | |
530 | + add_xmm(args, xmm1, true); | |
531 | + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args); | |
532 | +} | |
533 | + | |
534 | +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32) { | |
535 | + EncoderBase::Operands args; | |
536 | + add_xmm(args, xmm0, true); | |
537 | + add_m(args, mem32, size_32); | |
538 | + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args); | |
539 | +} | |
540 | + | |
541 | +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { | |
542 | + EncoderBase::Operands args; | |
543 | + add_xmm(args, xmm0, true); | |
544 | + add_xmm(args, xmm1, false); | |
545 | + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args); | |
546 | +} | |
547 | + | |
548 | +// condition operations | |
549 | +ENCODER_DECLARE_EXPORT char *cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { | |
550 | + EncoderBase::Operands args; | |
551 | + add_r(args, r, sz); | |
552 | + add_rm(args, rm, sz); | |
553 | + return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_CMOVcc + cc), args); | |
554 | +} | |
555 | + | |
556 | +ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8) { | |
557 | + EncoderBase::Operands args; | |
558 | + add_rm(args, rm8, size_8); | |
559 | + return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_SETcc + cc), args); | |
560 | +} | |
561 | + | |
562 | +// load effective address: lea | |
563 | +ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz) { | |
564 | + EncoderBase::Operands args; | |
565 | + add_r(args, r, sz); | |
566 | + add_m(args, m, sz); | |
567 | + return (char*)EncoderBase::encode(stream, Mnemonic_LEA, args); | |
568 | +} | |
569 | + | |
570 | +ENCODER_DECLARE_EXPORT char * cdq(char * stream) { | |
571 | + EncoderBase::Operands args; | |
572 | + args.add(RegName_EDX); | |
573 | + args.add(RegName_EAX); | |
574 | + return (char*)EncoderBase::encode(stream, Mnemonic_CDQ, args); | |
575 | +} | |
576 | + | |
577 | +ENCODER_DECLARE_EXPORT char * wait(char * stream) { | |
578 | + return (char*)EncoderBase::encode(stream, Mnemonic_WAIT, EncoderBase::Operands()); | |
579 | +} | |
580 | + | |
581 | +// control-flow instructions | |
582 | + | |
583 | +// loop | |
584 | +ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm) { | |
585 | + EncoderBase::Operands args; | |
586 | + assert(imm.get_size() == size_8); | |
587 | + args.add(RegName_ECX); | |
588 | + add_imm(args, imm); | |
589 | + return (char*)EncoderBase::encode(stream, Mnemonic_LOOP, args); | |
590 | +} | |
591 | + | |
592 | +// jump | |
593 | +ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm) { | |
594 | + EncoderBase::Operands args; | |
595 | + assert(imm.get_size() == size_8); | |
596 | + add_imm(args, imm); | |
597 | + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); | |
598 | +} | |
599 | + | |
600 | +ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm) { | |
601 | + EncoderBase::Operands args; | |
602 | + assert(imm.get_size() == size_32); | |
603 | + add_imm(args, imm); | |
604 | + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); | |
605 | +} | |
606 | + | |
607 | +ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz) { | |
608 | + EncoderBase::Operands args; | |
609 | + add_rm(args, rm, sz); | |
610 | + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); | |
611 | +} | |
612 | + | |
613 | +/** | |
614 | + * @note On EM64T: if target lies beyond 2G (does not fit into 32 bit | |
615 | + * offset) then generates indirect jump using RAX (whose content is | |
616 | + * destroyed). | |
617 | + */ | |
618 | +ENCODER_DECLARE_EXPORT char * jump(char * stream, char * target) { | |
619 | +#ifdef _EM64T_ | |
620 | + int64 offset = target - stream; | |
621 | + // sub 2 bytes for the short version | |
622 | + offset -= 2; | |
623 | + if (fit8(offset)) { | |
624 | + // use 8-bit signed relative form | |
625 | + return jump8(stream, Imm_Opnd(size_8, offset)); | |
626 | + } else if (fit32(offset)) { | |
627 | + // sub 5 (3 + 2)bytes for the long version | |
628 | + offset -= 3; | |
629 | + // use 32-bit signed relative form | |
630 | + return jump32(stream, Imm_Opnd(size_32, offset)); | |
631 | + } | |
632 | + // need to use absolute indirect jump | |
633 | + stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64); | |
634 | + return jump(stream, rax_opnd, size_64); | |
635 | +#else | |
636 | + I_32 offset = target - stream; | |
637 | + // sub 2 bytes for the short version | |
638 | + offset -= 2; | |
639 | + if (fit8(offset)) { | |
640 | + // use 8-bit signed relative form | |
641 | + return jump8(stream, Imm_Opnd(size_8, offset)); | |
642 | + } | |
643 | + // sub 5 (3 + 2) bytes for the long version | |
644 | + offset -= 3; | |
645 | + // use 32-bit signed relative form | |
646 | + return jump32(stream, Imm_Opnd(size_32, offset)); | |
647 | +#endif | |
648 | +} | |
649 | + | |
650 | +// branch | |
651 | +ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cond, | |
652 | + const Imm_Opnd & imm, | |
653 | + InstrPrefix pref) | |
654 | +{ | |
655 | + if (pref != no_prefix) { | |
656 | + assert(pref == hint_branch_taken_prefix || pref == hint_branch_taken_prefix); | |
657 | + stream = prefix(stream, pref); | |
658 | + } | |
659 | + Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond); | |
660 | + EncoderBase::Operands args; | |
661 | + assert(imm.get_size() == size_8); | |
662 | + add_imm(args, imm); | |
663 | + return (char*)EncoderBase::encode(stream, m, args); | |
664 | +} | |
665 | + | |
666 | +ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cond, | |
667 | + const Imm_Opnd & imm, | |
668 | + InstrPrefix pref) | |
669 | +{ | |
670 | + if (pref != no_prefix) { | |
671 | + assert(pref == hint_branch_taken_prefix || pref == hint_branch_taken_prefix); | |
672 | + stream = prefix(stream, pref); | |
673 | + } | |
674 | + Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond); | |
675 | + EncoderBase::Operands args; | |
676 | + assert(imm.get_size() == size_32); | |
677 | + add_imm(args, imm); | |
678 | + return (char*)EncoderBase::encode(stream, m, args); | |
679 | +} | |
680 | + | |
681 | +/* | |
682 | +ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix) { | |
683 | +// sub 2 bytes for the short version | |
684 | +int64 offset = stream-target-2; | |
685 | +if( fit8(offset) ) { | |
686 | +return branch8(stream, cc, Imm_Opnd(size_8, (char)offset), is_signed); | |
687 | +} | |
688 | +return branch32(stream, cc, Imm_Opnd(size_32, (int)offset), is_signed); | |
689 | +} | |
690 | +*/ | |
691 | + | |
692 | +// call | |
693 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm) | |
694 | +{ | |
695 | + EncoderBase::Operands args; | |
696 | + add_imm(args, imm); | |
697 | + return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args); | |
698 | +} | |
699 | + | |
700 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, | |
701 | + Opnd_Size sz) | |
702 | +{ | |
703 | + EncoderBase::Operands args; | |
704 | + add_rm(args, rm, sz); | |
705 | + return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args); | |
706 | +} | |
707 | + | |
708 | +/** | |
709 | +* @note On EM64T: if target lies beyond 2G (does not fit into 32 bit | |
710 | +* offset) then generates indirect jump using RAX (whose content is | |
711 | +* destroyed). | |
712 | +*/ | |
713 | +ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target) | |
714 | +{ | |
715 | +#ifdef _EM64T_ | |
716 | + int64 offset = target - stream; | |
717 | + if (fit32(offset)) { | |
718 | + offset -= 5; // sub 5 bytes for this instruction | |
719 | + Imm_Opnd imm(size_32, offset); | |
720 | + return call(stream, imm); | |
721 | + } | |
722 | + // need to use absolute indirect call | |
723 | + stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64); | |
724 | + return call(stream, rax_opnd, size_64); | |
725 | +#else | |
726 | + I_32 offset = target - stream; | |
727 | + offset -= 5; // sub 5 bytes for this instruction | |
728 | + Imm_Opnd imm(size_32, offset); | |
729 | + return call(stream, imm); | |
730 | +#endif | |
731 | +} | |
732 | + | |
733 | +// return instruction | |
734 | +ENCODER_DECLARE_EXPORT char * ret(char * stream) | |
735 | +{ | |
736 | + EncoderBase::Operands args; | |
737 | + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); | |
738 | +} | |
739 | + | |
740 | +ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm) | |
741 | +{ | |
742 | + EncoderBase::Operands args; | |
743 | + // TheManual says imm can be 16-bit only | |
744 | + //assert(imm.get_size() <= size_16); | |
745 | + args.add(EncoderBase::Operand(map_size(size_16), imm.get_value())); | |
746 | + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); | |
747 | +} | |
748 | + | |
749 | +ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop) | |
750 | +{ | |
751 | + // TheManual says it can only be imm16 | |
752 | + EncoderBase::Operands args(EncoderBase::Operand(OpndSize_16, pop, OpndExt_Zero)); | |
753 | + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); | |
754 | +} | |
755 | + | |
756 | +// floating-point instructions | |
757 | +ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, | |
758 | + bool is_double) { | |
759 | + EncoderBase::Operands args; | |
760 | + // a fake FP register as operand | |
761 | + add_fp(args, 0, is_double); | |
762 | + add_m(args, m, is_double ? size_64 : size_32); | |
763 | + return (char*)EncoderBase::encode(stream, Mnemonic_FLD, args); | |
764 | +} | |
765 | + | |
766 | +ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, | |
767 | + bool is_long, bool pop_stk) | |
768 | +{ | |
769 | + EncoderBase::Operands args; | |
770 | + if (pop_stk) { | |
771 | + add_m(args, mem, is_long ? size_64 : size_32); | |
772 | + // a fake FP register as operand | |
773 | + add_fp(args, 0, is_long); | |
774 | + return (char*)EncoderBase::encode(stream, Mnemonic_FISTP, args); | |
775 | + } | |
776 | + // only 32-bit operands are supported | |
777 | + assert(is_long == false); | |
778 | + add_m(args, mem, size_32); | |
779 | + add_fp(args, 0, false); | |
780 | + return (char*)EncoderBase::encode(stream, Mnemonic_FIST, args); | |
781 | +} | |
782 | + | |
783 | +ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, | |
784 | + bool is_double, bool pop_stk) | |
785 | +{ | |
786 | + EncoderBase::Operands args; | |
787 | + add_m(args, m, is_double ? size_64 : size_32); | |
788 | + // a fake FP register as operand | |
789 | + add_fp(args, 0, is_double); | |
790 | + return (char*)EncoderBase::encode(stream, | |
791 | + pop_stk ? Mnemonic_FSTP : Mnemonic_FST, | |
792 | + args); | |
793 | +} | |
794 | + | |
795 | +ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk) | |
796 | +{ | |
797 | + EncoderBase::Operands args; | |
798 | + add_fp(args, i, true); | |
799 | + return (char*)EncoderBase::encode(stream, | |
800 | + pop_stk ? Mnemonic_FSTP : Mnemonic_FST, | |
801 | + args); | |
802 | +} | |
803 | + | |
804 | +ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem) { | |
805 | + EncoderBase::Operands args; | |
806 | + add_m(args, mem, size_16); | |
807 | + return (char*)EncoderBase::encode(stream, Mnemonic_FLDCW, args); | |
808 | +} | |
809 | + | |
810 | +ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem) { | |
811 | + EncoderBase::Operands args; | |
812 | + add_m(args, mem, size_16); | |
813 | + return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, args); | |
814 | +} | |
815 | + | |
816 | +ENCODER_DECLARE_EXPORT char * fnstsw(char * stream) | |
817 | +{ | |
818 | + return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, | |
819 | + EncoderBase::Operands()); | |
820 | +} | |
821 | + | |
822 | +// string operations | |
823 | +ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set) { | |
824 | + EncoderBase::Operands args; | |
825 | + return (char*)EncoderBase::encode(stream, | |
826 | + set ? Mnemonic_STD : Mnemonic_CLD, | |
827 | + args); | |
828 | +} | |
829 | + | |
830 | +ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix) | |
831 | +{ | |
832 | + EncoderBase::Operands args; | |
833 | + if (prefix != no_prefix) { | |
834 | + assert(prefix == prefix_repnz || prefix == prefix_repz); | |
835 | + *stream = prefix; | |
836 | + ++stream; | |
837 | + } | |
838 | + return (char*)EncoderBase::encode(stream, Mnemonic_SCAS, args); | |
839 | +} | |
840 | + | |
841 | +ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix) | |
842 | +{ | |
843 | + if (prefix != no_prefix) { | |
844 | + assert(prefix == prefix_rep); | |
845 | + *stream = prefix; | |
846 | + ++stream; | |
847 | + } | |
848 | + | |
849 | + EncoderBase::Operands args; | |
850 | + return (char*)EncoderBase::encode(stream, Mnemonic_STOS, args); | |
851 | +} | |
852 | + | |
853 | +// Intrinsic FP math functions | |
854 | + | |
855 | +ENCODER_DECLARE_EXPORT char * fprem(char * stream) { | |
856 | + return (char*)EncoderBase::encode(stream, Mnemonic_FPREM, | |
857 | + EncoderBase::Operands()); | |
858 | +} | |
859 | + | |
860 | +ENCODER_DECLARE_EXPORT char * fprem1(char * stream) { | |
861 | + return (char*)EncoderBase::encode(stream, Mnemonic_FPREM1, | |
862 | + EncoderBase::Operands()); | |
863 | +} |
@@ -0,0 +1,458 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/load_store.cpp | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#include <assert.h> | |
19 | +#include <stdio.h> | |
20 | +#include <cutils/log.h> | |
21 | + | |
22 | +#include "codeflinger/x86/GGLX86Assembler.h" | |
23 | + | |
24 | +namespace android { | |
25 | + | |
26 | +// ---------------------------------------------------------------------------- | |
27 | + | |
28 | +void GGLX86Assembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags) | |
29 | +{ | |
30 | + const int bits = addr.size; | |
31 | + const int inc = (flags & WRITE_BACK)?1:0; | |
32 | + switch (bits) { | |
33 | + case 32: | |
34 | + if (inc) { | |
35 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg); | |
36 | + ADD_IMM_TO_REG(4, addr.reg); | |
37 | + } else { | |
38 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg); | |
39 | + } | |
40 | + break; | |
41 | + case 24: | |
42 | + // 24 bits formats are a little special and used only for RGB | |
43 | + // 0x00BBGGRR is unpacked as R,G,B | |
44 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg, OpndSize_8); | |
45 | + ROR(8, s.reg); | |
46 | + MOV_REG_TO_MEM(s.reg, 1, addr.reg, OpndSize_8); | |
47 | + ROR(8, s.reg); | |
48 | + MOV_REG_TO_MEM(s.reg, 2, addr.reg, OpndSize_8); | |
49 | + if (!(s.flags & CORRUPTIBLE)) { | |
50 | + ROR(16, s.reg); | |
51 | + } | |
52 | + if (inc) { | |
53 | + ADD_IMM_TO_REG(3, addr.reg); | |
54 | + } | |
55 | + break; | |
56 | + case 16: | |
57 | + if (inc) { | |
58 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16); | |
59 | + ADD_IMM_TO_REG(2, addr.reg); | |
60 | + } else { | |
61 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16); | |
62 | + } | |
63 | + break; | |
64 | + case 8: | |
65 | + if (inc) { | |
66 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8); | |
67 | + ADD_IMM_TO_REG(1, addr.reg); | |
68 | + } else { | |
69 | + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8); | |
70 | + } | |
71 | + break; | |
72 | + } | |
73 | +} | |
74 | + | |
75 | +void GGLX86Assembler::load(pointer_t& addr, const pixel_t& s, uint32_t flags) | |
76 | +{ | |
77 | + Scratch scratches(registerFile()); | |
78 | + int s0; | |
79 | + | |
80 | + const int bits = addr.size; | |
81 | + // WRITE_BACK indicates that the base register will also be updated after loading the data | |
82 | + const int inc = (flags & WRITE_BACK)?1:0; | |
83 | + switch (bits) { | |
84 | + case 32: | |
85 | + if (inc) { | |
86 | + MOV_MEM_TO_REG(0, addr.reg, s.reg); | |
87 | + ADD_IMM_TO_REG(4, addr.reg); | |
88 | + | |
89 | + } else MOV_MEM_TO_REG(0, addr.reg, s.reg); | |
90 | + break; | |
91 | + case 24: | |
92 | + // 24 bits formats are a little special and used only for RGB | |
93 | + // R,G,B is packed as 0x00BBGGRR | |
94 | + s0 = scratches.obtain(); | |
95 | + if (s.reg != addr.reg) { | |
96 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); //R | |
97 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G | |
98 | + SHL(8, s0); | |
99 | + OR_REG_TO_REG(s0, s.reg); | |
100 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B | |
101 | + SHL(16, s0); | |
102 | + OR_REG_TO_REG(s0, s.reg); | |
103 | + } else { | |
104 | + int s1 = scratches.obtain(); | |
105 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s1); //R | |
106 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G | |
107 | + SHL(8, s0); | |
108 | + OR_REG_TO_REG(s0, s1); | |
109 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B | |
110 | + SHL(16, s0); | |
111 | + OR_REG_TO_REG(s0, s1); | |
112 | + MOV_REG_TO_REG(s1, s.reg); | |
113 | + scratches.recycle(s1); | |
114 | + | |
115 | + } | |
116 | + scratches.recycle(s0); | |
117 | + if (inc) | |
118 | + ADD_IMM_TO_REG(3, addr.reg); | |
119 | + break; | |
120 | + case 16: | |
121 | + if (inc) { | |
122 | + MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg); | |
123 | + ADD_IMM_TO_REG(2, addr.reg); | |
124 | + } | |
125 | + else MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg); | |
126 | + break; | |
127 | + case 8: | |
128 | + if (inc) { | |
129 | + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); | |
130 | + ADD_IMM_TO_REG(1, addr.reg); | |
131 | + } | |
132 | + else MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); | |
133 | + break; | |
134 | + } | |
135 | + if (inc) MOV_REG_TO_MEM(addr.reg, addr.offset_ebp, PhysicalReg_EBP); | |
136 | +} | |
137 | + | |
138 | +void GGLX86Assembler::extract(integer_t& d, int s, int h, int l, int bits) | |
139 | +{ | |
140 | + const int maskLen = h-l; | |
141 | + | |
142 | + assert(maskLen<=8); | |
143 | + assert(h); | |
144 | + | |
145 | + | |
146 | + if (h != bits) { | |
147 | + const int mask = ((1<<maskLen)-1) << l; | |
148 | + MOV_REG_TO_REG(s, d.reg); | |
149 | + AND_IMM_TO_REG(mask, d.reg);// component = packed & mask; | |
150 | + s = d.reg; | |
151 | + } | |
152 | + | |
153 | + if (l) { | |
154 | + MOV_REG_TO_REG(s, d.reg); | |
155 | + SHR(l, d.reg);// component = packed >> l; | |
156 | + s = d.reg; | |
157 | + } | |
158 | + | |
159 | + if (s != d.reg) { | |
160 | + MOV_REG_TO_REG(s, d.reg); | |
161 | + } | |
162 | + | |
163 | + d.s = maskLen; | |
164 | +} | |
165 | + | |
166 | +void GGLX86Assembler::extract(integer_t& d, const pixel_t& s, int component) | |
167 | +{ | |
168 | + extract(d, s.reg, | |
169 | + s.format.c[component].h, | |
170 | + s.format.c[component].l, | |
171 | + s.size()); | |
172 | +} | |
173 | + | |
174 | +void GGLX86Assembler::extract(component_t& d, const pixel_t& s, int component) | |
175 | +{ | |
176 | + integer_t r(d.reg, 32, d.flags, d.offset_ebp); | |
177 | + extract(r, s.reg, | |
178 | + s.format.c[component].h, | |
179 | + s.format.c[component].l, | |
180 | + s.size()); | |
181 | + d = component_t(r); | |
182 | +} | |
183 | + | |
184 | + | |
185 | +void GGLX86Assembler::expand(integer_t& d, const component_t& s, int dbits) | |
186 | +{ | |
187 | + if (s.l || (s.flags & CLEAR_HI)) { | |
188 | + extract(d, s.reg, s.h, s.l, 32); | |
189 | + expand(d, d, dbits); | |
190 | + } else { | |
191 | + expand(d, integer_t(s.reg, s.size(), s.flags, s.offset_ebp), dbits); | |
192 | + } | |
193 | +} | |
194 | + | |
195 | +void GGLX86Assembler::expand(component_t& d, const component_t& s, int dbits) | |
196 | +{ | |
197 | + integer_t r(d.reg, 32, d.flags, d.offset_ebp); | |
198 | + expand(r, s, dbits); | |
199 | + d = component_t(r); | |
200 | +} | |
201 | + | |
202 | +void GGLX86Assembler::expand(integer_t& dst, const integer_t& src, int dbits) | |
203 | +{ | |
204 | + assert(src.size()); | |
205 | + | |
206 | + Scratch scratches(registerFile()); | |
207 | + int sbits = src.size(); | |
208 | + int s = src.reg; | |
209 | + int d = dst.reg; | |
210 | + | |
211 | + // be sure to set 'dst' after we read 'src' as they may be identical | |
212 | + dst.s = dbits; | |
213 | + dst.flags = 0; | |
214 | + | |
215 | + if (dbits<=sbits) { | |
216 | + if (s != d) { | |
217 | + MOV_REG_TO_REG(s, d); | |
218 | + } | |
219 | + return; | |
220 | + } | |
221 | + | |
222 | + if (sbits == 1) { | |
223 | + MOV_REG_TO_REG(s, d); | |
224 | + SHL(dbits, d); | |
225 | + SUB_REG_TO_REG(s, d); | |
226 | + // d = (s<<dbits) - s; | |
227 | + return; | |
228 | + } | |
229 | + | |
230 | + if (dbits % sbits) { | |
231 | + MOV_REG_TO_REG(s, d); | |
232 | + SHL(dbits-sbits, d); | |
233 | + // d = s << (dbits-sbits); | |
234 | + dbits -= sbits; | |
235 | + int temp = scratches.obtain(); | |
236 | + do { | |
237 | + MOV_REG_TO_REG(d, temp); | |
238 | + SHR(sbits, temp); | |
239 | + OR_REG_TO_REG(temp, d); | |
240 | + // d |= d >> sbits; | |
241 | + dbits -= sbits; | |
242 | + sbits *= 2; | |
243 | + } while(dbits>0); | |
244 | + return; | |
245 | + } | |
246 | + | |
247 | + dbits -= sbits; | |
248 | + do { | |
249 | + MOV_REG_TO_REG(s, d); | |
250 | + SHL(sbits, d); | |
251 | + OR_REG_TO_REG(s, d); | |
252 | + // d |= d<<sbits; | |
253 | + s = d; | |
254 | + dbits -= sbits; | |
255 | + if (sbits*2 < dbits) { | |
256 | + sbits *= 2; | |
257 | + } | |
258 | + } while(dbits>0); | |
259 | +} | |
260 | + | |
261 | +void GGLX86Assembler::downshift( | |
262 | + pixel_t& d, int component, component_t s, reg_t& dither) | |
263 | +{ | |
264 | + const needs_t& needs = mBuilderContext.needs; | |
265 | + Scratch scratches(registerFile()); | |
266 | + // s(temp) is loaded in build_blending | |
267 | + s.reg = scratches.obtain(); | |
268 | + MOV_MEM_TO_REG(s.offset_ebp, EBP, s.reg); | |
269 | + | |
270 | + int sh = s.h; | |
271 | + int sl = s.l; | |
272 | + int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; | |
273 | + int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; | |
274 | + int sbits = sh - sl; | |
275 | + | |
276 | + int dh = d.format.c[component].h; | |
277 | + int dl = d.format.c[component].l; | |
278 | + int dbits = dh - dl; | |
279 | + int dithering = 0; | |
280 | + | |
281 | + ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); | |
282 | + | |
283 | + if (sbits>dbits) { | |
284 | + // see if we need to dither | |
285 | + dithering = mDithering; | |
286 | + } | |
287 | + | |
288 | + int ireg = d.reg; | |
289 | + if (!(d.flags & FIRST)) { | |
290 | + if (s.flags & CORRUPTIBLE) { | |
291 | + ireg = s.reg; | |
292 | + } else { | |
293 | + ireg = scratches.obtain(); | |
294 | + } | |
295 | + } | |
296 | + d.flags &= ~FIRST; | |
297 | + | |
298 | + if (maskHiBits) { | |
299 | + // we need to mask the high bits (and possibly the lowbits too) | |
300 | + // and we might be able to use immediate mask. | |
301 | + if (!dithering) { | |
302 | + // we don't do this if we only have maskLoBits because we can | |
303 | + // do it more efficiently below (in the case where dl=0) | |
304 | + const int offset = sh - dbits; | |
305 | + if (dbits<=8 && offset >= 0) { | |
306 | + const uint32_t mask = ((1<<dbits)-1) << offset; | |
307 | + build_and_immediate(ireg, s.reg, mask, 32); | |
308 | + s.reg = ireg; | |
309 | + sl = offset; | |
310 | + sbits = dbits; | |
311 | + maskLoBits = maskHiBits = 0; | |
312 | + } | |
313 | + } else { | |
314 | + // in the dithering case though, we need to preserve the lower bits | |
315 | + const uint32_t mask = ((1<<sbits)-1) << sl; | |
316 | + build_and_immediate(ireg, s.reg, mask, 32); | |
317 | + s.reg = ireg; | |
318 | + maskLoBits = maskHiBits = 0; | |
319 | + } | |
320 | + } | |
321 | + | |
322 | + // XXX: we could special case (maskHiBits & !maskLoBits) | |
323 | + // like we do for maskLoBits below, but it happens very rarely | |
324 | + // that we have maskHiBits only and the conditions necessary to lead | |
325 | + // to better code (like doing d |= s << 24) | |
326 | + | |
327 | + if (maskHiBits) { | |
328 | + MOV_REG_TO_REG(s.reg, ireg); | |
329 | + SHL(32-sh, ireg); | |
330 | + sl += 32-sh; | |
331 | + sh = 32; | |
332 | + s.reg = ireg; | |
333 | + maskHiBits = 0; | |
334 | + } | |
335 | + | |
336 | + // Downsampling should be performed as follows: | |
337 | + // V * ((1<<dbits)-1) / ((1<<sbits)-1) | |
338 | + // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] | |
339 | + // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] | |
340 | + // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits | |
341 | + // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) | |
342 | + // | |
343 | + // By approximating (1>>dbits) and (1>>sbits) to 0: | |
344 | + // | |
345 | + // V>>(sbits-dbits) - V>>sbits | |
346 | + // | |
347 | + // A good approximation is V>>(sbits-dbits), | |
348 | + // but better one (needed for dithering) is: | |
349 | + // | |
350 | + // (V>>(sbits-dbits)<<sbits - V)>>sbits | |
351 | + // (V<<dbits - V)>>sbits | |
352 | + // (V - V>>dbits)>>(sbits-dbits) | |
353 | + | |
354 | + // Dithering is done here | |
355 | + if (dithering) { | |
356 | + comment("dithering"); | |
357 | + if (sl) { | |
358 | + MOV_REG_TO_REG(s.reg, ireg); | |
359 | + SHR(sl, ireg); | |
360 | + sh -= sl; | |
361 | + sl = 0; | |
362 | + s.reg = ireg; | |
363 | + } | |
364 | + // scaling (V-V>>dbits) | |
365 | + int temp_reg = scratches.obtain(); | |
366 | + MOV_REG_TO_REG(s.reg, temp_reg); | |
367 | + SHR(dbits, temp_reg); | |
368 | + MOV_REG_TO_REG(s.reg, ireg); | |
369 | + SUB_REG_TO_REG(temp_reg, ireg); | |
370 | + scratches.recycle(temp_reg); | |
371 | + const int shift = (GGL_DITHER_BITS - (sbits-dbits)); | |
372 | + dither.reg = scratches.obtain(); | |
373 | + MOV_MEM_TO_REG(dither.offset_ebp, EBP, dither.reg); | |
374 | + if (shift>0) { | |
375 | + temp_reg = scratches.obtain(); | |
376 | + MOV_REG_TO_REG(dither.reg, temp_reg); | |
377 | + SHR(shift, temp_reg); | |
378 | + ADD_REG_TO_REG(temp_reg, ireg); | |
379 | + scratches.recycle(temp_reg); | |
380 | + } | |
381 | + else if (shift<0) { | |
382 | + temp_reg = scratches.obtain(); | |
383 | + MOV_REG_TO_REG(dither.reg, temp_reg); | |
384 | + SHL(-shift, temp_reg); | |
385 | + ADD_REG_TO_REG(temp_reg, ireg); | |
386 | + scratches.recycle(temp_reg); | |
387 | + } | |
388 | + else { | |
389 | + ADD_REG_TO_REG(dither.reg, ireg); | |
390 | + } | |
391 | + scratches.recycle(dither.reg); | |
392 | + s.reg = ireg; | |
393 | + } | |
394 | + | |
395 | + if ((maskLoBits|dithering) && (sh > dbits)) { | |
396 | + int shift = sh-dbits; | |
397 | + if (dl) { | |
398 | + MOV_REG_TO_REG(s.reg, ireg); | |
399 | + SHR(shift, ireg); | |
400 | + if (ireg == d.reg) { | |
401 | + MOV_REG_TO_REG(ireg, d.reg); | |
402 | + SHL(dl, d.reg); | |
403 | + } else { | |
404 | + int temp_reg = scratches.obtain(); | |
405 | + MOV_REG_TO_REG(ireg, temp_reg); | |
406 | + SHL(dl, temp_reg); | |
407 | + OR_REG_TO_REG(temp_reg, d.reg); | |
408 | + scratches.recycle(temp_reg); | |
409 | + } | |
410 | + } else { | |
411 | + if (ireg == d.reg) { | |
412 | + MOV_REG_TO_REG(s.reg, d.reg); | |
413 | + SHR(shift, d.reg); | |
414 | + } else { | |
415 | + int temp_reg = scratches.obtain(); | |
416 | + MOV_REG_TO_REG(s.reg, temp_reg); | |
417 | + SHR(shift, temp_reg); | |
418 | + OR_REG_TO_REG(temp_reg, d.reg); | |
419 | + scratches.recycle(temp_reg); | |
420 | + } | |
421 | + } | |
422 | + } else { | |
423 | + int shift = sh-dh; | |
424 | + if (shift>0) { | |
425 | + if (ireg == d.reg) { | |
426 | + MOV_REG_TO_REG(s.reg, d.reg); | |
427 | + SHR(shift, d.reg); | |
428 | + } else { | |
429 | + int temp_reg = scratches.obtain(); | |
430 | + MOV_REG_TO_REG(s.reg, temp_reg); | |
431 | + SHR(shift, temp_reg); | |
432 | + OR_REG_TO_REG(temp_reg, d.reg); | |
433 | + scratches.recycle(temp_reg); | |
434 | + } | |
435 | + } else if (shift<0) { | |
436 | + if (ireg == d.reg) { | |
437 | + MOV_REG_TO_REG(s.reg, d.reg); | |
438 | + SHL(-shift, d.reg); | |
439 | + } else { | |
440 | + int temp_reg = scratches.obtain(); | |
441 | + MOV_REG_TO_REG(s.reg, temp_reg); | |
442 | + SHL(-shift, temp_reg); | |
443 | + OR_REG_TO_REG(temp_reg, d.reg); | |
444 | + scratches.recycle(temp_reg); | |
445 | + } | |
446 | + } else { | |
447 | + if (ireg == d.reg) { | |
448 | + if (s.reg != d.reg) { | |
449 | + MOV_REG_TO_REG(s.reg, d.reg); | |
450 | + } | |
451 | + } else { | |
452 | + OR_REG_TO_REG(s.reg, d.reg); | |
453 | + } | |
454 | + } | |
455 | + } | |
456 | +} | |
457 | + | |
458 | +}; // namespace android |
@@ -0,0 +1,1799 @@ | ||
1 | +/* libs/pixelflinger/codeflinger/x86/texturing.cpp | |
2 | +** | |
3 | +** Copyright 2006, The Android Open Source Project | |
4 | +** | |
5 | +** Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | +** you may not use this file except in compliance with the License. | |
7 | +** You may obtain a copy of the License at | |
8 | +** | |
9 | +** http://www.apache.org/licenses/LICENSE-2.0 | |
10 | +** | |
11 | +** Unless required by applicable law or agreed to in writing, software | |
12 | +** distributed under the License is distributed on an "AS IS" BASIS, | |
13 | +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | +** See the License for the specific language governing permissions and | |
15 | +** limitations under the License. | |
16 | +*/ | |
17 | + | |
18 | +#include <assert.h> | |
19 | +#include <stdint.h> | |
20 | +#include <stdlib.h> | |
21 | +#include <stdio.h> | |
22 | +#include <sys/types.h> | |
23 | + | |
24 | +#include <cutils/log.h> | |
25 | + | |
26 | +#include "codeflinger/x86/GGLX86Assembler.h" | |
27 | + | |
28 | + | |
29 | +namespace android { | |
30 | + | |
31 | +// --------------------------------------------------------------------------- | |
32 | + | |
33 | +// iterators are initialized like this: | |
34 | +// (intToFixedCenter(x) * dx)>>16 + x0 | |
35 | +// ((x<<16 + 0x8000) * dx)>>16 + x0 | |
36 | +// ((x<<16)*dx + (0x8000*dx))>>16 + x0 | |
37 | +// ( (x*dx) + dx>>1 ) + x0 | |
38 | +// (x*dx) + (dx>>1 + x0) | |
39 | + | |
40 | +void GGLX86Assembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x) | |
41 | +{ | |
42 | + context_t const* c = mBuilderContext.c; | |
43 | + const needs_t& needs = mBuilderContext.needs; | |
44 | + int temp_reg; | |
45 | + | |
46 | + if (mSmooth) { | |
47 | + // NOTE: we could take this case in the mDithering + !mSmooth case, | |
48 | + // but this would use up to 4 more registers for the color components | |
49 | + // for only a little added quality. | |
50 | + // Currently, this causes the system to run out of registers in | |
51 | + // some case (see issue #719496) | |
52 | + | |
53 | + comment("compute initial iterated color (smooth and/or dither case)"); | |
54 | + | |
55 | + parts.iterated_packed = 0; | |
56 | + parts.packed = 0; | |
57 | + | |
58 | + // 0x1: color component | |
59 | + // 0x2: iterators | |
60 | + //parts.reload = 3; | |
61 | + const int optReload = mOptLevel >> 1; | |
62 | + if (optReload >= 3) parts.reload = 0; // reload nothing | |
63 | + else if (optReload == 2) parts.reload = 2; // reload iterators | |
64 | + else if (optReload == 1) parts.reload = 1; // reload colors | |
65 | + else if (optReload <= 0) parts.reload = 3; // reload both | |
66 | + | |
67 | + if (!mSmooth) { | |
68 | + // we're not smoothing (just dithering), we never have to | |
69 | + // reload the iterators | |
70 | + parts.reload &= ~2; | |
71 | + } | |
72 | + | |
73 | + Scratch scratches(registerFile()); | |
74 | + const int t0 = (parts.reload & 1) ? scratches.obtain() : 0; | |
75 | + const int t1 = (parts.reload & 2) ? scratches.obtain() : 0; | |
76 | + for (int i=0 ; i<4 ; i++) { | |
77 | + if (!mInfo[i].iterated) | |
78 | + continue; | |
79 | + // this component exists in the destination and is not replaced | |
80 | + // by a texture unit. | |
81 | + const int c = (parts.reload & 1) ? t0 : obtainReg(); | |
82 | + if (i==0) CONTEXT_LOAD(c, iterators.ydady); | |
83 | + if (i==1) CONTEXT_LOAD(c, iterators.ydrdy); | |
84 | + if (i==2) CONTEXT_LOAD(c, iterators.ydgdy); | |
85 | + if (i==3) CONTEXT_LOAD(c, iterators.ydbdy); | |
86 | + parts.argb[i].reg = c; | |
87 | + | |
88 | + if (mInfo[i].smooth) { | |
89 | + parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg(); | |
90 | + const int dvdx = parts.argb_dx[i].reg; | |
91 | + temp_reg = scratches.obtain(); | |
92 | + CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx); | |
93 | + MOV_REG_TO_REG(dvdx, temp_reg); | |
94 | + IMUL(x.reg, temp_reg); | |
95 | + ADD_REG_TO_REG(temp_reg, c); | |
96 | + scratches.recycle(temp_reg); | |
97 | + | |
98 | + // adjust the color iterator to make sure it won't overflow | |
99 | + if (!mAA) { | |
100 | + // this is not needed when we're using anti-aliasing | |
101 | + // because we will (have to) clamp the components | |
102 | + // anyway. | |
103 | + int end = scratches.obtain(); | |
104 | + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, end); | |
105 | + SHR(16, end); | |
106 | + IMUL(end, dvdx); | |
107 | + temp_reg = end; | |
108 | + // c - (dvdx*end + c) = -(dvdx*end) | |
109 | + MOV_REG_TO_REG(dvdx, temp_reg); | |
110 | + NEG(temp_reg); | |
111 | + ADD_REG_TO_REG(c, dvdx); | |
112 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp_reg, c); | |
113 | + /* | |
114 | + SUB_REG_TO_REG(dvdx, temp_reg); | |
115 | + switch(i) { | |
116 | + case 0: | |
117 | + JCC(Mnemonic_JNS, "1f_init_iterated_color"); | |
118 | + SUB_REG_TO_REG(dvdx, c); | |
119 | + label("1f_init_iterated_color"); | |
120 | + break; | |
121 | + case 1: | |
122 | + JCC(Mnemonic_JNS, "2f_init_iterated_color"); | |
123 | + SUB_REG_TO_REG(dvdx, c); | |
124 | + label("2f_init_iterated_color"); | |
125 | + break; | |
126 | + case 2: | |
127 | + JCC(Mnemonic_JNS, "3f_init_iterated_color"); | |
128 | + SUB_REG_TO_REG(dvdx, c); | |
129 | + label("3f_init_iterated_color"); | |
130 | + break; | |
131 | + case 3: | |
132 | + JCC(Mnemonic_JNS, "4f_init_iterated_color"); | |
133 | + SUB_REG_TO_REG(dvdx, c); | |
134 | + label("4f_init_iterated_color"); | |
135 | + break; | |
136 | + } | |
137 | + */ | |
138 | + | |
139 | + MOV_REG_TO_REG(c, temp_reg); | |
140 | + SAR(31, temp_reg); | |
141 | + NOT(temp_reg); | |
142 | + AND_REG_TO_REG(temp_reg, c); | |
143 | + scratches.recycle(end); | |
144 | + } | |
145 | + if(parts.reload & 2) | |
146 | + scratches.recycle(dvdx); | |
147 | + else | |
148 | + recycleReg(dvdx); | |
149 | + } | |
150 | + CONTEXT_STORE(c, generated_vars.argb[i].c); | |
151 | + if(parts.reload & 1) | |
152 | + scratches.recycle(parts.argb[i].reg); | |
153 | + else | |
154 | + recycleReg(parts.argb[i].reg); | |
155 | + | |
156 | + parts.argb[i].reg = -1; | |
157 | + //if (parts.reload & 1) { | |
158 | + // //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
159 | + //} | |
160 | + } | |
161 | + } else { | |
162 | + // We're not smoothed, so we can | |
163 | + // just use a packed version of the color and extract the | |
164 | + // components as needed (or not at all if we don't blend) | |
165 | + | |
166 | + // figure out if we need the iterated color | |
167 | + int load = 0; | |
168 | + for (int i=0 ; i<4 ; i++) { | |
169 | + component_info_t& info = mInfo[i]; | |
170 | + if ((info.inDest || info.needed) && !info.replaced) | |
171 | + load |= 1; | |
172 | + } | |
173 | + | |
174 | + parts.iterated_packed = 1; | |
175 | + parts.packed = (!mTextureMachine.mask && !mBlending | |
176 | + && !mFog && !mDithering); | |
177 | + parts.reload = 0; | |
178 | + if (load || parts.packed) { | |
179 | + if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) { | |
180 | + comment("load initial iterated color (8888 packed)"); | |
181 | + parts.iterated.setTo(obtainReg(), | |
182 | + &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888])); | |
183 | + CONTEXT_LOAD(parts.iterated.reg, packed8888); | |
184 | + } else { | |
185 | + comment("load initial iterated color (dest format packed)"); | |
186 | + | |
187 | + parts.iterated.setTo(obtainReg(), &mCbFormat); | |
188 | + | |
189 | + // pre-mask the iterated color | |
190 | + const int bits = parts.iterated.size(); | |
191 | + const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; | |
192 | + uint32_t mask = 0; | |
193 | + if (mMasking) { | |
194 | + for (int i=0 ; i<4 ; i++) { | |
195 | + const int component_mask = 1<<i; | |
196 | + const int h = parts.iterated.format.c[i].h; | |
197 | + const int l = parts.iterated.format.c[i].l; | |
198 | + if (h && (!(mMasking & component_mask))) { | |
199 | + mask |= ((1<<(h-l))-1) << l; | |
200 | + } | |
201 | + } | |
202 | + } | |
203 | + | |
204 | + if (mMasking && ((mask & size)==0)) { | |
205 | + // none of the components are present in the mask | |
206 | + } else { | |
207 | + CONTEXT_LOAD(parts.iterated.reg, packed); | |
208 | + if (mCbFormat.size == 1) { | |
209 | + int imm = 0xFF; | |
210 | + AND_IMM_TO_REG(imm, parts.iterated.reg); | |
211 | + } else if (mCbFormat.size == 2) { | |
212 | + SHR(16, parts.iterated.reg); | |
213 | + } | |
214 | + } | |
215 | + | |
216 | + // pre-mask the iterated color | |
217 | + if (mMasking) { | |
218 | + //AND_IMM_TO_REG(mask, parts.iterated.reg); | |
219 | + build_and_immediate(parts.iterated.reg, parts.iterated.reg, | |
220 | + mask, bits); | |
221 | + } | |
222 | + } | |
223 | + mCurSp = mCurSp - 4; | |
224 | + parts.iterated.offset_ebp = mCurSp; | |
225 | + MOV_REG_TO_MEM(parts.iterated.reg, parts.iterated.offset_ebp, EBP); | |
226 | + //PUSH(parts.iterated.reg); | |
227 | + recycleReg(parts.iterated.reg); | |
228 | + parts.iterated.reg=-1; | |
229 | + } | |
230 | + } | |
231 | +} | |
232 | + | |
233 | +void GGLX86Assembler::build_iterated_color( | |
234 | + component_t& fragment, | |
235 | + fragment_parts_t& parts, | |
236 | + int component, | |
237 | + Scratch& regs) | |
238 | +{ | |
239 | + | |
240 | + if (!mInfo[component].iterated) | |
241 | + return; | |
242 | + | |
243 | + if (parts.iterated_packed) { | |
244 | + // iterated colors are packed, extract the one we need | |
245 | + parts.iterated.reg = regs.obtain(); | |
246 | + MOV_MEM_TO_REG(parts.iterated.offset_ebp, EBP, parts.iterated.reg); | |
247 | + extract(fragment, parts.iterated, component); | |
248 | + regs.recycle(parts.iterated.reg); | |
249 | + } else { | |
250 | + fragment.h = GGL_COLOR_BITS; | |
251 | + fragment.l = GGL_COLOR_BITS - 8; | |
252 | + fragment.flags |= CLEAR_LO; | |
253 | + // iterated colors are held in their own register, | |
254 | + // (smooth and/or dithering case) | |
255 | + Scratch scratches(registerFile()); | |
256 | + mBuilderContext.Rctx = scratches.obtain(); | |
257 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
258 | + if (parts.reload==3) { | |
259 | + // this implies mSmooth | |
260 | + int dx = scratches.obtain(); | |
261 | + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); | |
262 | + CONTEXT_LOAD(dx, generated_vars.argb[component].dx); | |
263 | + ADD_REG_TO_REG(fragment.reg, dx); | |
264 | + CONTEXT_STORE(dx, generated_vars.argb[component].c); | |
265 | + scratches.recycle(dx); | |
266 | + } else if (parts.reload & 1) { | |
267 | + //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg); | |
268 | + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); | |
269 | + } else { | |
270 | + // we don't reload, so simply rename the register and mark as | |
271 | + // non CORRUPTIBLE so that the texture env or blending code | |
272 | + // won't modify this (renamed) register | |
273 | + //regs.recycle(fragment.reg); | |
274 | + //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg); | |
275 | + // it will also be used in build_smooth_shade | |
276 | + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); | |
277 | + //fragment.reg = parts.argb[component].reg; | |
278 | + //fragment.flags &= ~CORRUPTIBLE; | |
279 | + } | |
280 | + scratches.recycle(mBuilderContext.Rctx); | |
281 | + if (mInfo[component].smooth && mAA) { | |
282 | + // when using smooth shading AND anti-aliasing, we need to clamp | |
283 | + // the iterators because there is always an extra pixel on the | |
284 | + // edges, which most of the time will cause an overflow | |
285 | + // (since technically its outside of the domain). | |
286 | + int temp = scratches.obtain(); | |
287 | + MOV_REG_TO_REG(fragment.reg, temp); | |
288 | + SAR(31, temp); | |
289 | + NOT(temp); | |
290 | + OR_REG_TO_REG(temp, fragment.reg); | |
291 | + component_sat(fragment, temp); | |
292 | + scratches.recycle(temp); | |
293 | + } | |
294 | + } | |
295 | +} | |
296 | + | |
297 | +// --------------------------------------------------------------------------- | |
298 | + | |
299 | +void GGLX86Assembler::decodeLogicOpNeeds(const needs_t& needs) | |
300 | +{ | |
301 | + // gather some informations about the components we need to process... | |
302 | + const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; | |
303 | + switch(opcode) { | |
304 | + case GGL_COPY: | |
305 | + mLogicOp = 0; | |
306 | + break; | |
307 | + case GGL_CLEAR: | |
308 | + case GGL_SET: | |
309 | + mLogicOp = LOGIC_OP; | |
310 | + break; | |
311 | + case GGL_AND: | |
312 | + case GGL_AND_REVERSE: | |
313 | + case GGL_AND_INVERTED: | |
314 | + case GGL_XOR: | |
315 | + case GGL_OR: | |
316 | + case GGL_NOR: | |
317 | + case GGL_EQUIV: | |
318 | + case GGL_OR_REVERSE: | |
319 | + case GGL_OR_INVERTED: | |
320 | + case GGL_NAND: | |
321 | + mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST; | |
322 | + break; | |
323 | + case GGL_NOOP: | |
324 | + case GGL_INVERT: | |
325 | + mLogicOp = LOGIC_OP|LOGIC_OP_DST; | |
326 | + break; | |
327 | + case GGL_COPY_INVERTED: | |
328 | + mLogicOp = LOGIC_OP|LOGIC_OP_SRC; | |
329 | + break; | |
330 | + }; | |
331 | +} | |
332 | + | |
333 | +void GGLX86Assembler::decodeTMUNeeds(const needs_t& needs, context_t const* c) | |
334 | +{ | |
335 | + uint8_t replaced=0; | |
336 | + mTextureMachine.mask = 0; | |
337 | + mTextureMachine.activeUnits = 0; | |
338 | + for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) { | |
339 | + texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
340 | + if (replaced == 0xF) { | |
341 | + // all components are replaced, skip this TMU. | |
342 | + tmu.format_idx = 0; | |
343 | + tmu.mask = 0; | |
344 | + tmu.replaced = replaced; | |
345 | + continue; | |
346 | + } | |
347 | + tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]); | |
348 | + tmu.format = c->formats[tmu.format_idx]; | |
349 | + tmu.bits = tmu.format.size*8; | |
350 | + tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]); | |
351 | + tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]); | |
352 | + tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i])); | |
353 | + tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]); | |
354 | + tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i]) | |
355 | + && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now | |
356 | + | |
357 | + // 5551 linear filtering is not supported | |
358 | + if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551) | |
359 | + tmu.linear = 0; | |
360 | + | |
361 | + tmu.mask = 0; | |
362 | + tmu.replaced = replaced; | |
363 | + | |
364 | + if (tmu.format_idx) { | |
365 | + mTextureMachine.activeUnits++; | |
366 | + if (tmu.format.c[0].h) tmu.mask |= 0x1; | |
367 | + if (tmu.format.c[1].h) tmu.mask |= 0x2; | |
368 | + if (tmu.format.c[2].h) tmu.mask |= 0x4; | |
369 | + if (tmu.format.c[3].h) tmu.mask |= 0x8; | |
370 | + if (tmu.env == GGL_REPLACE) { | |
371 | + replaced |= tmu.mask; | |
372 | + } else if (tmu.env == GGL_DECAL) { | |
373 | + if (!tmu.format.c[GGLFormat::ALPHA].h) { | |
374 | + // if we don't have alpha, decal does nothing | |
375 | + tmu.mask = 0; | |
376 | + } else { | |
377 | + // decal always ignores At | |
378 | + tmu.mask &= ~(1<<GGLFormat::ALPHA); | |
379 | + } | |
380 | + } | |
381 | + } | |
382 | + mTextureMachine.mask |= tmu.mask; | |
383 | + ////printf("%d: mask=%08lx, replaced=%08lx\n", | |
384 | + // i, int(tmu.mask), int(tmu.replaced)); | |
385 | + } | |
386 | + mTextureMachine.replaced = replaced; | |
387 | + mTextureMachine.directTexture = 0; | |
388 | + ////printf("replaced=%08lx\n", mTextureMachine.replaced); | |
389 | +} | |
390 | + | |
391 | + | |
392 | +void GGLX86Assembler::init_textures( | |
393 | + tex_coord_t* coords, | |
394 | + const reg_t& x, const reg_t& y) | |
395 | +{ | |
396 | + context_t const* c = mBuilderContext.c; | |
397 | + const needs_t& needs = mBuilderContext.needs; | |
398 | + reg_t temp_reg_t; | |
399 | + int Rx = x.reg; | |
400 | + int Ry = y.reg; | |
401 | + | |
402 | + if (mTextureMachine.mask) { | |
403 | + comment("compute texture coordinates"); | |
404 | + } | |
405 | + | |
406 | + // init texture coordinates for each tmu | |
407 | + const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n); | |
408 | + const bool multiTexture = mTextureMachine.activeUnits > 1; | |
409 | + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | |
410 | + const texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
411 | + if (tmu.format_idx == 0) | |
412 | + continue; | |
413 | + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | |
414 | + (tmu.twrap == GGL_NEEDS_WRAP_11)) | |
415 | + { | |
416 | + Scratch scratches(registerFile()); | |
417 | + // 1:1 texture | |
418 | + pointer_t& txPtr = coords[i].ptr; | |
419 | + txPtr.setTo(obtainReg(), tmu.bits); | |
420 | + CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy); | |
421 | + SAR(16, txPtr.reg); | |
422 | + ADD_REG_TO_REG(txPtr.reg, Rx); | |
423 | + CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy); | |
424 | + SAR(16, txPtr.reg); | |
425 | + ADD_REG_TO_REG(txPtr.reg, Ry); | |
426 | + // Rx and Ry are changed | |
427 | + // Rx = Rx + ti.iterators.ydsdy>>16 | |
428 | + // Ry = Ry + ti.iterators.ydtdy>>16 | |
429 | + // Rx = Ry * ti.stide + Rx | |
430 | + | |
431 | + // merge base & offset | |
432 | + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride); | |
433 | + IMUL(Ry, txPtr.reg); | |
434 | + ADD_REG_TO_REG(txPtr.reg, Rx); | |
435 | + | |
436 | + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); | |
437 | + temp_reg_t.setTo(Rx); | |
438 | + base_offset(txPtr, txPtr, temp_reg_t); | |
439 | + //PUSH(txPtr.reg); | |
440 | + mCurSp = mCurSp - 4; | |
441 | + txPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg | |
442 | + MOV_REG_TO_MEM(txPtr.reg, txPtr.offset_ebp, EBP); | |
443 | + recycleReg(txPtr.reg); | |
444 | + txPtr.reg=-1; | |
445 | + } else { | |
446 | + Scratch scratches(registerFile()); | |
447 | + reg_t& s = coords[i].s; | |
448 | + reg_t& t = coords[i].t; | |
449 | + // s = (x * dsdx)>>16 + ydsdy | |
450 | + // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0 | |
451 | + // t = (x * dtdx)>>16 + ydtdy | |
452 | + // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0 | |
453 | + const int need_w = GGL_READ_NEEDS(W, needs.n); | |
454 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
455 | + if (need_w) { | |
456 | + s.setTo(obtainReg()); | |
457 | + t.setTo(obtainReg()); | |
458 | + CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy); | |
459 | + CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy); | |
460 | + CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); | |
461 | + CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); | |
462 | + recycleReg(s.reg); | |
463 | + recycleReg(t.reg); | |
464 | + } else { | |
465 | + int ydsdy = scratches.obtain(); | |
466 | + int dsdx = scratches.obtain(); | |
467 | + CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy); | |
468 | + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); | |
469 | + IMUL(Rx, dsdx); | |
470 | + ADD_REG_TO_REG(dsdx, ydsdy); | |
471 | + CONTEXT_STORE(ydsdy, generated_vars.texture[i].spill[0]); | |
472 | + scratches.recycle(ydsdy); | |
473 | + scratches.recycle(dsdx); | |
474 | + | |
475 | + int ydtdy = scratches.obtain(); | |
476 | + int dtdx = scratches.obtain(); | |
477 | + CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy); | |
478 | + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); | |
479 | + IMUL(Rx, dtdx); | |
480 | + ADD_REG_TO_REG(dtdx, ydtdy); | |
481 | + CONTEXT_STORE(ydtdy, generated_vars.texture[i].spill[1]); | |
482 | + scratches.recycle(ydtdy); | |
483 | + scratches.recycle(dtdx); | |
484 | + | |
485 | + // s.reg = Rx * ti.dsdx + ydsdy | |
486 | + // t.reg = Rx * ti.dtdx + ydtdy | |
487 | + } | |
488 | + } | |
489 | + | |
490 | + // direct texture? | |
491 | + if (!multiTexture && !mBlending && !mDithering && !mFog && | |
492 | + cb_format_idx == tmu.format_idx && !tmu.linear && | |
493 | + mTextureMachine.replaced == tmu.mask) | |
494 | + { | |
495 | + mTextureMachine.directTexture = i + 1; | |
496 | + } | |
497 | + } | |
498 | +} | |
499 | + | |
500 | +void GGLX86Assembler::build_textures( fragment_parts_t& parts, | |
501 | + Scratch& regs) | |
502 | +{ | |
503 | + context_t const* c = mBuilderContext.c; | |
504 | + const needs_t& needs = mBuilderContext.needs; | |
505 | + reg_t temp_reg_t; | |
506 | + //int Rctx = mBuilderContext.Rctx; | |
507 | + | |
508 | + | |
509 | + const bool multiTexture = mTextureMachine.activeUnits > 1; | |
510 | + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | |
511 | + const texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
512 | + if (tmu.format_idx == 0) | |
513 | + continue; | |
514 | + | |
515 | + pointer_t& txPtr = parts.coords[i].ptr; | |
516 | + pixel_t& texel = parts.texel[i]; | |
517 | + | |
518 | + // repeat... | |
519 | + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | |
520 | + (tmu.twrap == GGL_NEEDS_WRAP_11)) | |
521 | + { // 1:1 textures | |
522 | + comment("fetch texel"); | |
523 | + texel.setTo(regs.obtain(), &tmu.format); | |
524 | + txPtr.reg = regs.obtain(); | |
525 | + MOV_MEM_TO_REG(txPtr.offset_ebp, EBP, txPtr.reg); | |
526 | + mCurSp = mCurSp - 4; | |
527 | + texel.offset_ebp = mCurSp; | |
528 | + load(txPtr, texel, WRITE_BACK); | |
529 | + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); | |
530 | + regs.recycle(texel.reg); | |
531 | + regs.recycle(txPtr.reg); | |
532 | + } else { | |
533 | + Scratch scratches(registerFile()); | |
534 | + reg_t& s = parts.coords[i].s; | |
535 | + reg_t& t = parts.coords[i].t; | |
536 | + comment("reload s/t (multitexture or linear filtering)"); | |
537 | + s.reg = scratches.obtain(); | |
538 | + t.reg = scratches.obtain(); | |
539 | + mBuilderContext.Rctx = scratches.obtain(); | |
540 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
541 | + CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); | |
542 | + CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); | |
543 | + | |
544 | + comment("compute repeat/clamp"); | |
545 | + int width = scratches.obtain(); | |
546 | + int height = scratches.obtain(); | |
547 | + int U = 0; | |
548 | + int V = 0; | |
549 | + // U and V will be stored onto the stack due to the limited register | |
550 | + reg_t reg_U, reg_V; | |
551 | + | |
552 | + CONTEXT_LOAD(width, generated_vars.texture[i].width); | |
553 | + CONTEXT_LOAD(height, generated_vars.texture[i].height); | |
554 | + scratches.recycle(mBuilderContext.Rctx); | |
555 | + | |
556 | + int FRAC_BITS = 0; | |
557 | + if (tmu.linear) { | |
558 | + // linear interpolation | |
559 | + if (tmu.format.size == 1) { | |
560 | + // for 8-bits textures, we can afford | |
561 | + // 7 bits of fractional precision at no | |
562 | + // additional cost (we can't do 8 bits | |
563 | + // because filter8 uses signed 16 bits muls) | |
564 | + FRAC_BITS = 7; | |
565 | + } else if (tmu.format.size == 2) { | |
566 | + // filter16() is internally limited to 4 bits, so: | |
567 | + // FRAC_BITS=2 generates less instructions, | |
568 | + // FRAC_BITS=3,4,5 creates unpleasant artifacts, | |
569 | + // FRAC_BITS=6+ looks good | |
570 | + FRAC_BITS = 6; | |
571 | + } else if (tmu.format.size == 4) { | |
572 | + // filter32() is internally limited to 8 bits, so: | |
573 | + // FRAC_BITS=4 looks good | |
574 | + // FRAC_BITS=5+ looks better, but generates 3 extra ipp | |
575 | + FRAC_BITS = 6; | |
576 | + } else { | |
577 | + // for all other cases we use 4 bits. | |
578 | + FRAC_BITS = 4; | |
579 | + } | |
580 | + } | |
581 | + int u = scratches.obtain(); | |
582 | + // s.reg and t.reg are recycled in wrapping | |
583 | + wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS, scratches); | |
584 | + int v = scratches.obtain(); | |
585 | + wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS, scratches); | |
586 | + | |
587 | + | |
588 | + if (tmu.linear) { | |
589 | + | |
590 | + //mBuilderContext.Rctx = scratches.obtain(); | |
591 | + //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
592 | + //CONTEXT_LOAD(width, generated_vars.texture[i].width); | |
593 | + //CONTEXT_LOAD(height, generated_vars.texture[i].height); | |
594 | + //scratches.recycle(mBuilderContext.Rctx); | |
595 | + | |
596 | + comment("compute linear filtering offsets"); | |
597 | + // pixel size scale | |
598 | + const int shift = 31 - gglClz(tmu.format.size); | |
599 | + U = scratches.obtain(); | |
600 | + V = scratches.obtain(); | |
601 | + | |
602 | + | |
603 | + // sample the texel center | |
604 | + SUB_IMM_TO_REG(1<<(FRAC_BITS-1), u); | |
605 | + SUB_IMM_TO_REG(1<<(FRAC_BITS-1), v); | |
606 | + | |
607 | + // get the fractional part of U,V | |
608 | + MOV_REG_TO_REG(u, U); | |
609 | + AND_IMM_TO_REG((1<<FRAC_BITS)-1, U); | |
610 | + MOV_REG_TO_REG(v, V); | |
611 | + AND_IMM_TO_REG((1<<FRAC_BITS)-1, V); | |
612 | + | |
613 | + // below we will pop U and V in the filter function | |
614 | + mCurSp = mCurSp - 4; | |
615 | + MOV_REG_TO_MEM(U, mCurSp, EBP); | |
616 | + reg_U.offset_ebp = mCurSp; | |
617 | + mCurSp = mCurSp - 4; | |
618 | + MOV_REG_TO_MEM(V, mCurSp, EBP); | |
619 | + reg_V.offset_ebp = mCurSp; | |
620 | + | |
621 | + scratches.recycle(U); | |
622 | + scratches.recycle(V); | |
623 | + | |
624 | + // compute width-1 and height-1 | |
625 | + SUB_IMM_TO_REG(1, width); | |
626 | + SUB_IMM_TO_REG(1, height); | |
627 | + | |
628 | + // the registers are used up | |
629 | + int temp1 = scratches.obtain(); | |
630 | + int temp2 = scratches.obtain(); | |
631 | + // get the integer part of U,V and clamp/wrap | |
632 | + // and compute offset to the next texel | |
633 | + if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) { | |
634 | + // u has already been REPEATed | |
635 | + SAR(FRAC_BITS, u); | |
636 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, width, u); | |
637 | + MOV_IMM_TO_REG(1<<shift, temp1); | |
638 | + MOV_REG_TO_REG(width, temp2); | |
639 | + // SHL may pollute the CF flag | |
640 | + SHL(shift, temp2); | |
641 | + mCurSp = mCurSp - 4; | |
642 | + int width_offset_ebp = mCurSp; | |
643 | + // width will be changed after the first comparison | |
644 | + MOV_REG_TO_MEM(width, width_offset_ebp, EBP); | |
645 | + CMP_REG_TO_REG(width, u); | |
646 | + CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, width); | |
647 | + if (shift) { | |
648 | + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, width); | |
649 | + } | |
650 | + MOV_REG_TO_REG(width, temp1); | |
651 | + NEG(temp1); | |
652 | + // width is actually changed | |
653 | + CMP_MEM_TO_REG(EBP, width_offset_ebp, u); | |
654 | + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, width); | |
655 | + } else { | |
656 | + // u has not been CLAMPed yet | |
657 | + // algorithm: | |
658 | + // if ((u>>4) >= width) | |
659 | + // u = width<<4 | |
660 | + // width = 0 | |
661 | + // else | |
662 | + // width = 1<<shift | |
663 | + // u = u>>4; // get integer part | |
664 | + // if (u<0) | |
665 | + // u = 0 | |
666 | + // width = 0 | |
667 | + // generated_vars.rt = width | |
668 | + | |
669 | + MOV_REG_TO_REG(width, temp2); | |
670 | + SHL(FRAC_BITS, temp2); | |
671 | + MOV_REG_TO_REG(u, temp1); | |
672 | + SAR(FRAC_BITS, temp1); | |
673 | + CMP_REG_TO_REG(temp1, width); | |
674 | + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, u); | |
675 | + // mov doesn't affect the flags | |
676 | + MOV_IMM_TO_REG(0, temp2); | |
677 | + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, width); | |
678 | + MOV_IMM_TO_REG(1 << shift, temp2); | |
679 | + CMOV_REG_TO_REG(Mnemonic_CMOVG, temp2, width); | |
680 | + | |
681 | + MOV_IMM_TO_REG(0, temp2); | |
682 | + SAR(FRAC_BITS, u); | |
683 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, u); | |
684 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, width); | |
685 | + } | |
686 | + scratches.recycle(temp1); | |
687 | + scratches.recycle(temp2); | |
688 | + mBuilderContext.Rctx = scratches.obtain(); | |
689 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
690 | + CONTEXT_STORE(width, generated_vars.rt); | |
691 | + | |
692 | + const int stride = width; | |
693 | + CONTEXT_LOAD(stride, generated_vars.texture[i].stride); | |
694 | + scratches.recycle(mBuilderContext.Rctx); | |
695 | + | |
696 | + temp1 = scratches.obtain(); | |
697 | + temp2 = scratches.obtain(); | |
698 | + | |
699 | + int height_offset_ebp; | |
700 | + if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) { | |
701 | + // v has already been REPEATed | |
702 | + SAR(FRAC_BITS, v); | |
703 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, height, v); | |
704 | + MOV_IMM_TO_REG(1<<shift, temp1); | |
705 | + MOV_REG_TO_REG(height, temp2); | |
706 | + SHL(shift, temp2); | |
707 | + mCurSp = mCurSp - 4; | |
708 | + height_offset_ebp = mCurSp; | |
709 | + // height will be changed after the first comparison | |
710 | + MOV_REG_TO_MEM(height, height_offset_ebp, EBP); | |
711 | + CMP_REG_TO_REG(height, v); | |
712 | + CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, height); | |
713 | + if (shift) { | |
714 | + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, height); | |
715 | + } | |
716 | + MOV_REG_TO_REG(height, temp1); | |
717 | + NEG(temp1); | |
718 | + // height is actually changed | |
719 | + CMP_MEM_TO_REG(EBP, height_offset_ebp, v); | |
720 | + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, height); | |
721 | + IMUL(stride, height); | |
722 | + } else { | |
723 | + // v has not been CLAMPed yet | |
724 | + MOV_REG_TO_REG(height, temp2); | |
725 | + SHL(FRAC_BITS, temp2); | |
726 | + MOV_REG_TO_REG(v, temp1); | |
727 | + SAR(FRAC_BITS, temp1); | |
728 | + | |
729 | + mCurSp = mCurSp - 4; | |
730 | + height_offset_ebp = mCurSp; | |
731 | + // height may be changed after the first comparison | |
732 | + MOV_REG_TO_MEM(height, height_offset_ebp, EBP); | |
733 | + | |
734 | + CMP_REG_TO_REG(temp1, height); | |
735 | + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, v); | |
736 | + MOV_IMM_TO_REG(0, temp2); | |
737 | + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, height); | |
738 | + | |
739 | + if (shift) { | |
740 | + // stride = width. It's not used | |
741 | + // shift may pollute the flags | |
742 | + SHL(shift, stride); | |
743 | + // height may be changed to 0 | |
744 | + CMP_REG_TO_MEM(temp1, height_offset_ebp, EBP); | |
745 | + CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height); | |
746 | + } else { | |
747 | + CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height); | |
748 | + } | |
749 | + MOV_IMM_TO_REG(0, temp2); | |
750 | + SAR(FRAC_BITS, v); | |
751 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, v); | |
752 | + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, height); | |
753 | + } | |
754 | + scratches.recycle(temp1); | |
755 | + scratches.recycle(temp2); | |
756 | + mBuilderContext.Rctx = scratches.obtain(); | |
757 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
758 | + CONTEXT_STORE(height, generated_vars.lb); | |
759 | + scratches.recycle(mBuilderContext.Rctx); | |
760 | + } | |
761 | + | |
762 | + scratches.recycle(width); | |
763 | + scratches.recycle(height); | |
764 | + | |
765 | + // iterate texture coordinates... | |
766 | + comment("iterate s,t"); | |
767 | + int dsdx = scratches.obtain(); | |
768 | + s.reg = scratches.obtain(); | |
769 | + mBuilderContext.Rctx = scratches.obtain(); | |
770 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); | |
771 | + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); | |
772 | + CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); | |
773 | + ADD_REG_TO_REG(dsdx, s.reg); | |
774 | + CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); | |
775 | + scratches.recycle(s.reg); | |
776 | + scratches.recycle(dsdx); | |
777 | + int dtdx = scratches.obtain(); | |
778 | + t.reg = scratches.obtain(); | |
779 | + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); | |
780 | + CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); | |
781 | + ADD_REG_TO_REG(dtdx, t.reg); | |
782 | + CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); | |
783 | + scratches.recycle(dtdx); | |
784 | + scratches.recycle(t.reg); | |
785 | + | |
786 | + // merge base & offset... | |
787 | + comment("merge base & offset"); | |
788 | + texel.setTo(scratches.obtain(), &tmu.format); | |
789 | + //txPtr.setTo(texel.reg, tmu.bits); | |
790 | + txPtr.setTo(scratches.obtain(), tmu.bits); | |
791 | + int stride = scratches.obtain(); | |
792 | + CONTEXT_LOAD(stride, generated_vars.texture[i].stride); | |
793 | + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); | |
794 | + scratches.recycle(mBuilderContext.Rctx); | |
795 | + MOVSX_REG_TO_REG(OpndSize_16, v, v); | |
796 | + MOVSX_REG_TO_REG(OpndSize_16, stride, stride); | |
797 | + IMUL(v, stride); | |
798 | + ADD_REG_TO_REG(stride, u);// u+v*stride | |
799 | + temp_reg_t.setTo(u); | |
800 | + base_offset(txPtr, txPtr, temp_reg_t); | |
801 | + | |
802 | + // recycle registers we don't need anymore | |
803 | + scratches.recycle(u); | |
804 | + scratches.recycle(v); | |
805 | + scratches.recycle(stride); | |
806 | + | |
807 | + mCurSp = mCurSp - 4; | |
808 | + texel.offset_ebp = mCurSp; | |
809 | + // load texel | |
810 | + if (!tmu.linear) { | |
811 | + comment("fetch texel in building texture"); | |
812 | + load(txPtr, texel, 0); | |
813 | + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); | |
814 | + scratches.recycle(texel.reg); | |
815 | + scratches.recycle(txPtr.reg); | |
816 | + } else { | |
817 | + comment("fetch texel, bilinear"); | |
818 | + // the registers are not enough. We spill texel and previous U and V | |
819 | + // texel.reg is recycled in the following functions since there are more than one code path | |
820 | + switch (tmu.format.size) { | |
821 | + case 1: | |
822 | + filter8(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); | |
823 | + break; | |
824 | + case 2: | |
825 | + filter16(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); | |
826 | + break; | |
827 | + case 3: | |
828 | + filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); | |
829 | + break; | |
830 | + case 4: | |
831 | + filter32(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); | |
832 | + break; | |
833 | + } | |
834 | + } | |
835 | + } | |
836 | + } | |
837 | +} | |
838 | + | |
839 | +void GGLX86Assembler::build_iterate_texture_coordinates( /* Advances the texture coordinates of every enabled texture unit by one step (presumably one pixel along x -- TODO confirm against the caller). */ | |
840 | + const fragment_parts_t& parts) /* parts: supplies coords[i].ptr, the texel pointer descriptor used on the 1:1 path below. */ | |
841 | +{ /* No return value; the work is done through the instruction macros (presumably each one emits x86 code -- their definitions are not visible here). */ | |
842 | + const bool multiTexture = mTextureMachine.activeUnits > 1; /* NOTE(review): multiTexture is never read in this function. */ | |
843 | + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | |
844 | + const texture_unit_t& tmu = mTextureMachine.tmu[i]; | |
845 | + if (tmu.format_idx == 0) /* units with format_idx == 0 are skipped (presumably means no texture format is set -- TODO confirm) */ | |
846 | + continue; | |
847 | + | |
848 | + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | |
849 | + (tmu.twrap == GGL_NEEDS_WRAP_11)) | |
850 | + { // 1:1 textures | |
851 | + const pointer_t& txPtr = parts.coords[i].ptr; | |
852 | + ADD_IMM_TO_MEM(txPtr.size>>3, txPtr.offset_ebp, EBP); /* bump the pointer slot at offset_ebp off EBP by size>>3 (presumably size is in bits, making this one texel in bytes -- TODO confirm) */ | |
853 | + } else { /* any other wrap mode: step the s and t values kept in generated_vars.texture[i].spill[] */ | |
854 | + Scratch scratches(registerFile()); /* all registers below are obtained from and recycled to this object */ | |
855 | + int s = parts.coords[i].s.reg; /* NOTE(review): this initial value is overwritten by obtain() before s is used */ | |
856 | + int t = parts.coords[i].t.reg; /* NOTE(review): this initial value is overwritten by obtain() before t is used */ | |
857 | + mBuilderContext.Rctx = scratches.obtain(); | |
858 | + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); /* fill Rctx from offset 8 off EBP (presumably the context pointer argument -- TODO confirm) */ | |
859 | + s = scratches.obtain(); | |
860 | + int dsdx = scratches.obtain(); | |
861 | + CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]); /* s <- spill[0] */ | |
862 | + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); | |
863 | + ADD_REG_TO_REG(dsdx, s); /* s += dsdx */ | |
864 | + CONTEXT_STORE(s, generated_vars.texture[i].spill[0]); /* write the stepped s back to spill[0] */ | |
865 | + scratches.recycle(s); | |
866 | + scratches.recycle(dsdx); | |
867 | + int dtdx = scratches.obtain(); /* same sequence for t, using spill[1] and dtdx */ | |
868 | + t = scratches.obtain(); | |
869 | + CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]); /* t <- spill[1] */ | |
870 | + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); | |
871 | + ADD_REG_TO_REG(dtdx, t); /* t += dtdx */ | |
872 | + CONTEXT_STORE(t, generated_vars.texture[i].spill[1]); /* write the stepped t back to spill[1] */ | |
873 | + scratches.recycle(t); | |
874 | + scratches.recycle(dtdx); /* NOTE(review): Rctx is not recycled explicitly in this function; scratches goes out of scope at the next brace */ | |
875 | + } | |
876 | + } | |
877 | +} | |
878 | + | |
879 | +void GGLX86Assembler::filter8( | |
880 | + const fragment_parts_t& parts, | |
881 | + pixel_t& texel, const texture_unit_t& tmu, | |
882 | + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, | |
883 | + int FRAC_BITS, Scratch& scratches) | |
884 | +{ | |
885 | + if (tmu.format.components != GGL_ALPHA && | |
886 | + tmu.format.components != GGL_LUMINANCE) | |
887 | + { | |
888 | + // this is a packed format, and we don't support | |
889 | + // linear filtering (it's probably RGB 332) | |
890 | + // Should not happen with OpenGL|ES | |
891 | + MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, texel.reg); | |
892 | + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); | |
893 | + scratches.recycle(texel.reg); | |
894 | + scratches.recycle(txPtr.reg); | |
895 | + return; | |
896 | + } | |
897 | + | |
898 | + // ------------------------ | |
899 | + | |
900 | + //int d = scratches.obtain(); | |
901 | + //int u = scratches.obtain(); | |
902 | + //int k = scratches.obtain(); | |
903 | + | |
904 | + scratches.recycle(texel.reg); | |
905 | + int rt = scratches.obtain(); | |
906 | + int lb = scratches.obtain(); | |
907 | + | |
908 | + // RB -> U * V | |
909 | + | |
910 | + mBuilderContext.Rctx = scratches.obtain(); | |
911 | + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); | |
912 | + CONTEXT_LOAD(rt, generated_vars.rt); | |
913 | + CONTEXT_LOAD(lb, generated_vars.lb); | |
914 | + scratches.recycle(mBuilderContext.Rctx); | |
915 | + int pixel= scratches.obtain(); | |
916 | + | |
917 | + int offset = pixel; | |
918 | + | |
919 | + MOV_REG_TO_REG(rt, offset); | |
920 | + ADD_REG_TO_REG(lb, offset); | |
921 | + | |
922 | + int temp_reg1 = scratches.obtain(); | |
923 | + int temp_reg2 = scratches.obtain(); | |
924 | + // it seems that the address mode with base and scale reg cannot be encoded correctly | |
925 | + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg1, OpndSize_8); | |
926 | + ADD_REG_TO_REG(txPtr.reg, offset); | |
927 | + MOVZX_MEM_TO_REG(OpndSize_8, offset, 0, temp_reg1); | |
928 | + // pixel is only 8-bits | |
929 | + MOV_REG_TO_REG(temp_reg1, pixel); | |
930 | + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); | |
931 | + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2); | |
932 | + IMUL(temp_reg2, temp_reg1); | |
933 | + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); | |
934 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg2); | |
935 | + IMUL(temp_reg2, pixel); | |
936 | + NEG(temp_reg1); | |
937 | + ADD_IMM_TO_REG(1<<(FRAC_BITS*2), temp_reg1); | |
938 | + mCurSp = mCurSp - 4; | |
939 | + int d_offset_ebp = mCurSp; | |
940 | + MOV_REG_TO_MEM(pixel, d_offset_ebp, EBP); | |
941 | + mCurSp = mCurSp - 4; | |
942 | + int k_offset_ebp = mCurSp; | |
943 | + MOV_REG_TO_MEM(temp_reg1, k_offset_ebp, EBP); | |
944 | + | |
945 | + | |
946 | + // LB -> (1-U) * V | |
947 | + MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg2); | |
948 | + NEG(temp_reg2); | |
949 | + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2); | |
950 | + MOV_REG_TO_MEM(temp_reg2, reg_U.offset_ebp, EBP); | |
951 | + | |
952 | + //MOV_MEM_SCALE_TO_REG(txPtr.reg, lb, 1, pixel, OpndSize_8); | |
953 | + ADD_REG_TO_REG(txPtr.reg, lb); | |
954 | + MOVZX_MEM_TO_REG(OpndSize_8, lb, 0, pixel); | |
955 | + | |
956 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2); | |
957 | + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg1); | |
958 | + IMUL(temp_reg1, temp_reg2); | |
959 | + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); | |
960 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1); | |
961 | + IMUL(pixel, temp_reg1); | |
962 | + ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp); | |
963 | + SUB_REG_TO_MEM(temp_reg2, EBP, k_offset_ebp); | |
964 | + | |
965 | + | |
966 | + // LT -> (1-U)*(1-V) | |
967 | + MOV_MEM_TO_REG(reg_V.offset_ebp, EBP, temp_reg2); | |
968 | + NEG(temp_reg2); | |
969 | + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2); | |
970 | + MOV_REG_TO_MEM(temp_reg2, reg_V.offset_ebp, EBP); | |
971 | + | |
972 | + MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, pixel); | |
973 | + | |
974 | + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); | |
975 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2); | |
976 | + IMUL(temp_reg1, temp_reg2); | |
977 | + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1); | |
978 | + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); | |
979 | + IMUL(pixel, temp_reg1); | |
980 | + ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp); | |
981 | + | |
982 | + // RT -> U*(1-V) | |
983 | + //MOV_MEM_SCALE_TO_REG(txPtr.reg, rt, 1, pixel, OpndSize_8); | |
984 | + ADD_REG_TO_REG(txPtr.reg, rt); | |
985 | + MOVZX_MEM_TO_REG(OpndSize_8, rt, 0, pixel); | |
986 | + | |
987 | + int k = rt; | |
988 | + MOV_MEM_TO_REG(k_offset_ebp, EBP, k); | |
989 | + SUB_REG_TO_REG(temp_reg2, k); | |
990 | + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); | |
991 | + MOVSX_REG_TO_REG(OpndSize_16, k, k); | |
992 | + IMUL(pixel, k); | |
993 | + ADD_MEM_TO_REG(EBP, d_offset_ebp, k); | |
994 | + MOV_REG_TO_MEM(k, texel.offset_ebp, EBP); | |
995 | + scratches.recycle(rt); | |
996 | + scratches.recycle(lb); | |
997 | + scratches.recycle(pixel); | |
998 | + scratches.recycle(txPtr.reg); | |
999 | + scratches.recycle(temp_reg1); | |
1000 | + scratches.recycle(temp_reg2); | |
1001 | + for (int i=0 ; i<4 ; i++) { | |
1002 | + if (!texel.format.c[i].h) continue; | |
1003 | + texel.format.c[i].h = FRAC_BITS*2+8; | |
1004 | + |
Part of diff was cut off due to size limit. Use your local client to view the full diff.