Revisión | 8cf9a3d3f7a4b95f33e0bda5416b9c93ec887dd3 (tree) |
---|---|
Tiempo | 2017-01-14 04:47:29 |
Autor | Richard Henderson <rth@twiddle.net> |
Committer | Richard Henderson |
tcg/aarch64: Fix tcg_out_movi
There were some patterns, like 0x0000_ffff_ffff_00ff, for which we
would choose to begin a multi-insn sequence with MOVN, but would then
fail to reset the 0x0000 lane, which the MOVN had left as 0xffff.
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20161207180727.6286-3-rth@twiddle.net>
@@ -580,11 +580,9 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, | ||
580 | 580 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, |
581 | 581 | tcg_target_long value) |
582 | 582 | { |
583 | - AArch64Insn insn; | |
584 | 583 | int i, wantinv, shift; |
585 | 584 | tcg_target_long svalue = value; |
586 | 585 | tcg_target_long ivalue = ~value; |
587 | - tcg_target_long imask; | |
588 | 586 | |
589 | 587 | /* For 32-bit values, discard potential garbage in value. For 64-bit |
590 | 588 | values within [2**31, 2**32-1], we can create smaller sequences by |
@@ -630,42 +628,35 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, | ||
630 | 628 | |
631 | 629 | /* Would it take fewer insns to begin with MOVN? For the value and its |
632 | 630 | inverse, count the number of 16-bit lanes that are 0. */ |
633 | - for (i = wantinv = imask = 0; i < 64; i += 16) { | |
631 | + for (i = wantinv = 0; i < 64; i += 16) { | |
634 | 632 | tcg_target_long mask = 0xffffull << i; |
635 | - if ((value & mask) == 0) { | |
636 | - wantinv -= 1; | |
637 | - } | |
638 | - if ((ivalue & mask) == 0) { | |
639 | - wantinv += 1; | |
640 | - imask |= mask; | |
641 | - } | |
633 | + wantinv -= ((value & mask) == 0); | |
634 | + wantinv += ((ivalue & mask) == 0); | |
642 | 635 | } |
643 | 636 | |
644 | - /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ | |
645 | - insn = I3405_MOVZ; | |
646 | - if (wantinv > 0) { | |
647 | - value = ivalue; | |
648 | - insn = I3405_MOVN; | |
649 | - } | |
650 | - | |
651 | - /* Find the lowest lane that is not 0x0000. */ | |
652 | - shift = ctz64(value) & (63 & -16); | |
653 | - tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); | |
654 | - | |
655 | - if (wantinv > 0) { | |
656 | - /* Re-invert the value, so MOVK sees non-inverted bits. */ | |
657 | - value = ~value; | |
658 | - /* Clear out all the 0xffff lanes. */ | |
659 | - value ^= imask; | |
660 | - } | |
661 | - /* Clear out the lane that we just set. */ | |
662 | - value &= ~(0xffffUL << shift); | |
663 | - | |
664 | - /* Iterate until all lanes have been set, and thus cleared from VALUE. */ | |
665 | - while (value) { | |
637 | + if (wantinv <= 0) { | |
638 | + /* Find the lowest lane that is not 0x0000. */ | |
666 | 639 | shift = ctz64(value) & (63 & -16); |
667 | - tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); | |
640 | + tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift); | |
641 | + /* Clear out the lane that we just set. */ | |
668 | 642 | value &= ~(0xffffUL << shift); |
643 | + /* Iterate until all non-zero lanes have been processed. */ | |
644 | + while (value) { | |
645 | + shift = ctz64(value) & (63 & -16); | |
646 | + tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); | |
647 | + value &= ~(0xffffUL << shift); | |
648 | + } | |
649 | + } else { | |
650 | + /* Like above, but with the inverted value and MOVN to start. */ | |
651 | + shift = ctz64(ivalue) & (63 & -16); | |
652 | + tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift); | |
653 | + ivalue &= ~(0xffffUL << shift); | |
654 | + while (ivalue) { | |
655 | + shift = ctz64(ivalue) & (63 & -16); | |
656 | + /* Provide MOVK with the non-inverted value. */ | |
657 | + tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift); | |
658 | + ivalue &= ~(0xffffUL << shift); | |
659 | + } | |
669 | 660 | } |
670 | 661 | } |
671 | 662 |