Revision: 10715 https://osdn.net/projects/ttssh2/scm/svn/commits/10715 Author: zmatsuo Date: 2023-05-21 00:28:54 +0900 (Sun, 21 May 2023) Log Message: ----------- UTF-8の冗長な表現を受け入れないようにした - すべて "!" U+0021 (冗長なUTF-8表現) - echo -e "\x21" - echo -e "\xc0\xa1" - echo -e "\xe0\x80\xa1" - echo -e "\xf0\x80\x80\xa1" - 修正前はすべて U+0021 として扱っていた - 0x80以上の1byteは、直接Unicodeとして扱うようにした - UTF-8 では1byte目に 0x80以上の値は存在しない - Tera Term の入力文字コードが UTF-8 のとき、そのまま ISO8859-1 として使用できる - 上の例の場合は次の入力となる - U+0021 - U+00C0 U+00A1 - U+00E0 U+0080 U+00A1 - U+00F0 U+0080 U+0080 U+00A1 ticket #17226 Ticket Links: ------------ https://osdn.net/projects/ttssh2/tracker/detail/17226 Modified Paths: -------------- trunk/teraterm/teraterm/vtterm.c -------------- next part -------------- Modified: trunk/teraterm/teraterm/vtterm.c =================================================================== --- trunk/teraterm/teraterm/vtterm.c 2023-05-20 14:14:27 UTC (rev 10714) +++ trunk/teraterm/teraterm/vtterm.c 2023-05-20 15:28:54 UTC (rev 10715) @@ -4319,7 +4319,7 @@ FirstPrm = FALSE; } -int CheckUTF8Seq(BYTE b, int utf8_stat) +static int CheckUTF8Seq(BYTE b, int utf8_stat) { if (ts.Language == IdUtf8 || (ts.Language==IdJapanese && ts.KanjiCode==IdUTF8)) { if (utf8_stat > 0) { @@ -5973,6 +5973,7 @@ static int count = 0; unsigned int code; + int i; if (ts.FallbackToCP932 && Fallbacked) { return ParseFirstJP(b); @@ -5991,94 +5992,125 @@ // - 0xf5 - 0xff // - 2byte目以降 // - 0x00 - 0x7f - // --0xc0 - 0xff + // - 0xc0 - 0xff - if ((b & 0x80) != 0x80 || ((b & 0xe0) == 0x80 && count == 0)) { - // 1バイト目および2バイト目がASCIIの場合は、すべてASCII出力とする。 - // 1バイト目がC1制御文字(0x80-0x9f)の場合も同様。 - - // 入力文字が 0x00 ...
0x7f - if (count == 0 || count == 1) { - if (count == 1) { - ParseASCII(buf[0]); - } + // 1byte(7bit) + if (count == 0) { + if ((b & 0x80) == 0x00) { + // 1byte(7bit) + // 0x7f以下, のとき、そのまま出力 ParseASCII(b); - count = 0; // reset counter return TRUE; } - } - - buf[count++] = b; - if (count < 2) { + if ((b & 0x40) == 0x00 || b >= 0xf6 ) { + // UTF-8で1byteに出現しないコードのとき、そのまま出力 + // 0x40 = 0b1011_1111, 0b10xx_xxxxというbitパターンにはならない + // 0xf6 以上のとき U+10FFFFより大きくなる + PutU32(b); + return TRUE; + } + // 1byte目保存 + buf[count++] = b; return TRUE; } - // 2バイトコードの場合 + // 2byte(11bit) if ((buf[0] & 0xe0) == 0xc0) { - if ((buf[1] & 0xc0) == 0x80) { - - code = ((buf[0] & 0x1f) << 6); - code |= ((buf[1] & 0x3f)); - - PutU32(code); + code = 0; + if((b & 0xc0) == 0x80) { + // 5bit + 6bit + code = ((buf[0] & 0x1f) << 6) | (b & 0x3f); + if (code < 0x80) { + // 11bit使って7bit以下の時、UTF-8の冗長な表現 + code = 0; + } } + if (code == 0){ + // そのまま出力 + PutU32(buf[0]); + PutU32(b); + count = 0; + return TRUE; + } else { - ParseASCII(buf[0]); - ParseASCII(buf[1]); + PutU32(code); + count = 0; + return TRUE; } - count = 0; - return TRUE; } - if (count < 3) { - return TRUE; - } + // 2byte目以降保存 + buf[count++] = b; - if ((buf[0] & 0xf0) == 0xe0 && - (buf[1] & 0xc0) == 0x80 && - (buf[2] & 0xc0) == 0x80) { // 3バイトコードの場合 - - // UTF-8 BOM(Byte Order Mark) - if (buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) { - goto skip; + // 3byte(16bit) + if ((buf[0] & 0xf0) == 0xe0) { + if(count < 3) { + return TRUE; } - - code = ((buf[0] & 0xf) << 12); - code |= ((buf[1] & 0x3f) << 6); - code |= ((buf[2] & 0x3f)); - - PutU32(code); - -skip: - count = 0; - + code = 0; + if ((buf[1] & 0xc0)
== 0x80 && (buf[2] & 0xc0) == 0x80) { + // 4bit + 6bit + 6bit + code = ((buf[0] & 0xf) << 12); + code |= ((buf[1] & 0x3f) << 6); + code |= ((buf[2] & 0x3f)); + if (code < 0x800) { + // 16bit使って11bit以下のとき、UTF-8の冗長な表現 + code = 0; + } + } + if (code == 0) { + // そのまま出力 + PutU32(buf[0]); + PutU32(buf[1]); + PutU32(buf[2]); + count = 0; + return TRUE; + } else { + PutU32(code); + count = 0; + return TRUE; + } } - if (count < 4) { - return TRUE; + // 4byte(21bit) + if ((buf[0] & 0xf8) == 0xf0) { + if(count < 4) { + return TRUE; + } + code = 0; + if ((buf[1] & 0xc0) == 0x80 && (buf[2] & 0xc0) == 0x80 && (buf[3] & 0xc0) == 0x80) { + // 3bit + 6bit + 6bit + 6bit + code = ((buf[0] & 0x07) << 18); + code |= ((buf[1] & 0x3f) << 12); + code |= ((buf[2] & 0x3f) << 6); + code |= (buf[3] & 0x3f); + if (code < 0x10000) { + // 21bit使って16bit以下のとき、UTF-8の冗長な表現 + code = 0; + } + } + if (code == 0) { + // そのまま出力 + PutU32(buf[0]); + PutU32(buf[1]); + PutU32(buf[2]); + PutU32(buf[3]); + count = 0; + return TRUE; + } else { + PutU32(code); + count = 0; + return TRUE; + } } - if ((buf[0] & 0xf8) == 0xf0 && - (buf[1] & 0xc0) == 0x80 && - (buf[2] & 0xc0) == 0x80 && - (buf[3] & 0xc0) == 0x80) - { // 4バイトコードの場合 - code = ((buf[0] & 0x07) << 18); - code |= ((buf[1] & 0x3f) << 12); - code |= ((buf[2] & 0x3f) << 6); - code |= (buf[3] & 0x3f); + // ここには来ない + assert(FALSE); - PutU32(code); - count = 0; - return TRUE; - } else { - ParseASCII(buf[0]); - ParseASCII(buf[1]); - ParseASCII(buf[2]); - ParseASCII(buf[3]); - count = 0; + for (i = 0; i < count; i++) { + ParseASCII(buf[i]); } - + count = 0; return TRUE; }