Revision: 7462 http://sourceforge.jp/projects/ttssh2/scm/svn/commits/7462 Author: zmatsuo Date: 2019-03-10 02:32:42 +0900 (Sun, 10 Mar 2019) Log Message: ----------- unicode関連をcodeconv.cppに集めた CP932へ/からの変換を関数に分離(UTF32ToCP932(), CP932ToUTF32()) unicodeからDEC特殊文字変換を関数に分離(UTF32ToDecSp()) SJIS2UTF8() (内部コードからUTF-8へ出力)を language.c から ttcmn.c に移動 _WideCharToMultiByte(), _MultiByteToWideChar() の変換した文字数の戻り値修正 UTF32ToMBCP()追加 UTF32_CP932()追加 MBCPToUTF32()追加 WideCharToUTF8(), WideCharToCP932() の仕様を変更 変換テーブルとWindows APIをつかったコード変換の優先順位を変更できるようにした Modified Paths: -------------- trunk/teraterm/common/codeconv.cpp trunk/teraterm/common/codeconv.h trunk/teraterm/teraterm/CMakeLists.txt trunk/teraterm/teraterm/vtterm.c trunk/teraterm/ttpcmn/language.c trunk/teraterm/ttpcmn/language.h trunk/teraterm/ttpcmn/ttcmn.c -------------- next part -------------- Modified: trunk/teraterm/common/codeconv.cpp =================================================================== --- trunk/teraterm/common/codeconv.cpp 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/common/codeconv.cpp 2019-03-09 17:32:42 UTC (rev 7462) @@ -26,14 +26,21 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +/* unicode\x8A֘A\x82̕\xB6\x8E\x9A\x83R\x81[\x83h\x95ϊ\xB7 */ + #include <windows.h> #include <string.h> +#include <assert.h> #include <crtdbg.h> #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) || !defined(_MSC_VER) #include <stdint.h> #endif +#include "codemap.h" #include "codeconv.h" +// cp932\x95ϊ\xB7\x8E\x9E\x81AWindows API \x82\xE6\x82\xE8 Tera Term \x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xF0\x97D\x90悷\x82\xE9 +//#define PRIORITY_CP932_TABLE + #if defined(_MSC_VER) && (_MSC_VER < 1600) typedef unsigned char uint8_t; typedef unsigned short uint16_t; @@ -47,12 +54,158 @@ #define _wcsdup(s) _wcsdup_dbg((s), _NORMAL_BLOCK, __FILE__, __LINE__) #endif +/* + * \x8C\xA9\x82\xA9\x82\xE7\x82Ȃ\xA2\x8Fꍇ\x82\xCD 0 \x82\xF0\x95Ԃ\xB7 + */ +static unsigned short _ConvertUnicode(unsigned short code, const codemap_t *table, int tmax) +{ + int low, mid, high; + unsigned short result; + + low = 0; + high = tmax - 1; + result = 0; // convert error + + // binary search + while (low < high) { + mid = (low + high) / 2; + if (table[mid].from_code < code) { + low = mid + 1; + } else { + high = mid; + } + } + + if (table[low].from_code == code) { + result = table[low].to_code; + } + + return (result); +} + +static int IsHighSurrogate(wchar_t u16) +{ + return 0xd800 <= u16 && u16 < 0xdc00; +} + +static int IsLowSurrogate(wchar_t u16) +{ + return 0xdc00 <= u16 && u16 < 0xe000; +} + /** - * UTF-32 \x82\xA9\x82\xE7 UTF-8 \x82֕ϊ\xB7\x82\xB7\x82\xE9 + * 1\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA4\x82\xBD\x82߂ɁA\x95K\x97v\x82ȃL\x83\x83\x83\x89\x83N\x83^\x90\x94\x82\xE9 + * @retval 0 \x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82Ȃ\xA2(\x95\xB6\x8E\x9A\x83R\x81[\x83h\x82\xAA\x82\xA8\x82\xA9\x82\xB5\x82\xA2) + * @retval 1 1\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9 + * @retval 2 2\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9 + */ +#if 0 +static size_t UTF16GetCharCount(const wchar_t *wstr_ptr, size_t 
wstr_len) +{ + wchar_t u16; + assert(wstr_ptr != NULL); + if (wstr_len == 0) { + return 0; + } + u16 = *wstr_ptr++; + if (IsHighSurrogate(u16)) { + if (wstr_len >= 2) { + const wchar_t u16_lo = *wstr_ptr++; + if (IsLowSurrogate(u16_lo)) { + return 2; + } else { + return 0; + } + } else { + return 0; + } + } else if (IsLowSurrogate(u16)) { + return 0; + } + return 1; +} +#endif + +/** + * code page \x82\xCC mulit byte \x95\xB6\x8E\x9A\x82\xF0 UTF-32\x82֕ϊ\xB7\x82\xB7\x82\xE9 + * @param KCode \x83}\x83\x8B\x83`\x83o\x83C\x83g\x82̕\xB6\x8E\x9A\x83R\x81[\x83h(0x0000-0xffff) + * @param CoePage \x83}\x83\x8B\x83`\x83o\x83C\x83g\x82̃R\x81[\x83h\x83y\x81[\x83W + * @retval unicode(UTF-32\x95\xB6\x8E\x9A\x83R\x81[\x83h) + */ +unsigned int MBCPToUTF32(unsigned short KCode, int CodePage) +{ + unsigned int c; + + if (CodePage == 932) { + c = CP932ToUTF32(KCode); + } else { + char buf[3]; + wchar_t wchar; + int ret; + int len = 0; + if (KCode < 0x100) { + buf[0] = KCode & 0xff; + len = 1; + } else { + buf[0] = KCode >> 8; + buf[1] = KCode & 0xff; + len = 2; + } + ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1); + if (ret <= 0) { + c = 0; + } else { + c = (unsigned int)wchar; + } + } + return c; +} + +/** + * wchar_t\x95\xB6\x8E\x9A\x97\xE7unicode(UTF-32)\x82\xF01\x95\xB6\x8E\x9A\x8E\xE6\x82\xE8\x8Fo\x82\xB7 + * @retval 0 \x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82Ȃ\xA2(\x95\xB6\x8E\x9A\x83R\x81[\x83h\x82\xAA\x82\xA8\x82\xA9\x82\xB5\x82\xA2) + * @retval 1 1\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9 + * @retval 2 2\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9 + */ +size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32) +{ + assert(wstr_ptr != NULL); + if (wstr_len == 0) { + *u32 = 0; + return 0; + } + const wchar_t u16 = *wstr_ptr++; + // \x83T\x83\x8D\x83Q\x81[\x83g high? 
+ if (IsHighSurrogate(u16)) { + if (wstr_len >= 2) { + const wchar_t u16_lo = *wstr_ptr++; + if (IsLowSurrogate(u16_lo)) { + // \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h + *u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00); + return 2; + } else { + *u32 = 0; + return 0; + } + } else { + *u32 = 0; + return 0; + } + } else if (IsLowSurrogate(u16)) { + *u32 = 0; + return 0; + } else { + *u32 = u16; + return 1; + } +} + +/** + * UTF-32\x95\xB6\x8E\x9A \x82\xA9\x82\xE7 UTF-8 \x82֕ϊ\xB7\x82\xB7\x82\xE9 * @param[in] u32 \x95ϊ\xB7\x82\xB7\x82\xE9UTF-32 * @param[in,out] u8_ptr \x95ϊ\xB7\x8C\xE3UTF-8\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2) * @param[in] u8_len UTF-8\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x83o\x83b\x83t\x83@\x92\xB7,byte\x90\x94) - * @retval \x8Eg\x97p\x82\xB5\x82\xBDutf8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j + * @retval \x8Fo\x97͂\xB5\x82\xBDutf8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j * 0=\x83G\x83\x89\x81[ */ size_t UTF32ToUTF8(uint32_t u32, char *u8_ptr_, size_t u8_len) @@ -113,7 +266,7 @@ * @param[in] u8_len UTF-8\x95\xB6\x8E\x9A\x97\xB3 * @param[out] u32 \x95ϊ\xB7\x82\xB5\x82\xBDUTF-32\x95\xB6\x8E\x9A * @retval \x8Eg\x97p\x82\xB5\x82\xBDUTF-8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j - * 0=\x83G\x83\x89\x81[ + * 0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD) */ size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, uint32_t *u32_) { @@ -191,33 +344,286 @@ return u8_in; } -// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5 -int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len) +/** + * UTF-32 \x82\xA9\x82\xE7 UTF-16 \x82֕ϊ\xB7\x82\xB7\x82\xE9 + * @param[in] u32 \x95ϊ\xB7\x82\xB7\x82\xE9UTF-32 + * @param[in,out] wstr_ptr \x95ϊ\xB7\x8C\xE3UTF-16\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2) + * @param[in] wstr_len UTF-16\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x95\xB6\x8E\x9A\x90\x94,sizeof(wchar_t)*wstr_len bytes) + * 
@retval \x8Fo\x97͂\xB5\x82\xBDUTF-16\x95\xB6\x8E\x9A\x90\x94(sizeof(wchar_t)\x94{\x82\xB7\x82\xE9\x82\xC6byte\x90\x94) + * 0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD) + */ +size_t UTF32ToUTF16(uint32_t u32, wchar_t *wstr_ptr, size_t wstr_len) { - int u8_out_sum = 0; - if (u8_ptr == NULL) { - u8_len = 4; + size_t u16_out; + if (u32 < 0x10000) { + if (wstr_len >= 1) { + if (wstr_ptr != NULL) { + *wstr_ptr++ = (uint16_t)u32; + } + u16_out = 1; + } else { + u16_out = 0; + } + } else if (u32 <= 0x10ffff) { + if (wstr_len >= 2) { + if (wstr_ptr != NULL) { + // \x83T\x83\x8D\x83Q\x81[\x83g \x83G\x83\x93\x83R\x81[\x83h + *wstr_ptr++ = uint16_t((u32 - 0x10000) / 0x400) + 0xd800; + *wstr_ptr++ = uint16_t((u32 - 0x10000) % 0x400) + 0xdc00; + } + u16_out = 2; + } else { + u16_out = 0; + } } else { - if (u8_len == 0) { - return 0; + u16_out = 0; + } + return u16_out; +} + +/** + * UTF-32\x95\xB6\x8E\x9A\x82\xF0CP932\x95\xB6\x8E\x9A(Shift_JIS) 1\x95\xB6\x8E\x9A\x82֕ϊ\xB7\x82\xB7\x82\xE9 + * @retval \x8Eg\x97p\x82\xB5\x82\xBDCP932\x95\xB6\x8E\x9A + * 0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD) + */ +unsigned short UTF32_CP932(unsigned int u32) +{ +#include "../teraterm/uni2sjis.map" // mapUnicodeToSJIS[] + char mbstr[2]; + unsigned short mb; + DWORD mblen; + wchar_t u16_str[2]; + size_t u16_len; + + if (u32 < 0x80) { + return (unsigned short)u32; + } + +#if defined(PRIORITY_CP932_TABLE) + if (u32 < 0x10000) { + wchar_t u16 = (wchar_t)u32; + // Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7 + mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS)); + if (mb != 0) { + // \x95ϊ\xB7\x82ł\xAB\x82\xBD + return mb; } } - if (wstr_len < 0) { +#endif + u16_len = UTF32ToUTF16(u32, u16_str, 2); + if (u16_len == 0) { + return 0; + } + mblen = WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, NULL); + switch (mblen) { + case 0: + case 1: + default: + if (mblen == 0 || mbstr[0] == 
'?') { + goto next_convert; + } else { + mb = (unsigned char)mbstr[0]; + return mb; + } + case 2: + if (mbstr[0] == '?' && mbstr[1] == '?') { + // 2byte\x8Fo\x97\xCD && "??" \x82̏ꍇ\x82͕ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD + goto next_convert; + } + mb = (((unsigned char)mbstr[0]) << 8) | (unsigned char)mbstr[1]; + return mb; + } + +next_convert: +#if !defined(PRIORITY_CP932_TABLE) + if (u32 < 0x10000) { + wchar_t u16 = (wchar_t)u32; + // Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7 + mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS)); + if (mb != 0) { + // \x95ϊ\xB7\x82ł\xAB\x82\xBD + return mb; + } + } +#endif + return 0; +} + +/** + * CP932\x95\xB6\x8E\x9A(Shift_JIS) 1\x95\xB6\x8E\x9A\x82\xA9\x82\xE7UTF-32\x82֕ϊ\xB7\x82\xB7\x82\xE9 + * @param[in] cp932 CP932\x95\xB6\x8E\x9A + * @retval \x95ϊ\xB7\x82\xB5\x82\xBDUTF-32\x95\xB6\x8E\x9A\x90\x94 + * 0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD) + */ +unsigned int CP932ToUTF32(unsigned short cp932) +{ +#include "../ttpcmn/sjis2uni.map" // mapSJISToUnicode[] + wchar_t wchar; + int ret; + unsigned int u32; + unsigned char buf[2]; + int len = 0; + +#if defined(PRIORITY_CP932_TABLE) + u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0])); + if (u32 != 0) { + return u32; + } +#endif + if (cp932 < 0x100) { + buf[0] = cp932 & 0xff; + len = 1; + } else { + buf[0] = cp932 >> 8; + buf[1] = cp932 & 0xff; + len = 2; + } + ret = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1); + if (ret <= 0) { + // MultiByteToWideChar()\x82\xAA\x95ϊ\xB7\x8E\xB8\x94s +#if !defined(PRIORITY_CP932_TABLE) + u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0])); + // \x83e\x81[\x83u\x83\x8B\x82ɂ\xE0\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ c = 0(\x95ϊ\xB7\x8E\xB8\x94s\x8E\x9E) +#else + u32 = 0; +#endif + } else { + u32 = (unsigned int)wchar; + } + + return u32; +} 
+ +/** + * Unicode\x82\xA9\x82\xE7DEC\x93\xC1\x8Eꕶ\x8E\x9A\x82֕ϊ\xB7 + * @param u32 UTF-32\x95\xB6\x8E\x9A\x83R\x81[\x83h + * @return \x89\xBA\x88\xCA8bit DEC\x93\xC1\x8Eꕶ\x8E\x9A\x83R\x81[\x83h + * \x8F\xE3\x88\xCA8bit \x95\xB6\x8E\x9A\x83R\x81[\x83h\x8E\xED\x95\xCA (1,2,4) + * file://../../doc/ja/html/setup/teraterm-term.html \x8EQ\x8F\xC6 + * 0 \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD + */ +unsigned short UTF32ToDecSp(unsigned int u32) +{ +#include "../teraterm/unisym2decsp.map" // mapUnicodeSymbolToDecSp[] + unsigned short cset; + if (u32 > 0x10000) { + cset = 0; + } else { + const unsigned short u16 = (unsigned short)u32; + cset = _ConvertUnicode(u16, mapUnicodeSymbolToDecSp, _countof(mapUnicodeSymbolToDecSp)); + } + return cset; +} + +/** + * UTF-32 \x82\xA9\x82\xE7 CP932 \x82֕ϊ\xB7\x82\xB7\x82\xE9 + * @param[in] u32 \x95ϊ\xB7\x82\xB7\x82\xE9UTF-32 + * @param[in,out] mb_ptr \x95ϊ\xB7\x8C\xE3CP932\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2) + * @param[in] mb_len CP932\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x95\xB6\x8E\x9A\x90\x94,sizeof(wchar_t)*wstr_len bytes) + * @retval \x8Fo\x97͂\xB5\x82\xBDCP932\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94) + * 0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD) + */ +size_t UTF32ToCP932(uint32_t u32, char *mb_ptr, size_t mb_len) +{ + size_t cp932_out; + const uint16_t cp932 = UTF32_CP932(u32); + if (cp932 == 0 && u32 != 0) { + return 0; + } + if (mb_ptr == NULL) { + mb_len = 2; + } + if (cp932 < 0x100) { + if (mb_len >= 1) { + if (mb_ptr != NULL) { + *mb_ptr = cp932 & 0xff; + } + cp932_out = 1; + } else { + cp932_out = 0; + } + } else { + if (mb_len >= 2) { + if (mb_ptr != NULL) { + mb_ptr[0] = (cp932 >> 8) & 0xff; + mb_ptr[1] = cp932 & 0xff; + } + cp932_out = 2; + } else { + cp932_out = 0; + } + } + return cp932_out; +} + +size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len) +{ + wchar_t u16_str[2]; + size_t u16_len; + u16_len = UTF32ToUTF16(u32, u16_str, 2); + 
if (u16_len == 0) { + return 0; + } + mb_len = WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, NULL); + if (mb_len == 1 && mb_ptr[0] == '?' && u32 != '?') { + // \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x82Ƃ\xAB\x81A\x96߂\xE8\x92l=1, \x95\xB6\x8E\x9A[0]='?' \x82\xF0\x95Ԃ\xB5\x82Ă\xAD\x82\xE9 + mb_len = 0; + } + return mb_len; +} + +/** + * wchar_t(UTF-16)\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x82ɕϊ\xB7\x82\xB7\x82\xE9 + * + * @param[in] *wstr_ptr wchar_t\x95\xB6\x8E\x9A\x97\xF1 + * @param[in,out] *wstr_len wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7 + * NULL\x82܂\xBD\x82\xCD0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81AL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6) + * NULL\x88ȊO\x82̂Ƃ\xAB\x93\xFC\x97͂\xB5\x82\xBD\x95\xB6\x8E\x9A\x90\x94\x82\xF0\x95Ԃ\xB7 + * @param[in] *mb_ptr \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82\xB7\x82\xE9\x83|\x83C\x83\x93\x83^ + * (NULL\x82̂Ƃ\xAB\x95ϊ\xB7\x82\xB9\x82\xB8\x82ɕ\xB6\x8E\x9A\x90\x94\x82\xF0\x83J\x83E\x83\x93\x83g\x82\xB7\x82\xE9) + * @param[in,out] *mb_len \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82ł\xAB\x82\xE9\x83T\x83C\x83Y,byte\x90\x94, + * mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x8Fo\x97͉\\x83T\x83C\x83Y\x82͕s\x97v + * \x95ϊ\xB7\x82\xB5\x82\xBD\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82̒\xB7\x82\xB3\x82\xF0\x95Ԃ\xB7 + * L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE + * mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x82ł\xE0\x92\xB7\x82\xB3\x82͕Ԃ\xB7 + * @param[in] UTF32ToMB UTF32\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x82ɕϊ\xB7\x82\xB7\x82\xE9\x8A\x94\x82ւ̃|\x83C\x83\x93\x83^ + */ +static void WideCharToMB(const wchar_t *wstr_ptr, size_t *wstr_len_, + char *mb_ptr, size_t *mb_len_, + size_t (*UTF32ToMB)(uint32_t u32, char *mb_ptr, size_t mb_len)) +{ + size_t wstr_len; + size_t mb_len; + size_t mb_out_sum = 0; + size_t wstr_in = 0; + + assert(wstr_ptr != NULL); + if (mb_ptr == NULL) { + // 
\x95ϊ\xB7\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8F\x91\x82\xAB\x8Fo\x82\xB3\x82Ȃ\xA2 + mb_len = 4; // 1\x95\xB6\x8E\x9A4byte\x82ɂ͎\xFB\x82܂\xE9\x82͂\xB8 + } else { + mb_len = *mb_len_; + } + if (wstr_len_ == NULL || *wstr_len_ == 0) { wstr_len = (int)wcslen(wstr_ptr) + 1; + } else { + wstr_len = *wstr_len_; } - while(u8_len > 0 && wstr_len > 0) { + while(mb_len > 0 && wstr_len > 0) { const wchar_t u16 = *wstr_ptr++; uint32_t u32 = u16; - size_t u8_out; + size_t mb_out; wstr_len--; + wstr_in++; // \x83T\x83\x8D\x83Q\x81[\x83g high? - if (0xd800 <= u16 && u16 < 0xdc00) { + if (IsHighSurrogate(u16)) { if (wstr_len >= 1) { const wchar_t u16_lo = *wstr_ptr++; wstr_len--; + wstr_in++; // \x83T\x83\x8D\x83Q\x81[\x83g low? - if (0xdc00 <= u16_lo && u16_lo < 0xe000) { + if (IsLowSurrogate(u16_lo)) { // \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00); } else { @@ -224,34 +630,75 @@ goto unknown_code; } } else { - unknown_code: - if (u8_ptr != NULL) { - *u8_ptr++ = '?'; - } - u8_out = 1; - goto loop_next; + goto unknown_code; } } - u8_out = UTF32ToUTF8(u32, u8_ptr, u8_len); - if (u8_out == 0) { - goto unknown_code; + mb_out = UTF32ToMB(u32, mb_ptr, mb_len); + if (mb_out == 0) { + unknown_code: + if (mb_ptr != NULL) { + // \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ + *mb_ptr++ = '?'; + } + mb_out = 1; } - loop_next: - u8_out_sum += u8_out; - if (u8_ptr != NULL) { - u8_ptr += u8_out; - u8_len -= u8_out; + mb_out_sum += mb_out; + if (mb_ptr != NULL) { + mb_ptr += mb_out; + mb_len -= mb_out; } } - return u8_out_sum; + + if (wstr_len_ != NULL) { + *wstr_len_ = wstr_in; + } + *mb_len_ = mb_out_sum; } +// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5 +void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len) +{ + WideCharToMB(wstr_ptr, wstr_len, u8_ptr, u8_len, UTF32ToUTF8); +} + +void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t 
*cp932_len) +{ + WideCharToMB(wstr_ptr, wstr_len, + cp932_ptr, cp932_len, + UTF32ToCP932); +} + +void WideCharToMBCP(const wchar_t *wstr_ptr, size_t *wstr_len, char *mb_ptr, size_t *mb_len, + int code_page) +{ + size_t (*utf32_to_mb)(uint32_t u32, char *mb_ptr, size_t mb_len); + switch (code_page) { + case CP_UTF8: + utf32_to_mb = UTF32ToUTF8; + break; + case 932: + utf32_to_mb = UTF32ToCP932; + break; + default: + *mb_len = 0; + return; + } + + WideCharToMB(wstr_ptr, wstr_len, + mb_ptr, mb_len, + utf32_to_mb); +} + // MultiByteToWideChar\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5 -int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len) +int UTF8ToWideChar(const char *u8_ptr, int u8_len_, wchar_t *wstr_ptr, int wstr_len_) { + size_t u8_len; + size_t wstr_len = wstr_len_; size_t u16_out_sum = 0; - if (u8_len < 0) { + if (u8_len_ < 0) { u8_len = strlen(u8_ptr) + 1; + } else { + u8_len = u8_len_; } if (wstr_ptr == NULL) { wstr_len = 1; @@ -295,16 +742,18 @@ } u16_out_sum += u16_out; } - return u16_out_sum; + return (int)u16_out_sum; } /** * wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7 * @param[in] *wstr_ptr wchar_t\x95\xB6\x8E\x9A\x97\xF1 - * @param[in] wstr_len wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE) + * @param[in] wstr_len wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCDL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6) * @param[in] code_page \x95ϊ\xB7\x90\xE6\x83R\x81[\x83h\x83y\x81[\x83W - * @param[out] *mb_len_ mb\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(NULL\x82̂Ƃ\xAB\x93\xE0\x95\x94\x83G\x83\x89\x81[) + * @param[out] *mb_len_ \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,byte\x90\x94,L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE + * (NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97\xF0\x95Ԃ\xB3\x82Ȃ\xA2) * @retval 
mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[) + * \x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6 */ char *_WideCharToMultiByte(const wchar_t *wstr_ptr, size_t wstr_len, int code_page, size_t *mb_len_) { @@ -317,9 +766,11 @@ wstr_len = wcslen(wstr_ptr) + 1; } int len; - if (code_page == CP_UTF8) { - len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len, - NULL, 0); + if (code_page == CP_UTF8 || code_page == 932) { + size_t wl = wstr_len; + size_t ml; + WideCharToMBCP(wstr_ptr, &wl, NULL, &ml, code_page); + len = ml; } else { len = ::WideCharToMultiByte(code_page, flags, wstr_ptr, (DWORD)wstr_len, @@ -333,9 +784,11 @@ if (mb_ptr == NULL) { return NULL; } - if (code_page == CP_UTF8) { - len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len, - mb_ptr, len); + if (code_page == CP_UTF8 || code_page == 932) { + size_t wl = wstr_len; + size_t ml = len; + WideCharToMBCP(wstr_ptr, &wl, mb_ptr, &ml, code_page); + len = ml; } else { len = ::WideCharToMultiByte(code_page, flags, wstr_ptr, (DWORD)wstr_len, @@ -347,7 +800,8 @@ return NULL; } if (mb_len_ != NULL) { - *mb_len_ = len - 1; + // \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(byte\x90\x94)\x82\xF0\x95Ԃ\xB7 + *mb_len_ = len; } return mb_ptr; } @@ -355,10 +809,12 @@ /** * \x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82\xF0wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7 * @param[in] *str_ptr mb(char)\x95\xB6\x8E\x9A\x97\xF1 - * @param[in] str_len mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE) + * @param[in] str_len mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCD'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6) * @param[in] code_page \x95ϊ\xB7\x8C\xB3\x83R\x81[\x83h\x83y\x81[\x83W - * @param[out] *w_len_ wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7 - * @retval mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[) + * @param[out] *w_len_ 
wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,wchar_t\x90\x94,'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7L'\0'\x82\xE0\x8A܂\xDE + * (NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97\xF0\x95Ԃ\xB3\x82Ȃ\xA2) + * @retval wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[) + * \x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6 */ wchar_t *_MultiByteToWideChar(const char *str_ptr, size_t str_len, int code_page, size_t *w_len_) { @@ -402,7 +858,8 @@ return NULL; } if (w_len_ != NULL) { - *w_len_ = len - 1; + // \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(wchar_t\x90\x94)\x82\xF0\x95Ԃ\xB7 + *w_len_ = len; } return wstr_ptr; } Modified: trunk/teraterm/common/codeconv.h =================================================================== --- trunk/teraterm/common/codeconv.h 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/common/codeconv.h 2019-03-09 17:32:42 UTC (rev 7462) @@ -35,12 +35,24 @@ #endif -// 1char +// simple code convert +unsigned int CP932ToUTF32(unsigned short cp932); +unsigned short UTF32ToDecSp(unsigned int u32); +unsigned int MBCPToUTF32(unsigned short KCode, int CodePage); +unsigned short UTF32_CP932(unsigned int u32); + +// 1char ToUTF32 +size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, unsigned int *u32_); +size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32); +// 1char UTF32To +size_t UTF32ToUTF16(unsigned int u32, wchar_t *wstr_ptr, size_t wstr_len); size_t UTF32ToUTF8(unsigned int u32, char *u8_ptr, size_t u8_len); -size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, unsigned int *u32_); +size_t UTF32ToCP932(unsigned int u32, char *mb_ptr, size_t mb_len); +size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len); // MultiByteToWideChar() wrappers -int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len); +void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len); +void 
WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len); int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len); // API wrappers Modified: trunk/teraterm/teraterm/CMakeLists.txt =================================================================== --- trunk/teraterm/teraterm/CMakeLists.txt 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/teraterm/CMakeLists.txt 2019-03-09 17:32:42 UTC (rev 7462) @@ -31,7 +31,12 @@ ../common/i18n.h ../common/dllutil.cpp ../common/dllutil.h - ../ttpcmn/language.h + ../common/codeconv.h + ../common/codeconv.cpp + # + ../teraterm/unisym2decsp.map + ../teraterm/uni2sjis.map + ../ttpcmn/sjis2uni.map ) source_group( @@ -102,8 +107,6 @@ WSAAsyncGetAddrInfo.c WSAAsyncGetAddrInfo.h # - uni2sjis.map - unisym2decsp.map uni_combining.map # teraterm.manifest Modified: trunk/teraterm/teraterm/vtterm.c =================================================================== --- trunk/teraterm/teraterm/vtterm.c 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/teraterm/vtterm.c 2019-03-09 17:32:42 UTC (rev 7462) @@ -52,7 +52,8 @@ #include "telnet.h" #include "ttime.h" #include "clipboar.h" -#include "../ttpcmn/language.h" +#include "codeconv.h" +#include "codeconv.h" #include "vtterm.h" @@ -5408,13 +5409,6 @@ } // -// UTF-8 -// -#include "uni2sjis.map" -#include "unisym2decsp.map" - - -// // Unicode Combining Character Support // #include "uni_combining.map" @@ -5467,17 +5461,22 @@ return (index); } -// unicode(UTF-16,wchar_t)\x82\xF0\x83o\x83b\x83t\x83@\x82֏\x91\x82\xAB\x8D\x9E\x82\xDE +// unicode(UTF-32,wchar_t)\x82\xF0\x83o\x83b\x83t\x83@\x82֏\x91\x82\xAB\x8D\x9E\x82\xDE static void UnicodeToCP932(unsigned int code) { - wchar_t wchar = (wchar_t)code; + wchar_t wchar; int ret; char mbchar[2]; unsigned short cset; + if (code >= 0x10000) { + goto unknown; + } + wchar = (wchar_t)code; + // Unicode\x82\xA9\x82\xE7DEC\x93\xC1\x8Eꕶ\x8E\x9A\x82ւ̃}\x83b\x83s\x83\x93\x83O if 
(ts.UnicodeDecSpMapping) { - cset = ConvertUnicode(wchar, mapUnicodeSymbolToDecSp, MAPSIZE(mapUnicodeSymbolToDecSp)); + cset = UTF32ToDecSp(wchar); if (((cset >> 8) & ts.UnicodeDecSpMapping) != 0) { PutDecSp(cset & 0xff); return; @@ -5485,7 +5484,24 @@ } // Unicode -> \x93\xE0\x95\x94\x83R\x81[\x83h(ts.CodePage)\x82֕ϊ\xB7\x82\xB5\x82ďo\x97\xCD - ret = WideCharToMultiByte(ts.CodePage, 0, &wchar, 1, mbchar, 2, NULL, NULL); + if (ts.CodePage == 932) { + ret = (int)UTF16ToCP932(&wchar, 1, &cset); + if (ret == 0) { + // \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD + ; + } else if (cset < 0x100) { + // 1byte\x95\xB6\x8E\x9A + mbchar[0] = (char)cset; + ret = 1; + } else { + // 2byte\x95\xB6\x8E\x9A + mbchar[0] = (char)(cset >> 8); + mbchar[1] = (char)(cset & 0xff); + ret = 2; + } + } else { + ret = WideCharToMultiByte(ts.CodePage, 0, &wchar, 1, mbchar, 2, NULL, NULL); + } if (ret == 1 && mbchar[0] == '?' && code != '?') { // \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x82Ƃ\xAB\x81Aret=1, '?' \x82\xF0\x95Ԃ\xB5\x82Ă\xAD\x82\xE9 ret = 0; @@ -5492,17 +5508,7 @@ } switch (ret) { case 0: - if (ts.CodePage == 932) { - // CP932 - // U+301C\x82Ȃǂ͕ϊ\xB7\x82ł\xAB\x82Ȃ\xA2\x81BUnicode -> Shift_JIS\x82֕ϊ\xB7\x82\xB5\x82Ă݂\xE9\x81B - cset = ConvertUnicode(code, mapUnicodeToSJIS, MAPSIZE(mapUnicodeToSJIS)); - if (cset != 0) { - Kanji = cset & 0xff00; - PutKanji(cset & 0x00ff); - return; - } - } - + unknown: PutChar('?'); if (ts.UnknownUnicodeCharaAsWide) { PutChar('?'); Modified: trunk/teraterm/ttpcmn/language.c =================================================================== --- trunk/teraterm/ttpcmn/language.c 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/ttpcmn/language.c 2019-03-09 17:32:42 UTC (rev 7462) @@ -35,9 +35,8 @@ #include <locale.h> #include "language.h" -#include "codeconv.h" -#include "sjis2uni.map" +// export\x82\xB3\x82\xEA\x82Ă\xA2\x82\xE9 unsigned short ConvertUnicode(unsigned short code, const codemap_t *table, int tmax) { int low, mid, high; @@ -64,69 
+63,6 @@ return (result); } -// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF8\x82֕ϊ\xB7\x82\xB7\x82\xE9 -unsigned int PASCAL SJIS2UTF8(WORD KCode, int *byte, int CodePage) -{ - wchar_t wchar; - int ret; - unsigned int code; - unsigned int c, c1, c2, c3; - unsigned char buf[3]; - unsigned char KCode_h; - int len = 0; - - // \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xA9\x82\xE7UTF-16LE\x82֕ϊ\xB7\x82\xB7\x82\xE9 - KCode_h = (unsigned char)(KCode >> 8); - if (KCode_h != 0) { - buf[len++] = KCode_h; - } - buf[len++] = KCode & 0xff; - ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1); - if (ret <= 0) { - // \x95ϊ\xB7\x8E\xB8\x94s - unsigned short cset = 0; - if (CodePage == 932) { - // CP932 - cset = ConvertUnicode(KCode, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0])); - } - if (cset == 0) { - c = 0xfffd; // U+FFFD: Replacement Character - } else { - c = cset; - } - } else { - c = (unsigned int)wchar; - } - - // UTF-16LE\x82\xA9\x82\xE7UTF-8\x82֕ϊ\xB7\x82\xB7\x82\xE9 - if (c <= 0x0000007f) { - // 0x00000000 <= c <= 0x0000007f - code = (c & 0xff); - *byte = 1; - - } else if (c <= 0x000007ff) { - // 0x00000080 <= c <= 0x000007ff - c1 = ((c >> 6) & 0x1f) | 0xc0; - c2 = (c & 0x3f) | 0x80; - code = (c1 << 8) | c2; - *byte = 2; - - } else if (c <= 0x0000ffff) { - // 0x00000800 <= c <= 0x0000ffff - c1 = ((c >> 12) & 0xf) | 0xe0; - c2 = ((c >> 6) & 0x3f) | 0x80; - c3 = ((c) & 0x3f) | 0x80; - code = (c1 << 16) | (c2 << 8) | c3; - *byte = 3; - } else { - code = KCode; - *byte = 2; - } - - return (code); -} - - // Japanese SJIS -> JIS WORD PASCAL SJIS2JIS(WORD KCode) { Modified: trunk/teraterm/ttpcmn/language.h =================================================================== --- trunk/teraterm/ttpcmn/language.h 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/ttpcmn/language.h 2019-03-09 17:32:42 UTC (rev 7462) @@ -35,13 +35,11 @@ #endif /* proto types */ -unsigned int PASCAL SJIS2UTF8(WORD KCode, int *byte, 
int CodePage); WORD PASCAL SJIS2JIS(WORD KCode); WORD PASCAL SJIS2EUC(WORD KCode); WORD PASCAL JIS2SJIS(WORD KCode); BYTE PASCAL RussConv(int cin, int cout, BYTE b); void PASCAL RussConvStr(int cin, int cout, PCHAR Str, int count); -unsigned short ConvertUnicode(unsigned short code, const codemap_t *table, int tmax); #ifdef __cplusplus } Modified: trunk/teraterm/ttpcmn/ttcmn.c =================================================================== --- trunk/teraterm/ttpcmn/ttcmn.c 2019-03-05 16:22:05 UTC (rev 7461) +++ trunk/teraterm/ttpcmn/ttcmn.c 2019-03-09 17:32:42 UTC (rev 7462) @@ -1582,25 +1582,52 @@ return i; } +// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF-32(UTF-16LE)\x82֕ϊ\xB7\x82\xB7\x82\xE9 +static unsigned int SJIS2UTF32(WORD KCode, int CodePage) +{ + unsigned int c; + + // \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xA9\x82\xE7UTF-16LE\x82֕ϊ\xB7\x82\xB7\x82\xE9 + if (CodePage == 932) { + c = CP932ToUTF32(KCode); + } else { + unsigned char buf[3]; + wchar_t wchar; + int ret; + int len = 0; + if (KCode < 0x100) { + buf[0] = KCode & 0xff; + len = 1; + } else { + buf[0] = KCode >> 8; + buf[1] = KCode & 0xff; + len = 2; + } + ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1); + if (ret <= 0) { + c = 0; + } else { + c = (unsigned int)wchar; + } + } + if (c <= 0) { + // \x95ϊ\xB7\x8E\xB8\x94s + c = 0xfffd; // U+FFFD: Replacement Character + } + + return c; +} + +// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF-8\x82֏o\x97͂\xB7\x82\xE9 static int OutputTextUTF8(WORD K, char *TempStr, PComVar cv) { + int CodePage = *cv->CodePage; unsigned int code; int outlen; - int TempLen = 0; - code = SJIS2UTF8(K, &outlen, *cv->CodePage); - switch (outlen) { - case 4: - TempStr[TempLen++] = (code >> 24) & 0xff; - case 3: - TempStr[TempLen++] = (code >> 16) & 0xff; - case 2: - TempStr[TempLen++] = (code >> 8) & 0xff; - case 1: - TempStr[TempLen++] = code & 0xff; - } - - return TempLen; + code = SJIS2UTF32(K, CodePage); + outlen = 
UTF32ToUTF8(code, TempStr, 4); + return outlen; } //