[Ttssh2-commit] [8805] UTF-32 文字列変換追加

Back to archive index
scmno****@osdn***** scmno****@osdn*****
2020年 6月 23日 (火) 00:10:25 JST


Revision: 8805
          https://osdn.net/projects/ttssh2/scm/svn/commits/8805
Author:   zmatsuo
Date:     2020-06-23 00:10:23 +0900 (Tue, 23 Jun 2020)
Log Message:
-----------
UTF-32 文字列変換追加

Modified Paths:
--------------
    branches/broadcast/teraterm/common/codeconv.cpp
    branches/broadcast/teraterm/common/codeconv.h

-------------- next part --------------
Modified: branches/broadcast/teraterm/common/codeconv.cpp
===================================================================
--- branches/broadcast/teraterm/common/codeconv.cpp	2020-06-19 16:13:29 UTC (rev 8804)
+++ branches/broadcast/teraterm/common/codeconv.cpp	2020-06-22 15:10:23 UTC (rev 8805)
@@ -732,6 +732,71 @@
 				 utf32_to_mb);
 }
 
+/**
+ *	Convert a wchar_t (UTF-16) string to a UTF-32 string
+ *
+ *	@param[in]		*wstr_ptr	wchar_t string (must not be NULL)
+ *	@param[in,out]	*wstr_len_	length of the wchar_t string, in wchar_t elements
+ *								when NULL or *wstr_len_==0, automatic (the string must be L'\0'-terminated)
+ *								when not NULL, receives the number of wchar_t elements consumed
+ *	@param[out]		*u32_ptr	buffer that receives the converted string
+ *								(when NULL, nothing is written and the output is only counted)
+ *	@param[in,out]	*u32_len_	in: capacity of u32_ptr in char32_t elements, not bytes (ignored when u32_ptr is NULL)
+ *								out: number of char32_t elements produced; must not be NULL
+ *								the terminator counts as one element when it lies inside the input range
+ *								the length is returned even when u32_ptr is NULL
+ */
+void WideCharToUTF32(const wchar_t *wstr_ptr, size_t *wstr_len_,
+					 char32_t *u32_ptr, size_t *u32_len_)
+{
+	size_t wstr_len;
+	size_t u32_len;
+	size_t u32_out = 0;
+	size_t wstr_in = 0;
+
+	assert(wstr_ptr != NULL);
+	if (u32_ptr == NULL) {
+		// count-only mode: the converted string is not written out
+		u32_len = 4;		// any non-zero value works; it is never decremented in this mode
+	} else {
+		u32_len = *u32_len_;
+	}
+	if (wstr_len_ == NULL || *wstr_len_ == 0) {
+		wstr_len = wcslen(wstr_ptr) + 1;
+	} else {
+		wstr_len = *wstr_len_;
+	}
+
+	while(u32_len > 0 && wstr_len > 0) {
+		char32_t u32;
+		unsigned int u32_;
+		size_t wb_in = UTF16ToUTF32(wstr_ptr, wstr_len, &u32_);
+		u32 = u32_;
+		if (wb_in == 0) {
+			// cannot be converted: consume one wchar_t and output '?'
+			wstr_len -= 1;
+			wstr_in += 1;
+			wstr_ptr++;
+			u32 = '?';
+		}
+		else {
+			wstr_len -= wb_in;
+			wstr_in += wb_in;
+			wstr_ptr += wb_in;
+		}
+		if (u32_ptr != NULL) {
+			*u32_ptr++ = u32;
+			u32_len--;
+		}
+		u32_out++;
+	}
+
+	if (wstr_len_ != NULL) {
+		*wstr_len_ = wstr_in;
+	}
+	*u32_len_ = u32_out;
+}
+
 // MultiByteToWideCharのUTF8特化版
 int UTF8ToWideChar(const char *u8_ptr, int u8_len_, wchar_t *wstr_ptr, int wstr_len_)
 {
@@ -865,6 +930,48 @@
 }
 
 /**
+ *	Convert a wchar_t string to a newly allocated UTF-32 string
+ *	Characters that cannot be converted are output as '?'
+ *
+ *	@param[in]	*wstr_ptr	wchar_t string
+ *	@param[in]	wstr_len	length of the wchar_t string (0 = automatic; the string must then be L'\0'-terminated)
+ *	@param[out]	*u32_len_	length of the converted string in char32_t elements (not bytes); the terminator is counted when it was converted
+ *							(when NULL, the length is not returned)
+ *	@retval		pointer to the UTF-32 string (NULL on conversion error or allocation failure)
+ *				free() it after use
+ */
+char32_t *_WideCharToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, size_t *u32_len_)
+{
+	if (u32_len_ != NULL) {
+		*u32_len_ = 0;
+	}
+	if (wstr_len == 0) {
+		wstr_len = wcslen(wstr_ptr) + 1;
+	}
+	// first pass (NULL buffer): only count the char32_t elements needed
+	size_t u32_len;
+	size_t wl = wstr_len;
+	WideCharToUTF32(wstr_ptr, &wl, NULL, &u32_len);
+	if (u32_len == 0) {
+		return NULL;
+	}
+	char32_t *u32_ptr = (char32_t *)malloc(u32_len * sizeof(char32_t));
+	if (u32_ptr == NULL) {
+		return NULL;
+	}
+	WideCharToUTF32(wstr_ptr, &wl, u32_ptr, &u32_len);
+	if (u32_len == 0) {
+		free(u32_ptr);
+		return NULL;
+	}
+	if (u32_len_ != NULL) {
+		// report the number of converted char32_t elements (not bytes)
+		*u32_len_ = u32_len;
+	}
+	return u32_ptr;
+}
+
+/**
  *	マルチバイト文字列をwchar_t文字列へ変換
  *	@param[in]	*str_ptr	mb(char)文字列
  *	@param[in]	str_len		mb(char)文字列長(0のとき自動、自動のときは'\0'でターミネートすること)
@@ -989,6 +1096,12 @@
 	return strU8;
 }
 
+// Convert an L'\0'-terminated wchar_t string to malloc'ed UTF-32 (NULL on failure); free() after use.
+char32_t *ToU32W(const wchar_t *strW)
+{
+	return _WideCharToUTF32(strW, 0, NULL);	// wstr_len is a size_t, not a pointer: 0 selects automatic length
+}
+
 //////////////////////////////////////////////////////////////////////////////
 
 u8::u8()

Modified: branches/broadcast/teraterm/common/codeconv.h
===================================================================
--- branches/broadcast/teraterm/common/codeconv.h	2020-06-19 16:13:29 UTC (rev 8804)
+++ branches/broadcast/teraterm/common/codeconv.h	2020-06-22 15:10:23 UTC (rev 8805)
@@ -54,10 +54,13 @@
 // MultiByteToWideChar() wrappers
 void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len);
 void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len);
+void WideCharToUTF32(const wchar_t *wstr_ptr, size_t *wstr_len_,
+					 char32_t *u32_ptr, size_t *u32_len_);
 int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len);
 
 // API wrappers
 char *_WideCharToMultiByte(const wchar_t *wstr_ptr, size_t wstr_len, int code_page, size_t *mb_len_);
+char32_t *_WideCharToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, size_t *u32_len_);
 wchar_t *_MultiByteToWideChar(const char *str_ptr, size_t str_len, int code_page, size_t *w_len_);
 
 // convinience funcs  (for windows api params)
@@ -69,6 +72,7 @@
 wchar_t *ToWcharU8(const char *strU8);
 char *ToU8A(const char *strA);
 char *ToU8W(const wchar_t *strW);
+char32_t *ToU32W(const wchar_t *strW);
 
 #if defined(_UNICODE)
 #define ToTcharA(s)		ToWcharA(s)


Ttssh2-commit メーリングリストの案内
Back to archive index