/* $NoKeywords: $ */ /* // // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved. // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert // McNeel & Associates. // // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY. // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF // MERCHANTABILITY ARE HEREBY DISCLAIMED. // // For complete openNURBS copyright information see . // //////////////////////////////////////////////////////////////// */ #include "opennurbs.h" #if !defined(ON_COMPILING_OPENNURBS) // This check is included in all opennurbs source .c and .cpp files to insure // ON_COMPILING_OPENNURBS is defined when opennurbs source is compiled. // When opennurbs source is being compiled, ON_COMPILING_OPENNURBS is defined // and the opennurbs .h files alter what is declared and how it is declared. #error ON_COMPILING_OPENNURBS must be defined when compiling opennurbs #endif int ON_IsValidUnicodeCodePoint(ON__UINT32 u) { return (u < 0xD800 || (u >= 0xE000 && u <= 0x10FFFF)); } int ON_IsValidUTF32Value( ON__UINT32 c ) { return (c < 0xD800 || (c >= 0xE000 && c <= 0x10FFFF)); } int ON_IsValidSingleElementUTF16Value(ON__UINT32 c) { return ((c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFF)); } int ON_IsValidUTF16Singleton(ON__UINT32 c) { return ((c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFF)); } enum ON_UnicodeEncoding ON_UnicodeNativeCPU_UTF16() { return (ON::endian::little_endian== ON::Endian()) ? ON_UTF_16LE : ON_UTF_16BE; } enum ON_UnicodeEncoding ON_UnicodeNativeCPU_UTF32() { return (ON::endian::little_endian== ON::Endian()) ? ON_UTF_32LE : ON_UTF_32BE; } int ON_IsValidSingleByteUTF8CharValue( char c ) { return (c >= 0 && c <= 0x7F); } int ON_IsValidUTF8SingletonChar( char c ) { return (c >= 0 && c <= 0x7F); } int ON_IsValidSingleElementUTF8Value( ON__UINT32 c ) { return (c <= 0x7F); } int ON_IsValidUTF8Singleton( ON__UINT32 c ) { return (c <= 0x7FU); } int ON_IsValidUTF16SurrogatePair( unsigned int w1, unsigned int w2 ) { return ( w1 >= 0xD800U && w1 < 0xDC00 && w2 >= 0xDC00 && w2 < 0xE000 ); } unsigned int ON_DecodeUTF16SurrogatePair( unsigned int u1, unsigned int u2, unsigned int error_code_point ) { if (u1 >= 0xD800U && u1 < 0xDC00 && u2 >= 0xDC00 && u2 < 0xE000) { return ((u1-0xD800)*0x400 + (u2-0xDC00) + 0x10000); } return error_code_point; } int ON_IsValidSingleElementWideCharValue( wchar_t w ) { #pragma ON_PRAGMA_WARNING_PUSH // warning C4127: conditional expression is constant #pragma ON_PRAGMA_WARNING_DISABLE_MSC( 4127 ) if (1 == sizeof(w)) return ON_IsValidSingleElementUTF8Value((ON__UINT32)w); if (2 == sizeof(w)) return ON_IsValidSingleElementUTF16Value((ON__UINT32)w); return ON_IsValidUTF32Value((ON__UINT32)w); #pragma ON_PRAGMA_WARNING_POP } enum ON_UnicodeEncoding ON_IsUTFByteOrderMark( const void* buffer, size_t sizeof_buffer ) { if ( 0 != buffer && sizeof_buffer >= 2 ) { const unsigned char* b = static_cast(buffer); if ( 0 == b[0] ) { if ( sizeof_buffer >= 4 && 0 == b[1] && 0xFE == b[2] && 0xFF == b[3] ) return ON_UTF_32BE; } else if ( 0xEF == b[0] ) { if ( sizeof_buffer >= 3 && 0xBB == b[1] && 0xBF == b[2] ) return ON_UTF_8; } else if ( 0xFE == b[0] ) { if ( 0xFF == b[1] ) return ON_UTF_16BE; } else if ( 0xFF == b[0] && 0xFE == b[1] ) { return ( sizeof_buffer >= 4 && 0 == b[2] && 0 == b[3] ) ? ON_UTF_32LE : ON_UTF_16LE; } } return ON_UTF_unset; } unsigned int ON_UTFSizeofByteOrderMark( ON_UnicodeEncoding e ) { unsigned int sizeof_bom; switch (e) { case ON_UTF_8: sizeof_bom = 3; break; case ON_UTF_16: case ON_UTF_16BE: case ON_UTF_16LE: sizeof_bom = 2; break; case ON_UTF_32: case ON_UTF_32BE: case ON_UTF_32LE: sizeof_bom = 4; break; default: sizeof_bom = 0; break; } return sizeof_bom; } static int ON_IsUTF8ByteOrderMark( const char* sUTF8, int sUTF8_count ) { if ( 0 == sUTF8 ) return 0; if ( -1 != sUTF8_count || sUTF8_count < 3 ) return 0; return (0xEF == (unsigned char)(sUTF8[0]) && 0xBB == (unsigned char)(sUTF8[1]) && 0xBF == (unsigned char)(sUTF8[2])); } bool ON_IsUnicodeControlCodePoint( ON__UINT32 code_point, bool bNullReturnValue ) { if (0 == code_point) return bNullReturnValue ? true : false; if (code_point < 0x0020) return true; // below space if (code_point < 0x007f) return false; if (code_point <= 0x00A0) return true; // del to 0xA0 if (code_point < 0x00AD) return false; if (code_point == 0x00AD) return true; // soft hyphen return false; } int ON_EncodeUTF8( ON__UINT32 u, char sUTF8[6] ) { ON__UINT32 c; if ( u <= 0x7F ) { // 1 byte UTF8 encoding: 0xxxxxxx (7 bits of u) sUTF8[0] = (char)u; return 1; } if ( u <= 0x7FF ) { // 2 byte UTF8 encoding: 110xxxxx, 10xxxxxx (11 bits of u) c = (u / 0x40); // c = 000xxxxx c |= 0xC0; // |= 11000000 sUTF8[0] = (char)c; c = (u & 0x3F); c |= 0x80; sUTF8[1] = (char)c; return 2; } if ( u <= 0xFFFF ) { // 3 byte UTF8 encoding: 1110xxxx, 10xxxxxx, 10xxxxxx (16 bits of u) c = (u / 0x1000); // c = 0000xxxx c |= 0xE0; // |= 11100000 sUTF8[0] = (char)c; c = ((u & 0xFFF) / 0x40); c |= 0x80; sUTF8[1] = (char)c; c = u & 0x3F; c |= 0x80; sUTF8[2] = (char)c; return 3; } if ( u <= 0x1FFFFF ) { // (maximum valid unicode codepoint is 0x10FFFF) // 4 byte UTF8 encoding: 11110xxx, 10xxxxxx, 10xxxxxx, 10xxxxxx (21 bits of u) // Note: 0x10FFFF is the maximum valid unicode code point. // For u > 0x10FFFF and u <= 0x1FFFFF, this calculation encodes the low 21 bits of u. c = (u / 0x40000); // c = 00000xxx c |= 0xF0; // |= 11110000 sUTF8[0] = (char)c; c = ((u & 0x3FFFF)/0x1000); c |= 0x80; sUTF8[1] = (char)c; c = ((u & 0xFFF) / 0x40); c |= 0x80; sUTF8[2] = (char)c; c = u & 0x3F; c |= 0x80; sUTF8[3] = (char)c; return 4; } if ( u <= 0x3FFFFFF ) { // 5 byte encoding: 111110xx, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx (26 bits of u) // Note: 0x10FFFF is the maximum valid unicode code point. c = (u / 0x1000000); // c = 000000xx c |= 0xF8; // |= 11111000 sUTF8[0] = (char)c; c = ((u & 0xFFFFFF)/0x40000); c |= 0x80; sUTF8[1] = (char)c; c = ((u & 0x3FFFF)/0x1000); c |= 0x80; sUTF8[2] = (char)c; c = ((u & 0xFFF) / 0x40); c |= 0x80; sUTF8[3] = (char)c; c = u & 0x3F; c |= 0x80; sUTF8[4] = (char)c; return 5; } if ( u <= 0x7FFFFFFF ) { // 6 byte encoding: 1111110x, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx (31 bits of u) // Note: 0x10FFFF is the maximum valid unicode code point. c = (u / 0x40000000); // c = 00000000x c |= 0xFC; // |= 11111100 sUTF8[0] = (char)c; c = ((u & 0x3FFFFFFF)/0x1000000); c |= 0x80; sUTF8[1] = (char)c; c = ((u & 0xFFFFFF)/0x40000); c |= 0x80; sUTF8[2] = (char)c; c = ((u & 0x3FFFF)/0x1000); c |= 0x80; sUTF8[3] = (char)c; c = ((u & 0xFFF) / 0x40); c |= 0x80; sUTF8[4] = (char)c; c = u & 0x3F; c |= 0x80; sUTF8[5] = (char)c; return 6; } return 0; } static int ON_DecodeUTF8Helper( const char* sUTF8, int sUTF8_count, ON__UINT32* value, unsigned int* error_status ) { #define INPUT_BUFFER_TOO_SHORT 16 #define INVALID_CONTINUATION_VALUE 16 #define OVERLONG_ENCODING 8 ON__UINT32 u; char c; c = sUTF8[0]; if ( 0 == (0x80 & c) ) { // 1 byte ASCII encoding: 0xxxxxxx *value = c; return 1; } if ( 0xC0 == ( 0xE0 & c) ) { // 2 byte character encoding: 10xxxxxx, 10xxxxxx if ( sUTF8_count < 2 ) { *error_status |= INPUT_BUFFER_TOO_SHORT; // input buffer too short return 0; } u = (0x1F & c); c = sUTF8[1]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); if ( u <= 0x7F ) { *error_status |= OVERLONG_ENCODING; // overlong 2 byte character encoding } *value = u; return 2; } if ( 0xE0 == ( 0xF0 & c) ) { // 3 byte character encoding: 110xxxxx, 10xxxxxx, 10xxxxxx if ( sUTF8_count < 3 ) { *error_status |= INPUT_BUFFER_TOO_SHORT; // input buffer too short return 0; } u = (0x0F & c); c = sUTF8[1]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[2]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); if ( u <= 0x7FF ) { *error_status |= OVERLONG_ENCODING; // overlong 3 byte character encoding } *value = u; return 3; } if ( 0xF0 == ( 0xF8 & c) ) { // 4 byte character encoding: 11110xxx, 10xxxxxx, 10xxxxxx, 10xxxxxx if ( sUTF8_count < 4 ) { *error_status |= INPUT_BUFFER_TOO_SHORT; // input buffer too short return 0; } u = (0x07 & c); c = sUTF8[1]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[2]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[3]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); if ( u <= 0xFFFF ) { *error_status |= OVERLONG_ENCODING; // overlong 4 byte character encoding } *value = u; return 4; } if ( 0xF8 == ( 0xFC & c) ) { // 5 byte character encoding: 111110xx, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx if ( sUTF8_count < 5 ) { *error_status |= INPUT_BUFFER_TOO_SHORT; // input buffer too short return 0; } u = (0x03 & c); c = sUTF8[1]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[2]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[3]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[4]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); if ( u <= 0x1FFFFF ) { *error_status |= OVERLONG_ENCODING; // overlong 5 byte character encoding } *value = u; return 5; } if ( 0xFC == ( 0xFE & c) ) { // 6 byte character encoding: 110xxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx, 10xxxxxx if ( sUTF8_count < 6 ) { *error_status |= INPUT_BUFFER_TOO_SHORT; // input buffer too short return 0; } u = (0x01 & c); c = sUTF8[1]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[2]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[3]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[4]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); c = sUTF8[5]; if ( 0x80 != ( 0xC0 & c) ) { *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 continuation value return 0; } u *= 64; u |= (0x3F & c); if ( u <= 0x3FFFFFF ) { *error_status |= OVERLONG_ENCODING; // overlong 6 byte character encoding } *value = u; return 6; } *error_status |= INVALID_CONTINUATION_VALUE; // invalid UTF=8 start value return 0; #undef INPUT_BUFFER_TOO_SHORT #undef INVALID_CONTINUATION_VALUE #undef OVERLONG_ENCODING } int ON_DecodeUTF8( const char* sUTF8, int sUTF8_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { ON__UINT32 u0, u1; int i0, i1; unsigned int error_status; ON__UINT16 sUTF16[2]; char c; ON_UnicodeErrorParameters local_e = ON_UnicodeErrorParameters::MaskErrors; if (nullptr == e) e = &local_e; if ( 0 == sUTF8 || sUTF8_count <= 0 || 0 == unicode_code_point ) { if ( e ) e->m_error_status |= 1; return 0; } // special cases for most common unicode values // If any error conditions exist, then ON_DecodeUTF8Helper() // is used. if ( 0 == (0x80 & sUTF8[0]) ) { *unicode_code_point = sUTF8[0]; return 1; } c = sUTF8[0]; if ( 0xC0 == ( 0xE0 & c) && sUTF8_count >= 2 ) { // 2 byte character encoding: 10xxxxxx, 10xxxxxx u0 = (0x1F & c); c = sUTF8[1]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); if ( u0 > 0x7F ) { *unicode_code_point = u0; return 2; } } } else if ( 0xE0 == ( 0xF0 & c) && sUTF8_count >= 3 ) { // 3 byte character encoding: 110xxxxx, 10xxxxxx, 10xxxxxx u0 = (0x0F & c); c = sUTF8[1]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); c = sUTF8[2]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); if ( u0 >= 0x0800 && (u0 <= 0xD800 || u0 >= 0xE000) ) { *unicode_code_point = u0; return 3; } } } } else if ( 0xF0 == ( 0xF8 & c) && sUTF8_count >= 4 ) { // 4 byte character encoding: 11110xxx, 10xxxxxx, 10xxxxxx, 10xxxxxx u0 = (0x07 & c); c = sUTF8[1]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); c = sUTF8[2]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); c = sUTF8[3]; if ( 0x80 == ( 0xC0 & c) ) { u0 *= 64; u0 |= (0x3F & c); if ( u0 >= 0x010000 && u0 <= 0x10FFFF ) { *unicode_code_point = u0; return 4; } } } } } error_status = 0; u0 = 0xFFFFFFFF; i0 = ON_DecodeUTF8Helper(sUTF8,sUTF8_count,&u0,&error_status); if ( i0 > 0 && 0 == error_status && (u0 < 0xD800 || (u0 >= 0xE000 && u0 <= 0x10FFFF) ) ) { // valid UTF-8 multibyte encoding parsed *unicode_code_point = u0; return i0; } // handle errors if ( 0 == e ) { // no errors are masked. return 0; } // report error condition e->m_error_status |= error_status; if ( error_status != (error_status & e->m_error_mask) ) { // this error is not masked return 0; } if ( i0 <= 0 ) { i0 = 1; if ( ON_IsValidUnicodeCodePoint(e->m_error_code_point) ) { // skip to next UTF-8 start elemement for ( /*empty for initializer*/; i0 < sUTF8_count; i0++ ) { // Search for the next element of sUTF8[] that is the // start of a UTF-8 encoding sequence. c = sUTF8[i0]; if ( 0 == (0x80 & c) // ASCII 0 - 127 || 0xC0 == ( 0xE0 & c) // 2 byte encoding first character || 0xE0 == ( 0xF0 & c) // 3 byte encoding first character || 0xF0 == ( 0xF8 & c) // 4 byte encoding first character || 0xF8 == ( 0xFC & c) // 5 byte encoding first character || 0xFC == ( 0xFE & c) // 6 byte encoding first character ) { // resume parsing at this character break; } } *unicode_code_point = e->m_error_code_point; } return i0; } if ( ON_IsValidUnicodeCodePoint(u0) && 8 == error_status ) { // overlong UTF-8 multibyte encoding of valid unicode code point *unicode_code_point = u0; return i0; } if ( i0 < sUTF8_count && u0 >= 0xD800 && u0 <= 0xDBFF && (0 == error_status || 8 == error_status) && 0 != (4 & e->m_error_mask) ) { // See if a UFT-16 surrogate pair was incorrectly encoded // as two consecutive UTF-8 sequences. u1 = 0xFFFFFFFF; i1 = ON_DecodeUTF8Helper(sUTF8+i0,sUTF8_count-i0,&u1,&error_status); if ( i1 > 0 && (0 == error_status || 8 == error_status) ) { error_status = 0; sUTF16[0] = (ON__UINT16)u0; sUTF16[1] = (ON__UINT16)u1; u0 = 0xFFFFFFFF; if ( 2 == ON_ConvertUTF16ToUTF32(false,sUTF16,2,&u0,1,&error_status,0,0,0) && 0 == error_status && ON_IsValidUnicodeCodePoint(u0) ) { *unicode_code_point = u0; e->m_error_status |= 4; return i0+i1; } } } if ( ON_IsValidUnicodeCodePoint(e->m_error_code_point) ) { *unicode_code_point = e->m_error_code_point; return i0; } return 0; } int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] ) { // put the most comman case first if ( unicode_code_point < 0xD800 ) { // code point values U+0000 ... U+D7FF // = UTF-16 values sUTF16[0] = (ON__UINT16)unicode_code_point; return 1; } if ( unicode_code_point < 0xE000 ) { // 0xD800 ... 0xDFFF are invalid unicode code point values return 0; } if ( unicode_code_point <= 0xFFFF ) { // code point values U+E000 ... U+FFFF // = UTF-16 values sUTF16[0] = (ON__UINT16)unicode_code_point; return 1; } if ( unicode_code_point <= 0x10FFFF ) { // code point values U+10000 ... U+10FFFF // = surrogate pair UTF-16 values unicode_code_point -= 0x10000; sUTF16[0] = (ON__UINT16)(0xD800 + (unicode_code_point / 0x400)); // high surrogate value (0xD800 ... 0xDBFF) sUTF16[1] = (ON__UINT16)(0xDC00 + (unicode_code_point & 0x3FF)); // low surrogate value (0xDC00 ... 0xDFFF) return 2; } // 0x110000 ... 0xFFFFFFFF are invalid unicode code point values return 0; } int ON_DecodeUTF16( const ON__UINT16* sUTF16, int sUTF16_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { ON__UINT32 uhi, ulo; ON_UnicodeErrorParameters local_e = ON_UnicodeErrorParameters::MaskErrors; if (nullptr == e) e = &local_e; if ( 0 == sUTF16 || sUTF16_count <= 0 || 0 == unicode_code_point ) { if ( e ) e->m_error_status |= 1; return 0; } // special case for most common UTF-16 single element values if ( ( sUTF16[0] < 0xD800 ) || ( sUTF16[0] >= 0xE000 ) ) { *unicode_code_point = sUTF16[0]; return 1; } if ( sUTF16_count >= 2 && sUTF16[0] < 0xDC00 && sUTF16[1] >= 0xDC00 && sUTF16[1] < 0xE000 ) { // UTF-16 surrogate pair uhi = sUTF16[0]; ulo = sUTF16[1]; *unicode_code_point = (uhi-0xD800)*0x400 + (ulo-0xDC00) + 0x10000; return 2; } // handle errors if ( 0 == e ) { // no errors are masked. return 0; } // report error condition e->m_error_status |= 16; if ( 16 != (16 & e->m_error_mask) || !ON_IsValidUnicodeCodePoint(e->m_error_code_point) ) { // this error is not masked return 0; } // Search for the next element of sUTF16[] that is a // valid UTF-16 encoding sequence. int i; for ( i = 1; i < sUTF16_count; i++ ) { if ( ( sUTF16[i] < 0xD800 ) || ( sUTF16[i] >= 0xE000 ) ) { // valid single UTF-16 code unit break; } if ( i+1 < sUTF16_count && sUTF16[i] >= 0xD800 && sUTF16[i] < 0xDC00 && sUTF16[i+1] >= 0xDC00 && sUTF16[i+1] < 0xE000 ) { // valid UTF-16 surrogate pair break; } } *unicode_code_point = e->m_error_code_point; return i; } int ON_DecodeUTF16LE( const ON__UINT16* sUTF16, int sUTF16_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { #if defined(ON_LITTLE_ENDIAN) return ON_DecodeUTF16(sUTF16,sUTF16_count,e,unicode_code_point); #else return ON_DecodeSwapByteUTF16(sUTF16,sUTF16_count,e,unicode_code_point); #endif } int ON_DecodeUTF16BE( const ON__UINT16* sUTF16, int sUTF16_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { #if defined(ON_BIG_ENDIAN) return ON_DecodeUTF16(sUTF16,sUTF16_count,e,unicode_code_point); #else return ON_DecodeSwapByteUTF16(sUTF16,sUTF16_count,e,unicode_code_point); #endif } int ON_DecodeUTF32LE( const ON__UINT32* sUTF32, int sUTF32_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { #if defined(ON_LITTLE_ENDIAN) return ON_DecodeUTF32(sUTF32,sUTF32_count,e,unicode_code_point); #else return ON_DecodeSwapByteUTF32(sUTF32,sUTF32_count,e,unicode_code_point); #endif } int ON_DecodeUTF32BE( const ON__UINT32* sUTF32, int sUTF32_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { #if defined(ON_BIG_ENDIAN) return ON_DecodeUTF32(sUTF32,sUTF32_count,e,unicode_code_point); #else return ON_DecodeSwapByteUTF32(sUTF32,sUTF32_count,e,unicode_code_point); #endif } int ON_EncodeWideChar( ON__UINT32 code_point, size_t w_capacity, wchar_t* w ) { int rc = 0; if (nullptr != w && w_capacity > 0) { if (ON_IsValidUnicodeCodePoint(code_point)) { #if 1 == ON_SIZEOF_WCHAR_T char sUTF8[6]; rc = ON_EncodeUTF8(code_point, sUTF8); if (rc > (int)w_capacity) rc = 0; switch (rc) { case 1: w[0] = (wchar_t)sUTF8[0]; break; case 2: w[0] = (wchar_t)sUTF8[0]; w[1] = (wchar_t)sUTF8[1]; break; case 3: w[0] = (wchar_t)sUTF8[0]; w[1] = (wchar_t)sUTF8[1]; w[2] = (wchar_t)sUTF8[2]; break; case 4: w[0] = (wchar_t)sUTF8[0]; w[1] = (wchar_t)sUTF8[1]; w[2] = (wchar_t)sUTF8[2]; w[3] = (wchar_t)sUTF8[3]; break; default: rc = 0; break; } #elif 2 == ON_SIZEOF_WCHAR_T ON__UINT16 sUTF16[2]; rc = ON_EncodeUTF16(code_point, sUTF16); if (rc > (int)w_capacity) rc = 0; switch (rc) { case 1: w[0] = (wchar_t)sUTF16[0]; break; case 2: w[0] = (wchar_t)sUTF16[0]; w[1] = (wchar_t)sUTF16[1]; break; default: rc = 0; break; } #elif 4 == ON_SIZEOF_WCHAR_T if (w_capacity > 0) { w[0] = (wchar_t)code_point; rc = 1; } #endif } if (rc >= 0 && rc < (int)w_capacity) w[rc] = 0; } return rc; } int ON_DecodeWideChar( const wchar_t* sWideChar, int sWideChar_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { const ON_UnicodeEncoding widechar_encoding = ON_WCHAR_T_ENCODING; int rc; switch (widechar_encoding) { #if 1 == ON_SIZEOF_WCHAR_T case ON_UTF_8: rc = ON_DecodeUTF8((const char*)sWideChar,sWideChar_count,e,unicode_code_point); break; #elif 2 == ON_SIZEOF_WCHAR_T case ON_UTF_16: return ON_DecodeUTF16((const ON__UINT16*)sWideChar,sWideChar_count,e,unicode_code_point); break; case ON_UTF_16BE: rc = ON_DecodeUTF16BE((const ON__UINT16*)sWideChar,sWideChar_count,e,unicode_code_point); break; case ON_UTF_16LE: rc = ON_DecodeUTF16LE((const ON__UINT16*)sWideChar,sWideChar_count,e,unicode_code_point); break; #elif 4 == ON_SIZEOF_WCHAR_T case ON_UTF_32: rc = ON_DecodeUTF32((const ON__UINT32*)sWideChar,sWideChar_count,e,unicode_code_point); break; case ON_UTF_32BE: rc = ON_DecodeUTF32BE((const ON__UINT32*)sWideChar,sWideChar_count,e,unicode_code_point); break; case ON_UTF_32LE: rc = ON_DecodeUTF32LE((const ON__UINT32*)sWideChar,sWideChar_count,e,unicode_code_point); break; #endif default: rc = 0; if ( e ) e->m_error_status |= 1; break; } return rc; } int ON_DecodeSwapByteUTF16( const ON__UINT16* sUTF16, int sUTF16_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { int i; ON__UINT32 uhi, ulo; ON__UINT16 w0, w1; const ON__UINT8* p; ON__UINT8* p0; ON__UINT8* p1; ON_UnicodeErrorParameters local_e = ON_UnicodeErrorParameters::MaskErrors; if (nullptr == e) e = &local_e; if ( 0 == sUTF16 || sUTF16_count <= 0 || 0 == unicode_code_point ) { if ( e ) e->m_error_status |= 1; return 0; } // special case for most common UTF-16 single element values // w0 = byte swapped sUTF16[0] p = (const ON__UINT8*)sUTF16; p0 = (ON__UINT8*)&w0; p0[1] = p[0]; p0[0] = p[1]; if ( ( w0 < 0xD800 ) || (w0 >= 0xE000 ) ) { *unicode_code_point = w0; return 1; } if ( sUTF16_count >= 2 && w0 < 0xDC00 ) { // w1 = byte swapped sUTF16[1] p1 = (ON__UINT8*)&w1; p1[1] = p[2]; p1[0] = p[3]; if ( w1 >= 0xDC00 && w1 < 0xE000 ) { // UTF-16 surrogate pair uhi = w0; ulo = w1; *unicode_code_point = (uhi-0xD800)*0x400 + (ulo-0xDC00) + 0x10000; return 2; } } // handle errors if ( 0 == e ) { // no errors are masked. return 0; } // report error condition e->m_error_status |= 16; if ( 16 != (16 & e->m_error_mask) || !ON_IsValidUnicodeCodePoint(e->m_error_code_point) ) { // this error is not masked return 0; } // Search for the next element of sUTF16[] that is a // valid UTF-16 encoding sequence. p1 = (ON__UINT8*)&w1; p += sizeof(sUTF16[0]); for ( i = 1; i < sUTF16_count; i++, p += sizeof(sUTF16[0]) ) { // w0 = byte swapped sUTF16[i] p0[1] = p[0]; p0[0] = p[1]; if ( ( w0 < 0xD800 ) || ( w0 >= 0xE000 ) ) { // valid single UTF-16 code unit break; } if ( i+1 < sUTF16_count && w0 >= 0xD800 && w0 < 0xDC00 ) { // w1 = byte swapped sUTF16[i+1] p1[1] = p[sizeof(sUTF16[0])]; p1[0] = p[sizeof(sUTF16[0])+1]; if ( w1 >= 0xDC00 && w1 < 0xE000 ) { // valid UTF-16 surrogate pair break; } } } *unicode_code_point = e->m_error_code_point; return i; } int ON_ConvertUTF8ToUTF8( int bTestByteOrder, const char* sInputUTF8, int sInputUTF8_count, char* sOutputUTF8, int sOutputUTF8_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const char** sNextInputUTF8 ) { int i, j, k, output_count; ON__UINT32 u; char s[6]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sInputUTF8_count && 0 != sInputUTF8 ) { for ( sInputUTF8_count = 0; 0 != sInputUTF8[sInputUTF8_count]; sInputUTF8_count++) { // empty for body } } if ( 0 == sInputUTF8 || sInputUTF8_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextInputUTF8 ) *sNextInputUTF8 = sInputUTF8; return 0; } if ( 0 == sOutputUTF8_count ) { sOutputUTF8 = 0; sOutputUTF8_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sOutputUTF8 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextInputUTF8 ) *sNextInputUTF8 = sInputUTF8; return 0; } if ( bTestByteOrder && ON_IsUTF8ByteOrderMark(sInputUTF8,sInputUTF8_count) ) { // skip UTF-8 byte order element sInputUTF8_count -= 3; sInputUTF8 += 3; } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; for ( i = 0; i < sInputUTF8_count; i += j ) { j = ON_DecodeUTF8(sInputUTF8+i,sInputUTF8_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF8(u,s); if ( 0 != sOutputUTF8 ) { if ( output_count + k > sOutputUTF8_count ) { e.m_error_status |= 2; break; } memcpy(sOutputUTF8+output_count,s,k*sizeof(sOutputUTF8[0])); } output_count += k; } if ( 0 != sOutputUTF8 && output_count < sOutputUTF8_count) sOutputUTF8[output_count] = 0; if ( sNextInputUTF8 ) *sNextInputUTF8 = sInputUTF8+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF8ToUTF16( int bTestByteOrder, const char* sUTF8, int sUTF8_count, ON__UINT16* sUTF16, int sUTF16_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const char** sNextUTF8 ) { int i, j, k, output_count; ON__UINT32 u; ON__UINT16 w[2]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF8_count && 0 != sUTF8 ) { for ( sUTF8_count = 0; 0 != sUTF8[sUTF8_count]; sUTF8_count++) { // empty for body } } if ( 0 == sUTF8 || sUTF8_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF8 ) *sNextUTF8 = sUTF8; return 0; } if ( bTestByteOrder && ON_IsUTF8ByteOrderMark(sUTF8,sUTF8_count) ) { // skip UTF-8 byte order element sUTF8_count -= 3; sUTF8 += 3; } if ( 0 == sUTF16_count ) { sUTF16 = 0; sUTF16_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF16 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF8 ) *sNextUTF8 = sUTF8; return 0; } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; for ( i = 0; i < sUTF8_count; i += j ) { j = ON_DecodeUTF8(sUTF8+i,sUTF8_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF16(u,w); if ( 0 != sUTF16 ) { if ( output_count + k > sUTF16_count ) { e.m_error_status |= 2; break; } sUTF16[output_count] = w[0]; if ( 2 == k ) sUTF16[output_count+1] = w[1]; } output_count += k; } if ( 0 != sUTF16 && output_count < sUTF16_count) sUTF16[output_count] = 0; if ( sNextUTF8 ) *sNextUTF8 = sUTF8+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF8ToUTF32( int bTestByteOrder, const char* sUTF8, int sUTF8_count, ON__UINT32* sUTF32, int sUTF32_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const char** sNextUTF8 ) { int i, j, output_count; ON__UINT32 u; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF8_count && 0 != sUTF8 ) { for ( sUTF8_count = 0; 0 != sUTF8[sUTF8_count]; sUTF8_count++) { // empty for body } } if ( 0 == sUTF8 || sUTF8_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF8 ) *sNextUTF8 = sUTF8; return 0; } if ( bTestByteOrder && ON_IsUTF8ByteOrderMark(sUTF8,sUTF8_count) ) { // skip UTF-8 byte order element sUTF8_count -= 3; sUTF8 += 3; } if ( 0 == sUTF32_count ) { sUTF32 = 0; sUTF32_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF32 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF8 ) *sNextUTF8 = sUTF8; return 0; } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; for ( i = 0; i < sUTF8_count; i += j ) { j = ON_DecodeUTF8(sUTF8+i,sUTF8_count-i,&e,&u); if ( j <= 0 ) break; if ( 0 != sUTF32 ) { if ( output_count >= sUTF32_count ) { e.m_error_status |= 2; break; } sUTF32[output_count] = u; } output_count++; } if ( 0 != sUTF32 && output_count < sUTF32_count) sUTF32[output_count] = 0; if ( sNextUTF8 ) *sNextUTF8 = sUTF8+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF16ToUTF8( int bTestByteOrder, const ON__UINT16* sUTF16, int sUTF16_count, char* sUTF8, int sUTF8_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT16** sNextUTF16 ) { int i, j, k, output_count, bSwapBytes; ON__UINT32 u; char s[6]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF16_count && 0 != sUTF16 ) { for ( sUTF16_count = 0; 0 != sUTF16[sUTF16_count]; sUTF16_count++) { // empty for body } } if ( 0 == sUTF16 || sUTF16_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } if ( 0 == sUTF8_count ) { sUTF8 = 0; sUTF8_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF8 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } bSwapBytes = false; if ( bTestByteOrder && sUTF16_count > 0 ) { if ( 0xFEFF == sUTF16[0] ) { // skip BOM sUTF16_count--; sUTF16++; } else if ( 0xFFFE == sUTF16[0] ) { // skip BOM and swap bytes in rest of sUTF16 bSwapBytes = true; sUTF16_count--; sUTF16++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; if ( bSwapBytes ) { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeSwapByteUTF16(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF8(u,s); if ( 0 != sUTF8 ) { if ( output_count + k > sUTF8_count ) { e.m_error_status |= 2; break; } memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0])); } output_count += k; } } else { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeUTF16(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF8(u,s); if ( 0 != sUTF8 ) { if ( output_count + k > sUTF8_count ) { e.m_error_status |= 2; break; } memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0])); } output_count += k; } } if ( 0 != sUTF8 && output_count < sUTF8_count) sUTF8[output_count] = 0; if ( sNextUTF16 ) *sNextUTF16 = sUTF16+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF16ToUTF16( int bTestByteOrder, const ON__UINT16* sInputUTF16, int sInputUTF16_count, ON__UINT16* sOutputUTF16, int sOutputUTF16_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT16** sNextInputUTF16 ) { int i, j, k, output_count, bSwapBytes; ON__UINT32 u; ON__UINT16 s[2]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sInputUTF16_count && 0 != sInputUTF16 ) { for ( sInputUTF16_count = 0; 0 != sInputUTF16[sInputUTF16_count]; sInputUTF16_count++) { // empty for body } } if ( 0 == sInputUTF16 || sInputUTF16_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextInputUTF16 ) *sNextInputUTF16 = sInputUTF16; return 0; } if ( 0 == sOutputUTF16_count ) { sOutputUTF16 = 0; sOutputUTF16_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sOutputUTF16 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextInputUTF16 ) *sNextInputUTF16 = sInputUTF16; return 0; } bSwapBytes = false; if ( bTestByteOrder && sInputUTF16_count > 0 ) { if ( 0xFEFF == sInputUTF16[0] ) { // skip BOM sInputUTF16_count--; sInputUTF16++; } else if ( 0xFFFE == sInputUTF16[0] ) { // skip BOM and swap bytes in rest of sInputUTF16 bSwapBytes = true; sInputUTF16_count--; sInputUTF16++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; if ( bSwapBytes ) { for ( i = 0; i < sInputUTF16_count; i += j ) { j = ON_DecodeSwapByteUTF16(sInputUTF16+i,sInputUTF16_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF16(u,s); if ( 0 != sOutputUTF16 ) { if ( output_count + k > sOutputUTF16_count ) { e.m_error_status |= 2; break; } memcpy(sOutputUTF16+output_count,s,k*sizeof(sOutputUTF16[0])); } output_count += k; } } else { for ( i = 0; i < sInputUTF16_count; i += j ) { j = ON_DecodeUTF16(sInputUTF16+i,sInputUTF16_count-i,&e,&u); if ( j <= 0 ) break; k = ON_EncodeUTF16(u,s); if ( 0 != sOutputUTF16 ) { if ( output_count + k > sOutputUTF16_count ) { e.m_error_status |= 2; break; } memcpy(sOutputUTF16+output_count,s,k*sizeof(sOutputUTF16[0])); } output_count += k; } } if ( 0 != sOutputUTF16 && output_count < sOutputUTF16_count) sOutputUTF16[output_count] = 0; if ( sNextInputUTF16 ) *sNextInputUTF16 = sInputUTF16+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF16ToUTF32( int bTestByteOrder, const ON__UINT16* sUTF16, int sUTF16_count, unsigned int* sUTF32, int sUTF32_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT16** sNextUTF16 ) { int i, j, output_count, bSwapBytes; ON__UINT32 u; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF16_count && 0 != sUTF16 ) { for ( sUTF16_count = 0; 0 != sUTF16[sUTF16_count]; sUTF16_count++) { // empty for body } } if ( 0 == sUTF16 || sUTF16_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } if ( 0 == sUTF32_count ) { sUTF32 = 0; sUTF32_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF32 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } bSwapBytes = false; if ( bTestByteOrder && sUTF16_count > 0 ) { if ( 0xFEFF == sUTF16[0] ) { // skip BOM sUTF16_count--; sUTF16++; } else if ( 0xFFFE == sUTF16[0] ) { // skip BOM and swap bytes in rest of sUTF16 bSwapBytes = true; sUTF16_count--; sUTF16++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; if ( bSwapBytes ) { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeSwapByteUTF16(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; if ( 0 != sUTF32 ) { if ( output_count >= sUTF32_count ) { e.m_error_status |= 2; break; } sUTF32[output_count] = u; } output_count++; } } else { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeUTF16(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; if ( 0 != sUTF32 ) { if ( output_count >= sUTF32_count ) { e.m_error_status |= 2; break; } sUTF32[output_count] = u; } output_count++; } } if ( 0 != sUTF32 && output_count < sUTF32_count) sUTF32[output_count] = 0; if ( sNextUTF16 ) *sNextUTF16 = sUTF16+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } static ON__UINT32 SwapBytes32(ON__UINT32 u) { ON__UINT8 b; ON__UINT8* p = (ON__UINT8*)&u; b = p[0]; p[0] = p[3]; p[3] = b; b = p[1]; p[1] = p[2]; p[2] = b; return u; } int ON_DecodeUTF32( const ON__UINT32* sUTF32, int sUTF32_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { ON__UINT32 uhi, ulo; ON_UnicodeErrorParameters local_e = ON_UnicodeErrorParameters::MaskErrors; if (nullptr == e) e = &local_e; if ( 0 == sUTF32 || sUTF32_count <= 0 || 0 == unicode_code_point ) { e->m_error_status |= 1; return 0; } // special case for most common UTF-16 single element values if ( ( sUTF32[0] < 0xD800 ) || ( sUTF32[0] >= 0xE000 && sUTF32[0] <= 0x10FFFF) ) { // valid UTF-32 encoding. *unicode_code_point = sUTF32[0]; return 1; } // handle errors if ( 0 == e ) return 0; if ( sUTF32_count >= 2 && sUTF32[0] < 0xDC00 && sUTF32[1] >= 0xDC00 && sUTF32[1] < 0xE000 ) { // UTF-16 surrogate pair appears in UTF-32 array e->m_error_status |= 4; if ( 0 == (4 & e->m_error_mask) ) return 0; // this error is not masked. uhi = sUTF32[0]; ulo = sUTF32[1]; *unicode_code_point = (uhi-0xD800)*0x400 + (ulo-0xDC00) + 0x10000; return 2; // error masked and reasonable value returned. } // bogus value e->m_error_status |= 16; if ( 16 != (16 & e->m_error_mask) || !ON_IsValidUnicodeCodePoint(e->m_error_code_point) ) { // this error is not masked return 0; } *unicode_code_point = e->m_error_code_point; return 1; // error masked and e->m_error_code_point returnred. } int ON_DecodeSwapByteUTF32( const ON__UINT32* sUTF32, int sUTF32_count, struct ON_UnicodeErrorParameters* e, ON__UINT32* unicode_code_point ) { ON__UINT32 sUTF32swap[2]; ON_UnicodeErrorParameters local_e = ON_UnicodeErrorParameters::MaskErrors; if (nullptr == e) e = &local_e; if ( 0 != sUTF32 && sUTF32_count > 0 ) { sUTF32swap[0] = SwapBytes32(sUTF32[0]); if ( sUTF32_count > 1 ) { // Get up to 2 elements to pass to the unswapped // decoder so that masked errors are uniformly // handled. sUTF32swap[1] = SwapBytes32(sUTF32[1]); sUTF32_count = 2; } sUTF32 = sUTF32swap; } return ON_DecodeUTF32(sUTF32,sUTF32_count,e,unicode_code_point); } int ON_ConvertUTF32ToUTF8( int bTestByteOrder, const ON__UINT32* sUTF32, int sUTF32_count, char* sUTF8, int sUTF8_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT32** sNextUTF32 ) { int i, k, output_count, bSwapBytes; ON__UINT32 u; char s[6]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF32_count && 0 != sUTF32 ) { for ( sUTF32_count = 0; 0 != sUTF32[sUTF32_count]; sUTF32_count++) { // empty for body } } if ( 0 == sUTF32 || sUTF32_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF32 ) *sNextUTF32 = sUTF32; return 0; } if ( 0 == sUTF8_count ) { sUTF8 = 0; sUTF8_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF8 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF32 ) *sNextUTF32 = sUTF32; return 0; } bSwapBytes = false; if ( bTestByteOrder && sUTF32_count > 0 ) { if ( 0x0000FEFF == sUTF32[0] ) { // skip BOM sUTF32_count--; sUTF32++; } else if ( 0xFFFE0000 == sUTF32[0] ) { // skip BOM and swap bytes in rest of sUTF32 bSwapBytes = true; sUTF32_count--; sUTF32++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; for ( i = 0; i < sUTF32_count; i++ ) { u = bSwapBytes ? SwapBytes32(sUTF32[i]) : sUTF32[i]; if ( !ON_IsValidUnicodeCodePoint(u) ) { e.m_error_status |= 16; if ( 16 != (16 & e.m_error_mask) ) break; if ( !ON_IsValidUnicodeCodePoint(e.m_error_code_point) ) break; u = e.m_error_code_point; } k = ON_EncodeUTF8(u,s); if ( 0 != sUTF8 ) { if ( output_count + k > sUTF8_count ) { e.m_error_status |= 2; break; } memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0])); } output_count += k; } if ( 0 != sUTF8 && output_count < sUTF8_count) sUTF8[output_count] = 0; if ( sNextUTF32 ) *sNextUTF32 = sUTF32+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF32ToUTF16( int bTestByteOrder, const ON__UINT32* sUTF32, int sUTF32_count, ON__UINT16* sUTF16, int sUTF16_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT32** sNextUTF32 ) { int i, k, output_count, bSwapBytes; ON__UINT32 u; ON__UINT16 w[2]; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF32_count && 0 != sUTF32 ) { for ( sUTF32_count = 0; 0 != sUTF32[sUTF32_count]; sUTF32_count++) { // empty for body } } if ( 0 == sUTF32 || sUTF32_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF32 ) *sNextUTF32 = sUTF32; return 0; } if ( 0 == sUTF16_count ) { sUTF16 = 0; sUTF16_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF16 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF32 ) *sNextUTF32 = sUTF32; return 0; } bSwapBytes = false; if ( bTestByteOrder && sUTF32_count > 0 ) { if ( 0x0000FEFF == sUTF32[0] ) { // skip BOM sUTF32_count--; sUTF32++; } else if ( 0xFFFE0000 == sUTF32[0] ) { // skip BOM and swap bytes in rest of sUTF32 bSwapBytes = true; sUTF32_count--; sUTF32++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; for ( i = 0; i < sUTF32_count; i++ ) { u = bSwapBytes ? SwapBytes32(sUTF32[i]) : sUTF32[i]; if ( !ON_IsValidUnicodeCodePoint(u) ) { e.m_error_status |= 16; if ( 16 != (16 & e.m_error_mask) ) break; if ( !ON_IsValidUnicodeCodePoint(e.m_error_code_point) ) break; u = e.m_error_code_point; } k = ON_EncodeUTF16(u,w); if ( 0 != sUTF16 ) { if ( output_count + k > sUTF16_count ) { e.m_error_status |= 2; break; } sUTF16[output_count] = w[0]; if ( 2 == k ) sUTF16[output_count+1] = w[1]; } output_count += k; } if ( 0 != sUTF16 && output_count < sUTF16_count) sUTF16[output_count] = 0; if ( sNextUTF32 ) *sNextUTF32 = sUTF32+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertUTF32ToUTF32( int bTestByteOrder, const ON__UINT32* sUTF16, int sUTF16_count, unsigned int* sUTF32, int sUTF32_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT32** sNextUTF16 ) { int i, j, output_count, bSwapBytes; ON__UINT32 u; struct ON_UnicodeErrorParameters e; if ( 0 != error_status ) *error_status = 0; if ( -1 == sUTF16_count && 0 != sUTF16 ) { for ( sUTF16_count = 0; 0 != sUTF16[sUTF16_count]; sUTF16_count++) { // empty for body } } if ( 0 == sUTF16 || sUTF16_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } if ( 0 == sUTF32_count ) { sUTF32 = 0; sUTF32_count = 2147483647; // maximum value of a 32-bit signed int } else if ( 0 == sUTF32 ) { if ( 0 != error_status ) *error_status |= 1; if ( sNextUTF16 ) *sNextUTF16 = sUTF16; return 0; } bSwapBytes = false; if ( bTestByteOrder && sUTF16_count > 0 ) { if ( 0x0000FEFF == sUTF16[0] ) { // skip BOM sUTF16_count--; sUTF16++; } else if ( 0xFFFE0000 == sUTF16[0]) { // skip BOM and swap bytes in rest of sUTF16 bSwapBytes = true; sUTF16_count--; sUTF16++; } } e.m_error_status = 0; e.m_error_mask = error_mask; e.m_error_code_point = error_code_point; output_count = 0; if ( bSwapBytes ) { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeSwapByteUTF32(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; if ( 0 != sUTF32 ) { if ( output_count >= sUTF32_count ) { e.m_error_status |= 2; break; } sUTF32[output_count] = u; } output_count++; } } else { for ( i = 0; i < sUTF16_count; i += j ) { j = ON_DecodeUTF32(sUTF16+i,sUTF16_count-i,&e,&u); if ( j <= 0 ) break; if ( 0 != sUTF32 ) { if ( output_count >= sUTF32_count ) { e.m_error_status |= 2; break; } sUTF32[output_count] = u; } output_count++; } } if ( 0 != sUTF32 && output_count < sUTF32_count) sUTF32[output_count] = 0; if ( sNextUTF16 ) *sNextUTF16 = sUTF16+i; if ( error_status ) *error_status = e.m_error_status; return output_count; } int ON_ConvertWideCharToUTF8( int bTestByteOrder, const wchar_t* sWideChar, int sWideChar_count, char* sUTF8, int sUTF8_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const wchar_t** sNextWideChar ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF8ToUTF8( bTestByteOrder, (const char*)sWideChar,sWideChar_count, sUTF8,sUTF8_count, error_status,error_mask,error_code_point, (const char**)sNextWideChar ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF16ToUTF8( bTestByteOrder, (const ON__UINT16*)sWideChar,sWideChar_count, sUTF8,sUTF8_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextWideChar ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF32ToUTF8( bTestByteOrder, (const ON__UINT32*)sWideChar,sWideChar_count, sUTF8,sUTF8_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextWideChar ); break; default: rc = 0; } return rc; } int ON_ConvertWideCharToUTF16( int bTestByteOrder, const wchar_t* sWideChar, int sWideChar_count, char* sUTF16, int sUTF16_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const wchar_t** sNextWideChar ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF8ToUTF16( bTestByteOrder, (const char*)sWideChar,sWideChar_count, (ON__UINT16*)sUTF16,sUTF16_count, error_status,error_mask,error_code_point, (const char**)sNextWideChar ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF16ToUTF16( bTestByteOrder, (const ON__UINT16*)sWideChar,sWideChar_count, (ON__UINT16*)sUTF16,sUTF16_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextWideChar ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF32ToUTF16( bTestByteOrder, (const ON__UINT32*)sWideChar,sWideChar_count, (ON__UINT16*)sUTF16,sUTF16_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextWideChar ); break; default: rc = 0; } return rc; } int ON_ConvertWideCharToUTF32( int bTestByteOrder, const wchar_t* sWideChar, int sWideChar_count, ON__UINT32* sUTF32, int sUTF32_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const wchar_t** sNextWideChar ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF8ToUTF32( bTestByteOrder, (const char*)sWideChar,sWideChar_count, sUTF32,sUTF32_count, error_status,error_mask,error_code_point, (const char**)sNextWideChar ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF16ToUTF32( bTestByteOrder, (const ON__UINT16*)sWideChar,sWideChar_count, sUTF32,sUTF32_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextWideChar ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF32ToUTF32( bTestByteOrder, (const ON__UINT32*)sWideChar,sWideChar_count, sUTF32,sUTF32_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextWideChar ); break; default: rc = 0; } return rc; } int ON_ConvertUTF8ToWideChar( int bTestByteOrder, const char* sUTF8, int sUTF8_count, wchar_t* sWideChar, int sWideChar_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const char** sNextUTF8 ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF8ToUTF8( bTestByteOrder, sUTF8,sUTF8_count, (char*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, sNextUTF8 ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF8ToUTF16( bTestByteOrder, sUTF8,sUTF8_count, (ON__UINT16*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, sNextUTF8 ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF8ToUTF32( bTestByteOrder, sUTF8,sUTF8_count, (ON__UINT32*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, sNextUTF8 ); break; default: if (error_status) *error_status = 1; if (sNextUTF8) *sNextUTF8 = sUTF8; rc = 0; } return rc; } int ON_ConvertUTF16ToWideChar( int bTestByteOrder, const ON__UINT16* sUTF16, int sUTF16_count, wchar_t* sWideChar, int sWideChar_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT16** sNextUTF16 ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF16ToUTF8( bTestByteOrder, (const ON__UINT16*)sUTF16,sUTF16_count, (char*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextUTF16 ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF16ToUTF16( bTestByteOrder, (const ON__UINT16*)sUTF16,sUTF16_count, (ON__UINT16*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextUTF16 ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF16ToUTF32( bTestByteOrder, (const ON__UINT16*)sUTF16,sUTF16_count, (ON__UINT32*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT16**)sNextUTF16 ); break; default: if (error_status) *error_status = 1; if (sNextUTF16) *sNextUTF16 = sUTF16; rc = 0; } return rc; } int ON_ConvertUTF32ToWideChar( int bTestByteOrder, const ON__UINT32* sUTF32, int sUTF32_count, wchar_t* sWideChar, int sWideChar_count, unsigned int* error_status, unsigned int error_mask, ON__UINT32 error_code_point, const ON__UINT32** sNextUTF32 ) { int rc; switch(sizeof(sWideChar[0])) { case sizeof(char): // assume wchar_t strings are UTF-8 encoded rc = ON_ConvertUTF32ToUTF8( bTestByteOrder, (const ON__UINT32*)sUTF32,sUTF32_count, (char*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextUTF32 ); break; case sizeof(ON__UINT16): // assume wchar_t strings are UTF-16 encoded rc = ON_ConvertUTF32ToUTF16( bTestByteOrder, (const ON__UINT32*)sUTF32,sUTF32_count, (ON__UINT16*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextUTF32 ); break; case sizeof(ON__UINT32): // assume wchar_t strings are UTF-32 encoded rc = ON_ConvertUTF32ToUTF32( bTestByteOrder, (const ON__UINT32*)sUTF32,sUTF32_count, (ON__UINT32*)sWideChar,sWideChar_count, error_status,error_mask,error_code_point, (const ON__UINT32**)sNextUTF32 ); break; default: if (error_status) *error_status = 1; if (sNextUTF32) *sNextUTF32 = sUTF32; rc = 0; } return rc; } const ON_wString ON_wString::FromUnicodeCodePoints( const ON__UINT32* code_points, int code_point_count, ON__UINT32 error_code_point ) { const bool bErrorCodePointIsValid = ON_IsValidUnicodeCodePoint(error_code_point); if (nullptr == code_points) return ON_wString::EmptyString; if (-1 == code_point_count) { code_point_count = 0; while (0 != code_points[code_point_count]) { if ( false == bErrorCodePointIsValid && false == ON_IsValidUnicodeCodePoint(code_points[code_point_count]) ) { break; } code_point_count++; } } if ( code_point_count <= 0 ) return ON_wString::EmptyString; const int bTestByteOrder = false; unsigned int error_status = 0; const unsigned int error_mask = bErrorCodePointIsValid ? 0xFFFFFFFF : 0; int wchar_count = ON_ConvertUTF32ToWideChar( bTestByteOrder, code_points, code_point_count, nullptr, 0, &error_status, error_mask, error_code_point, nullptr ); if (wchar_count <= 0) return ON_wString::EmptyString; ON_wString s; const int s_capacity = (wchar_count + 1); wchar_t* a = s.ReserveArray((size_t)s_capacity); error_status = 0; wchar_count = ON_ConvertUTF32ToWideChar( bTestByteOrder, code_points, code_point_count, a, s_capacity, &error_status, error_mask, error_code_point, nullptr ); if (wchar_count <= 0) return ON_wString::EmptyString; s.SetLength(wchar_count); return s; } ////int ON_ConvertWindowsCodePageValueToWideChar( //// int windows_code_page, //// ON__UINT32 code_page_character_value, //// size_t w_capacity, //// wchar_t* w ////) ////{ //// ON__UINT32 unicode_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter; //// ON_UnicodeErrorParameters e; //// memset(&e, 0, sizeof(e)); //// e.m_error_mask = 0xFF; //// e.m_error_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter; //// ON_DecodeWindowsCodePageValue( windows_code_page, code_page_character_value, &e, &unicode_code_point); //// return ON_EncodeWideChar(unicode_code_point, w_capacity, w); ////} ON__UINT32 ON_MapRTFcharsetToWindowsCodePage( ON__UINT32 rtf_charset, ON__UINT32 default_code_page ) { // From the Microsoft version of the RTF ver 1.9 spec available on MSDN // // \fcharsetN: Specifies the character set of a font in the font table.If this appears, it implies that bytes in runs // tagged with the associated \fN are character codes in the codepage corresponding to the charset N. // Use this codepage to convert the codes to Unicode using a function like the Windows MultiByteToWideChar(). // See also the \cpgN control word, which, if it appears, supersedes the codepage given by \fcharsetN.Values for N are defined, // for example, in the Windows header file wingdi.h(e.g., see ANSI_CHARSET) and are repeated here together with the corresponding // Windows or Mac codepages for convenience:charset codepage Windows / Mac name // A font may have a different character set from the character set of the document. For example, the Symbol font has the // same characters in the same code positions both on the Macintosh and in Windows. Typically, RTF fonts use the code page // corresponding to the \fcharsetN control word in their \fonttbl description. If the charset doesn�t exist, the codepage // may be given by the \cpgN control word, for which the code page is N. If the \cpgN does appear, it supersedes the code // page corresponding to the \fcharsetN. // For such cases, codepage conversions can be avoided altogether by using the Unicode \uN notation for characters. // In addition, file names (used in field instructions and in embedded fonts) may not necessarily be the same as the character // set of the document; the \cpgN control word can change the character set for these file names as well. // ON__UINT32 cp; switch (rtf_charset) { case 0: cp = 1252; break; // ANSI case 1: cp = 0; break; // Default case 2: cp = 42; break; // Symbol case 77: cp = 10000; break; // Mac Roman case 78: cp = 10001; break; // Mac Shift Jis case 79: cp = 10003; break; // Mac Hangul case 80: cp = 10008; break; // Mac GB2312 case 81: cp = 10002; break; // Mac Big5 case 82: cp = default_code_page; break; // Mac Johab (old) case 83: cp = 10005; break; // Mac Hebrew case 84: cp = 10004; break; // Mac Arabic case 85: cp = 10006; break; // Mac Greek case 86: cp = 10081; break; // Mac Turkish case 87: cp = 10021; break; // Mac Thai case 88: cp = 10029; break; // Mac East Europe case 89: cp = 10007; break; // Mac Russian case 128: cp = 932; break; // Shift JIS case 129: cp = 949; break; // Hangul (Korean) case 130: cp = 1361; break; // Johab case 134: cp = 936; break; // GB2312 case 136: cp = 950; break; // Big5 case 161: cp = 1253; break; // Greek case 162: cp = 1254; break; // Turkish case 163: cp = 1258; break; // Vietnamese case 177: cp = 1255; break; // Hebrew case 178: cp = 1256; break; // Arabic case 179: cp = default_code_page; break; // Arabic Traditional (old) case 180: cp = default_code_page; break; // Arabic user (old) case 181: cp = default_code_page; break; // Hebrew user (old) case 186: cp = 1257; break; // Baltic case 204: cp = 1251; break; // Russian case 222: cp = 874; break; // Thai case 238: cp = 1250; break; // Eastern European case 254: cp = 437; break; // PC 437 case 255: cp = 850; break; // OEM default: cp = default_code_page; break; } return cp; } static int ON_Internal_ConvertMSSBCPToWideChar( const ON__UINT32* sb_code_page_0x80_to_0xFF_to_unicode, const char* sMBCS, int sMBCS_count, wchar_t* sWideChar, int sWideChar_capacity, unsigned int* error_status ) { wchar_t* sWideCharMax = (sWideChar_capacity > 0 && nullptr != sWideChar) ? sWideChar + sWideChar_capacity : nullptr; if (nullptr == sWideCharMax) { sWideChar = nullptr; sWideChar_capacity = 0; } else { sWideChar[0] = 0; } if (nullptr != error_status) *error_status = 0; unsigned int e = 0; if (nullptr == sMBCS || sMBCS_count < 0) sMBCS_count = 0; wchar_t* s = sWideChar; wchar_t w_buffer[8]; int rc = 0; for (int i = 0; i < sMBCS_count; i++) { const ON__UINT32 c = (unsigned char)sMBCS[i]; ON__UINT32 unicode_code_point; if (c < 0x80) unicode_code_point = c; else { if (c <= 0xFF && nullptr != sb_code_page_0x80_to_0xFF_to_unicode ) { unicode_code_point = sb_code_page_0x80_to_0xFF_to_unicode[c - 0x80]; if (0 == ON_IsValidUnicodeCodePoint(unicode_code_point)) unicode_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter; } else unicode_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter; if ( ON_UnicodeCodePoint::ON_ReplacementCharacter == unicode_code_point ) e |= 16; } const int w_count = ON_EncodeWideChar(unicode_code_point, sizeof(w_buffer)/sizeof(w_buffer[0]), w_buffer); if (w_count <= 0) { e |= 16; continue; } rc += w_count; if (s == nullptr) continue; wchar_t* s1 = s + w_count; if (s1 > sWideCharMax) { e |= 2; continue; } const wchar_t* w = w_buffer; while (s < s1) *s++ = *w++; } while (s < sWideCharMax) { *s++ = 0; } if (nullptr != error_status) *error_status = e; return rc; } int ON_ConvertMSMBCPToWideChar( ON__UINT32 windows_code_page, const char* sMBCS, int sMBCS_count, wchar_t* sWideChar, int sWideChar_capacity, unsigned int* error_status ) { if ( 0 != error_status ) *error_status = 0; bool bNullTerminated = false; if ( -1 == sMBCS_count && nullptr != sMBCS ) { for ( sMBCS_count = 0; true; sMBCS_count++) { if (0 == sMBCS[sMBCS_count]) { bNullTerminated = true; break; } } } if ( nullptr == sMBCS || sMBCS_count < 0 ) { if ( 0 != error_status ) *error_status |= 1; return 0; } if ( 0 == sMBCS_count ) { return 0; } if (sWideChar_capacity <= 0) { sWideChar_capacity = 0; sWideChar = nullptr; } else if (nullptr == sWideChar) { sWideChar_capacity = 0; } else { sWideChar[0] = 0; } const char* c = sMBCS; const char* c1 = c + sMBCS_count; wchar_t* w = sWideChar; wchar_t* w1 = w + sWideChar_capacity; while (c < c1 && *c >= 0 && *c <= 127) { if (nullptr != w) { if (w >= w1) break; *w++ = (wchar_t)*c; } c++; } if (c == c1) { if (w < w1) *w = 0; return sMBCS_count; } const ON__UINT32* sb_code_page_0x80_to_0xFF_to_unicode = ON_MSSBCP_0x80_0xFF_Unicode(windows_code_page); if (nullptr != sb_code_page_0x80_to_0xFF_to_unicode) { // fast platform independent single byte code page conversion built into opennurbs return ON_Internal_ConvertMSSBCPToWideChar( sb_code_page_0x80_to_0xFF_to_unicode, sMBCS, sMBCS_count, sWideChar, sWideChar_capacity, error_status ); } #if defined(ON_RUNTIME_WIN) // Starting with Windows Vista, the function does not drop illegal code points when dwFlags=0. // It replaces illegal sequences with U+FFFD (encoded as appropriate for the specified codepage). DWORD dwFlags = 0; int sWideChar_count = ::MultiByteToWideChar(windows_code_page, dwFlags, sMBCS, sMBCS_count, sWideChar, sWideChar_capacity); if (sWideChar_count < 0) sWideChar_count = 0; if (nullptr == sWideChar) return sWideChar_count; for (int i = 0; i < sWideChar_count; i++) { if (0 == sWideChar[i]) { sWideChar_count = i; break; } if ( ON_wString::ReplacementCharacter == sWideChar[i] ) { if ( nullptr != error_status) *error_status |= 16; } } if (sWideChar_count < sWideChar_capacity) sWideChar[sWideChar_count] = 0; return sWideChar_count; #elif defined (ON_RUNTIME_APPLE_OBJECTIVE_C_AVAILABLE) CFStringEncoding cfEncoding = CFStringConvertWindowsCodepageToEncoding(windows_code_page); if (cfEncoding == kCFStringEncodingInvalidId) { ON_ERROR("No Apple CFStringEncoding support for this value of windows_code_page"); return 0; } char* szMBCS = nullptr; if (false == bNullTerminated) { szMBCS = (char*)onmalloc((sMBCS_count + 1) * sizeof(szMBCS[0])); memcpy(szMBCS, sMBCS, sMBCS_count * sizeof(szMBCS[0])); szMBCS[sMBCS_count] = 0; sMBCS = szMBCS; } int sWideChar_count = 0; for (;;) { NSStringEncoding nsEncoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding); NSString* str = [NSString stringWithCString : sMBCS encoding : nsEncoding]; if (nullptr == str) { ON_ERROR("[NSString stringWithCString: sMBCS encoding: nsEncoding] failed."); break; } const int len = (int)(str.length); if (len <= 0) { break; } for (int i = 0; i < len; i++) { ON__UINT32 unicode_code_point = 0; const int u1 = [str characterAtIndex : i]; if (u1 >= 0xD800U && u1 < 0xDC00 && i+1 < len) { const int u2 = [str characterAtIndex : (i+1)]; unicode_code_point = ON_DecodeUTF16SurrogatePair((unsigned int)u1, (unsigned int)u2, ON_wString::ReplacementCharacter); if (ON_wString::ReplacementCharacter != unicode_code_point) i++; } else { unicode_code_point = (unsigned int)u1; } if ( false == ON_IsValidUnicodeCodePoint(unicode_code_point) || ON_wString::ReplacementCharacter == unicode_code_point ) { unicode_code_point = ON_wString::ReplacementCharacter; if (nullptr != error_status) *error_status |= 16; } if (nullptr != sWideChar && sWideChar_capacity > 0) { if (sWideChar_count < sWideChar_capacity) sWideChar[sWideChar_count] = (wchar_t)unicode_code_point; else { // continue counting but no more output to sWideChar[] sWideChar[sWideChar_capacity-1] = 0; sWideChar = nullptr; sWideChar_capacity = 0; if (nullptr != error_status) *error_status |= 2; } } sWideChar_count++; } break; } if (nullptr != szMBCS) onfree(szMBCS); if (nullptr != sWideChar && sWideChar_count < sWideChar_capacity) { sWideChar[sWideChar_count] = 0; sWideChar[sWideChar_capacity-1] = 0; } return sWideChar_count; #else // Add support for other platforms as needed. return 0; #endif }