Kawe Mazidjatari b3a68ed095 Add EABase, EAThread and DirtySDK to R5sdk
DirtySDK (EA's Dirty Sockets library) will be used for the LiveAPI implementation, and depends on: EABase, EAThread.
2024-04-05 18:29:03 +02:00

102 lines
3.5 KiB
C

/*H*************************************************************************************************/
/*!
\File utf8.h
\Description
This module implements routines for converting to and from UTF-8.
\Notes
This code only decodes UTF-8 sequences of up to three octets, thus it only handles UCS-2
codes, not UCS-4 codes. It also does not handle UTF-16 (and surrogate pairs), and is
therefore limited to encoding to/decoding from the Basic Multilingual Plane (BMP).
Helpful references:
http://www.utf-8.com/ - links
http://www.cis.ohio-state.edu/cgi-bin/rfc/rfc2279.html - RFC 2279 (obsoleted by RFC 3629)
http://www.unicode.org/charts/ - UNICODE character charts
http://www-106.ibm.com/developerworks/library/utfencodingforms/ - UNICODE primer
http://www.columbia.edu/kermit/utf8.html - UTF-8 samples
\Copyright
Copyright (c) Tiburon Entertainment / Electronic Arts 2003. ALL RIGHTS RESERVED.
\Version 1.0 03/25/03 (JLB) First version.
*/
/*************************************************************************************************H*/
#ifndef _utf8_h
#define _utf8_h
/*!
\Moduledef Utf8 Utf8
\Modulemember Util
*/
//@{
/*** Include files *********************************************************************/
#include "DirtySDK/platform.h"
/*** Defines ***************************************************************************/
/*** Macros ****************************************************************************/
/*** Type Definitions ******************************************************************/
//! UTF-8 to 8bit translation table
//! NOTE(review): Utf8TranslateTo8Bit() takes a "NULL-terminated array" of these; which field
//! marks the terminating entry is not visible in this header -- confirm in the implementation.
typedef struct Utf8TransTblT
{
//! first value of the range handled by this table (presumably a code point; inclusivity not shown here)
uint32_t uRangeBegin;
//! last value of the range handled by this table (presumably a code point; inclusivity not shown here)
uint32_t uRangeEnd;
//! pointer to the table of 8bit codes for the range (presumably indexed relative to uRangeBegin -- TODO confirm)
unsigned char *pCodeTbl;
} Utf8TransTblT;
//! 8bit to UTF-8 translation table
typedef struct Utf8EncodeTblT
{
//! 256 16-bit entries, i.e. one slot per possible 8bit input value
//! (presumably the UCS-2 code point each 8bit character maps to -- TODO confirm)
uint16_t uCodeTbl[256];
} Utf8EncodeTblT;
/*** Variables *************************************************************************/
/*** Functions *************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
// strip non-ASCII characters from a UTF-8 encoded string
//   pOutStr  - destination buffer that receives the stripped string
//   iBufSize - size of the destination buffer (presumably in bytes -- confirm in the implementation)
//   pInStr   - source UTF-8 encoded string (read-only)
//   returns  - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8Strip(char *pOutStr, int32_t iBufSize, const char *pInStr);
// replace non-ASCII characters in a UTF-8 encoded string with 'cReplace'
//   pOutStr  - destination buffer that receives the resulting string
//   iBufSize - size of the destination buffer (presumably in bytes -- confirm in the implementation)
//   pInStr   - source UTF-8 encoded string (read-only)
//   cReplace - character written in place of non-ASCII characters
//   returns  - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8Replace(char *pOutStr, int32_t iBufSize, const char *pInStr, char cReplace);
// get code point length of UTF-8 encoded string
//   pStr    - UTF-8 encoded string to measure (read-only)
//   returns - length of the string counted in code points (as opposed to bytes)
DIRTYCODE_API int32_t Utf8StrLen(const char *pStr);
// encode a UCS-2 string to UTF-8
//   pOutStr - destination buffer that receives the UTF-8 encoded string
//   iBufLen - length of the destination buffer (presumably in bytes -- confirm in the implementation)
//   pInStr  - source string of 16-bit UCS-2 code units (read-only; presumably zero-terminated -- TODO confirm)
//   returns - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8EncodeFromUCS2(char *pOutStr, int32_t iBufLen, const uint16_t *pInStr);
// encode a single UCS-2 "char" to UTF-8 string.
//   pOutPtr - destination for the encoded octets; no buffer length is passed, so the caller must
//             guarantee enough room (a 16-bit code point encodes to at most three UTF-8 octets;
//             whether a terminator is also written is not visible here -- TODO confirm)
//   uCodePt - the 16-bit code point to encode
//   returns - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8EncodeFromUCS2CodePt(char *pOutPtr, uint16_t uCodePt);
// decode a UTF-8 encoded string into UCS-2
//   pOutStr - destination buffer that receives the 16-bit UCS-2 code units
//   iBufLen - length of the destination buffer
//             (NOTE(review): bytes vs. uint16_t elements is not stated here -- confirm before sizing buffers)
//   pInStr  - source UTF-8 encoded string (read-only)
//   returns - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8DecodeToUCS2(uint16_t *pOutStr, int32_t iBufLen, const char *pInStr);
// encode the given 8bit input string to UTF-8, based on the input translation table
//   pOutStr    - destination buffer that receives the UTF-8 encoded string
//   iBufLen    - length of the destination buffer (presumably in bytes -- confirm in the implementation)
//   pInStr     - source 8bit string (read-only)
//   pEncodeTbl - 8bit to UTF-8 translation table used for the conversion (read-only)
//   returns    - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8EncodeFrom8Bit(char *pOutStr, int32_t iBufLen, const char *pInStr, const Utf8EncodeTblT *pEncodeTbl);
// translate the given UTF-8 sequence based on the NULL-terminated array of given tables
//   pOutStr   - destination buffer that receives the translated 8bit string
//   iBufLen   - length of the destination buffer (presumably in bytes -- confirm in the implementation)
//   pInStr    - source UTF-8 encoded string (read-only)
//   cReplace  - replacement character (presumably emitted for code points no table covers -- TODO confirm)
//   pTransTbl - NULL-terminated array of UTF-8 to 8bit translation tables (read-only)
//   returns   - int32_t result; its meaning is not specified in this header, see the implementation
DIRTYCODE_API int32_t Utf8TranslateTo8Bit(char *pOutStr, int32_t iBufLen, const char *pInStr, char cReplace, const Utf8TransTblT *pTransTbl);
#ifdef __cplusplus
}
#endif
//@}
#endif // _utf8_h