32 #ifndef _QORE_ENCODING_H 34 #define _QORE_ENCODING_H 42 #include <qore/QoreThreadLock.h> 69 hashdecl qore_encoding_private;
84 friend hashdecl qore_encoding_private;
96 unsigned char maxwidth;
98 qore_encoding_private* priv;
168 DLLEXPORT
const char*
getCode()
const;
171 DLLEXPORT
const char*
getDesc()
const;
200 typedef std::map<const char*, QoreEncoding*, ltcstrcase> encoding_map_t;
201 typedef std::map<const char*, const QoreEncoding*, ltcstrcase> const_encoding_map_t;
210 DLLLOCAL
static encoding_map_t emap;
211 DLLLOCAL
static const_encoding_map_t amap;
215 DLLLOCAL
static const QoreEncoding* findUnlocked(
const char* name);
236 DLLLOCAL
static void init(
const char* def);
273 #endif // _QORE_ENCODING_H
qore_size_t(* mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid)
for multi-byte character set encodings: gives the number of bytes for the number of chars ...
Definition: QoreEncoding.h:53
DLLEXPORT const QoreEncoding * QCS_UTF8
UTF-8 multi-byte encoding (only UTF-8 and UTF-16* are multi-byte encodings)
DLLEXPORT qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const
gives the character position (number of characters) starting from the first pointer to the second ...
defines string encoding functions in Qore
Definition: QoreEncoding.h:83
DLLEXPORT bool isMultiByte() const
returns true if the encoding is a multi-byte encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_8
Hebrew character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_1
latin-1, Western European encoding
DLLEXPORT bool isAsciiCompat() const
returns true if the character encoding is backwards-compatible with ASCII
DLLEXPORT const QoreEncoding * QCS_UTF16
UTF-16 (only UTF-8 and UTF-16* are multi-byte encodings)
DLLEXPORT const QoreEncoding * QCS_DEFAULT
the default encoding for the Qore library
static DLLEXPORT void addAlias(const QoreEncoding *qcs, const char *alias)
adds an alias for an encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_2
latin-2, Central European encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_11
Thai character set.
manages encodings in Qore
Definition: QoreEncoding.h:208
DLLEXPORT const QoreEncoding * QCS_ISO_8859_3
latin-3, Southern European character set
size_t qore_size_t
used for sizes (same range as a pointer)
Definition: common.h:73
DLLEXPORT qore_offset_t getCharLen(const char *p, qore_size_t valid_len) const
gives the total number of bytes for the next character at the given pointer
DLLEXPORT const QoreEncoding * QCS_ISO_8859_4
latin-4, Northern European character set
static DLLEXPORT void showEncodings()
prints out all valid encodings to stdout
DLLEXPORT const QoreEncoding * QCS_USASCII
ASCII encoding
qore_size_t(* mbcs_pos_t)(const char *str, const char *ptr, bool &invalid)
for multi-byte character set encodings: gives the character position of the ptr
Definition: QoreEncoding.h:56
unsigned(* mbcs_get_unicode_t)(const char *p)
returns the Unicode code point for the given character; assumes there is enough data for the character...
Definition: QoreEncoding.h:66
DLLEXPORT const QoreEncoding * QCS_ISO_8859_10
latin-6, Nordic character set
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:81
DLLEXPORT const char * getDesc() const
returns the description for the encoding
DLLEXPORT const QoreEncoding * QCS_KOI8_U
Ukrainian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT QoreEncodingManager QEM
the QoreEncodingManager object
DLLEXPORT qore_size_t getLength(const char *p, const char *end, bool &invalid) const
gives the length of the string in characters
DLLEXPORT unsigned getMinCharWidth() const
returns the minimum character width in bytes for the encoding
qore_offset_t(* mbcs_charlen_t)(const char *str, qore_size_t valid_len)
for multi-byte encodings: gives the number of total bytes for the character given one or more charact...
Definition: QoreEncoding.h:63
DLLEXPORT const QoreEncoding * QCS_ISO_8859_9
latin-5, Turkish character set
static DLLEXPORT void showAliases()
prints out all aliases to stdout
static DLLEXPORT const QoreEncoding * findCreate(const char *name)
finds an encoding if it exists (also looks up against alias names) and creates a new one if it doesn'...
DLLEXPORT const char * getCode() const
returns the string code (e.g. "UTF-8") for the encoding
DLLEXPORT int getMaxCharWidth() const
returns the maximum character width in bytes for the encoding
DLLEXPORT const QoreEncoding * QCS_UTF16BE
UTF-16BE (only UTF-8 and UTF-16* are multi-byte encodings)
DLLEXPORT const QoreEncoding * QCS_KOI7
Russian: Kod Obmena Informatsiey, 7 bit characters.
qore_size_t(* mbcs_length_t)(const char *str, const char *end, bool &invalid)
for multi-byte character set encodings: gives the length of the string in characters ...
Definition: QoreEncoding.h:50
DLLEXPORT qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const
gives the number of bytes for the number of chars in the string or up to the end of the string ...
container for holding Qore-language exception information and also for registering a "thread_exit" ca...
Definition: ExceptionSink.h:48
DLLEXPORT const QoreEncoding * QCS_ISO_8859_14
latin-8, Celtic character set
DLLEXPORT int getUnicode(const char *p, const char *end, unsigned &clen, ExceptionSink *xsink) const
returns the Unicode code point for the given character; if there are any errors (invalid character...
DLLEXPORT const QoreEncoding * QCS_ISO_8859_6
Arabic character set.
intptr_t qore_offset_t
used for offsets that could be negative
Definition: common.h:76
DLLEXPORT const QoreEncoding * QCS_ISO_8859_5
Cyrillic character set.
provides a mutually-exclusive thread lock
Definition: QoreThreadLock.h:49
DLLEXPORT const QoreEncoding * QCS_UTF16LE
UTF-16LE (only UTF-8 and UTF-16* are multi-byte encodings)
static DLLEXPORT const QoreEncoding * add(const char *code, const char *desc=0, unsigned char maxwidth=1, mbcs_length_t l=0, mbcs_end_t e=0, mbcs_pos_t p=0, mbcs_charlen_t=0)
adds a new encoding to the list
DLLEXPORT const QoreEncoding * QCS_ISO_8859_16
latin-10, Southeast European character set
DLLEXPORT const QoreEncoding * QCS_ISO_8859_15
latin-9, Western European with euro symbol
DLLEXPORT const QoreEncoding * QCS_KOI8_R
Russian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_7
Greek character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_13
latin-7, Baltic rim character set