32 #ifndef _QORE_CHARSET_H
34 #define _QORE_CHARSET_H
42 #include <qore/QoreThreadLock.h>
88 unsigned char maxwidth;
91 DLLLOCAL
QoreEncoding(
const char* n_code,
const char* n_desc = 0,
unsigned char n_maxwidth = 1,
mbcs_length_t l = 0,
mbcs_end_t e = 0,
mbcs_pos_t p = 0,
mbcs_charlen_t c = 0) : code(n_code), desc(n_desc ? n_desc :
""), flength(l), fend(e), fpos(p), fcharlen(c), maxwidth(n_maxwidth) {
94 DLLLOCAL ~QoreEncoding() {
104 return flength ? flength(p, end, invalid) : strlen(p);
123 return fend ? fend(p, end, c, invalid) : c;
142 return fpos ? fpos(p, end, invalid) : end - p;
160 return fcharlen ? fcharlen(p, valid_len) : 1;
165 return (
bool)flength;
175 return desc.empty() ?
"<no description available>" : desc.c_str();
185 typedef std::map<const char*, QoreEncoding*, class ltcstrcase> encoding_map_t;
186 typedef std::map<const char*, const QoreEncoding*, class ltcstrcase> const_encoding_map_t;
195 DLLLOCAL
static encoding_map_t emap;
196 DLLLOCAL
static const_encoding_map_t amap;
200 DLLLOCAL
static const QoreEncoding* findUnlocked(
const char* name);
221 DLLLOCAL
static void init(
const char* def);
223 DLLLOCAL ~QoreEncodingManager();
258 #endif // _QORE_CHARSET_H
DLLLOCAL const char * getDesc() const
returns the description for the encoding
Definition: QoreEncoding.h:174
qore_size_t(* mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid)
for multi-byte character set encodings: gives the number of bytes for the given number of characters in the string, or up to the end of the string
Definition: QoreEncoding.h:55
DLLEXPORT const QoreEncoding * QCS_UTF8
UTF-8 multi-byte encoding (the only multi-byte encoding, all others are single-byte encodings) ...
DLLLOCAL int getMaxCharWidth() const
returns the maximum character width in bytes for the encoding
Definition: QoreEncoding.h:179
defines string encoding functions in Qore
Definition: QoreEncoding.h:80
DLLEXPORT const QoreEncoding * QCS_ISO_8859_8
Hebrew character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_1
latin-1, Western European encoding
DLLEXPORT const QoreEncoding * QCS_DEFAULT
the default encoding for the Qore library
DLLLOCAL qore_size_t getLength(const char *p, const char *end, bool &invalid) const
gives the length of the string in characters
Definition: QoreEncoding.h:103
static DLLEXPORT void addAlias(const QoreEncoding *qcs, const char *alias)
adds an alias for an encoding
qore_size_t(* mbcs_charlen_t)(const char *str, qore_size_t valid_len)
for multi-byte encodings: gives the number of total bytes for the character given one or more characters
Definition: QoreEncoding.h:65
DLLEXPORT const QoreEncoding * QCS_ISO_8859_2
latin-2, Central European encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_11
Thai character set.
manages encodings in Qore
Definition: QoreEncoding.h:193
DLLEXPORT const QoreEncoding * QCS_ISO_8859_3
latin-3, Southern European character set
size_t qore_size_t
used for sizes (same range as a pointer)
Definition: common.h:70
DLLEXPORT const QoreEncoding * QCS_ISO_8859_4
latin-4, Northern European character set
static DLLEXPORT void showEncodings()
prints out all valid encodings to stdout
DLLEXPORT const QoreEncoding * QCS_USASCII
ascii encoding
qore_size_t(* mbcs_pos_t)(const char *str, const char *ptr, bool &invalid)
for multi-byte character set encodings: gives the character position of the ptr
Definition: QoreEncoding.h:58
DLLLOCAL const char * getCode() const
returns the string code (ex: "UTF-8") for the encoding
Definition: QoreEncoding.h:169
DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const
gives the number of bytes for the number of chars in the string or up to the end of the string ...
Definition: QoreEncoding.h:122
DLLEXPORT const QoreEncoding * QCS_ISO_8859_10
latin-6, Nordic character set
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:50
DLLEXPORT const QoreEncoding * QCS_KOI8_U
Ukrainian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT QoreEncodingManager QEM
the QoreEncodingManager object
DLLEXPORT const QoreEncoding * QCS_ISO_8859_9
latin-5, Turkish character set
static DLLEXPORT void showAliases()
prints out all aliases to stdout
static DLLEXPORT const QoreEncoding * findCreate(const char *name)
finds an encoding if it exists (also looks up against alias names) and creates a new one if it doesn't exist
DLLEXPORT const QoreEncoding * QCS_KOI7
Russian: Kod Obmena Informatsiey, 7 bit characters.
qore_size_t(* mbcs_length_t)(const char *str, const char *end, bool &invalid)
for multi-byte character set encodings: gives the length of the string in characters ...
Definition: QoreEncoding.h:52
container for holding Qore-language exception information and also for registering a "thread_exit" call
Definition: ExceptionSink.h:43
DLLEXPORT const QoreEncoding * QCS_ISO_8859_14
latin-8, Celtic character set
DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const
gives the character position (number of characters) starting from the first pointer to the second ...
Definition: QoreEncoding.h:141
DLLEXPORT const QoreEncoding * QCS_ISO_8859_6
Arabic character set.
DLLLOCAL bool isMultiByte() const
returns true if the encoding is a multi-byte encoding
Definition: QoreEncoding.h:164
DLLEXPORT qore_size_t q_UTF8_get_char_len(const char *p, qore_size_t valid_len)
returns the length of the next UTF-8 character or 0 for an encoding error or a negative number if the string is too short to represent the character
DLLEXPORT const QoreEncoding * QCS_ISO_8859_5
Cyrillic character set.
provides a mutually-exclusive thread lock
Definition: QoreThreadLock.h:49
DLLLOCAL qore_size_t getCharLen(const char *p, qore_size_t valid_len) const
gives the number of total bytes for the character given one or more characters
Definition: QoreEncoding.h:159
static DLLEXPORT const QoreEncoding * add(const char *code, const char *desc=0, unsigned char maxwidth=1, mbcs_length_t l=0, mbcs_end_t e=0, mbcs_pos_t p=0, mbcs_charlen_t=0)
adds a new encoding to the list
DLLEXPORT const QoreEncoding * QCS_ISO_8859_16
latin-10, Southeast European character set
DLLEXPORT const QoreEncoding * QCS_ISO_8859_15
latin-9, Western European with euro symbol
DLLEXPORT const QoreEncoding * QCS_KOI8_R
Russian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_7
Greek character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_13
latin-7, Baltic rim character set