DarkPlaces
Game engine based on the Quake 1 engine by id Software, developed by LadyHavoc
 
utf8lib.h
Go to the documentation of this file.
1/*
2Copyright (C) 2009-2020 DarkPlaces contributors
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19*/
20
21/*
22 * UTF-8 utility functions for DarkPlaces
23 */
24#ifndef UTF8LIB_H__
25#define UTF8LIB_H__
26
27#include <stddef.h>
28#include "qtypes.h"
29
30// types for unicode strings
31// let them be 32 bit for now
32// normally, wchar_t is 16 or 32 bit: 32 on linux and haiku, 16 on windows
33
34// Uchar, a wide character
35typedef int32_t Uchar;
36
37// Initialize UTF8, this registers cvars which allows for UTF8 to be disabled
38// completely.
39// When UTF8 is disabled, every u8_ function will work exactly as you'd expect
40// a non-utf8 version to work: u8_strlen() will wrap to strlen()
41// u8_byteofs() and u8_charidx() will simply return whatever is passed as index parameter
42// u8_getchar() will just return the next byte, u8_fromchar will write one byte, ...
43extern struct cvar_s utf8_enable;
44void u8_Init(void);
45
46size_t u8_strlen(const char*);
47size_t u8_strnlen(const char*, size_t);
48int u8_byteofs(const char*, size_t, size_t*);
49int u8_charidx(const char*, size_t, size_t*);
50size_t u8_bytelen(const char*, size_t);
51size_t u8_prevbyte(const char*, size_t);
52Uchar u8_getchar_utf8_enabled(const char*, const char**);
53Uchar u8_getnchar_utf8_enabled(const char*, const char**, size_t);
54int u8_fromchar(Uchar, char*, size_t);
55size_t u8_mbstowcs(Uchar *, const char *, size_t);
56size_t u8_wcstombs(char*, const Uchar*, size_t);
57size_t u8_COM_StringLengthNoColors(const char *s, size_t size_s, qbool *valid);
58
59// returns a static buffer, use this for inlining
60char *u8_encodech(Uchar ch, size_t*, char*buf16);
61
62size_t u8_strpad(char *out, size_t outsize, const char *in, qbool leftalign, size_t minwidth, size_t maxwidth);
63size_t u8_strpad_colorcodes(char *out, size_t outsize, const char *in, qbool leftalign, size_t minwidth, size_t maxwidth);
64
65/* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
66 * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
67 * rest:
68 */
69extern Uchar u8_quake2utf8map[256];
70// these defines get a bit tricky, as c and e may be aliased to the same variable
/*
 * Character-fetch macros. Each one selects its path at run time from
 * utf8_enable.integer:
 *   - nonzero: forward to u8_getchar_utf8_enabled() / u8_getnchar_utf8_enabled();
 *   - zero:    read a single byte at c and translate it through u8_quake2utf8map.
 *
 * c is the position to read from. e (where taken) is a pointer that receives
 * the position following the fetched character. n (where taken) is the length
 * argument forwarded to u8_getnchar_utf8_enabled(); on the byte path, n <= 0
 * yields 0 and stores c unchanged into *e (or simply yields 0 when there is
 * no end pointer).
 *
 * The byte paths first store (c) + 1 into *e and then read the byte back
 * through [-1], so the fetch still works when *e and c are the same variable.
 *
 * The *_noendptr forms pass NULL as the end pointer. The *_check forms pick
 * between the end-pointer and no-end-pointer form depending on whether e is
 * non-null.
 *
 * These are macros: e is evaluated twice by the *_check forms, so do not pass
 * expressions with side effects.
 */
#define u8_getchar(c,e) (utf8_enable.integer ? u8_getchar_utf8_enabled(c,e) : (u8_quake2utf8map[(unsigned char)(*(e) = (c) + 1)[-1]]))
#define u8_getchar_noendptr(c) (utf8_enable.integer ? u8_getchar_utf8_enabled(c,NULL) : (u8_quake2utf8map[(unsigned char)*(c)]))
#define u8_getchar_check(c,e) ((e) ? u8_getchar((c),(e)) : u8_getchar_noendptr((c)))
#define u8_getnchar(c,e,n) (utf8_enable.integer ? u8_getnchar_utf8_enabled(c,e,n) : ((n) <= 0 ? ((*(e) = (c)), 0) : (u8_quake2utf8map[(unsigned char)(*(e) = (c) + 1)[-1]])))
#define u8_getnchar_noendptr(c,n) (utf8_enable.integer ? u8_getnchar_utf8_enabled(c,NULL,n) : ((n) <= 0 ? 0 : (u8_quake2utf8map[(unsigned char)*(c)])))
// dispatches to the length-limited variants (the 2-/1-argument u8_getchar macros cannot take n)
#define u8_getnchar_check(c,e,n) ((e) ? u8_getnchar((c),(e),(n)) : u8_getnchar_noendptr((c),(n)))
77
80
81#ifdef WIN32
82
83// WTF-8 encoding to circumvent Windows encodings, be it UTF-16 or random codepages
84// https://simonsapin.github.io/wtf-8/
85
86typedef wchar_t wchar;
87
88// whether to regard wchar as utf-32
89// sizeof(wchar_t) is 2 for win32, we don't have sizeof in macros
90#define WTF8U32 0
91// check for extra sanity in conversion steps
92#define WTF8CHECKS 1
93
94int towtf8(const wchar* wstr, int wlen, char* str, int maxlen);
95int fromwtf8(const char* str, int len, wchar* wstr, int maxwlen);
96int wstrlen(const wchar* wstr);
97
98// helpers for wchar code
99/* convert given wtf-8 encoded char *str to wchar *wstr, only on win32 */
100#define WIDE(str, wstr) fromwtf8(str, strlen(str), wstr, strlen(str))
101/* convert given wchar *wstr to wtf-8 encoded char *str, only on win32 */
102#define NARROW(wstr, str) towtf8(wstr, wstrlen(wstr), str, wstrlen(wstr) * (WTF8U32 ? 4 : 3))
103
104#else
105
106#define WIDE(str, wstr) ;
107#define NARROW(wstr, str) ;
108
109#endif // WIN32
110
111#endif // UTF8LIB_H__
bool qbool
Definition qtypes.h:9
Uchar u8_quake2utf8map[256]
Definition utf8lib.c:568
Uchar u8_tolower(Uchar ch)
Definition utf8lib.c:2120
size_t u8_strpad(char *out, size_t outsize, const char *in, qbool leftalign, size_t minwidth, size_t maxwidth)
Pads a utf-8 string.
Definition utf8lib.c:881
size_t u8_strpad_colorcodes(char *out, size_t outsize, const char *in, qbool leftalign, size_t minwidth, size_t maxwidth)
Definition utf8lib.c:899
size_t u8_COM_StringLengthNoColors(const char *s, size_t size_s, qbool *valid)
Definition utf8lib.c:777
size_t u8_wcstombs(char *, const Uchar *, size_t)
Convert a wide-character string to a utf-8 multibyte string.
Definition utf8lib.c:738
int32_t Uchar
Definition utf8lib.h:35
Uchar u8_toupper(Uchar ch)
Definition utf8lib.c:1066
int u8_fromchar(Uchar, char *, size_t)
Encode a wide-character into utf-8.
Definition utf8lib.c:628
size_t u8_bytelen(const char *, size_t)
Get the number of bytes used in a string to represent an amount of characters.
Definition utf8lib.c:340
Uchar u8_getchar_utf8_enabled(const char *, const char **)
Fetch a character from an utf-8 encoded string.
Definition utf8lib.c:592
size_t u8_prevbyte(const char *, size_t)
Get the byte offset of the previous byte.
Definition utf8lib.c:525
size_t u8_strlen(const char *)
Get the number of characters in an UTF-8 string.
Definition utf8lib.c:195
int u8_byteofs(const char *, size_t, size_t *)
Get the byte-index for a character-index.
Definition utf8lib.c:425
void u8_Init(void)
Definition utf8lib.c:13
size_t u8_mbstowcs(Uchar *, const char *, size_t)
Convert a utf-8 multibyte string to a wide character string.
Definition utf8lib.c:715
Uchar u8_getnchar_utf8_enabled(const char *, const char **, size_t)
Fetch a character from an utf-8 encoded string.
Definition utf8lib.c:609
int u8_charidx(const char *, size_t, size_t *)
Get the char-index for a byte-index.
Definition utf8lib.c:462
struct cvar_s utf8_enable
Definition utf8lib.c:11
char * u8_encodech(Uchar ch, size_t *, char *buf16)
uses u8_fromchar on a static buffer
Definition utf8lib.c:697
size_t u8_strnlen(const char *, size_t)
Get the number of characters in a part of an UTF-8 string.
Definition utf8lib.c:249