bstring 1.1.0
Loading...
Searching...
No Matches
utf8util.h
Go to the documentation of this file.
1/* Copyright 2002-2015 Paul Hsieh
2 * This file is part of Bstrlib.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * 3. Neither the name of bstrlib nor the names of its contributors may be
15 * used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * Alternatively, the contents of this file may be used under the terms of
31 * GNU General Public License Version 2 (the "GPL").
32 */
33
40
41#ifndef UTF8_UNICODE_UTILITIES
42#define UTF8_UNICODE_UTILITIES
43
44#include <limits.h>
45
/*
 * BSTR_PUBLIC: decoration placed on every exported declaration in this header.
 *
 * If bstrlib.h has not been included, define the visibility attribute here.
 * The #ifndef guard ensures we don't conflict if bstrlib.h came first.
 *
 * With GCC-compatible compilers (major version 4 or later) it expands to
 * the "default" ELF visibility attribute; everywhere else it expands to
 * nothing.  __GNUC__ is tested with defined() first so that compilers which
 * do not provide it do not trigger -Wundef diagnostics.
 */
#ifndef BSTR_PUBLIC
# if defined(__GNUC__) && __GNUC__ >= 4
#  define BSTR_PUBLIC __attribute__ ((visibility ("default")))
# else
#  define BSTR_PUBLIC
# endif
#endif
55
56#ifdef __cplusplus
57extern "C" {
58#endif
59
/*
 * cpUcs4: signed integer type used to carry a single code point.
 *
 * The first of int / long whose maximum is at least 0x7fffffff is chosen,
 * so the type always has at least 31 value bits plus a sign bit.  If
 * neither type is wide enough, compilation stops with an #error.
 */
#if INT_MAX >= 0x7fffffffUL
typedef int cpUcs4;
#elif LONG_MAX >= 0x7fffffffUL
typedef long cpUcs4;
#else
#error This compiler is not supported
#endif
67
/*
 * cpUcs2: unsigned integer type whose maximum value is exactly 0xFFFF.
 *
 * unsigned int, unsigned short and unsigned char are tried in that order;
 * the first one whose *_MAX equals 0xFFFF is used.  If none of them is
 * exactly 16 bits wide, compilation stops with an #error.
 */
#if UINT_MAX == 0xFFFF
typedef unsigned int cpUcs2;
#elif USHRT_MAX == 0xFFFF
typedef unsigned short cpUcs2;
#elif UCHAR_MAX == 0xFFFF
typedef unsigned char cpUcs2;
#else
#error This compiler is not supported
#endif
77
/*
 * isLegalUnicodeCodePoint(v): non-zero when v passes all three checks below,
 * zero otherwise.
 *
 *   1. v is outside the surrogate range 0xD800..0xDFFF;
 *   2. v, converted to unsigned long, is no greater than 0x10FFFF (this
 *      also rejects negative values, which become very large when cast);
 *   3. the low 16 bits of v are neither 0xFFFE nor 0xFFFF (OR-ing in
 *      0x1F0001 saturates the plane bits and bit 0, so the comparison
 *      with 0x1FFFFF only depends on bits 1..15 all being set).
 *
 * Only the xFFFE / xFFFF values are filtered by check 3; other values such
 * as 0xFDD0..0xFDEF are accepted by this macro.
 *
 * The argument is expanded several times, so it must not have side effects.
 */
#define isLegalUnicodeCodePoint(v) \
	((((v) < 0xD800L) || ((v) > 0xDFFFL)) && \
	 (((unsigned long)(v)) <= 0x0010FFFFL) && \
	 (((v)|0x1F0001) != 0x1FFFFFL))
82
/*
 * State for walking a byte buffer one code point at a time.
 *
 * The only field whose use is visible in this header is the pair
 * next / slen: utf8IteratorNoMore() treats the iterator as exhausted once
 * next >= slen.
 *
 * NOTE(review): the remaining descriptions are inferred from the field names
 * and from the utf8IteratorInit() parameter list; confirm them against the
 * implementation.
 */
struct utf8Iterator {
	unsigned char *data;	/* presumably the buffer being iterated */
	int slen;		/* upper bound that next is compared against */
	int start;		/* presumably offset of the current code point */
	int next;		/* position compared with slen to detect the end */
	int error;		/* presumably a decode-error indicator */
};
90
/*
 * utf8IteratorNoMore(it): non-zero when there is nothing further to read,
 * i.e. when it is a null pointer or when it->next has reached or passed
 * it->slen.  The null check short-circuits, so a NULL iterator is never
 * dereferenced.  The argument is expanded more than once and must not have
 * side effects.
 */
#define utf8IteratorNoMore(it) (!(it) || (it)->next >= (it)->slen)
92
93BSTR_PUBLIC void utf8IteratorInit(struct utf8Iterator *iter,
94 unsigned char *data, int slen);
95BSTR_PUBLIC void utf8IteratorUninit(struct utf8Iterator *iter);
96BSTR_PUBLIC cpUcs4 utf8IteratorGetNextCodePoint(struct utf8Iterator *iter,
97 cpUcs4 errCh);
98BSTR_PUBLIC cpUcs4 utf8IteratorGetCurrCodePoint(struct utf8Iterator *iter,
99 cpUcs4 errCh);
100BSTR_PUBLIC int utf8ScanBackwardsForCodePoint(const unsigned char *msg,
101 int len, int pos, cpUcs4 *out);
102
103#ifdef __cplusplus
104}
105#endif
106
107#endif /* UTF8_UNICODE_UTILITIES */
Definition utf8util.h:83