Boron 2.1.0
ucs2_case.c
1/*
2 Copyright 2014 Karl Robillard
3
4 This file is part of the Urlan datatype system.
5
6 Urlan is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Urlan is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with Urlan. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
/*
  Operation applied to a code point that falls inside the range of a case
  table entry.  The enumerator value is what is stored in the 'op' slot of
  each table row.
*/
enum CaseOperator
{
    CO_All,     // Add the entry value (read as int16_t) to every code point.
    CO_Odd,     // Add the entry value only when the code point is odd.
    CO_Even,    // Add the entry value only when the code point is even.
    CO_Set,     // Result is the entry value itself.
    CO_Map,     // Entry value is the base index into the companion map array.
};
29
30
/*
  Explicit lowercase results for the CO_Map ranges of _toLower.

  Each section holds one result per code point of the range named in its
  comment (range, entry count, index of the first entry).  The index of the
  first entry is the 'value' stored in the matching _toLower row.
*/
static const uint16_t _toLowerMap[] =
{
    // 0x0181 - 0x01BC   60 entries, starting at index 0
    0x0253, 0x0183, 0x0183, 0x0185, 0x0185, 0x0254, 0x0188, 0x0188,
    0x0256, 0x0257, 0x018C, 0x018C, 0x018D, 0x01DD, 0x0259, 0x025B,
    0x0192, 0x0192, 0x0260, 0x0263, 0x0195, 0x0269, 0x0268, 0x0199,
    0x0199, 0x019A, 0x019B, 0x026F, 0x0272, 0x019E, 0x0275, 0x01A1,
    0x01A1, 0x01A3, 0x01A3, 0x01A5, 0x01A5, 0x0280, 0x01A8, 0x01A8,
    0x0283, 0x01AA, 0x01AB, 0x01AD, 0x01AD, 0x0288, 0x01B0, 0x01B0,
    0x028A, 0x028B, 0x01B4, 0x01B4, 0x01B6, 0x01B6, 0x0292, 0x01B9,
    0x01B9, 0x01BA, 0x01BB, 0x01BD,

    // 0x01C4 - 0x01CA   7 entries, starting at index 60
    0x01C6, 0x01C6, 0x01C6, 0x01C9, 0x01C9, 0x01C9, 0x01CC,

    // 0x01F1 - 0x01F7   7 entries, starting at index 67
    0x01F3, 0x01F3, 0x01F3, 0x01F5, 0x01F5, 0x0195, 0x01BF,

    // 0x023A - 0x024E   21 entries, starting at index 74
    0x2C65, 0x023C, 0x023C, 0x019A, 0x2C66, 0x023F, 0x0240, 0x0242,
    0x0242, 0x0180, 0x0289, 0x028C, 0x0247, 0x0247, 0x0249, 0x0249,
    0x024B, 0x024B, 0x024D, 0x024D, 0x024F,

    // 0x0370 - 0x0376   7 entries, starting at index 95
    0x0371, 0x0371, 0x0373, 0x0373, 0x0374, 0x0375, 0x0377,

    // 0x0386 - 0x038F   10 entries, starting at index 102
    0x03AC, 0x0387, 0x03AD, 0x03AE, 0x03AF, 0x038B, 0x03CC, 0x038D,
    0x03CD, 0x03CE,

    // 0x03F4 - 0x03FF   12 entries, starting at index 112
    0x03B8, 0x03F5, 0x03F6, 0x03F8, 0x03F8, 0x03F2, 0x03FB, 0x03FB,
    0x03FC, 0x037B, 0x037C, 0x037D
};
65
66
67static const uint16_t _toLower[] =
68{
69 0x00C0, 0x00D6, CO_All, 32, // Latin
70 0x00D8, 0x00DE, CO_All, 32,
71 0x0100, 0x012E, CO_Even, 1,
72 0x0130, 0x0130, CO_Set, 0x69,
73 0x0132, 0x0136, CO_Even, 1,
74 0x0139, 0x0147, CO_Odd, 1,
75 0x014A, 0x0176, CO_Even, 1,
76 0x0178, 0x0178, CO_Set, 0xFF,
77 0x0179, 0x017D, CO_Odd, 1,
78 0x0181, 0x01BC, CO_Map, 0,
79 0x01C4, 0x01CA, CO_Map, 60,
80 0x01CB, 0x01DB, CO_Odd, 1,
81 0x01DE, 0x01EE, CO_Even, 1,
82 0x01F1, 0x01F7, CO_Map, 67,
83 0x01F8, 0x021E, CO_Even, 1,
84 0x0220, 0x0220, CO_Set, 0x19E,
85 0x0222, 0x0232, CO_Even, 1,
86 0x023A, 0x024E, CO_Map, 74,
87 0x0370, 0x0376, CO_Map, 95, // Greek
88 0x037F, 0x037F, CO_Set, 0x3F3,
89 0x0386, 0x038F, CO_Map, 102,
90 0x0391, 0x03A1, CO_All, 32,
91 0x03A3, 0x03AB, CO_All, 32,
92 0x03CF, 0x03CF, CO_Set, 0x3D7,
93 0x03D8, 0x03EE, CO_Even, 1,
94 0x03F4, 0x03FF, CO_Map, 112,
95 0x0400, 0x040F, CO_All, 80, // Cyrillic
96 0x0410, 0x042F, CO_All, 32,
97 0x0460, 0x0480, CO_Even, 1,
98 0x048A, 0x04BE, CO_Even, 1,
99 0x04C0, 0x04C0, CO_Set, 0x4CF,
100 0x04C1, 0x04CD, CO_Odd, 1,
101 0x04D0, 0x052E, CO_Even, 1,
102 0x0531, 0x0556, CO_All, 48, // Armenian
103 0x10A0, 0x10C5, CO_All, 7264, // Georgian
104 0x1E00, 0x1E94, CO_Even, 1, // Latin
105 0x1E9E, 0x1E9E, CO_Set, 0xDF,
106 0x1EA0, 0x1EFE, CO_Even, 1,
107 0x1F08, 0x1F0F, CO_All, -8, // Greek
108 0x1F18, 0x1F1D, CO_All, -8,
109 0x1F28, 0x1F2F, CO_All, -8,
110 0x1F38, 0x1F3F, CO_All, -8,
111 0x1F48, 0x1F4D, CO_All, -8,
112
113 0x24B6, 0x24CF, CO_All, 26, // Circled
114 0x2C00, 0x2C2E, CO_All, 48, // Glagolitic
115
116 0xFF21, 0xFF3A, CO_All, 32, // Fullwidth Latin
117};
118
119
/*
  Explicit uppercase results for the CO_Map ranges of _toUpper.

  Each section holds one result per code point of the range named in its
  comment (range, entry count, index of the first entry).  The index of the
  first entry is the 'value' stored in the matching _toUpper row.

  NOTE(review): 0x01C4-0x01CC and 0x01F1-0x01F3 resolve to 0x01C5, 0x01C8,
  0x01CB and 0x01F2, which look like the titlecase digraph forms rather than
  the all-uppercase ones - confirm this is intended.
*/
static const uint16_t _toUpperMap[] =
{
    // 0x017F - 0x01CA   76 entries, starting at index 0
    0x0053, 0x0243, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186,
    0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E,
    0x018F, 0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196,
    0x0197, 0x0198, 0x0198, 0x023D, 0x019B, 0x019C, 0x019D, 0x0220,
    0x019F, 0x01A0, 0x01A0, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6,
    0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE,
    0x01AF, 0x01AF, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5,
    0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE,
    0x01F7, 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C5, 0x01C5, 0x01C5,
    0x01C8, 0x01C8, 0x01C8, 0x01CB,

    // 0x01F1 - 0x01F5   5 entries, starting at index 76
    0x01F2, 0x01F2, 0x01F2, 0x01F4, 0x01F4,

    // 0x023C - 0x0275   58 entries, starting at index 81
    0x023B, 0x023D, 0x023E, 0x2C7E, 0x2C7F, 0x0241, 0x0241, 0x0243,
    0x0244, 0x0245, 0x0246, 0x0246, 0x0248, 0x0248, 0x024A, 0x024A,
    0x024C, 0x024C, 0x024E, 0x024E, 0x2C6F, 0x2C6D, 0x2C70, 0x0181,
    0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190,
    0xA7AB, 0x025D, 0x025E, 0x025F, 0x0193, 0xA7AC, 0x0262, 0x0194,
    0x0264, 0xA78D, 0xA7AA, 0x0267, 0x0197, 0x0196, 0x026A, 0x2C62,
    0xA7AD, 0x026D, 0x026E, 0x019C, 0x0270, 0x2C6E, 0x019D, 0x0273,
    0x0274, 0x019F,

    // 0x027D - 0x0283   7 entries, starting at index 139
    0x2C64, 0x027E, 0x027F, 0x01A6, 0x0281, 0x0282, 0x01A9,

    // 0x0288 - 0x028C   5 entries, starting at index 146
    0x01AE, 0x0244, 0x01B1, 0x01B2, 0x0245,

    // 0x0371 - 0x037D   13 entries, starting at index 151
    0x0370, 0x0372, 0x0372, 0x0374, 0x0375, 0x0376, 0x0376, 0x0378,
    0x0379, 0x037A, 0x03FD, 0x03FE, 0x03FF,

    // 0x03AC - 0x03AF   4 entries, starting at index 164
    0x0386, 0x0388, 0x0389, 0x038A,

    // 0x03CC - 0x03D7   12 entries, starting at index 168
    0x038C, 0x038E, 0x038F, 0x03CF, 0x0392, 0x0398, 0x03D2, 0x03D3,
    0x03D4, 0x03A6, 0x03A0, 0x03CF,

    // 0x03F0 - 0x03FB   12 entries, starting at index 180
    0x039A, 0x03A1, 0x03F9, 0x037F, 0x03F4, 0x0395, 0x03F6, 0x03F7,
    0x03F7, 0x03F9, 0x03FA, 0x03FA
};
168
169
170static const uint16_t _toUpper[] =
171{
172 0x00B5, 0x00B5, CO_Set, 0x39C, // Latin
173 0x00E0, 0x00F6, CO_All, -32,
174 0x00F8, 0x00FE, CO_All, -32,
175 0x00FF, 0x00FF, CO_Set, 0x178,
176 0x0101, 0x012F, CO_Odd, -1,
177 0x0131, 0x0131, CO_Set, 0x49,
178 0x0133, 0x0137, CO_Odd, -1,
179 0x013A, 0x0148, CO_Even, -1,
180 0x014B, 0x0177, CO_Odd, -1,
181 0x017A, 0x017E, CO_Even, -1,
182 0x017F, 0x01CA, CO_Map, 0,
183 0x01CC, 0x01DC, CO_Even, -1,
184 0x01DD, 0x01DD, CO_Set, 0x18E,
185 0x01DF, 0x01EF, CO_Odd, -1,
186 0x01F1, 0x01F5, CO_Map, 76,
187 0x01F9, 0x021F, CO_Odd, -1,
188 0x0223, 0x0233, CO_Odd, -1,
189 0x023C, 0x0275, CO_Map, 81,
190 0x027D, 0x0283, CO_Map, 139,
191 0x0287, 0x0287, CO_Set, 0xA7B1,
192 0x0288, 0x028C, CO_Map, 146,
193 0x0292, 0x0292, CO_Set, 0x1B7,
194 0x029E, 0x029E, CO_Set, 0xA7B0,
195 0x0345, 0x0345, CO_Set, 0x399, // Greek
196 0x0371, 0x037D, CO_Map, 151,
197 0x03AC, 0x03AF, CO_Map, 164,
198 0x03B1, 0x03C1, CO_All, -32,
199 0x03C2, 0x03C2, CO_Set, 0x3A3,
200 0x03C3, 0x03CB, CO_All, -32,
201 0x03CC, 0x03D7, CO_Map, 168,
202 0x03D9, 0x03EF, CO_Odd, -1,
203 0x03F0, 0x03FB, CO_Map, 180,
204 0x0430, 0x044F, CO_All, -32, // Cyrillic
205 0x0450, 0x045F, CO_All, -80,
206 0x0461, 0x0481, CO_Odd, -1,
207 0x048B, 0x04BF, CO_Odd, -1,
208 0x04C2, 0x04CE, CO_Even, -1,
209 0x04CF, 0x04CF, CO_Set, 0x4C0,
210 0x04D1, 0x052F, CO_Odd, -1,
211 0x0561, 0x0586, CO_All, -48, // Armenian
212 0x1D79, 0x1D79, CO_Set, 0xA77D,
213 0x1D7D, 0x1D7D, CO_Set, 0x2C63,
214 0x1E01, 0x1E95, CO_Odd, -1, // Latin
215 0x1E9B, 0x1E9B, CO_Set, 0x1E60,
216 0x1EA1, 0x1EFF, CO_Odd, -1,
217 0x1F00, 0x1F07, CO_All, 8, // Greek
218 0x1F10, 0x1F15, CO_All, 8,
219 0x1F20, 0x1F27, CO_All, 8,
220 0x1F30, 0x1F37, CO_All, 8,
221 0x1F40, 0x1F45, CO_All, 8,
222
223 0x24D0, 0x24E9, CO_All, -26, // Circled
224 0x2C30, 0x2C5E, CO_All, -48, // Glagolitic
225
226 0x2D00, 0x2D2D, CO_All, -7264, // Georgian
227
228 0xFF41, 0xFF5A, CO_All, -32, // Fullwidth Latin
229};
230
231
/*
  One row of a case conversion table.  The _toLower and _toUpper arrays are
  flat uint16_t lists that get cast to arrays of this struct, so it must
  remain exactly four uint16_t members in this order.
*/
typedef struct
{
    uint16_t low;       // First code point of the range (inclusive).
    uint16_t high;      // Last code point of the range (inclusive).
    uint16_t op;        // enum CaseOperator value.
    uint16_t value;     // Delta, replacement or map index, depending on op.
}
CaseEntry;
240
241
242static int _caseConvert( const CaseEntry* table, int count,
243 const uint16_t* cmap, int ch )
244{
245 const CaseEntry* ent;
246 int mid;
247 int low = 0;
248 int high = count - 1;
249
250 while( low <= high )
251 {
252 mid = ((unsigned int) (low + high)) >> 1;
253 ent = table + mid;
254
255 if( ch > ent->high )
256 low = mid + 1;
257 else if( ch < ent->low )
258 high = mid - 1;
259 else
260 {
261 switch( ent->op )
262 {
263 case CO_All:
264 return ch + ((int16_t) ent->value);
265
266 case CO_Odd:
267 if( ch & 1 )
268 return ch + ((int16_t) ent->value);
269 break;
270
271 case CO_Even:
272 if( (ch & 1) == 0 )
273 return ch + ((int16_t) ent->value);
274 break;
275
276 case CO_Set:
277 return ent->value;
278
279 case CO_Map:
280 return cmap[ ent->value + (ch - ent->low) ];
281 }
282 }
283 }
284
285 // Character not found.
286 return ch;
287}
288
289
294{
295 if( c <= 'Z' )
296 {
297 if( c >= 'A' )
298 return c + 32;
299 }
300 else if( c >= 0x00C0 )
301 {
302 return _caseConvert( (const CaseEntry*) _toLower,
303 sizeof(_toLower) / sizeof(CaseEntry),
304 _toLowerMap, c );
305 }
306 return c;
307}
308
309
314{
315 if( c <= 'z' )
316 {
317 if( c >= 'a' )
318 return c - 32;
319 }
320 else if( c >= 0x00B5 )
321 {
322 return _caseConvert( (const CaseEntry*) _toUpper,
323 sizeof(_toUpper) / sizeof(CaseEntry),
324 _toUpperMap, c );
325 }
326 return c;
327}
328
329
330//EOF
int ur_charUppercase(int c)
Convert UCS2 character to uppercase.
Definition ucs2_case.c:313
int ur_charLowercase(int c)
Convert UCS2 character to lowercase.
Definition ucs2_case.c:293