MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ctype-latin1.c
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #include <my_global.h>
17 #include "m_string.h"
18 #include "m_ctype.h"
19 
/*
  Character classification flags for the latin1 character set.

  The initializer holds 257 bytes: a single leading 0 followed by one flags
  byte for each of the 256 character codes, 16 codes per row.

  NOTE(review): the leading 0 presumably allows lookups of the form
  table[code + 1]; the meaning of the individual flag bits is defined
  outside this file (presumably the _MY_* masks in m_ctype.h) -- confirm.
*/
static uchar ctype_latin1[] = {
  0,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
  16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
  16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
  16, 0, 16, 2, 16, 16, 16, 16, 16, 16, 1, 16, 1, 0, 1, 0,
  0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 2, 0, 2, 1,
  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
};
39 
/*
  Lower-case mapping, indexed by character code (16 codes per row).

  Codes 65-90 ('A'-'Z') map to 97-122 ('a'-'z') and codes 192-222 map to
  224-254, except 215 which maps to itself. Every other code, including
  223, maps to itself.
*/
static uchar to_lower_latin1[] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
58 
/*
  Upper-case mapping, indexed by character code (16 codes per row).

  Codes 97-122 ('a'-'z') map to 65-90 ('A'-'Z') and codes 224-254 map to
  192-222, except 247 which maps to itself. Every other code, including
  223 and 255, maps to itself.
*/
static uchar to_upper_latin1[] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
};
77 
/*
  Sort weights used by the latin1_swedish_ci collation (see
  my_charset_latin1 below), indexed by character code, 16 codes per row.

  'a'-'z' get the same weights as 'A'-'Z' (65-90), and codes 224-254 get
  the same weights as codes 192-222, except 247 which keeps its own value.
  Most accented letters share the weight of their base letter; a few are
  placed after 'Z' instead: 196/228 and 198/230 weigh 92, 197/229 weigh 91
  and 214/246 weigh 93. Codes 220/252 and 221/253 weigh 89 ('Y').
*/
static uchar sort_order_latin1[] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79, 93,215,216, 85, 85, 85, 89, 89,222,223,
  65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255
};
96 
/*
  WL#1494 notes:

  We'll use cp1252 instead of iso-8859-1.
  cp1252 contains printable characters in the range 0x80-0x9F.
  In ISO 8859-1, these code points have no associated printable
  characters. Therefore, by converting from CP1252 to ISO 8859-1,
  one would lose the euro (for instance). Since most people are
  unaware of the difference, and since we don't really want a
  "Windows ANSI" to differ from a "Unix ANSI", we will:

   - continue to pretend the latin1 character set is ISO 8859-1
   - actually allow the storage of euro etc. so it's actually cp1252

  Also we'll map these five undefined cp1252 characters:
    0x81, 0x8D, 0x8F, 0x90, 0x9D
  into the corresponding control characters:
    U+0081, U+008D, U+008F, U+0090, U+009D,
  like ISO-8859-1 does. Otherwise, loading "mysqldump"
  output doesn't reproduce these undefined characters.
*/
118 
/*
  latin1 byte -> Unicode code point, indexed by byte value (8 per row).

  Bytes 0x00-0x7F and 0xA0-0xFF map to the code point with the same value.
  Bytes 0x80-0x9F map to other code points (for example 0x80 -> U+20AC),
  except 0x81, 0x8D, 0x8F, 0x90 and 0x9D, which map to the code point with
  the same value. Only entry 0 is zero; my_mb_wc_latin1() relies on a zero
  entry meaning "no mapping" for any non-zero byte.
*/
unsigned short cs_to_uni[256]={
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,
0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,
0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,
0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178,
0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
};
/*
  Reverse mapping for code points U+0000-U+00FF: indexed by the low byte of
  the code point, yields the latin1 byte (8 entries per row). Installed as
  entry 0 of uni_to_cs[].

  Every entry equals its index except in the range 0x80-0x9F, where only
  0x81, 0x8D, 0x8F, 0x90 and 0x9D are mapped; the other entries there are
  0x00, which my_wc_mb_latin1() treats as "not representable".
*/
uchar pl00[256]={
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x00,0x81,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x8D,0x00,0x8F,
0x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x9D,0x00,0x00,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
187 uchar pl01[256]={
188 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
189 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
190 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
191 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
192 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
193 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
194 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
195 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
196 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
197 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
198 0x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00,
199 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
200 0x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00,
201 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
202 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
203 0x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00,
204 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
205 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
206 0x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00,
207 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
208 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
209 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
210 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
211 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
212 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
213 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
214 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
215 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
216 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
217 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
218 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
219 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
220 };
221 uchar pl02[256]={
222 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
223 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
224 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
225 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
226 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
227 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
229 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
230 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
231 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
232 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
233 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
236 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
237 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
241 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
243 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
246 0x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00,
247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
248 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
249 0x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00,
250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
251 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
252 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
253 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
254 };
255 uchar pl20[256]={
256 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
257 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
258 0x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00,
259 0x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00,
260 0x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00,
261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
262 0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
263 0x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00,
264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
267 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
268 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
272 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
277 0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
279 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
280 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
282 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
283 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
287 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
288 };
289 uchar pl21[256]={
290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294 0x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00,
295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
297 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
322 };
323 uchar *uni_to_cs[256]={
324 pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL,
325 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
326 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
327 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
328 pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL,
329 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
330 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
331 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
332 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
333 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
334 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
335 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
336 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
337 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
338 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
339 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
340 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
341 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
342 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
343 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
344 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
345 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
346 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
347 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
348 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
349 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
350 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
351 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
352 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
353 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
354 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
355 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
356 };
357 
358 static
359 int my_mb_wc_latin1(const CHARSET_INFO *cs __attribute__((unused)),
360  my_wc_t *wc,
361  const uchar *str,
362  const uchar *end __attribute__((unused)))
363 {
364  if (str >= end)
365  return MY_CS_TOOSMALL;
366 
367  *wc=cs_to_uni[*str];
368  return (!wc[0] && str[0]) ? -1 : 1;
369 }
370 
371 static
372 int my_wc_mb_latin1(const CHARSET_INFO *cs __attribute__((unused)),
373  my_wc_t wc,
374  uchar *str,
375  uchar *end __attribute__((unused)))
376 {
377  uchar *pl;
378 
379  if (str >= end)
380  return MY_CS_TOOSMALL;
381 
382  if (wc > 0xFFFF)
383  return MY_CS_ILUNI;
384 
385  pl= uni_to_cs[wc >> 8];
386  str[0]= pl ? pl[wc & 0xFF] : '\0';
387  return (!str[0] && wc) ? MY_CS_ILUNI : 1;
388 }
389 
/*
  Character set handler shared by the three latin1 CHARSET_INFO objects in
  this file. Only the two Unicode conversion entries (my_mb_wc_latin1 and
  my_wc_mb_latin1) are latin1-specific; every other non-NULL entry is a
  *_8bit routine defined outside this file.

  The initializer is positional: the order of the entries must match the
  member order of MY_CHARSET_HANDLER.
  NOTE(review): the second NULL is presumably the "ismbchar" member --
  confirm against the MY_CHARSET_HANDLER declaration.
*/
static MY_CHARSET_HANDLER my_charset_handler=
{
  NULL, /* init */
  NULL,
  my_mbcharlen_8bit,
  my_numchars_8bit,
  my_charpos_8bit,
  my_well_formed_len_8bit,
  my_lengthsp_8bit,
  my_numcells_8bit,
  my_mb_wc_latin1,
  my_wc_mb_latin1,
  my_mb_ctype_8bit,
  my_caseup_str_8bit,
  my_casedn_str_8bit,
  my_caseup_8bit,
  my_casedn_8bit,
  my_snprintf_8bit,
  my_long10_to_str_8bit,
  my_longlong10_to_str_8bit,
  my_fill_8bit,
  my_strntol_8bit,
  my_strntoul_8bit,
  my_strntoll_8bit,
  my_strntoull_8bit,
  my_strntod_8bit,
  my_strtoll10_8bit,
  my_strntoull10rnd_8bit,
  my_scan_8bit
};
420 
421 
/*
  The "latin1_swedish_ci" collation of the latin1 character set
  (collation number 8), flagged as compiled-in and primary. It sorts by
  sort_order_latin1[] through the generic simple case-insensitive
  collation handler.
*/
CHARSET_INFO my_charset_latin1=
{
  8,0,0, /* number */
  MY_CS_COMPILED | MY_CS_PRIMARY, /* state */
  "latin1", /* cs name */
  "latin1_swedish_ci", /* name */
  "", /* comment */
  NULL, /* tailoring */
  ctype_latin1,
  to_lower_latin1,
  to_upper_latin1,
  sort_order_latin1,
  NULL, /* uca */
  cs_to_uni, /* tab_to_uni */
  NULL, /* tab_from_uni */
  &my_unicase_default,/* caseinfo */
  NULL, /* state_map */
  NULL, /* ident_map */
  1, /* strxfrm_multiply */
  1, /* caseup_multiply */
  1, /* casedn_multiply */
  1, /* mbminlen */
  1, /* mbmaxlen */
  0, /* min_sort_char */
  255, /* max_sort_char */
  ' ', /* pad char */
  0, /* escape_with_backslash_is_dangerous */
  1, /* levels_for_compare */
  1, /* levels_for_order */
  &my_charset_handler,
  &my_collation_8bit_simple_ci_handler
};
454 
455 
456 
457 
/*
 * The rest of this file implements the latin1 character set with German
 * sorting (latin1_german2_ci).
 *
 * The modern sort order is used, where:
 *
 * 'ä' -> "ae"
 * 'ö' -> "oe"
 * 'ü' -> "ue"
 * 'ß' -> "ss"
 */
468 
469 
/*
 * This is a simple latin1 mapping table, which maps accented
 * characters to their non-accented equivalents. Note: in this
 * table, 'á' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
 * accented characters except the following are treated the same way:
 * Ü, ü, Ö, ö, Ä, ä
 * These six are mapped to the codes of Ü, Ö and Ä respectively.
 */
477 
/*
  Sort-order table installed in my_charset_latin1_german2_ci, indexed by
  character code (16 codes per row).

  'a'-'z' get the values of 'A'-'Z' (65-90). In the range 192-255 most
  letters get the value of an unaccented capital, but codes 196/228,
  214/246 and 220/252 get 196, 214 and 220 respectively, and code 223
  keeps its own value.

  Note that the comparison, sort-key and hash functions of this collation
  in this file read combo1map[] and combo2map[] rather than this table.
*/
static uchar sort_order_latin1_de[] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
  65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
};
496 
497 
/*
  First sort weight of every character for the German collation, indexed
  by character code (16 codes per row).

  Same as sort_order_latin1_de, but maps ALL accented chars to unaccented
  ones: codes 196/228 give 65 ('A'), 214/246 give 79 ('O'), 220/252 give
  85 ('U') and 223 gives 83 ('S'). The characters that expand to two
  weights get their second weight from combo2map[].
*/

uchar combo1map[]={
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
  65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
};
520 
/*
  Second sort weight of every character for the German collation, indexed
  by character code. 0 means that the character has no second weight.

  The only non-zero values are 69 ('E') and 83 ('S'): combined with the
  first weight from combo1map[] they produce the two-letter expansions
  used by this collation ("AE", "OE", "UE" and "SS").

  The rows of this initializer are 26 entries wide (the last one is
  shorter), so they do not line up with the 16-entry rows of combo1map[].
  NOTE(review): the initializer appears to hold 257 values, one more than
  the 256 character codes -- confirm before relying on the array's size.
*/
uchar combo2map[]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
};
533 
534 
/*
  Some notes about the following comparison rules:
  By definition, my_strnncoll_latin1_de() must work exactly as if
  my_strnxfrm_latin1_de() had been called on both strings and the
  resulting strings had been compared.

  This means that:
  Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will
  convert all of them to AE.

  The other option would be to not do any accent removal in
  sort_order_latin1_de[] at all
*/
547 
548 
549 static int my_strnncoll_latin1_de(const CHARSET_INFO *cs
550  __attribute__((unused)),
551  const uchar *a, size_t a_length,
552  const uchar *b, size_t b_length,
553  my_bool b_is_prefix)
554 {
555  const uchar *a_end= a + a_length;
556  const uchar *b_end= b + b_length;
557  uchar a_char, a_extend= 0, b_char, b_extend= 0;
558 
559  while ((a < a_end || a_extend) && (b < b_end || b_extend))
560  {
561  if (a_extend)
562  {
563  a_char=a_extend; a_extend=0;
564  }
565  else
566  {
567  a_extend=combo2map[*a];
568  a_char=combo1map[*a++];
569  }
570  if (b_extend)
571  {
572  b_char=b_extend; b_extend=0;
573  }
574  else
575  {
576  b_extend=combo2map[*b];
577  b_char=combo1map[*b++];
578  }
579  if (a_char != b_char)
580  return (int) a_char - (int) b_char;
581  }
582  /*
583  A simple test of string lengths won't work -- we test to see
584  which string ran out first
585  */
586  return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
587  (b < b_end || b_extend) ? -1 : 0);
588 }
589 
590 
591 static int my_strnncollsp_latin1_de(const CHARSET_INFO *cs
592  __attribute__((unused)),
593  const uchar *a, size_t a_length,
594  const uchar *b, size_t b_length,
595  my_bool diff_if_only_endspace_difference)
596 {
597  const uchar *a_end= a + a_length, *b_end= b + b_length;
598  uchar a_char, a_extend= 0, b_char, b_extend= 0;
599  int res;
600 
601 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
602  diff_if_only_endspace_difference= 0;
603 #endif
604 
605  while ((a < a_end || a_extend) && (b < b_end || b_extend))
606  {
607  if (a_extend)
608  {
609  a_char=a_extend;
610  a_extend= 0;
611  }
612  else
613  {
614  a_extend= combo2map[*a];
615  a_char= combo1map[*a++];
616  }
617  if (b_extend)
618  {
619  b_char= b_extend;
620  b_extend= 0;
621  }
622  else
623  {
624  b_extend= combo2map[*b];
625  b_char= combo1map[*b++];
626  }
627  if (a_char != b_char)
628  return (int) a_char - (int) b_char;
629  }
630  /* Check if double character last */
631  if (a_extend)
632  return 1;
633  if (b_extend)
634  return -1;
635 
636  res= 0;
637  if (a != a_end || b != b_end)
638  {
639  int swap= 1;
640  if (diff_if_only_endspace_difference)
641  res= 1; /* Assume 'a' is bigger */
642  /*
643  Check the next not space character of the longer key. If it's < ' ',
644  then it's smaller than the other key.
645  */
646  if (a == a_end)
647  {
648  /* put shorter key in a */
649  a_end= b_end;
650  a= b;
651  swap= -1; /* swap sign of result */
652  res= -res;
653  }
654  for ( ; a < a_end ; a++)
655  {
656  if (*a != ' ')
657  return (*a < ' ') ? -swap : swap;
658  }
659  }
660  return res;
661 }
662 
663 
664 static size_t
665 my_strnxfrm_latin1_de(const CHARSET_INFO *cs,
666  uchar *dst, size_t dstlen, uint nweights,
667  const uchar* src, size_t srclen, uint flags)
668 {
669  uchar *de= dst + dstlen;
670  const uchar *se= src + srclen;
671  uchar *d0= dst;
672  for ( ; src < se && dst < de && nweights; src++, nweights--)
673  {
674  uchar chr= combo1map[*src];
675  *dst++= chr;
676  if ((chr= combo2map[*src]) && dst < de && nweights > 1)
677  {
678  *dst++= chr;
679  nweights--;
680  }
681  }
682  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
683 }
684 
685 
686 void my_hash_sort_latin1_de(const CHARSET_INFO *cs __attribute__((unused)),
687  const uchar *key, size_t len,
688  ulong *nr1, ulong *nr2)
689 {
690  const uchar *end;
691  /*
692  Remove end space. We have to do this to be able to compare
693  'AE' and 'Ä' as identical
694  */
695  end= skip_trailing_space(key, len);
696 
697  for (; key < end ; key++)
698  {
699  uint X= (uint) combo1map[(uint) *key];
700  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
701  nr2[0]+=3;
702  if ((X= combo2map[*key]))
703  {
704  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
705  nr2[0]+=3;
706  }
707  }
708 }
709 
710 
/*
  Collation handler for latin1_german2_ci. The comparison
  (my_strnncoll_latin1_de, my_strnncollsp_latin1_de), sort-key
  (my_strnxfrm_latin1_de) and hash (my_hash_sort_latin1_de) entries are
  the German-specific functions from this file; the remaining entries are
  generic routines defined outside this file.

  The initializer is positional: the order of the entries must match the
  member order of MY_COLLATION_HANDLER.
*/
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
{
  NULL, /* init */
  my_strnncoll_latin1_de,
  my_strnncollsp_latin1_de,
  my_strnxfrm_latin1_de,
  my_strnxfrmlen_simple,
  my_like_range_simple,
  my_wildcmp_8bit,
  my_strcasecmp_8bit,
  my_instr_simple,
  my_hash_sort_latin1_de,
  my_propagate_complex
};
725 
726 
/*
  The "latin1_german2_ci" collation of the latin1 character set
  (collation number 31). It shares the ctype, case and Unicode tables with
  the other latin1 collations but uses the German collation handler.
  strxfrm_multiply is 2 because one character can produce two weights.
*/
CHARSET_INFO my_charset_latin1_german2_ci=
{
  31,0,0, /* number */
  MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
  "latin1", /* cs name */
  "latin1_german2_ci", /* name */
  "", /* comment */
  NULL, /* tailoring */
  ctype_latin1,
  to_lower_latin1,
  to_upper_latin1,
  sort_order_latin1_de,
  NULL, /* uca */
  cs_to_uni, /* tab_to_uni */
  NULL, /* tab_from_uni */
  &my_unicase_default, /* caseinfo */
  NULL, /* state_map */
  NULL, /* ident_map */
  2, /* strxfrm_multiply */
  1, /* caseup_multiply */
  1, /* casedn_multiply */
  1, /* mbminlen */
  1, /* mbmaxlen */
  0, /* min_sort_char */
  247, /* max_sort_char */
  ' ', /* pad char */
  0, /* escape_with_backslash_is_dangerous */
  1, /* levels_for_compare */
  1, /* levels_for_order */
  &my_charset_handler,
  &my_collation_german2_ci_handler
};
759 
760 
/*
  The "latin1_bin" collation of the latin1 character set (collation
  number 47), flagged MY_CS_BINSORT. It has no sort-order table and uses
  the generic 8-bit binary collation handler; the ctype, case and Unicode
  tables are shared with the other latin1 collations.
*/
CHARSET_INFO my_charset_latin1_bin=
{
  47,0,0, /* number */
  MY_CS_COMPILED|MY_CS_BINSORT, /* state */
  "latin1", /* cs name */
  "latin1_bin", /* name */
  "", /* comment */
  NULL, /* tailoring */
  ctype_latin1,
  to_lower_latin1,
  to_upper_latin1,
  NULL, /* sort_order */
  NULL, /* uca */
  cs_to_uni, /* tab_to_uni */
  NULL, /* tab_from_uni */
  &my_unicase_default, /* caseinfo */
  NULL, /* state_map */
  NULL, /* ident_map */
  1, /* strxfrm_multiply */
  1, /* caseup_multiply */
  1, /* casedn_multiply */
  1, /* mbminlen */
  1, /* mbmaxlen */
  0, /* min_sort_char */
  255, /* max_sort_char */
  ' ', /* pad char */
  0, /* escape_with_backslash_is_dangerous */
  1, /* levels_for_compare */
  1, /* levels_for_order */
  &my_charset_handler,
  &my_collation_8bit_bin_handler
};
793