MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
mf_soundex.c
1 /* Copyright (C) 2000 MySQL AB
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
15 
16 /****************************************************************
17 * SOUNDEX ALGORITHM in C *
18 * *
19 * The basic Algorithm source is taken from EDN Nov. *
20 * 14, 1985 pg. 36. *
21 * *
22 * As a test Those in Illinois will find that the *
23 * first group of numbers in their drivers license *
24 * number is the soundex number for their last name. *
25 * *
26 * RHW PC-IBBS ID. #1230 *
27 * *
28 * As an extension if remove_garbage is set then all non- *
29 * alpha characters are skipped *
30 * *
31 * Note, that this implementation corresponds to the *
32 * original version of the algorithm, not to the more *
33 * popular "enhanced" version, described by Knuth. *
34 ****************************************************************/
35 
36 #include "mysys_priv.h"
37 #include <m_ctype.h>
38 #include "my_static.h"
39 
40 static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);
41 
42  /* The output string is 4 characters long, followed by a terminating NUL */
43  /* out_pntr can be == in_pntr */
44 
45 void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
46  pbool remove_garbage)
47 {
48  char ch,last_ch;
49  reg3 char * end;
50  register uchar *map=cs->to_upper;
51 
52  if (remove_garbage)
53  {
54  while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
55  in_pntr++;
56  }
57  *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */
58  last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */
59  /* for the first 'double-letter */
60  /* check. */
61  end=out_pntr+3; /* Loop on input letters until */
62  /* end of input (null) or output */
63  /* letter code count = 3 */
64 
65  in_pntr++;
66  while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
67  {
68  in_pntr++;
69  if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
70  {
71  *out_pntr++ = ch; /* letter, copy to output */
72  } /* for next double-letter check */
73  last_ch = ch; /* save code of last input letter */
74  }
75  while (out_pntr < end)
76  *out_pntr++ = '0';
77  *out_pntr=0; /* end string */
78  return;
79 } /* soundex */
80 
81 
82  /*
83  If alpha, map input letter to soundex code.
84  If not alpha and remove_garbage is set then skip to next char
85  else return 0
86  */
87 
88 static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
89 {
90  uchar ch;
91 
92  if (remove_garbage)
93  {
94  while (**ptr && !my_isalpha(cs,**ptr))
95  (*ptr)++;
96  }
97  ch=my_toupper(cs,**ptr);
98  if (ch < 'A' || ch > 'Z')
99  {
100  if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */
101  return '0'; /* threat as vokal */
102  return 0; /* Can't map */
103  }
104  return(soundex_map[ch-'A']);
105 } /* get_scode */