How to convert accented characters to unaccented ones (Views: 713)
Problem/Question/Abstract: Is there a way to convert accented characters to unaccented (meaning ASCII A - Z, a - z)?

Answer: The classical way is to have a conversion table and do a lookup in that table. The problem with that is that the table is of course specific to a certain charset (encoding), like Windows Latin-1. You could build a table for a range of UNICODE (widechar) characters to get around this limitation and convert the strings to widestrings before you do the accent removals. The routine below uses ANSI characters with the Windows western (Latin-1) encoding.

{ Returns the unaccented ASCII base letter for an accented Latin letter in
  the range #192..#255 (for example #201 'É' -> 'E', #231 'ç' -> 'c').

  Every other character is returned unchanged:
    - plain ASCII (#0..#127);
    - #128..#159, whose meaning differs between Windows-1252 and
      ISO-8859-1/Unicode, so they are deliberately left alone;
    - symbols, and letters with no single-letter ASCII equivalent
      (Æ, Ð, Ø, Þ, ß, æ, ð, ø, þ);
    - any character above #255 (only possible where Char is a 16-bit type). }
function SimplifyChar(const _ch: char): char;
const
  Charmap: array[#128..#255] of Char = (
    { #128..#159: Windows-1252 specific code points, passed through }
    #128 { € }, #129 { unused }, #130 { ‚ }, #131 { ƒ },
    #132 { „ }, #133 { … }, #134 { † }, #135 { ‡ },
    #136 { ˆ }, #137 { ‰ }, #138 { Š }, #139 { ‹ },
    #140 { Œ }, #141 { unused }, #142 { Ž }, #143 { unused },
    #144 { unused }, #145 { ‘ }, #146 { ’ }, #147 { “ },
    #148 { ” }, #149 { • }, #150 { – }, #151 { — },
    #152 { ˜ }, #153 { ™ }, #154 { š }, #155 { › },
    #156 { œ }, #157 { unused }, #158 { ž }, #159 { Ÿ },
    { #160..#191: punctuation and symbols, passed through }
    #160 { no-break space }, #161 { ¡ }, #162 { ¢ }, #163 { £ },
    #164 { ¤ }, #165 { ¥ }, #166 { ¦ }, #167 { § },
    #168 { ¨ }, #169 { © }, #170 { ª }, #171 { « },
    #172 { ¬ }, #173 { soft hyphen }, #174 { ® }, #175 { ¯ },
    #176 { ° }, #177 { ± }, #178 { ² }, #179 { ³ },
    #180 { ´ }, #181 { µ }, #182 { ¶ }, #183 { · },
    #184 { ¸ }, #185 { ¹ }, #186 { º }, #187 { » },
    #188 { ¼ }, #189 { ½ }, #190 { ¾ }, #191 { ¿ },
    { #192..#223: upper-case letters }
    'A' { À }, 'A' { Á }, 'A' { Â }, 'A' { Ã },
    'A' { Ä }, 'A' { Å }, #198 { Æ }, 'C' { Ç },
    'E' { È }, 'E' { É }, 'E' { Ê }, 'E' { Ë },
    'I' { Ì }, 'I' { Í }, 'I' { Î }, 'I' { Ï },
    #208 { Ð }, 'N' { Ñ }, 'O' { Ò }, 'O' { Ó },
    'O' { Ô }, 'O' { Õ }, 'O' { Ö }, #215 { × },
    #216 { Ø }, 'U' { Ù }, 'U' { Ú }, 'U' { Û },
    'U' { Ü }, 'Y' { Ý }, #222 { Þ }, #223 { ß },
    { #224..#255: lower-case letters }
    'a' { à }, 'a' { á }, 'a' { â }, 'a' { ã },
    'a' { ä }, 'a' { å }, #230 { æ }, 'c' { ç },
    'e' { è }, 'e' { é }, 'e' { ê }, 'e' { ë },
    'i' { ì }, 'i' { í }, 'i' { î }, 'i' { ï },
    #240 { ð }, 'n' { ñ }, 'o' { ò }, 'o' { ó },
    'o' { ô }, 'o' { õ }, 'o' { ö }, #247 { ÷ },
    #248 { ø }, 'u' { ù }, 'u' { ú }, 'u' { û },
    'u' { ü }, 'y' { ý }, #254 { þ }, 'y' { ÿ }
  );
begin
  { The explicit upper bound matters when Char is a 16-bit type: a character
    above #255 must not be used as an index into the 128-entry table. }
  case _ch of
    #128..#255: Result := Charmap[_ch];
  else
    Result := _ch;
  end;
end;

The charmap table was created by this little routine and then edited:

{ Generates the Pascal source of an identity "Charmap" constant covering
  fromchar..tochar and places it on the clipboard, ready to be pasted into
  a unit and edited by hand. Each element is written as "#nnn { glyph }".
  fromchar must not be greater than tochar (checked with Assert). }
procedure CreateCharacterMap(fromchar, tochar: Char);
const
  { A source line is flushed before it would grow beyond this many characters. }
  MaxLineLen = 66;

  { Returns the text shown inside the comment that follows each element. }
  function DisplayStr(const ch: Char): string;
  begin
    if ch < #32 then
      { control characters in caret notation: #1 -> ^A, #0 -> ^@ }
      Result := '^' + Chr(Ord('A') - 1 + Ord(ch))
    else if ch = #127 then
      { DEL is a control character as well }
      Result := '^?'
    else if ch = '{' then
      { a raw brace would corrupt the generated comment }
      Result := 'opening brace'
    else if ch = '}' then
      Result := 'closing brace'
    else
      Result := ch;
  end;

var
  sl: TStringList;
  line, element: string;
  ch: Char;
begin
  Assert(fromchar <= tochar);
  sl := TStringList.Create;
  try
    sl.Add('Const');
    line := Format(' Charmap: array [#%d..#%d] of Char = (',
      [Ord(fromchar), Ord(tochar)]);
    sl.Add(line);
    line := '';
    for ch := fromchar to tochar do
    begin
      element := Format('#%3.3d { %s }', [Ord(ch), DisplayStr(ch)]);
      { start a new source line when this element would not fit }
      if (Length(line) + Length(element)) > MaxLineLen then
      begin
        sl.Add(' ' + line);
        line := '';
      end;
      line := line + element;
      { every element except the last is followed by a separator }
      if ch <> tochar then
        line := line + ', ';
    end;
    sl.Add(' ' + line);
    sl.Add(' );');
    Clipboard.AsText := sl.Text;
  finally
    sl.Free;
  end;
end;
|