123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- using System;
- using System.Globalization;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- namespace Emby.Server.Implementations.Localization
- {
- public class TextLocalizer : ITextLocalizer
- {
- public string RemoveDiacritics(string text)
- {
- if (text == null)
- {
- throw new ArgumentNullException(nameof(text));
- }
- var chars = Normalize(text, NormalizationForm.FormD)
- .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark);
- return Normalize(string.Concat(chars), NormalizationForm.FormC);
- }
- private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
- {
- if (stripStringOnFailure)
- {
- try
- {
- return text.Normalize(form);
- }
- catch (ArgumentException)
- {
- // will throw if input contains invalid unicode chars
- // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
- text = StripInvalidUnicodeCharacters(text);
- return Normalize(text, form, false);
- }
- }
- try
- {
- return text.Normalize(form);
- }
- catch (ArgumentException)
- {
- // if it still fails, return the original text
- return text;
- }
- }
- private static string StripInvalidUnicodeCharacters(string str)
- {
- var invalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");
- return invalidCharactersRegex.Replace(str, "");
- }
- public string NormalizeFormKD(string text)
- {
- return text.Normalize(NormalizationForm.FormKD);
- }
- }
- }
|