// TextLocalizer.cs (2.0 KB)
  1. using System;
  2. using System.Globalization;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6. namespace Emby.Server.Implementations.Localization
  7. {
  /// <summary>
  /// Unicode text helpers: diacritic removal and compatibility decomposition (FormKD).
  /// </summary>
  public class TextLocalizer : ITextLocalizer
  {
      /// <summary>
      /// Matches unpaired UTF-16 surrogates: a high surrogate that is not followed by a
      /// low surrogate, or a low surrogate that is not preceded by a high surrogate.
      /// Built once and reused instead of being re-created on every call.
      /// </summary>
      private static readonly Regex InvalidSurrogateRegex = new Regex(
          "([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])",
          RegexOptions.Compiled);

      /// <summary>
      /// Removes diacritics from <paramref name="text"/> by decomposing it (FormD),
      /// dropping every character whose Unicode category is
      /// <see cref="UnicodeCategory.NonSpacingMark"/>, and recomposing the rest (FormC).
      /// </summary>
      /// <param name="text">The text to process.</param>
      /// <returns>The text without non-spacing marks, in normalization form C.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
      public string RemoveDiacritics(string text)
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          var decomposed = Normalize(text, NormalizationForm.FormD);

          // The result can never be longer than the decomposed input.
          var builder = new StringBuilder(decomposed.Length);
          foreach (var ch in decomposed)
          {
              if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
              {
                  builder.Append(ch);
              }
          }

          return Normalize(builder.ToString(), NormalizationForm.FormC);
      }

      /// <summary>
      /// Normalizes <paramref name="text"/> to normalization form KD.
      /// Invalid Unicode is handled the same way as in <see cref="RemoveDiacritics"/>:
      /// unpaired surrogates are stripped instead of causing an exception.
      /// </summary>
      /// <param name="text">The text to normalize.</param>
      /// <returns>The normalized text.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
      public string NormalizeFormKD(string text)
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          return Normalize(text, NormalizationForm.FormKD);
      }

      /// <summary>
      /// Normalizes <paramref name="text"/> to <paramref name="form"/> without throwing on
      /// invalid Unicode. If the first attempt fails, unpaired surrogates are stripped and
      /// normalization is retried once; if that fails too, the stripped text is returned
      /// without being normalized.
      /// </summary>
      /// <param name="text">The text to normalize; must not be <c>null</c>.</param>
      /// <param name="form">The target normalization form.</param>
      /// <returns>The normalized text, or the stripped text if normalization keeps failing.</returns>
      private static string Normalize(string text, NormalizationForm form)
      {
          try
          {
              return text.Normalize(form);
          }
          catch (ArgumentException)
          {
              // will throw if input contains invalid unicode chars
              // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
          }

          var stripped = StripInvalidUnicodeCharacters(text);

          try
          {
              return stripped.Normalize(form);
          }
          catch (ArgumentException)
          {
              // Still not normalizable: fall back to the stripped text as-is.
              return stripped;
          }
      }

      /// <summary>
      /// Removes every unpaired UTF-16 surrogate from <paramref name="str"/>.
      /// </summary>
      /// <param name="str">The text to clean.</param>
      /// <returns>The text with all unpaired surrogates removed.</returns>
      private static string StripInvalidUnicodeCharacters(string str)
      {
          return InvalidSurrogateRegex.Replace(str, string.Empty);
      }
  }
  56. }