TextLocalizer.cs 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. using System;
  2. using System.Globalization;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6. using Emby.Server.Implementations.Localization;
  7. namespace Emby.Server.Core.Localization
  8. {
  /// <summary>
  /// Unicode text helpers: diacritic removal and compatibility normalization
  /// that tolerate strings containing malformed UTF-16 (unpaired surrogates).
  /// </summary>
  public class TextLocalizer : ITextLocalizer
  {
      // Matches unpaired UTF-16 surrogates: a high surrogate that is not followed by
      // a low surrogate, or a low surrogate that is not preceded by a high surrogate.
      // Built once and shared instead of being re-parsed on every call.
      // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
      private static readonly Regex InvalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

      /// <summary>
      /// Removes diacritics from <paramref name="text"/>: the text is decomposed
      /// (form D), every character in the <see cref="UnicodeCategory.NonSpacingMark"/>
      /// category is dropped, and the remainder is recomposed (form C).
      /// </summary>
      /// <param name="text">The text to process.</param>
      /// <returns>The text without non-spacing marks.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public string RemoveDiacritics(string text)
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          var decomposed = Normalize(text, NormalizationForm.FormD);
          var builder = new StringBuilder(decomposed.Length);

          foreach (var ch in decomposed)
          {
              if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
              {
                  builder.Append(ch);
              }
          }

          return Normalize(builder.ToString(), NormalizationForm.FormC);
      }

      /// <summary>
      /// Normalizes <paramref name="text"/> to <paramref name="form"/> without letting
      /// an <see cref="ArgumentException"/> from <see cref="string.Normalize(NormalizationForm)"/> escape.
      /// </summary>
      /// <param name="text">The text to normalize; must not be null.</param>
      /// <param name="form">The target normalization form.</param>
      /// <param name="stripStringOnFailure">
      /// When true (the default), a failed normalization is retried once after
      /// unpaired surrogates have been removed. When false, the input is returned
      /// unchanged on failure.
      /// </param>
      /// <returns>
      /// The normalized text, or - if normalization keeps failing - the text as it
      /// stood at the last attempt (stripped when a retry was made).
      /// </returns>
      private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
      {
          try
          {
              return text.Normalize(form);
          }
          catch (ArgumentException)
          {
              // string.Normalize throws if the input contains invalid unicode chars
              if (!stripStringOnFailure)
              {
                  return text;
              }
          }

          var sanitized = StripInvalidUnicodeCharacters(text);

          try
          {
              return sanitized.Normalize(form);
          }
          catch (ArgumentException)
          {
              // if it still fails, give back the stripped (not normalized) text
              return sanitized;
          }
      }

      /// <summary>
      /// Deletes every unpaired surrogate code unit from <paramref name="str"/>.
      /// </summary>
      /// <param name="str">The text to clean.</param>
      /// <returns>The text with unpaired surrogates removed.</returns>
      private static string StripInvalidUnicodeCharacters(string str)
      {
          return InvalidCharactersRegex.Replace(str, "");
      }

      /// <summary>
      /// Normalizes <paramref name="text"/> to Unicode normalization form KD.
      /// Uses the same fault-tolerant path as <see cref="RemoveDiacritics"/>, so
      /// malformed UTF-16 input is stripped of unpaired surrogates instead of
      /// causing an exception.
      /// </summary>
      /// <param name="text">The text to normalize.</param>
      /// <returns>The text in normalization form KD.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public string NormalizeFormKD(string text)
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          return Normalize(text, NormalizationForm.FormKD);
      }
  }
  57. }