StringExtensions.cs 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. #pragma warning disable CS1591
  2. using System;
  3. using System.Globalization;
  4. using System.Linq;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. namespace MediaBrowser.Controller.Extensions
  8. {
  9. /// <summary>
  10. /// Class BaseExtensions.
  11. /// </summary>
  12. public static class StringExtensions
  13. {
  14. public static string RemoveDiacritics(this string text)
  15. {
  16. if (text == null)
  17. {
  18. throw new ArgumentNullException(nameof(text));
  19. }
  20. var chars = Normalize(text, NormalizationForm.FormD)
  21. .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark);
  22. return Normalize(string.Concat(chars), NormalizationForm.FormC);
  23. }
  24. private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
  25. {
  26. if (stripStringOnFailure)
  27. {
  28. try
  29. {
  30. return text.Normalize(form);
  31. }
  32. catch (ArgumentException)
  33. {
  34. // will throw if input contains invalid unicode chars
  35. // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
  36. text = Regex.Replace(text, "([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])", "");
  37. return Normalize(text, form, false);
  38. }
  39. }
  40. try
  41. {
  42. return text.Normalize(form);
  43. }
  44. catch (ArgumentException)
  45. {
  46. // if it still fails, return the original text
  47. return text;
  48. }
  49. }
  50. }
  51. }