2
0

StringExtensions.cs 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. using System;
  2. using System.Globalization;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6. namespace MediaBrowser.Controller.Extensions
  7. {
  /// <summary>
  /// Extension methods for <see cref="string"/> values.
  /// </summary>
  public static class StringExtensions
  {
      // Matches UTF-16 surrogates that are not part of a valid pair: a high surrogate
      // not followed by a low surrogate, or a low surrogate not preceded by a high one.
      // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
      private static readonly Regex UnpairedSurrogateRegex = new Regex(
          "([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

      /// <summary>
      /// Removes diacritics (non-spacing combining marks) from the given string.
      /// </summary>
      /// <remarks>
      /// The text is decomposed (form D), every code point whose Unicode category is
      /// <see cref="UnicodeCategory.NonSpacingMark"/> is dropped, and the remainder is
      /// recomposed (form C). Classification is done per code point, so marks encoded
      /// as surrogate pairs are removed as well.
      /// </remarks>
      /// <param name="text">The text to strip.</param>
      /// <returns>The text without non-spacing marks.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
      public static string RemoveDiacritics(this string text)
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          var decomposed = Normalize(text, NormalizationForm.FormD);
          var builder = new StringBuilder(decomposed.Length);

          for (var i = 0; i < decomposed.Length; i++)
          {
              // A valid surrogate pair is one code point spanning two chars; keep or drop it as a unit.
              var width = char.IsSurrogatePair(decomposed, i) ? 2 : 1;

              // The (string, index) overload classifies the whole code point, not a single code unit.
              if (CharUnicodeInfo.GetUnicodeCategory(decomposed, i) != UnicodeCategory.NonSpacingMark)
              {
                  builder.Append(decomposed, i, width);
              }

              i += width - 1;
          }

          return Normalize(builder.ToString(), NormalizationForm.FormC);
      }

      /// <summary>
      /// Normalizes <paramref name="text"/> to <paramref name="form"/> without throwing on invalid input.
      /// </summary>
      /// <param name="text">The text to normalize.</param>
      /// <param name="form">The target normalization form.</param>
      /// <param name="stripStringOnFailure">
      /// When <c>true</c> and normalization fails, unpaired surrogates are removed and normalization
      /// is attempted once more. When <c>false</c>, the text is returned unchanged on failure.
      /// </param>
      /// <returns>
      /// The normalized text; if normalization keeps failing, the text as it was at the last attempt
      /// (already stripped of unpaired surrogates when stripping was requested).
      /// </returns>
      private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
      {
          try
          {
              return text.Normalize(form);
          }
          catch (ArgumentException)
          {
              // will throw if input contains invalid unicode chars
              if (!stripStringOnFailure)
              {
                  return text;
              }
          }

          text = UnpairedSurrogateRegex.Replace(text, string.Empty);

          try
          {
              return text.Normalize(form);
          }
          catch (ArgumentException)
          {
              // if it still fails, give back the stripped text rather than throwing
              return text;
          }
      }
  }
  50. }