SrtParser.cs 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. #pragma warning disable CS1591
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.IO;
  6. using System.Text.RegularExpressions;
  7. using System.Threading;
  8. using MediaBrowser.Model.MediaInfo;
  9. using Microsoft.Extensions.Logging;
  10. namespace MediaBrowser.MediaEncoding.Subtitles
  11. {
  /// <summary>
  /// Parses SubRip (.srt) subtitle streams into a <see cref="SubtitleTrackInfo"/>.
  /// </summary>
  /// <remarks>
  /// Each cue is read as: an identifier line, a timing line ("start --> end"), and then
  /// text lines up to the next empty line. Cue text is sanitized so that only a small
  /// set of formatting tags (font, b, u, i, s) survives as markup.
  /// </remarks>
  public class SrtParser : ISubtitleParser
  {
      // Separator between the start and end timestamps, tolerating tabs/spaces around the arrow.
      private static readonly Regex _timeSeparatorRegex = new Regex(
          @"[\t ]*-->[\t ]*",
          RegexOptions.Compiled);

      // Brace-delimited override blocks such as {\an8} or {\c&HFFFFFF&}; these are stripped from the text.
      private static readonly Regex _overrideTagRegex = new Regex(
          @"\{(?:\\\d?[\w.-]+(?:\([^\)]*\)|&H?[0-9A-Fa-f]+&|))+\}",
          RegexOptions.IgnoreCase | RegexOptions.Compiled);

      // Matches an already-escaped font/b/u/i/s tag (with optional attributes) so it can be turned back into markup.
      private static readonly Regex _allowedHtmlTagRegex = new Regex(
          "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;",
          RegexOptions.IgnoreCase | RegexOptions.Compiled);

      private readonly ILogger _logger;

      /// <summary>
      /// Initializes a new instance of the <see cref="SrtParser"/> class.
      /// </summary>
      /// <param name="logger">Logger used to report timing lines that cannot be recognized.</param>
      public SrtParser(ILogger logger)
      {
          _logger = logger;
      }

      /// <inheritdoc />
      public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
      {
          var trackEvents = new List<SubtitleTrackEvent>();

          // Note: disposing the reader also disposes the supplied stream.
          using (var reader = new StreamReader(stream))
          {
              string line;
              while ((line = reader.ReadLine()) != null)
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  // Skip blank lines between cues.
                  if (string.IsNullOrWhiteSpace(line))
                  {
                      continue;
                  }

                  // First non-blank line of a cue is its identifier.
                  var subEvent = new SubtitleTrackEvent { Id = line };

                  // Second line must be the timing line.
                  line = reader.ReadLine();
                  if (string.IsNullOrWhiteSpace(line))
                  {
                      continue;
                  }

                  var time = _timeSeparatorRegex.Split(line);
                  if (time.Length < 2)
                  {
                      // This occurs when subtitle text has an empty line as part of the text.
                      // The text-reading loop below would need to change to resolve this.
                      _logger.LogWarning("Unrecognized line in srt: {0}", line);
                      continue;
                  }

                  // Trim so that an indented timing line still yields a valid start time.
                  subEvent.StartPositionTicks = GetTicks(time[0].AsSpan().Trim());

                  // Anything after the end timestamp (separated by a space or tab) is ignored.
                  var endTime = time[1].AsSpan();
                  var idx = endTime.IndexOfAny(' ', '\t');
                  if (idx > 0)
                  {
                      endTime = endTime.Slice(0, idx);
                  }

                  subEvent.EndPositionTicks = GetTicks(endTime);

                  // Collect text lines until an empty line or the end of the stream.
                  var textLines = new List<string>();
                  while ((line = reader.ReadLine()) != null)
                  {
                      if (string.IsNullOrEmpty(line))
                      {
                          break;
                      }

                      textLines.Add(line);
                  }

                  subEvent.Text = CleanText(string.Join(ParserValues.NewLine, textLines));
                  trackEvents.Add(subEvent);
              }
          }

          return new SubtitleTrackInfo { TrackEvents = trackEvents.ToArray() };
      }

      /// <summary>
      /// Normalizes raw cue text: converts "\N" markers to line breaks, strips brace-delimited
      /// override blocks, escapes angle brackets, and then restores the allowed formatting tags.
      /// </summary>
      /// <param name="text">The raw cue text with lines already joined.</param>
      /// <returns>The sanitized cue text.</returns>
      private static string CleanText(string text)
      {
          text = text.Replace(@"\N", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);
          text = _overrideTagRegex.Replace(text, string.Empty);

          // Escape every angle bracket first; only whitelisted tags are un-escaped afterwards.
          text = text.Replace("<", "&lt;", StringComparison.Ordinal);
          text = text.Replace(">", "&gt;", StringComparison.Ordinal);

          return _allowedHtmlTagRegex.Replace(text, "<$1$3$7>");
      }

      /// <summary>
      /// Converts a timestamp of the form hh:mm:ss.fff or hh:mm:ss,fff into ticks.
      /// </summary>
      /// <param name="time">The timestamp text.</param>
      /// <returns>The tick count, or 0 when the text matches neither format.</returns>
      private static long GetTicks(ReadOnlySpan<char> time)
      {
          // Both formats use escaped literal separators, so the invariant culture is sufficient.
          if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", CultureInfo.InvariantCulture, out var span)
              || TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", CultureInfo.InvariantCulture, out span))
          {
              return span.Ticks;
          }

          return 0;
      }
  }
  86. }