瀏覽代碼

Merge pull request #815 from lalmanzar/master

Implement parsers
Luke 11 年之前
父節點
當前提交
4e70530ba6

+ 50 - 6
MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs

@@ -1,17 +1,61 @@
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Text.RegularExpressions;
 
 namespace MediaBrowser.MediaEncoding.Subtitles
 {
     public class SrtParser : ISubtitleParser
     {
-        public SubtitleTrackInfo Parse(Stream stream)
-        {
-            throw new NotImplementedException();
+        private readonly CultureInfo _usCulture = new CultureInfo("en-US");
+        /// <summary>
+        /// Parses SubRip (.srt) text into a <see cref="SubtitleTrackInfo"/>.
+        /// </summary>
+        /// <param name="stream">Stream holding the SRT text. The reader wrapping it is disposed before returning.</param>
+        /// <returns>Track info with one <see cref="SubtitleTrackEvent"/> per cue that could be read.</returns>
+        public SubtitleTrackInfo Parse(Stream stream)
+        {
+            var trackInfo = new SubtitleTrackInfo();
+            using (var reader = new StreamReader(stream))
+            {
+                string line;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    // Blank lines separate cues; any number of them may appear.
+                    if (string.IsNullOrWhiteSpace(line))
+                    {
+                        continue;
+                    }
+
+                    // The first non-blank line of a cue is used as its id.
+                    var subEvent = new SubtitleTrackEvent { Id = line };
+
+                    var timingLine = reader.ReadLine();
+                    if (timingLine == null)
+                    {
+                        // Truncated file: an id line with nothing after it.
+                        break;
+                    }
+
+                    var time = Regex.Split(timingLine, @"[\t ]*-->[\t ]*");
+                    if (time.Length < 2)
+                    {
+                        // Not a "start --> end" line. Drop the rest of this malformed cue
+                        // (everything up to the next empty line) instead of throwing.
+                        if (!string.IsNullOrEmpty(timingLine))
+                        {
+                            while (!string.IsNullOrEmpty(reader.ReadLine()))
+                            {
+                            }
+                        }
+                        continue;
+                    }
+
+                    subEvent.StartPositionTicks = GetTicks(time[0]);
+
+                    // The end time may be followed by extra data such as
+                    // "X1:000 X2:000 Y1:050 Y2:100"; keep only the timestamp itself.
+                    var endTime = time[1];
+                    var idx = endTime.IndexOf(" ", StringComparison.Ordinal);
+                    if (idx > 0)
+                    {
+                        endTime = endTime.Substring(0, idx);
+                    }
+                    subEvent.EndPositionTicks = GetTicks(endTime);
+
+                    // Every line up to the next empty line (or end of stream) is cue text.
+                    var multiline = new List<string>();
+                    while ((line = reader.ReadLine()) != null)
+                    {
+                        if (string.IsNullOrEmpty(line))
+                        {
+                            break;
+                        }
+                        multiline.Add(line);
+                    }
+
+                    // Join with the SSA-style "\N" marker so real and inline line breaks
+                    // are converted to <br /> by the same replacement below.
+                    subEvent.Text = string.Join(@"\N", multiline);
+                    // Strip SSA override blocks such as {\pos(142,120)} or {\i1}.
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}", string.Empty, RegexOptions.IgnoreCase);
+                    // Escape every angle bracket first...
+                    subEvent.Text = subEvent.Text.Replace("<", "&lt;");
+                    subEvent.Text = subEvent.Text.Replace(">", "&gt;");
+                    // ...then turn the supported tags (font, b, u, i, s) back into markup.
+                    subEvent.Text = Regex.Replace(subEvent.Text, "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;", "<$1$3$7>", RegexOptions.IgnoreCase);
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\\N", "<br />",RegexOptions.IgnoreCase);
+                    trackInfo.TrackEvents.Add(subEvent);
+                }
+            }
+            return trackInfo;
+        }
+
+        /// <summary>
+        /// Converts an SRT timestamp into ticks.
+        /// </summary>
+        /// <param name="time">Timestamp written as hh:mm:ss.fff or hh:mm:ss,fff.</param>
+        /// <returns>The tick count of the timestamp, or 0 when it matches neither format.</returns>
+        private long GetTicks(string time)
+        {
+            TimeSpan span;
+
+            // Millisecond separator written as a dot.
+            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out span))
+            {
+                return span.Ticks;
+            }
+
+            // Millisecond separator written as a comma.
+            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span))
+            {
+                return span.Ticks;
+            }
+
+            return 0;
         }
     }
 }

+ 55 - 3
MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs

@@ -1,17 +1,69 @@
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Text.RegularExpressions;
 
 namespace MediaBrowser.MediaEncoding.Subtitles
 {
     public class SsaParser : ISubtitleParser
     {
+        private readonly CultureInfo _usCulture = new CultureInfo("en-US");
+
         public SubtitleTrackInfo Parse(Stream stream)
         {
-            throw new NotImplementedException();
+            var trackInfo = new SubtitleTrackInfo();
+            var eventIndex = 1;
+            using (var reader = new StreamReader(stream))
+            {
+                string line;
+                while (reader.ReadLine() != "[Events]")
+                {}
+                var headers = ParseFieldHeaders(reader.ReadLine());
+
+                while ((line = reader.ReadLine()) != null)
+                {
+                    if (string.IsNullOrWhiteSpace(line))
+                    {
+                        continue;
+                    }
+                    if(line.StartsWith("["))
+                        break;
+                    if(string.IsNullOrEmpty(line))
+                        continue;
+                    var subEvent = new SubtitleTrackEvent { Id = eventIndex.ToString(_usCulture) };
+                    eventIndex++;
+                    var sections = line.Substring(10).Split(',');
+
+                    subEvent.StartPositionTicks = GetTicks(sections[headers["Start"]]);
+                    subEvent.EndPositionTicks = GetTicks(sections[headers["End"]]);
+                    subEvent.Text = string.Join(",", sections.Skip(headers["Text"]));
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}", string.Empty, RegexOptions.IgnoreCase);
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\\N", "<br />", RegexOptions.IgnoreCase);
+
+                    trackInfo.TrackEvents.Add(subEvent);
+                }
+            }
+            return trackInfo;
+        }
+
+        /// <summary>
+        /// Converts an SSA timestamp into ticks.
+        /// </summary>
+        /// <param name="time">Timestamp written as h:mm:ss.ff.</param>
+        /// <returns>The tick count of the timestamp, or 0 when it does not match that format.</returns>
+        private long GetTicks(string time)
+        {
+            TimeSpan span;
+            if (!TimeSpan.TryParseExact(time, @"h\:mm\:ss\.ff", _usCulture, out span))
+            {
+                return 0;
+            }
+
+            return span.Ticks;
+        }
+
+        /// <summary>
+        /// Maps the Start, End and Text column names to their positions in a format line.
+        /// </summary>
+        /// <param name="line">Format line; its first 8 characters are dropped and the rest is split on commas.</param>
+        /// <returns>Zero-based position of "Start", "End" and "Text"; -1 for a name that is not listed.</returns>
+        private Dictionary<string, int> ParseFieldHeaders(string line)
+        {
+            // Column names are trimmed because the format line may put a space after each comma.
+            var columns = new List<string>();
+            foreach (var rawName in line.Substring(8).Split(','))
+            {
+                columns.Add(rawName.Trim());
+            }
+
+            var result = new Dictionary<string, int>();
+            foreach (var name in new[] { "Start", "End", "Text" })
+            {
+                result.Add(name, columns.IndexOf(name));
+            }
+            return result;
         }
     }
 }

+ 14 - 0
MediaBrowser.Tests/MediaBrowser.Tests.csproj

@@ -50,6 +50,8 @@
     </Otherwise>
   </Choose>
   <ItemGroup>
+    <Compile Include="MediaEncoding\Subtitles\SsaParserTests.cs" />
+    <Compile Include="MediaEncoding\Subtitles\SrtParserTests.cs" />
     <Compile Include="Providers\MovieDbProviderTests.cs" />
     <Compile Include="Resolvers\MovieResolverTests.cs" />
     <Compile Include="Resolvers\TvUtilTests.cs" />
@@ -61,6 +63,10 @@
       <Project>{17e1f4e6-8abd-4fe5-9ecf-43d4b6087ba2}</Project>
       <Name>MediaBrowser.Controller</Name>
     </ProjectReference>
+    <ProjectReference Include="..\MediaBrowser.MediaEncoding\MediaBrowser.MediaEncoding.csproj">
+      <Project>{0BD82FA6-EB8A-4452-8AF5-74F9C3849451}</Project>
+      <Name>MediaBrowser.MediaEncoding</Name>
+    </ProjectReference>
     <ProjectReference Include="..\MediaBrowser.Model\MediaBrowser.Model.csproj">
       <Project>{7eeeb4bb-f3e8-48fc-b4c5-70f0fff8329b}</Project>
       <Name>MediaBrowser.Model</Name>
@@ -77,6 +83,14 @@
   <ItemGroup>
     <None Include="app.config" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="MediaEncoding\Subtitles\TestSubtitles\data.ssa">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Include="MediaEncoding\Subtitles\TestSubtitles\unit.srt">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
   <Choose>
     <When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
       <ItemGroup>

+ 108 - 0
MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs

@@ -0,0 +1,108 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MediaBrowser.MediaEncoding.Subtitles;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace MediaBrowser.Tests.MediaEncoding.Subtitles {
+
+    /// <summary>
+    /// Tests for <see cref="SrtParser"/> driven by the unit.srt fixture.
+    /// </summary>
+    [TestClass]
+    public class SrtParserTests {
+
+        /// <summary>
+        /// Parses unit.srt and compares id, start, end and text of every event with the expected values.
+        /// </summary>
+        [TestMethod]
+        public void TestParse() {
+
+            var expectedSubs = new SubtitleTrackInfo {
+                TrackEvents = new List<SubtitleTrackEvent> {
+                    new SubtitleTrackEvent {
+                        Id = "1",
+                        StartPositionTicks = 24000000,
+                        EndPositionTicks = 52000000,
+                        Text = "[Background Music Playing]"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "2",
+                        StartPositionTicks = 157120000,
+                        EndPositionTicks = 173990000,
+                        Text = "Oh my god, Watch out!<br />It's coming!!"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "3",
+                        StartPositionTicks = 257120000,
+                        EndPositionTicks = 303990000,
+                        Text = "[Bird noises]"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "4",
+                        StartPositionTicks = 310000000,
+                        EndPositionTicks = 319990000,
+                        Text = "This text is <font color=\"red\">RED</font> and has not been positioned."
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "5",
+                        StartPositionTicks = 320000000,
+                        EndPositionTicks = 329990000,
+                        Text = "This is a<br />new line, as is<br />this"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "6",
+                        StartPositionTicks = 330000000,
+                        EndPositionTicks = 339990000,
+                        Text = "This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "7",
+                        StartPositionTicks = 340000000,
+                        EndPositionTicks = 349990000,
+                        Text = "Unclosed but <b>supported HTML tags are left in,  SSA italics aren't"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "8",
+                        StartPositionTicks = 350000000,
+                        EndPositionTicks = 359990000,
+                        Text = "&lt;ggg&gt;Unsupported&lt;/ggg&gt; HTML tags are escaped and left in, even if &lt;hhh&gt;not closed."
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "9",
+                        StartPositionTicks = 360000000,
+                        EndPositionTicks = 369990000,
+                        Text = "Multiple SSA tags are stripped"
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "10",
+                        StartPositionTicks = 370000000,
+                        EndPositionTicks = 379990000,
+                        Text = "Greater than (&lt;) and less than (&gt;) are shown"
+                    }
+                }
+            };
+
+            var sut = new SrtParser();
+
+            // Build the path from its segments so the fixture is found regardless of
+            // the platform's directory separator.
+            var path = Path.Combine("MediaEncoding", "Subtitles", "TestSubtitles", "unit.srt");
+
+            SubtitleTrackInfo result;
+            using (var stream = File.OpenRead(path)) {
+                result = sut.Parse(stream);
+            }
+
+            Assert.IsNotNull(result);
+            Assert.AreEqual(expectedSubs.TrackEvents.Count, result.TrackEvents.Count);
+            for (int i = 0; i < expectedSubs.TrackEvents.Count; i++)
+            {
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text);
+            }
+
+        }
+    }
+}

+ 59 - 0
MediaBrowser.Tests/MediaEncoding/Subtitles/SsaParserTests.cs

@@ -0,0 +1,59 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MediaBrowser.MediaEncoding.Subtitles;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace MediaBrowser.Tests.MediaEncoding.Subtitles {
+
+    /// <summary>
+    /// Tests for <see cref="SsaParser"/> driven by the data.ssa fixture.
+    /// </summary>
+    [TestClass]
+    public class SsaParserTests {
+
+        /// <summary>
+        /// Parses data.ssa and compares id, start, end and text of every event with the expected values.
+        /// </summary>
+        [TestMethod]
+        public void TestParse() {
+
+            var expectedSubs = new SubtitleTrackInfo {
+                TrackEvents = new List<SubtitleTrackEvent> {
+                    new SubtitleTrackEvent {
+                        Id = "1",
+                        StartPositionTicks = 24000000,
+                        EndPositionTicks = 72000000,
+                        Text = "Senator, we're <br />making our final <br />approach into Coruscant."
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "2",
+                        StartPositionTicks = 97100000,
+                        EndPositionTicks = 133900000,
+                        Text = "Very good, Lieutenant."
+                    },
+                    new SubtitleTrackEvent {
+                        Id = "3",
+                        StartPositionTicks = 150400000,
+                        EndPositionTicks = 180400000,
+                        Text = "It's <br />a <br />trap!"
+                    }
+                }
+            };
+
+            var sut = new SsaParser();
+
+            // Build the path from its segments so the fixture is found regardless of
+            // the platform's directory separator.
+            var path = Path.Combine("MediaEncoding", "Subtitles", "TestSubtitles", "data.ssa");
+
+            SubtitleTrackInfo result;
+            using (var stream = File.OpenRead(path)) {
+                result = sut.Parse(stream);
+            }
+
+            Assert.IsNotNull(result);
+            Assert.AreEqual(expectedSubs.TrackEvents.Count, result.TrackEvents.Count);
+            for (int i = 0; i < expectedSubs.TrackEvents.Count; i++)
+            {
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text);
+            }
+
+        }
+    }
+}

+ 23 - 0
MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/data.ssa

@@ -0,0 +1,23 @@
+[Script Info]
+Title: Testing subtitles for the SSA Format
+
+[V4 Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding
+Style: Default,Arial,20,65535,65535,65535,-2147483640,-1,0,1,3,0,2,30,30,30,0,0
+Style: Titre_episode,Akbar,140,15724527,65535,65535,986895,-1,0,1,1,0,3,30,30,30,0,0
+Style: Wolf main,Wolf_Rain,56,15724527,15724527,15724527,4144959,0,0,1,1,2,2,5,5,30,0,0
+
+
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:02.40,0:00:07.20,Default,,0000,0000,0000,,Senator, {\kf89}we're \Nmaking our final \napproach into Coruscant.
+Dialogue: 0,0:00:09.71,0:00:13.39,Default,,0000,0000,0000,,{\pos(400,570)}Very good, Lieutenant.
+Dialogue: 0,0:00:15.04,0:00:18.04,Default,,0000,0000,0000,,It's \Na \ntrap!
+
+
+[Pictures]
+This section will be ignored
+
+[Fonts]
+This section will be ignored

+ 44 - 0
MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt

@@ -0,0 +1,44 @@
+
+
+1
+00:00:02.400 --> 00:00:05.200
+[Background Music Playing]
+
+2
+00:00:15,712 --> 00:00:17,399 X1:000 X2:000 Y1:050 Y2:100
+Oh my god, Watch out!
+It's coming!!
+
+3
+00:00:25,712 --> 00:00:30,399
+[Bird noises]
+
+4
+00:00:31,000 --> 00:00:31,999
+This text is <font color="red">RED</font> and has not been {\pos(142,120)}positioned.
+
+5
+00:00:32,000 --> 00:00:32,999
+This is a\nnew line, as is\Nthis
+
+6
+00:00:33,000 --> 00:00:33,999
+This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags
+
+7
+00:00:34,000 --> 00:00:34,999
+Unclosed but <b>supported HTML tags are left in, {\i1} SSA italics aren't
+
+8
+00:00:35,000 --> 00:00:35,999
+<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed.
+
+9
+00:00:36,000 --> 00:00:36,999
+Multiple {\pos(142,120)\b1}SSA tags are stripped
+
+10
+00:00:37,000 --> 00:00:37,999
+Greater than (<) and less than (>) are shown
+
+