|
@@ -2511,27 +2511,97 @@ def srt_subtitles_timecode(seconds):
|
|
|
|
|
|
|
|
|
def dfxp2srt(dfxp_data):
|
|
|
+ LEGACY_NAMESPACES = (
|
|
|
+ ('http://www.w3.org/ns/ttml', [
|
|
|
+ 'http://www.w3.org/2004/11/ttaf1',
|
|
|
+ 'http://www.w3.org/2006/04/ttaf1',
|
|
|
+ 'http://www.w3.org/2006/10/ttaf1',
|
|
|
+ ]),
|
|
|
+ ('http://www.w3.org/ns/ttml#styling', [
|
|
|
+ 'http://www.w3.org/ns/ttml#style',
|
|
|
+ ]),
|
|
|
+ )
|
|
|
+
|
|
|
+ SUPPORTED_STYLING = [
|
|
|
+ 'color',
|
|
|
+ 'fontFamily',
|
|
|
+ 'fontSize',
|
|
|
+ 'fontStyle',
|
|
|
+ 'fontWeight',
|
|
|
+ 'textDecoration'
|
|
|
+ ]
|
|
|
+
|
|
|
_x = functools.partial(xpath_with_ns, ns_map={
|
|
|
'ttml': 'http://www.w3.org/ns/ttml',
|
|
|
- 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
|
|
- 'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
|
|
|
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
|
|
|
})
|
|
|
|
|
|
+ styles = {}
|
|
|
+ default_style = {}
|
|
|
+
|
|
|
class TTMLPElementParser(object):
    """Parser target that renders one TTML ``<p>`` element as SRT text.

    The object implements the ``start``/``end``/``data``/``close`` callbacks
    and accumulates the output string as elements are visited:

    * ``<br>`` elements (namespaced or bare) become a newline;
    * supported ``tts:*`` styling (directly on the element, via a referenced
      ``style`` id, or from the document-wide default style) is translated
      to ``<font>``, ``<b>``, ``<i>`` and ``<u>`` markup;
    * character data is copied through unchanged.

    It relies on ``_x``, ``styles``, ``default_style`` and
    ``SUPPORTED_STYLING`` from the enclosing scope.
    """

    def __init__(self):
        # All state is per instance. Keeping the two stacks as class
        # attributes would share the same list objects between every parser
        # created for the document, letting leftovers from one paragraph
        # influence the next one.
        self._out = ''
        # One entry per open non-<br> element: the list of markup tags that
        # element opened (possibly empty), in opening order.
        self._unclosed_elements = []
        # One entry per open non-<br> element: the effective (inherited +
        # own) style that is in force inside that element.
        self._applied_styles = []

    def start(self, tag, attrib):
        """Handle an opening tag and emit the markup its styling requires."""
        if tag in (_x('ttml:br'), 'br'):
            self._out += '\n'
            return

        # Style already in force from the enclosing elements, if any.
        inherited = self._applied_styles[-1] if self._applied_styles else {}

        # Resolve this element's style; later sources override earlier ones:
        # document default < referenced style id < inline tts:* attributes.
        style = {}
        if default_style:
            style.update(default_style)
        element_style_id = attrib.get('style')
        if element_style_id:
            style.update(styles.get(element_style_id, {}))
        for prop in SUPPORTED_STYLING:
            prop_val = attrib.get(_x('tts:' + prop))
            if prop_val:
                style[prop] = prop_val

        unclosed_elements = []
        font = ''
        # Sorted iteration keeps the emitted tag order deterministic.
        for k, v in sorted(style.items()):
            # Already in effect through an ancestor: emitting it again
            # would only produce redundant nested markup.
            if inherited.get(k) == v:
                continue
            if k == 'color':
                font += ' color="%s"' % v
            elif k == 'fontSize':
                font += ' size="%s"' % v
            elif k == 'fontFamily':
                font += ' face="%s"' % v
            elif k == 'fontWeight' and v == 'bold':
                self._out += '<b>'
                unclosed_elements.append('b')
            elif k == 'fontStyle' and v == 'italic':
                self._out += '<i>'
                unclosed_elements.append('i')
            elif k == 'textDecoration' and v == 'underline':
                self._out += '<u>'
                unclosed_elements.append('u')
        if font:
            # All font-related properties are merged into a single tag.
            self._out += '<font' + font + '>'
            unclosed_elements.append('font')

        # Push exactly one entry on each stack for every non-<br> element so
        # that end() can pop unconditionally and the stacks always stay
        # balanced, even when the element opened no markup of its own.
        applied_style = dict(inherited)
        applied_style.update(style)
        self._applied_styles.append(applied_style)
        self._unclosed_elements.append(unclosed_elements)

    def end(self, tag):
        """Handle a closing tag by closing the markup its start() opened."""
        if tag in (_x('ttml:br'), 'br'):
            return
        # Close in reverse opening order to keep the markup well nested.
        for element in reversed(self._unclosed_elements.pop()):
            self._out += '</%s>' % element
        # Mirrors the unconditional push in start().
        self._applied_styles.pop()

    def data(self, data):
        """Append character data to the output verbatim."""
        self._out += data

    def close(self):
        """Return the accumulated text with surrounding whitespace removed."""
        return self._out.strip()
|
|
|
|
|
|
def parse_node(node):
|
|
|
target = TTMLPElementParser()
|
|
@@ -2539,13 +2609,45 @@ def dfxp2srt(dfxp_data):
|
|
|
parser.feed(xml.etree.ElementTree.tostring(node))
|
|
|
return parser.close()
|
|
|
|
|
|
+ for k, v in LEGACY_NAMESPACES:
|
|
|
+ for ns in v:
|
|
|
+ dfxp_data = dfxp_data.replace(ns, k)
|
|
|
+
|
|
|
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
|
|
out = []
|
|
|
- paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
|
|
|
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
|
|
|
|
|
if not paras:
|
|
|
raise ValueError('Invalid dfxp/TTML subtitle')
|
|
|
|
|
|
+ repeat = False
|
|
|
+ while True:
|
|
|
+ for style in dfxp.findall(_x('.//ttml:style')):
|
|
|
+ style_id = style.get('id')
|
|
|
+ parent_style_id = style.get('style')
|
|
|
+ if parent_style_id:
|
|
|
+ if parent_style_id not in styles:
|
|
|
+ repeat = True
|
|
|
+ continue
|
|
|
+ styles[style_id] = styles[parent_style_id].copy()
|
|
|
+ for prop in SUPPORTED_STYLING:
|
|
|
+ prop_val = style.get(_x('tts:' + prop))
|
|
|
+ if prop_val:
|
|
|
+ styles.setdefault(style_id, {})[prop] = prop_val
|
|
|
+ if repeat:
|
|
|
+ repeat = False
|
|
|
+ else:
|
|
|
+ break
|
|
|
+
|
|
|
+ for p in ('body', 'div'):
|
|
|
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
|
|
|
+ if ele is None:
|
|
|
+ continue
|
|
|
+ style = styles.get(ele.get('style'))
|
|
|
+ if not style:
|
|
|
+ continue
|
|
|
+ default_style.update(style)
|
|
|
+
|
|
|
for para, index in zip(paras, itertools.count(1)):
|
|
|
begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
|
|
|
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
|