|
1 /* GStreamer SSA subtitle parser |
|
2 * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net> |
|
3 * |
|
4 * This library is free software; you can redistribute it and/or |
|
5 * modify it under the terms of the GNU Library General Public |
|
6 * License as published by the Free Software Foundation; either |
|
7 * version 2 of the License, or (at your option) any later version. |
|
8 * |
|
9 * This library is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
12 * Library General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU Library General Public |
|
15 * License along with this library; if not, write to the |
|
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|
17 * Boston, MA 02111-1307, USA. |
|
18 */ |
|
19 |
|
20 /* Super-primitive SSA parser - we just want the text and ignore |
|
21 * everything else like styles and timing codes etc. for now */ |
|
22 |
|
23 #ifdef HAVE_CONFIG_H |
|
24 #include "config.h" |
|
25 #endif |
|
26 |
|
27 #include <stdlib.h> /* atoi() */ |
|
28 #include <string.h> |
|
29 |
|
30 #include "gstssaparse.h" |
|
31 |
|
32 GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug); |
|
33 #define GST_CAT_DEFAULT ssa_parse_debug |
|
34 |
|
35 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", |
|
36 GST_PAD_SINK, |
|
37 GST_PAD_ALWAYS, |
|
38 GST_STATIC_CAPS ("application/x-ssa; application/x-ass") |
|
39 ); |
|
40 |
|
41 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src", |
|
42 GST_PAD_SRC, |
|
43 GST_PAD_ALWAYS, |
|
44 GST_STATIC_CAPS ("text/x-pango-markup") |
|
45 ); |
|
46 |
|
47 GST_BOILERPLATE (GstSsaParse, gst_ssa_parse, GstElement, GST_TYPE_ELEMENT); |
|
48 |
|
49 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement * |
|
50 element, GstStateChange transition); |
|
51 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps); |
|
52 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstEvent * event); |
|
53 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstEvent * event); |
|
54 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstBuffer * buf); |
|
55 |
|
56 |
|
57 static void |
|
58 gst_ssa_parse_base_init (gpointer klass) |
|
59 { |
|
60 GstElementClass *element_class = GST_ELEMENT_CLASS (klass); |
|
61 static GstElementDetails ssa_parse_details = { |
|
62 "SSA Subtitle Parser", |
|
63 "Codec/Parser/Subtitle", |
|
64 "Parses SSA subtitle streams", |
|
65 "Tim-Philipp Müller <tim centricular net>" |
|
66 }; |
|
67 |
|
68 gst_element_class_add_pad_template (element_class, |
|
69 gst_static_pad_template_get (&sink_templ)); |
|
70 gst_element_class_add_pad_template (element_class, |
|
71 gst_static_pad_template_get (&src_templ)); |
|
72 gst_element_class_set_details (element_class, &ssa_parse_details); |
|
73 |
|
74 GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0, |
|
75 "SSA subtitle parser"); |
|
76 } |
|
77 |
|
78 static void |
|
79 gst_ssa_parse_dispose (GObject * object) |
|
80 { |
|
81 GstSsaParse *parse = GST_SSA_PARSE (object); |
|
82 |
|
83 g_free (parse->ini); |
|
84 parse->ini = NULL; |
|
85 |
|
86 GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object)); |
|
87 } |
|
88 |
|
89 static void |
|
90 gst_ssa_parse_init (GstSsaParse * parse, GstSsaParseClass * klass) |
|
91 { |
|
92 parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink"); |
|
93 gst_pad_set_setcaps_function (parse->sinkpad, |
|
94 GST_DEBUG_FUNCPTR (gst_ssa_parse_setcaps)); |
|
95 gst_pad_set_chain_function (parse->sinkpad, |
|
96 GST_DEBUG_FUNCPTR (gst_ssa_parse_chain)); |
|
97 gst_pad_set_event_function (parse->sinkpad, |
|
98 GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event)); |
|
99 gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad); |
|
100 |
|
101 parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src"); |
|
102 gst_pad_set_event_function (parse->srcpad, |
|
103 GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event)); |
|
104 gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad); |
|
105 gst_pad_use_fixed_caps (parse->srcpad); |
|
106 gst_pad_set_caps (parse->srcpad, |
|
107 gst_static_pad_template_get_caps (&src_templ)); |
|
108 |
|
109 parse->ini = NULL; |
|
110 parse->framed = FALSE; |
|
111 } |
|
112 |
|
113 static void |
|
114 gst_ssa_parse_class_init (GstSsaParseClass * klass) |
|
115 { |
|
116 GObjectClass *object_class = G_OBJECT_CLASS (klass); |
|
117 GstElementClass *element_class = GST_ELEMENT_CLASS (klass); |
|
118 |
|
119 object_class->dispose = GST_DEBUG_FUNCPTR (gst_ssa_parse_dispose); |
|
120 |
|
121 element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state); |
|
122 } |
|
123 |
|
124 static gboolean |
|
125 gst_ssa_parse_src_event (GstPad * pad, GstEvent * event) |
|
126 { |
|
127 return gst_pad_event_default (pad, event); |
|
128 } |
|
129 |
|
130 static gboolean |
|
131 gst_ssa_parse_sink_event (GstPad * pad, GstEvent * event) |
|
132 { |
|
133 return gst_pad_event_default (pad, event); |
|
134 } |
|
135 |
|
136 static gboolean |
|
137 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps) |
|
138 { |
|
139 GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad)); |
|
140 const GValue *val; |
|
141 GstStructure *s; |
|
142 const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF }; |
|
143 GstBuffer *priv; |
|
144 gchar *data; |
|
145 guint size; |
|
146 |
|
147 s = gst_caps_get_structure (caps, 0); |
|
148 val = gst_structure_get_value (s, "codec_data"); |
|
149 if (val == NULL) { |
|
150 parse->framed = FALSE; |
|
151 GST_ERROR ("Only SSA subtitles embedded in containers are supported"); |
|
152 return FALSE; |
|
153 } |
|
154 |
|
155 parse->framed = TRUE; |
|
156 |
|
157 priv = (GstBuffer *) gst_value_get_mini_object (val); |
|
158 g_return_val_if_fail (priv != NULL, FALSE); |
|
159 |
|
160 gst_buffer_ref (priv); |
|
161 |
|
162 data = (gchar *) GST_BUFFER_DATA (priv); |
|
163 size = GST_BUFFER_SIZE (priv); |
|
164 /* skip UTF-8 BOM */ |
|
165 if (size >= 3 && memcmp (data, bom_utf8, 3) == 0) { |
|
166 data += 3; |
|
167 size -= 3; |
|
168 } |
|
169 |
|
170 if (!strstr (data, "[Script Info]")) { |
|
171 GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header"); |
|
172 gst_buffer_unref (priv); |
|
173 return FALSE; |
|
174 } |
|
175 |
|
176 if (!g_utf8_validate (data, size, NULL)) { |
|
177 GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8"); |
|
178 gst_buffer_unref (priv); |
|
179 return FALSE; |
|
180 } |
|
181 |
|
182 /* FIXME: parse initial section */ |
|
183 parse->ini = g_strndup (data, size); |
|
184 GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini); |
|
185 |
|
186 gst_buffer_unref (priv); |
|
187 |
|
188 return TRUE; |
|
189 } |
|
190 |
|
191 static gboolean |
|
192 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt) |
|
193 { |
|
194 gchar *t, *end; |
|
195 gboolean removed_any = FALSE; |
|
196 |
|
197 while ((t = strchr (txt, '{'))) { |
|
198 end = strchr (txt, '}'); |
|
199 if (end == NULL) { |
|
200 GST_WARNING_OBJECT (parse, "Missing { for style override code"); |
|
201 return removed_any; |
|
202 } |
|
203 /* move terminating NUL character forward as well */ |
|
204 g_memmove (t, end + 1, strlen (end + 1) + 1); |
|
205 removed_any = TRUE; |
|
206 } |
|
207 |
|
208 /* these may occur outside of curly brackets. We don't handle the different |
|
209 * wrapping modes yet, so just remove these markers from the text for now */ |
|
210 while ((t = strstr (txt, "\\n"))) { |
|
211 t[0] = ' '; |
|
212 t[1] = '\n'; |
|
213 } |
|
214 while ((t = strstr (txt, "\\N"))) { |
|
215 t[0] = ' '; |
|
216 t[1] = '\n'; |
|
217 } |
|
218 while ((t = strstr (txt, "\\h"))) { |
|
219 t[0] = ' '; |
|
220 t[1] = ' '; |
|
221 } |
|
222 |
|
223 return removed_any; |
|
224 } |
|
225 |
|
226 /** |
|
227 * gst_ssa_parse_push_line: |
|
228 * @parse: caller element |
|
229 * @txt: text to push |
|
230 * @start: timestamp for the buffer |
|
231 * @duration: duration for the buffer |
|
232 * |
|
233 * Parse the text in a buffer with the given properties and |
|
234 * push it to the srcpad of the @parse element |
|
235 * |
|
236 * Returns: result of the push of the created buffer |
|
237 */ |
|
238 static GstFlowReturn |
|
239 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, |
|
240 GstClockTime start, GstClockTime duration) |
|
241 { |
|
242 GstFlowReturn ret; |
|
243 GstBuffer *buf; |
|
244 gchar *t, *escaped; |
|
245 gint num, i, len; |
|
246 |
|
247 num = atoi (txt); |
|
248 GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT, |
|
249 num, GST_TIME_ARGS (start)); |
|
250 |
|
251 /* skip all non-text fields before the actual text */ |
|
252 t = txt; |
|
253 for (i = 0; i < 8; ++i) { |
|
254 t = strchr (t, ','); |
|
255 if (t == NULL) |
|
256 return GST_FLOW_ERROR; |
|
257 ++t; |
|
258 } |
|
259 |
|
260 GST_LOG_OBJECT (parse, "Text : %s", t); |
|
261 |
|
262 if (gst_ssa_parse_remove_override_codes (parse, t)) { |
|
263 GST_LOG_OBJECT (parse, "Clean: %s", t); |
|
264 } |
|
265 |
|
266 /* we claim to output pango markup, so we must escape the |
|
267 * text even if we don't actually use any pango markup yet */ |
|
268 escaped = g_markup_printf_escaped ("%s", t); |
|
269 |
|
270 len = strlen (escaped); |
|
271 |
|
272 /* allocate enough for a terminating NUL, but don't include it in buf size */ |
|
273 buf = gst_buffer_new_and_alloc (len + 1); |
|
274 memcpy (GST_BUFFER_DATA (buf), escaped, len + 1); |
|
275 GST_BUFFER_SIZE (buf) = len; |
|
276 g_free (escaped); |
|
277 |
|
278 GST_BUFFER_TIMESTAMP (buf) = start; |
|
279 GST_BUFFER_DURATION (buf) = duration; |
|
280 |
|
281 gst_buffer_set_caps (buf, GST_PAD_CAPS (parse->srcpad)); |
|
282 |
|
283 GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT |
|
284 " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start), |
|
285 GST_TIME_ARGS (duration)); |
|
286 |
|
287 ret = gst_pad_push (parse->srcpad, buf); |
|
288 |
|
289 if (ret != GST_FLOW_OK) { |
|
290 GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt, |
|
291 gst_flow_get_name (ret)); |
|
292 } |
|
293 |
|
294 return ret; |
|
295 } |
|
296 |
|
297 static GstFlowReturn |
|
298 gst_ssa_parse_chain (GstPad * sinkpad, GstBuffer * buf) |
|
299 { |
|
300 GstFlowReturn ret; |
|
301 GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad)); |
|
302 GstClockTime ts; |
|
303 gchar *txt; |
|
304 |
|
305 if (G_UNLIKELY (!parse->framed)) |
|
306 goto not_framed; |
|
307 |
|
308 /* make double-sure it's 0-terminated and all */ |
|
309 txt = g_strndup ((gchar *) GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf)); |
|
310 |
|
311 if (txt == NULL) |
|
312 return GST_FLOW_UNEXPECTED; |
|
313 |
|
314 ts = GST_BUFFER_TIMESTAMP (buf); |
|
315 ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf)); |
|
316 |
|
317 if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) { |
|
318 /* just advance time without sending anything */ |
|
319 gst_pad_push_event (parse->srcpad, |
|
320 gst_event_new_new_segment (TRUE, 1.0, GST_FORMAT_TIME, ts, -1, ts)); |
|
321 ret = GST_FLOW_OK; |
|
322 } |
|
323 |
|
324 g_free (txt); |
|
325 |
|
326 return ret; |
|
327 |
|
328 /* ERRORS */ |
|
329 not_framed: |
|
330 { |
|
331 GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL), |
|
332 ("Only SSA subtitles embedded in containers are supported")); |
|
333 return GST_FLOW_NOT_NEGOTIATED; |
|
334 } |
|
335 } |
|
336 |
|
337 static GstStateChangeReturn |
|
338 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition) |
|
339 { |
|
340 GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS; |
|
341 GstSsaParse *parse = GST_SSA_PARSE (element); |
|
342 |
|
343 switch (transition) { |
|
344 case GST_STATE_CHANGE_READY_TO_PAUSED: |
|
345 break; |
|
346 default: |
|
347 break; |
|
348 } |
|
349 |
|
350 ret = parent_class->change_state (element, transition); |
|
351 if (ret == GST_STATE_CHANGE_FAILURE) |
|
352 return ret; |
|
353 |
|
354 switch (transition) { |
|
355 case GST_STATE_CHANGE_PAUSED_TO_READY: |
|
356 g_free (parse->ini); |
|
357 parse->ini = NULL; |
|
358 parse->framed = FALSE; |
|
359 break; |
|
360 default: |
|
361 break; |
|
362 } |
|
363 |
|
364 return ret; |
|
365 } |