|
/* GStreamer
 * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
 * Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
 * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
|
21 |
|
22 #ifdef HAVE_CONFIG_H |
|
23 #include "config.h" |
|
24 #endif |
|
25 |
|
26 #include <string.h> |
|
27 #include <stdlib.h> |
|
28 #include <sys/types.h> |
|
29 #include <regex.h> |
|
30 |
|
31 #include "gstsubparse.h" |
|
32 #include "gstssaparse.h" |
|
33 #include "samiparse.h" |
|
34 #include "tmplayerparse.h" |
|
35 #include "mpl2parse.h" |
|
36 |
|
37 #ifdef __SYMBIAN32__ |
|
38 #include <glib_global.h> |
|
39 #endif |
|
40 GST_DEBUG_CATEGORY (sub_parse_debug); |
|
41 |
|
42 #define DEFAULT_ENCODING NULL |
|
43 |
|
44 enum |
|
45 { |
|
46 PROP_0, |
|
47 PROP_ENCODING |
|
48 }; |
|
49 |
|
50 static void |
|
51 gst_sub_parse_set_property (GObject * object, guint prop_id, |
|
52 const GValue * value, GParamSpec * pspec); |
|
53 static void |
|
54 gst_sub_parse_get_property (GObject * object, guint prop_id, |
|
55 GValue * value, GParamSpec * pspec); |
|
56 |
|
57 |
|
58 static const GstElementDetails sub_parse_details = |
|
59 GST_ELEMENT_DETAILS ("Subtitle parser", |
|
60 "Codec/Parser/Subtitle", |
|
61 "Parses subtitle (.sub) files into text streams", |
|
62 "Gustavo J. A. M. Carneiro <gjc@inescporto.pt>\n" |
|
63 "Ronald S. Bultje <rbultje@ronald.bitfreak.net>"); |
|
64 |
|
65 #ifndef GST_DISABLE_XML |
|
66 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", |
|
67 GST_PAD_SINK, |
|
68 GST_PAD_ALWAYS, |
|
69 GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-sami; " |
|
70 "application/x-subtitle-tmplayer; application/x-subtitle-mpl2") |
|
71 ); |
|
72 #else |
|
73 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", |
|
74 GST_PAD_SINK, |
|
75 GST_PAD_ALWAYS, |
|
76 GST_STATIC_CAPS ("application/x-subtitle") |
|
77 ); |
|
78 #endif |
|
79 |
|
80 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src", |
|
81 GST_PAD_SRC, |
|
82 GST_PAD_ALWAYS, |
|
83 GST_STATIC_CAPS ("text/plain; text/x-pango-markup") |
|
84 ); |
|
85 |
|
86 static void gst_sub_parse_base_init (GstSubParseClass * klass); |
|
87 static void gst_sub_parse_class_init (GstSubParseClass * klass); |
|
88 static void gst_sub_parse_init (GstSubParse * subparse); |
|
89 |
|
90 static gboolean gst_sub_parse_src_event (GstPad * pad, GstEvent * event); |
|
91 static gboolean gst_sub_parse_sink_event (GstPad * pad, GstEvent * event); |
|
92 |
|
93 static GstStateChangeReturn gst_sub_parse_change_state (GstElement * element, |
|
94 GstStateChange transition); |
|
95 |
|
96 static GstFlowReturn gst_sub_parse_chain (GstPad * sinkpad, GstBuffer * buf); |
|
97 |
|
98 static GstElementClass *parent_class = NULL; |
|
99 #ifdef __SYMBIAN32__ |
|
100 EXPORT_C |
|
101 #endif |
|
102 |
|
103 |
|
104 GType |
|
105 gst_sub_parse_get_type (void) |
|
106 { |
|
107 static GType sub_parse_type = 0; |
|
108 |
|
109 if (!sub_parse_type) { |
|
110 static const GTypeInfo sub_parse_info = { |
|
111 sizeof (GstSubParseClass), |
|
112 (GBaseInitFunc) gst_sub_parse_base_init, |
|
113 NULL, |
|
114 (GClassInitFunc) gst_sub_parse_class_init, |
|
115 NULL, |
|
116 NULL, |
|
117 sizeof (GstSubParse), |
|
118 0, |
|
119 (GInstanceInitFunc) gst_sub_parse_init, |
|
120 }; |
|
121 |
|
122 sub_parse_type = g_type_register_static (GST_TYPE_ELEMENT, |
|
123 "GstSubParse", &sub_parse_info, 0); |
|
124 } |
|
125 |
|
126 return sub_parse_type; |
|
127 } |
|
128 |
|
129 static void |
|
130 gst_sub_parse_base_init (GstSubParseClass * klass) |
|
131 { |
|
132 GstElementClass *element_class = GST_ELEMENT_CLASS (klass); |
|
133 |
|
134 gst_element_class_add_pad_template (element_class, |
|
135 gst_static_pad_template_get (&sink_templ)); |
|
136 gst_element_class_add_pad_template (element_class, |
|
137 gst_static_pad_template_get (&src_templ)); |
|
138 gst_element_class_set_details (element_class, &sub_parse_details); |
|
139 } |
|
140 |
|
141 static void |
|
142 gst_sub_parse_dispose (GObject * object) |
|
143 { |
|
144 GstSubParse *subparse = GST_SUBPARSE (object); |
|
145 |
|
146 GST_DEBUG_OBJECT (subparse, "cleaning up subtitle parser"); |
|
147 |
|
148 if (subparse->encoding) { |
|
149 g_free (subparse->encoding); |
|
150 subparse->encoding = NULL; |
|
151 } |
|
152 if (subparse->textbuf) { |
|
153 g_string_free (subparse->textbuf, TRUE); |
|
154 subparse->textbuf = NULL; |
|
155 } |
|
156 sami_context_deinit (&subparse->state); |
|
157 |
|
158 GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object)); |
|
159 } |
|
160 |
|
161 static void |
|
162 gst_sub_parse_class_init (GstSubParseClass * klass) |
|
163 { |
|
164 GObjectClass *object_class = G_OBJECT_CLASS (klass); |
|
165 GstElementClass *element_class = GST_ELEMENT_CLASS (klass); |
|
166 |
|
167 parent_class = g_type_class_peek_parent (klass); |
|
168 |
|
169 object_class->dispose = gst_sub_parse_dispose; |
|
170 object_class->set_property = gst_sub_parse_set_property; |
|
171 object_class->get_property = gst_sub_parse_get_property; |
|
172 |
|
173 element_class->change_state = gst_sub_parse_change_state; |
|
174 |
|
175 g_object_class_install_property (object_class, PROP_ENCODING, |
|
176 g_param_spec_string ("subtitle-encoding", "subtitle charset encoding", |
|
177 "Encoding to assume if input subtitles are not in UTF-8 encoding. " |
|
178 "If not set, the GST_SUBTITLE_ENCODING environment variable will " |
|
179 "be checked for an encoding to use. If that is not set either, " |
|
180 "ISO-8859-15 will be assumed.", DEFAULT_ENCODING, G_PARAM_READWRITE)); |
|
181 } |
|
182 |
|
183 static void |
|
184 gst_sub_parse_init (GstSubParse * subparse) |
|
185 { |
|
186 subparse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink"); |
|
187 gst_pad_set_chain_function (subparse->sinkpad, |
|
188 GST_DEBUG_FUNCPTR (gst_sub_parse_chain)); |
|
189 gst_pad_set_event_function (subparse->sinkpad, |
|
190 GST_DEBUG_FUNCPTR (gst_sub_parse_sink_event)); |
|
191 gst_element_add_pad (GST_ELEMENT (subparse), subparse->sinkpad); |
|
192 |
|
193 subparse->srcpad = gst_pad_new_from_static_template (&src_templ, "src"); |
|
194 gst_pad_set_event_function (subparse->srcpad, |
|
195 GST_DEBUG_FUNCPTR (gst_sub_parse_src_event)); |
|
196 gst_element_add_pad (GST_ELEMENT (subparse), subparse->srcpad); |
|
197 |
|
198 subparse->textbuf = g_string_new (NULL); |
|
199 subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN; |
|
200 subparse->flushing = FALSE; |
|
201 gst_segment_init (&subparse->segment, GST_FORMAT_TIME); |
|
202 subparse->need_segment = TRUE; |
|
203 subparse->encoding = g_strdup (DEFAULT_ENCODING); |
|
204 } |
|
205 |
|
206 /* |
|
207 * Source pad functions. |
|
208 */ |
|
209 |
|
210 static gboolean |
|
211 gst_sub_parse_src_event (GstPad * pad, GstEvent * event) |
|
212 { |
|
213 GstSubParse *self = GST_SUBPARSE (gst_pad_get_parent (pad)); |
|
214 gboolean ret = FALSE; |
|
215 |
|
216 GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event)); |
|
217 |
|
218 switch (GST_EVENT_TYPE (event)) { |
|
219 case GST_EVENT_SEEK: |
|
220 { |
|
221 GstFormat format; |
|
222 GstSeekType start_type, stop_type; |
|
223 gint64 start, stop; |
|
224 gdouble rate; |
|
225 gboolean update; |
|
226 |
|
227 gst_event_parse_seek (event, &rate, &format, &self->segment_flags, |
|
228 &start_type, &start, &stop_type, &stop); |
|
229 |
|
230 if (format != GST_FORMAT_TIME) { |
|
231 GST_WARNING_OBJECT (self, "we only support seeking in TIME format"); |
|
232 gst_event_unref (event); |
|
233 goto beach; |
|
234 } |
|
235 |
|
236 /* Convert that seek to a seeking in bytes at position 0, |
|
237 FIXME: could use an index */ |
|
238 ret = gst_pad_push_event (self->sinkpad, |
|
239 gst_event_new_seek (rate, GST_FORMAT_BYTES, self->segment_flags, |
|
240 GST_SEEK_TYPE_SET, 0, GST_SEEK_TYPE_NONE, 0)); |
|
241 |
|
242 if (ret) { |
|
243 /* Apply the seek to our segment */ |
|
244 gst_segment_set_seek (&self->segment, rate, format, self->segment_flags, |
|
245 start_type, start, stop_type, stop, &update); |
|
246 #ifndef __SYMBIAN32__ |
|
247 GST_DEBUG_OBJECT (self, "segment after seek: %" GST_SEGMENT_FORMAT, |
|
248 &self->segment); |
|
249 #endif |
|
250 self->next_offset = 0; |
|
251 |
|
252 self->need_segment = TRUE; |
|
253 } else { |
|
254 GST_WARNING_OBJECT (self, "seek to 0 bytes failed"); |
|
255 } |
|
256 |
|
257 gst_event_unref (event); |
|
258 break; |
|
259 } |
|
260 default: |
|
261 ret = gst_pad_event_default (pad, event); |
|
262 break; |
|
263 } |
|
264 |
|
265 beach: |
|
266 gst_object_unref (self); |
|
267 |
|
268 return ret; |
|
269 } |
|
270 |
|
271 static void |
|
272 gst_sub_parse_set_property (GObject * object, guint prop_id, |
|
273 const GValue * value, GParamSpec * pspec) |
|
274 { |
|
275 GstSubParse *subparse = GST_SUBPARSE (object); |
|
276 |
|
277 GST_OBJECT_LOCK (subparse); |
|
278 switch (prop_id) { |
|
279 case PROP_ENCODING: |
|
280 g_free (subparse->encoding); |
|
281 subparse->encoding = g_value_dup_string (value); |
|
282 GST_LOG_OBJECT (object, "subtitle encoding set to %s", |
|
283 GST_STR_NULL (subparse->encoding)); |
|
284 break; |
|
285 default: |
|
286 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); |
|
287 break; |
|
288 } |
|
289 GST_OBJECT_UNLOCK (subparse); |
|
290 } |
|
291 |
|
292 static void |
|
293 gst_sub_parse_get_property (GObject * object, guint prop_id, |
|
294 GValue * value, GParamSpec * pspec) |
|
295 { |
|
296 GstSubParse *subparse = GST_SUBPARSE (object); |
|
297 |
|
298 GST_OBJECT_LOCK (subparse); |
|
299 switch (prop_id) { |
|
300 case PROP_ENCODING: |
|
301 g_value_set_string (value, subparse->encoding); |
|
302 break; |
|
303 default: |
|
304 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); |
|
305 break; |
|
306 } |
|
307 GST_OBJECT_UNLOCK (subparse); |
|
308 } |
|
309 |
|
310 static gchar * |
|
311 convert_encoding (GstSubParse * self, const gchar * str, gsize len) |
|
312 { |
|
313 const gchar *encoding; |
|
314 GError *err = NULL; |
|
315 gchar *ret; |
|
316 |
|
317 if (self->valid_utf8) { |
|
318 if (g_utf8_validate (str, len, NULL)) { |
|
319 GST_LOG_OBJECT (self, "valid UTF-8, no conversion needed"); |
|
320 return g_strndup (str, len); |
|
321 } |
|
322 GST_INFO_OBJECT (self, "invalid UTF-8!"); |
|
323 self->valid_utf8 = FALSE; |
|
324 } |
|
325 |
|
326 encoding = self->encoding; |
|
327 if (encoding == NULL || *encoding == '\0') { |
|
328 encoding = g_getenv ("GST_SUBTITLE_ENCODING"); |
|
329 } |
|
330 if (encoding == NULL || *encoding == '\0') { |
|
331 /* if local encoding is UTF-8 and no encoding specified |
|
332 * via the environment variable, assume ISO-8859-15 */ |
|
333 if (g_get_charset (&encoding)) { |
|
334 encoding = "ISO-8859-15"; |
|
335 } |
|
336 } |
|
337 |
|
338 ret = g_convert_with_fallback (str, len, "UTF-8", encoding, "*", NULL, |
|
339 NULL, &err); |
|
340 |
|
341 if (err) { |
|
342 GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s", |
|
343 encoding, err->message); |
|
344 g_error_free (err); |
|
345 |
|
346 /* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */ |
|
347 ret = g_convert_with_fallback (str, len, "UTF-8", "ISO-8859-15", "*", |
|
348 NULL, NULL, NULL); |
|
349 } |
|
350 |
|
351 GST_LOG_OBJECT (self, |
|
352 "successfully converted %" G_GSIZE_FORMAT " characters from %s to UTF-8" |
|
353 "%s", len, encoding, (err) ? " , using ISO-8859-15 as fallback" : ""); |
|
354 |
|
355 return ret; |
|
356 } |
|
357 |
|
358 static gchar * |
|
359 get_next_line (GstSubParse * self) |
|
360 { |
|
361 char *line = NULL; |
|
362 const char *line_end; |
|
363 int line_len; |
|
364 gboolean have_r = FALSE; |
|
365 |
|
366 line_end = strchr (self->textbuf->str, '\n'); |
|
367 |
|
368 if (!line_end) { |
|
369 /* end-of-line not found; return for more data */ |
|
370 return NULL; |
|
371 } |
|
372 |
|
373 /* get rid of '\r' */ |
|
374 if (line_end != self->textbuf->str && *(line_end - 1) == '\r') { |
|
375 line_end--; |
|
376 have_r = TRUE; |
|
377 } |
|
378 |
|
379 line_len = line_end - self->textbuf->str; |
|
380 line = convert_encoding (self, self->textbuf->str, line_len); |
|
381 self->textbuf = g_string_erase (self->textbuf, 0, |
|
382 line_len + (have_r ? 2 : 1)); |
|
383 return line; |
|
384 } |
|
385 |
|
386 static gchar * |
|
387 parse_mdvdsub (ParserState * state, const gchar * line) |
|
388 { |
|
389 const gchar *line_split; |
|
390 gchar *line_chunk; |
|
391 guint start_frame, end_frame; |
|
392 gint64 clip_start = 0, clip_stop = 0; |
|
393 gboolean in_seg = FALSE; |
|
394 GString *markup; |
|
395 gchar *ret; |
|
396 |
|
397 /* style variables */ |
|
398 gboolean italic; |
|
399 gboolean bold; |
|
400 guint fontsize; |
|
401 |
|
402 if (sscanf (line, "{%u}{%u}", &start_frame, &end_frame) != 2) { |
|
403 g_warning ("Parse of the following line, assumed to be in microdvd .sub" |
|
404 " format, failed:\n%s", line); |
|
405 return NULL; |
|
406 } |
|
407 |
|
408 /* skip the {%u}{%u} part */ |
|
409 line = strchr (line, '}') + 1; |
|
410 line = strchr (line, '}') + 1; |
|
411 |
|
412 /* see if there's a first line with a framerate */ |
|
413 if (state->fps == 0.0 && start_frame == 1 && end_frame == 1) { |
|
414 gchar *rest, *end = NULL; |
|
415 |
|
416 rest = g_strdup (line); |
|
417 g_strdelimit (rest, ",", '.'); |
|
418 state->fps = g_ascii_strtod (rest, &end); |
|
419 if (end == rest) |
|
420 state->fps = 0.0; |
|
421 GST_INFO ("framerate from file: %f ('%s')", state->fps, rest); |
|
422 g_free (rest); |
|
423 return NULL; |
|
424 } |
|
425 |
|
426 if (state->fps == 0.0) { |
|
427 /* FIXME: hardcoded for now, is there a better way/assumption? */ |
|
428 state->fps = 24000.0 / 1001.0; |
|
429 GST_INFO ("no framerate specified, assuming %f", state->fps); |
|
430 } |
|
431 |
|
432 state->start_time = start_frame / state->fps * GST_SECOND; |
|
433 state->duration = (end_frame - start_frame) / state->fps * GST_SECOND; |
|
434 |
|
435 /* Check our segment start/stop */ |
|
436 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME, |
|
437 state->start_time, state->start_time + state->duration, &clip_start, |
|
438 &clip_stop); |
|
439 |
|
440 /* No need to parse that text if it's out of segment */ |
|
441 if (in_seg) { |
|
442 state->start_time = clip_start; |
|
443 state->duration = clip_stop - clip_start; |
|
444 } else { |
|
445 return NULL; |
|
446 } |
|
447 |
|
448 markup = g_string_new (NULL); |
|
449 while (1) { |
|
450 italic = FALSE; |
|
451 bold = FALSE; |
|
452 fontsize = 0; |
|
453 /* parse style markup */ |
|
454 if (strncmp (line, "{y:i}", 5) == 0) { |
|
455 italic = TRUE; |
|
456 line = strchr (line, '}') + 1; |
|
457 } |
|
458 if (strncmp (line, "{y:b}", 5) == 0) { |
|
459 bold = TRUE; |
|
460 line = strchr (line, '}') + 1; |
|
461 } |
|
462 if (sscanf (line, "{s:%u}", &fontsize) == 1) { |
|
463 line = strchr (line, '}') + 1; |
|
464 } |
|
465 /* forward slashes at beginning/end signify italics too */ |
|
466 if (g_str_has_prefix (line, "/")) { |
|
467 italic = TRUE; |
|
468 ++line; |
|
469 } |
|
470 if ((line_split = strchr (line, '|'))) |
|
471 line_chunk = g_markup_escape_text (line, line_split - line); |
|
472 else |
|
473 line_chunk = g_markup_escape_text (line, strlen (line)); |
|
474 |
|
475 /* Remove italics markers at end of line/stanza (CHECKME: are end slashes |
|
476 * always at the end of a line or can they span multiple lines?) */ |
|
477 if (g_str_has_suffix (line_chunk, "/")) { |
|
478 line_chunk[strlen (line_chunk) - 1] = '\0'; |
|
479 } |
|
480 |
|
481 markup = g_string_append (markup, "<span"); |
|
482 if (italic) |
|
483 g_string_append (markup, " style=\"italic\""); |
|
484 if (bold) |
|
485 g_string_append (markup, " weight=\"bold\""); |
|
486 if (fontsize) |
|
487 g_string_append_printf (markup, " size=\"%u\"", fontsize * 1000); |
|
488 g_string_append_printf (markup, ">%s</span>", line_chunk); |
|
489 g_free (line_chunk); |
|
490 if (line_split) { |
|
491 g_string_append (markup, "\n"); |
|
492 line = line_split + 1; |
|
493 } else { |
|
494 break; |
|
495 } |
|
496 } |
|
497 ret = markup->str; |
|
498 g_string_free (markup, FALSE); |
|
499 GST_DEBUG ("parse_mdvdsub returning (%f+%f): %s", |
|
500 state->start_time / (double) GST_SECOND, |
|
501 state->duration / (double) GST_SECOND, ret); |
|
502 return ret; |
|
503 } |
|
504 |
|
505 static void |
|
506 strip_trailing_newlines (gchar * txt) |
|
507 { |
|
508 if (txt) { |
|
509 guint len; |
|
510 |
|
511 len = strlen (txt); |
|
512 while (len > 1 && txt[len - 1] == '\n') { |
|
513 txt[len - 1] = '\0'; |
|
514 --len; |
|
515 } |
|
516 } |
|
517 } |
|
518 |
|
519 /* we want to escape text in general, but retain basic markup like |
|
520 * <i></i>, <u></u>, and <b></b>. The easiest and safest way is to |
|
521 * just unescape a white list of allowed markups again after |
|
522 * escaping everything (the text between these simple markers isn't |
|
523 * necessarily escaped, so it seems best to do it like this) */ |
|
524 static void |
|
525 subrip_unescape_formatting (gchar * txt) |
|
526 { |
|
527 gchar *pos; |
|
528 |
|
529 for (pos = txt; pos != NULL && *pos != '\0'; ++pos) { |
|
530 if (g_ascii_strncasecmp (pos, "<u>", 9) == 0 || |
|
531 g_ascii_strncasecmp (pos, "<i>", 9) == 0 || |
|
532 g_ascii_strncasecmp (pos, "<b>", 9) == 0) { |
|
533 pos[0] = '<'; |
|
534 pos[1] = g_ascii_tolower (pos[4]); |
|
535 pos[2] = '>'; |
|
536 /* move NUL terminator as well */ |
|
537 g_memmove (pos + 3, pos + 9, strlen (pos + 9) + 1); |
|
538 pos += 2; |
|
539 } |
|
540 } |
|
541 |
|
542 for (pos = txt; pos != NULL && *pos != '\0'; ++pos) { |
|
543 if (g_ascii_strncasecmp (pos, "</u>", 10) == 0 || |
|
544 g_ascii_strncasecmp (pos, "</i>", 10) == 0 || |
|
545 g_ascii_strncasecmp (pos, "</b>", 10) == 0) { |
|
546 pos[0] = '<'; |
|
547 pos[1] = '/'; |
|
548 pos[2] = g_ascii_tolower (pos[5]); |
|
549 pos[3] = '>'; |
|
550 /* move NUL terminator as well */ |
|
551 g_memmove (pos + 4, pos + 10, strlen (pos + 10) + 1); |
|
552 pos += 3; |
|
553 } |
|
554 } |
|
555 } |
|
556 |
|
557 |
|
558 static gboolean |
|
559 subrip_remove_unhandled_tag (gchar * start, gchar * stop) |
|
560 { |
|
561 gchar *tag, saved; |
|
562 |
|
563 tag = start + strlen ("<"); |
|
564 if (*tag == '/') |
|
565 ++tag; |
|
566 |
|
567 if (g_ascii_tolower (*tag) < 'a' || g_ascii_tolower (*tag) > 'z') |
|
568 return FALSE; |
|
569 |
|
570 saved = *stop; |
|
571 *stop = '\0'; |
|
572 GST_LOG ("removing unhandled tag '%s'", start); |
|
573 *stop = saved; |
|
574 g_memmove (start, stop, strlen (stop) + 1); |
|
575 return TRUE; |
|
576 } |
|
577 |
|
578 /* remove tags we haven't explicitly allowed earlier on, like font tags |
|
579 * for example */ |
|
580 static void |
|
581 subrip_remove_unhandled_tags (gchar * txt) |
|
582 { |
|
583 gchar *pos, *gt; |
|
584 |
|
585 for (pos = txt; pos != NULL && *pos != '\0'; ++pos) { |
|
586 if (strncmp (pos, "<", 4) == 0 && (gt = strstr (pos + 4, ">"))) { |
|
587 if (subrip_remove_unhandled_tag (pos, gt + strlen (">"))) |
|
588 --pos; |
|
589 } |
|
590 } |
|
591 } |
|
592 |
|
593 /* we only allow <i>, <u> and <b>, so let's take a simple approach. This code |
|
594 * assumes the input has been escaped and subrip_unescape_formatting() has then |
|
595 * been run over the input! This function adds missing closing markup tags and |
|
596 * removes broken closing tags for tags that have never been opened. */ |
|
597 static void |
|
598 subrip_fix_up_markup (gchar ** p_txt) |
|
599 { |
|
600 gchar *cur, *next_tag; |
|
601 gchar open_tags[32]; |
|
602 guint num_open_tags = 0; |
|
603 |
|
604 g_assert (*p_txt != NULL); |
|
605 |
|
606 cur = *p_txt; |
|
607 while (*cur != '\0') { |
|
608 next_tag = strchr (cur, '<'); |
|
609 if (next_tag == NULL) |
|
610 break; |
|
611 ++next_tag; |
|
612 switch (*next_tag) { |
|
613 case '/':{ |
|
614 ++next_tag; |
|
615 if (num_open_tags == 0 || open_tags[num_open_tags - 1] != *next_tag) { |
|
616 GST_LOG ("broken input, closing tag '%c' is not open", *next_tag); |
|
617 g_memmove (next_tag - 2, next_tag + 2, strlen (next_tag + 2) + 1); |
|
618 next_tag -= 2; |
|
619 } else { |
|
620 /* it's all good, closing tag which is open */ |
|
621 --num_open_tags; |
|
622 } |
|
623 break; |
|
624 } |
|
625 case 'i': |
|
626 case 'b': |
|
627 case 'u': |
|
628 if (num_open_tags == G_N_ELEMENTS (open_tags)) |
|
629 return; /* something dodgy is going on, stop parsing */ |
|
630 open_tags[num_open_tags] = *next_tag; |
|
631 ++num_open_tags; |
|
632 break; |
|
633 default: |
|
634 GST_ERROR ("unexpected tag '%c' (%s)", *next_tag, next_tag); |
|
635 g_assert_not_reached (); |
|
636 break; |
|
637 } |
|
638 cur = next_tag; |
|
639 } |
|
640 |
|
641 if (num_open_tags > 0) { |
|
642 GString *s; |
|
643 |
|
644 s = g_string_new (*p_txt); |
|
645 while (num_open_tags > 0) { |
|
646 GST_LOG ("adding missing closing tag '%c'", open_tags[num_open_tags - 1]); |
|
647 g_string_append_c (s, '<'); |
|
648 g_string_append_c (s, '/'); |
|
649 g_string_append_c (s, open_tags[num_open_tags - 1]); |
|
650 g_string_append_c (s, '>'); |
|
651 --num_open_tags; |
|
652 } |
|
653 g_free (*p_txt); |
|
654 *p_txt = g_string_free (s, FALSE); |
|
655 } |
|
656 } |
|
657 |
|
658 static gchar * |
|
659 parse_subrip (ParserState * state, const gchar * line) |
|
660 { |
|
661 guint h1, m1, s1, ms1; |
|
662 guint h2, m2, s2, ms2; |
|
663 int subnum; |
|
664 gchar *ret; |
|
665 |
|
666 switch (state->state) { |
|
667 case 0: |
|
668 /* looking for a single integer */ |
|
669 if (sscanf (line, "%u", &subnum) == 1) |
|
670 state->state = 1; |
|
671 return NULL; |
|
672 case 1: |
|
673 /* looking for start_time --> end_time */ |
|
674 if (sscanf (line, "%u:%u:%u,%u --> %u:%u:%u,%u", |
|
675 &h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) == 8) { |
|
676 state->state = 2; |
|
677 state->start_time = |
|
678 (((guint64) h1) * 3600 + m1 * 60 + s1) * GST_SECOND + |
|
679 ms1 * GST_MSECOND; |
|
680 state->duration = |
|
681 (((guint64) h2) * 3600 + m2 * 60 + s2) * GST_SECOND + |
|
682 ms2 * GST_MSECOND - state->start_time; |
|
683 } else { |
|
684 GST_DEBUG ("error parsing subrip time line"); |
|
685 state->state = 0; |
|
686 } |
|
687 return NULL; |
|
688 case 2: |
|
689 { |
|
690 /* No need to parse that text if it's out of segment */ |
|
691 gint64 clip_start = 0, clip_stop = 0; |
|
692 gboolean in_seg = FALSE; |
|
693 |
|
694 /* Check our segment start/stop */ |
|
695 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME, |
|
696 state->start_time, state->start_time + state->duration, |
|
697 &clip_start, &clip_stop); |
|
698 |
|
699 if (in_seg) { |
|
700 state->start_time = clip_start; |
|
701 state->duration = clip_stop - clip_start; |
|
702 } else { |
|
703 state->state = 0; |
|
704 return NULL; |
|
705 } |
|
706 } |
|
707 /* looking for subtitle text; empty line ends this subtitle entry */ |
|
708 if (state->buf->len) |
|
709 g_string_append_c (state->buf, '\n'); |
|
710 g_string_append (state->buf, line); |
|
711 if (strlen (line) == 0) { |
|
712 ret = g_markup_escape_text (state->buf->str, state->buf->len); |
|
713 g_string_truncate (state->buf, 0); |
|
714 state->state = 0; |
|
715 subrip_unescape_formatting (ret); |
|
716 subrip_remove_unhandled_tags (ret); |
|
717 strip_trailing_newlines (ret); |
|
718 subrip_fix_up_markup (&ret); |
|
719 return ret; |
|
720 } |
|
721 return NULL; |
|
722 default: |
|
723 g_return_val_if_reached (NULL); |
|
724 } |
|
725 } |
|
726 |
|
727 static void |
|
728 subviewer_unescape_newlines (gchar * read) |
|
729 { |
|
730 gchar *write = read; |
|
731 |
|
732 /* Replace all occurences of '[br]' with a newline as version 2 |
|
733 * of the subviewer format uses this for newlines */ |
|
734 |
|
735 if (read[0] == '\0' || read[1] == '\0' || read[2] == '\0' || read[3] == '\0') |
|
736 return; |
|
737 |
|
738 do { |
|
739 if (strncmp (read, "[br]", 4) == 0) { |
|
740 *write = '\n'; |
|
741 read += 4; |
|
742 } else { |
|
743 *write = *read; |
|
744 read++; |
|
745 } |
|
746 write++; |
|
747 } while (*read); |
|
748 |
|
749 *write = '\0'; |
|
750 } |
|
751 |
|
752 static gchar * |
|
753 parse_subviewer (ParserState * state, const gchar * line) |
|
754 { |
|
755 guint h1, m1, s1, ms1; |
|
756 guint h2, m2, s2, ms2; |
|
757 gchar *ret; |
|
758 |
|
759 /* TODO: Maybe also parse the fields in the header, especially DELAY. |
|
760 * For examples see the unit test or |
|
761 * http://www.doom9.org/index.html?/sub.htm */ |
|
762 |
|
763 switch (state->state) { |
|
764 case 0: |
|
765 /* looking for start_time,end_time */ |
|
766 if (sscanf (line, "%u:%u:%u.%u,%u:%u:%u.%u", |
|
767 &h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) == 8) { |
|
768 state->state = 1; |
|
769 state->start_time = |
|
770 (((guint64) h1) * 3600 + m1 * 60 + s1) * GST_SECOND + |
|
771 ms1 * GST_MSECOND; |
|
772 state->duration = |
|
773 (((guint64) h2) * 3600 + m2 * 60 + s2) * GST_SECOND + |
|
774 ms2 * GST_MSECOND - state->start_time; |
|
775 } |
|
776 return NULL; |
|
777 case 1: |
|
778 { |
|
779 /* No need to parse that text if it's out of segment */ |
|
780 gint64 clip_start = 0, clip_stop = 0; |
|
781 gboolean in_seg = FALSE; |
|
782 |
|
783 /* Check our segment start/stop */ |
|
784 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME, |
|
785 state->start_time, state->start_time + state->duration, |
|
786 &clip_start, &clip_stop); |
|
787 |
|
788 if (in_seg) { |
|
789 state->start_time = clip_start; |
|
790 state->duration = clip_stop - clip_start; |
|
791 } else { |
|
792 state->state = 0; |
|
793 return NULL; |
|
794 } |
|
795 } |
|
796 /* looking for subtitle text; empty line ends this subtitle entry */ |
|
797 if (state->buf->len) |
|
798 g_string_append_c (state->buf, '\n'); |
|
799 g_string_append (state->buf, line); |
|
800 if (strlen (line) == 0) { |
|
801 ret = g_strdup (state->buf->str); |
|
802 subviewer_unescape_newlines (ret); |
|
803 strip_trailing_newlines (ret); |
|
804 g_string_truncate (state->buf, 0); |
|
805 state->state = 0; |
|
806 return ret; |
|
807 } |
|
808 return NULL; |
|
809 default: |
|
810 g_assert_not_reached (); |
|
811 return NULL; |
|
812 } |
|
813 } |
|
814 |
|
815 static gchar * |
|
816 parse_mpsub (ParserState * state, const gchar * line) |
|
817 { |
|
818 gchar *ret; |
|
819 float t1, t2; |
|
820 |
|
821 switch (state->state) { |
|
822 case 0: |
|
823 /* looking for two floats (offset, duration) */ |
|
824 if (sscanf (line, "%f %f", &t1, &t2) == 2) { |
|
825 state->state = 1; |
|
826 state->start_time += state->duration + GST_SECOND * t1; |
|
827 state->duration = GST_SECOND * t2; |
|
828 } |
|
829 return NULL; |
|
830 case 1: |
|
831 { /* No need to parse that text if it's out of segment */ |
|
832 gint64 clip_start = 0, clip_stop = 0; |
|
833 gboolean in_seg = FALSE; |
|
834 |
|
835 /* Check our segment start/stop */ |
|
836 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME, |
|
837 state->start_time, state->start_time + state->duration, |
|
838 &clip_start, &clip_stop); |
|
839 |
|
840 if (in_seg) { |
|
841 state->start_time = clip_start; |
|
842 state->duration = clip_stop - clip_start; |
|
843 } else { |
|
844 state->state = 0; |
|
845 return NULL; |
|
846 } |
|
847 } |
|
848 /* looking for subtitle text; empty line ends this |
|
849 * subtitle entry */ |
|
850 if (state->buf->len) |
|
851 g_string_append_c (state->buf, '\n'); |
|
852 g_string_append (state->buf, line); |
|
853 if (strlen (line) == 0) { |
|
854 ret = g_strdup (state->buf->str); |
|
855 g_string_truncate (state->buf, 0); |
|
856 state->state = 0; |
|
857 return ret; |
|
858 } |
|
859 return NULL; |
|
860 default: |
|
861 g_assert_not_reached (); |
|
862 return NULL; |
|
863 } |
|
864 } |
|
865 |
|
866 static void |
|
867 parser_state_init (ParserState * state) |
|
868 { |
|
869 GST_DEBUG ("initialising parser"); |
|
870 |
|
871 if (state->buf) { |
|
872 g_string_truncate (state->buf, 0); |
|
873 } else { |
|
874 state->buf = g_string_new (NULL); |
|
875 } |
|
876 |
|
877 state->start_time = 0; |
|
878 state->duration = 0; |
|
879 state->state = 0; |
|
880 state->segment = NULL; |
|
881 } |
|
882 |
|
883 static void |
|
884 parser_state_dispose (ParserState * state) |
|
885 { |
|
886 if (state->buf) { |
|
887 g_string_free (state->buf, TRUE); |
|
888 state->buf = NULL; |
|
889 } |
|
890 if (state->user_data) { |
|
891 sami_context_reset (state); |
|
892 } |
|
893 } |
|
894 |
|
895 /* |
|
896 * FIXME: maybe we should pass along a second argument, the preceding |
|
897 * text buffer, because that is how this originally worked, even though |
|
898 * I don't really see the use of that. |
|
899 */ |
|
900 |
|
901 static GstSubParseFormat |
|
902 gst_sub_parse_data_format_autodetect (gchar * match_str) |
|
903 { |
|
904 static gboolean need_init_regexps = TRUE; |
|
905 static regex_t mdvd_rx; |
|
906 static regex_t subrip_rx; |
|
907 guint n1, n2, n3; |
|
908 |
|
909 /* initialize the regexps used the first time around */ |
|
910 if (need_init_regexps) { |
|
911 int err; |
|
912 char errstr[128]; |
|
913 |
|
914 need_init_regexps = FALSE; |
|
915 if ((err = regcomp (&mdvd_rx, "^\\{[0-9]+\\}\\{[0-9]+\\}", |
|
916 REG_EXTENDED | REG_NEWLINE | REG_NOSUB) != 0) || |
|
917 (err = regcomp (&subrip_rx, "^[0-9]([0-9]){0,3}(\x0d)?\x0a" |
|
918 "[0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}" |
|
919 " --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}", |
|
920 REG_EXTENDED | REG_NEWLINE | REG_NOSUB)) != 0) { |
|
921 regerror (err, &subrip_rx, errstr, 127); |
|
922 GST_WARNING ("Compilation of subrip regex failed: %s", errstr); |
|
923 } |
|
924 } |
|
925 |
|
926 if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) { |
|
927 GST_LOG ("MicroDVD (frame based) format detected"); |
|
928 return GST_SUB_PARSE_FORMAT_MDVDSUB; |
|
929 } |
|
930 if (regexec (&subrip_rx, match_str, 0, NULL, 0) == 0) { |
|
931 GST_LOG ("SubRip (time based) format detected"); |
|
932 return GST_SUB_PARSE_FORMAT_SUBRIP; |
|
933 } |
|
934 |
|
935 if (!strncmp (match_str, "FORMAT=TIME", 11)) { |
|
936 GST_LOG ("MPSub (time based) format detected"); |
|
937 return GST_SUB_PARSE_FORMAT_MPSUB; |
|
938 } |
|
939 if (strstr (match_str, "<SAMI>") != NULL || |
|
940 strstr (match_str, "<sami>") != NULL) { |
|
941 GST_LOG ("SAMI (time based) format detected"); |
|
942 return GST_SUB_PARSE_FORMAT_SAMI; |
|
943 } |
|
944 /* we're boldly assuming the first subtitle appears within the first hour */ |
|
945 if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 || |
|
946 sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 || |
|
947 sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 || |
|
948 sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 || |
|
949 sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) { |
|
950 GST_LOG ("TMPlayer (time based) format detected"); |
|
951 return GST_SUB_PARSE_FORMAT_TMPLAYER; |
|
952 } |
|
953 if (sscanf (match_str, "[%u][%u]", &n1, &n2) == 2) { |
|
954 GST_LOG ("MPL2 (time based) format detected"); |
|
955 return GST_SUB_PARSE_FORMAT_MPL2; |
|
956 } |
|
957 if (strstr (match_str, "[INFORMATION]") != NULL) { |
|
958 GST_LOG ("SubViewer (time based) format detected"); |
|
959 return GST_SUB_PARSE_FORMAT_SUBVIEWER; |
|
960 } |
|
961 |
|
962 GST_DEBUG ("no subtitle format detected"); |
|
963 return GST_SUB_PARSE_FORMAT_UNKNOWN; |
|
964 } |
|
965 |
|
966 static GstCaps * |
|
967 gst_sub_parse_format_autodetect (GstSubParse * self) |
|
968 { |
|
969 gchar *data; |
|
970 GstSubParseFormat format; |
|
971 |
|
972 if (strlen (self->textbuf->str) < 35) { |
|
973 GST_DEBUG ("File too small to be a subtitles file"); |
|
974 return NULL; |
|
975 } |
|
976 |
|
977 data = g_strndup (self->textbuf->str, 35); |
|
978 format = gst_sub_parse_data_format_autodetect (data); |
|
979 g_free (data); |
|
980 |
|
981 self->parser_type = format; |
|
982 parser_state_init (&self->state); |
|
983 |
|
984 switch (format) { |
|
985 case GST_SUB_PARSE_FORMAT_MDVDSUB: |
|
986 self->parse_line = parse_mdvdsub; |
|
987 return gst_caps_new_simple ("text/x-pango-markup", NULL); |
|
988 case GST_SUB_PARSE_FORMAT_SUBRIP: |
|
989 self->parse_line = parse_subrip; |
|
990 return gst_caps_new_simple ("text/x-pango-markup", NULL); |
|
991 case GST_SUB_PARSE_FORMAT_MPSUB: |
|
992 self->parse_line = parse_mpsub; |
|
993 return gst_caps_new_simple ("text/plain", NULL); |
|
994 case GST_SUB_PARSE_FORMAT_SAMI: |
|
995 self->parse_line = parse_sami; |
|
996 sami_context_init (&self->state); |
|
997 return gst_caps_new_simple ("text/x-pango-markup", NULL); |
|
998 case GST_SUB_PARSE_FORMAT_TMPLAYER: |
|
999 self->parse_line = parse_tmplayer; |
|
1000 return gst_caps_new_simple ("text/plain", NULL); |
|
1001 case GST_SUB_PARSE_FORMAT_MPL2: |
|
1002 self->parse_line = parse_mpl2; |
|
1003 return gst_caps_new_simple ("text/x-pango-markup", NULL); |
|
1004 case GST_SUB_PARSE_FORMAT_SUBVIEWER: |
|
1005 self->parse_line = parse_subviewer; |
|
1006 return gst_caps_new_simple ("text/plain", NULL); |
|
1007 case GST_SUB_PARSE_FORMAT_UNKNOWN: |
|
1008 default: |
|
1009 GST_DEBUG ("no subtitle format detected"); |
|
1010 GST_ELEMENT_ERROR (self, STREAM, WRONG_TYPE, |
|
1011 ("The input is not a valid/supported subtitle file"), (NULL)); |
|
1012 return NULL; |
|
1013 } |
|
1014 } |
|
1015 |
|
1016 static void |
|
1017 feed_textbuf (GstSubParse * self, GstBuffer * buf) |
|
1018 { |
|
1019 if (GST_BUFFER_OFFSET (buf) != self->offset) { |
|
1020 /* flush the parser state */ |
|
1021 parser_state_init (&self->state); |
|
1022 g_string_truncate (self->textbuf, 0); |
|
1023 sami_context_reset (&self->state); |
|
1024 } |
|
1025 |
|
1026 self->textbuf = g_string_append_len (self->textbuf, |
|
1027 (gchar *) GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf)); |
|
1028 self->offset = GST_BUFFER_OFFSET (buf) + GST_BUFFER_SIZE (buf); |
|
1029 self->next_offset = self->offset; |
|
1030 |
|
1031 gst_buffer_unref (buf); |
|
1032 } |
|
1033 |
|
1034 static GstFlowReturn |
|
1035 handle_buffer (GstSubParse * self, GstBuffer * buf) |
|
1036 { |
|
1037 GstFlowReturn ret = GST_FLOW_OK; |
|
1038 GstCaps *caps = NULL; |
|
1039 gchar *line, *subtitle; |
|
1040 |
|
1041 feed_textbuf (self, buf); |
|
1042 |
|
1043 /* make sure we know the format */ |
|
1044 if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) { |
|
1045 if (!(caps = gst_sub_parse_format_autodetect (self))) { |
|
1046 return GST_FLOW_UNEXPECTED; |
|
1047 } |
|
1048 if (!gst_pad_set_caps (self->srcpad, caps)) { |
|
1049 gst_caps_unref (caps); |
|
1050 return GST_FLOW_UNEXPECTED; |
|
1051 } |
|
1052 gst_caps_unref (caps); |
|
1053 } |
|
1054 |
|
1055 while ((line = get_next_line (self)) && !self->flushing) { |
|
1056 /* Set segment on our parser state machine */ |
|
1057 self->state.segment = &self->segment; |
|
1058 /* Now parse the line, out of segment lines will just return NULL */ |
|
1059 GST_LOG_OBJECT (self, "Parsing line '%s'", line); |
|
1060 subtitle = self->parse_line (&self->state, line); |
|
1061 g_free (line); |
|
1062 |
|
1063 if (subtitle) { |
|
1064 guint subtitle_len = strlen (subtitle); |
|
1065 |
|
1066 /* +1 for terminating NUL character */ |
|
1067 ret = gst_pad_alloc_buffer_and_set_caps (self->srcpad, |
|
1068 GST_BUFFER_OFFSET_NONE, subtitle_len + 1, |
|
1069 GST_PAD_CAPS (self->srcpad), &buf); |
|
1070 |
|
1071 if (ret == GST_FLOW_OK) { |
|
1072 /* copy terminating NUL character as well */ |
|
1073 memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1); |
|
1074 GST_BUFFER_SIZE (buf) = subtitle_len; |
|
1075 GST_BUFFER_TIMESTAMP (buf) = self->state.start_time; |
|
1076 GST_BUFFER_DURATION (buf) = self->state.duration; |
|
1077 |
|
1078 gst_segment_set_last_stop (&self->segment, GST_FORMAT_TIME, |
|
1079 self->state.start_time); |
|
1080 |
|
1081 GST_DEBUG_OBJECT (self, "Sending text '%s', %" GST_TIME_FORMAT " + %" |
|
1082 GST_TIME_FORMAT, subtitle, GST_TIME_ARGS (self->state.start_time), |
|
1083 GST_TIME_ARGS (self->state.duration)); |
|
1084 |
|
1085 ret = gst_pad_push (self->srcpad, buf); |
|
1086 } |
|
1087 |
|
1088 g_free (subtitle); |
|
1089 subtitle = NULL; |
|
1090 |
|
1091 if (ret != GST_FLOW_OK) { |
|
1092 GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret)); |
|
1093 break; |
|
1094 } |
|
1095 } |
|
1096 } |
|
1097 |
|
1098 return ret; |
|
1099 } |
|
1100 |
|
1101 static GstFlowReturn |
|
1102 gst_sub_parse_chain (GstPad * sinkpad, GstBuffer * buf) |
|
1103 { |
|
1104 GstFlowReturn ret; |
|
1105 GstSubParse *self; |
|
1106 |
|
1107 self = GST_SUBPARSE (GST_PAD_PARENT (sinkpad)); |
|
1108 |
|
1109 /* Push newsegment if needed */ |
|
1110 if (self->need_segment) { |
|
1111 #ifndef __SYMBIAN32__ |
|
1112 GST_LOG_OBJECT (self, "pushing newsegment event with %" GST_SEGMENT_FORMAT, |
|
1113 &self->segment); |
|
1114 #endif |
|
1115 gst_pad_push_event (self->srcpad, gst_event_new_new_segment (FALSE, |
|
1116 self->segment.rate, self->segment.format, |
|
1117 self->segment.last_stop, self->segment.stop, self->segment.time)); |
|
1118 self->need_segment = FALSE; |
|
1119 } |
|
1120 |
|
1121 ret = handle_buffer (self, buf); |
|
1122 |
|
1123 return ret; |
|
1124 } |
|
1125 |
|
1126 static gboolean |
|
1127 gst_sub_parse_sink_event (GstPad * pad, GstEvent * event) |
|
1128 { |
|
1129 GstSubParse *self = GST_SUBPARSE (gst_pad_get_parent (pad)); |
|
1130 gboolean ret = FALSE; |
|
1131 |
|
1132 GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event)); |
|
1133 |
|
1134 switch (GST_EVENT_TYPE (event)) { |
|
1135 case GST_EVENT_EOS:{ |
|
1136 /* Make sure the last subrip chunk is pushed out even |
|
1137 * if the file does not have an empty line at the end */ |
|
1138 if (self->parser_type == GST_SUB_PARSE_FORMAT_SUBRIP || |
|
1139 self->parser_type == GST_SUB_PARSE_FORMAT_MPL2) { |
|
1140 GstBuffer *buf = gst_buffer_new_and_alloc (1 + 1); |
|
1141 |
|
1142 GST_DEBUG ("EOS. Pushing remaining text (if any)"); |
|
1143 GST_BUFFER_DATA (buf)[0] = '\n'; |
|
1144 GST_BUFFER_DATA (buf)[1] = '\0'; /* play it safe */ |
|
1145 GST_BUFFER_SIZE (buf) = 1; |
|
1146 GST_BUFFER_OFFSET (buf) = self->offset; |
|
1147 gst_sub_parse_chain (pad, buf); |
|
1148 } |
|
1149 ret = gst_pad_event_default (pad, event); |
|
1150 break; |
|
1151 } |
|
1152 case GST_EVENT_NEWSEGMENT: |
|
1153 { |
|
1154 GstFormat format; |
|
1155 gdouble rate; |
|
1156 gint64 start, stop, time; |
|
1157 gboolean update; |
|
1158 |
|
1159 gst_event_parse_new_segment (event, &update, &rate, &format, &start, |
|
1160 &stop, &time); |
|
1161 |
|
1162 GST_DEBUG_OBJECT (self, "newsegment (%s)", gst_format_get_name (format)); |
|
1163 |
|
1164 if (format == GST_FORMAT_TIME) { |
|
1165 gst_segment_set_newsegment (&self->segment, update, rate, format, |
|
1166 start, stop, time); |
|
1167 } else { |
|
1168 /* if not time format, we'll either start with a 0 timestamp anyway or |
|
1169 * it's following a seek in which case we'll have saved the requested |
|
1170 * seek segment and don't want to overwrite it (remember that on a seek |
|
1171 * we always just seek back to the start in BYTES format and just throw |
|
1172 * away all text that's before the requested position; if the subtitles |
|
1173 * come from an upstream demuxer, it won't be able to handle our BYTES |
|
1174 * seek request and instead send us a newsegment from the seek request |
|
1175 * it received via its video pads instead, so all is fine then too) */ |
|
1176 } |
|
1177 |
|
1178 ret = TRUE; |
|
1179 gst_event_unref (event); |
|
1180 break; |
|
1181 } |
|
1182 case GST_EVENT_FLUSH_START: |
|
1183 { |
|
1184 self->flushing = TRUE; |
|
1185 |
|
1186 ret = gst_pad_event_default (pad, event); |
|
1187 break; |
|
1188 } |
|
1189 case GST_EVENT_FLUSH_STOP: |
|
1190 { |
|
1191 self->flushing = FALSE; |
|
1192 |
|
1193 ret = gst_pad_event_default (pad, event); |
|
1194 break; |
|
1195 } |
|
1196 default: |
|
1197 ret = gst_pad_event_default (pad, event); |
|
1198 break; |
|
1199 } |
|
1200 |
|
1201 gst_object_unref (self); |
|
1202 |
|
1203 return ret; |
|
1204 } |
|
1205 |
|
1206 |
|
1207 static GstStateChangeReturn |
|
1208 gst_sub_parse_change_state (GstElement * element, GstStateChange transition) |
|
1209 { |
|
1210 GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS; |
|
1211 GstSubParse *self = GST_SUBPARSE (element); |
|
1212 |
|
1213 switch (transition) { |
|
1214 case GST_STATE_CHANGE_READY_TO_PAUSED: |
|
1215 /* format detection will init the parser state */ |
|
1216 self->offset = 0; |
|
1217 self->next_offset = 0; |
|
1218 self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN; |
|
1219 self->valid_utf8 = TRUE; |
|
1220 g_string_truncate (self->textbuf, 0); |
|
1221 break; |
|
1222 default: |
|
1223 break; |
|
1224 } |
|
1225 |
|
1226 ret = parent_class->change_state (element, transition); |
|
1227 if (ret == GST_STATE_CHANGE_FAILURE) |
|
1228 return ret; |
|
1229 |
|
1230 switch (transition) { |
|
1231 case GST_STATE_CHANGE_PAUSED_TO_READY: |
|
1232 parser_state_dispose (&self->state); |
|
1233 self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN; |
|
1234 break; |
|
1235 default: |
|
1236 break; |
|
1237 } |
|
1238 |
|
1239 return ret; |
|
1240 } |
|
1241 |
|
1242 /* |
|
1243 * Typefind support. |
|
1244 */ |
|
1245 |
|
1246 /* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so; |
|
1247 * also, give different subtitle formats really different types */ |
|
1248 static GstStaticCaps mpl2_caps = |
|
1249 GST_STATIC_CAPS ("application/x-subtitle-mpl2"); |
|
1250 static GstStaticCaps tmp_caps = |
|
1251 GST_STATIC_CAPS ("application/x-subtitle-tmplayer"); |
|
1252 static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami"); |
|
1253 static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle"); |
|
1254 |
|
1255 #define SUB_CAPS (gst_static_caps_get (&sub_caps)) |
|
1256 #define SAMI_CAPS (gst_static_caps_get (&smi_caps)) |
|
1257 #define TMP_CAPS (gst_static_caps_get (&tmp_caps)) |
|
1258 #define MPL2_CAPS (gst_static_caps_get (&mpl2_caps)) |
|
1259 |
|
1260 static void |
|
1261 gst_subparse_type_find (GstTypeFind * tf, gpointer private) |
|
1262 { |
|
1263 GstSubParseFormat format; |
|
1264 const guint8 *data; |
|
1265 GstCaps *caps; |
|
1266 gchar *str; |
|
1267 |
|
1268 if (!(data = gst_type_find_peek (tf, 0, 36))) |
|
1269 return; |
|
1270 |
|
1271 /* make sure string passed to _autodetect() is NUL-terminated */ |
|
1272 str = g_strndup ((gchar *) data, 35); |
|
1273 format = gst_sub_parse_data_format_autodetect (str); |
|
1274 g_free (str); |
|
1275 |
|
1276 switch (format) { |
|
1277 case GST_SUB_PARSE_FORMAT_MDVDSUB: |
|
1278 GST_DEBUG ("MicroDVD format detected"); |
|
1279 caps = SUB_CAPS; |
|
1280 break; |
|
1281 case GST_SUB_PARSE_FORMAT_SUBRIP: |
|
1282 GST_DEBUG ("SubRip format detected"); |
|
1283 caps = SUB_CAPS; |
|
1284 break; |
|
1285 case GST_SUB_PARSE_FORMAT_MPSUB: |
|
1286 GST_DEBUG ("MPSub format detected"); |
|
1287 caps = SUB_CAPS; |
|
1288 break; |
|
1289 case GST_SUB_PARSE_FORMAT_SAMI: |
|
1290 GST_DEBUG ("SAMI (time-based) format detected"); |
|
1291 caps = SAMI_CAPS; |
|
1292 break; |
|
1293 case GST_SUB_PARSE_FORMAT_TMPLAYER: |
|
1294 GST_DEBUG ("TMPlayer (time based) format detected"); |
|
1295 caps = TMP_CAPS; |
|
1296 break; |
|
1297 /* FIXME: our MPL2 typefinding is not really good enough to warrant |
|
1298 * returning a high probability (however, since we registered our |
|
1299 * typefinder here with a rank of MARGINAL we should pretty much only |
|
1300 * be called if most other typefinders have already run */ |
|
1301 case GST_SUB_PARSE_FORMAT_MPL2: |
|
1302 GST_DEBUG ("MPL2 (time based) format detected"); |
|
1303 caps = MPL2_CAPS; |
|
1304 break; |
|
1305 case GST_SUB_PARSE_FORMAT_SUBVIEWER: |
|
1306 GST_DEBUG ("SubViewer format detected"); |
|
1307 caps = SUB_CAPS; |
|
1308 break; |
|
1309 default: |
|
1310 case GST_SUB_PARSE_FORMAT_UNKNOWN: |
|
1311 GST_DEBUG ("no subtitle format detected"); |
|
1312 return; |
|
1313 } |
|
1314 |
|
1315 /* if we're here, it's ok */ |
|
1316 gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps); |
|
1317 } |
|
1318 |
|
1319 static gboolean |
|
1320 plugin_init (GstPlugin * plugin) |
|
1321 { |
|
1322 static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", "smi", "txt", |
|
1323 NULL |
|
1324 }; |
|
1325 |
|
1326 GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser"); |
|
1327 |
|
1328 if (!gst_type_find_register (plugin, "subparse_typefind", GST_RANK_MARGINAL, |
|
1329 gst_subparse_type_find, sub_exts, SUB_CAPS, NULL, NULL)) |
|
1330 return FALSE; |
|
1331 |
|
1332 if (!gst_element_register (plugin, "subparse", |
|
1333 GST_RANK_PRIMARY, GST_TYPE_SUBPARSE) || |
|
1334 !gst_element_register (plugin, "ssaparse", |
|
1335 GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE)) { |
|
1336 return FALSE; |
|
1337 } |
|
1338 |
|
1339 return TRUE; |
|
1340 } |
|
1341 |
|
1342 GST_PLUGIN_DEFINE (GST_VERSION_MAJOR, |
|
1343 GST_VERSION_MINOR, |
|
1344 "subparse", |
|
1345 "Subtitle parsing", |
|
1346 plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN); |
|
1347 |
|
1348 #ifdef __SYMBIAN32__ |
|
1349 EXPORT_C |
|
1350 #endif |
|
1351 GstPluginDesc* _GST_PLUGIN_DESC() |
|
1352 { |
|
1353 return &gst_plugin_desc; |
|
1354 } |