gst_plugins_base/gst/subparse/gstssaparse.c
changeset 0 0e761a78d257
child 8 4a7fac7dd34a
equal deleted inserted replaced
-1:000000000000 0:0e761a78d257
       
     1 /* GStreamer SSA subtitle parser
       
     2  * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net>
       
     3  *
       
     4  * This library is free software; you can redistribute it and/or
       
     5  * modify it under the terms of the GNU Library General Public
       
     6  * License as published by the Free Software Foundation; either
       
     7  * version 2 of the License, or (at your option) any later version.
       
     8  *
       
     9  * This library is distributed in the hope that it will be useful,
       
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    12  * Library General Public License for more details.
       
    13  *
       
    14  * You should have received a copy of the GNU Library General Public
       
    15  * License along with this library; if not, write to the
       
    16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       
    17  * Boston, MA 02111-1307, USA.
       
    18  */
       
    19 
       
    20 /* Super-primitive SSA parser - we just want the text and ignore
       
    21  * everything else like styles and timing codes etc. for now */
       
    22 
       
    23 #ifdef HAVE_CONFIG_H
       
    24 #include "config.h"
       
    25 #endif
       
    26 
       
    27 #include <stdlib.h>             /* atoi() */
       
    28 #include <string.h>
       
    29 
       
    30 #include "gstssaparse.h"
       
    31 
       
    32 GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
       
    33 #define GST_CAT_DEFAULT ssa_parse_debug
       
    34 
       
    35 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
       
    36     GST_PAD_SINK,
       
    37     GST_PAD_ALWAYS,
       
    38     GST_STATIC_CAPS ("application/x-ssa; application/x-ass")
       
    39     );
       
    40 
       
    41 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
       
    42     GST_PAD_SRC,
       
    43     GST_PAD_ALWAYS,
       
    44     GST_STATIC_CAPS ("text/x-pango-markup")
       
    45     );
       
    46 
       
    47 GST_BOILERPLATE (GstSsaParse, gst_ssa_parse, GstElement, GST_TYPE_ELEMENT);
       
    48 
       
    49 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
       
    50     element, GstStateChange transition);
       
    51 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps);
       
    52 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstEvent * event);
       
    53 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstEvent * event);
       
    54 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstBuffer * buf);
       
    55 
       
    56 
       
    57 static void
       
    58 gst_ssa_parse_base_init (gpointer klass)
       
    59 {
       
    60   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
       
    61   static GstElementDetails ssa_parse_details = {
       
    62     "SSA Subtitle Parser",
       
    63     "Codec/Parser/Subtitle",
       
    64     "Parses SSA subtitle streams",
       
    65     "Tim-Philipp Müller <tim centricular net>"
       
    66   };
       
    67 
       
    68   gst_element_class_add_pad_template (element_class,
       
    69       gst_static_pad_template_get (&sink_templ));
       
    70   gst_element_class_add_pad_template (element_class,
       
    71       gst_static_pad_template_get (&src_templ));
       
    72   gst_element_class_set_details (element_class, &ssa_parse_details);
       
    73 
       
    74   GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0,
       
    75       "SSA subtitle parser");
       
    76 }
       
    77 
       
    78 static void
       
    79 gst_ssa_parse_dispose (GObject * object)
       
    80 {
       
    81   GstSsaParse *parse = GST_SSA_PARSE (object);
       
    82 
       
    83   g_free (parse->ini);
       
    84   parse->ini = NULL;
       
    85 
       
    86   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
       
    87 }
       
    88 
       
    89 static void
       
    90 gst_ssa_parse_init (GstSsaParse * parse, GstSsaParseClass * klass)
       
    91 {
       
    92   parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
       
    93   gst_pad_set_setcaps_function (parse->sinkpad,
       
    94       GST_DEBUG_FUNCPTR (gst_ssa_parse_setcaps));
       
    95   gst_pad_set_chain_function (parse->sinkpad,
       
    96       GST_DEBUG_FUNCPTR (gst_ssa_parse_chain));
       
    97   gst_pad_set_event_function (parse->sinkpad,
       
    98       GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event));
       
    99   gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad);
       
   100 
       
   101   parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
       
   102   gst_pad_set_event_function (parse->srcpad,
       
   103       GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event));
       
   104   gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad);
       
   105   gst_pad_use_fixed_caps (parse->srcpad);
       
   106   gst_pad_set_caps (parse->srcpad,
       
   107       gst_static_pad_template_get_caps (&src_templ));
       
   108 
       
   109   parse->ini = NULL;
       
   110   parse->framed = FALSE;
       
   111 }
       
   112 
       
   113 static void
       
   114 gst_ssa_parse_class_init (GstSsaParseClass * klass)
       
   115 {
       
   116   GObjectClass *object_class = G_OBJECT_CLASS (klass);
       
   117   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
       
   118 
       
   119   object_class->dispose = GST_DEBUG_FUNCPTR (gst_ssa_parse_dispose);
       
   120 
       
   121   element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state);
       
   122 }
       
   123 
       
   124 static gboolean
       
   125 gst_ssa_parse_src_event (GstPad * pad, GstEvent * event)
       
   126 {
       
   127   return gst_pad_event_default (pad, event);
       
   128 }
       
   129 
       
   130 static gboolean
       
   131 gst_ssa_parse_sink_event (GstPad * pad, GstEvent * event)
       
   132 {
       
   133   return gst_pad_event_default (pad, event);
       
   134 }
       
   135 
       
   136 static gboolean
       
   137 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps)
       
   138 {
       
   139   GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
       
   140   const GValue *val;
       
   141   GstStructure *s;
       
   142   const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF };
       
   143   GstBuffer *priv;
       
   144   gchar *data;
       
   145   guint size;
       
   146 
       
   147   s = gst_caps_get_structure (caps, 0);
       
   148   val = gst_structure_get_value (s, "codec_data");
       
   149   if (val == NULL) {
       
   150     parse->framed = FALSE;
       
   151     GST_ERROR ("Only SSA subtitles embedded in containers are supported");
       
   152     return FALSE;
       
   153   }
       
   154 
       
   155   parse->framed = TRUE;
       
   156 
       
   157   priv = (GstBuffer *) gst_value_get_mini_object (val);
       
   158   g_return_val_if_fail (priv != NULL, FALSE);
       
   159 
       
   160   gst_buffer_ref (priv);
       
   161 
       
   162   data = (gchar *) GST_BUFFER_DATA (priv);
       
   163   size = GST_BUFFER_SIZE (priv);
       
   164   /* skip UTF-8 BOM */
       
   165   if (size >= 3 && memcmp (data, bom_utf8, 3) == 0) {
       
   166     data += 3;
       
   167     size -= 3;
       
   168   }
       
   169 
       
   170   if (!strstr (data, "[Script Info]")) {
       
   171     GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header");
       
   172     gst_buffer_unref (priv);
       
   173     return FALSE;
       
   174   }
       
   175 
       
   176   if (!g_utf8_validate (data, size, NULL)) {
       
   177     GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8");
       
   178     gst_buffer_unref (priv);
       
   179     return FALSE;
       
   180   }
       
   181 
       
   182   /* FIXME: parse initial section */
       
   183   parse->ini = g_strndup (data, size);
       
   184   GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini);
       
   185 
       
   186   gst_buffer_unref (priv);
       
   187 
       
   188   return TRUE;
       
   189 }
       
   190 
       
   191 static gboolean
       
   192 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
       
   193 {
       
   194   gchar *t, *end;
       
   195   gboolean removed_any = FALSE;
       
   196 
       
   197   while ((t = strchr (txt, '{'))) {
       
   198     end = strchr (txt, '}');
       
   199     if (end == NULL) {
       
   200       GST_WARNING_OBJECT (parse, "Missing { for style override code");
       
   201       return removed_any;
       
   202     }
       
   203     /* move terminating NUL character forward as well */
       
   204     g_memmove (t, end + 1, strlen (end + 1) + 1);
       
   205     removed_any = TRUE;
       
   206   }
       
   207 
       
   208   /* these may occur outside of curly brackets. We don't handle the different
       
   209    * wrapping modes yet, so just remove these markers from the text for now */
       
   210   while ((t = strstr (txt, "\\n"))) {
       
   211     t[0] = ' ';
       
   212     t[1] = '\n';
       
   213   }
       
   214   while ((t = strstr (txt, "\\N"))) {
       
   215     t[0] = ' ';
       
   216     t[1] = '\n';
       
   217   }
       
   218   while ((t = strstr (txt, "\\h"))) {
       
   219     t[0] = ' ';
       
   220     t[1] = ' ';
       
   221   }
       
   222 
       
   223   return removed_any;
       
   224 }
       
   225 
       
   226 /**
       
   227  * gst_ssa_parse_push_line:
       
   228  * @parse: caller element
       
   229  * @txt: text to push
       
   230  * @start: timestamp for the buffer
       
   231  * @duration: duration for the buffer
       
   232  *
       
   233  * Parse the text in a buffer with the given properties and
       
   234  * push it to the srcpad of the @parse element
       
   235  *
       
   236  * Returns: result of the push of the created buffer
       
   237  */
       
   238 static GstFlowReturn
       
   239 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
       
   240     GstClockTime start, GstClockTime duration)
       
   241 {
       
   242   GstFlowReturn ret;
       
   243   GstBuffer *buf;
       
   244   gchar *t, *escaped;
       
   245   gint num, i, len;
       
   246 
       
   247   num = atoi (txt);
       
   248   GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
       
   249       num, GST_TIME_ARGS (start));
       
   250 
       
   251   /* skip all non-text fields before the actual text */
       
   252   t = txt;
       
   253   for (i = 0; i < 8; ++i) {
       
   254     t = strchr (t, ',');
       
   255     if (t == NULL)
       
   256       return GST_FLOW_ERROR;
       
   257     ++t;
       
   258   }
       
   259 
       
   260   GST_LOG_OBJECT (parse, "Text : %s", t);
       
   261 
       
   262   if (gst_ssa_parse_remove_override_codes (parse, t)) {
       
   263     GST_LOG_OBJECT (parse, "Clean: %s", t);
       
   264   }
       
   265 
       
   266   /* we claim to output pango markup, so we must escape the
       
   267    * text even if we don't actually use any pango markup yet */
       
   268   escaped = g_markup_printf_escaped ("%s", t);
       
   269 
       
   270   len = strlen (escaped);
       
   271 
       
   272   /* allocate enough for a terminating NUL, but don't include it in buf size */
       
   273   buf = gst_buffer_new_and_alloc (len + 1);
       
   274   memcpy (GST_BUFFER_DATA (buf), escaped, len + 1);
       
   275   GST_BUFFER_SIZE (buf) = len;
       
   276   g_free (escaped);
       
   277 
       
   278   GST_BUFFER_TIMESTAMP (buf) = start;
       
   279   GST_BUFFER_DURATION (buf) = duration;
       
   280 
       
   281   gst_buffer_set_caps (buf, GST_PAD_CAPS (parse->srcpad));
       
   282 
       
   283   GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
       
   284       " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
       
   285       GST_TIME_ARGS (duration));
       
   286 
       
   287   ret = gst_pad_push (parse->srcpad, buf);
       
   288 
       
   289   if (ret != GST_FLOW_OK) {
       
   290     GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt,
       
   291         gst_flow_get_name (ret));
       
   292   }
       
   293 
       
   294   return ret;
       
   295 }
       
   296 
       
   297 static GstFlowReturn
       
   298 gst_ssa_parse_chain (GstPad * sinkpad, GstBuffer * buf)
       
   299 {
       
   300   GstFlowReturn ret;
       
   301   GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
       
   302   GstClockTime ts;
       
   303   gchar *txt;
       
   304 
       
   305   if (G_UNLIKELY (!parse->framed))
       
   306     goto not_framed;
       
   307 
       
   308   /* make double-sure it's 0-terminated and all */
       
   309   txt = g_strndup ((gchar *) GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
       
   310 
       
   311   if (txt == NULL)
       
   312     return GST_FLOW_UNEXPECTED;
       
   313 
       
   314   ts = GST_BUFFER_TIMESTAMP (buf);
       
   315   ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
       
   316 
       
   317   if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
       
   318     /* just advance time without sending anything */
       
   319     gst_pad_push_event (parse->srcpad,
       
   320         gst_event_new_new_segment (TRUE, 1.0, GST_FORMAT_TIME, ts, -1, ts));
       
   321     ret = GST_FLOW_OK;
       
   322   }
       
   323 
       
   324   g_free (txt);
       
   325 
       
   326   return ret;
       
   327 
       
   328 /* ERRORS */
       
   329 not_framed:
       
   330   {
       
   331     GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL),
       
   332         ("Only SSA subtitles embedded in containers are supported"));
       
   333     return GST_FLOW_NOT_NEGOTIATED;
       
   334   }
       
   335 }
       
   336 
       
   337 static GstStateChangeReturn
       
   338 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition)
       
   339 {
       
   340   GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
       
   341   GstSsaParse *parse = GST_SSA_PARSE (element);
       
   342 
       
   343   switch (transition) {
       
   344     case GST_STATE_CHANGE_READY_TO_PAUSED:
       
   345       break;
       
   346     default:
       
   347       break;
       
   348   }
       
   349 
       
   350   ret = parent_class->change_state (element, transition);
       
   351   if (ret == GST_STATE_CHANGE_FAILURE)
       
   352     return ret;
       
   353 
       
   354   switch (transition) {
       
   355     case GST_STATE_CHANGE_PAUSED_TO_READY:
       
   356       g_free (parse->ini);
       
   357       parse->ini = NULL;
       
   358       parse->framed = FALSE;
       
   359       break;
       
   360     default:
       
   361       break;
       
   362   }
       
   363 
       
   364   return ret;
       
   365 }