gst_plugins_base/gst/subparse/gstsubparse.c
branchRCL_3
changeset 29 567bb019e3e3
parent 0 0e761a78d257
child 30 7e817e7e631c
equal deleted inserted replaced
6:9b2c3c7a1a9c 29:567bb019e3e3
    24 #endif
    24 #endif
    25 
    25 
    26 #include <string.h>
    26 #include <string.h>
    27 #include <stdlib.h>
    27 #include <stdlib.h>
    28 #include <sys/types.h>
    28 #include <sys/types.h>
       
    29 #include <glib.h>
    29 #include <regex.h>
    30 #include <regex.h>
    30 
    31 
    31 #include "gstsubparse.h"
    32 #include "gstsubparse.h"
    32 #include "gstssaparse.h"
    33 #include "gstssaparse.h"
    33 #include "samiparse.h"
    34 #include "samiparse.h"
    34 #include "tmplayerparse.h"
    35 #include "tmplayerparse.h"
    35 #include "mpl2parse.h"
    36 #include "mpl2parse.h"
    36 
    37 
    37 #ifdef __SYMBIAN32__
       
    38 #include <glib_global.h>
       
    39 #endif
       
    40 GST_DEBUG_CATEGORY (sub_parse_debug);
    38 GST_DEBUG_CATEGORY (sub_parse_debug);
    41 
    39 
    42 #define DEFAULT_ENCODING   NULL
    40 #define DEFAULT_ENCODING   NULL
    43 
    41 
    44 enum
    42 enum
   147 
   145 
   148   if (subparse->encoding) {
   146   if (subparse->encoding) {
   149     g_free (subparse->encoding);
   147     g_free (subparse->encoding);
   150     subparse->encoding = NULL;
   148     subparse->encoding = NULL;
   151   }
   149   }
       
   150 
       
   151   if (subparse->detected_encoding) {
       
   152     g_free (subparse->detected_encoding);
       
   153     subparse->detected_encoding = NULL;
       
   154   }
       
   155 
       
   156   if (subparse->adapter) {
       
   157     gst_object_unref (subparse->adapter);
       
   158     subparse->adapter = NULL;
       
   159   }
       
   160 
   152   if (subparse->textbuf) {
   161   if (subparse->textbuf) {
   153     g_string_free (subparse->textbuf, TRUE);
   162     g_string_free (subparse->textbuf, TRUE);
   154     subparse->textbuf = NULL;
   163     subparse->textbuf = NULL;
   155   }
   164   }
       
   165 #ifndef GST_DISABLE_XML
   156   sami_context_deinit (&subparse->state);
   166   sami_context_deinit (&subparse->state);
       
   167 #endif
   157 
   168 
   158   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
   169   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
   159 }
   170 }
   160 
   171 
   161 static void
   172 static void
   172 
   183 
   173   element_class->change_state = gst_sub_parse_change_state;
   184   element_class->change_state = gst_sub_parse_change_state;
   174 
   185 
   175   g_object_class_install_property (object_class, PROP_ENCODING,
   186   g_object_class_install_property (object_class, PROP_ENCODING,
   176       g_param_spec_string ("subtitle-encoding", "subtitle charset encoding",
   187       g_param_spec_string ("subtitle-encoding", "subtitle charset encoding",
   177           "Encoding to assume if input subtitles are not in UTF-8 encoding. "
   188           "Encoding to assume if input subtitles are not in UTF-8 or any other "
   178           "If not set, the GST_SUBTITLE_ENCODING environment variable will "
   189           "Unicode encoding. If not set, the GST_SUBTITLE_ENCODING environment "
   179           "be checked for an encoding to use. If that is not set either, "
   190           "variable will be checked for an encoding to use. If that is not set "
   180           "ISO-8859-15 will be assumed.", DEFAULT_ENCODING, G_PARAM_READWRITE));
   191           "either, ISO-8859-15 will be assumed.", DEFAULT_ENCODING,
       
   192           G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
   181 }
   193 }
   182 
   194 
   183 static void
   195 static void
   184 gst_sub_parse_init (GstSubParse * subparse)
   196 gst_sub_parse_init (GstSubParse * subparse)
   185 {
   197 {
   199   subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
   211   subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
   200   subparse->flushing = FALSE;
   212   subparse->flushing = FALSE;
   201   gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
   213   gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
   202   subparse->need_segment = TRUE;
   214   subparse->need_segment = TRUE;
   203   subparse->encoding = g_strdup (DEFAULT_ENCODING);
   215   subparse->encoding = g_strdup (DEFAULT_ENCODING);
       
   216   subparse->detected_encoding = NULL;
       
   217   subparse->adapter = gst_adapter_new ();
   204 }
   218 }
   205 
   219 
   206 /*
   220 /*
   207  * Source pad functions.
   221  * Source pad functions.
   208  */
   222  */
   306   }
   320   }
   307   GST_OBJECT_UNLOCK (subparse);
   321   GST_OBJECT_UNLOCK (subparse);
   308 }
   322 }
   309 
   323 
   310 static gchar *
   324 static gchar *
   311 convert_encoding (GstSubParse * self, const gchar * str, gsize len)
   325 gst_sub_parse_get_format_description (GstSubParseFormat format)
       
   326 {
       
   327   switch (format) {
       
   328     case GST_SUB_PARSE_FORMAT_MDVDSUB:
       
   329       return "MicroDVD";
       
   330     case GST_SUB_PARSE_FORMAT_SUBRIP:
       
   331       return "SubRip";
       
   332     case GST_SUB_PARSE_FORMAT_MPSUB:
       
   333       return "MPSub";
       
   334     case GST_SUB_PARSE_FORMAT_SAMI:
       
   335       return "SAMI";
       
   336     case GST_SUB_PARSE_FORMAT_TMPLAYER:
       
   337       return "TMPlayer";
       
   338     case GST_SUB_PARSE_FORMAT_MPL2:
       
   339       return "MPL2";
       
   340     case GST_SUB_PARSE_FORMAT_SUBVIEWER:
       
   341       return "SubViewer";
       
   342     default:
       
   343     case GST_SUB_PARSE_FORMAT_UNKNOWN:
       
   344       break;
       
   345   }
       
   346   return NULL;
       
   347 }
       
   348 
       
   349 static gchar *
       
   350 gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
       
   351     gsize * consumed, GError ** err)
       
   352 {
       
   353   gchar *ret = NULL;
       
   354 
       
   355   *consumed = 0;
       
   356   ret =
       
   357       g_convert_with_fallback (str, len, "UTF-8", encoding, "*", consumed, NULL,
       
   358       err);
       
   359   if (ret == NULL)
       
   360     return ret;
       
   361 
       
   362   /* + 3 to skip UTF-8 BOM if it was added */
       
   363   len = strlen (ret);
       
   364   if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
       
   365       && (guint8) ret[2] == 0xBF)
       
   366     g_memmove (ret, ret + 3, len + 1 - 3);
       
   367 
       
   368   return ret;
       
   369 }
       
   370 
       
   371 static gchar *
       
   372 detect_encoding (const gchar * str, gsize len)
       
   373 {
       
   374   if (len >= 3 && (guint8) str[0] == 0xEF && (guint8) str[1] == 0xBB
       
   375       && (guint8) str[2] == 0xBF)
       
   376     return g_strdup ("UTF-8");
       
   377 
       
   378   if (len >= 2 && (guint8) str[0] == 0xFE && (guint8) str[1] == 0xFF)
       
   379     return g_strdup ("UTF-16BE");
       
   380 
       
   381   if (len >= 2 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE)
       
   382     return g_strdup ("UTF-16LE");
       
   383 
       
   384   if (len >= 4 && (guint8) str[0] == 0x00 && (guint8) str[1] == 0x00
       
   385       && (guint8) str[2] == 0xFE && (guint8) str[3] == 0xFF)
       
   386     return g_strdup ("UTF-32BE");
       
   387 
       
   388   if (len >= 4 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE
       
   389       && (guint8) str[2] == 0x00 && (guint8) str[3] == 0x00)
       
   390     return g_strdup ("UTF-32LE");
       
   391 
       
   392   return NULL;
       
   393 }
       
   394 
       
   395 static gchar *
       
   396 convert_encoding (GstSubParse * self, const gchar * str, gsize len,
       
   397     gsize * consumed)
   312 {
   398 {
   313   const gchar *encoding;
   399   const gchar *encoding;
   314   GError *err = NULL;
   400   GError *err = NULL;
   315   gchar *ret;
   401   gchar *ret = NULL;
   316 
   402 
       
   403   *consumed = 0;
       
   404 
       
   405   /* First try any detected encoding */
       
   406   if (self->detected_encoding) {
       
   407     ret =
       
   408         gst_convert_to_utf8 (str, len, self->detected_encoding, consumed, &err);
       
   409 
       
   410     if (!err)
       
   411       return ret;
       
   412 
       
   413     GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
       
   414         self->detected_encoding, err->message);
       
   415     g_free (self->detected_encoding);
       
   416     self->detected_encoding = NULL;
       
   417     g_error_free (err);
       
   418   }
       
   419 
       
   420   /* Otherwise check if it's UTF8 */
   317   if (self->valid_utf8) {
   421   if (self->valid_utf8) {
   318     if (g_utf8_validate (str, len, NULL)) {
   422     if (g_utf8_validate (str, len, NULL)) {
   319       GST_LOG_OBJECT (self, "valid UTF-8, no conversion needed");
   423       GST_LOG_OBJECT (self, "valid UTF-8, no conversion needed");
       
   424       *consumed = len;
   320       return g_strndup (str, len);
   425       return g_strndup (str, len);
   321     }
   426     }
   322     GST_INFO_OBJECT (self, "invalid UTF-8!");
   427     GST_INFO_OBJECT (self, "invalid UTF-8!");
   323     self->valid_utf8 = FALSE;
   428     self->valid_utf8 = FALSE;
   324   }
   429   }
   325 
   430 
       
   431   /* Else try fallback */
   326   encoding = self->encoding;
   432   encoding = self->encoding;
   327   if (encoding == NULL || *encoding == '\0') {
   433   if (encoding == NULL || *encoding == '\0') {
   328     encoding = g_getenv ("GST_SUBTITLE_ENCODING");
   434     encoding = g_getenv ("GST_SUBTITLE_ENCODING");
   329   }
   435   }
   330   if (encoding == NULL || *encoding == '\0') {
   436   if (encoding == NULL || *encoding == '\0') {
   333     if (g_get_charset (&encoding)) {
   439     if (g_get_charset (&encoding)) {
   334       encoding = "ISO-8859-15";
   440       encoding = "ISO-8859-15";
   335     }
   441     }
   336   }
   442   }
   337 
   443 
   338   ret = g_convert_with_fallback (str, len, "UTF-8", encoding, "*", NULL,
   444   ret = gst_convert_to_utf8 (str, len, encoding, consumed, &err);
   339       NULL, &err);
       
   340 
   445 
   341   if (err) {
   446   if (err) {
   342     GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
   447     GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
   343         encoding, err->message);
   448         encoding, err->message);
   344     g_error_free (err);
   449     g_error_free (err);
   345 
   450 
   346     /* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
   451     /* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
   347     ret = g_convert_with_fallback (str, len, "UTF-8", "ISO-8859-15", "*",
   452     ret = gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed, NULL);
   348         NULL, NULL, NULL);
       
   349   }
   453   }
   350 
   454 
   351   GST_LOG_OBJECT (self,
   455   GST_LOG_OBJECT (self,
   352       "successfully converted %" G_GSIZE_FORMAT " characters from %s to UTF-8"
   456       "successfully converted %" G_GSIZE_FORMAT " characters from %s to UTF-8"
   353       "%s", len, encoding, (err) ? " , using ISO-8859-15 as fallback" : "");
   457       "%s", len, encoding, (err) ? " , using ISO-8859-15 as fallback" : "");
   375     line_end--;
   479     line_end--;
   376     have_r = TRUE;
   480     have_r = TRUE;
   377   }
   481   }
   378 
   482 
   379   line_len = line_end - self->textbuf->str;
   483   line_len = line_end - self->textbuf->str;
   380   line = convert_encoding (self, self->textbuf->str, line_len);
   484   line = g_strndup (self->textbuf->str, line_len);
   381   self->textbuf = g_string_erase (self->textbuf, 0,
   485   self->textbuf = g_string_erase (self->textbuf, 0,
   382       line_len + (have_r ? 2 : 1));
   486       line_len + (have_r ? 2 : 1));
   383   return line;
   487   return line;
   384 }
   488 }
   385 
   489 
   653     g_free (*p_txt);
   757     g_free (*p_txt);
   654     *p_txt = g_string_free (s, FALSE);
   758     *p_txt = g_string_free (s, FALSE);
   655   }
   759   }
   656 }
   760 }
   657 
   761 
       
   762 static gboolean
       
   763 parse_subrip_time (const gchar * ts_string, GstClockTime * t)
       
   764 {
       
   765   gchar s[128] = { '\0', };
       
   766   gchar *end, *p;
       
   767   guint hour, min, sec, msec, len;
       
   768 
       
   769   while (*ts_string == ' ')
       
   770     ++ts_string;
       
   771 
       
   772   g_strlcpy (s, ts_string, sizeof (s));
       
   773   if ((end = strstr (s, "-->")))
       
   774     *end = '\0';
       
   775   g_strchomp (s);
       
   776 
       
   777   /* ms may be in these formats:
       
   778    * hh:mm:ss,500 = 500ms
       
   779    * hh:mm:ss,  5 =   5ms
       
   780    * hh:mm:ss, 5  =  50ms
       
   781    * hh:mm:ss, 50 =  50ms
       
   782    * hh:mm:ss,5   = 500ms
       
   783    * and sscanf() doesn't differentiate between '  5' and '5' so munge
       
   784    * the white spaces within the timestamp to '0' (I'm sure there's a
       
   785    * way to make sscanf() do this for us, but how?)
       
   786    */
       
   787   g_strdelimit (s, " ", '0');
       
   788 
       
   789   /* make sure we have exactly three digits after he comma */
       
   790   p = strchr (s, ',');
       
   791   g_assert (p != NULL);
       
   792   ++p;
       
   793   len = strlen (p);
       
   794   if (len > 3) {
       
   795     p[3] = '\0';
       
   796   } else
       
   797     while (len < 3) {
       
   798       g_strlcat (&p[len], "0", 2);
       
   799       ++len;
       
   800     }
       
   801 
       
   802   GST_LOG ("parsing timestamp '%s'", s);
       
   803   if (sscanf (s, "%u:%u:%u,%u", &hour, &min, &sec, &msec) != 4) {
       
   804     GST_WARNING ("failed to parse subrip timestamp string '%s'", s);
       
   805     return FALSE;
       
   806   }
       
   807 
       
   808   *t = ((hour * 3600) + (min * 60) + sec) * GST_SECOND + msec * GST_MSECOND;
       
   809   return TRUE;
       
   810 }
       
   811 
   658 static gchar *
   812 static gchar *
   659 parse_subrip (ParserState * state, const gchar * line)
   813 parse_subrip (ParserState * state, const gchar * line)
   660 {
   814 {
   661   guint h1, m1, s1, ms1;
       
   662   guint h2, m2, s2, ms2;
       
   663   int subnum;
   815   int subnum;
   664   gchar *ret;
   816   gchar *ret;
   665 
   817 
   666   switch (state->state) {
   818   switch (state->state) {
   667     case 0:
   819     case 0:
   668       /* looking for a single integer */
   820       /* looking for a single integer */
   669       if (sscanf (line, "%u", &subnum) == 1)
   821       if (sscanf (line, "%u", &subnum) == 1)
   670         state->state = 1;
   822         state->state = 1;
   671       return NULL;
   823       return NULL;
   672     case 1:
   824     case 1:
       
   825     {
       
   826       GstClockTime ts_start, ts_end;
       
   827       gchar *end_time;
       
   828 
   673       /* looking for start_time --> end_time */
   829       /* looking for start_time --> end_time */
   674       if (sscanf (line, "%u:%u:%u,%u --> %u:%u:%u,%u",
   830       if ((end_time = strstr (line, " --> ")) &&
   675               &h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) == 8) {
   831           parse_subrip_time (line, &ts_start) &&
       
   832           parse_subrip_time (end_time + strlen (" --> "), &ts_end) &&
       
   833           state->start_time <= ts_end) {
   676         state->state = 2;
   834         state->state = 2;
   677         state->start_time =
   835         state->start_time = ts_start;
   678             (((guint64) h1) * 3600 + m1 * 60 + s1) * GST_SECOND +
   836         state->duration = ts_end - ts_start;
   679             ms1 * GST_MSECOND;
       
   680         state->duration =
       
   681             (((guint64) h2) * 3600 + m2 * 60 + s2) * GST_SECOND +
       
   682             ms2 * GST_MSECOND - state->start_time;
       
   683       } else {
   837       } else {
   684         GST_DEBUG ("error parsing subrip time line");
   838         GST_DEBUG ("error parsing subrip time line '%s'", line);
   685         state->state = 0;
   839         state->state = 0;
   686       }
   840       }
   687       return NULL;
   841       return NULL;
       
   842     }
   688     case 2:
   843     case 2:
   689     {
   844     {
   690       /* No need to parse that text if it's out of segment */
   845       /* No need to parse that text if it's out of segment */
   691       gint64 clip_start = 0, clip_stop = 0;
   846       gint64 clip_start = 0, clip_stop = 0;
   692       gboolean in_seg = FALSE;
   847       gboolean in_seg = FALSE;
   874     state->buf = g_string_new (NULL);
  1029     state->buf = g_string_new (NULL);
   875   }
  1030   }
   876 
  1031 
   877   state->start_time = 0;
  1032   state->start_time = 0;
   878   state->duration = 0;
  1033   state->duration = 0;
       
  1034   state->max_duration = 0;      /* no limit */
   879   state->state = 0;
  1035   state->state = 0;
   880   state->segment = NULL;
  1036   state->segment = NULL;
   881 }
  1037 }
   882 
  1038 
   883 static void
  1039 static void
   885 {
  1041 {
   886   if (state->buf) {
  1042   if (state->buf) {
   887     g_string_free (state->buf, TRUE);
  1043     g_string_free (state->buf, TRUE);
   888     state->buf = NULL;
  1044     state->buf = NULL;
   889   }
  1045   }
       
  1046 #ifndef GST_DISABLE_XML
   890   if (state->user_data) {
  1047   if (state->user_data) {
   891     sami_context_reset (state);
  1048     sami_context_reset (state);
   892   }
  1049   }
       
  1050 #endif
       
  1051 }
       
  1052 
       
  1053 /* regex type enum */
       
  1054 typedef enum
       
  1055 {
       
  1056   GST_SUB_PARSE_REGEX_UNKNOWN = 0,
       
  1057   GST_SUB_PARSE_REGEX_MDVDSUB = 1,
       
  1058   GST_SUB_PARSE_REGEX_SUBRIP = 2,
       
  1059 } GstSubParseRegex;
       
  1060 
       
  1061 static gpointer
       
  1062 gst_sub_parse_data_format_autodetect_regex_once (GstSubParseRegex regtype)
       
  1063 {
       
  1064   gpointer result = NULL;
       
  1065   GError *gerr = NULL;
       
  1066   switch (regtype) {
       
  1067     case GST_SUB_PARSE_REGEX_MDVDSUB:
       
  1068       result =
       
  1069           (gpointer) g_regex_new ("^\\{[0-9]+\\}\\{[0-9]+\\}", 0, 0, &gerr);
       
  1070       if (result == NULL) {
       
  1071         g_warning ("Compilation of mdvd regex failed: %s", gerr->message);
       
  1072         g_error_free (gerr);
       
  1073       }
       
  1074       break;
       
  1075     case GST_SUB_PARSE_REGEX_SUBRIP:
       
  1076       result = (gpointer) g_regex_new ("^([ 0-9]){0,3}[0-9]\\s*(\x0d)?\x0a"
       
  1077           "[ 0-9][0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{0,2}[0-9]"
       
  1078           " +--> +([ 0-9])?[0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{0,2}[0-9]",
       
  1079           0, 0, &gerr);
       
  1080       if (result == NULL) {
       
  1081         g_warning ("Compilation of subrip regex failed: %s", gerr->message);
       
  1082         g_error_free (gerr);
       
  1083       }
       
  1084       break;
       
  1085     default:
       
  1086       GST_WARNING ("Trying to allocate regex of unknown type %u", regtype);
       
  1087   }
       
  1088   return result;
   893 }
  1089 }
   894 
  1090 
   895 /*
  1091 /*
   896  * FIXME: maybe we should pass along a second argument, the preceding
  1092  * FIXME: maybe we should pass along a second argument, the preceding
   897  * text buffer, because that is how this originally worked, even though
  1093  * text buffer, because that is how this originally worked, even though
   899  */
  1095  */
   900 
  1096 
   901 static GstSubParseFormat
  1097 static GstSubParseFormat
   902 gst_sub_parse_data_format_autodetect (gchar * match_str)
  1098 gst_sub_parse_data_format_autodetect (gchar * match_str)
   903 {
  1099 {
   904   static gboolean need_init_regexps = TRUE;
       
   905   static regex_t mdvd_rx;
       
   906   static regex_t subrip_rx;
       
   907   guint n1, n2, n3;
  1100   guint n1, n2, n3;
   908 
  1101 
   909   /* initialize the regexps used the first time around */
  1102   static GOnce mdvd_rx_once = G_ONCE_INIT;
   910   if (need_init_regexps) {
  1103   static GOnce subrip_rx_once = G_ONCE_INIT;
   911     int err;
  1104 
   912     char errstr[128];
  1105   GRegex *mdvd_grx;
   913 
  1106   GRegex *subrip_grx;
   914     need_init_regexps = FALSE;
  1107 
   915     if ((err = regcomp (&mdvd_rx, "^\\{[0-9]+\\}\\{[0-9]+\\}",
  1108   g_once (&mdvd_rx_once,
   916                 REG_EXTENDED | REG_NEWLINE | REG_NOSUB) != 0) ||
  1109       (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
   917         (err = regcomp (&subrip_rx, "^[0-9]([0-9]){0,3}(\x0d)?\x0a"
  1110       (gpointer) GST_SUB_PARSE_REGEX_MDVDSUB);
   918                 "[0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}"
  1111   g_once (&subrip_rx_once,
   919                 " --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}",
  1112       (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
   920                 REG_EXTENDED | REG_NEWLINE | REG_NOSUB)) != 0) {
  1113       (gpointer) GST_SUB_PARSE_REGEX_SUBRIP);
   921       regerror (err, &subrip_rx, errstr, 127);
  1114 
   922       GST_WARNING ("Compilation of subrip regex failed: %s", errstr);
  1115   mdvd_grx = (GRegex *) mdvd_rx_once.retval;
   923     }
  1116   subrip_grx = (GRegex *) subrip_rx_once.retval;
   924   }
  1117 
   925 
  1118   if (g_regex_match (mdvd_grx, match_str, 0, NULL) == TRUE) {
   926   if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
       
   927     GST_LOG ("MicroDVD (frame based) format detected");
  1119     GST_LOG ("MicroDVD (frame based) format detected");
   928     return GST_SUB_PARSE_FORMAT_MDVDSUB;
  1120     return GST_SUB_PARSE_FORMAT_MDVDSUB;
   929   }
  1121   }
   930   if (regexec (&subrip_rx, match_str, 0, NULL, 0) == 0) {
  1122   if (g_regex_match (subrip_grx, match_str, 0, NULL) == TRUE) {
   931     GST_LOG ("SubRip (time based) format detected");
  1123     GST_LOG ("SubRip (time based) format detected");
   932     return GST_SUB_PARSE_FORMAT_SUBRIP;
  1124     return GST_SUB_PARSE_FORMAT_SUBRIP;
   933   }
  1125   }
   934 
  1126 
   935   if (!strncmp (match_str, "FORMAT=TIME", 11)) {
  1127   if (!strncmp (match_str, "FORMAT=TIME", 11)) {
   936     GST_LOG ("MPSub (time based) format detected");
  1128     GST_LOG ("MPSub (time based) format detected");
   937     return GST_SUB_PARSE_FORMAT_MPSUB;
  1129     return GST_SUB_PARSE_FORMAT_MPSUB;
   938   }
  1130   }
       
  1131 #ifndef GST_DISABLE_XML
   939   if (strstr (match_str, "<SAMI>") != NULL ||
  1132   if (strstr (match_str, "<SAMI>") != NULL ||
   940       strstr (match_str, "<sami>") != NULL) {
  1133       strstr (match_str, "<sami>") != NULL) {
   941     GST_LOG ("SAMI (time based) format detected");
  1134     GST_LOG ("SAMI (time based) format detected");
   942     return GST_SUB_PARSE_FORMAT_SAMI;
  1135     return GST_SUB_PARSE_FORMAT_SAMI;
   943   }
  1136   }
       
  1137 #endif
   944   /* we're boldly assuming the first subtitle appears within the first hour */
  1138   /* we're boldly assuming the first subtitle appears within the first hour */
   945   if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
  1139   if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
   946       sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
  1140       sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
   947       sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
  1141       sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
   948       sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
  1142       sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
   967 gst_sub_parse_format_autodetect (GstSubParse * self)
  1161 gst_sub_parse_format_autodetect (GstSubParse * self)
   968 {
  1162 {
   969   gchar *data;
  1163   gchar *data;
   970   GstSubParseFormat format;
  1164   GstSubParseFormat format;
   971 
  1165 
   972   if (strlen (self->textbuf->str) < 35) {
  1166   if (strlen (self->textbuf->str) < 30) {
   973     GST_DEBUG ("File too small to be a subtitles file");
  1167     GST_DEBUG ("File too small to be a subtitles file");
   974     return NULL;
  1168     return NULL;
   975   }
  1169   }
   976 
  1170 
   977   data = g_strndup (self->textbuf->str, 35);
  1171   data = g_strndup (self->textbuf->str, 35);
   978   format = gst_sub_parse_data_format_autodetect (data);
  1172   format = gst_sub_parse_data_format_autodetect (data);
   979   g_free (data);
  1173   g_free (data);
   980 
  1174 
   981   self->parser_type = format;
  1175   self->parser_type = format;
       
  1176   self->subtitle_codec = gst_sub_parse_get_format_description (format);
   982   parser_state_init (&self->state);
  1177   parser_state_init (&self->state);
   983 
  1178 
   984   switch (format) {
  1179   switch (format) {
   985     case GST_SUB_PARSE_FORMAT_MDVDSUB:
  1180     case GST_SUB_PARSE_FORMAT_MDVDSUB:
   986       self->parse_line = parse_mdvdsub;
  1181       self->parse_line = parse_mdvdsub;
   989       self->parse_line = parse_subrip;
  1184       self->parse_line = parse_subrip;
   990       return gst_caps_new_simple ("text/x-pango-markup", NULL);
  1185       return gst_caps_new_simple ("text/x-pango-markup", NULL);
   991     case GST_SUB_PARSE_FORMAT_MPSUB:
  1186     case GST_SUB_PARSE_FORMAT_MPSUB:
   992       self->parse_line = parse_mpsub;
  1187       self->parse_line = parse_mpsub;
   993       return gst_caps_new_simple ("text/plain", NULL);
  1188       return gst_caps_new_simple ("text/plain", NULL);
       
  1189 #ifndef GST_DISABLE_XML
   994     case GST_SUB_PARSE_FORMAT_SAMI:
  1190     case GST_SUB_PARSE_FORMAT_SAMI:
   995       self->parse_line = parse_sami;
  1191       self->parse_line = parse_sami;
   996       sami_context_init (&self->state);
  1192       sami_context_init (&self->state);
   997       return gst_caps_new_simple ("text/x-pango-markup", NULL);
  1193       return gst_caps_new_simple ("text/x-pango-markup", NULL);
       
  1194 #endif
   998     case GST_SUB_PARSE_FORMAT_TMPLAYER:
  1195     case GST_SUB_PARSE_FORMAT_TMPLAYER:
   999       self->parse_line = parse_tmplayer;
  1196       self->parse_line = parse_tmplayer;
       
  1197       self->state.max_duration = 5 * GST_SECOND;
  1000       return gst_caps_new_simple ("text/plain", NULL);
  1198       return gst_caps_new_simple ("text/plain", NULL);
  1001     case GST_SUB_PARSE_FORMAT_MPL2:
  1199     case GST_SUB_PARSE_FORMAT_MPL2:
  1002       self->parse_line = parse_mpl2;
  1200       self->parse_line = parse_mpl2;
  1003       return gst_caps_new_simple ("text/x-pango-markup", NULL);
  1201       return gst_caps_new_simple ("text/x-pango-markup", NULL);
  1004     case GST_SUB_PARSE_FORMAT_SUBVIEWER:
  1202     case GST_SUB_PARSE_FORMAT_SUBVIEWER:
  1014 }
  1212 }
  1015 
  1213 
  1016 static void
  1214 static void
  1017 feed_textbuf (GstSubParse * self, GstBuffer * buf)
  1215 feed_textbuf (GstSubParse * self, GstBuffer * buf)
  1018 {
  1216 {
  1019   if (GST_BUFFER_OFFSET (buf) != self->offset) {
  1217   gboolean discont;
       
  1218   gsize consumed;
       
  1219   gchar *input = NULL;
       
  1220 
       
  1221   discont = GST_BUFFER_IS_DISCONT (buf);
       
  1222 
       
  1223   if (GST_BUFFER_OFFSET_IS_VALID (buf) &&
       
  1224       GST_BUFFER_OFFSET (buf) != self->offset) {
       
  1225     self->offset = GST_BUFFER_OFFSET (buf);
       
  1226     discont = TRUE;
       
  1227   }
       
  1228 
       
  1229   if (discont) {
       
  1230     GST_INFO ("discontinuity");
  1020     /* flush the parser state */
  1231     /* flush the parser state */
  1021     parser_state_init (&self->state);
  1232     parser_state_init (&self->state);
  1022     g_string_truncate (self->textbuf, 0);
  1233     g_string_truncate (self->textbuf, 0);
       
  1234     gst_adapter_clear (self->adapter);
       
  1235 #ifndef GST_DISABLE_XML
  1023     sami_context_reset (&self->state);
  1236     sami_context_reset (&self->state);
  1024   }
  1237 #endif
  1025 
  1238     /* we could set a flag to make sure that the next buffer we push out also
  1026   self->textbuf = g_string_append_len (self->textbuf,
  1239      * has the DISCONT flag set, but there's no point really given that it's
  1027       (gchar *) GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
  1240      * subtitles which are discontinuous by nature. */
       
  1241   }
       
  1242 
  1028   self->offset = GST_BUFFER_OFFSET (buf) + GST_BUFFER_SIZE (buf);
  1243   self->offset = GST_BUFFER_OFFSET (buf) + GST_BUFFER_SIZE (buf);
  1029   self->next_offset = self->offset;
  1244   self->next_offset = self->offset;
  1030 
  1245 
  1031   gst_buffer_unref (buf);
  1246   gst_adapter_push (self->adapter, buf);
       
  1247 
       
  1248   input =
       
  1249       convert_encoding (self, (const gchar *) gst_adapter_peek (self->adapter,
       
  1250           gst_adapter_available (self->adapter)),
       
  1251       (gsize) gst_adapter_available (self->adapter), &consumed);
       
  1252 
       
  1253   if (input && consumed > 0) {
       
  1254     self->textbuf = g_string_append (self->textbuf, input);
       
  1255     gst_adapter_flush (self->adapter, consumed);
       
  1256   }
       
  1257 
       
  1258   g_free (input);
  1032 }
  1259 }
  1033 
  1260 
  1034 static GstFlowReturn
  1261 static GstFlowReturn
  1035 handle_buffer (GstSubParse * self, GstBuffer * buf)
  1262 handle_buffer (GstSubParse * self, GstBuffer * buf)
  1036 {
  1263 {
  1037   GstFlowReturn ret = GST_FLOW_OK;
  1264   GstFlowReturn ret = GST_FLOW_OK;
  1038   GstCaps *caps = NULL;
  1265   GstCaps *caps = NULL;
  1039   gchar *line, *subtitle;
  1266   gchar *line, *subtitle;
       
  1267 
       
  1268   if (self->first_buffer) {
       
  1269     self->detected_encoding =
       
  1270         detect_encoding ((gchar *) GST_BUFFER_DATA (buf),
       
  1271         GST_BUFFER_SIZE (buf));
       
  1272     self->first_buffer = FALSE;
       
  1273   }
  1040 
  1274 
  1041   feed_textbuf (self, buf);
  1275   feed_textbuf (self, buf);
  1042 
  1276 
  1043   /* make sure we know the format */
  1277   /* make sure we know the format */
  1044   if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
  1278   if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
  1048     if (!gst_pad_set_caps (self->srcpad, caps)) {
  1282     if (!gst_pad_set_caps (self->srcpad, caps)) {
  1049       gst_caps_unref (caps);
  1283       gst_caps_unref (caps);
  1050       return GST_FLOW_UNEXPECTED;
  1284       return GST_FLOW_UNEXPECTED;
  1051     }
  1285     }
  1052     gst_caps_unref (caps);
  1286     gst_caps_unref (caps);
  1053   }
  1287 
  1054 
  1288     /* push tags */
  1055   while ((line = get_next_line (self)) && !self->flushing) {
  1289     if (self->subtitle_codec != NULL) {
       
  1290       GstTagList *tags;
       
  1291 
       
  1292       tags = gst_tag_list_new ();
       
  1293       gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC,
       
  1294           self->subtitle_codec, NULL);
       
  1295       gst_element_found_tags_for_pad (GST_ELEMENT (self), self->srcpad, tags);
       
  1296     }
       
  1297   }
       
  1298 
       
  1299   while (!self->flushing && (line = get_next_line (self))) {
       
  1300     guint offset = 0;
       
  1301 
  1056     /* Set segment on our parser state machine */
  1302     /* Set segment on our parser state machine */
  1057     self->state.segment = &self->segment;
  1303     self->state.segment = &self->segment;
  1058     /* Now parse the line, out of segment lines will just return NULL */
  1304     /* Now parse the line, out of segment lines will just return NULL */
  1059     GST_LOG_OBJECT (self, "Parsing line '%s'", line);
  1305     GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
  1060     subtitle = self->parse_line (&self->state, line);
  1306     subtitle = self->parse_line (&self->state, line + offset);
  1061     g_free (line);
  1307     g_free (line);
  1062 
  1308 
  1063     if (subtitle) {
  1309     if (subtitle) {
  1064       guint subtitle_len = strlen (subtitle);
  1310       guint subtitle_len = strlen (subtitle);
  1065 
  1311 
  1073         memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1);
  1319         memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1);
  1074         GST_BUFFER_SIZE (buf) = subtitle_len;
  1320         GST_BUFFER_SIZE (buf) = subtitle_len;
  1075         GST_BUFFER_TIMESTAMP (buf) = self->state.start_time;
  1321         GST_BUFFER_TIMESTAMP (buf) = self->state.start_time;
  1076         GST_BUFFER_DURATION (buf) = self->state.duration;
  1322         GST_BUFFER_DURATION (buf) = self->state.duration;
  1077 
  1323 
       
  1324         /* in some cases (e.g. tmplayer) we can only determine the duration
       
  1325          * of a text chunk from the timestamp of the next text chunk; in those
       
  1326          * cases, we probably want to limit the duration to something
       
  1327          * reasonable, so we don't end up showing some text for e.g. 40 seconds
       
  1328          * just because nothing else is being said during that time */
       
  1329         if (self->state.max_duration > 0 && GST_BUFFER_DURATION_IS_VALID (buf)) {
       
  1330           if (GST_BUFFER_DURATION (buf) > self->state.max_duration)
       
  1331             GST_BUFFER_DURATION (buf) = self->state.max_duration;
       
  1332         }
       
  1333 
  1078         gst_segment_set_last_stop (&self->segment, GST_FORMAT_TIME,
  1334         gst_segment_set_last_stop (&self->segment, GST_FORMAT_TIME,
  1079             self->state.start_time);
  1335             self->state.start_time);
  1080 
  1336 
  1081         GST_DEBUG_OBJECT (self, "Sending text '%s', %" GST_TIME_FORMAT " + %"
  1337         GST_DEBUG_OBJECT (self, "Sending text '%s', %" GST_TIME_FORMAT " + %"
  1082             GST_TIME_FORMAT, subtitle, GST_TIME_ARGS (self->state.start_time),
  1338             GST_TIME_FORMAT, subtitle, GST_TIME_ARGS (self->state.start_time),
  1083             GST_TIME_ARGS (self->state.duration));
  1339             GST_TIME_ARGS (self->state.duration));
  1084 
  1340 
  1085         ret = gst_pad_push (self->srcpad, buf);
  1341         ret = gst_pad_push (self->srcpad, buf);
  1086       }
  1342       }
  1087 
  1343 
       
  1344       /* move this forward (the tmplayer parser needs this) */
       
  1345       if (self->state.duration != GST_CLOCK_TIME_NONE)
       
  1346         self->state.start_time += self->state.duration;
       
  1347 
  1088       g_free (subtitle);
  1348       g_free (subtitle);
  1089       subtitle = NULL;
  1349       subtitle = NULL;
  1090 
  1350 
  1091       if (ret != GST_FLOW_OK) {
  1351       if (ret != GST_FLOW_OK) {
  1092         GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
  1352         GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
  1106 
  1366 
  1107   self = GST_SUBPARSE (GST_PAD_PARENT (sinkpad));
  1367   self = GST_SUBPARSE (GST_PAD_PARENT (sinkpad));
  1108 
  1368 
  1109   /* Push newsegment if needed */
  1369   /* Push newsegment if needed */
  1110   if (self->need_segment) {
  1370   if (self->need_segment) {
  1111 #ifndef __SYMBIAN32__
       
  1112     GST_LOG_OBJECT (self, "pushing newsegment event with %" GST_SEGMENT_FORMAT,
  1371     GST_LOG_OBJECT (self, "pushing newsegment event with %" GST_SEGMENT_FORMAT,
  1113         &self->segment);
  1372         &self->segment);
  1114 #endif
  1373 
  1115     gst_pad_push_event (self->srcpad, gst_event_new_new_segment (FALSE,
  1374     gst_pad_push_event (self->srcpad, gst_event_new_new_segment (FALSE,
  1116             self->segment.rate, self->segment.format,
  1375             self->segment.rate, self->segment.format,
  1117             self->segment.last_stop, self->segment.stop, self->segment.time));
  1376             self->segment.last_stop, self->segment.stop, self->segment.time));
  1118     self->need_segment = FALSE;
  1377     self->need_segment = FALSE;
  1119   }
  1378   }
  1134   switch (GST_EVENT_TYPE (event)) {
  1393   switch (GST_EVENT_TYPE (event)) {
  1135     case GST_EVENT_EOS:{
  1394     case GST_EVENT_EOS:{
  1136       /* Make sure the last subrip chunk is pushed out even
  1395       /* Make sure the last subrip chunk is pushed out even
  1137        * if the file does not have an empty line at the end */
  1396        * if the file does not have an empty line at the end */
  1138       if (self->parser_type == GST_SUB_PARSE_FORMAT_SUBRIP ||
  1397       if (self->parser_type == GST_SUB_PARSE_FORMAT_SUBRIP ||
       
  1398           self->parser_type == GST_SUB_PARSE_FORMAT_TMPLAYER ||
  1139           self->parser_type == GST_SUB_PARSE_FORMAT_MPL2) {
  1399           self->parser_type == GST_SUB_PARSE_FORMAT_MPL2) {
  1140         GstBuffer *buf = gst_buffer_new_and_alloc (1 + 1);
  1400         GstBuffer *buf = gst_buffer_new_and_alloc (2 + 1);
  1141 
  1401 
  1142         GST_DEBUG ("EOS. Pushing remaining text (if any)");
  1402         GST_DEBUG ("EOS. Pushing remaining text (if any)");
  1143         GST_BUFFER_DATA (buf)[0] = '\n';
  1403         GST_BUFFER_DATA (buf)[0] = '\n';
  1144         GST_BUFFER_DATA (buf)[1] = '\0';        /* play it safe */
  1404         GST_BUFFER_DATA (buf)[1] = '\n';
  1145         GST_BUFFER_SIZE (buf) = 1;
  1405         GST_BUFFER_DATA (buf)[2] = '\0';        /* play it safe */
       
  1406         GST_BUFFER_SIZE (buf) = 2;
  1146         GST_BUFFER_OFFSET (buf) = self->offset;
  1407         GST_BUFFER_OFFSET (buf) = self->offset;
  1147         gst_sub_parse_chain (pad, buf);
  1408         gst_sub_parse_chain (pad, buf);
  1148       }
  1409       }
  1149       ret = gst_pad_event_default (pad, event);
  1410       ret = gst_pad_event_default (pad, event);
  1150       break;
  1411       break;
  1215       /* format detection will init the parser state */
  1476       /* format detection will init the parser state */
  1216       self->offset = 0;
  1477       self->offset = 0;
  1217       self->next_offset = 0;
  1478       self->next_offset = 0;
  1218       self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
  1479       self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
  1219       self->valid_utf8 = TRUE;
  1480       self->valid_utf8 = TRUE;
       
  1481       self->first_buffer = TRUE;
       
  1482       g_free (self->detected_encoding);
       
  1483       self->detected_encoding = NULL;
  1220       g_string_truncate (self->textbuf, 0);
  1484       g_string_truncate (self->textbuf, 0);
       
  1485       gst_adapter_clear (self->adapter);
  1221       break;
  1486       break;
  1222     default:
  1487     default:
  1223       break;
  1488       break;
  1224   }
  1489   }
  1225 
  1490 
  1245 
  1510 
  1246 /* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
  1511 /* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
  1247  * also, give different  subtitle formats really different types */
  1512  * also, give different  subtitle formats really different types */
  1248 static GstStaticCaps mpl2_caps =
  1513 static GstStaticCaps mpl2_caps =
  1249 GST_STATIC_CAPS ("application/x-subtitle-mpl2");
  1514 GST_STATIC_CAPS ("application/x-subtitle-mpl2");
       
  1515 #define SUB_CAPS (gst_static_caps_get (&sub_caps))
       
  1516 
  1250 static GstStaticCaps tmp_caps =
  1517 static GstStaticCaps tmp_caps =
  1251 GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
  1518 GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
       
  1519 #define TMP_CAPS (gst_static_caps_get (&tmp_caps))
       
  1520 
       
  1521 static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
       
  1522 #define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))
       
  1523 
       
  1524 #ifndef GST_DISABLE_XML
  1252 static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
  1525 static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
  1253 static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
       
  1254 
       
  1255 #define SUB_CAPS (gst_static_caps_get (&sub_caps))
       
  1256 #define SAMI_CAPS (gst_static_caps_get (&smi_caps))
  1526 #define SAMI_CAPS (gst_static_caps_get (&smi_caps))
  1257 #define TMP_CAPS (gst_static_caps_get (&tmp_caps))
  1527 #endif
  1258 #define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))
       
  1259 
  1528 
  1260 static void
  1529 static void
  1261 gst_subparse_type_find (GstTypeFind * tf, gpointer private)
  1530 gst_subparse_type_find (GstTypeFind * tf, gpointer private)
  1262 {
  1531 {
  1263   GstSubParseFormat format;
  1532   GstSubParseFormat format;
  1264   const guint8 *data;
  1533   const guint8 *data;
  1265   GstCaps *caps;
  1534   GstCaps *caps;
  1266   gchar *str;
  1535   gchar *str;
  1267 
  1536   gchar *encoding = NULL;
  1268   if (!(data = gst_type_find_peek (tf, 0, 36)))
  1537   const gchar *end;
       
  1538 
       
  1539   if (!(data = gst_type_find_peek (tf, 0, 129)))
  1269     return;
  1540     return;
  1270 
  1541 
  1271   /* make sure string passed to _autodetect() is NUL-terminated */
  1542   /* make sure string passed to _autodetect() is NUL-terminated */
  1272   str = g_strndup ((gchar *) data, 35);
  1543   str = g_malloc0 (129);
       
  1544   memcpy (str, data, 128);
       
  1545 
       
  1546   if ((encoding = detect_encoding (str, 128)) != NULL) {
       
  1547     gchar *converted_str;
       
  1548     GError *err = NULL;
       
  1549     gsize tmp;
       
  1550 
       
  1551     converted_str = gst_convert_to_utf8 (str, 128, encoding, &tmp, &err);
       
  1552     if (converted_str == NULL) {
       
  1553       GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
       
  1554           err->message);
       
  1555       g_error_free (err);
       
  1556       g_free (encoding);
       
  1557     } else {
       
  1558       g_free (str);
       
  1559       str = converted_str;
       
  1560       g_free (encoding);
       
  1561     }
       
  1562   }
       
  1563 
       
  1564   /* Check if at least the first 120 chars are valid UTF8,
       
  1565    * otherwise convert as always */
       
  1566   if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) {
       
  1567     gchar *converted_str;
       
  1568     GError *err = NULL;
       
  1569     gsize tmp;
       
  1570     const gchar *enc;
       
  1571 
       
  1572     enc = g_getenv ("GST_SUBTITLE_ENCODING");
       
  1573     if (enc == NULL || *enc == '\0') {
       
  1574       /* if local encoding is UTF-8 and no encoding specified
       
  1575        * via the environment variable, assume ISO-8859-15 */
       
  1576       if (g_get_charset (&enc)) {
       
  1577         enc = "ISO-8859-15";
       
  1578       }
       
  1579     }
       
  1580     converted_str = gst_convert_to_utf8 (str, 128, enc, &tmp, &err);
       
  1581     if (converted_str == NULL) {
       
  1582       GST_DEBUG ("Charset conversion failed: %s", err->message);
       
  1583       g_error_free (err);
       
  1584       g_free (str);
       
  1585       return;
       
  1586     } else {
       
  1587       g_free (str);
       
  1588       str = converted_str;
       
  1589     }
       
  1590   }
       
  1591 
  1273   format = gst_sub_parse_data_format_autodetect (str);
  1592   format = gst_sub_parse_data_format_autodetect (str);
  1274   g_free (str);
  1593   g_free (str);
  1275 
  1594 
  1276   switch (format) {
  1595   switch (format) {
  1277     case GST_SUB_PARSE_FORMAT_MDVDSUB:
  1596     case GST_SUB_PARSE_FORMAT_MDVDSUB:
  1284       break;
  1603       break;
  1285     case GST_SUB_PARSE_FORMAT_MPSUB:
  1604     case GST_SUB_PARSE_FORMAT_MPSUB:
  1286       GST_DEBUG ("MPSub format detected");
  1605       GST_DEBUG ("MPSub format detected");
  1287       caps = SUB_CAPS;
  1606       caps = SUB_CAPS;
  1288       break;
  1607       break;
       
  1608 #ifndef GST_DISABLE_XML
  1289     case GST_SUB_PARSE_FORMAT_SAMI:
  1609     case GST_SUB_PARSE_FORMAT_SAMI:
  1290       GST_DEBUG ("SAMI (time-based) format detected");
  1610       GST_DEBUG ("SAMI (time-based) format detected");
  1291       caps = SAMI_CAPS;
  1611       caps = SAMI_CAPS;
  1292       break;
  1612       break;
       
  1613 #endif
  1293     case GST_SUB_PARSE_FORMAT_TMPLAYER:
  1614     case GST_SUB_PARSE_FORMAT_TMPLAYER:
  1294       GST_DEBUG ("TMPlayer (time based) format detected");
  1615       GST_DEBUG ("TMPlayer (time based) format detected");
  1295       caps = TMP_CAPS;
  1616       caps = TMP_CAPS;
  1296       break;
  1617       break;
  1297       /* FIXME: our MPL2 typefinding is not really good enough to warrant
  1618       /* FIXME: our MPL2 typefinding is not really good enough to warrant