FCL/sf/os/xmlsrv: xml/libxml2libs/src/libxml2/libxml2


/*
 * libxml2_htmltree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
 */

#define IN_LIBXML
#include "xmlenglibxml.h"

#include <string.h> /* for memset() only ! */

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#include <stdapis/libxml2/libxml2_xmlmemory.h>
#include "libxml2_htmlparser.h"
#include "libxml2_htmltree.h"
#include <stdapis/libxml2/libxml2_entities.h>
#include <stdapis/libxml2/libxml2_valid.h>
#include <stdapis/libxml2/libxml2_xmlerror.h>
#include "libxml2_xmlerror2.h"
#include <stdapis/libxml2/libxml2_xmlsave.h>
#include <stdapis/libxml2/libxml2_parserinternals.h>
#include <stdapis/libxml2/libxml2_globals.h>
#include <stdapis/libxml2/libxml2_uri.h>

#ifdef LIBXML_HTML_ENABLED

/************************************************************************
 *                                                                      *
 *          Getting/Setting encoding meta tags                          *
 *                                                                      *
 ************************************************************************/

/**
 * htmlGetMetaEncoding:
 * @param doc the document
 *
 * Encoding definition lookup in the Meta tags
 *
 * Returns the current encoding as flagged in the HTML source
 */
const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc) {
    htmlNodePtr cur;
    const xmlChar *content;
    const xmlChar *encoding;

    if (doc == NULL)
        return(NULL);
    cur = doc->children;

    /*
     * Search the html
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrEqual(cur->name, BAD_CAST"html"))
                break;
            if (xmlStrEqual(cur->name, BAD_CAST"head"))
                goto found_head;
            if (xmlStrEqual(cur->name, BAD_CAST"meta"))
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(NULL);
    cur = cur->children;

    /*
     * Search the head
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrEqual(cur->name, BAD_CAST"head"))
                break;
            if (xmlStrEqual(cur->name, BAD_CAST"meta"))
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(NULL);
found_head:
    cur = cur->children;

    /*
     * Search the meta elements
     */
found_meta:
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
                xmlAttrPtr attr = cur->properties;
                int http;
                const xmlChar *value;

                content = NULL;
                http = 0;
                while (attr != NULL) {
                    if ((attr->children != NULL) &&
                        (attr->children->type == XML_TEXT_NODE) &&
                        (attr->children->next == NULL)) {
                        value = attr->children->content;
                        if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
                         && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
                            http = 1;
                        else if ((value != NULL)
                         && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
                            content = value;
                        if ((http != 0) && (content != NULL))
                            goto found_content;
                    }
                    attr = attr->next;
                }
            }
        }
        cur = cur->next;
    }
    return(NULL);

found_content:
    encoding = xmlStrstr(content, BAD_CAST"charset=");
    if (encoding == NULL)
        encoding = xmlStrstr(content, BAD_CAST"Charset=");
    if (encoding == NULL)
        encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
    if (encoding != NULL) {
        encoding += 8;
    } else {
        encoding = xmlStrstr(content, BAD_CAST"charset =");
        if (encoding == NULL)
            encoding = xmlStrstr(content, BAD_CAST"Charset =");
        if (encoding == NULL)
            encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
        if (encoding != NULL)
            encoding += 9;
    }
    if (encoding != NULL) {
        while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
    }
    return(encoding);
}


#endif /* LIBXML_HTML_ENABLED */

#if defined(LIBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT)
/**
 * htmlSetMetaEncoding:
 * @param doc the document
 * @param encoding the encoding string
 *
 * Sets the current encoding in the Meta tags
 * NOTE: this will not change the document content encoding, just
 * the META flag associated.
 *
 * Returns 0 in case of success and -1 in case of error
 */
XMLPUBFUNEXPORT int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    htmlNodePtr cur, meta;
    const xmlChar *content;
    char newcontent[100];
	LOAD_GS_SAFE_DOC(doc)

    if (doc == NULL)
        return(-1);

    if (encoding != NULL) {
        snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
                encoding);
        newcontent[sizeof(newcontent) - 1] = 0;
    }

    cur = doc->children;

    /*
     * Search the html
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
                goto found_head;
            if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);
    cur = cur->children;

    /*
     * Search the head
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);
found_head:
    if (cur->children == NULL) {
        if (encoding == NULL)
            return(0);
        meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
        xmlAddChild(cur, meta);
        xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
        xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
        if ( OOM_FLAG )     //oom set in xmlNewProp
            return(-1);
        return(0);
    }
    cur = cur->children;

found_meta:
    if (encoding != NULL) {
        /*
         * Create a new Meta element with the right attributes
         */

        meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
        xmlAddPrevSibling(cur, meta);
        xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
        xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    }

    /*
     * Search and destroy all the remaining the meta elements carrying
     * encoding informations
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
                xmlAttrPtr attr = cur->properties;
                int http;
                const xmlChar *value;

                content = NULL;
                http = 0;
                while (attr != NULL) {
                    if ((attr->children != NULL) &&
                        (attr->children->type == XML_TEXT_NODE) &&
                        (attr->children->next == NULL)) {
                        value = attr->children->content;
                        if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
                         && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
                            http = 1;
                        else
                        {
                           if ((value != NULL) &&
                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
                              content = value;
                        }
                        if ((http != 0) && (content != NULL))
                            break;
                    }
                    attr = attr->next;
                }
                if ((http != 0) && (content != NULL)) {
                    meta = cur;
                    cur = cur->next;
                    xmlUnlinkNode(meta);
                    xmlFreeNode(meta);
                    continue;
                }

            }
        }
        cur = cur->next;
    }
    return(0);
}

/**
 * booleanHTMLAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* const htmlBooleanAttrs[] = {
  "checked", "compact", "declare", "defer", "disabled", "ismap",
  "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
  "selected", NULL
};


/**
 * htmlIsBooleanAttr:
 * @param name the name of the attribute to check
 *
 * Determine if a given attribute is a boolean attribute.
 *
 * returns: false if the attribute is not boolean, true otherwise.
 */
XMLPUBFUNEXPORT int
htmlIsBooleanAttr(const xmlChar *name)
{
    int i = 0;

    while (htmlBooleanAttrs[i] != NULL) {
        if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
            return 1;
        i++;
    }
    return 0;
}
#endif // HTML or XSLT

#ifdef LIBXML_HTML_ENABLED

#ifdef LIBXML_OUTPUT_ENABLED
/************************************************************************
 *                                                                      *
 *          Output error handlers                                       *
 *                                                                      *
 ************************************************************************/
/**
 * htmlSaveErrMemory:
 * @param extra extra informations
 *
 * Handle an out of memory condition
 */
static void
htmlSaveErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}

/**
 * htmlSaveErr:
 * @param code the error number
 * @param node the location of the error.
 * @param extra extra informations
 *
 * Handle an out of memory condition
 */
static void
htmlSaveErr(int code, xmlNodePtr node, const char *extra); // Moved to XSLT-enabled part of the file

/************************************************************************
 *                                                                      *
 *          Dumping HTML tree content to a simple buffer                *
 *                                                                      *
 ************************************************************************/

static int
htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
                   int format);

/**
 * htmlNodeDumpFormat:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the current node
 * @param format should formatting spaces been added
 *
 * Dump an HTML node, recursive behaviour,children are printed too.
 *
 * Returns the number of byte written or -1 in case of error
 */
static int
htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
                   int format) {
    unsigned int use;
    int ret;
    xmlOutputBufferPtr outbuf;

    if (cur == NULL) {
        return (-1);
    }
    if (buf == NULL) {
        return (-1);
    }
    outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
    if (outbuf == NULL) {
        htmlSaveErrMemory("allocating HTML output buffer");
        return (-1);
    }
    memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
    outbuf->buffer = buf;
    outbuf->encoder = NULL;
    outbuf->writecallback = NULL;
    outbuf->closecallback = NULL;
    outbuf->context = NULL;
    outbuf->written = 0;

    use = buf->use;
    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
    xmlFree(outbuf);
    ret = buf->use - use;
    return (ret);
}

/**
 * htmlNodeDump:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the current node
 *
 * Dump an HTML node, recursive behaviour,children are printed too,
 * and formatting returns are added.
 *
 * Returns the number of byte written or -1 in case of error
 */
int
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
    xmlInitParser();

    return(htmlNodeDumpFormat(buf, doc, cur, 1));
}

/**
 * htmlNodeDumpFileFormat:
 * @param out the FILE pointer
 * @param doc the document
 * @param cur the current node
 * @param encoding the document encoding
 * @param format should formatting spaces been added
 *
 * Dump an HTML node, recursive behaviour,children are printed too.
 *
 
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
                       xmlNodePtr cur, const char *encoding, int format) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    int ret;

    xmlInitParser();

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != XML_CHAR_ENCODING_UTF8) {
            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return(-1);
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    /*
     * save the content to a temp buffer.
     */
    buf = xmlOutputBufferCreateFile(out, handler);
    if (buf == NULL) return(0);

    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}

/**
 * htmlNodeDumpFile:
 * @param out the FILE pointer
 * @param doc the document
 * @param cur the current node
 *
 * Dump an HTML node, recursive behaviour,children are printed too,
 * and formatting returns are added.
 */
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
}

/**
 * htmlDocDumpMemory:
 * @param cur the document
 * @param mem OUT: the memory pointer
 * @param size OUT: the memory length
 *
 * Dump an HTML document in memory and return the xmlChar * and it's size.
 * It's up to the caller to free the memory.
 */
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;

    xmlInitParser();

    if (cur == NULL) {
        *mem = NULL;
        *size = 0;
        return;
    }

    encoding = (const char *) htmlGetMetaEncoding(cur);

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                *mem = NULL;
                *size = 0;
                return;
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL) {
                *mem = NULL;
                *size = 0;
                return;
            }
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    buf = xmlAllocOutputBuffer(handler);
    if (buf == NULL) {
        *mem = NULL;
        *size = 0;
        return;
    }

    htmlDocContentDumpOutput(buf, cur, NULL);
    xmlOutputBufferFlush(buf);
    if (buf->conv != NULL) {
        *size = buf->conv->use;
        *mem = xmlStrndup(buf->conv->content, *size);
    } else {
        *size = buf->buffer->use;
        *mem = xmlStrndup(buf->buffer->content, *size);
    }
    (void)xmlOutputBufferClose(buf);
}


/************************************************************************
 *                                                                      *
 *          Dumping HTML tree content to an I/O output buffer           *
 *                                                                      *
 ************************************************************************/

/**
 * htmlNodeDumpOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the current node
 * @param encoding the encoding string
 *
 * Dump an HTML node, recursive behaviour,children are printed too,
 * and formatting returns/spaces are added.
 */
void
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
               xmlNodePtr cur, const char *encoding) {
    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
}
#endif  /* LIBXML_OUTPUT_ENABLED */
#endif  /* defined(LIBXML_HTML_ENABLED) */

#if defined(LIBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT)
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * htmlSaveErr:
 * @param code the error number
 * @param node the location of the error.
 * @param extra extra informations
 *
 * Handle an out of memory condition
 */
static void
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
{
    const char *msg = NULL;

    switch(code) {
        case XML_SAVE_NOT_UTF8:
        msg = "string is not in UTF-8";
        break;
    case XML_SAVE_CHAR_INVALID:
        msg = "invalid character value";
        break;
    case XML_SAVE_UNKNOWN_ENCODING:
        msg = "unknown encoding %s";
        break;
    case XML_SAVE_NO_DOCTYPE:
        msg = "HTML has no DOCTYPE";
        break;
    default:
        msg = "unexpected error number";
    }
    __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
}

/**
 * htmlDtdDumpOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param encoding the encoding string
 *
 
 *
 * Dump the HTML document DTD, if any.
 */
static void
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                  const char *encoding ATTRIBUTE_UNUSED) {
    xmlDtdPtr cur = doc->intSubset;

    if (cur == NULL) {
        htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
        return;
    }
    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    if (cur->ExternalID != NULL) {
        xmlOutputBufferWriteString(buf, " PUBLIC ");
        xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
        if (cur->SystemID != NULL) {
            xmlOutputBufferWriteString(buf, " ");
            xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
        }
    }  else if (cur->SystemID != NULL) {
        xmlOutputBufferWriteString(buf, " SYSTEM ");
        xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    }
    xmlOutputBufferWriteString(buf, ">\n");
}

/**
 * htmlAttrDumpOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the attribute pointer
 * @param encoding the encoding string
 *
 * Dump an HTML attribute
 */
static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
                   const char *encoding ATTRIBUTE_UNUSED) {
    xmlChar *value;

    /*
     
     
     
     */

    if (cur == NULL) {
        return;
    }
    xmlOutputBufferWriteString(buf, " ");
    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
        xmlOutputBufferWriteString(buf, ":");
    }
    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
        value = xmlNodeListGetString(doc, cur->children, 0);
        if (value) {
            xmlOutputBufferWriteString(buf, "=");
            if ((cur->ns == NULL) && (cur->parent != NULL) &&
                (cur->parent->ns == NULL) &&
                ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
                 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
                 (!xmlStrcasecmp(cur->name, BAD_CAST "src")))) {
                xmlChar *escaped;
                xmlChar *tmp = value;

                while (IS_BLANK_CH(*tmp)) tmp++;

                escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
                if (escaped != NULL) {
                    xmlBufferWriteQuotedString(buf->buffer, escaped);
                    xmlFree(escaped);
                } else {
                    xmlBufferWriteQuotedString(buf->buffer, value);
                }
            } else {
                xmlBufferWriteQuotedString(buf->buffer, value);
            }
            xmlFree(value);
        } else  {
            xmlOutputBufferWriteString(buf, "=\"\"");
        }
    }
}

/**
 * htmlAttrListDumpOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the first attribute pointer
 * @param encoding the encoding string
 *
 * Dump a list of HTML attributes
 */
static void
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
    if (cur == NULL) {
        return;
    }
    while (cur != NULL) {
        htmlAttrDumpOutput(buf, doc, cur, encoding);
        cur = cur->next;
    }
}

/**
 * htmlNodeListDumpOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the first node
 * @param encoding the encoding string
 * @param format should formatting spaces been added
 *
 * Dump an HTML node list, recursive behaviour,children are printed too.
 */
static void
htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                       xmlNodePtr cur, const char *encoding, int format) {
    if (cur == NULL) {
        return;
    }
    while (cur != NULL) {
        htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
        cur = cur->next;
    }
}

/**
 * htmlNodeDumpFormatOutput:
 * @param buf the HTML buffer output
 * @param doc the document
 * @param cur the current node
 * @param encoding the encoding string
 * @param format should formatting spaces been added
 *
 * Dump an HTML node, recursive behaviour,children are printed too.
 */
XMLPUBFUNEXPORT void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                         xmlNodePtr cur, const char *encoding, int format) {
    const htmlElemDesc * info;

    xmlInitParser();

    if (cur == NULL) {
        return;
    }
    /*
     * Special cases.
     */
    if (cur->type == XML_DTD_NODE)
        return;
    if (cur->type == XML_HTML_DOCUMENT_NODE) {
        htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
        return;
    }
    if (cur->type == HTML_TEXT_NODE) {
        if (cur->content) {
            if (
                  ( 
                    (cur->name == (const xmlChar*) xmlStringText     ) ||
                    (cur->name != (const xmlChar*) xmlStringTextNoenc)
                  )
                &&
                  (!cur->parent ||
                    (
                     xmlStrcasecmp(cur->parent->name, BAD_CAST "script")
                      &&
                     xmlStrcasecmp(cur->parent->name, BAD_CAST "style")
                    )
                  )
               )
            {
                xmlChar* buffer;

                buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
                if (buffer) {
                    xmlOutputBufferWriteString(buf, (const char *)buffer);
                    xmlFree(buffer);
                }
            } else {
                xmlOutputBufferWriteString(buf, (const char *)cur->content);
            }
        }
        return;
    }
    if (cur->type == HTML_COMMENT_NODE) {
        if (cur->content != NULL) {
            xmlOutputBufferWriteString(buf, "<!--");
            xmlOutputBufferWriteString(buf, (const char *)cur->content);
            xmlOutputBufferWriteString(buf, "-->");
        }
        return;
    }

    if (cur->type == HTML_PI_NODE) {
        if (cur->name == NULL)
            return;
        xmlOutputBufferWriteString(buf, "<?");
        xmlOutputBufferWriteString(buf, (const char *)cur->name);
        if (cur->content != NULL) {
            xmlOutputBufferWriteString(buf, " ");
            xmlOutputBufferWriteString(buf, (const char *)cur->content);
        }
        xmlOutputBufferWriteString(buf, ">");
        return;
    }

    if (cur->type == HTML_ENTITY_REF_NODE) {
        xmlOutputBufferWriteString(buf, "&");
        xmlOutputBufferWriteString(buf, (const char *)cur->name);
        xmlOutputBufferWriteString(buf, ";");
        return;
    }

    if (cur->type == HTML_PRESERVE_NODE) {
        if (cur->content != NULL) {
            xmlOutputBufferWriteString(buf, (const char *)cur->content);
        }
        return;
    }

    /*
     * Get specific HTML info for that node.
     */
    info = cur->ns ? NULL : htmlTagLookup(cur->name);

    xmlOutputBufferWriteString(buf, "<");
    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
        xmlOutputBufferWriteString(buf, ":");
    }
    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    if (cur->nsDef)
        xmlNsListDumpOutput(buf, cur->nsDef);
    if (cur->properties != NULL)
        htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);

    if ((info != NULL) && (info->empty)) {
        xmlOutputBufferWriteString(buf, ">");
        if ((format) && (!info->isinline) && (cur->next != NULL)) {
            if ((cur->next->type != HTML_TEXT_NODE) &&
                (cur->next->type != HTML_ENTITY_REF_NODE) &&
                (cur->parent != NULL) &&
                (cur->parent->name != NULL) &&
                (cur->parent->name[0] != 'p')) /* p, pre, param */
                xmlOutputBufferWriteString(buf, "\n");
        }
        return;
    }

    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
        (cur->children == NULL))
    {
        if ((info != NULL) && (info->saveEndTag != 0) &&
            (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
            (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body")))
        {
            xmlOutputBufferWriteString(buf, ">");
        } else {
            xmlOutputBufferWriteString(buf, "></");
            if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
                xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
                xmlOutputBufferWriteString(buf, ":");
            }
            xmlOutputBufferWriteString(buf, (const char *)cur->name);
            xmlOutputBufferWriteString(buf, ">");
        }
        if ((format) && (cur->next != NULL) &&
                (info != NULL) && (!info->isinline))
        {
            if ((cur->next->type != HTML_TEXT_NODE) &&
                (cur->next->type != HTML_ENTITY_REF_NODE) &&
                (cur->parent != NULL) &&
                (cur->parent->name != NULL) &&
                (cur->parent->name[0] != 'p')) /* p, pre, param */
                xmlOutputBufferWriteString(buf, "\n");
        }
        return;
    }
    xmlOutputBufferWriteString(buf, ">");
    if ((cur->type != XML_ELEMENT_NODE) &&
        (cur->content != NULL)) {
            /*
             * Uses the OutputBuffer property to automatically convert
             * invalids to charrefs
             */

            xmlOutputBufferWriteString(buf, (const char *) cur->content);
    }
    if (cur->children != NULL) {
        if ((format) && (info != NULL) && (!info->isinline) &&
            (cur->children->type != HTML_TEXT_NODE) &&
            (cur->children->type != HTML_ENTITY_REF_NODE) &&
            (cur->children != cur->last) &&
            (cur->name != NULL) &&
            (cur->name[0] != 'p')) /* p, pre, param */
            xmlOutputBufferWriteString(buf, "\n");
        htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
        if ((format) && (info != NULL) && (!info->isinline) &&
            (cur->last->type != HTML_TEXT_NODE) &&
            (cur->last->type != HTML_ENTITY_REF_NODE) &&
            (cur->children != cur->last) &&
            (cur->name != NULL) &&
            (cur->name[0] != 'p')) /* p, pre, param */
            xmlOutputBufferWriteString(buf, "\n");
    }
    xmlOutputBufferWriteString(buf, "</");
    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
        xmlOutputBufferWriteString(buf, ":");
    }
    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    xmlOutputBufferWriteString(buf, ">");
    if ((format) && (info != NULL) && (!info->isinline) &&
        (cur->next != NULL)) {
        if ((cur->next->type != HTML_TEXT_NODE) &&
            (cur->next->type != HTML_ENTITY_REF_NODE) &&
            (cur->parent != NULL) &&
            (cur->parent->name != NULL) &&
            (cur->parent->name[0] != 'p')) /* p, pre, param */
            xmlOutputBufferWriteString(buf, "\n");
    }
}


/**
 * htmlDocContentDumpFormatOutput:
 * @param buf the HTML buffer output
 * @param cur the document
 * @param encoding the encoding string
 * @param format should formatting spaces been added
 *
 * Dump an HTML document.
 */
XMLPUBFUNEXPORT void
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
                               const char *encoding, int format) {
    int type;

    xmlInitParser();

    /*
     * force to output the stuff as HTML, especially for entities
     */
    type = cur->type;
    cur->type = XML_HTML_DOCUMENT_NODE;
    if (cur->intSubset != NULL) {
        htmlDtdDumpOutput(buf, cur, NULL);
    }
    if (cur->children != NULL) {
        htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
    }
    xmlOutputBufferWriteString(buf, "\n");
    cur->type = (xmlElementType) type;
}

/**
 * htmlDocContentDumpOutput:
 * @param buf the HTML buffer output
 * @param cur the document
 * @param encoding the encoding string
 *
 * Dump an HTML document. Formating return/spaces are added.
 */
XMLPUBFUNEXPORT void
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
                         const char *encoding) {
    htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
}

#endif /* LIBXML_OUTPUT_ENABLED */
#endif /* defined(IBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT) */

#ifdef LIBXML_HTML_ENABLED
#ifdef LIBXML_OUTPUT_ENABLED

/************************************************************************
 *                                                                      *
 *      Saving functions front-ends                                     *
 *                                                                      *
 ************************************************************************/

/**
 * htmlDocDump:
 * @param f the FILE*
 * @param cur the document
 *
 * Dump an HTML document to an open FILE.
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlDocDump(FILE *f, xmlDocPtr cur) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;
    int ret;

    xmlInitParser();

    if (cur == NULL) {
        return(-1);
    }

    encoding = (const char *) htmlGetMetaEncoding(cur);

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                return(-1);
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return(-1);
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    buf = xmlOutputBufferCreateFile(f, handler);
    if (buf == NULL) return(-1);
    htmlDocContentDumpOutput(buf, cur, NULL);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}

/**
 * htmlSaveFile:
 * @param filename the filename (or URL)
 * @param cur the document
 *
 * Dump an HTML document to a file. If filename is "-" the stdout file is
 * used.
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;
    int ret;

    xmlInitParser();

    encoding = (const char *) htmlGetMetaEncoding(cur);

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                return(-1);
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return(-1);
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    /*
     * save the content to a temp buffer.
     */
    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
    if (buf == NULL) return(0);

    htmlDocContentDumpOutput(buf, cur, NULL);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}

/**
 * htmlSaveFileFormat:
 * @param filename the filename
 * @param cur the document
 * @param format should formatting spaces been added
 * @param encoding the document encoding
 *
 * Dump an HTML document to a file using a given encoding.
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
                   const char *encoding, int format) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    int ret;

    xmlInitParser();

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                return(-1);
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return(-1);
            htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
        }
    } else {
        htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL){
        handler = xmlFindCharEncodingHandler("HTML");
        if (handler == NULL)
            handler = xmlFindCharEncodingHandler("ascii");
    }
    /*
     * save the content to a temp buffer.
     */
    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
    if (buf == NULL) return(0);

    htmlDocContentDumpFormatOutput(buf, cur, encoding, format);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}

/**
 * htmlSaveFileEnc:
 * @param filename the filename
 * @param cur the document
 * @param encoding the document encoding
 *
 * Dump an HTML document to a file using a given encoding
 * and formatting returns/spaces are added.
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
    return(htmlSaveFileFormat(filename, cur, encoding, 1));
}

#endif /* LIBXML_OUTPUT_ENABLED */

#endif /* LIBXML_HTML_ENABLED */
author	Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
	Thu, 17 Dec 2009 09:29:21 +0200
changeset 0	e35f40988205
permissions	-rw-r--r--