xml/xmlexpatparser/src/expat-1.95.5/doc_pub/xmlwf.sgml
branchRCL_3
changeset 21 604ca70b6235
parent 15 6b9cbc8ad81c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xml/xmlexpatparser/src/expat-1.95.5/doc_pub/xmlwf.sgml	Wed Sep 01 12:37:34 2010 +0100
@@ -0,0 +1,440 @@
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
+
+<!-- Process this file with docbook-to-man to generate an nroff manual
+     page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
+     the manual page with: `docbook-to-man manpage.sgml | nroff -man |
+     less'.  A typical entry in a Makefile or Makefile.am is:
+
+manpage.1: manpage.sgml
+	docbook-to-man $< > $@
+  -->
+
+  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
+  <!ENTITY dhsurname   "<surname>Bronson</surname>">
+  <!-- Please adjust the date whenever revising the manpage. -->
+  <!ENTITY dhdate      "<date>December  5, 2001</date>">
+  <!-- SECTION should be 1-8, maybe with a subsection.  Other parameters
+       are allowed: see man(7), man(1). -->
+  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
+  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
+  <!ENTITY dhusername  "Scott Bronson">
+  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
+  <!ENTITY dhpackage   "xmlwf">
+
+  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
+  <!ENTITY gnu         "<acronym>GNU</acronym>">
+]>
+
+<refentry>
+  <refentryinfo>
+    <address>
+      &dhemail;
+    </address>
+    <author>
+      &dhfirstname;
+      &dhsurname;
+    </author>
+    <copyright>
+      <year>2001</year>
+      <holder>&dhusername;</holder>
+    </copyright>
+    &dhdate;
+  </refentryinfo>
+  <refmeta>
+    &dhucpackage;
+
+    &dhsection;
+  </refmeta>
+  <refnamediv>
+    <refname>&dhpackage;</refname>
+
+    <refpurpose>Determines if an XML document is well-formed</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>&dhpackage;</command>
+	  <arg><option>-s</option></arg>
+	  <arg><option>-n</option></arg>
+	  <arg><option>-p</option></arg>
+	  <arg><option>-x</option></arg>
+
+	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
+	  <arg><option>-w</option></arg>
+
+	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
+	  <arg><option>-c</option></arg>
+	  <arg><option>-m</option></arg>
+
+	  <arg><option>-r</option></arg>
+	  <arg><option>-t</option></arg>
+
+	  <arg><option>-v</option></arg>
+
+	  <arg>file ...</arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+ 
+  <refsect1>
+    <title>DESCRIPTION</title>
+
+    <para>
+	<command>&dhpackage;</command> uses the Expat library to determine
+	if an XML document is well-formed.  It is non-validating.
+	</para>
+
+	<para>
+	If you do not specify any files on the command line,
+	and you have a recent version of &dhpackage;, the input
+	file will be read from STDIN.
+	</para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>WELL-FORMED DOCUMENTS</title>
+
+	<para>
+	  A well-formed document must adhere to the
+	  following rules:
+	</para>
+
+	<itemizedlist>
+      <listitem><para>
+	    The file begins with an XML declaration.  For instance,
+		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
+		<emphasis>NOTE:</emphasis> &dhpackage; does not currently
+		check for a valid XML declaration.
+      </para></listitem>
+      <listitem><para>
+		Every start tag is either empty (&lt;tag/&gt;)
+		or has a corresponding end tag.
+      </para></listitem>
+      <listitem><para>
+	    There is exactly one root element.  This element must contain
+		all other elements in the document.  Only comments, white
+		space, and processing instructions may come after the close
+		of the root element.
+      </para></listitem>
+      <listitem><para>
+		All elements nest properly.
+      </para></listitem>
+      <listitem><para>
+		All attribute values are enclosed in quotes (either single
+		or double).
+      </para></listitem>
+    </itemizedlist>
+
+	<para>
+	  If the document has a DTD, and it strictly complies with that
+	  DTD, then the document is also considered <emphasis>valid</emphasis>.
+	  &dhpackage; is a non-validating parser -- it does not check the DTD.
+	  However, it does support external entities (see the -x option).
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>OPTIONS</title>
+
+<para>
+When an option includes an argument, you may specify the argument either
+separate ("-d output") or mashed ("-doutput").  &dhpackage; supports both.
+</para>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>-c</option></term>
+        <listitem>
+		<para>
+  If the input file is well-formed and &dhpackage; doesn't
+  encounter any errors, the input file is simply copied to
+  the output directory unchanged.
+  This implies no namespaces (turns off -n) and
+  requires -d to specify an output directory.
+  		</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-d output-dir</option></term>
+        <listitem>
+		<para>
+  Specifies a directory to contain transformed
+  representations of the input files.
+  By default, -d outputs a canonical representation
+  (described below).
+  You can select different output formats using -c and -m.
+	  </para>
+	  <para>
+  The output filenames will
+  be exactly the same as the input filenames or "STDIN" if the input is
+  coming from STDIN.  Therefore, you must be careful that the
+  output file does not go into the same directory as the input
+  file.  Otherwise, &dhpackage; will delete the input file before
+  it generates the output file (just like running
+  <literal>cat &lt; file &gt; file</literal> in most shells).
+	  </para>
+	  <para> 
+  Two structurally equivalent XML documents have a byte-for-byte
+  identical canonical XML representation.
+  Note that ignorable white space is considered significant and
+  is treated equivalently to data.
+  More on canonical XML can be found at
+  http://www.jclark.com/xml/canonxml.html .
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-e encoding</option></term>
+        <listitem>
+		<para>
+   Specifies the character encoding for the document, overriding
+   any document encoding declaration.  &dhpackage;
+   has four built-in encodings: 
+   	<literal>US-ASCII</literal>,
+	<literal>UTF-8</literal>,
+	<literal>UTF-16</literal>, and
+    <literal>ISO-8859-1</literal>.
+	Also see the -w option.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-m</option></term>
+        <listitem>
+		<para>
+  Outputs some strange sort of XML file that completely
+  describes the input file, including character positions.
+  Requires -d to specify an output directory.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-n</option></term>
+        <listitem>
+		<para>
+  Turns on namespace processing.  (describe namespaces)
+  -c disables namespaces.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-p</option></term>
+        <listitem>
+		<para>
+    Tells xmlwf to process external DTDs and parameter
+    entities.
+	 </para>
+	 <para>
+	Normally &dhpackage; never parses parameter entities.
+	-p tells it to always parse them.
+	-p implies -x.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-r</option></term>
+        <listitem>
+		<para>
+   Normally &dhpackage; memory-maps the XML file before parsing.
+   -r turns off memory-mapping and uses normal file IO calls instead.
+   Of course, memory-mapping is automatically turned off
+   when reading from STDIN.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-s</option></term>
+        <listitem>
+		<para>
+  Prints an error if the document is not standalone. 
+  A document is standalone if it has no external subset and no
+  references to parameter entities.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-t</option></term>
+        <listitem>
+		<para>
+  Turns on timings.  This tells Expat to parse the entire file,
+  but not perform any processing.
+  This gives a fairly accurate idea of the raw speed of Expat itself
+  without client overhead.
+   -t turns off most of the output options (-d, -m, -c, ...).
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-v</option></term>
+        <listitem>
+		<para>
+  Prints the version of the Expat library being used, and then exits.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-w</option></term>
+        <listitem>
+		<para>
+  Enables Windows code pages.
+  Normally, &dhpackage; will throw an error if it runs across
+  an encoding that it is not equipped to handle itself.  With
+  -w, &dhpackage; will try to use a Windows code page.  See
+  also -e.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-x</option></term>
+        <listitem>
+		<para>
+  Turns on parsing external entities.
+  </para>
+<para>
+  Non-validating parsers are not required to resolve external
+  entities, or even expand entities at all.
+  Expat always expands internal entities (?),
+  but external entity parsing must be enabled explicitly.
+  </para>
+  <para>
+  External entities are simply entities that obtain their
+  data from outside the XML file currently being parsed.
+  </para>
+  <para>
+  This is an example of an internal entity:
+<literallayout>
+&lt;!ENTITY vers '1.0.2'&gt;
+</literallayout>
+  </para>
+  <para>
+  And here are some examples of external entities:
+
+<literallayout>
+&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
+&lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
+</literallayout>
+
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--</option></term>
+        <listitem>
+		<para>
+    For some reason, &dhpackage; specifically ignores "--"
+	anywhere it appears on the command line.
+	   </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+	<para>
+	Older versions of &dhpackage; do not support reading from STDIN.
+	</para>
+  </refsect1>
+
+  <refsect1>
+  <title>OUTPUT</title>
+    <para>
+	If an input file is not well-formed, &dhpackage; outputs
+	a single line describing the problem to STDOUT.
+	If a file is well-formed, &dhpackage; outputs nothing.
+	Note that the result code is <emphasis>not</emphasis> set.
+	</para>
+  </refsect1>
+  
+  <refsect1>
+    <title>BUGS</title>
+	<para>
+	According to the W3C standard, an XML file without a
+	declaration at the beginning is not considered well-formed.
+	However, &dhpackage; allows this to pass.
+	</para>
+	<para>
+	&dhpackage; returns a 0 - noerr result, even if the file is
+	not well-formed.  There is no good way for a program to use
+	xmlwf to quickly check a file -- it must parse xmlwf's STDOUT.
+	</para>
+    <para>
+    The errors should go to STDERR, not STDOUT.
+    </para>
+	<para>
+	There should be a way to get -d to send its output to STDOUT
+	rather than forcing the user to send it to a file.
+	</para>
+	<para>
+	I have no idea why anyone would want to use the -d, -c
+	and -m options.  If someone could explain it to me, I'd
+	like to add this information to this manpage.
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>ALTERNATIVES</title>
+	<para>
+	  Here are some XML validators on the web:
+
+<literallayout>
+http://www.hcrc.ed.ac.uk/~richard/xml-check.html
+http://www.stg.brown.edu/service/xmlvalid/
+http://www.scripting.com/frontier5/xml/code/xmlValidator.html
+http://www.xml.com/pub/a/tools/ruwf/check.html
+&nbsp;    (on a page with no less than 15 ads!  Shame!)
+</literallayout>
+
+		 </para>
+  </refsect1>
+
+  <refsect1>
+    <title>SEE ALSO</title>
+	<para>
+
+<literallayout>
+The Expat home page:        http://www.libexpat.org/
+The W3 XML specification:   http://www.w3.org/TR/REC-xml
+</literallayout>
+
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>AUTHOR</title>
+    <para>
+	  This manual page was written by &dhusername; &dhemail; for
+      the &debian; system (but may be used by others).  Permission is
+      granted to copy, distribute and/or modify this document under
+      the terms of the <acronym>GNU</acronym> Free Documentation
+      License, Version 1.1.
+	</para>
+  </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->