xml/xmlexpatparser/src/expat-1.95.5/doc_pub/xmlwf.sgml
branchRCL_3
changeset 20 889504eac4fb
parent 19 6bcc0aa4be39
child 21 604ca70b6235
equal deleted inserted replaced
19:6bcc0aa4be39 20:889504eac4fb
     1 <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
       
     2 
       
     3 <!-- Process this file with docbook-to-man to generate an nroff manual
       
     4      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
       
     5      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
       
     6      less'.  A typical entry in a Makefile or Makefile.am is:
       
     7 
       
     8 manpage.1: manpage.sgml
       
     9 	docbook-to-man $< > $@
       
    10   -->
       
    11 
       
    12   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
       
    13   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
       
    14   <!ENTITY dhsurname   "<surname>Bronson</surname>">
       
    15   <!-- Please adjust the date whenever revising the manpage. -->
       
    16   <!ENTITY dhdate      "<date>December  5, 2001</date>">
       
    17   <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
       
    18        allowed: see man(7), man(1). -->
       
    19   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
       
    20   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
       
    21   <!ENTITY dhusername  "Scott Bronson">
       
    22   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
       
    23   <!ENTITY dhpackage   "xmlwf">
       
    24 
       
    25   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
       
    26   <!ENTITY gnu         "<acronym>GNU</acronym>">
       
    27 ]>
       
    28 
       
    29 <refentry>
       
    30   <refentryinfo>
       
    31     <address>
       
    32       &dhemail;
       
    33     </address>
       
    34     <author>
       
    35       &dhfirstname;
       
    36       &dhsurname;
       
    37     </author>
       
    38     <copyright>
       
    39       <year>2001</year>
       
    40       <holder>&dhusername;</holder>
       
    41     </copyright>
       
    42     &dhdate;
       
    43   </refentryinfo>
       
    44   <refmeta>
       
    45     &dhucpackage;
       
    46 
       
    47     &dhsection;
       
    48   </refmeta>
       
    49   <refnamediv>
       
    50     <refname>&dhpackage;</refname>
       
    51 
       
    52     <refpurpose>Determines if an XML document is well-formed</refpurpose>
       
    53   </refnamediv>
       
    54   <refsynopsisdiv>
       
    55     <cmdsynopsis>
       
    56       <command>&dhpackage;</command>
       
    57 	  <arg><option>-s</option></arg>
       
    58 	  <arg><option>-n</option></arg>
       
    59 	  <arg><option>-p</option></arg>
       
    60 	  <arg><option>-x</option></arg>
       
    61 
       
    62 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
       
    63 	  <arg><option>-w</option></arg>
       
    64 
       
    65 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
       
    66 	  <arg><option>-c</option></arg>
       
    67 	  <arg><option>-m</option></arg>
       
    68 
       
    69 	  <arg><option>-r</option></arg>
       
    70 	  <arg><option>-t</option></arg>
       
    71 
       
    72 	  <arg><option>-v</option></arg>
       
    73 
       
    74 	  <arg>file ...</arg>
       
    75     </cmdsynopsis>
       
    76   </refsynopsisdiv>
       
    77  
       
    78   <refsect1>
       
    79     <title>DESCRIPTION</title>
       
    80 
       
    81     <para>
       
    82 	<command>&dhpackage;</command> uses the Expat library to determine
       
    83 	if an XML document is well-formed.  It is non-validating.
       
    84 	</para>
       
    85 
       
    86 	<para>
       
    87 	If you do not specify any files on the command-line,
       
    88 	and you have a recent version of &dhpackage;, the input
       
    89 	file will be read from STDIN.
       
    90 	</para>
       
    91 
       
    92   </refsect1>
       
    93 
       
    94   <refsect1>
       
    95     <title>WELL-FORMED DOCUMENTS</title>
       
    96 
       
    97 	<para>
       
    98 	  A well-formed document must adhere to the
       
    99 	  following rules:
       
   100 	</para>
       
   101 
       
   102 	<itemizedlist>
       
   103       <listitem><para>
       
   104 	    The file begins with an XML declaration.  For instance,
       
   105 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
       
   106 		<emphasis>NOTE:</emphasis> &dhpackage; does not currently
       
   107 		check for a valid XML declaration.
       
   108       </para></listitem>
       
   109       <listitem><para>
       
   110 		Every start tag is either empty (&lt;tag/&gt;)
       
   111 		or has a corresponding end tag.
       
   112       </para></listitem>
       
   113       <listitem><para>
       
   114 	    There is exactly one root element.  This element must contain
       
   115 		all other elements in the document.  Only comments, white
       
   116 		space, and processing instructions may come after the close
       
   117 		of the root element.
       
   118       </para></listitem>
       
   119       <listitem><para>
       
   120 		All elements nest properly.
       
   121       </para></listitem>
       
   122       <listitem><para>
       
   123 		All attribute values are enclosed in quotes (either single
       
   124 		or double).
       
   125       </para></listitem>
       
   126     </itemizedlist>
       
   127 
       
   128 	<para>
       
   129 	  If the document has a DTD, and it strictly complies with that
       
   130 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
       
   131 	  &dhpackage; is a non-validating parser -- it does not check the DTD.
       
   132 	  However, it does support external entities (see the -x option).
       
   133 	</para>
       
   134   </refsect1>
       
   135 
       
   136   <refsect1>
       
   137     <title>OPTIONS</title>
       
   138 
       
   139 <para>
       
   140 When an option includes an argument, you may specify the argument either
       
   141 separate ("-d output") or mashed ("-doutput").  &dhpackage; supports both.
       
   142 </para>
       
   143 
       
   144     <variablelist>
       
   145 
       
   146       <varlistentry>
       
   147         <term><option>-c</option></term>
       
   148         <listitem>
       
   149 		<para>
       
   150   If the input file is well-formed and &dhpackage; doesn't
       
   151   encounter any errors, the input file is simply copied to
       
   152   the output directory unchanged.
       
   153   This implies no namespaces (turns off -n) and
       
   154   requires -d to specify an output directory.
       
   155   		</para>
       
   156         </listitem>
       
   157       </varlistentry>
       
   158 
       
   159       <varlistentry>
       
   160         <term><option>-d output-dir</option></term>
       
   161         <listitem>
       
   162 		<para>
       
   163   Specifies a directory to contain transformed
       
   164   representations of the input files.
       
   165   By default, -d outputs a canonical representation
       
   166   (described below).
       
   167   You can select different output formats using -c and -m.
       
   168 	  </para>
       
   169 	  <para>
       
   170   The output filenames will
       
   171   be exactly the same as the input filenames or "STDIN" if the input is
       
   172   coming from STDIN.  Therefore, you must be careful that the
       
   173   output file does not go into the same directory as the input
       
   174   file.  Otherwise, &dhpackage; will delete the input file before
       
   175   it generates the output file (just like running
       
   176   <literal>cat &lt; file &gt; file</literal> in most shells).
       
   177 	  </para>
       
   178 	  <para> 
       
   179   Two structurally equivalent XML documents have a byte-for-byte
       
   180   identical canonical XML representation.
       
   181   Note that ignorable white space is considered significant and
       
   182   is treated equivalently to data.
       
   183   More on canonical XML can be found at
       
   184   http://www.jclark.com/xml/canonxml.html .
       
   185 	  </para>
       
   186         </listitem>
       
   187       </varlistentry>
       
   188 
       
   189       <varlistentry>
       
   190         <term><option>-e encoding</option></term>
       
   191         <listitem>
       
   192 		<para>
       
   193    Specifies the character encoding for the document, overriding
       
   194    any document encoding declaration.  &dhpackage;
       
   195    has four built-in encodings: 
       
   196    	<literal>US-ASCII</literal>,
       
   197 	<literal>UTF-8</literal>,
       
   198 	<literal>UTF-16</literal>, and
       
   199     <literal>ISO-8859-1</literal>.
       
   200 	Also see the -w option.
       
   201 	   </para>
       
   202         </listitem>
       
   203       </varlistentry>
       
   204 
       
   205       <varlistentry>
       
   206         <term><option>-m</option></term>
       
   207         <listitem>
       
   208 		<para>
       
   209   Outputs some strange sort of XML file that completely
       
   210   describes the input file, including character positions.
       
   211   Requires -d to specify an output directory.
       
   212 	   </para>
       
   213         </listitem>
       
   214       </varlistentry>
       
   215 
       
   216       <varlistentry>
       
   217         <term><option>-n</option></term>
       
   218         <listitem>
       
   219 		<para>
       
   220   Turns on namespace processing.  (describe namespaces)
       
   221   -c disables namespaces.
       
   222 	   </para>
       
   223         </listitem>
       
   224       </varlistentry>
       
   225 
       
   226       <varlistentry>
       
   227         <term><option>-p</option></term>
       
   228         <listitem>
       
   229 		<para>
       
   230     Tells xmlwf to process external DTDs and parameter
       
   231     entities.
       
   232 	 </para>
       
   233 	 <para>
       
   234 	Normally &dhpackage; never parses parameter entities.
       
   235 	-p tells it to always parse them.
       
   236 	-p implies -x.
       
   237 	   </para>
       
   238         </listitem>
       
   239       </varlistentry>
       
   240 
       
   241       <varlistentry>
       
   242         <term><option>-r</option></term>
       
   243         <listitem>
       
   244 		<para>
       
   245    Normally &dhpackage; memory-maps the XML file before parsing.
       
   246    -r turns off memory-mapping and uses normal file IO calls instead.
       
   247    Of course, memory-mapping is automatically turned off
       
   248    when reading from STDIN.
       
   249 	   </para>
       
   250         </listitem>
       
   251       </varlistentry>
       
   252 
       
   253       <varlistentry>
       
   254         <term><option>-s</option></term>
       
   255         <listitem>
       
   256 		<para>
       
   257   Prints an error if the document is not standalone. 
       
   258   A document is standalone if it has no external subset and no
       
   259   references to parameter entities.
       
   260 	   </para>
       
   261         </listitem>
       
   262       </varlistentry>
       
   263 
       
   264       <varlistentry>
       
   265         <term><option>-t</option></term>
       
   266         <listitem>
       
   267 		<para>
       
   268   Turns on timings.  This tells Expat to parse the entire file,
       
   269   but not perform any processing.
       
   270   This gives a fairly accurate idea of the raw speed of Expat itself
       
   271   without client overhead.
       
   272    -t turns off most of the output options (-d, -m, -c, ...).
       
   273 	   </para>
       
   274         </listitem>
       
   275       </varlistentry>
       
   276 
       
   277       <varlistentry>
       
   278         <term><option>-v</option></term>
       
   279         <listitem>
       
   280 		<para>
       
   281   Prints the version of the Expat library being used, and then exits.
       
   282 	   </para>
       
   283         </listitem>
       
   284       </varlistentry>
       
   285 
       
   286       <varlistentry>
       
   287         <term><option>-w</option></term>
       
   288         <listitem>
       
   289 		<para>
       
   290   Enables Windows code pages.
       
   291   Normally, &dhpackage; will throw an error if it runs across
       
   292   an encoding that it is not equipped to handle itself.  With
       
   293   -w, &dhpackage; will try to use a Windows code page.  See
       
   294   also -e.
       
   295 	   </para>
       
   296         </listitem>
       
   297       </varlistentry>
       
   298 
       
   299       <varlistentry>
       
   300         <term><option>-x</option></term>
       
   301         <listitem>
       
   302 		<para>
       
   303   Turns on parsing external entities.
       
   304   </para>
       
   305 <para>
       
   306   Non-validating parsers are not required to resolve external
       
   307   entities, or even expand entities at all.
       
   308   Expat always expands internal entities (?),
       
   309   but external entity parsing must be enabled explicitly.
       
   310   </para>
       
   311   <para>
       
   312   External entities are simply entities that obtain their
       
   313   data from outside the XML file currently being parsed.
       
   314   </para>
       
   315   <para>
       
   316   This is an example of an internal entity:
       
   317 <literallayout>
       
   318 &lt;!ENTITY vers '1.0.2'&gt;
       
   319 </literallayout>
       
   320   </para>
       
   321   <para>
       
   322   And here are some examples of external entities:
       
   323 
       
   324 <literallayout>
       
   325 &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
       
   326 &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
       
   327 </literallayout>
       
   328 
       
   329 	   </para>
       
   330         </listitem>
       
   331       </varlistentry>
       
   332 
       
   333       <varlistentry>
       
   334         <term><option>--</option></term>
       
   335         <listitem>
       
   336 		<para>
       
   337     For some reason, &dhpackage; specifically ignores "--"
       
   338 	anywhere it appears on the command line.
       
   339 	   </para>
       
   340         </listitem>
       
   341       </varlistentry>
       
   342     </variablelist>
       
   343 
       
   344 	<para>
       
   345 	Older versions of &dhpackage; do not support reading from STDIN.
       
   346 	</para>
       
   347   </refsect1>
       
   348 
       
   349   <refsect1>
       
   350   <title>OUTPUT</title>
       
   351     <para>
       
   352 	If an input file is not well-formed, &dhpackage; outputs
       
   353 	a single line describing the problem to STDOUT.
       
   354 	If a file is well-formed, &dhpackage; outputs nothing.
       
   355 	Note that the result code is <emphasis>not</emphasis> set.
       
   356 	</para>
       
   357   </refsect1>
       
   358   
       
   359   <refsect1>
       
   360     <title>BUGS</title>
       
   361 	<para>
       
   362 	According to the W3C standard, an XML file without a
       
   363 	declaration at the beginning is not considered well-formed.
       
   364 	However, &dhpackage; allows this to pass.
       
   365 	</para>
       
   366 	<para>
       
   367 	&dhpackage; returns a 0 - noerr result, even if the file is
       
   368 	not well-formed.  There is no good way for a program to use
       
   369 	xmlwf to quickly check a file -- it must parse xmlwf's STDOUT.
       
   370 	</para>
       
   371     <para>
       
   372     The errors should go to STDERR, not STDOUT.
       
   373     </para>
       
   374 	<para>
       
   375 	There should be a way to get -d to send its output to STDOUT
       
   376 	rather than forcing the user to send it to a file.
       
   377 	</para>
       
   378 	<para>
       
   379 	I have no idea why anyone would want to use the -d, -c
       
   380 	and -m options.  If someone could explain it to me, I'd
       
   381 	like to add this information to this manpage.
       
   382 	</para>
       
   383   </refsect1>
       
   384 
       
   385   <refsect1>
       
   386     <title>ALTERNATIVES</title>
       
   387 	<para>
       
   388 	  Here are some XML validators on the web:
       
   389 
       
   390 <literallayout>
       
   391 http://www.hcrc.ed.ac.uk/~richard/xml-check.html
       
   392 http://www.stg.brown.edu/service/xmlvalid/
       
   393 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
       
   394 http://www.xml.com/pub/a/tools/ruwf/check.html
       
   395 &nbsp;    (on a page with no less than 15 ads!  Shame!)
       
   396 </literallayout>
       
   397 
       
   398 		 </para>
       
   399   </refsect1>
       
   400 
       
   401   <refsect1>
       
   402     <title>SEE ALSO</title>
       
   403 	<para>
       
   404 
       
   405 <literallayout>
       
   406 The Expat home page:        http://www.libexpat.org/
       
   407 The W3 XML specification:   http://www.w3.org/TR/REC-xml
       
   408 </literallayout>
       
   409 
       
   410 	</para>
       
   411   </refsect1>
       
   412 
       
   413   <refsect1>
       
   414     <title>AUTHOR</title>
       
   415     <para>
       
   416 	  This manual page was written by &dhusername; &dhemail; for
       
   417       the &debian; system (but may be used by others).  Permission is
       
   418       granted to copy, distribute and/or modify this document under
       
   419       the terms of the <acronym>GNU</acronym> Free Documentation
       
   420       License, Version 1.1.
       
   421 	</para>
       
   422   </refsect1>
       
   423 </refentry>
       
   424 
       
   425 <!-- Keep this comment at the end of the file
       
   426 Local variables:
       
   427 mode: sgml
       
   428 sgml-omittag:t
       
   429 sgml-shorttag:t
       
   430 sgml-minimize-attributes:nil
       
   431 sgml-always-quote-attributes:t
       
   432 sgml-indent-step:2
       
   433 sgml-indent-data:t
       
   434 sgml-parent-document:nil
       
   435 sgml-default-dtd-file:nil
       
   436 sgml-exposed-tags:nil
       
   437 sgml-local-catalogs:nil
       
   438 sgml-local-ecat-files:nil
       
   439 End:
       
   440 -->