diff options
Diffstat (limited to 'doc/cxx/parser/guide/index.xhtml')
| -rw-r--r-- | doc/cxx/parser/guide/index.xhtml | 4163 | 
1 files changed, 4163 insertions, 0 deletions
| diff --git a/doc/cxx/parser/guide/index.xhtml b/doc/cxx/parser/guide/index.xhtml new file mode 100644 index 0000000..6964a14 --- /dev/null +++ b/doc/cxx/parser/guide/index.xhtml @@ -0,0 +1,4163 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> +  <title>C++/Parser Mapping Getting Started Guide</title> + +  <meta name="copyright" content="© 2005-2023 Code Synthesis"/> +  <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parser,validation"/> +  <meta name="description" content="C++/Parser Mapping Getting Started Guide"/> + +  <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> +  pre { +    padding    : 0 0 0 0em; +    margin     : 0em 0em 0em 0; + +    font-size  : 102% +  } + +  body { +    min-width: 48em; +  } + +  h1 { +    font-weight: bold; +    font-size: 200%; +    line-height: 1.2em; +  } + +  h2 { +    font-weight : bold; +    font-size   : 150%; + +    padding-top : 0.8em; +  } + +  h3 { +    font-size   : 140%; +    padding-top : 0.8em; +  } + +  /* Adjust indentation for three levels. 
*/ +  #container { +    max-width: 48em; +  } + +  #content { +    padding: 0 0.1em 0 4em; +    /*background-color: red;*/ +  } + +  #content h1 { +    margin-left: -2.06em; +  } + +  #content h2 { +    margin-left: -1.33em; +  } + +  /* Title page */ + +  #titlepage { +    padding: 2em 0 1em 0; +    border-bottom: 1px solid black; +  } + +  #titlepage .title { +    font-weight: bold; +    font-size: 200%; +    text-align: center; +  } + +  #titlepage #first-title { +    padding: 1em 0 0.4em 0; +  } + +  #titlepage #second-title { +    padding: 0.4em 0 2em 0; +  } + +  /* Lists */ +  ul.list li { +    padding-top      : 0.3em; +    padding-bottom   : 0.3em; +  } + +  ol.steps { +    padding-left     : 1.8em; +  } + +  ol.steps li { +    padding-top      : 0.3em; +    padding-bottom   : 0.3em; +  } + + +  div.img { +    text-align: center; +    padding: 2em 0 2em 0; +  } + +  /*  */ +  dl dt { +    padding   : 0.8em 0 0 0; +  } + +  /* Built-in table */ +  #builtin { +    margin: 2em 0 2em 0; + +    border-collapse   : collapse; +    border            : 1px solid; +    border-color      : #000000; + +    font-size        : 11px; +    line-height      : 14px; +  } + +  #builtin th, #builtin td { +    border: 1px solid; +    padding           : 0.9em 0.9em 0.7em 0.9em; +  } + +  #builtin th { +    background : #cde8f6; +  } + +  #builtin td { +    text-align: left; +  } + +  /* XML Schema features table. 
*/ +  #features { +    margin: 2em 0 2em 0; + +    border-collapse   : collapse; +    border            : 1px solid; +    border-color      : #000000; + +    font-size        : 11px; +    line-height      : 14px; +  } + +  #features th, #features td { +    border: 1px solid; +    padding           : 0.6em 0.6em 0.6em 0.6em; +  } + +  #features th { +    background : #cde8f6; +  } + +  #features td { +    text-align: left; +  } + + +  /* TOC */ +  table.toc { +    border-style      : none; +    border-collapse   : separate; +    border-spacing    : 0; + +    margin            : 0.2em 0 0.2em 0; +    padding           : 0 0 0 0; +  } + +  table.toc tr { +    padding           : 0 0 0 0; +    margin            : 0 0 0 0; +  } + +  table.toc * td, table.toc * th { +    border-style      : none; +    margin            : 0 0 0 0; +    vertical-align    : top; +  } + +  table.toc * th { +    font-weight       : normal; +    padding           : 0em 0.1em 0em 0; +    text-align        : left; +    white-space       : nowrap; +  } + +  table.toc * table.toc th { +    padding-left      : 1em; +  } + +  table.toc * td { +    padding           : 0em 0 0em 0.7em; +    text-align        : left; +  } +</style> + + +</head> + +<body> +<div id="container"> +  <div id="content"> + +  <div class="noprint"> + +  <div id="titlepage"> +    <div class="title" id="first-title">C++/Parser Mapping</div> +    <div class="title" id="second-title">Getting Started Guide</div> + +  <p>Copyright © 2005-2023 Code Synthesis.</p> + +  <p>Permission is granted to copy, distribute and/or modify this +     document under the terms of the +     <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free +     Documentation License, version 1.2</a>; with no Invariant Sections, +     no Front-Cover Texts and no Back-Cover Texts. 
+  </p> + +  <p>This document is available in the following formats: +     <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/index.xhtml">XHTML</a>, +     <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.pdf">PDF</a>, and +     <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.ps">PostScript</a>.</p> + +  </div> + +  <h1>Table of Contents</h1> + +  <table class="toc"> +    <tr> +      <th></th><td><a href="#0">Preface</a> +        <table class="toc"> +          <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> +          <tr><th></th><td><a href="#0.2">More Information</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>1</th><td><a href="#1">Introduction</a> +        <table class="toc"> +          <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> +          <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>2</th><td><a href="#2">Hello World Example</a> +        <table class="toc"> +          <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> +          <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> +          <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> +          <tr><th>2.4</th><td><a href="#2.4">Compiling and Running</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>3</th><td><a href="#3">Parser Skeletons</a> +        <table class="toc"> +          <tr><th>3.1</th><td><a href="#3.1">Implementing the Gender Parser</a></td></tr> +          <tr><th>3.2</th><td><a href="#3.2">Implementing the Person Parser</a></td></tr> +          <tr><th>3.3</th><td><a href="#3.3">Implementing the People Parser</a></td></tr> +          <tr><th>3.4</th><td><a href="#3.4">Connecting the Parsers 
Together</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>4</th><td><a href="#4">Type Maps</a> +        <table class="toc"> +          <tr><th>4.1</th><td><a href="#4.1">Object Model</a></td></tr> +          <tr><th>4.2</th><td><a href="#4.2">Type Map File Format</a></td></tr> +          <tr><th>4.3</th><td><a href="#4.3">Parser Implementations</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>5</th><td><a href="#5">Mapping Configuration</a> +        <table class="toc"> +          <tr><th>5.1</th><td><a href="#5.1">C++ Standard</a></td></tr> +          <tr><th>5.2</th><td><a href="#5.2">Character Type and Encoding</a></td></tr> +          <tr><th>5.3</th><td><a href="#5.3">Underlying XML Parser</a></td></tr> +	  <tr><th>5.4</th><td><a href="#5.4">XML Schema Validation</a></td></tr> +	  <tr><th>5.5</th><td><a href="#5.5">Support for Polymorphism</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>6</th><td><a href="#6">Built-In XML Schema Type Parsers</a> +        <table class="toc"> +          <tr><th>6.1</th><td><a href="#6.1"><code>QName</code> Parser</a></td></tr> +          <tr><th>6.2</th><td><a href="#6.2"><code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></td></tr> +          <tr><th>6.3</th><td><a href="#6.3"><code>base64Binary</code> and <code>hexBinary</code> Parsers</a></td></tr> +	  <tr><th>6.4</th><td><a href="#6.4">Time Zone Representation</a></td></tr> +	  <tr><th>6.5</th><td><a href="#6.5"><code>date</code> Parser</a></td></tr> +	  <tr><th>6.6</th><td><a href="#6.6"><code>dateTime</code> Parser</a></td></tr> +	  <tr><th>6.7</th><td><a href="#6.7"><code>duration</code> Parser</a></td></tr> +	  <tr><th>6.8</th><td><a href="#6.8"><code>gDay</code> Parser</a></td></tr> +	  <tr><th>6.9</th><td><a href="#6.9"><code>gMonth</code> Parser</a></td></tr> +	  <tr><th>6.10</th><td><a href="#6.10"><code>gMonthDay</code> Parser</a></td></tr> +	  <tr><th>6.11</th><td><a 
href="#6.11"><code>gYear</code> Parser</a></td></tr> +	  <tr><th>6.12</th><td><a href="#6.12"><code>gYearMonth</code> Parser</a></td></tr> +	  <tr><th>6.13</th><td><a href="#6.13"><code>time</code> Parser</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th>7</th><td><a href="#7">Document Parser and Error Handling</a> +        <table class="toc"> +          <tr><th>7.1</th><td><a href="#7.1">Xerces-C++ Document Parser</a></td></tr> +          <tr><th>7.2</th><td><a href="#7.2">Expat Document Parser</a></td></tr> +          <tr><th>7.3</th><td><a href="#7.3">Error Handling</a></td></tr> +        </table> +      </td> +    </tr> + +    <tr> +      <th></th><td><a href="#A">Appendix A — Supported XML Schema Constructs</a></td> +    </tr> + +  </table> +  </div> + +  <h1><a name="0">Preface</a></h1> + +  <h2><a name="0.1">About This Document</a></h2> + +  <p>The goal of this document is to provide you with an understanding of +     the C++/Parser programming model and allow you to efficiently evaluate +     XSD against your project's technical requirements. As such, this +     document is intended for C++ developers and software architects +     who are looking for an XML processing solution. Prior experience +     with XML and C++ is required to understand this document. Basic +     understanding of XML Schema is advantageous but not expected +     or required. 
+  </p> + + +  <h2><a name="0.2">More Information</a></h2> + +  <p>Beyond this guide, you may also find the following sources of +     information useful:</p> + +  <ul class="list"> +    <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD +        Compiler Command Line Manual</a></li> + +    <li>The <code>cxx/parser/</code> directory in the +        <a href="https://cppget.org/xsd-examples">xsd-examples</a> package +        contains a collection of examples and a README file with an overview +        of each example.</li> + +    <li>The <code>README</code> file in the +        <a href="https://cppget.org/xsd-examples">xsd-examples</a> package +        explains how to build the examples.</li> + +    <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> +        mailing list is the place to ask technical questions about XSD and the C++/Parser mapping. +        Furthermore, the <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> +        may already have answers to some of your questions.</li> + +  </ul> + +  <!-- Introduction --> + +  <h1><a name="1">1 Introduction</a></h1> + +  <p>Welcome to CodeSynthesis XSD and the C++/Parser mapping. XSD is a +     cross-platform W3C XML Schema to C++ data binding compiler. C++/Parser +     is a W3C XML Schema to C++ mapping that represents an XML vocabulary +     as a set of parser skeletons which you can implement to perform XML +     processing as required by your application logic. +  </p> + +  <h2><a name="1.1">1.1 Mapping Overview</a></h2> + +  <p>The C++/Parser mapping provides event-driven, stream-oriented +     XML parsing, XML Schema validation, and C++ data binding. It was +     specifically designed and optimized for high performance and +     small footprint. 
Based on the static analysis of the schemas, XSD +     generates compact, highly-optimized hierarchical state machines +     that combine data extraction, validation, and even dispatching +     in a single step. As a result, the generated code is typically +     2-10 times faster than general-purpose validating XML parsers +     while maintaining the lowest static and dynamic memory footprints. +  </p> + +  <p>To speed up application development, the C++/Parser mapping +     can be instructed to generate sample parser implementations +     and a test driver which can then be filled with the application +     logic code. The mapping also provides a wide range of +     mechanisms for controlling and customizing the generated code.</p> + +  <p>The next chapter shows how to create a simple application that uses +     the C++/Parser mapping to parse, validate, and extract data from a +     simple XML document. The following chapters show how to +     use the C++/Parser mapping in more detail.</p> + +  <h2><a name="1.2">1.2 Benefits</a></h2> + +  <p>Traditional XML access APIs such as Document Object Model (DOM) +     or Simple API for XML (SAX) have a number of drawbacks that +     make them less suitable for creating robust and maintainable +     XML processing applications. These drawbacks include: +  </p> + +  <ul class="list"> +    <li>Generic representation of XML in terms of elements, attributes, +        and text forces an application developer to write a substantial +        amount of bridging code that identifies and transforms pieces +        of information encoded in XML to a representation more suitable +        for consumption by the application logic.</li> + +    <li>String-based flow control defers error detection to runtime. 
+        It also reduces code readability and maintainability.</li> + +    <li>Lack of type safety because the data is represented +        as text.</li> + +    <li>Resulting applications are hard to debug, change, and +        maintain.</li> +  </ul> + +  <p>In contrast, statically-typed, vocabulary-specific parser +     skeletons produced by the C++/Parser mapping allow you to +     operate in your domain terms instead of the generic elements, +     attributes, and text. Static typing helps catch errors at +     compile-time rather than at run-time. Automatic code generation +     frees you for more interesting tasks (such as doing something +     useful with the information stored in the XML documents) and +     minimizes the effort needed to adapt your applications to +     changes in the document structure. To summarize, the C++/Parser +     mapping has the following key advantages over generic XML +     access APIs:</p> + +  <ul class="list"> +    <li><b>Ease of use.</b> The generated code hides all the complexity +        associated with recreating the document structure, maintaining the +        dispatch state, and converting the data from the text representation +        to data types suitable for manipulation by the application logic. +        Parser skeletons also provide a convenient mechanism for building +        custom in-memory representations.</li> + +    <li><b>Natural representation.</b> The generated parser skeletons +        implement parser callbacks as virtual functions with names +        corresponding to elements and attributes in XML. As a result, +        you process the XML data using your domain vocabulary instead +        of generic elements, attributes, and text. 
+    </li> + +    <li><b>Concise code.</b> With a separate parser skeleton for each +        XML Schema type, the application implementation is +        simpler and thus easier to read and understand.</li> + +    <li><b>Safety.</b> The XML data is delivered to parser callbacks as +        statically typed objects. The parser callbacks themselves are virtual +        functions. This helps catch programming errors at compile-time +        rather than at runtime.</li> + +    <li><b>Maintainability.</b> Automatic code generation minimizes the +        effort needed to adapt the application to changes in the +        document structure. With static typing, the C++ compiler +        can pin-point the places in the application code that need to be +        changed.</li> + +   <li><b>Efficiency.</b> The generated parser skeletons combine +       data extraction, validation, and even dispatching in a single +       step. This makes them much more efficient than traditional +       architectures with separate stages for validation and data +       extraction/dispatch.</li> +  </ul> + +  <!-- Hello World Parser --> + + +  <h1><a name="2">2 Hello World Example</a></h1> + +  <p>In this chapter we will examine how to parse a very simple XML +     document using the XSD-generated C++/Parser skeletons. +     The code presented in this chapter is based on the <code>hello</code> +     example which can be found in the <code>cxx/parser/</code> directory in +     the <a href="https://cppget.org/xsd-examples">xsd-examples</a> +     package.</p> + +  <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + +  <p>First, we need to get an idea about the structure +     of the XML documents we are going to process. 
Our +     <code>hello.xml</code>, for example, could look like this:</p> + +  <pre class="xml"> +<?xml version="1.0"?> +<hello> + +  <greeting>Hello</greeting> + +  <name>sun</name> +  <name>moon</name> +  <name>world</name> + +</hello> +  </pre> + +  <p>Then we can write a description of the above XML in the +     XML Schema language and save it into <code>hello.xsd</code>:</p> + +  <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + +  <xs:complexType name="hello"> +    <xs:sequence> +      <xs:element name="greeting" type="xs:string"/> +      <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> +    </xs:sequence> +  </xs:complexType> + +  <xs:element name="hello" type="hello"/> + +</xs:schema> +  </pre> + +  <p>Even if you are not familiar with XML Schema, it +     should be easy to connect declarations in <code>hello.xsd</code> +     to elements in <code>hello.xml</code>. The <code>hello</code> type +     is defined as a sequence of the nested <code>greeting</code> and +     <code>name</code> elements. Note that the term sequence in XML +     Schema means that elements should appear in a particular order +     as opposed to appearing multiple times. The <code>name</code> +     element has its <code>maxOccurs</code> property set to +     <code>unbounded</code> which means it can appear multiple times +     in an XML document. Finally, the globally-defined <code>hello</code> +     element prescribes the root element for our vocabulary. For an +     easily-approachable introduction to XML Schema refer to +     <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: +     Primer</a>.</p> + +  <p>The above schema is a specification of our XML vocabulary; it tells +     everybody what valid documents of our XML-based language should look +     like. 
The next step is to compile this schema to generate +     the C++ parser skeletons.</p> + +  <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + +  <p>Now we are ready to translate our <code>hello.xsd</code> to C++ parser +     skeletons. To do this we invoke the XSD compiler from a terminal +     (UNIX) or a command prompt (Windows): +  </p> + +  <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat hello.xsd +  </pre> + +  <p>The <code>--xml-parser</code> option indicates that we want to +     use Expat as the underlying XML parser (see <a href="#5.3">Section +     5.3, "Underlying XML Parser"</a>). The XSD compiler produces two +     C++ files: <code>hello-pskel.hxx</code> and <code>hello-pskel.cxx</code>. +     The following code fragment is taken from <code>hello-pskel.hxx</code>; +     it should give you an idea about what gets generated: +  </p> + +  <pre class="c++"> +class hello_pskel +{ +public: +  // Parser callbacks. Override them in your implementation. +  // +  virtual void +  pre (); + +  virtual void +  greeting (const std::string&); + +  virtual void +  name (const std::string&); + +  virtual void +  post_hello (); + +  // Parser construction API. +  // +  void +  greeting_parser (xml_schema::string_pskel&); + +  void +  name_parser (xml_schema::string_pskel&); + +  void +  parsers (xml_schema::string_pskel& /* greeting */, +           xml_schema::string_pskel& /* name */); + +private: +  ... +}; +  </pre> + +  <p>The first four member functions shown above are called parser +     callbacks. You would normally override them in your implementation +     of the parser to do something useful. Let's go through all of +     them one by one.</p> + +  <p>The <code>pre()</code> function is an initialization callback. It is +    called when a new element of type <code>hello</code> is about +    to be parsed. 
You would normally use this function to allocate a new +    instance of the resulting type or clear accumulators that are used +    to gather information during parsing. The default implementation +    of this function does nothing.</p> + +  <p>The <code>post_hello()</code> function is a finalization callback. Its +     name is constructed by adding the parser skeleton name to the +     <code>post_</code> prefix. The finalization callback is called when +     parsing of the element is complete and the result, if any, should +     be returned. Note that in our case the return type of +     <code>post_hello()</code> is <code>void</code> which means there +     is nothing to return. More on parser return types later. +  </p> + +  <p>You may be wondering why the finalization callback is called +     <code>post_hello()</code> instead of <code>post()</code> just +     like <code>pre()</code>. The reason for this is that +     finalization callbacks can have different return types and +     result in function signature clashes across inheritance +     hierarchies. To prevent this the signatures of finalization +     callbacks are made unique by adding the type name to their names.</p> + +  <p>The <code>greeting()</code> and <code>name()</code> functions are +     called when the <code>greeting</code> and <code>name</code> elements +     have been parsed, respectively. Their arguments are of type +     <code>std::string</code> and contain the data extracted from XML.</p> + +  <p>The last three functions are for connecting parsers to each other. +     For example, there is a predefined parser for built-in XML Schema type +     <code>string</code> in the XSD runtime. 
We will be using +     it to parse the contents of <code>greeting</code> and +     <code>name</code> elements, as shown in the next section.</p> + +  <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + +  <p>At this point we have all the parts we need to do something useful +     with the information stored in our XML document. The first step is +     to implement the parser: +  </p> + +  <pre class="c++"> +#include <iostream> +#include "hello-pskel.hxx" + +class hello_pimpl: public hello_pskel +{ +public: +  virtual void +  greeting (const std::string& g) +  { +    greeting_ = g; +  } + +  virtual void +  name (const std::string& n) +  { +    std::cout << greeting_ << ", " << n << "!" << std::endl; +  } + +private: +  std::string greeting_; +}; +  </pre> + +  <p>We left both <code>pre()</code> and <code>post_hello()</code> with the +     default implementations; we don't have anything to initialize or +     return. The rest is pretty straightforward: we store the greeting +     in a member variable and later, when parsing names, use it to +     say hello.</p> + +  <p>An observant reader may ask what happens if the <code>name</code> +     element comes before <code>greeting</code>? Don't we need to +     make sure <code>greeting_</code> was initialized and report +     an error otherwise? The answer is no, we don't have to do +     any of this. The <code>hello_pskel</code> parser skeleton +     performs validation of XML according to the schema from which +     it was generated. As a result, it will check the order +     of the <code>greeting</code> and <code>name</code> elements +     and report an error if it is violated.</p> + +  <p>Now it is time to put this parser implementation to work:</p> + +  <pre class="c++"> +using namespace std; + +int +main (int argc, char* argv[]) +{ +  try +  { +    // Construct the parser. 
+    // +    xml_schema::string_pimpl string_p; +    hello_pimpl hello_p; + +    hello_p.greeting_parser (string_p); +    hello_p.name_parser (string_p); + +    // Parse the XML instance. +    // +    xml_schema::document doc_p (hello_p, "hello"); + +    hello_p.pre (); +    doc_p.parse (argv[1]); +    hello_p.post_hello (); +  } +  catch (const xml_schema::exception& e) +  { +    cerr << e << endl; +    return 1; +  } +} +  </pre> + +  <p>The first part of this code snippet instantiates individual parsers +     and assembles them into a complete vocabulary parser. +     <code>xml_schema::string_pimpl</code> is an implementation of a parser +     for built-in XML Schema type <code>string</code>. It is provided by +     the XSD runtime along with parsers for other built-in types (for +     more information on the built-in parsers see <a href="#6">Chapter 6, +     "Built-In XML Schema Type Parsers"</a>). We use <code>string_pimpl</code> +     to parse the <code>greeting</code> and <code>name</code> elements as +     indicated by the calls to <code>greeting_parser()</code> and +     <code>name_parser()</code>. +  </p> + +  <p>Then we instantiate a document parser (<code>doc_p</code>). The +     first argument to its constructor is the parser for +     the root element (<code>hello_p</code> in our case). The +     second argument is the root element name. +   </p> + +  <p>The final piece is the calls to <code>pre()</code>, <code>parse()</code>, +     and <code>post_hello()</code>. The call to <code>parse()</code> +     performs the actual XML parsing while the calls to <code>pre()</code> and +     <code>post_hello()</code> make sure that the parser for the root +     element can perform proper initialization and cleanup.</p> + +  <p>While our parser implementation and test driver are pretty small and +     easy to write by hand, for bigger XML vocabularies it can be a +     substantial effort. 
To help with this task XSD can automatically +     generate sample parser implementations and a test driver from your +     schemas. You can request the generation of a sample implementation with +     empty function bodies by specifying the <code>--generate-noop-impl</code> +     option. Or you can generate a sample implementation that prints the +     data stored in XML by using the <code>--generate-print-impl</code> +     option. To request the generation of a test driver you can use the +     <code>--generate-test-driver</code> option. For more information +     on these options refer to the +     <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD +     Compiler Command Line Manual</a>. The <code>'generated'</code> example +     in the <a href="https://cppget.org/xsd-examples">xsd-examples</a> package +     shows the sample implementation generation feature in action.</p> + + +  <h2><a name="2.4">2.4 Compiling and Running</a></h2> + +  <p>After saving all the parts from the previous section in +     <code>driver.cxx</code>, we are ready to compile our first +     application and run it on the test XML document. On a UNIX +     system this can be done with the following commands: +  </p> + +  <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx hello-pskel.cxx +$ c++ -std=c++11 -o driver driver.o hello-pskel.o -lexpat +$ ./driver hello.xml +Hello, sun! +Hello, moon! +Hello, world! +  </pre> + +  <p>Here <code>.../libxsd</code> represents the path to the +     <a href="https://cppget.org/libxsd">libxsd</a> package root +     directory. We can also test the error handling. 
To test XML +     well-formedness checking, we can try to parse +     <code>hello-pskel.hxx</code>:</p> + +  <pre class="terminal"> +$ ./driver hello-pskel.hxx +hello-pskel.hxx:1:0: not well-formed (invalid token) +  </pre> + +  <p>We can also try to parse a valid XML but not from our +     vocabulary, for example <code>hello.xsd</code>:</p> + +  <pre class="terminal"> +$ ./driver hello.xsd +hello.xsd:2:0: expected element 'hello' instead of +'http://www.w3.org/2001/XMLSchema#schema' +  </pre> + + +  <!-- Chapter 3 --> + + +  <h1><a name="3">3 Parser Skeletons</a></h1> + +  <p>As we have seen in the previous chapter, the XSD compiler generates +     a parser skeleton class for each type defined in XML Schema. In +     this chapter we will take a closer look at different functions +     that comprise a parser skeleton as well as the way to connect +     our implementations of these parser skeletons to create a complete +     parser.</p> + +  <p>In this and subsequent chapters we will use the following schema +     that describes a collection of person records. 
We save it in +     <code>people.xsd</code>:</p> + +  <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + +  <xs:simpleType name="gender"> +    <xs:restriction base="xs:string"> +      <xs:enumeration value="male"/> +      <xs:enumeration value="female"/> +    </xs:restriction> +  </xs:simpleType> + +  <xs:complexType name="person"> +    <xs:sequence> +      <xs:element name="first-name" type="xs:string"/> +      <xs:element name="last-name" type="xs:string"/> +      <xs:element name="gender" type="gender"/> +      <xs:element name="age" type="xs:short"/> +    </xs:sequence> +  </xs:complexType> + +  <xs:complexType name="people"> +    <xs:sequence> +      <xs:element name="person" type="person" maxOccurs="unbounded"/> +    </xs:sequence> +  </xs:complexType> + +  <xs:element name="people" type="people"/> + +</xs:schema> +  </pre> + +  <p>A sample XML instance to go along with this schema is saved +     in <code>people.xml</code>:</p> + +  <pre class="xml"> +<?xml version="1.0"?> +<people> +  <person> +    <first-name>John</first-name> +    <last-name>Doe</last-name> +    <gender>male</gender> +    <age>32</age> +  </person> +  <person> +    <first-name>Jane</first-name> +    <last-name>Doe</last-name> +    <gender>female</gender> +    <age>28</age> +  </person> +</people> +  </pre> + +  <p>Compiling <code>people.xsd</code> with the XSD compiler results +     in three parser skeletons being generated: <code>gender_pskel</code>, +     <code>person_pskel</code>, and <code>people_pskel</code>. We are going +     to examine and implement each of them in the subsequent sections.</p> + +  <h2><a name="3.1">3.1 Implementing the Gender Parser</a></h2> + +  <p>The generated <code>gender_pskel</code> parser skeleton looks like +     this:</p> + +  <pre class="c++"> +class gender_pskel: public virtual xml_schema::string_pskel +{ +public: +  // Parser callbacks. Override them in your implementation. 
+  // +  virtual void +  pre (); + +  virtual void +  post_gender (); +}; +  </pre> + +  <p>Notice that <code>gender_pskel</code> inherits from +     <code>xml_schema::string_pskel</code> which is a parser skeleton +     for built-in XML Schema type <code>string</code> and is +     predefined in the XSD runtime library. This is an example +     of the general rule that parser skeletons follow: if a type +     in XML Schema inherits from another then there will be an +     equivalent inheritance between the corresponding parser +     skeleton classes.</p> + +  <p>The <code>pre()</code> and <code>post_gender()</code> callbacks +     should look familiar from the previous chapter. Let's now +     implement the parser. Our implementation will simply print +     the gender to <code>cout</code>:</p> + + +  <pre class="c++"> +class gender_pimpl: public gender_pskel, +                    public xml_schema::string_pimpl +{ +public: +  virtual void +  post_gender () +  { +    std::string s = post_string (); +    cout << "gender: " << s << endl; +  } +}; +  </pre> + +  <p>While the code is quite short, there is a lot going on. First, +     notice that we are inheriting from <code>gender_pskel</code> <em>and</em> +     from <code>xml_schema::string_pimpl</code>. We've encountered +     <code>xml_schema::string_pimpl</code> already; it is an +     implementation of the <code>xml_schema::string_pskel</code> parser +     skeleton for built-in XML Schema type <code>string</code>.</p> + +  <p>This is another common theme in the C++/Parser programming model: +     reusing implementations of the base parsers in the derived ones with +     the C++ mixin idiom. 
In our case, <code>string_pimpl</code> will +     do all the dirty work of extracting the data and we can just get +     it at the end with the call to <code>post_string()</code>.</p> + +  <p>In case you are curious, here is what +     <code>xml_schema::string_pskel</code> and +     <code>xml_schema::string_pimpl</code> look like:</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class string_pskel: public simple_content +  { +  public: +    virtual std::string +    post_string () = 0; +  }; + +  class string_pimpl: public virtual string_pskel +  { +  public: +    virtual void +    _pre (); + +    virtual void +    _characters (const xml_schema::ro_string&); + +    virtual std::string +    post_string (); + +  protected: +    std::string str_; +  }; +} +  </pre> + +  <p>There are three new pieces in this code that we haven't seen yet. +     They are the <code>simple_content</code> class as well as +     the <code>_pre()</code> and <code>_characters()</code> functions. +     The <code>simple_content</code> class is defined in the XSD +     runtime and is a base class for all parser skeletons that conform +     to the simple content model in XML Schema. Types with the +     simple content model cannot have nested elements—only text +     and attributes. There is also the <code>complex_content</code> +     class which corresponds to the complex content model (types with +     nested elements, for example, <code>person</code> from +     <code>people.xsd</code>).</p> + +  <p>The <code>_pre()</code> function is a parser callback. Remember we +     talked about the <code>pre()</code> and <code>post_*()</code> callbacks +     in the previous chapter? There are actually two more callbacks +     with similar roles: <code>_pre()</code> and <code>_post()</code>. 
+     As a result, each parser skeleton has four special callbacks:</p> + +  <pre class="c++"> +  virtual void +  pre (); + +  virtual void +  _pre (); + +  virtual void +  _post (); + +  virtual void +  post_name (); +  </pre> + +  <p><code>pre()</code> and <code>_pre()</code> are initialization +     callbacks. They get called in that order before a new instance of the type +     is about to be parsed. The difference between <code>pre()</code> and +     <code>_pre()</code> is conventional: <code>pre()</code> can +     be completely overridden by a derived parser. The derived +     parser can also override <code>_pre()</code> but has to always call +     the original version. This allows you to partition initialization +     into customizable and required parts.</p> + +  <p>Similarly, <code>_post()</code> and <code>post_name()</code> are +     finalization callbacks with exactly the same semantics: +    <code>post_name()</code> can be completely overridden by the derived +     parser while the original <code>_post()</code> should always be called. +  </p> + +  <p>The final bit we need to discuss in this section is the +     <code>_characters()</code> function. As you might have guessed, it +     is also a callback. A low-level one that delivers raw character content +     for the type being parsed. You will seldom need to use this callback +     directly. Using implementations for the built-in parsers provided by +     the XSD runtime is usually a simpler and more convenient +     alternative.</p> + +  <p>At this point you might be wondering why some <code>post_*()</code> +     callbacks, for example <code>post_string()</code>, return some data +     while others, for example <code>post_gender()</code>, have +     <code>void</code> as a return type. 
This is a valid concern +     and it will be addressed in the next chapter.</p> + +  <h2><a name="3.2">3.2 Implementing the Person Parser</a></h2> + +  <p>The generated <code>person_pskel</code> parser skeleton looks like +     this:</p> + +  <pre class="c++"> +class person_pskel: public xml_schema::complex_content +{ +public: +  // Parser callbacks. Override them in your implementation. +  // +  virtual void +  pre (); + +  virtual void +  first_name (const std::string&); + +  virtual void +  last_name (const std::string&); + +  virtual void +  gender (); + +  virtual void +  age (short); + +  virtual void +  post_person (); + +  // Parser construction API. +  // +  void +  first_name_parser (xml_schema::string_pskel&); + +  void +  last_name_parser (xml_schema::string_pskel&); + +  void +  gender_parser (gender_pskel&); + +  void +  age_parser (xml_schema::short_pskel&); + +  void +  parsers (xml_schema::string_pskel& /* first-name */, +           xml_schema::string_pskel& /* last-name */, +           gender_pskel&             /* gender */, +           xml_schema::short_pskel&  /* age */); +}; +  </pre> + + +  <p>As you can see, we have a parser callback for each of the nested +     elements found in the <code>person</code> XML Schema type. 
+     The implementation of this parser is straightforward:</p> + +  <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: +  virtual void +  first_name (const std::string& f) +  { +    cout << "first: " << f << endl; +  } + +  virtual void +  last_name (const std::string& l) +  { +    cout << "last: " << l << endl; +  } + +  virtual void +  age (short a) +  { +    cout << "age: " << a << endl; +  } +}; +  </pre> + +  <p>Notice that we didn't override the <code>gender()</code> callback +     because all the printing is done by <code>gender_pimpl</code>.</p> + + +  <h2><a name="3.3">3.3 Implementing the People Parser</a></h2> + +  <p>The generated <code>people_pskel</code> parser skeleton looks like +     this:</p> + +  <pre class="c++"> +class people_pskel: public xml_schema::complex_content +{ +public: +  // Parser callbacks. Override them in your implementation. +  // +  virtual void +  pre (); + +  virtual void +  person (); + +  virtual void +  post_people (); + +  // Parser construction API. +  // +  void +  person_parser (person_pskel&); + +  void +  parsers (person_pskel& /* person */); +}; +  </pre> + +  <p>The <code>person()</code> callback will be called after parsing each +     <code>person</code> element. While <code>person_pimpl</code> does +     all the printing, one useful thing we can do in this callback is to +     print an extra newline after each person record so that our +     output is more readable:</p> + +  <pre class="c++"> +class people_pimpl: public people_pskel +{ +public: +  virtual void +  person () +  { +    cout << endl; +  } +}; +  </pre> + +  <p>Now it is time to put everything together.</p> + + +  <h2><a name="3.4">3.4 Connecting the Parsers Together</a></h2> + +  <p>At this point we have all the individual parsers implemented +     and can proceed to assemble them into a complete parser +     for our XML vocabulary. 
The first step is to instantiate +     all the individual parsers that we will need:</p> + +  <pre class="c++"> +xml_schema::short_pimpl short_p; +xml_schema::string_pimpl string_p; + +gender_pimpl gender_p; +person_pimpl person_p; +people_pimpl people_p; +  </pre> + +  <p>Notice that our schema uses two built-in XML Schema types: +     <code>string</code> for the <code>first-name</code> and +     <code>last-name</code> elements as well as <code>short</code> +     for <code>age</code>. We will use predefined parsers that +     come with the XSD runtime to handle these types. The next +     step is to connect all the individual parsers. We do this +     with the help of functions defined in the parser +     skeletons and marked with the "Parser Construction API" +     comment. One way to do it is to connect each individual +     parser by calling the <code>*_parser()</code> functions:</p> + +  <pre class="c++"> +person_p.first_name_parser (string_p); +person_p.last_name_parser (string_p); +person_p.gender_parser (gender_p); +person_p.age_parser (short_p); + +people_p.person_parser (person_p); +  </pre> + +  <p>You might be wondering what happens if you do not provide +     a parser by not calling one of the <code>*_parser()</code> functions. +     In that case the corresponding XML content will be skipped, +     including validation. This is an efficient way to ignore parts +     of the document that you are not interested in.</p> + + +  <p>An alternative, shorter, way to connect the parsers is by using +     the <code>parsers()</code> function which connects all the parsers +     for a given type at once:</p> + +  <pre class="c++"> +person_p.parsers (string_p, string_p, gender_p, short_p); +people_p.parsers (person_p); +  </pre> + +  <p>The following figure illustrates the resulting connections. 
Notice +     the correspondence between return types of the <code>post_*()</code> +     functions and argument types of element callbacks that are connected +     by the arrows.</p> + +  <!-- align=center is needed for html2ps --> +  <div class="img" align="center"><img src="figure-1.png" alt="Diagram of the connections between the parsers"/></div> + +  <p>The last step is the construction of the document parser and +     invocation of the complete parser on our sample XML instance:</p> + +  <pre class="c++"> +xml_schema::document doc_p (people_p, "people"); + +people_p.pre (); +doc_p.parse ("people.xml"); +people_p.post_people (); +  </pre> + +  <p>Let's consider <code>xml_schema::document</code> in +     more detail. While the exact definition of this class +     varies depending on the underlying parser selected, +     here is the common part:</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class document +  { +  public: +    document (xml_schema::parser_base&, +              const std::string& root_element_name, +              bool polymorphic = false); + +    document (xml_schema::parser_base&, +              const std::string& root_element_namespace, +              const std::string& root_element_name, +              bool polymorphic = false); + +    void +    parse (const std::string& file); + +    void +    parse (std::istream&); + +    ... + +  }; +} +  </pre> + +   <p><code>xml_schema::document</code> is a root parser for +     the vocabulary. The first argument to its constructors is the +     parser for the type of the root element (<code>people_pimpl</code> +     in our case). Because a type parser is only concerned with +     the element's content and not with the element's name, we need +     to specify the root element's name somewhere. 
That's +     what is passed as the second and third arguments to the +     <code>document</code>'s constructors.</p> + +   <p>There are also two overloaded <code>parse()</code> functions +      defined in the <code>document</code> class (there are actually +      more but the others are specific to the underlying XML parser). +      The first version parses a local file identified by a name. The +      second version reads the data from an input stream. For more +      information on the <code>xml_schema::document</code> class +      refer to <a href="#7">Chapter 7, "Document Parser and Error +      Handling"</a>.</p> + +   <p>Let's now consider a step-by-step list of actions that happen +      as we parse through <code>people.xml</code>. The content of +      <code>people.xml</code> is repeated below for convenience.</p> + +  <pre class="xml"> +<?xml version="1.0"?> +<people> +  <person> +    <first-name>John</first-name> +    <last-name>Doe</last-name> +    <gender>male</gender> +    <age>32</age> +  </person> +  <person> +    <first-name>Jane</first-name> +    <last-name>Doe</last-name> +    <gender>female</gender> +    <age>28</age> +  </person> +</people> +  </pre> + + +   <ol class="steps"> +     <li><code>people_p.pre()</code> is called from +         <code>main()</code>. We did not provide any implementation +         for this callback so this call is a no-op.</li> + +     <li><code>doc_p.parse("people.xml")</code> is called from +         <code>main()</code>. The parser opens the file and starts +         parsing its content.</li> + +     <li>The parser encounters the root element. <code>doc_p</code> +         verifies that the root element is correct and calls +         <code>_pre()</code> on <code>people_p</code> which is also +         a no-op. Parsing is now delegated to <code>people_p</code>.</li> + +     <li>The parser encounters the <code>person</code> element. 
+         <code>people_p</code> determines that <code>person_p</code> +         is responsible for parsing this element. <code>pre()</code> +         and <code>_pre()</code> callbacks are called on <code>person_p</code>. +         Parsing is now delegated to <code>person_p</code>.</li> + +     <li>The parser encounters the <code>first-name</code> element. +         <code>person_p</code> determines that <code>string_p</code> +         is responsible for parsing this element. <code>pre()</code> +         and <code>_pre()</code> callbacks are called on <code>string_p</code>. +         Parsing is now delegated to <code>string_p</code>.</li> + +     <li>The parser encounters character content consisting of +         <code>"John"</code>. The <code>_characters()</code> callback is +         called on <code>string_p</code>.</li> + +     <li>The parser encounters the end of <code>first-name</code> +         element. The <code>_post()</code> and <code>post_string()</code> +         callbacks are called on <code>string_p</code>. The +         <code>first_name()</code> callback is called on <code>person_p</code> +         with the return value of <code>post_string()</code>. The +         <code>first_name()</code> implementation prints +         <code>"first: John"</code> to <code>cout</code>. +         Parsing is now returned to <code>person_p</code>.</li> + +     <li>Steps analogous to 5-7 are performed for the <code>last-name</code>, +         <code>gender</code>, and <code>age</code> elements.</li> + +     <li>The parser encounters the end of <code>person</code> +         element. The <code>_post()</code> and <code>post_person()</code> +         callbacks are called on <code>person_p</code>. The +         <code>person()</code> callback is called on <code>people_p</code>. +         The <code>person()</code> implementation prints a new line +         to <code>cout</code>. 
Parsing is now returned to +         <code>people_p</code>.</li> + +     <li>Steps 4-9 are performed for the second <code>person</code> +         element.</li> + +     <li>The parser encounters the end of <code>people</code> +         element. The <code>_post()</code> callback is called on +         <code>people_p</code>. The <code>doc_p.parse("people.xml")</code> +         call returns to <code>main()</code>.</li> + +     <li><code>people_p.post_people()</code> is called from +         <code>main()</code> which is a no-op.</li> + +   </ol> + + +  <!-- Chapter 4 --> + + +  <h1><a name="4">4 Type Maps</a></h1> + +  <p>There are many useful things you can do inside parser callbacks as they +     are right now. There are, however, times when you want to propagate +     some information from one parser to another or to the caller of the +     parser. One common task that would greatly benefit from such a +     possibility is building a tree-like in-memory object model of the +     data stored in XML. 
During execution, each individual sub-parser +     would create a sub-tree and return it to its <em>parent</em> parser +     which can then incorporate this sub-tree into the whole tree.</p> + +  <p>In this chapter we will discuss the mechanisms offered by the +     C++/Parser mapping for returning information from individual +     parsers and see how to use them to build an object model +     of our people vocabulary.</p> + +  <h2><a name="4.1">4.1 Object Model</a></h2> + +  <p>An object model for our person record example could +     look like this (saved in the <code>people.hxx</code> file):</p> + +  <pre class="c++"> +#include <string> +#include <vector> + +enum gender +{ +  male, +  female +}; + +class person +{ +public: +  person (const std::string& first, +          const std::string& last, +          ::gender gender, +          short age) +    : first_ (first), last_ (last), +      gender_ (gender), age_ (age) +  { +  } + +  const std::string& +  first () const +  { +    return first_; +  } + +  const std::string& +  last () const +  { +    return last_; +  } + +  ::gender +  gender () const +  { +    return gender_; +  } + +  short +  age () const +  { +    return age_; +  } + +private: +  std::string first_; +  std::string last_; +  ::gender gender_; +  short age_; +}; + +typedef std::vector<person> people; +  </pre> + +  <p>While it is clear which parser is responsible for which part of +     the object model, it is not exactly clear how, for +     example, <code>gender_pimpl</code> will deliver <code>gender</code> +     to <code>person_pimpl</code>. You might have noticed that +     <code>string_pimpl</code> manages to deliver its value to the +     <code>first_name()</code> callback of <code>person_pimpl</code>. Let's +     see how we can utilize the same mechanism to propagate our +     own data.</p> + +  <p>There is a way to tell the XSD compiler that you want to +     exchange data between parsers. 
More precisely, for each +     type defined in XML Schema, you can tell the compiler two things. +     First, the return type of the <code>post_*()</code> callback +     in the parser skeleton generated for this type. And, second, +     the argument type for callbacks corresponding to elements and +     attributes of this type. For example, for XML Schema type +     <code>gender</code> we can specify the return type for +     <code>post_gender()</code> in the <code>gender_pskel</code> +     skeleton and the argument type for the <code>gender()</code> callback +     in the <code>person_pskel</code> skeleton. As you might have guessed, +     the generated code will then pass the return value from the +     <code>post_*()</code> callback as an argument to the element or +     attribute callback.</p> + +  <p>The way to tell the XSD compiler about these XML Schema to +     C++ mappings is with type map files. Here is a simple type +     map for the <code>gender</code> type from the previous paragraph:</p> + +  <pre class="type-map"> +include "people.hxx"; +gender ::gender ::gender; +  </pre> + +  <p>The first line indicates that the generated code must include +     <code>people.hxx</code> in order to get the definition for the +     <code>gender</code> type. The second line specifies that both +     argument and return types for the <code>gender</code> +     XML Schema type should be the <code>::gender</code> C++ enum +     (we use fully-qualified C++ names to avoid name clashes). +     The next section will describe the type map format in detail. 
+     We save this type map in <code>people.map</code> and +     then translate our schemas with the <code>--type-map</code> +     option to let the XSD compiler know about our type map:</p> + +  <pre class="terminal"> +$ xsd cxx-parser --type-map people.map people.xsd +  </pre> + +  <p>If we now look at the generated <code>people-pskel.hxx</code>, +     we will see the following changes in the <code>gender_pskel</code> and +     <code>person_pskel</code> skeletons:</p> + +  <pre class="c++"> +#include "people.hxx" + +class gender_pskel: public virtual xml_schema::string_pskel +{ +  virtual ::gender +  post_gender () = 0; + +  ... +}; + +class person_pskel: public xml_schema::complex_content +{ +  virtual void +  gender (::gender); + +  ... +}; +  </pre> + +  <p>Notice that <code>#include "people.hxx"</code> was added to +     the generated header file from the type map to provide the +     definition for the <code>gender</code> enum.</p> + +  <h2><a name="4.2">4.2 Type Map File Format</a></h2> + +  <p>Type map files are used to define a mapping between XML Schema +     and C++ types. The compiler uses this information +     to determine return types of <code>post_*()</code> +     callbacks in parser skeletons corresponding to XML Schema +     types as well as argument types for callbacks corresponding +     to elements and attributes of these types.</p> + +  <p>The compiler has a set of predefined mapping rules that map +     the built-in XML Schema types to suitable C++ types (discussed +     below) and all other types to <code>void</code>. +     By providing your own type maps you can override these predefined +     rules. 
The format of the type map file is presented below: +  </p> + +  <pre class="type-map"> +namespace <schema-namespace> [<cxx-namespace>] +{ +  (include <file-name>;)* +  ([type] <schema-type> <cxx-ret-type> [<cxx-arg-type>];)* +} +  </pre> + +  <p>Both <code><i><schema-namespace></i></code> and +     <code><i><schema-type></i></code> are regex patterns while +     <code><i><cxx-namespace></i></code>, +     <code><i><cxx-ret-type></i></code>, and +     <code><i><cxx-arg-type></i></code> are regex pattern +     substitutions. All names can be optionally enclosed in +     <code>" "</code>, for example, to include white-spaces.</p> + +  <p><code><i><schema-namespace></i></code> determines XML +     Schema namespace. Optional <code><i><cxx-namespace></i></code> +     is prefixed to every C++ type name in this namespace declaration. +     <code><i><cxx-ret-type></i></code> is a C++ type name that is +     used as a return type for the <code>post_*()</code> callback. +     Optional <code><i><cxx-arg-type></i></code> is an argument +     type for callbacks corresponding to elements and attributes +     of this type. If <code><i><cxx-arg-type></i></code> is not +     specified, it defaults to <code><i><cxx-ret-type></i></code> +     if <code><i><cxx-ret-type></i></code> ends with <code>*</code> or +     <code>&</code> (that is, it is a pointer or a reference) and +     <code>const <i><cxx-ret-type></i>&</code> +     otherwise. +     <code><i><file-name></i></code> is a file name either in the +     <code>" "</code> or <code>< ></code> format +     and is added with the <code>#include</code> directive to +     the generated code.</p> + +  <p>The <code><b>#</b></code> character starts a comment that ends +     with a new line or end of file. To specify a name that contains +     <code><b>#</b></code> enclose it in <code><b>" "</b></code>. +     For example:</p> + +  <pre> +namespace http://www.example.com/xmlns/my my +{ +  include "my.hxx"; + +  # Pass apples by value. 
+  # +  apple apple; + +  # Pass oranges as pointers. +  # +  orange orange_t*; +} +  </pre> + +  <p>In the example above, for the +     <code>http://www.example.com/xmlns/my#orange</code> +     XML Schema type, the <code>my::orange_t*</code> C++ type will +     be used as both return and argument types.</p> + +  <p>Several namespace declarations can be specified in a single +     file. The namespace declaration can also be completely +     omitted to map types in a schema without a namespace. For +     instance:</p> + +  <pre class="type-map"> +include "my.hxx"; +apple apple; + +namespace http://www.example.com/xmlns/my +{ +  orange "const orange_t*"; +} +  </pre> + +  <p>The compiler has a number of predefined mapping rules for +     the built-in XML Schema types which can be presented as the +     following map files. The string-based XML Schema types are +     mapped to either <code>std::string</code> or +     <code>std::wstring</code> depending on the character type +     selected (see <a href="#5.2"> Section 5.2, "Character Type and +     Encoding"</a> for more information). 
The binary XML Schema +     types are mapped to either <code>std::unique_ptr<xml_schema::buffer></code> +     or <code>std::auto_ptr<xml_schema::buffer></code> +     depending on the C++ standard selected (C++11 or C++98, +     respectively; refer to the <code>--std</code> XSD compiler +     command line option for details).</p> + +  <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ +  boolean bool bool; + +  byte "signed char" "signed char"; +  unsignedByte "unsigned char" "unsigned char"; + +  short short short; +  unsignedShort "unsigned short" "unsigned short"; + +  int int int; +  unsignedInt "unsigned int" "unsigned int"; + +  long "long long" "long long"; +  unsignedLong "unsigned long long" "unsigned long long"; + +  integer "long long" "long long"; + +  negativeInteger "long long" "long long"; +  nonPositiveInteger "long long" "long long"; + +  positiveInteger "unsigned long long" "unsigned long long"; +  nonNegativeInteger "unsigned long long" "unsigned long long"; + +  float float float; +  double double double; +  decimal double double; + +  string std::string; +  normalizedString std::string; +  token std::string; +  Name std::string; +  NMTOKEN std::string; +  NCName std::string; +  ID std::string; +  IDREF std::string; +  language std::string; +  anyURI std::string; + +  NMTOKENS xml_schema::string_sequence; +  IDREFS xml_schema::string_sequence; + +  QName xml_schema::qname; + +  base64Binary std::[unique|auto]_ptr<xml_schema::buffer> +               std::[unique|auto]_ptr<xml_schema::buffer>; +  hexBinary std::[unique|auto]_ptr<xml_schema::buffer> +            std::[unique|auto]_ptr<xml_schema::buffer>; + +  date xml_schema::date; +  dateTime xml_schema::date_time; +  duration xml_schema::duration; +  gDay xml_schema::gday; +  gMonth xml_schema::gmonth; +  gMonthDay xml_schema::gmonth_day; +  gYear xml_schema::gyear; +  gYearMonth xml_schema::gyear_month; +  time xml_schema::time; +} +  </pre> + +  <p>For more information about 
the mapping of the built-in XML Schema types +     to C++ types refer to <a href="#6">Chapter 6, "Built-In XML Schema Type +     Parsers"</a>. The last predefined rule maps anything that wasn't +     mapped by previous rules to <code>void</code>:</p> + +  <pre class="type-map"> +namespace .* +{ +  .* void void; +} +  </pre> + + +  <p>When you provide your own type maps with the +     <code>--type-map</code> option, they are evaluated first. This +     allows you to selectively override any of the predefined rules. +     Note also that if you change the mapping +     of a built-in XML Schema type then it becomes your responsibility +     to provide the corresponding parser skeleton and implementation +     in the <code>xml_schema</code> namespace. You can include the +     custom definitions into the generated header file using the +     <code>--hxx-prologue-*</code> options.</p> + +  <h2><a name="4.3">4.3 Parser Implementations</a></h2> + +  <p>With the knowledge from the previous section, we can proceed +     with creating a type map that maps types in the <code>people.xsd</code> +     schema to our object model classes in +     <code>people.hxx</code>. In fact, we already have the beginning +     of our type map file in <code>people.map</code>. Let's extend +     it with the rest of the types:</p> + +  <pre class="type-map"> +include "people.hxx"; + +gender ::gender ::gender; +person ::person; +people ::people; +  </pre> + +  <p>There are a few things to note about this type map. We did not +     provide the argument types for <code>person</code> and +     <code>people</code> because the default constant reference is +     exactly what we need. We also did not provide any mappings +     for built-in XML Schema types <code>string</code> and +     <code>short</code> because they are handled by the predefined +     rules and we are happy with the result. Note also that +     all C++ types are fully qualified. 
This is done to avoid +     potential name conflicts in the generated code. Now we can +     recompile our schema and move on to implementing the parsers:</p> + +  <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat --type-map people.map people.xsd +  </pre> + +  <p>Here is the implementation of our three parsers in full. One +     way to save typing when implementing your own parsers is +     to open the generated code and copy the signatures of parser +     callbacks into your code. Or you could always auto generate the +     sample implementations and fill them with your code.</p> + + +  <pre class="c++"> +#include "people-pskel.hxx" + +class gender_pimpl: public gender_pskel, +                    public xml_schema::string_pimpl +{ +public: +  virtual ::gender +  post_gender () +  { +    return post_string () == "male" ? male : female; +  } +}; + +class person_pimpl: public person_pskel +{ +public: +  virtual void +  first_name (const std::string& f) +  { +    first_ = f; +  } + +  virtual void +  last_name (const std::string& l) +  { +    last_ = l; +  } + +  virtual void +  gender (::gender g) +  { +    gender_ = g; +  } + +  virtual void +  age (short a) +  { +    age_ = a; +  } + +  virtual ::person +  post_person () +  { +    return ::person (first_, last_, gender_, age_); +  } + +private: +  std::string first_; +  std::string last_; +  ::gender gender_; +  short age_; +}; + +class people_pimpl: public people_pskel +{ +public: +  virtual void +  person (const ::person& p) +  { +    people_.push_back (p); +  } + +  virtual ::people +  post_people () +  { +    ::people r; +    r.swap (people_); +    return r; +  } + +private: +  ::people people_; +}; +  </pre> + +  <p>This code fragment should look familiar by now. Just note that +     all the <code>post_*()</code> callbacks now have return types instead +     of <code>void</code>. 
Here is the implementation of the test +     driver for this example:</p> + +  <pre class="c++"> +#include <iostream> + +using namespace std; + +int +main (int argc, char* argv[]) +{ +  // Construct the parser. +  // +  xml_schema::short_pimpl short_p; +  xml_schema::string_pimpl string_p; + +  gender_pimpl gender_p; +  person_pimpl person_p; +  people_pimpl people_p; + +  person_p.parsers (string_p, string_p, gender_p, short_p); +  people_p.parsers (person_p); + +  // Parse the document to obtain the object model. +  // +  xml_schema::document doc_p (people_p, "people"); + +  people_p.pre (); +  doc_p.parse (argv[1]); +  people ppl = people_p.post_people (); + +  // Print the object model. +  // +  for (people::iterator i (ppl.begin ()); i != ppl.end (); ++i) +  { +    cout << "first:  " << i->first () << endl +         << "last:   " << i->last () << endl +         << "gender: " << (i->gender () == male ? "male" : "female") << endl +         << "age:    " << i->age () << endl +         << endl; +  } +} +  </pre> + +  <p>The parser creation and assembly part is exactly the same as in +     the previous chapter. The parsing part is a bit different: +     <code>post_people()</code> now has a return value which is the +     complete object model. We store it in the +     <code>ppl</code> variable. The last bit of the code simply iterates +     over the <code>people</code> vector and prints the information +     for each person. 
We save the last two code fragments to +     <code>driver.cxx</code> and proceed to compile and test +     our new application:</p> + + +  <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx people-pskel.cxx +$ c++ -std=c++11 -o driver driver.o people-pskel.o -lexpat +$ ./driver people.xml +first:  John +last:   Doe +gender: male +age:    32 + +first:  Jane +last:   Doe +gender: female +age:    28 +  </pre> + + +  <!-- Mapping Configuration --> + + +  <h1><a name="5">5 Mapping Configuration</a></h1> + +  <p>The C++/Parser mapping has a number of configuration parameters that +     determine the overall properties and behavior of the generated code. +     Configuration parameters are specified with the XSD command line +     options and include the C++ standard, the character type that is used +     by the generated code, the underlying XML parser, whether the XML Schema +     validation is performed in the generated code, and support for XML Schema +     polymorphism. This chapter describes these configuration +     parameters in more detail. For more ways to configure the generated +     code refer to the +     <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD +     Compiler Command Line Manual</a>. +  </p> + +  <h2><a name="5.1">5.1 C++ Standard</a></h2> + +  <p>The C++/Parser mapping provides support for ISO/IEC C++ 2011 (C++11) +     and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the +     generated code we use the <code>--std</code> XSD compiler command +     line option. While the majority of the examples in this guide use +     C++11, the document explains the C++11/98 usage difference and so +     they can easily be converted to C++98.</p> + +  <h2><a name="5.2">5.2 Character Type and Encoding</a></h2> + +  <p>The C++/Parser mapping has built-in support for two character types: +    <code>char</code> and <code>wchar_t</code>. 
You can select the +    character type with the <code>--char-type</code> command line +    option. The default character type is <code>char</code>. The +    string-based built-in XML Schema types are returned as either +    <code>std::string</code> or <code>std::wstring</code> depending +    on the character type selected.</p> + +  <p>Another aspect of the mapping that depends on the character type +     is character encoding. For the <code>char</code> character type +     the default encoding is UTF-8. Other supported encodings are +     ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as +     custom encodings. You can select which encoding should be used +     in the object model with the <code>--char-encoding</code> command +     line option.</p> + +  <p>For the <code>wchar_t</code> character type the encoding is +     automatically selected between UTF-16 and UTF-32/UCS-4 depending +     on the size of the <code>wchar_t</code> type. On some platforms +     (for example, Windows with Visual C++ and AIX with IBM XL C++) +     <code>wchar_t</code> is 2 bytes long. For these platforms the +     encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes +     long and UTF-32/UCS-4 is used.</p> + +  <p>Note also that the character encoding that is used in the object model +     is independent of the encodings used in input and output XML. In fact, +     all three (object model, input XML, and output XML) can have different +     encodings.</p> + +  <h2><a name="5.3">5.3 Underlying XML Parser</a></h2> + +  <p>The C++/Parser mapping can be used with either Xerces-C++ or Expat +     as the underlying XML parser. You can select the XML parser with +     the <code>--xml-parser</code> command line option. Valid values +     for this option are <code>xerces</code> and <code>expat</code>. 
+     The default XML parser is Xerces-C++.</p> + +  <p>The generated code is identical for both parsers except for the +     <code>xml_schema::document</code> class in which some of the +     <code>parse()</code> functions are parser-specific as described +     in <a href="#7">Chapter 7, "Document Parser and Error Handling"</a>.</p> + + +  <h2><a name="5.4">5.4 XML Schema Validation</a></h2> + +  <p>The C++/Parser mapping provides support for validating a +     commonly-used subset of W3C XML Schema in the generated code. +     For the list of supported XML Schema constructs refer to +     <a href="#A">Appendix A, "Supported XML Schema Constructs"</a>.</p> + +  <p>By default validation in the generated code is disabled if +     the underlying XML parser is validating (Xerces-C++) and +     enabled otherwise (Expat). See <a href="#5.3">Section 5.3, +     "Underlying XML Parser"</a> for more information about +     the underlying XML parser. You can override the default +     behavior with the <code>--generate-validation</code> +     and <code>--suppress-validation</code> command line options.</p> + + +  <h2><a name="5.5">5.5 Support for Polymorphism</a></h2> + +  <p>By default the XSD compiler generates non-polymorphic code. If your +     vocabulary uses XML Schema polymorphism in the form of <code>xsi:type</code> +     and/or substitution groups, then you will need to compile your schemas +     with the <code>--generate-polymorphic</code> option to produce +     polymorphism-aware code as well as pass <code>true</code> as the last +     argument to the <code>xml_schema::document</code>'s constructors.</p> + +  <p>When using the polymorphism-aware generated code, you can specify +     several parsers for a single element by passing a parser map +     instead of an individual parser to the parser connection function +     for the element. 
One of the parsers will then be looked up and used +     depending on the <code>xsi:type</code> attribute value or an element +     name from a substitution group. Consider the following schema as an +     example:</p> + +  <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + +  <xs:complexType name="person"> +    <xs:sequence> +      <xs:element name="name" type="xs:string"/> +    </xs:sequence> +  </xs:complexType> + +  <!-- substitution group root --> +  <xs:element name="person" type="person"/> + +  <xs:complexType name="superman"> +    <xs:complexContent> +      <xs:extension base="person"> +        <xs:attribute name="can-fly" type="xs:boolean"/> +      </xs:extension> +    </xs:complexContent> +  </xs:complexType> + +  <xs:element name="superman" +              type="superman" +              substitutionGroup="person"/> + +  <xs:complexType name="batman"> +    <xs:complexContent> +      <xs:extension base="superman"> +        <xs:attribute name="wing-span" type="xs:unsignedInt"/> +      </xs:extension> +    </xs:complexContent> +  </xs:complexType> + +  <xs:element name="batman" +              type="batman" +              substitutionGroup="superman"/> + +  <xs:complexType name="supermen"> +    <xs:sequence> +      <xs:element ref="person" maxOccurs="unbounded"/> +    </xs:sequence> +  </xs:complexType> + +  <xs:element name="supermen" type="supermen"/> + +</xs:schema> +  </pre> + +  <p>Conforming XML documents can use the <code>superman</code> +     and <code>batman</code> types in place of the <code>person</code> +     type either by specifying the type with the <code>xsi:type</code> +     attributes or by using the elements from the substitution +     group, for instance:</p> + + +  <pre class="xml"> +<supermen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + +  <person> +    <name>John Doe</name> +  </person> + +  <superman can-fly="false"> +    <name>James "007" Bond</name> +  </superman> + +  <superman can-fly="true" 
wing-span="10" xsi:type="batman"> +    <name>Bruce Wayne</name> +  </superman> + +</supermen> +  </pre> + +  <p>To print the data stored in such XML documents we can implement +     the parsers as follows:</p> + +  <pre class="c++"> +class person_pimpl: public virtual person_pskel +{ +public: +  virtual void +  pre () +  { +    cout << "starting to parse person" << endl; +  } + +  virtual void +  name (const std::string& v) +  { +    cout << "name: " << v << endl; +  } + +  virtual void +  post_person () +  { +    cout << "finished parsing person" << endl; +  } +}; + +class superman_pimpl: public virtual superman_pskel, +                      public person_pimpl +{ +public: +  virtual void +  pre () +  { +    cout << "starting to parse superman" << endl; +  } + +  virtual void +  can_fly (bool v) +  { +    cout << "can-fly: " << v << endl; +  } + +  virtual void +  post_person () +  { +    post_superman (); +  } + +  virtual void +  post_superman () +  { +    cout << "finished parsing superman" << endl; +  } +}; + +class batman_pimpl: public virtual batman_pskel, +                    public superman_pimpl +{ +public: +  virtual void +  pre () +  { +    cout << "starting to parse batman" << endl; +  } + +  virtual void +  wing_span (unsigned int v) +  { +    cout << "wing-span: " << v << endl; +  } + +  virtual void +  post_superman () +  { +    post_batman (); +  } + +  virtual void +  post_batman () +  { +    cout << "finished parsing batman" << endl; +  } +}; +  </pre> + +  <p>Note that because the derived type parsers (<code>superman_pskel</code> +     and <code>batman_pskel</code>) are called via the <code>person_pskel</code> +     interface, we have to override the <code>post_person()</code> +     virtual function in <code>superman_pimpl</code> to call +     <code>post_superman()</code> and the <code>post_superman()</code> +     virtual function in <code>batman_pimpl</code> to call +     <code>post_batman()</code>.</p> + +  <p>The following code fragment shows 
how to connect the parsers together. +     Notice that for the <code>person</code> element in the <code>supermen_p</code> +     parser we specify a parser map instead of a specific parser and we pass +     <code>true</code> as the last argument to the document parser constructor +     to indicate that we are parsing potentially-polymorphic XML documents:</p> + +  <pre class="c++"> +int +main (int argc, char* argv[]) +{ +  // Construct the parser. +  // +  xml_schema::string_pimpl string_p; +  xml_schema::boolean_pimpl boolean_p; +  xml_schema::unsigned_int_pimpl unsigned_int_p; + +  person_pimpl person_p; +  superman_pimpl superman_p; +  batman_pimpl batman_p; + +  xml_schema::parser_map_impl person_map; +  supermen_pimpl supermen_p; + +  person_p.parsers (string_p); +  superman_p.parsers (string_p, boolean_p); +  batman_p.parsers (string_p, boolean_p, unsigned_int_p); + +  // Here we are specifying a parser map which contains several +  // parsers that can be used to parse the person element. +  // +  person_map.insert (person_p); +  person_map.insert (superman_p); +  person_map.insert (batman_p); + +  supermen_p.person_parser (person_map); + +  // Parse the XML document. The last argument to the document's +  // constructor indicates that we are parsing polymorphic XML +  // documents. +  // +  xml_schema::document doc_p (supermen_p, "supermen", true); + +  supermen_p.pre (); +  doc_p.parse (argv[1]); +  supermen_p.post_supermen (); +} +  </pre> + +  <p>When polymorphism-aware code is generated, each element's +     <code>*_parser()</code> function is overloaded to also accept +     an object of the <code>xml_schema::parser_map</code> type. +     For example, the <code>supermen_pskel</code> class from the +     above example looks like this:</p> + +  <pre class="c++"> +class supermen_pskel: public xml_schema::parser_complex_content +{ +public: + +  ... + +  // Parser construction API. +  // +  void +  parsers (person_pskel&); + +  // Individual element parsers. 
+  // +  void +  person_parser (person_pskel&); + +  void +  person_parser (const xml_schema::parser_map&); + +  ... +}; +  </pre> + +  <p>Note that you can specify both the individual (static) parser and +     the parser map. The individual parser will be used when the static +     element type and the dynamic type of the object being parsed are +     the same. This is the case, for example, when there is no +     <code>xsi:type</code> attribute and the element hasn't been +     substituted. Because the individual parser for an element is +     cached and no map lookup is necessary, it makes sense to specify +     both the individual parser and the parser map when most of the +     objects being parsed are of the static type and optimal +     performance is important. The following code fragment shows +     how to change the above example to set both the individual +     parser and the parser map:</p> + +  <pre class="c++"> +int +main (int argc, char* argv[]) +{ +  ... + +  person_map.insert (superman_p); +  person_map.insert (batman_p); + +  supermen_p.person_parser (person_p); +  supermen_p.person_parser (person_map); + +  ... +} +  </pre> + + +  <p>The <code>xml_schema::parser_map</code> interface and the +     <code>xml_schema::parser_map_impl</code> default implementation +     are presented below:</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class parser_map +  { +  public: +    virtual parser_base* +    find (const ro_string* type) const = 0; +  }; + +  class parser_map_impl: public parser_map +  { +  public: +    void +    insert (parser_base&); + +    virtual parser_base* +    find (const ro_string* type) const; + +  private: +    parser_map_impl (const parser_map_impl&); + +    parser_map_impl& +    operator= (const parser_map_impl&); + +    ... 
+  }; +} +  </pre> + +  <p>The <code>type</code> argument in the <code>find()</code> virtual +     function is the type name and namespace from the xsi:type attribute +     (the namespace prefix is resolved to the actual XML namespace) +     or the type of an element from the substitution group in the form +     <code>"<name> <namespace>"</code> with the space and the +     namespace part absent if the type does not have a namespace. +     You can obtain a parser's dynamic type in the same format +     using the <code>_dynamic_type()</code> function. The static +     type can be obtained by calling the static <code>_static_type()</code> +     function, for example <code>person_pskel::_static_type()</code>. +     Both functions return a C string (<code>const char*</code> or +     <code>const wchar_t*</code>, depending on the character type +     used) which is valid for as long as the application is running. +     The following example shows how we can implement our own parser +     map using <code>std::map</code>:</p> + + +  <pre class="c++"> +#include <map> +#include <string> + +class parser_map: public xml_schema::parser_map +{ +public: + void + insert (xml_schema::parser_base& p) + { +   map_[p._dynamic_type ()] = &p; + } + + virtual xml_schema::parser_base* + find (const xml_schema::ro_string* type) const + { +   map::const_iterator i = map_.find (type); +   return i != map_.end () ? i->second : 0; + } + +private: +  typedef std::map<std::string, xml_schema::parser_base*> map; +  map map_; +}; +  </pre> + +  <p>Most of the code presented in this section is taken from the +     <code>polymorphism</code> example which can be found in the +     <code>cxx/parser/</code> directory in the +     <a href="https://cppget.org/xsd-examples">xsd-examples</a> package. 
+     Handling of <code>xsi:type</code> and substitution groups when used on +     root elements requires a number of special actions as shown in +     the <code>polyroot</code> example.</p> + + +  <!-- Built-in XML Schema Type Parsers --> + + +  <h1><a name="6">6 Built-In XML Schema Type Parsers</a></h1> + +  <p>The XSD runtime provides parser implementations for all built-in +     XML Schema types as summarized in the following table. Declarations +     for these types are automatically included into each generated +     header file. As a result you don't need to include any headers +     to gain access to these parser implementations. Note that some +     parsers return either <code>std::string</code> or +     <code>std::wstring</code> depending on the character type selected.</p> + +  <!-- border="1" is necessary for html2ps --> +  <table id="builtin" border="1"> +    <tr> +      <th>XML Schema type</th> +      <th>Parser implementation in the <code>xml_schema</code> namespace</th> +      <th>Parser return type</th> +    </tr> + +    <tr> +      <th colspan="3">anyType and anySimpleType types</th> +    </tr> +    <tr> +      <td><code>anyType</code></td> +      <td><code>any_type_pimpl</code></td> +      <td><code>void</code></td> +    </tr> +    <tr> +      <td><code>anySimpleType</code></td> +      <td><code>any_simple_type_pimpl</code></td> +      <td><code>void</code></td> +    </tr> + +    <tr> +      <th colspan="3">fixed-length integral types</th> +    </tr> +    <!-- 8-bit --> +    <tr> +      <td><code>byte</code></td> +      <td><code>byte_pimpl</code></td> +      <td><code>signed char</code></td> +    </tr> +    <tr> +      <td><code>unsignedByte</code></td> +      <td><code>unsigned_byte_pimpl</code></td> +      <td><code>unsigned char</code></td> +    </tr> + +    <!-- 16-bit --> +    <tr> +      <td><code>short</code></td> +      <td><code>short_pimpl</code></td> +      <td><code>short</code></td> +    </tr> +    <tr> +      
<td><code>unsignedShort</code></td> +      <td><code>unsigned_short_pimpl</code></td> +      <td><code>unsigned short</code></td> +    </tr> + +    <!-- 32-bit --> +    <tr> +      <td><code>int</code></td> +      <td><code>int_pimpl</code></td> +      <td><code>int</code></td> +    </tr> +    <tr> +      <td><code>unsignedInt</code></td> +      <td><code>unsigned_int_pimpl</code></td> +      <td><code>unsigned int</code></td> +    </tr> + +    <!-- 64-bit --> +    <tr> +      <td><code>long</code></td> +      <td><code>long_pimpl</code></td> +      <td><code>long long</code></td> +    </tr> +    <tr> +      <td><code>unsignedLong</code></td> +      <td><code>unsigned_long_pimpl</code></td> +      <td><code>unsigned long long</code></td> +    </tr> + +    <tr> +      <th colspan="3">arbitrary-length integral types</th> +    </tr> +    <tr> +      <td><code>integer</code></td> +      <td><code>integer_pimpl</code></td> +      <td><code>long long</code></td> +    </tr> +    <tr> +      <td><code>nonPositiveInteger</code></td> +      <td><code>non_positive_integer_pimpl</code></td> +      <td><code>long long</code></td> +    </tr> +    <tr> +      <td><code>nonNegativeInteger</code></td> +      <td><code>non_negative_integer_pimpl</code></td> +      <td><code>unsigned long long</code></td> +    </tr> +    <tr> +      <td><code>positiveInteger</code></td> +      <td><code>positive_integer_pimpl</code></td> +      <td><code>unsigned long long</code></td> +    </tr> +    <tr> +      <td><code>negativeInteger</code></td> +      <td><code>negative_integer_pimpl</code></td> +      <td><code>long long</code></td> +    </tr> + +    <tr> +      <th colspan="3">boolean types</th> +    </tr> +    <tr> +      <td><code>boolean</code></td> +      <td><code>boolean_pimpl</code></td> +      <td><code>bool</code></td> +    </tr> + +    <tr> +      <th colspan="3">fixed-precision floating-point types</th> +    </tr> +    <tr> +      <td><code>float</code></td> +      
<td><code>float_pimpl</code></td> +      <td><code>float</code></td> +    </tr> +    <tr> +      <td><code>double</code></td> +      <td><code>double_pimpl</code></td> +      <td><code>double</code></td> +    </tr> + +    <tr> +      <th colspan="3">arbitrary-precision floating-point types</th> +    </tr> +    <tr> +      <td><code>decimal</code></td> +      <td><code>decimal_pimpl</code></td> +      <td><code>double</code></td> +    </tr> + +    <tr> +      <th colspan="3">string-based types</th> +    </tr> +    <tr> +      <td><code>string</code></td> +      <td><code>string_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>normalizedString</code></td> +      <td><code>normalized_string_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>token</code></td> +      <td><code>token_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>Name</code></td> +      <td><code>name_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>NMTOKEN</code></td> +      <td><code>nmtoken_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>NCName</code></td> +      <td><code>ncname_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> + +    <tr> +      <td><code>language</code></td> +      <td><code>language_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> + +    <tr> +      <th colspan="3">qualified name</th> +    </tr> +    <tr> +      <td><code>QName</code></td> +      <td><code>qname_pimpl</code></td> +      <td><code>xml_schema::qname</code><br/><a href="#6.1">Section 6.1, +          "<code>QName</code> Parser"</a></td> +    
</tr> + +    <tr> +      <th colspan="3">ID/IDREF types</th> +    </tr> +    <tr> +      <td><code>ID</code></td> +      <td><code>id_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> +    <tr> +      <td><code>IDREF</code></td> +      <td><code>idref_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> + +    <tr> +      <th colspan="3">list types</th> +    </tr> +    <tr> +      <td><code>NMTOKENS</code></td> +      <td><code>nmtokens_pimpl</code></td> +      <td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section +          6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> +    </tr> +    <tr> +      <td><code>IDREFS</code></td> +      <td><code>idrefs_pimpl</code></td> +      <td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section +          6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> +    </tr> + +    <tr> +      <th colspan="3">URI types</th> +    </tr> +    <tr> +      <td><code>anyURI</code></td> +      <td><code>uri_pimpl</code></td> +      <td><code>std::string</code> or <code>std::wstring</code></td> +    </tr> + +    <tr> +      <th colspan="3">binary types</th> +    </tr> +    <tr> +      <td><code>base64Binary</code></td> +      <td><code>base64_binary_pimpl</code></td> +      <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> +          <a href="#6.3">Section 6.3, "<code>base64Binary</code> and +          <code>hexBinary</code> Parsers"</a></td> +    </tr> +    <tr> +      <td><code>hexBinary</code></td> +      <td><code>hex_binary_pimpl</code></td> +      <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> +          <a href="#6.3">Section 6.3, "<code>base64Binary</code> and +          <code>hexBinary</code> Parsers"</a></td> +    </tr> + +    <tr> +      <th colspan="3">date/time types</th> +    </tr> +    <tr> +      <td><code>date</code></td> +     
 <td><code>date_pimpl</code></td> +      <td><code>xml_schema::date</code><br/><a href="#6.5">Section 6.5, +          "<code>date</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>dateTime</code></td> +      <td><code>date_time_pimpl</code></td> +      <td><code>xml_schema::date_time</code><br/><a href="#6.6">Section 6.6, +          "<code>dateTime</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>duration</code></td> +      <td><code>duration_pimpl</code></td> +      <td><code>xml_schema::duration</code><br/><a href="#6.7">Section 6.7, +          "<code>duration</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>gDay</code></td> +      <td><code>gday_pimpl</code></td> +      <td><code>xml_schema::gday</code><br/><a href="#6.8">Section 6.8, +          "<code>gDay</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>gMonth</code></td> +      <td><code>gmonth_pimpl</code></td> +      <td><code>xml_schema::gmonth</code><br/><a href="#6.9">Section 6.9, +          "<code>gMonth</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>gMonthDay</code></td> +      <td><code>gmonth_day_pimpl</code></td> +      <td><code>xml_schema::gmonth_day</code><br/><a href="#6.10">Section 6.10, +          "<code>gMonthDay</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>gYear</code></td> +      <td><code>gyear_pimpl</code></td> +      <td><code>xml_schema::gyear</code><br/><a href="#6.11">Section 6.11, +          "<code>gYear</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>gYearMonth</code></td> +      <td><code>gyear_month_pimpl</code></td> +      <td><code>xml_schema::gyear_month</code><br/><a href="#6.12">Section +          6.12, "<code>gYearMonth</code> Parser"</a></td> +    </tr> +    <tr> +      <td><code>time</code></td> +      <td><code>time_pimpl</code></td> +      <td><code>xml_schema::time</code><br/><a href="#6.13">Section 6.13, +          "<code>time</code> Parser"</a></td> +    </tr> + +  
</table> + +  <h2><a name="6.1">6.1 <code>QName</code> Parser</a></h2> + +  <p>The return type of the <code>qname_pimpl</code> parser implementation +     is <code>xml_schema::qname</code> which represents an XML qualified +     name. Its interface is presented below. +     Note that the <code>std::string</code> type in the interface becomes +     <code>std::wstring</code> if the selected character type is +     <code>wchar_t</code>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class qname +  { +  public: +    explicit +    qname (const std::string& name); +    qname (const std::string& prefix, const std::string& name); + +    const std::string& +    prefix () const; + +    void +    prefix (const std::string&); + +    const std::string& +    name () const; + +    void +    name (const std::string&); +  }; + +  bool +  operator== (const qname&, const qname&); + +  bool +  operator!= (const qname&, const qname&); +} +  </pre> + + +  <h2><a name="6.2">6.2 <code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></h2> + +  <p>The return type of the <code>nmtokens_pimpl</code> and +     <code>idrefs_pimpl</code> parser implementations is +     <code>xml_schema::string_sequence</code> which represents a +     sequence of strings. Its interface is presented below. 
+     Note that the <code>std::string</code> type in the interface becomes +     <code>std::wstring</code> if the selected character type is +     <code>wchar_t</code>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class string_sequence: public std::vector<std::string> +  { +  public: +    string_sequence (); + +    explicit +    string_sequence (std::vector<std::string>::size_type n, +                     const std::string& x = std::string ()); + +    template <typename I> +    string_sequence (const I& begin, const I& end); +  }; + +  bool +  operator== (const string_sequence&, const string_sequence&); + +  bool +  operator!= (const string_sequence&, const string_sequence&); +} +  </pre> + + +  <h2><a name="6.3">6.3 <code>base64Binary</code> and <code>hexBinary</code> Parsers</a></h2> + +  <p>The return type of the <code>base64_binary_pimpl</code> and +     <code>hex_binary_pimpl</code> parser implementations is either +     <code>std::unique_ptr<xml_schema::buffer></code> (C++11) or +     <code>std::auto_ptr<xml_schema::buffer></code> (C++98), +     depending on the C++ standard selected (<code>--std</code> XSD +     compiler option). The <code>xml_schema::buffer</code> type +     represents a binary buffer and its interface is presented below.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class buffer +  { +  public: +    typedef std::size_t size_t; + +    class bounds {}; // Out of bounds exception. 
+ +  public: +    explicit +    buffer (size_t size = 0); +    buffer (size_t size, size_t capacity); +    buffer (const void* data, size_t size); +    buffer (const void* data, size_t size, size_t capacity); +    buffer (void* data, +            size_t size, +            size_t capacity, +            bool assume_ownership); + +  public: +    buffer (const buffer&); + +    buffer& +    operator= (const buffer&); + +    void +    swap (buffer&); + +  public: +    size_t +    capacity () const; + +    bool +    capacity (size_t); + +  public: +    size_t +    size () const; + +    bool +    size (size_t); + +  public: +    const char* +    data () const; + +    char* +    data (); + +    const char* +    begin () const; + +    char* +    begin (); + +    const char* +    end () const; + +    char* +    end (); +  }; + +  bool +  operator== (const buffer&, const buffer&); + +  bool +  operator!= (const buffer&, const buffer&); +} +  </pre> + +  <p>If the <code>assume_ownership</code> argument to the constructor +     is <code>true</code>, the instance assumes the ownership of the +     memory block pointed to by the <code>data</code> argument and will +     eventually release it by calling <code>operator delete()</code>. The +     <code>capacity()</code> and <code>size()</code> modifier functions +     return <code>true</code> if the underlying buffer has moved. +  </p> + +  <p>The <code>bounds</code> exception is thrown if the constructor +     arguments violate the <code>(size <= capacity)</code> +     constraint.</p> + + +  <h2><a name="6.4">6.4 Time Zone Representation</a></h2> + +  <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, +     <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, +     <code>gYearMonth</code>, and <code>time</code> XML Schema built-in +     types all include an optional time zone component. 
The following +     <code>xml_schema::time_zone</code> base class is used to represent +     this information:</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class time_zone +  { +  public: +    time_zone (); +    time_zone (short hours, short minutes); + +    bool +    zone_present () const; + +    void +    zone_reset (); + +    short +    zone_hours () const; + +    void +    zone_hours (short); + +    short +    zone_minutes () const; + +    void +    zone_minutes (short); +  }; + +  bool +  operator== (const time_zone&, const time_zone&); + +  bool +  operator!= (const time_zone&, const time_zone&); +} +  </pre> + +  <p>The <code>zone_present()</code> accessor function returns <code>true</code> +     if the time zone is specified. The <code>zone_reset()</code> modifier +     function resets the time zone object to the <em>not specified</em> +     state. If the time zone offset is negative then both hours and +     minutes components are represented as negative integers.</p> + + +  <h2><a name="6.5">6.5 <code>date</code> Parser</a></h2> + + <p>The return type of the <code>date_pimpl</code> parser implementation +     is <code>xml_schema::date</code> which represents a year, a day, and a month +     with an optional time zone. Its interface is presented below. 
+     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class date +  { +  public: +    date (int year, unsigned short month, unsigned short day); +    date (int year, unsigned short month, unsigned short day, +          short zone_hours, short zone_minutes); + +    int +    year () const; + +    void +    year (int); + +    unsigned short +    month () const; + +    void +    month (unsigned short); + +    unsigned short +    day () const; + +    void +    day (unsigned short); +  }; + +  bool +  operator== (const date&, const date&); + +  bool +  operator!= (const date&, const date&); +} +  </pre> + +  <h2><a name="6.6">6.6 <code>dateTime</code> Parser</a></h2> + +  <p>The return type of the <code>date_time_pimpl</code> parser implementation +     is <code>xml_schema::date_time</code> which represents a year, a month, a day, +     hours, minutes, and seconds with an optional time zone. Its interface +     is presented below. 
+     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class date_time +  { +  public: +    date_time (int year, unsigned short month, unsigned short day, +               unsigned short hours, unsigned short minutes, +               double seconds); + +    date_time (int year, unsigned short month, unsigned short day, +               unsigned short hours, unsigned short minutes, +               double seconds, short zone_hours, short zone_minutes); + +    int +    year () const; + +    void +    year (int); + +    unsigned short +    month () const; + +    void +    month (unsigned short); + +    unsigned short +    day () const; + +    void +    day (unsigned short); + +    unsigned short +    hours () const; + +    void +    hours (unsigned short); + +    unsigned short +    minutes () const; + +    void +    minutes (unsigned short); + +    double +    seconds () const; + +    void +    seconds (double); +  }; + +  bool +  operator== (const date_time&, const date_time&); + +  bool +  operator!= (const date_time&, const date_time&); +} +  </pre> + +  <h2><a name="6.7">6.7 <code>duration</code> Parser</a></h2> + +  <p>The return type of the <code>duration_pimpl</code> parser implementation +     is <code>xml_schema::duration</code> which represents a potentially +     negative duration in the form of years, months, days, hours, minutes, +     and seconds. 
Its interface is presented below.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class duration +  { +  public: +    duration (bool negative, +              unsigned int years, unsigned int months, unsigned int days, +              unsigned int hours, unsigned int minutes, double seconds); + +    bool +    negative () const; + +    void +    negative (bool); + +    unsigned int +    years () const; + +    void +    years (unsigned int); + +    unsigned int +    months () const; + +    void +    months (unsigned int); + +    unsigned int +    days () const; + +    void +    days (unsigned int); + +    unsigned int +    hours () const; + +    void +    hours (unsigned int); + +    unsigned int +    minutes () const; + +    void +    minutes (unsigned int); + +    double +    seconds () const; + +    void +    seconds (double); +  }; + +  bool +  operator== (const duration&, const duration&); + +  bool +  operator!= (const duration&, const duration&); +} +  </pre> + + +  <h2><a name="6.8">6.8 <code>gDay</code> Parser</a></h2> + +  <p>The return type of the <code>gday_pimpl</code> parser implementation +     is <code>xml_schema::gday</code> which represents a day of the month with +     an optional time zone. Its interface is presented below. 
+     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class gday: public time_zone +  { +  public: +    explicit +    gday (unsigned short day); +    gday (unsigned short day, short zone_hours, short zone_minutes); + +    unsigned short +    day () const; + +    void +    day (unsigned short); +  }; + +  bool +  operator== (const gday&, const gday&); + +  bool +  operator!= (const gday&, const gday&); +} +  </pre> + +  <h2><a name="6.9">6.9 <code>gMonth</code> Parser</a></h2> + +  <p>The return type of the <code>gmonth_pimpl</code> parser implementation +     is <code>xml_schema::gmonth</code> which represents a month of the year +     with an optional time zone. Its interface is presented below. +     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class gmonth: public time_zone +  { +  public: +    explicit +    gmonth (unsigned short month); +    gmonth (unsigned short month, short zone_hours, short zone_minutes); + +    unsigned short +    month () const; + +    void +    month (unsigned short); +  }; + +  bool +  operator== (const gmonth&, const gmonth&); + +  bool +  operator!= (const gmonth&, const gmonth&); +} +  </pre> + +  <h2><a name="6.10">6.10 <code>gMonthDay</code> Parser</a></h2> + +  <p>The return type of the <code>gmonth_day_pimpl</code> parser implementation +     is <code>xml_schema::gmonth_day</code> which represents a day and a month +     of the year with an optional time zone. Its interface is presented below. 
+     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class gmonth_day: public time_zone +  { +  public: +    gmonth_day (unsigned short month, unsigned short day); +    gmonth_day (unsigned short month, unsigned short day, +                short zone_hours, short zone_minutes); + +    unsigned short +    month () const; + +    void +    month (unsigned short); + +    unsigned short +    day () const; + +    void +    day (unsigned short); +  }; + +  bool +  operator== (const gmonth_day&, const gmonth_day&); + +  bool +  operator!= (const gmonth_day&, const gmonth_day&); +} +  </pre> + +  <h2><a name="6.11">6.11 <code>gYear</code> Parser</a></h2> + +  <p>The return type of the <code>gyear_pimpl</code> parser implementation +     is <code>xml_schema::gyear</code> which represents a year with +     an optional time zone. Its interface is presented below. +     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class gyear: public time_zone +  { +  public: +    explicit +    gyear (int year); +    gyear (int year, short zone_hours, short zone_minutes); + +    int +    year () const; + +    void +    year (int); +  }; + +  bool +  operator== (const gyear&, const gyear&); + +  bool +  operator!= (const gyear&, const gyear&); +} +  </pre> + +  <h2><a name="6.12">6.12 <code>gYearMonth</code> Parser</a></h2> + +  <p>The return type of the <code>gyear_month_pimpl</code> parser implementation +     is <code>xml_schema::gyear_month</code> which represents a year and a month +     with an optional time zone. Its interface is presented below. 
+     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class gyear_month: public time_zone +  { +  public: +    gyear_month (int year, unsigned short month); +    gyear_month (int year, unsigned short month, +                 short zone_hours, short zone_minutes); + +    int +    year () const; + +    void +    year (int); + +    unsigned short +    month () const; + +    void +    month (unsigned short); +  }; + +  bool +  operator== (const gyear_month&, const gyear_month&); + +  bool +  operator!= (const gyear_month&, const gyear_month&); +} +  </pre> + + +  <h2><a name="6.13">6.13 <code>time</code> Parser</a></h2> + +  <p>The return type of the <code>time_pimpl</code> parser implementation +     is <code>xml_schema::time</code> which represents hours, minutes, +     and seconds with an optional time zone. Its interface is presented below. +     For more information on the base <code>xml_schema::time_zone</code> +     class refer to <a href="#6.4">Section 6.4, "Time Zone +     Representation"</a>.</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class time: public time_zone +  { +  public: +    time (unsigned short hours, unsigned short minutes, double seconds); +    time (unsigned short hours, unsigned short minutes, double seconds, +          short zone_hours, short zone_minutes); + +    unsigned short +    hours () const; + +    void +    hours (unsigned short); + +    unsigned short +    minutes () const; + +    void +    minutes (unsigned short); + +    double +    seconds () const; + +    void +    seconds (double); +  }; + +  bool +  operator== (const time&, const time&); + +  bool +  operator!= (const time&, const time&); +} +  </pre> + + +  <!-- Error Handling --> + + +  <h1><a name="7">7 Document Parser and Error Handling</a></h1> + +  <p>In this chapter we will discuss the <code>xml_schema::document</code> +     type as well 
as the error handling mechanisms provided by the mapping +     in more detail. As mentioned in <a href="#3.4">Section 3.4, +     "Connecting the Parsers Together"</a>, the interface of +     <code>xml_schema::document</code> depends on the underlying XML +     parser selected (<a href="#5.3">Section 5.3, "Underlying XML +     Parser"</a>). The following sections describe the +     <code>document</code> type interface for Xerces-C++ and +     Expat as underlying parsers.</p> + +  <h2><a name="7.1">7.1 Xerces-C++ Document Parser</a></h2> + +  <p>When Xerces-C++ is used as the underlying XML parser, the +     <code>document</code> type has the following interface. Note that +     if the character type is <code>wchar_t</code>, then the string type +     in the interface becomes <code>std::wstring</code> +     (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class parser_base; +  class error_handler; + +  class flags +  { +  public: +    // Do not validate XML documents with the Xerces-C++ validator. +    // +    static const unsigned long dont_validate; + +    // Do not initialize the Xerces-C++ runtime. +    // +    static const unsigned long dont_initialize; + +    // Disable handling of subsequent imports for the same namespace +    // in Xerces-C++ 3.1.0 and later. +    // +    static const unsigned long no_multiple_imports; +  }; + +  class properties +  { +  public: +    // Add a location for a schema with a target namespace. +    // +    void +    schema_location (const std::string& namespace_, +                     const std::string& location); + +    // Add a location for a schema without a target namespace. 
+    // +    void +    no_namespace_schema_location (const std::string& location); +  }; + +  class document +  { +  public: +    document (parser_base& root, +              const std::string& root_element_name, +	      bool polymorphic = false); + +    document (parser_base& root, +              const std::string& root_element_namespace, +              const std::string& root_element_name, +	      bool polymorphic = false); + +  public: +    // Parse URI or a local file. +    // +    void +    parse (const std::string& uri, +           flags = 0, +           const properties& = properties ()); + +    // Parse URI or a local file with a user-provided error_handler +    // object. +    // +    void +    parse (const std::string& uri, +           error_handler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse URI or a local file with a user-provided ErrorHandler +    // object. Note that you must initialize the Xerces-C++ runtime +    // before calling this function. +    // +    void +    parse (const std::string& uri, +           xercesc::ErrorHandler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse URI or a local file using a user-provided SAX2XMLReader +    // object. Note that you must initialize the Xerces-C++ runtime +    // before calling this function. +    // +    void +    parse (const std::string& uri, +           xercesc::SAX2XMLReader&, +           flags = 0, +           const properties& = properties ()); + +  public: +    // Parse std::istream. +    // +    void +    parse (std::istream&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with a user-provided error_handler object. +    // +    void +    parse (std::istream&, +           error_handler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with a user-provided ErrorHandler object. 
+    // Note that you must initialize the Xerces-C++ runtime before +    // calling this function. +    // +    void +    parse (std::istream&, +           xercesc::ErrorHandler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream using a user-provided SAX2XMLReader object. +    // Note that you must initialize the Xerces-C++ runtime before +    // calling this function. +    // +    void +    parse (std::istream&, +           xercesc::SAX2XMLReader&, +           flags = 0, +           const properties& = properties ()); + +  public: +    // Parse std::istream with a system id. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with a system id and a user-provided +    // error_handler object. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           error_handler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with a system id and a user-provided +    // ErrorHandler object. Note that you must initialize the +    // Xerces-C++ runtime before calling this function. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           xercesc::ErrorHandler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with a system id using a user-provided +    // SAX2XMLReader object. Note that you must initialize the +    // Xerces-C++ runtime before calling this function. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           xercesc::SAX2XMLReader&, +           flags = 0, +           const properties& = properties ()); + +  public: +    // Parse std::istream with system and public ids. 
+    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with system and public ids and a user-provided +    // error_handler object. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id, +           error_handler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with system and public ids and a user-provided +    // ErrorHandler object. Note that you must initialize the Xerces-C++ +    // runtime before calling this function. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id, +           xercesc::ErrorHandler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse std::istream with system and public ids using a user- +    // provided SAX2XMLReader object. Note that you must initialize +    // the Xerces-C++ runtime before calling this function. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id, +           xercesc::SAX2XMLReader&, +           flags = 0, +           const properties& = properties ()); + +  public: +    // Parse InputSource. Note that you must initialize the Xerces-C++ +    // runtime before calling this function. +    // +    void +    parse (const xercesc::InputSource&, +           flags = 0, +           const properties& = properties ()); + +    // Parse InputSource with a user-provided error_handler object. +    // Note that you must initialize the Xerces-C++ runtime before +    // calling this function. 
+    // +    void +    parse (const xercesc::InputSource&, +           error_handler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse InputSource with a user-provided ErrorHandler object. +    // Note that you must initialize the Xerces-C++ runtime before +    // calling this function. +    // +    void +    parse (const xercesc::InputSource&, +           xercesc::ErrorHandler&, +           flags = 0, +           const properties& = properties ()); + +    // Parse InputSource using a user-provided SAX2XMLReader object. +    // Note that you must initialize the Xerces-C++ runtime before +    // calling this function. +    // +    void +    parse (const xercesc::InputSource&, +           xercesc::SAX2XMLReader&, +           flags = 0, +           const properties& = properties ()); +  }; +} +  </pre> + +  <p>The <code>document</code> class is a root parser for +     the vocabulary. The first argument to its constructors is the +     parser for the type of the root element. The <code>parser_base</code> +     class is the base type for all parser skeletons. The second and +     third arguments to the <code>document</code>'s constructors are +     the root element's name and namespace. The last argument, +     <code>polymorphic</code>, specifies whether the XML documents +     being parsed use polymorphism. For more information on support +     for XML Schema polymorphism in the C++/Parser mapping refer +     to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + +  <p>The rest of the <code>document</code> interface consists of overloaded +     <code>parse()</code> functions. The last two arguments in each of these +     functions are <code>flags</code> and <code>properties</code>. The +     <code>flags</code> argument allows you to modify the default behavior +     of the parsing functions. 
The <code>properties</code> argument allows +     you to override the schema location attributes specified in XML +     documents. Note that the schema location paths are relative to an +     XML document unless they are complete URIs. For example, if you want +     to use a local schema file then you will need to use a URI in the +     form <code>file:///absolute/path/to/your/schema</code>.</p> + +  <p>A number of overloaded <code>parse()</code> functions have the +     <code>system_id</code> and <code>public_id</code> arguments. The +     system id is a <em>system</em> identifier of the resource being +     parsed (for example, a URI or a full file path). The public id is a +     <em>public</em> identifier of the resource (for example, an +     application-specific name or a relative file path). The system id +     is used to resolve relative paths (for example, schema paths). In +     diagnostic messages the public id is used if it is available. +     Otherwise the system id is used.</p> + +  <p>The error handling mechanisms employed by the <code>document</code> +     parser are described in <a href="#7.3">Section 7.3, "Error +     Handling"</a>.</p> + +  <h2><a name="7.2">7.2 Expat Document Parser</a></h2> + +  <p>When Expat is used as the underlying XML parser, the +     <code>document</code> type has the following interface. 
Note that +     if the character type is <code>wchar_t</code>, then the string type +     in the interface becomes <code>std::wstring</code> +     (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class parser_base; +  class error_handler; + +  class document +  { +  public: +    document (parser_base&, +              const std::string& root_element_name, +              bool polymorphic = false); + +    document (parser_base&, +              const std::string& root_element_namespace, +              const std::string& root_element_name, +              bool polymorphic = false); + +  public: +    // Parse a local file. The file is accessed with std::ifstream +    // in binary mode. The std::ios_base::failure exception is used +    // to report io errors (badbit and failbit). +    void +    parse (const std::string& file); + +    // Parse a local file with a user-provided error_handler +    // object. The file is accessed with std::ifstream in binary +    // mode. The std::ios_base::failure exception is used to report +    // io errors (badbit and failbit). +    // +    void +    parse (const std::string& file, error_handler&); + +  public: +    // Parse std::istream. +    // +    void +    parse (std::istream&); + +    // Parse std::istream with a user-provided error_handler object. +    // +    void +    parse (std::istream&, error_handler&); + +    // Parse std::istream with a system id. +    // +    void +    parse (std::istream&, const std::string& system_id); + +    // Parse std::istream with a system id and a user-provided +    // error_handler object. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           error_handler&); + +    // Parse std::istream with system and public ids. 
+    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id); + +    // Parse std::istream with system and public ids and a user-provided +    // error_handler object. +    // +    void +    parse (std::istream&, +           const std::string& system_id, +           const std::string& public_id, +           error_handler&); + +  public: +    // Parse a chunk of input. You can call these functions multiple +    // times, passing true for the 'last' argument only in the final call. +    // +    void +    parse (const void* data, std::size_t size, bool last); + +    void +    parse (const void* data, std::size_t size, bool last, +           error_handler&); + +    void +    parse (const void* data, std::size_t size, bool last, +           const std::string& system_id); + +    void +    parse (const void* data, std::size_t size, bool last, +           const std::string& system_id, +           error_handler&); + +    void +    parse (const void* data, std::size_t size, bool last, +           const std::string& system_id, +           const std::string& public_id); + +    void +    parse (const void* data, std::size_t size, bool last, +           const std::string& system_id, +           const std::string& public_id, +           error_handler&); + +  public: +    // Low-level Expat-specific parsing API. +    // +    void +    parse_begin (XML_Parser); + +    void +    parse_begin (XML_Parser, const std::string& public_id); + +    void +    parse_begin (XML_Parser, error_handler&); + +    void +    parse_begin (XML_Parser, +                 const std::string& public_id, +                 error_handler&); + +    void +    parse_end (); +  }; +} +  </pre> + +  <p>The <code>document</code> class is a root parser for +     the vocabulary. The first argument to its constructors is the +     parser for the type of the root element. The <code>parser_base</code> +     class is the base type for all parser skeletons. 
The second and +     third arguments to the <code>document</code>'s constructors are +     the root element's name and namespace. The last argument, +     <code>polymorphic</code>, specifies whether the XML documents +     being parsed use polymorphism. For more information on support +     for XML Schema polymorphism in the C++/Parser mapping refer +     to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + +  <p>A number of overloaded <code>parse()</code> functions have the +     <code>system_id</code> and <code>public_id</code> arguments. The +     system id is a <em>system</em> identifier of the resource being +     parsed (for example, a URI or a full file path). The public id is a +     <em>public</em> identifier of the resource (for example, an +     application-specific name or a relative file path). The system id +     is used to resolve relative paths. In diagnostic messages the +     public id is used if it is available. Otherwise the system id +     is used.</p> + +  <p>The <code>parse_begin()</code> and <code>parse_end()</code> functions +     present a low-level, Expat-specific parsing API for maximum control. +     A typical use-case would look like this (pseudo-code):</p> + +  <pre class="c++"> +xxx_pimpl root_p; +document doc_p (root_p, "root"); + +root_p.pre (); +doc_p.parse_begin (xml_parser, "file.xml"); + +while (more_data_to_parse) +{ +  // Call XML_Parse or XML_ParseBuffer. + +  if (status == XML_STATUS_ERROR) +    break; +} + +// Call parse_end even in case of an error to translate +// XML and Schema errors to exceptions or error_handler +// calls. +// +doc_p.parse_end (); +result_type result (root_p.post_xxx ()); +  </pre> + +  <p>Note that if your vocabulary uses XML namespaces, the +     <code>XML_ParserCreateNS()</code> function should be used to create +     the XML parser. Space (<code>XML_Char (' ')</code>) should be used +     as a separator (the second argument to <code>XML_ParserCreateNS()</code>). 
+  </p> + +  <p>The error handling mechanisms employed by the <code>document</code> +     parser are described in <a href="#7.3">Section 7.3, "Error +     Handling"</a>.</p> + + +  <h2><a name="7.3">7.3 Error Handling</a></h2> + +  <p>There are three categories of errors that can result from running +     a parser on an XML document: System, XML, and Application. +     The System category contains memory allocation and file/stream +     operation errors. The XML category covers XML parsing and +     well-formedness checking as well as XML Schema validation errors. +     Finally, the Application category is for application logic errors +     that you may want to propagate from parser implementations to the +     caller of the parser. +  </p> + +  <p>The System errors are mapped to the standard exceptions. The +     out of memory condition is indicated by throwing an instance +     of <code>std::bad_alloc</code>. The stream operation errors +     are reported either by throwing an instance of +     <code>std::ios_base::failure</code> if exceptions are enabled +     or by setting the stream state.</p> + +  <p>Note that if you are parsing <code>std::istream</code> on +     which exceptions are not enabled, then you will need to +     check the stream state before calling the <code>post()</code> +     callback, as shown in the following example:</p> + +  <pre class="c++"> +int +main (int argc, char* argv[]) +{ +  ... + +  std::ifstream ifs (argv[1]); + +  if (ifs.fail ()) +  { +    cerr << argv[1] << ": unable to open" << endl; +    return 1; +  } + +  root_p.pre (); +  doc_p.parse (ifs); + +  if (ifs.fail ()) +  { +    cerr << argv[1] << ": io failure" << endl; +    return 1; +  } + +  result_type result (root_p.post_xxx ()); +} +  </pre> + +  <p>The above example can be rewritten to use exceptions +     as shown below:</p> + +  <pre class="c++"> +int +main (int argc, char* argv[]) +{ +  try +  { +    ... 
+ +    std::ifstream ifs; +    ifs.exceptions (std::ifstream::badbit | std::ifstream::failbit); +    ifs.open (argv[1]); + +    root_p.pre (); +    doc_p.parse (ifs); +    result_type result (root_p.post_xxx ()); +  } +  catch (const std::ifstream::failure&) +  { +    cerr << argv[1] << ": unable to open or io failure" << endl; +    return 1; +  } +} +  </pre> + + +  <p>For reporting application errors from parsing callbacks, you +     can throw any exceptions of your choice. They are propagated to +     the caller of the parser without any alterations.</p> + +  <p>The XML errors can be reported either by throwing the +     <code>xml_schema::parsing</code> exception or by a callback +     to the <code>xml_schema::error_handler</code> object (and +     <code>xercesc::ErrorHandler</code> object in case of Xerces-C++).</p> + +  <p>The <code>xml_schema::parsing</code> exception contains +     a list of warnings and errors that were accumulated during +     parsing. Note that this exception is thrown only if there +     was an error. This makes it impossible to obtain warnings +     from an otherwise successful parsing using this mechanism. +     The following listing shows the definition of the +     <code>xml_schema::parsing</code> exception. 
Note that if the +     character type is <code>wchar_t</code>, then the string type +     and output stream type in the definition become +     <code>std::wstring</code> and <code>std::wostream</code>, +     respectively (see <a href="#5.2">Section 5.2, "Character Type +     and Encoding"</a>).</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class exception: public std::exception +  { +  protected: +    virtual void +    print (std::ostream&) const = 0; +  }; + +  inline std::ostream& +  operator<< (std::ostream& os, const exception& e) +  { +    e.print (os); +    return os; +  } + + +  class severity +  { +  public: +    enum value +    { +      warning, +      error +    }; +  }; + + +  class error +  { +  public: +    error (xml_schema::severity, +           const std::string& id, +           unsigned long line, +           unsigned long column, +           const std::string& message); + +    xml_schema::severity +    severity () const; + +    const std::string& +    id () const; + +    unsigned long +    line () const; + +    unsigned long +    column () const; + +    const std::string& +    message () const; +  }; + +  std::ostream& +  operator<< (std::ostream&, const error&); + + +  class diagnostics: public std::vector<error> +  { +  }; + +  std::ostream& +  operator<< (std::ostream&, const diagnostics&); + + +  class parsing: public exception +  { +  public: +    parsing (); +    parsing (const xml_schema::diagnostics&); + +    const xml_schema::diagnostics& +    diagnostics () const; + +    virtual const char* +    what () const throw (); + +  protected: +    virtual void +    print (std::ostream&) const; +  }; +} +  </pre> + +  <p>The following example shows how we can catch and print this +     exception. The code will print diagnostics messages one per line +     in case of an error.</p> + +  <pre class="c++"> +int +main (int argc, char* argv[]) +{ +  try +  { +    // Parse. 
+  } +  catch (const xml_schema::parsing& e) +  { +    cerr << e << endl; +    return 1; +  } +} +  </pre> + +  <p>With the <code>error_handler</code> approach the diagnostics +     messages are delivered as parsing progresses. The following +     listing presents the definition of the <code>error_handler</code> +     interface. Note that if the character type is <code>wchar_t</code>, +     then the string type in the interface becomes <code>std::wstring</code> +     (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + +  <pre class="c++"> +namespace xml_schema +{ +  class error_handler +  { +  public: +    class severity +    { +    public: +      enum value +      { +        warning, +        error, +        fatal +      }; +    }; + +    virtual bool +    handle (const std::string& id, +            unsigned long line, +            unsigned long column, +            severity, +            const std::string& message) = 0; +  }; +} +  </pre> + +  <p>The return value of the <code>handle()</code> function indicates whether +     parsing should continue if possible. The error with the fatal severity +     level terminates the parsing process regardless of the returned value. +     At the end of the parsing process with an error that was reported via +     the  <code>error_handler</code> object, an empty +     <code>xml_schema::parsing</code> exception is thrown to indicate +     the failure to the caller. 
You can alter this behavior by throwing +     your own exception from the <code>handle()</code> function.</p> + + +  <!-- Appendix A --> + + +  <h1><a name="A">Appendix A — Supported XML Schema Constructs</a></h1> + +  <p>The C++/Parser mapping supports validation of the following W3C XML +     Schema constructs in the generated code.</p> + +  <!-- border="1" is necessary for html2ps --> +  <table id="features" border="1"> +    <tr><th>Construct</th><th>Notes</th></tr> +    <tr><th colspan="2">Structure</th></tr> + +    <tr><td>element</td><td></td></tr> +    <tr><td>attribute</td><td></td></tr> + +    <tr><td>any</td><td></td></tr> +    <tr><td>anyAttribute</td><td></td></tr> + +    <tr><td>all</td><td></td></tr> +    <tr><td>sequence</td><td></td></tr> +    <tr><td>choice</td><td></td></tr> + +    <tr><td>complex type, empty content</td><td></td></tr> +    <tr><td>complex type, mixed content</td><td></td></tr> +    <tr><td>complex type, simple content extension</td><td></td></tr> +    <tr><td>complex type, simple content restriction</td> +        <td>Simple type facets are not validated.</td></tr> +    <tr><td>complex type, complex content extension</td><td></td></tr> +    <tr><td>complex type, complex content restriction</td><td></td></tr> + +    <tr><td>list</td><td></td></tr> + +    <tr><th colspan="2">Datatypes</th></tr> + +    <tr><td>byte</td><td></td></tr> +    <tr><td>unsignedByte</td><td></td></tr> +    <tr><td>short</td><td></td></tr> +    <tr><td>unsignedShort</td><td></td></tr> +    <tr><td>int</td><td></td></tr> +    <tr><td>unsignedInt</td><td></td></tr> +    <tr><td>long</td><td></td></tr> +    <tr><td>unsignedLong</td><td></td></tr> +    <tr><td>integer</td><td></td></tr> +    <tr><td>nonPositiveInteger</td><td></td></tr> +    <tr><td>nonNegativeInteger</td><td></td></tr> +    <tr><td>positiveInteger</td><td></td></tr> +    <tr><td>negativeInteger</td><td></td></tr> + +    <tr><td>boolean</td><td></td></tr> + +    <tr><td>float</td><td></td></tr> 
+    <tr><td>double</td><td></td></tr> +    <tr><td>decimal</td><td></td></tr> + +    <tr><td>string</td><td></td></tr> +    <tr><td>normalizedString</td><td></td></tr> +    <tr><td>token</td><td></td></tr> +    <tr><td>Name</td><td></td></tr> +    <tr><td>NMTOKEN</td><td></td></tr> +    <tr><td>NCName</td><td></td></tr> +    <tr><td>language</td><td></td></tr> +    <tr><td>anyURI</td><td></td></tr> + +    <tr><td>ID</td><td>Identity constraint is not enforced.</td></tr> +    <tr><td>IDREF</td><td>Identity constraint is not enforced.</td></tr> + +    <tr><td>NMTOKENS</td><td></td></tr> +    <tr><td>IDREFS</td><td>Identity constraint is not enforced.</td></tr> + +    <tr><td>QName</td><td></td></tr> + +    <tr><td>base64Binary</td><td></td></tr> +    <tr><td>hexBinary</td><td></td></tr> + +    <tr><td>date</td><td></td></tr> +    <tr><td>dateTime</td><td></td></tr> +    <tr><td>duration</td><td></td></tr> +    <tr><td>gDay</td><td></td></tr> +    <tr><td>gMonth</td><td></td></tr> +    <tr><td>gMonthDay</td><td></td></tr> +    <tr><td>gYear</td><td></td></tr> +    <tr><td>gYearMonth</td><td></td></tr> +    <tr><td>time</td><td></td></tr> +  </table> + + +  </div> +</div> + +</body> +</html> | 
