diff options
Diffstat (limited to 'doc/cxx')
-rw-r--r-- | doc/cxx/parser/guide/figure-1.png | bin | 0 -> 34195 bytes | |||
-rw-r--r-- | doc/cxx/parser/guide/figure-1.svg | 373 | ||||
-rw-r--r-- | doc/cxx/parser/guide/guide.html2ps.in | 65 | ||||
-rw-r--r-- | doc/cxx/parser/guide/index.xhtml | 4163 | ||||
-rw-r--r-- | doc/cxx/parser/guide/index.xhtml.in | 4163 | ||||
-rw-r--r-- | doc/cxx/tree/guide/guide.html2ps.in | 65 | ||||
-rw-r--r-- | doc/cxx/tree/guide/index.xhtml | 2736 | ||||
-rw-r--r-- | doc/cxx/tree/guide/index.xhtml.in | 2736 | ||||
-rw-r--r-- | doc/cxx/tree/manual/index.xhtml | 6826 | ||||
-rw-r--r-- | doc/cxx/tree/manual/index.xhtml.in | 6826 | ||||
-rw-r--r-- | doc/cxx/tree/manual/manual.html2ps.in | 66 |
11 files changed, 28019 insertions, 0 deletions
diff --git a/doc/cxx/parser/guide/figure-1.png b/doc/cxx/parser/guide/figure-1.png Binary files differnew file mode 100644 index 0000000..15d1723 --- /dev/null +++ b/doc/cxx/parser/guide/figure-1.png diff --git a/doc/cxx/parser/guide/figure-1.svg b/doc/cxx/parser/guide/figure-1.svg new file mode 100644 index 0000000..d994a79 --- /dev/null +++ b/doc/cxx/parser/guide/figure-1.svg @@ -0,0 +1,373 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://web.resource.org/cc/" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="744.09448819" + height="1052.3622047" + id="svg2" + sodipodi:version="0.32" + inkscape:version="0.44.1" + sodipodi:docbase="/tmp" + sodipodi:docname="figure-1.svg" + inkscape:export-filename="/home/boris/tmp/figure-1.png" + inkscape:export-xdpi="76.195885" + inkscape:export-ydpi="76.195885"> + <defs + id="defs4"> + <marker + inkscape:stockid="Arrow1Lend" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lend" + style="overflow:visible;"> + <path + id="path2934" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;" + transform="scale(0.8) rotate(180) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Dot_l" + orient="auto" + refY="0.0" + refX="0.0" + id="Dot_l" + style="overflow:visible"> + <path + id="path2875" + d="M -2.5,-1.0 C -2.5,1.7600000 -4.7400000,4.0 -7.5,4.0 C -10.260000,4.0 -12.5,1.7600000 -12.5,-1.0 C -12.5,-3.7600000 -10.260000,-6.0 -7.5,-6.0 C -4.7400000,-6.0 -2.5,-3.7600000 -2.5,-1.0 z " + 
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;marker-end:none" + transform="scale(0.8) translate(7.4, 1)" /> + </marker> + <marker + inkscape:stockid="Arrow1Mend" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Mend" + style="overflow:visible;"> + <path + id="path2928" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;" + transform="scale(0.4) rotate(180) translate(10,0)" /> + </marker> + <marker + inkscape:stockid="Dot_m" + orient="auto" + refY="0.0" + refX="0.0" + id="Dot_m" + style="overflow:visible"> + <path + id="path2872" + d="M -2.5,-1.0 C -2.5,1.7600000 -4.7400000,4.0 -7.5,4.0 C -10.260000,4.0 -12.5,1.7600000 -12.5,-1.0 C -12.5,-3.7600000 -10.260000,-6.0 -7.5,-6.0 C -4.7400000,-6.0 -2.5,-3.7600000 -2.5,-1.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;marker-end:none" + transform="scale(0.4) translate(7.4, 1)" /> + </marker> + <marker + inkscape:stockid="Arrow1Lstart" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lstart" + style="overflow:visible"> + <path + id="path2937" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none" + transform="scale(0.8) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Arrow2Mend" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow2Mend" + style="overflow:visible;"> + <path + id="path2910" + style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;" + d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z " + transform="scale(0.6) rotate(180) translate(0,0)" /> + </marker> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + gridtolerance="10000" + guidetolerance="10" + 
objecttolerance="10" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="0.98994949" + inkscape:cx="328.23027" + inkscape:cy="733.01096" + inkscape:document-units="px" + inkscape:current-layer="layer1" + inkscape:window-width="1280" + inkscape:window-height="991" + inkscape:window-x="154" + inkscape:window-y="44" /> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <g + id="g3902"> + <rect + y="194.64178" + x="24.142784" + height="106.2678" + width="149.70432" + id="rect1872" + style="fill:#c5ddf8;fill-opacity:1;fill-rule:evenodd;stroke:#c5ddf8;stroke-width:5.29799986;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" /> + <text + sodipodi:linespacing="125%" + id="text3038" + y="219.99649" + x="28.284279" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;color:black;fill:black;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;font-family:Monospace" + xml:space="preserve"><tspan + y="219.99649" + x="28.284279" + id="tspan3064" + sodipodi:role="line">class people_pimpl</tspan><tspan + y="236.24649" + x="28.284279" + id="tspan3066" + sodipodi:role="line">{</tspan><tspan + y="252.49649" + x="28.284279" + id="tspan3068" + sodipodi:role="line"> void </tspan><tspan + y="268.74649" + x="28.284279" + id="tspan3070" + sodipodi:role="line"> person ();</tspan><tspan + y="284.99649" + 
x="28.284279" + id="tspan3072" + sodipodi:role="line">};</tspan></text> + </g> + <g + id="g3881"> + <rect + y="124.93772" + x="252.43373" + height="245.67592" + width="180.01601" + id="rect5750" + style="fill:#c5ddf8;fill-opacity:1;fill-rule:evenodd;stroke:#c5ddf8;stroke-width:9.12976837;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" /> + <text + sodipodi:linespacing="100%" + id="text5752" + y="148.27567" + x="257.5889" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:100%;writing-mode:lr-tb;text-anchor:start;color:black;fill:black;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;font-family:Monospace" + xml:space="preserve"><tspan + y="148.27567" + x="257.5889" + id="tspan5900" + sodipodi:role="line">class person_pimpl</tspan><tspan + y="161.27567" + x="257.5889" + id="tspan5902" + sodipodi:role="line">{</tspan><tspan + y="174.27567" + x="257.5889" + id="tspan5904" + sodipodi:role="line"> void</tspan><tspan + y="187.27567" + x="257.5889" + id="tspan5906" + sodipodi:role="line"> first_name (string);</tspan><tspan + y="200.27567" + x="257.5889" + id="tspan5908" + sodipodi:role="line" /><tspan + y="213.27567" + x="257.5889" + id="tspan5910" + sodipodi:role="line"> void</tspan><tspan + y="226.27567" + x="257.5889" + id="tspan5912" + sodipodi:role="line"> last_name (string);</tspan><tspan + y="239.27567" + x="257.5889" + id="tspan5914" + sodipodi:role="line" /><tspan + y="252.27567" + x="257.5889" + id="tspan5916" + sodipodi:role="line"> void</tspan><tspan + y="265.27567" + x="257.5889" + id="tspan5918" + sodipodi:role="line"> gender ();</tspan><tspan + y="278.27567" + 
x="257.5889" + id="tspan5920" + sodipodi:role="line" /><tspan + y="291.27567" + x="257.5889" + id="tspan5922" + sodipodi:role="line"> void</tspan><tspan + y="304.27567" + x="257.5889" + id="tspan5924" + sodipodi:role="line"> age (short);</tspan><tspan + y="317.27567" + x="257.5889" + id="tspan5926" + sodipodi:role="line"> </tspan><tspan + y="330.27567" + x="257.5889" + id="tspan5928" + sodipodi:role="line"> void</tspan><tspan + y="343.27567" + x="257.5889" + id="tspan5930" + sodipodi:role="line"> post_person ();</tspan><tspan + y="356.27567" + x="257.5889" + id="tspan5932" + sodipodi:role="line">};</tspan></text> + </g> + <g + id="g3845"> + <rect + y="77.741814" + x="506.28357" + height="99.610825" + width="151.1286" + id="rect5955" + style="fill:#c5ddf8;fill-opacity:1;fill-rule:evenodd;stroke:#c5ddf8;stroke-width:5.69227886;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" /> + <flowRoot + transform="translate(-5.050762,12.10153)" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" + id="flowRoot5957" + xml:space="preserve"><flowRegion + id="flowRegion5959"><rect + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" + y="74.534515" + x="516.18793" + height="88.893425" + width="143.44167" + id="rect5961" /></flowRegion><flowPara + id="flowPara5965">class string_pimpl</flowPara><flowPara + id="flowPara5967">{</flowPara><flowPara + id="flowPara5969"> string</flowPara><flowPara + id="flowPara5971"> post_string ();</flowPara><flowPara + id="flowPara5973">};</flowPara><flowPara + id="flowPara5975" /></flowRoot> </g> + <g + id="g3857"> + <rect + 
style="fill:#c5ddf8;fill-opacity:1;fill-rule:evenodd;stroke:#c5ddf8;stroke-width:5.69227886;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + id="rect5977" + width="151.1286" + height="99.610825" + x="506.28357" + y="316.15808" /> + <flowRoot + xml:space="preserve" + id="flowRoot5979" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" + transform="translate(-5.050761,250.5178)" + inkscape:export-filename="/tmp/figure-1.png" + inkscape:export-xdpi="546.53815" + inkscape:export-ydpi="546.53815"><flowRegion + id="flowRegion5981"><rect + id="rect5983" + width="143.44167" + height="88.893425" + x="516.18793" + y="74.534515" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" /></flowRegion><flowPara + id="flowPara5985">class short_pimpl</flowPara><flowPara + id="flowPara5987">{</flowPara><flowPara + id="flowPara5989"> short</flowPara><flowPara + id="flowPara5991"> post_short ();</flowPara><flowPara + id="flowPara5993">};</flowPara><flowPara + id="flowPara5995" /></flowRoot> </g> + <g + id="g3869"> + <rect + style="fill:#c5ddf8;fill-opacity:1;fill-rule:evenodd;stroke:#c5ddf8;stroke-width:5.69227886;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + id="rect6023" + width="151.1286" + height="99.610825" + x="505.7785" + y="196.93977" /> + <flowRoot + xml:space="preserve" + id="flowRoot6025" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" + transform="translate(-5.555838,129.2792)"><flowRegion + 
id="flowRegion6027"><rect + id="rect6029" + width="143.44167" + height="88.893425" + x="516.18793" + y="74.534515" + style="font-size:13px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Monospace" /></flowRegion><flowPara + id="flowPara6031">class gender_pimpl</flowPara><flowPara + id="flowPara6033">{</flowPara><flowPara + id="flowPara6035"> void</flowPara><flowPara + id="flowPara6037"> post_gender ();</flowPara><flowPara + id="flowPara6039">};</flowPara><flowPara + id="flowPara6041" /></flowRoot> </g> + <path + style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:black;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;marker-start:url(#Dot_l);marker-end:url(#Arrow1Lend)" + d="M 265.67011,339.69956 L 210.41811,339.34242 L 210.77124,264.14332 L 127.7843,264.4432" + id="path6051" + inkscape:connector-type="polyline" + sodipodi:nodetypes="cccs" /> + <path + sodipodi:nodetypes="cccc" + inkscape:connector-type="polyline" + id="path6077" + d="M 518.20825,383.6412 L 471.23616,384.14628 L 471.4887,300.55615 L 368.70568,300.80869" + style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:black;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_l);marker-end:url(#Arrow1Lend);stroke-opacity:1;display:inline" /> + <path + style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:black;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_l);marker-end:url(#Arrow1Lend);stroke-opacity:1;display:inline" + d="M 517.1981,262.42289 L 353.55339,262.42289" + id="path6081" + inkscape:connector-type="polyline" + sodipodi:nodetypes="cccs" /> + <path + style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:black;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_l);marker-end:url(#Arrow1Lend);stroke-opacity:1;display:inline" + d="M 
518.57143,145.93361 L 470.35714,146.14281 L 470.53572,183.07646 L 431.42857,183.79075" + id="path6089" + inkscape:connector-type="polyline" + sodipodi:nodetypes="cccc" /> + <path + style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:black;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-opacity:1;display:inline" + d="M 470.46175,178.43361 L 470.89286,222.36218 L 423.21428,222.71932" + id="path6091" + inkscape:connector-type="polyline" + sodipodi:nodetypes="ccc" /> + </g> +</svg> diff --git a/doc/cxx/parser/guide/guide.html2ps.in b/doc/cxx/parser/guide/guide.html2ps.in new file mode 100644 index 0000000..8131487 --- /dev/null +++ b/doc/cxx/parser/guide/guide.html2ps.in @@ -0,0 +1,65 @@ +@@html2ps { + option { + toc: hb; + colour: 1; + hyphenate: 1; + titlepage: 1; + } + + datefmt: "%B %Y"; + + titlepage { + content: " +<div align=center> + <h1><big>C++/Parser Mapping</big></h1> + <h1><big>Getting Started Guide</big></h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> +</div> + <p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href='https://www.codesynthesis.com/licenses/fdl-1.2.txt'>GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. 
+ </p> + + <p>This document is available in the following formats: + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/index.xhtml'>XHTML</a>, + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.pdf'>PDF</a>, and + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.ps'>PostScript</a>.</p>"; + } + + toc { + indent: 2em; + } + + header { + odd-right: $H; + even-left: $H; + } + + footer { + odd-left: $D; + odd-center: $T; + odd-right: $N; + + even-left: $N; + even-center: $T; + even-right: $D; + } +} + +body { + font-size: 12pt; + text-align: justify; +} + +pre { + font-size: 10pt; +} diff --git a/doc/cxx/parser/guide/index.xhtml b/doc/cxx/parser/guide/index.xhtml new file mode 100644 index 0000000..6964a14 --- /dev/null +++ b/doc/cxx/parser/guide/index.xhtml @@ -0,0 +1,4163 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Parser Mapping Getting Started Guide</title> + + <meta name="copyright" content="© 2005-2023 Code Synthesis"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parser,validation"/> + <meta name="description" content="C++/Parser Mapping Getting Started Guide"/> + + <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + line-height: 1.2em; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 140%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. 
*/ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage .title { + font-weight: bold; + font-size: 200%; + text-align: center; + } + + #titlepage #first-title { + padding: 1em 0 0.4em 0; + } + + #titlepage #second-title { + padding: 0.4em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + ol.steps { + padding-left : 1.8em; + } + + ol.steps li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + + div.img { + text-align: center; + padding: 2em 0 2em 0; + } + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + /* XML Schema features table. 
*/ + #features { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #features th, #features td { + border: 1px solid; + padding : 0.6em 0.6em 0.6em 0.6em; + } + + #features th { + background : #cde8f6; + } + + #features td { + text-align: left; + } + + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div class="title" id="first-title">C++/Parser Mapping</div> + <div class="title" id="second-title">Getting Started Guide</div> + + <p>Copyright © 2005-2023 Code Synthesis.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. 
+ </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.ps">PostScript</a>.</p> + + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a> + <table class="toc"> + <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> + <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>2</th><td><a href="#2">Hello World Example</a> + <table class="toc"> + <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> + <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> + <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> + <tr><th>2.4</th><td><a href="#2.4">Compiling and Running</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Parser Skeletons</a> + <table class="toc"> + <tr><th>3.1</th><td><a href="#3.1">Implementing the Gender Parser</a></td></tr> + <tr><th>3.2</th><td><a href="#3.2">Implementing the Person Parser</a></td></tr> + <tr><th>3.3</th><td><a href="#3.3">Implementing the People Parser</a></td></tr> + <tr><th>3.4</th><td><a href="#3.4">Connecting the Parsers Together</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Type Maps</a> + <table class="toc"> + <tr><th>4.1</th><td><a href="#4.1">Object Model</a></td></tr> + <tr><th>4.2</th><td><a href="#4.2">Type Map 
File Format</a></td></tr> + <tr><th>4.3</th><td><a href="#4.3">Parser Implementations</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Mapping Configuration</a> + <table class="toc"> + <tr><th>5.1</th><td><a href="#5.1">C++ Standard</a></td></tr> + <tr><th>5.2</th><td><a href="#5.2">Character Type and Encoding</a></td></tr> + <tr><th>5.3</th><td><a href="#5.3">Underlying XML Parser</a></td></tr> + <tr><th>5.4</th><td><a href="#5.4">XML Schema Validation</a></td></tr> + <tr><th>5.5</th><td><a href="#5.5">Support for Polymorphism</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>6</th><td><a href="#6">Built-In XML Schema Type Parsers</a> + <table class="toc"> + <tr><th>6.1</th><td><a href="#6.1"><code>QName</code> Parser</a></td></tr> + <tr><th>6.2</th><td><a href="#6.2"><code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></td></tr> + <tr><th>6.3</th><td><a href="#6.3"><code>base64Binary</code> and <code>hexBinary</code> Parsers</a></td></tr> + <tr><th>6.4</th><td><a href="#6.4">Time Zone Representation</a></td></tr> + <tr><th>6.5</th><td><a href="#6.5"><code>date</code> Parser</a></td></tr> + <tr><th>6.6</th><td><a href="#6.6"><code>dateTime</code> Parser</a></td></tr> + <tr><th>6.7</th><td><a href="#6.7"><code>duration</code> Parser</a></td></tr> + <tr><th>6.8</th><td><a href="#6.8"><code>gDay</code> Parser</a></td></tr> + <tr><th>6.9</th><td><a href="#6.9"><code>gMonth</code> Parser</a></td></tr> + <tr><th>6.10</th><td><a href="#6.10"><code>gMonthDay</code> Parser</a></td></tr> + <tr><th>6.11</th><td><a href="#6.11"><code>gYear</code> Parser</a></td></tr> + <tr><th>6.12</th><td><a href="#6.12"><code>gYearMonth</code> Parser</a></td></tr> + <tr><th>6.13</th><td><a href="#6.13"><code>time</code> Parser</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>7</th><td><a href="#7">Document Parser and Error Handling</a> + <table class="toc"> + <tr><th>7.1</th><td><a href="#7.1">Xerces-C++ Document Parser</a></td></tr> + 
<tr><th>7.2</th><td><a href="#7.2">Expat Document Parser</a></td></tr> + <tr><th>7.3</th><td><a href="#7.3">Error Handling</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th></th><td><a href="#A">Appendix A — Supported XML Schema Constructs</a></td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>The goal of this document is to provide you with an understanding of + the C++/Parser programming model and allow you to efficiently evaluate + XSD against your project's technical requirements. As such, this + document is intended for C++ developers and software architects + who are looking for an XML processing solution. Prior experience + with XML and C++ is required to understand this document. Basic + understanding of XML Schema is advantageous but not expected + or required. + </p> + + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this guide, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/parser/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is the place to ask technical questions about XSD and the C++/Parser mapping. 
+ Furthermore, the <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + + </ul> + + <!-- Introduction --> + + <h1><a name="1">1 Introduction</a></h1> + + <p>Welcome to CodeSynthesis XSD and the C++/Parser mapping. XSD is a + cross-platform W3C XML Schema to C++ data binding compiler. C++/Parser + is a W3C XML Schema to C++ mapping that represents an XML vocabulary + as a set of parser skeletons which you can implement to perform XML + processing as required by your application logic. + </p> + + <h2><a name="1.1">1.1 Mapping Overview</a></h2> + + <p>The C++/Parser mapping provides event-driven, stream-oriented + XML parsing, XML Schema validation, and C++ data binding. It was + specifically designed and optimized for high performance and + small footprint. Based on the static analysis of the schemas, XSD + generates compact, highly-optimized hierarchical state machines + that combine data extraction, validation, and even dispatching + in a single step. As a result, the generated code is typically + 2-10 times faster than general-purpose validating XML parsers + while maintaining the lowest static and dynamic memory footprints. + </p> + + <p>To speed up application development, the C++/Parser mapping + can be instructed to generate sample parser implementations + and a test driver which can then be filled with the application + logic code. The mapping also provides a wide range of + mechanisms for controlling and customizing the generated code.</p> + + <p>The next chapter shows how to create a simple application that uses + the C++/Parser mapping to parse, validate, and extract data from a + simple XML document. 
The following chapters show how to + use the C++/Parser mapping in more detail.</p> + + <h2><a name="1.2">1.2 Benefits</a></h2> + + <p>Traditional XML access APIs such as Document Object Model (DOM) + or Simple API for XML (SAX) have a number of drawbacks that + make them less suitable for creating robust and maintainable + XML processing applications. These drawbacks include: + </p> + + <ul class="list"> + <li>Generic representation of XML in terms of elements, attributes, + and text forces an application developer to write a substantial + amount of bridging code that identifies and transforms pieces + of information encoded in XML to a representation more suitable + for consumption by the application logic.</li> + + <li>String-based flow control defers error detection to runtime. + It also reduces code readability and maintainability.</li> + + <li>Lack of type safety because the data is represented + as text.</li> + + <li>Resulting applications are hard to debug, change, and + maintain.</li> + </ul> + + <p>In contrast, statically-typed, vocabulary-specific parser + skeletons produced by the C++/Parser mapping allow you to + operate in your domain terms instead of the generic elements, + attributes, and text. Static typing helps catch errors at + compile-time rather than at run-time. Automatic code generation + frees you for more interesting tasks (such as doing something + useful with the information stored in the XML documents) and + minimizes the effort needed to adapt your applications to + changes in the document structure. To summarize, the C++/Parser + mapping has the following key advantages over generic XML + access APIs:</p> + + <ul class="list"> + <li><b>Ease of use.</b> The generated code hides all the complexity + associated with recreating the document structure, maintaining the + dispatch state, and converting the data from the text representation + to data types suitable for manipulation by the application logic. 
+ Parser skeletons also provide a convenient mechanism for building + custom in-memory representations.</li> + + <li><b>Natural representation.</b> The generated parser skeletons + implement parser callbacks as virtual functions with names + corresponding to elements and attributes in XML. As a result, + you process the XML data using your domain vocabulary instead + of generic elements, attributes, and text. + </li> + + <li><b>Concise code.</b> With a separate parser skeleton for each + XML Schema type, the application implementation is + simpler and thus easier to read and understand.</li> + + <li><b>Safety.</b> The XML data is delivered to parser callbacks as + statically typed objects. The parser callbacks themselves are virtual + functions. This helps catch programming errors at compile-time + rather than at runtime.</li> + + <li><b>Maintainability.</b> Automatic code generation minimizes the + effort needed to adapt the application to changes in the + document structure. With static typing, the C++ compiler + can pin-point the places in the application code that need to be + changed.</li> + + <li><b>Efficiency.</b> The generated parser skeletons combine + data extraction, validation, and even dispatching in a single + step. This makes them much more efficient than traditional + architectures with separate stages for validation and data + extraction/dispatch.</li> + </ul> + + <!-- Hello World Parser --> + + + <h1><a name="2">2 Hello World Example</a></h1> + + <p>In this chapter we will examine how to parse a very simple XML + document using the XSD-generated C++/Parser skeletons. 
+ The code presented in this chapter is based on the <code>hello</code> + example which can be found in the <code>cxx/parser/</code> directory in + the <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + + <p>First, we need to get an idea about the structure + of the XML documents we are going to process. Our + <code>hello.xml</code>, for example, could look like this:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + <p>Then we can write a description of the above XML in the + XML Schema language and save it into <code>hello.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello"> + <xs:sequence> + <xs:element name="greeting" type="xs:string"/> + <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello"/> + +</xs:schema> + </pre> + + <p>Even if you are not familiar with XML Schema, it + should be easy to connect declarations in <code>hello.xsd</code> + to elements in <code>hello.xml</code>. The <code>hello</code> type + is defined as a sequence of the nested <code>greeting</code> and + <code>name</code> elements. Note that the term sequence in XML + Schema means that elements should appear in a particular order + as opposed to appearing multiple times. The <code>name</code> + element has its <code>maxOccurs</code> property set to + <code>unbounded</code> which means it can appear multiple times + in an XML document. Finally, the globally-defined <code>hello</code> + element prescribes the root element for our vocabulary. 
For an + easily-approachable introduction to XML Schema refer to + <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: + Primer</a>.</p> + + <p>The above schema is a specification of our XML vocabulary; it tells + everybody what valid documents of our XML-based language should look + like. The next step is to compile this schema to generate + the parser skeletons.</p> + + <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + + <p>Now we are ready to translate our <code>hello.xsd</code> to C++ parser + skeletons. To do this we invoke the XSD compiler from a terminal + (UNIX) or a command prompt (Windows): + </p> + + <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat hello.xsd + </pre> + + <p>The <code>--xml-parser</code> option indicates that we want to + use Expat as the underlying XML parser (see <a href="#5.3">Section + 5.3, "Underlying XML Parser"</a>). The XSD compiler produces two + C++ files: <code>hello-pskel.hxx</code> and <code>hello-pskel.cxx</code>. + The following code fragment is taken from <code>hello-pskel.hxx</code>; + it should give you an idea about what gets generated: + </p> + + <pre class="c++"> +class hello_pskel +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + greeting (const std::string&); + + virtual void + name (const std::string&); + + virtual void + post_hello (); + + // Parser construction API. + // + void + greeting_parser (xml_schema::string_pskel&); + + void + name_parser (xml_schema::string_pskel&); + + void + parsers (xml_schema::string_pskel& /* greeting */, + xml_schema::string_pskel& /* name */); + +private: + ... +}; + </pre> + + <p>The first four member functions shown above are called parser + callbacks. You would normally override them in your implementation + of the parser to do something useful.
Let's go through all of + them one by one.</p> + + <p>The <code>pre()</code> function is an initialization callback. It is + called when a new element of type <code>hello</code> is about + to be parsed. You would normally use this function to allocate a new + instance of the resulting type or clear accumulators that are used + to gather information during parsing. The default implementation + of this function does nothing.</p> + + <p>The <code>post_hello()</code> function is a finalization callback. Its + name is constructed by adding the parser skeleton name to the + <code>post_</code> prefix. The finalization callback is called when + parsing of the element is complete and the result, if any, should + be returned. Note that in our case the return type of + <code>post_hello()</code> is <code>void</code> which means there + is nothing to return. More on parser return types later. + </p> + + <p>You may be wondering why the finalization callback is called + <code>post_hello()</code> instead of <code>post()</code> just + like <code>pre()</code>. The reason for this is that + finalization callbacks can have different return types and + result in function signature clashes across inheritance + hierarchies. To prevent this the signatures of finalization + callbacks are made unique by adding the type name to their names.</p> + + <p>The <code>greeting()</code> and <code>name()</code> functions are + called when the <code>greeting</code> and <code>name</code> elements + have been parsed, respectively. Their arguments are of type + <code>std::string</code> and contain the data extracted from XML.</p> + + <p>The last three functions are for connecting parsers to each other. + For example, there is a predefined parser for built-in XML Schema type + <code>string</code> in the XSD runtime. 
We will be using + it to parse the contents of <code>greeting</code> and + <code>name</code> elements, as shown in the next section.</p> + + <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + + <p>At this point we have all the parts we need to do something useful + with the information stored in our XML document. The first step is + to implement the parser: + </p> + + <pre class="c++"> +#include <iostream> +#include "hello-pskel.hxx" + +class hello_pimpl: public hello_pskel +{ +public: + virtual void + greeting (const std::string& g) + { + greeting_ = g; + } + + virtual void + name (const std::string& n) + { + std::cout << greeting_ << ", " << n << "!" << std::endl; + } + +private: + std::string greeting_; +}; + </pre> + + <p>We left both <code>pre()</code> and <code>post_hello()</code> with the + default implementations; we don't have anything to initialize or + return. The rest is pretty straightforward: we store the greeting + in a member variable and later, when parsing names, use it to + say hello.</p> + + <p>An observant reader may ask what happens if the <code>name</code> + element comes before <code>greeting</code>? Don't we need to + make sure <code>greeting_</code> was initialized and report + an error otherwise? The answer is no, we don't have to do + any of this. The <code>hello_pskel</code> parser skeleton + performs validation of XML according to the schema from which + it was generated. As a result, it will check the order + of the <code>greeting</code> and <code>name</code> elements + and report an error if it is violated.</p> + + <p>Now it is time to put this parser implementation to work:</p> + + <pre class="c++"> +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + // Construct the parser. + // + xml_schema::string_pimpl string_p; + hello_pimpl hello_p; + + hello_p.greeting_parser (string_p); + hello_p.name_parser (string_p); + + // Parse the XML instance.
+ // + xml_schema::document doc_p (hello_p, "hello"); + + hello_p.pre (); + doc_p.parse (argv[1]); + hello_p.post_hello (); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>The first part of this code snippet instantiates individual parsers + and assembles them into a complete vocabulary parser. + <code>xml_schema::string_pimpl</code> is an implementation of a parser + for built-in XML Schema type <code>string</code>. It is provided by + the XSD runtime along with parsers for other built-in types (for + more information on the built-in parsers see <a href="#6">Chapter 6, + "Built-In XML Schema Type Parsers"</a>). We use <code>string_pimpl</code> + to parse the <code>greeting</code> and <code>name</code> elements as + indicated by the calls to <code>greeting_parser()</code> and + <code>name_parser()</code>. + </p> + + <p>Then we instantiate a document parser (<code>doc_p</code>). The + first argument to its constructor is the parser for + the root element (<code>hello_p</code> in our case). The + second argument is the root element name. + </p> + + <p>The final piece is the calls to <code>pre()</code>, <code>parse()</code>, + and <code>post_hello()</code>. The call to <code>parse()</code> + performs the actual XML parsing while the calls to <code>pre()</code> and + <code>post_hello()</code> make sure that the parser for the root + element can perform proper initialization and cleanup.</p> + + <p>While our parser implementation and test driver are pretty small and + easy to write by hand, for bigger XML vocabularies it can be a + substantial effort. To help with this task XSD can automatically + generate sample parser implementations and a test driver from your + schemas. You can request the generation of a sample implementation with + empty function bodies by specifying the <code>--generate-noop-impl</code> + option.
Or you can generate a sample implementation that prints the + data stored in XML by using the <code>--generate-print-impl</code> + option. To request the generation of a test driver you can use the + <code>--generate-test-driver</code> option. For more information + on these options refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. The <code>'generated'</code> example + in the <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + shows the sample implementation generation feature in action.</p> + + + <h2><a name="2.4">2.4 Compiling and Running</a></h2> + + <p>After saving all the parts from the previous section in + <code>driver.cxx</code>, we are ready to compile our first + application and run it on the test XML document. On a UNIX + system this can be done with the following commands: + </p> + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx hello-pskel.cxx +$ c++ -std=c++11 -o driver driver.o hello-pskel.o -lexpat +$ ./driver hello.xml +Hello, sun! +Hello, moon! +Hello, world! + </pre> + + <p>Here <code>.../libxsd</code> represents the path to the + <a href="https://cppget.org/libxsd">libxsd</a> package root + directory. We can also test the error handling. To test XML + well-formedness checking, we can try to parse + <code>hello-pskel.hxx</code>:</p> + + <pre class="terminal"> +$ ./driver hello-pskel.hxx +hello-pskel.hxx:1:0: not well-formed (invalid token) + </pre> + + <p>We can also try to parse a valid XML but not from our + vocabulary, for example <code>hello.xsd</code>:</p> + + <pre class="terminal"> +$ ./driver hello.xsd +hello.xsd:2:0: expected element 'hello' instead of +'http://www.w3.org/2001/XMLSchema#schema' + </pre> + + + <!-- Chapter 3 --> + + + <h1><a name="3">3 Parser Skeletons</a></h1> + + <p>As we have seen in the previous chapter, the XSD compiler generates + a parser skeleton class for each type defined in XML Schema.
In + this chapter we will take a closer look at different functions + that comprise a parser skeleton as well as the way to connect + our implementations of these parser skeletons to create a complete + parser.</p> + + <p>In this and subsequent chapters we will use the following schema + that describes a collection of person records. We save it in + <code>people.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:simpleType name="gender"> + <xs:restriction base="xs:string"> + <xs:enumeration value="male"/> + <xs:enumeration value="female"/> + </xs:restriction> + </xs:simpleType> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="gender" type="gender"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> + </xs:complexType> + + <xs:complexType name="people"> + <xs:sequence> + <xs:element name="person" type="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="people" type="people"/> + +</xs:schema> + </pre> + + <p>A sample XML instance to go along with this schema is saved + in <code>people.xml</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + <p>Compiling <code>people.xsd</code> with the XSD compiler results + in three parser skeletons being generated: <code>gender_pskel</code>, + <code>person_pskel</code>, and <code>people_pskel</code>. 
We are going + to examine and implement each of them in the subsequent sections.</p> + + <h2><a name="3.1">3.1 Implementing the Gender Parser</a></h2> + + <p>The generated <code>gender_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class gender_pskel: public virtual xml_schema::string_pskel +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + post_gender (); +}; + </pre> + + <p>Notice that <code>gender_pskel</code> inherits from + <code>xml_schema::string_pskel</code> which is a parser skeleton + for built-in XML Schema type <code>string</code> and is + predefined in the XSD runtime library. This is an example + of the general rule that parser skeletons follow: if a type + in XML Schema inherits from another then there will be an + equivalent inheritance between the corresponding parser + skeleton classes.</p> + + <p>The <code>pre()</code> and <code>post_gender()</code> callbacks + should look familiar from the previous chapter. Let's now + implement the parser. Our implementation will simply print + the gender to <code>cout</code>:</p> + + + <pre class="c++"> +class gender_pimpl: public gender_pskel, + public xml_schema::string_pimpl +{ +public: + virtual void + post_gender () + { + std::string s = post_string (); + cout << "gender: " << s << endl; + } +}; + </pre> + + <p>While the code is quite short, there is a lot going on. First, + notice that we are inheriting from <code>gender_pskel</code> <em>and</em> + from <code>xml_schema::string_pimpl</code>. We've encountered + <code>xml_schema::string_pimpl</code> already; it is an + implementation of the <code>xml_schema::string_pskel</code> parser + skeleton for built-in XML Schema type <code>string</code>.</p> + + <p>This is another common theme in the C++/Parser programming model: + reusing implementations of the base parsers in the derived ones with + the C++ mixin idiom.
In our case, <code>string_pimpl</code> will + do all the dirty work of extracting the data and we can just get + it at the end with the call to <code>post_string()</code>.</p> + + <p>In case you are curious, here is what + <code>xml_schema::string_pskel</code> and + <code>xml_schema::string_pimpl</code> look like:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_pskel: public simple_content + { + public: + virtual std::string + post_string () = 0; + }; + + class string_pimpl: public virtual string_pskel + { + public: + virtual void + _pre (); + + virtual void + _characters (const xml_schema::ro_string&); + + virtual std::string + post_string (); + + protected: + std::string str_; + }; +} + </pre> + + <p>There are three new pieces in this code that we haven't seen yet. + They are the <code>simple_content</code> class as well as + the <code>_pre()</code> and <code>_characters()</code> functions. + The <code>simple_content</code> class is defined in the XSD + runtime and is a base class for all parser skeletons that conform + to the simple content model in XML Schema. Types with the + simple content model cannot have nested elements—only text + and attributes. There is also the <code>complex_content</code> + class which corresponds to the complex content model (types with + nested elements, for example, <code>person</code> from + <code>people.xsd</code>).</p> + + <p>The <code>_pre()</code> function is a parser callback. Remember we + talked about the <code>pre()</code> and <code>post_*()</code> callbacks + in the previous chapter? There are actually two more callbacks + with similar roles: <code>_pre()</code> and <code>_post()</code>. + As a result, each parser skeleton has four special callbacks:</p> + + <pre class="c++"> + virtual void + pre (); + + virtual void + _pre (); + + virtual void + _post (); + + virtual void + post_name (); + </pre> + + <p><code>pre()</code> and <code>_pre()</code> are initialization + callbacks.
They get called in that order before a new instance of the type + is about to be parsed. The difference between <code>pre()</code> and + <code>_pre()</code> is conventional: <code>pre()</code> can + be completely overridden by a derived parser. The derived + parser can also override <code>_pre()</code> but has to always call + the original version. This allows you to partition initialization + into customizable and required parts.</p> + + <p>Similarly, <code>_post()</code> and <code>post_name()</code> are + finalization callbacks with exactly the same semantics: + <code>post_name()</code> can be completely overridden by the derived + parser while the original <code>_post()</code> should always be called. + </p> + + <p>The final bit we need to discuss in this section is the + <code>_characters()</code> function. As you might have guessed, it + is also a callback. A low-level one that delivers raw character content + for the type being parsed. You will seldom need to use this callback + directly. Using implementations for the built-in parsers provided by + the XSD runtime is usually a simpler and more convenient + alternative.</p> + + <p>At this point you might be wondering why some <code>post_*()</code> + callbacks, for example <code>post_string()</code>, return some data + while others, for example <code>post_gender()</code>, have + <code>void</code> as a return type. This is a valid concern + and it will be addressed in the next chapter.</p> + + <h2><a name="3.2">3.2 Implementing the Person Parser</a></h2> + + <p>The generated <code>person_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class person_pskel: public xml_schema::complex_content +{ +public: + // Parser callbacks. Override them in your implementation. 
+ // + virtual void + pre (); + + virtual void + first_name (const std::string&); + + virtual void + last_name (const std::string&); + + virtual void + gender (); + + virtual void + age (short); + + virtual void + post_person (); + + // Parser construction API. + // + void + first_name_parser (xml_schema::string_pskel&); + + void + last_name_parser (xml_schema::string_pskel&); + + void + gender_parser (gender_pskel&); + + void + age_parser (xml_schema::short_pskel&); + + void + parsers (xml_schema::string_pskel& /* first-name */, + xml_schema::string_pskel& /* last-name */, + gender_pskel& /* gender */, + xml_schema::short_pskel& /* age */); +}; + </pre> + + + <p>As you can see, we have a parser callback for each of the nested + elements found in the <code>person</code> XML Schema type. + The implementation of this parser is straightforward:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + cout << "first: " << f << endl; + } + + virtual void + last_name (const std::string& l) + { + cout << "last: " << l << endl; + } + + virtual void + age (short a) + { + cout << "age: " << a << endl; + } +}; + </pre> + + <p>Notice that we didn't override the <code>gender()</code> callback + because all the printing is done by <code>gender_pimpl</code>.</p> + + + <h2><a name="3.3">3.3 Implementing the People Parser</a></h2> + + <p>The generated <code>people_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class people_pskel: public xml_schema::complex_content +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + person (); + + virtual void + post_people (); + + // Parser construction API. + // + void + person_parser (person_pskel&); + + void + parsers (person_pskel& /* person */); +}; + </pre> + + <p>The <code>person()</code> callback will be called after parsing each + <code>person</code> element.
While <code>person_pimpl</code> does + all the printing, one useful thing we can do in this callback is to + print an extra newline after each person record so that our + output is more readable:</p> + + <pre class="c++"> +class people_pimpl: public people_pskel +{ +public: + virtual void + person () + { + cout << endl; + } +}; + </pre> + + <p>Now it is time to put everything together.</p> + + + <h2><a name="3.4">3.4 Connecting the Parsers Together</a></h2> + + <p>At this point we have all the individual parsers implemented + and can proceed to assemble them into a complete parser + for our XML vocabulary. The first step is to instantiate + all the individual parsers that we will need:</p> + + <pre class="c++"> +xml_schema::short_pimpl short_p; +xml_schema::string_pimpl string_p; + +gender_pimpl gender_p; +person_pimpl person_p; +people_pimpl people_p; + </pre> + + <p>Notice that our schema uses two built-in XML Schema types: + <code>string</code> for the <code>first-name</code> and + <code>last-name</code> elements as well as <code>short</code> + for <code>age</code>. We will use predefined parsers that + come with the XSD runtime to handle these types. The next + step is to connect all the individual parsers. We do this + with the help of functions defined in the parser + skeletons and marked with the "Parser Construction API" + comment. One way to do it is to connect each individual + parser by calling the <code>*_parser()</code> functions:</p> + + <pre class="c++"> +person_p.first_name_parser (string_p); +person_p.last_name_parser (string_p); +person_p.gender_parser (gender_p); +person_p.age_parser (short_p); + +people_p.person_parser (person_p); + </pre> + + <p>You might be wondering what happens if you do not provide + a parser by not calling one of the <code>*_parser()</code> functions. + In that case the corresponding XML content will be skipped, + including validation. 
This is an efficient way to ignore parts + of the document that you are not interested in.</p> + + + <p>An alternative, shorter, way to connect the parsers is by using + the <code>parsers()</code> function, which connects all the parsers + for a given type at once:</p> + + <pre class="c++"> +person_p.parsers (string_p, string_p, gender_p, short_p); +people_p.parsers (person_p); + </pre> + + <p>The following figure illustrates the resulting connections. Notice + the correspondence between return types of the <code>post_*()</code> + functions and argument types of element callbacks that are connected + by the arrows.</p> + + <!-- align=center is needed for html2ps --> + <div class="img" align="center"><img src="figure-1.png"/></div> + + <p>The last step is the construction of the document parser and + invocation of the complete parser on our sample XML instance:</p> + + <pre class="c++"> +xml_schema::document doc_p (people_p, "people"); + +people_p.pre (); +doc_p.parse ("people.xml"); +people_p.post_people (); + </pre> + + <p>Let's consider <code>xml_schema::document</code> in + more detail. While the exact definition of this class + varies depending on the underlying parser selected, + here is the common part:</p> + + <pre class="c++"> +namespace xml_schema +{ + class document + { + public: + document (xml_schema::parser_base&, + const std::string& root_element_name, + bool polymorphic = false); + + document (xml_schema::parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + void + parse (const std::string& file); + + void + parse (std::istream&); + + ... + + }; +} + </pre> + + <p><code>xml_schema::document</code> is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element (<code>people_p</code> + in our case).
Because a type parser is only concerned with + the element's content and not with the element's name, we need + to specify the root element's name somewhere. That's + what is passed as the second and third arguments to the + <code>document</code>'s constructors.</p> + + <p>There are also two overloaded <code>parse()</code> functions + defined in the <code>document</code> class (there are actually + more but the others are specific to the underlying XML parser). + The first version parses a local file identified by a name. The + second version reads the data from an input stream. For more + information on the <code>xml_schema::document</code> class + refer to <a href="#7">Chapter 7, "Document Parser and Error + Handling"</a>.</p> + + <p>Let's now consider a step-by-step list of actions that happen + as we parse through <code>people.xml</code>. The content of + <code>people.xml</code> is repeated below for convenience.</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + + <ol class="steps"> + <li><code>people_p.pre()</code> is called from + <code>main()</code>. We did not provide any implementation + for this callback so this call is a no-op.</li> + + <li><code>doc_p.parse("people.xml")</code> is called from + <code>main()</code>. The parser opens the file and starts + parsing its content.</li> + + <li>The parser encounters the root element. <code>doc_p</code> + verifies that the root element is correct and calls + <code>_pre()</code> on <code>people_p</code> which is also + a no-op. Parsing is now delegated to <code>people_p</code>.</li> + + <li>The parser encounters the <code>person</code> element. 
+ <code>people_p</code> determines that <code>person_p</code> + is responsible for parsing this element. <code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>person_p</code>. + Parsing is now delegated to <code>person_p</code>.</li> + + <li>The parser encounters the <code>first-name</code> element. + <code>person_p</code> determines that <code>string_p</code> + is responsible for parsing this element. <code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>string_p</code>. + Parsing is now delegated to <code>string_p</code>.</li> + + <li>The parser encounters character content consisting of + <code>"John"</code>. The <code>_characters()</code> callback is + called on <code>string_p</code>.</li> + + <li>The parser encounters the end of <code>first-name</code> + element. The <code>_post()</code> and <code>post_string()</code> + callbacks are called on <code>string_p</code>. The + <code>first_name()</code> callback is called on <code>person_p</code> + with the return value of <code>post_string()</code>. The + <code>first_name()</code> implementation prints + <code>"first: John"</code> to <code>cout</code>. + Parsing is now returned to <code>person_p</code>.</li> + + <li>Steps analogous to 5-7 are performed for the <code>last-name</code>, + <code>gender</code>, and <code>age</code> elements.</li> + + <li>The parser encounters the end of <code>person</code> + element. The <code>_post()</code> and <code>post_person()</code> + callbacks are called on <code>person_p</code>. The + <code>person()</code> callback is called on <code>people_p</code>. + The <code>person()</code> implementation prints a new line + to <code>cout</code>. Parsing is now returned to + <code>people_p</code>.</li> + + <li>Steps 4-9 are performed for the second <code>person</code> + element.</li> + + <li>The parser encounters the end of <code>people</code> + element. The <code>_post()</code> callback is called on + <code>people_p</code>. 
The <code>doc_p.parse("people.xml")</code> + call returns to <code>main()</code>.</li> + + <li><code>people_p.post_people()</code> is called from + <code>main()</code> which is a no-op.</li> + + </ol> + + + <!-- Chpater 4 --> + + + <h1><a name="4">4 Type Maps</a></h1> + + <p>There are many useful things you can do inside parser callbacks as they + are right now. There are, however, times when you want to propagate + some information from one parser to another or to the caller of the + parser. One common task that would greatly benefit from such a + possibility is building a tree-like in-memory object model of the + data stored in XML. During execution, each individual sub-parser + would create a sub-tree and return it to its <em>parent</em> parser + which can then incorporate this sub-tree into the whole tree.</p> + + <p>In this chapter we will discuss the mechanisms offered by the + C++/Parser mapping for returning information from individual + parsers and see how to use them to build an object model + of our people vocabulary.</p> + + <h2><a name="4.1">4.1 Object Model</a></h2> + + <p>An object model for our person record example could + look like this (saved in the <code>people.hxx</code> file):</p> + + <pre class="c++"> +#include <string> +#include <vector> + +enum gender +{ + male, + female +}; + +class person +{ +public: + person (const std::string& first, + const std::string& last, + ::gender gender, + short age) + : first_ (first), last_ (last), + gender_ (gender), age_ (age) + { + } + + const std::string& + first () const + { + return first_; + } + + const std::string& + last () const + { + return last_; + } + + ::gender + gender () const + { + return gender_; + } + + short + age () const + { + return age_; + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +typedef std::vector<person> people; + </pre> + + <p>While it is clear which parser is responsible for which part of + the object model, it is not exactly 
clear how, for + example, <code>gender_pimpl</code> will deliver <code>gender</code> + to <code>person_pimpl</code>. You might have noticed that + <code>string_pimpl</code> manages to deliver its value to the + <code>first_name()</code> callback of <code>person_pimpl</code>. Let's + see how we can utilize the same mechanism to propagate our + own data.</p> + + <p>There is a way to tell the XSD compiler that you want to + exchange data between parsers. More precisely, for each + type defined in XML Schema, you can tell the compiler two things. + First, the return type of the <code>post_*()</code> callback + in the parser skeleton generated for this type. And, second, + the argument type for callbacks corresponding to elements and + attributes of this type. For example, for XML Schema type + <code>gender</code> we can specify the return type for + <code>post_gender()</code> in the <code>gender_pskel</code> + skeleton and the argument type for the <code>gender()</code> callback + in the <code>person_pskel</code> skeleton. As you might have guessed, + the generated code will then pass the return value from the + <code>post_*()</code> callback as an argument to the element or + attribute callback.</p> + + <p>The way to tell the XSD compiler about these XML Schema to + C++ mappings is with type map files. Here is a simple type + map for the <code>gender</code> type from the previous paragraph:</p> + + <pre class="type-map"> +include "people.hxx"; +gender ::gender ::gender; + </pre> + + <p>The first line indicates that the generated code must include + <code>people.hxx</code> in order to get the definition for the + <code>gender</code> type. The second line specifies that both + argument and return types for the <code>gender</code> + XML Schema type should be the <code>::gender</code> C++ enum + (we use fully-qualified C++ names to avoid name clashes). + The next section will describe the type map format in detail. 
+ We save this type map in <code>people.map</code> and + then translate our schemas with the <code>--type-map</code> + option to let the XSD compiler know about our type map:</p> + + <pre class="terminal"> +$ xsd cxx-parser --type-map people.map people.xsd + </pre> + + <p>If we now look at the generated <code>people-pskel.hxx</code>, + we will see the following changes in the <code>gender_pskel</code> and + <code>person_pskel</code> skeletons:</p> + + <pre class="c++"> +#include "people.hxx" + +class gender_pskel: public virtual xml_schema::string_pskel +{ + virtual ::gender + post_gender () = 0; + + ... +}; + +class person_pskel: public xml_schema::complex_content +{ + virtual void + gender (::gender); + + ... +}; + </pre> + + <p>Notice that <code>#include "people.hxx"</code> was added to + the generated header file from the type map to provide the + definition for the <code>gender</code> enum.</p> + + <h2><a name="4.2">4.2 Type Map File Format</a></h2> + + <p>Type map files are used to define a mapping between XML Schema + and C++ types. The compiler uses this information + to determine return types of <code>post_*()</code> + callbacks in parser skeletons corresponding to XML Schema + types as well as argument types for callbacks corresponding + to elements and attributes of these types.</p> + + <p>The compiler has a set of predefined mapping rules that map + the built-in XML Schema types to suitable C++ types (discussed + below) and all other types to <code>void</code>. + By providing your own type maps you can override these predefined + rules. 
The format of the type map file is presented below: + </p> + + <pre class="type-map"> +namespace <schema-namespace> [<cxx-namespace>] +{ + (include <file-name>;)* + ([type] <schema-type> <cxx-ret-type> [<cxx-arg-type>];)* +} + </pre> + + <p>Both <code><i><schema-namespace></i></code> and + <code><i><schema-type></i></code> are regex patterns while + <code><i><cxx-namespace></i></code>, + <code><i><cxx-ret-type></i></code>, and + <code><i><cxx-arg-type></i></code> are regex pattern + substitutions. All names can be optionally enclosed in + <code>" "</code>, for example, to include white-spaces.</p> + + <p><code><i><schema-namespace></i></code> determines XML + Schema namespace. Optional <code><i><cxx-namespace></i></code> + is prefixed to every C++ type name in this namespace declaration. + <code><i><cxx-ret-type></i></code> is a C++ type name that is + used as a return type for the <code>post_*()</code> callback. + Optional <code><i><cxx-arg-type></i></code> is an argument + type for callbacks corresponding to elements and attributes + of this type. If <code><i><cxx-arg-type></i></code> is not + specified, it defaults to <code><i><cxx-ret-type></i></code> + if <code><i><cxx-ret-type></i></code> ends with <code>*</code> or + <code>&</code> (that is, it is a pointer or a reference) and + <code>const <i><cxx-ret-type></i>&</code> + otherwise. + <code><i><file-name></i></code> is a file name either in the + <code>" "</code> or <code>< ></code> format + and is added with the <code>#include</code> directive to + the generated code.</p> + + <p>The <code><b>#</b></code> character starts a comment that ends + with a new line or end of file. To specify a name that contains + <code><b>#</b></code> enclose it in <code><b>" "</b></code>. + For example:</p> + + <pre> +namespace http://www.example.com/xmlns/my my +{ + include "my.hxx"; + + # Pass apples by value. + # + apple apple; + + # Pass oranges as pointers. 
+ # + orange orange_t*; +} + </pre> + + <p>In the example above, for the + <code>http://www.example.com/xmlns/my#orange</code> + XML Schema type, the <code>my::orange_t*</code> C++ type will + be used as both return and argument types.</p> + + <p>Several namespace declarations can be specified in a single + file. The namespace declaration can also be completely + omitted to map types in a schema without a namespace. For + instance:</p> + + <pre class="type-map"> +include "my.hxx"; +apple apple; + +namespace http://www.example.com/xmlns/my +{ + orange "const orange_t*"; +} + </pre> + + <p>The compiler has a number of predefined mapping rules for + the built-in XML Schema types which can be presented as the + following map files. The string-based XML Schema types are + mapped to either <code>std::string</code> or + <code>std::wstring</code> depending on the character type + selected (see <a href="#5.2"> Section 5.2, "Character Type and + Encoding"</a> for more information). The binary XML Schema + types are mapped to either <code>std::unique_ptr<xml_schema::buffer></code> + or <code>std::auto_ptr<xml_schema::buffer></code> + depending on the C++ standard selected (C++11 or C++98, + respectively; refer to the <code>--std</code> XSD compiler + command line option for details).</p> + + <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ + boolean bool bool; + + byte "signed char" "signed char"; + unsignedByte "unsigned char" "unsigned char"; + + short short short; + unsignedShort "unsigned short" "unsigned short"; + + int int int; + unsignedInt "unsigned int" "unsigned int"; + + long "long long" "long long"; + unsignedLong "unsigned long long" "unsigned long long"; + + integer "long long" "long long"; + + negativeInteger "long long" "long long"; + nonPositiveInteger "long long" "long long"; + + positiveInteger "unsigned long long" "unsigned long long"; + nonNegativeInteger "unsigned long long" "unsigned long long"; + + float float float; + double 
double double; + decimal double double; + + string std::string; + normalizedString std::string; + token std::string; + Name std::string; + NMTOKEN std::string; + NCName std::string; + ID std::string; + IDREF std::string; + language std::string; + anyURI std::string; + + NMTOKENS xml_schema::string_sequence; + IDREFS xml_schema::string_sequence; + + QName xml_schema::qname; + + base64Binary std::[unique|auto]_ptr<xml_schema::buffer> + std::[unique|auto]_ptr<xml_schema::buffer>; + hexBinary std::[unique|auto]_ptr<xml_schema::buffer> + std::[unique|auto]_ptr<xml_schema::buffer>; + + date xml_schema::date; + dateTime xml_schema::date_time; + duration xml_schema::duration; + gDay xml_schema::gday; + gMonth xml_schema::gmonth; + gMonthDay xml_schema::gmonth_day; + gYear xml_schema::gyear; + gYearMonth xml_schema::gyear_month; + time xml_schema::time; +} + </pre> + + <p>For more information about the mapping of the built-in XML Schema types + to C++ types refer to <a href="#6">Chapter 6, "Built-In XML Schema Type + Parsers"</a>. The last predefined rule maps anything that wasn't + mapped by previous rules to <code>void</code>:</p> + + <pre class="type-map"> +namespace .* +{ + .* void void; +} + </pre> + + + <p>When you provide your own type maps with the + <code>--type-map</code> option, they are evaluated first. This + allows you to selectively override any of the predefined rules. + Note also that if you change the mapping + of a built-in XML Schema type then it becomes your responsibility + to provide the corresponding parser skeleton and implementation + in the <code>xml_schema</code> namespace. 
You can include the + custom definitions into the generated header file using the + <code>--hxx-prologue-*</code> options.</p> + + <h2><a name="4.3">4.3 Parser Implementations</a></h2> + + <p>With the knowledge from the previous section, we can proceed + with creating a type map that maps types in the <code>people.xsd</code> + schema to our object model classes in + <code>people.hxx</code>. In fact, we already have the beginning + of our type map file in <code>people.map</code>. Let's extend + it with the rest of the types:</p> + + <pre class="type-map"> +include "people.hxx"; + +gender ::gender ::gender; +person ::person; +people ::people; + </pre> + + <p>There are a few things to note about this type map. We did not + provide the argument types for <code>person</code> and + <code>people</code> because the default constant reference is + exactly what we need. We also did not provide any mappings + for built-in XML Schema types <code>string</code> and + <code>short</code> because they are handled by the predefined + rules and we are happy with the result. Note also that + all C++ types are fully qualified. This is done to avoid + potential name conflicts in the generated code. Now we can + recompile our schema and move on to implementing the parsers:</p> + + <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat --type-map people.map people.xsd + </pre> + + <p>Here is the implementation of our three parsers in full. One + way to save typing when implementing your own parsers is + to open the generated code and copy the signatures of parser + callbacks into your code. Or you could always auto generate the + sample implementations and fill them with your code.</p> + + + <pre class="c++"> +#include "people-pskel.hxx" + +class gender_pimpl: public gender_pskel, + public xml_schema::string_pimpl +{ +public: + virtual ::gender + post_gender () + { + return post_string () == "male" ? 
male : female; + } +}; + +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + first_ = f; + } + + virtual void + last_name (const std::string& l) + { + last_ = l; + } + + virtual void + gender (::gender g) + { + gender_ = g; + } + + virtual void + age (short a) + { + age_ = a; + } + + virtual ::person + post_person () + { + return ::person (first_, last_, gender_, age_); + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +class people_pimpl: public people_pskel +{ +public: + virtual void + person (const ::person& p) + { + people_.push_back (p); + } + + virtual ::people + post_people () + { + ::people r; + r.swap (people_); + return r; + } + +private: + ::people people_; +}; + </pre> + + <p>This code fragment should look familiar by now. Just note that + all the <code>post_*()</code> callbacks now have return types instead + of <code>void</code>. Here is the implementation of the test + driver for this example:</p> + + <pre class="c++"> +#include <iostream> + +using namespace std; + +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::short_pimpl short_p; + xml_schema::string_pimpl string_p; + + gender_pimpl gender_p; + person_pimpl person_p; + people_pimpl people_p; + + person_p.parsers (string_p, string_p, gender_p, short_p); + people_p.parsers (person_p); + + // Parse the document to obtain the object model. + // + xml_schema::document doc_p (people_p, "people"); + + people_p.pre (); + doc_p.parse (argv[1]); + people ppl = people_p.post_people (); + + // Print the object model. + // + for (people::iterator i (ppl.begin ()); i != ppl.end (); ++i) + { + cout << "first: " << i->first () << endl + << "last: " << i->last () << endl + << "gender: " << (i->gender () == male ? 
"male" : "female") << endl + << "age: " << i->age () << endl + << endl; + } +} + </pre> + + <p>The parser creation and assembly part is exactly the same as in + the previous chapter. The parsing part is a bit different: + <code>post_people()</code> now has a return value which is the + complete object model. We store it in the + <code>ppl</code> variable. The last bit of the code simply iterates + over the <code>people</code> vector and prints the information + for each person. We save the last two code fragments to + <code>driver.cxx</code> and proceed to compile and test + our new application:</p> + + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx people-pskel.cxx +$ c++ -std=c++11 -o driver driver.o people-pskel.o -lexpat +$ ./driver people.xml +first: John +last: Doe +gender: male +age: 32 + +first: Jane +last: Doe +gender: female +age: 28 + </pre> + + + <!-- Mapping Configuration --> + + + <h1><a name="5">5 Mapping Configuration</a></h1> + + <p>The C++/Parser mapping has a number of configuration parameters that + determine the overall properties and behavior of the generated code. + Configuration parameters are specified with the XSD command line + options and include the C++ standard, the character type that is used + by the generated code, the underlying XML parser, whether the XML Schema + validation is performed in the generated code, and support for XML Schema + polymorphism. This chapter describes these configuration + parameters in more detail. For more ways to configure the generated + code refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. + </p> + + <h2><a name="5.1">5.1 C++ Standard</a></h2> + + <p>The C++/Parser mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. 
While the majority of the examples in this guide use + C++11, the document explains the C++11/98 usage difference and so + they can easily be converted to C++98.</p> + + <h2><a name="5.2">5.2 Character Type and Encoding</a></h2> + + <p>The C++/Parser mapping has built-in support for two character types: + <code>char</code> and <code>wchar_t</code>. You can select the + character type with the <code>--char-type</code> command line + option. The default character type is <code>char</code>. The + string-based built-in XML Schema types are returned as either + <code>std::string</code> or <code>std::wstring</code> depending + on the character type selected.</p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings. You can select which encoding should be used + in the object model with the <code>--char-encoding</code> command + line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <p>Note also that the character encoding that is used in the object model + is independent of the encodings used in input and output XML. In fact, + all three (object model, input XML, and output XML) can have different + encodings.</p> + + <h2><a name="5.3">5.3 Underlying XML Parser</a></h2> + + <p>The C++/Parser mapping can be used with either Xerces-C++ or Expat + as the underlying XML parser. 
You can select the XML parser with + the <code>--xml-parser</code> command line option. Valid values + for this option are <code>xerces</code> and <code>expat</code>. + The default XML parser is Xerces-C++.</p> + + <p>The generated code is identical for both parsers except for the + <code>xml_schema::document</code> class in which some of the + <code>parse()</code> functions are parser-specific as described + in <a href="#7">Chapter 7, "Document Parser and Error Handling"</a>.</p> + + + <h2><a name="5.4">5.4 XML Schema Validation</a></h2> + + <p>The C++/Parser mapping provides support for validating a + commonly-used subset of W3C XML Schema in the generated code. + For the list of supported XML Schema constructs refer to + <a href="#A">Appendix A, "Supported XML Schema Constructs"</a>.</p> + + <p>By default validation in the generated code is disabled if + the underlying XML parser is validating (Xerces-C++) and + enabled otherwise (Expat). See <a href="#5.3">Section 5.3, + "Underlying XML Parser"</a> for more information about + the underlying XML parser. You can override the default + behavior with the <code>--generate-validation</code> + and <code>--suppress-validation</code> command line options.</p> + + + <h2><a name="5.5">5.5 Support for Polymorphism</a></h2> + + <p>By default the XSD compiler generates non-polymorphic code. If your + vocabulary uses XML Schema polymorphism in the form of <code>xsi:type</code> + and/or substitution groups, then you will need to compile your schemas + with the <code>--generate-polymorphic</code> option to produce + polymorphism-aware code as well as pass <code>true</code> as the last + argument to the <code>xml_schema::document</code>'s constructors.</p> + + <p>When using the polymorphism-aware generated code, you can specify + several parsers for a single element by passing a parser map + instead of an individual parser to the parser connection function + for the element. 
One of the parsers will then be looked up and used + depending on the <code>xsi:type</code> attribute value or an element + name from a substitution group. Consider the following schema as an + example:</p> + + <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="name" type="xs:string"/> + </xs:sequence> + </xs:complexType> + + <!-- substitution group root --> + <xs:element name="person" type="person"/> + + <xs:complexType name="superman"> + <xs:complexContent> + <xs:extension base="person"> + <xs:attribute name="can-fly" type="xs:boolean"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="superman" + type="superman" + substitutionGroup="person"/> + + <xs:complexType name="batman"> + <xs:complexContent> + <xs:extension base="superman"> + <xs:attribute name="wing-span" type="xs:unsignedInt"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="batman" + type="batman" + substitutionGroup="superman"/> + + <xs:complexType name="supermen"> + <xs:sequence> + <xs:element ref="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="supermen" type="supermen"/> + +</xs:schema> + </pre> + + <p>Conforming XML documents can use the <code>superman</code> + and <code>batman</code> types in place of the <code>person</code> + type either by specifying the type with the <code>xsi:type</code> + attributes or by using the elements from the substitution + group, for instance:</p> + + + <pre class="xml"> +<supermen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <person> + <name>John Doe</name> + </person> + + <superman can-fly="false"> + <name>James "007" Bond</name> + </superman> + + <superman can-fly="true" wing-span="10" xsi:type="batman"> + <name>Bruce Wayne</name> + </superman> + +</supermen> + </pre> + + <p>To print the data stored in such XML documents we can implement + the 
parsers as follows:</p> + + <pre class="c++"> +class person_pimpl: public virtual person_pskel +{ +public: + virtual void + pre () + { + cout << "starting to parse person" << endl; + } + + virtual void + name (const std::string& v) + { + cout << "name: " << v << endl; + } + + virtual void + post_person () + { + cout << "finished parsing person" << endl; + } +}; + +class superman_pimpl: public virtual superman_pskel, + public person_pimpl +{ +public: + virtual void + pre () + { + cout << "starting to parse superman" << endl; + } + + virtual void + can_fly (bool v) + { + cout << "can-fly: " << v << endl; + } + + virtual void + post_person () + { + post_superman (); + } + + virtual void + post_superman () + { + cout << "finished parsing superman" << endl; + } +}; + +class batman_pimpl: public virtual batman_pskel, + public superman_pimpl +{ +public: + virtual void + pre () + { + cout << "starting to parse batman" << endl; + } + + virtual void + wing_span (unsigned int v) + { + cout << "wing-span: " << v << endl; + } + + virtual void + post_superman () + { + post_batman (); + } + + virtual void + post_batman () + { + cout << "finished parsing batman" << endl; + } +}; + </pre> + + <p>Note that because the derived type parsers (<code>superman_pskel</code> + and <code>batman_pskel</code>) are called via the <code>person_pskel</code> + interface, we have to override the <code>post_person()</code> + virtual function in <code>superman_pimpl</code> to call + <code>post_superman()</code> and the <code>post_superman()</code> + virtual function in <code>batman_pimpl</code> to call + <code>post_batman()</code>.</p> + + <p>The following code fragment shows how to connect the parsers together. 
+ Notice that for the <code>person</code> element in the <code>supermen_p</code> + parser we specify a parser map instead of a specific parser and we pass + <code>true</code> as the last argument to the document parser constructor + to indicate that we are parsing potentially-polymorphic XML documents:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::string_pimpl string_p; + xml_schema::boolean_pimpl boolean_p; + xml_schema::unsigned_int_pimpl unsigned_int_p; + + person_pimpl person_p; + superman_pimpl superman_p; + batman_pimpl batman_p; + + xml_schema::parser_map_impl person_map; + supermen_pimpl supermen_p; + + person_p.parsers (string_p); + superman_p.parsers (string_p, boolean_p); + batman_p.parsers (string_p, boolean_p, unsigned_int_p); + + // Here we are specifying a parser map which contains several + // parsers that can be used to parse the person element. + // + person_map.insert (person_p); + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_map); + + // Parse the XML document. The last argument to the document's + // constructor indicates that we are parsing polymorphic XML + // documents. + // + xml_schema::document doc_p (supermen_p, "supermen", true); + + supermen_p.pre (); + doc_p.parse (argv[1]); + supermen_p.post_supermen (); +} + </pre> + + <p>When polymorphism-aware code is generated, each element's + <code>*_parser()</code> function is overloaded to also accept + an object of the <code>xml_schema::parser_map</code> type. + For example, the <code>supermen_pskel</code> class from the + above example looks like this:</p> + + <pre class="c++"> +class supermen_pskel: public xml_schema::parser_complex_content +{ +public: + + ... + + // Parser construction API. + // + void + parsers (person_pskel&); + + // Individual element parsers. 
+ // + void + person_parser (person_pskel&); + + void + person_parser (const xml_schema::parser_map&); + + ... +}; + </pre> + + <p>Note that you can specify both the individual (static) parser and + the parser map. The individual parser will be used when the static + element type and the dynamic type of the object being parsed are + the same. This is the case, for example, when there is no + <code>xsi:type</code> attribute and the element hasn't been + substituted. Because the individual parser for an element is + cached and no map lookup is necessary, it makes sense to specify + both the individual parser and the parser map when most of the + objects being parsed are of the static type and optimal + performance is important. The following code fragment shows + how to change the above example to set both the individual + parser and the parser map:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + ... + + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_p); + supermen_p.person_parser (person_map); + + ... +} + </pre> + + + <p>The <code>xml_schema::parser_map</code> interface and the + <code>xml_schema::parser_map_impl</code> default implementation + are presented below:</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_map + { + public: + virtual parser_base* + find (const ro_string* type) const = 0; + }; + + class parser_map_impl: public parser_map + { + public: + void + insert (parser_base&); + + virtual parser_base* + find (const ro_string* type) const; + + private: + parser_map_impl (const parser_map_impl&); + + parser_map_impl& + operator= (const parser_map_impl&); + + ... 
+ }; +} + </pre> + + <p>The <code>type</code> argument in the <code>find()</code> virtual + function is the type name and namespace from the <code>xsi:type</code> attribute + (the namespace prefix is resolved to the actual XML namespace) + or the type of an element from the substitution group in the form + <code>"<name> <namespace>"</code> with the space and the + namespace part absent if the type does not have a namespace. + You can obtain a parser's dynamic type in the same format + using the <code>_dynamic_type()</code> function. The static + type can be obtained by calling the static <code>_static_type()</code> + function, for example <code>person_pskel::_static_type()</code>. + Both functions return a C string (<code>const char*</code> or + <code>const wchar_t*</code>, depending on the character type + used) which is valid for as long as the application is running. + The following example shows how we can implement our own parser + map using <code>std::map</code>:</p> + + + <pre class="c++"> +#include <map> +#include <string> + +class parser_map: public xml_schema::parser_map +{ +public: + void + insert (xml_schema::parser_base& p) + { + map_[p._dynamic_type ()] = &p; + } + + virtual xml_schema::parser_base* + find (const xml_schema::ro_string* type) const + { + map::const_iterator i = map_.find (type); + return i != map_.end () ? i->second : 0; + } + +private: + typedef std::map<std::string, xml_schema::parser_base*> map; + map map_; +}; + </pre> + + <p>Most of the code presented in this section is taken from the + <code>polymorphism</code> example which can be found in the + <code>cxx/parser/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package. 
+ Handling of <code>xsi:type</code> and substitution groups when used on + root elements requires a number of special actions as shown in + the <code>polyroot</code> example.</p> + + + <!-- Built-in XML Schema Type Parsers --> + + + <h1><a name="6">6 Built-In XML Schema Type Parsers</a></h1> + + <p>The XSD runtime provides parser implementations for all built-in + XML Schema types as summarized in the following table. Declarations + for these types are automatically included into each generated + header file. As a result you don't need to include any headers + to gain access to these parser implementations. Note that some + parsers return either <code>std::string</code> or + <code>std::wstring</code> depending on the character type selected.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Parser implementation in the <code>xml_schema</code> namespace</th> + <th>Parser return type</th> + </tr> + + <tr> + <th colspan="3">anyType and anySimpleType types</th> + </tr> + <tr> + <td><code>anyType</code></td> + <td><code>any_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + <tr> + <td><code>anySimpleType</code></td> + <td><code>any_simple_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte_pimpl</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte_pimpl</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_pimpl</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short_pimpl</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_pimpl</code></td> + 
<td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int_pimpl</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + <td><code>integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean_pimpl</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_pimpl</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string-based types</th> + </tr> + <tr> + <td><code>string</code></td> + 
<td><code>string_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <td><code>language</code></td> + <td><code>language_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname_pimpl</code></td> + <td><code>xml_schema::qname</code><br/><a href="#6.1">Section 6.1, + "<code>QName</code> Parser"</a></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">list types</th> + </tr> + <tr> + <td><code>NMTOKENS</code></td> + <td><code>nmtokens_pimpl</code></td> + <td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs_pimpl</code></td> + 
<td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary_pimpl</code></td> + <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary_pimpl</code></td> + <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date_pimpl</code></td> + <td><code>xml_schema::date</code><br/><a href="#6.5">Section 6.5, + "<code>date</code> Parser"</a></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time_pimpl</code></td> + <td><code>xml_schema::date_time</code><br/><a href="#6.6">Section 6.6, + "<code>dateTime</code> Parser"</a></td> + </tr> + <tr> + <td><code>duration</code></td> + <td><code>duration_pimpl</code></td> + <td><code>xml_schema::duration</code><br/><a href="#6.7">Section 6.7, + "<code>duration</code> Parser"</a></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday_pimpl</code></td> + <td><code>xml_schema::gday</code><br/><a href="#6.8">Section 6.8, + "<code>gDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth_pimpl</code></td> + <td><code>xml_schema::gmonth</code><br/><a href="#6.9">Section 6.9, + "<code>gMonth</code> Parser"</a></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + 
<td><code>gmonth_day_pimpl</code></td> + <td><code>xml_schema::gmonth_day</code><br/><a href="#6.10">Section 6.10, + "<code>gMonthDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYear</code></td> + <td><code>gyear_pimpl</code></td> + <td><code>xml_schema::gyear</code><br/><a href="#6.11">Section 6.11, + "<code>gYear</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month_pimpl</code></td> + <td><code>xml_schema::gyear_month</code><br/><a href="#6.12">Section + 6.12, "<code>gYearMonth</code> Parser"</a></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time_pimpl</code></td> + <td><code>xml_schema::time</code><br/><a href="#6.13">Section 6.13, + "<code>time</code> Parser"</a></td> + </tr> + + </table> + + <h2><a name="6.1">6.1 <code>QName</code> Parser</a></h2> + + <p>The return type of the <code>qname_pimpl</code> parser implementation + is <code>xml_schema::qname</code> which represents an XML qualified + name. Its interface is presented below. + Note that the <code>std::string</code> type in the interface becomes + <code>std::wstring</code> if the selected character type is + <code>wchar_t</code>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class qname + { + public: + explicit + qname (const std::string& name); + qname (const std::string& prefix, const std::string& name); + + const std::string& + prefix () const; + + void + prefix (const std::string&); + + const std::string& + name () const; + + void + name (const std::string&); + }; + + bool + operator== (const qname&, const qname&); + + bool + operator!= (const qname&, const qname&); +} + </pre> + + + <h2><a name="6.2">6.2 <code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></h2> + + <p>The return type of the <code>nmtokens_pimpl</code> and + <code>idrefs_pimpl</code> parser implementations is + <code>xml_schema::string_sequence</code> which represents a + sequence of strings. Its interface is presented below. 
+ Note that the <code>std::string</code> type in the interface becomes + <code>std::wstring</code> if the selected character type is + <code>wchar_t</code>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence: public std::vector<std::string> + { + public: + string_sequence (); + + explicit + string_sequence (std::vector<std::string>::size_type n, + const std::string& x = std::string ()); + + template <typename I> + string_sequence (const I& begin, const I& end); + }; + + bool + operator== (const string_sequence&, const string_sequence&); + + bool + operator!= (const string_sequence&, const string_sequence&); +} + </pre> + + + <h2><a name="6.3">6.3 <code>base64Binary</code> and <code>hexBinary</code> Parsers</a></h2> + + <p>The return type of the <code>base64_binary_pimpl</code> and + <code>hex_binary_pimpl</code> parser implementations is either + <code>std::unique_ptr<xml_schema::buffer></code> (C++11) or + <code>std::auto_ptr<xml_schema::buffer></code> (C++98), + depending on the C++ standard selected (<code>--std</code> XSD + compiler option). The <code>xml_schema::buffer</code> type + represents a binary buffer and its interface is presented below.</p> + + <pre class="c++"> +namespace xml_schema +{ + class buffer + { + public: + typedef std::size_t size_t; + + class bounds {}; // Out of bounds exception. 
+ + public: + explicit + buffer (size_t size = 0); + buffer (size_t size, size_t capacity); + buffer (const void* data, size_t size); + buffer (const void* data, size_t size, size_t capacity); + buffer (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + + public: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + void + swap (buffer&); + + public: + size_t + capacity () const; + + bool + capacity (size_t); + + public: + size_t + size () const; + + bool + size (size_t); + + public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); + }; + + bool + operator== (const buffer&, const buffer&); + + bool + operator!= (const buffer&, const buffer&); +} + </pre> + + <p>If the <code>assume_ownership</code> argument to the constructor + is <code>true</code>, the instance assumes the ownership of the + memory block pointed to by the <code>data</code> argument and will + eventually release it by calling <code>operator delete()</code>. The + <code>capacity()</code> and <code>size()</code> modifier functions + return <code>true</code> if the underlying buffer has moved. + </p> + + <p>The <code>bounds</code> exception is thrown if the constructor + arguments violate the <code>(size <= capacity)</code> + constraint.</p> + + + <h2><a name="6.4">6.4 Time Zone Representation</a></h2> + + <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, + <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, + <code>gYearMonth</code>, and <code>time</code> XML Schema built-in + types all include an optional time zone component. 
The following + <code>xml_schema::time_zone</code> base class is used to represent + this information:</p> + + <pre class="c++"> +namespace xml_schema +{ + class time_zone + { + public: + time_zone (); + time_zone (short hours, short minutes); + + bool + zone_present () const; + + void + zone_reset (); + + short + zone_hours () const; + + void + zone_hours (short); + + short + zone_minutes () const; + + void + zone_minutes (short); + }; + + bool + operator== (const time_zone&, const time_zone&); + + bool + operator!= (const time_zone&, const time_zone&); +} + </pre> + + <p>The <code>zone_present()</code> accessor function returns <code>true</code> + if the time zone is specified. The <code>zone_reset()</code> modifier + function resets the time zone object to the <em>not specified</em> + state. If the time zone offset is negative then both hours and + minutes components are represented as negative integers.</p> + + + <h2><a name="6.5">6.5 <code>date</code> Parser</a></h2> + + <p>The return type of the <code>date_pimpl</code> parser implementation + is <code>xml_schema::date</code> which represents a year, a day, and a month + with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date + { + public: + date (int year, unsigned short month, unsigned short day); + date (int year, unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const date&, const date&); + + bool + operator!= (const date&, const date&); +} + </pre> + + <h2><a name="6.6">6.6 <code>dateTime</code> Parser</a></h2> + + <p>The return type of the <code>date_time_pimpl</code> parser implementation + is <code>xml_schema::date_time</code> which represents a year, a month, a day, + hours, minutes, and seconds with an optional time zone. Its interface + is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date_time + { + public: + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const date_time&, const date_time&); + + bool + operator!= (const date_time&, const date_time&); +} + </pre> + + <h2><a name="6.7">6.7 <code>duration</code> Parser</a></h2> + + <p>The return type of the <code>duration_pimpl</code> parser implementation + is <code>xml_schema::duration</code> which represents a potentially + negative duration in the form of years, months, days, hours, minutes, + and seconds. 
Its interface is presented below.</p> + + <pre class="c++"> +namespace xml_schema +{ + class duration + { + public: + duration (bool negative, + unsigned int years, unsigned int months, unsigned int days, + unsigned int hours, unsigned int minutes, double seconds); + + bool + negative () const; + + void + negative (bool); + + unsigned int + years () const; + + void + years (unsigned int); + + unsigned int + months () const; + + void + months (unsigned int); + + unsigned int + days () const; + + void + days (unsigned int); + + unsigned int + hours () const; + + void + hours (unsigned int); + + unsigned int + minutes () const; + + void + minutes (unsigned int); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const duration&, const duration&); + + bool + operator!= (const duration&, const duration&); +} + </pre> + + + <h2><a name="6.8">6.8 <code>gDay</code> Parser</a></h2> + + <p>The return type of the <code>gday_pimpl</code> parser implementation + is <code>xml_schema::gday</code> which represents a day of the month with + an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gday + { + public: + explicit + gday (unsigned short day); + gday (unsigned short day, short zone_hours, short zone_minutes); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gday&, const gday&); + + bool + operator!= (const gday&, const gday&); +} + </pre> + + <h2><a name="6.9">6.9 <code>gMonth</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_pimpl</code> parser implementation + is <code>xml_schema::gmonth</code> which represents a month of the year + with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth + { + public: + explicit + gmonth (unsigned short month); + gmonth (unsigned short month, short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gmonth&, const gmonth&); + + bool + operator!= (const gmonth&, const gmonth&); +} + </pre> + + <h2><a name="6.10">6.10 <code>gMonthDay</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_day_pimpl</code> parser implementation + is <code>xml_schema::gmonth_day</code> which represents a day and a month + of the year with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth_day + { + public: + gmonth_day (unsigned short month, unsigned short day); + gmonth_day (unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gmonth_day&, const gmonth_day&); + + bool + operator!= (const gmonth_day&, const gmonth_day&); +} + </pre> + + <h2><a name="6.11">6.11 <code>gYear</code> Parser</a></h2> + + <p>The return type of the <code>gyear_pimpl</code> parser implementation + is <code>xml_schema::gyear</code> which represents a year with + an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear + { + public: + explicit + gyear (int year); + gyear (int year, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + }; + + bool + operator== (const gyear&, const gyear&); + + bool + operator!= (const gyear&, const gyear&); +} + </pre> + + <h2><a name="6.12">6.12 <code>gYearMonth</code> Parser</a></h2> + + <p>The return type of the <code>gyear_month_pimpl</code> parser implementation + is <code>xml_schema::gyear_month</code> which represents a year and a month + with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear_month + { + public: + gyear_month (int year, unsigned short month); + gyear_month (int year, unsigned short month, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gyear_month&, const gyear_month&); + + bool + operator!= (const gyear_month&, const gyear_month&); +} + </pre> + + + <h2><a name="6.13">6.13 <code>time</code> Parser</a></h2> + + <p>The return type of the <code>time_pimpl</code> parser implementation + is <code>xml_schema::time</code> which represents hours, minutes, + and seconds with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class time + { + public: + time (unsigned short hours, unsigned short minutes, double seconds); + time (unsigned short hours, unsigned short minutes, double seconds, + short zone_hours, short zone_minutes); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const time&, const time&); + + bool + operator!= (const time&, const time&); +} + </pre> + + + <!-- Error Handling --> + + + <h1><a name="7">7 Document Parser and Error Handling</a></h1> + + <p>In this chapter we will discuss the <code>xml_schema::document</code> + type as well as the error handling mechanisms provided by the mapping + in more detail. As mentioned in <a href="#3.4">Section 3.4, + "Connecting the Parsers Together"</a>, the interface of + <code>xml_schema::document</code> depends on the underlying XML + parser selected (<a href="#5.3">Section 5.3, "Underlying XML + Parser"</a>). The following sections describe the + <code>document</code> type interface for Xerces-C++ and + Expat as underlying parsers.</p> + + <h2><a name="7.1">7.1 Xerces-C++ Document Parser</a></h2> + + <p>When Xerces-C++ is used as the underlying XML parser, the + <code>document</code> type has the following interface. Note that + if the character type is <code>wchar_t</code>, then the string type + in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_base; + class error_handler; + + class flags + { + public: + // Do not validate XML documents with the Xerces-C++ validator. 
+ // + static const unsigned long dont_validate; + + // Do not initialize the Xerces-C++ runtime. + // + static const unsigned long dont_initialize; + + // Disable handling of subsequent imports for the same namespace + // in Xerces-C++ 3.1.0 and later. + // + static const unsigned long no_multiple_imports; + }; + + class properties + { + public: + // Add a location for a schema with a target namespace. + // + void + schema_location (const std::string& namespace_, + const std::string& location); + + // Add a location for a schema without a target namespace. + // + void + no_namespace_schema_location (const std::string& location); + }; + + class document + { + public: + document (parser_base& root, + const std::string& root_element_name, + bool polymorphic = false); + + document (parser_base& root, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + public: + // Parse URI or a local file. + // + void + parse (const std::string& uri, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file with a user-provided error_handler + // object. + // + void + parse (const std::string& uri, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file with a user-provided ErrorHandler + // object. Note that you must initialize the Xerces-C++ runtime + // before calling this function. + // + void + parse (const std::string& uri, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file using a user-provided SAX2XMLReader + // object. Note that you must initialize the Xerces-C++ runtime + // before calling this function. + // + void + parse (const std::string& uri, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream. 
+ // + void + parse (std::istream&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a user-provided error_handler object. + // + void + parse (std::istream&, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a user-provided ErrorHandler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (std::istream&, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream using a user-provided SAX2XMLReader object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (std::istream&, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream with a system id. + // + void + parse (std::istream&, + const std::string& system_id, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id and a user-provided + // ErrorHandler object. Note that you must initialize the + // Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id using a user-provided + // SAX2XMLReader object. Note that you must initialize the + // Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream with system and public ids. 
+ // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids and a user-provided + // ErrorHandler object. Note that you must initialize the Xerces-C++ + // runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids using a user- + // provided SAX2XMLReader object. Note that you must initialize + // the Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse InputSource. Note that you must initialize the Xerces-C++ + // runtime before calling this function. + // + void + parse (const xercesc::InputSource&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource with a user-provided error_handler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (const xercesc::InputSource&, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource with a user-provided ErrorHandler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. 
+ // + void + parse (const xercesc::InputSource&, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource using a user-provided SAX2XMLReader object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (const xercesc::InputSource&, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + }; +} + </pre> + + <p>The <code>document</code> class is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element. The <code>parser_base</code> + class is the base type for all parser skeletons. The second and + third arguments to the <code>document</code>'s constructors are + the root element's name and namespace. The last argument, + <code>polymorphic</code>, specifies whether the XML documents + being parsed use polymorphism. For more information on support + for XML Schema polymorphism in the C++/Parser mapping refer + to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + + <p>The rest of the <code>document</code> interface consists of overloaded + <code>parse()</code> functions. The last two arguments in each of these + functions are <code>flags</code> and <code>properties</code>. The + <code>flags</code> argument allows you to modify the default behavior + of the parsing functions. The <code>properties</code> argument allows + you to override the schema location attributes specified in XML + documents. Note that the schema location paths are relative to an + XML document unless they are complete URIs. For example if you want + to use a local schema file then you will need to use a URI in the + form <code>file:///absolute/path/to/your/schema</code>.</p> + + <p>A number of overloaded <code>parse()</code> functions have the + <code>system_id</code> and <code>public_id</code> arguments. 
The + system id is a <em>system</em> identifier of the resource being + parsed (for example, a URI or a full file path). The public id is a + <em>public</em> identifier of the resource (for example, an + application-specific name or a relative file path). The system id + is used to resolve relative paths (for example, schema paths). In + diagnostics messages the public id is used if it is available. + Otherwise the system id is used.</p> + + <p>The error handling mechanisms employed by the <code>document</code> + parser are described in <a href="#7.3">Section 7.3, "Error + Handling"</a>.</p> + + <h2><a name="7.2">7.2 Expat Document Parser</a></h2> + + <p>When Expat is used as the underlying XML parser, the + <code>document</code> type has the following interface. Note that + if the character type is <code>wchar_t</code>, then the string type + in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_base; + class error_handler; + + class document + { + public: + document (parser_base&, + const std::string& root_element_name, + bool polymorphic = false); + + document (parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + public: + // Parse a local file. The file is accessed with std::ifstream + // in binary mode. The std::ios_base::failure exception is used + // to report io errors (badbit and failbit). + void + parse (const std::string& file); + + // Parse a local file with a user-provided error_handler + // object. The file is accessed with std::ifstream in binary + // mode. The std::ios_base::failure exception is used to report + // io errors (badbit and failbit). + // + void + parse (const std::string& file, error_handler&); + + public: + // Parse std::istream. 
+ // + void + parse (std::istream&); + + // Parse std::istream with a user-provided error_handler object. + // + void + parse (std::istream&, error_handler&); + + // Parse std::istream with a system id. + // + void + parse (std::istream&, const std::string& system_id); + + // Parse std::istream with a system id and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + error_handler&); + + // Parse std::istream with system and public ids. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id); + + // Parse std::istream with system and public ids and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + error_handler&); + + public: + // Parse a chunk of input. You can call these functions multiple + // times with the last call having the last argument true. + // + void + parse (const void* data, std::size_t size, bool last); + + void + parse (const void* data, std::size_t size, bool last, + error_handler&); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + error_handler&); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + const std::string& public_id); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + const std::string& public_id, + error_handler&); + + public: + // Low-level Expat-specific parsing API. 
+ // + void + parse_begin (XML_Parser); + + void + parse_begin (XML_Parser, const std::string& public_id); + + void + parse_begin (XML_Parser, error_handler&); + + void + parse_begin (XML_Parser, + const std::string& public_id, + error_handler&); + void + parse_end (); + }; +} + </pre> + + <p>The <code>document</code> class is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element. The <code>parser_base</code> + class is the base type for all parser skeletons. The second and + third arguments to the <code>document</code>'s constructors are + the root element's name and namespace. The last argument, + <code>polymorphic</code>, specifies whether the XML documents + being parsed use polymorphism. For more information on support + for XML Schema polymorphism in the C++/Parser mapping refer + to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + + <p>A number of overloaded <code>parse()</code> functions have the + <code>system_id</code> and <code>public_id</code> arguments. The + system id is a <em>system</em> identifier of the resource being + parsed (for example, a URI or a full file path). The public id is a + <em>public</em> identifier of the resource (for example, an + application-specific name or a relative file path). The system id + is used to resolve relative paths. In diagnostics messages the + public id is used if it is available. Otherwise the system id + is used.</p> + + <p>The <code>parse_begin()</code> and <code>parse_end()</code> functions + present a low-level, Expat-specific parsing API for maximum control. + A typical use-case would look like this (pseudo-code):</p> + + <pre class="c++"> +xxx_pimpl root_p; +document doc_p (root_p, "root"); + +root_p.pre (); +doc_p.parse_begin (xml_parser, "file.xml"); + +while (more_data_to_parse) +{ + // Call XML_Parse or XML_ParseBuffer. 
+ + if (status == XML_STATUS_ERROR) + break; +} + +// Call parse_end even in case of an error to translate +// XML and Schema errors to exceptions or error_handler +// calls. +// +doc_p.parse_end (); +result_type result (root_p.post_xxx ()); + </pre> + + <p>Note that if your vocabulary uses XML namespaces, the + <code>XML_ParserCreateNS()</code> function should be used to create + the XML parser. Space (<code>XML_Char (' ')</code>) should be used + as a separator (the second argument to <code>XML_ParserCreateNS()</code>). + </p> + + <p>The error handling mechanisms employed by the <code>document</code> + parser are described in <a href="#7.3">Section 7.3, "Error + Handling"</a>.</p> + + + <h2><a name="7.3">7.3 Error Handling</a></h2> + + <p>There are three categories of errors that can result from running + a parser on an XML document: System, XML, and Application. + The System category contains memory allocation and file/stream + operation errors. The XML category covers XML parsing and + well-formedness checking as well as XML Schema validation errors. + Finally, the Application category is for application logic errors + that you may want to propagate from parser implementations to the + caller of the parser. + </p> + + <p>The System errors are mapped to the standard exceptions. The + out of memory condition is indicated by throwing an instance + of <code>std::bad_alloc</code>. The stream operation errors + are reported either by throwing an instance of + <code>std::ios_base::failure</code> if exceptions are enabled + or by setting the stream state.</p> + + <p>Note that if you are parsing <code>std::istream</code> on + which exceptions are not enabled, then you will need to + check the stream state before calling the <code>post()</code> + callback, as shown in the following example:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + ... 
+ + std::ifstream ifs (argv[1]); + + if (ifs.fail ()) + { + cerr << argv[1] << ": unable to open" << endl; + return 1; + } + + root_p.pre (); + doc_p.parse (ifs); + + if (ifs.fail ()) + { + cerr << argv[1] << ": io failure" << endl; + return 1; + } + + result_type result (root_p.post_xxx ()); +} + </pre> + + <p>The above example can be rewritten to use exceptions + as shown below:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + try + { + ... + + std::ifstream ifs; + ifs.exceptions (std::ifstream::badbit | std::ifstream::failbit); + ifs.open (argv[1]); + + root_p.pre (); + doc_p.parse (ifs); + result_type result (root_p.post_xxx ()); + } + catch (const std::ifstream::failure&) + { + cerr << argv[1] << ": unable to open or io failure" << endl; + return 1; + } +} + </pre> + + + <p>For reporting application errors from parsing callbacks, you + can throw any exceptions of your choice. They are propagated to + the caller of the parser without any alterations.</p> + + <p>The XML errors can be reported either by throwing the + <code>xml_schema::parsing</code> exception or by a callback + to the <code>xml_schema::error_handler</code> object (and + <code>xercesc::ErrorHandler</code> object in case of Xerces-C++).</p> + + <p>The <code>xml_schema::parsing</code> exception contains + a list of warnings and errors that were accumulated during + parsing. Note that this exception is thrown only if there + was an error. This makes it impossible to obtain warnings + from an otherwise successful parsing using this mechanism. + The following listing shows the definition of + <code>xml_schema::parsing</code> exception. 
Note that if the + character type is <code>wchar_t</code>, then the string type + and output stream type in the definition become + <code>std::wstring</code> and <code>std::wostream</code>, + respectively (see <a href="#5.2">Section 5.2, "Character Type + and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class exception: public std::exception + { + protected: + virtual void + print (std::ostream&) const = 0; + }; + + inline std::ostream& + operator<< (std::ostream& os, const exception& e) + { + e.print (os); + return os; + } + + + class severity + { + public: + enum value + { + warning, + error + }; + }; + + + class error + { + public: + error (xml_schema::severity, + const std::string& id, + unsigned long line, + unsigned long column, + const std::string& message); + + xml_schema::severity + severity () const; + + const std::string& + id () const; + + unsigned long + line () const; + + unsigned long + column () const; + + const std::string& + message () const; + }; + + std::ostream& + operator<< (std::ostream&, const error&); + + + class diagnostics: public std::vector<error> + { + }; + + std::ostream& + operator<< (std::ostream&, const diagnostics&); + + + class parsing: public exception + { + public: + parsing (); + parsing (const xml_schema::diagnostics&); + + const xml_schema::diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); + + protected: + virtual void + print (std::ostream&) const; + }; +} + </pre> + + <p>The following example shows how we can catch and print this + exception. The code will print diagnostics messages one per line + in case of an error.</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + try + { + // Parse. + } + catch (const xml_schema::parsing& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>With the <code>error_handler</code> approach the diagnostics + messages are delivered as parsing progresses. 
The following + listing presents the definition of the <code>error_handler</code> + interface. Note that if the character type is <code>wchar_t</code>, + then the string type in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class error_handler + { + public: + class severity + { + public: + enum value + { + warning, + error, + fatal + }; + }; + + virtual bool + handle (const std::string& id, + unsigned long line, + unsigned long column, + severity, + const std::string& message) = 0; + }; +} + </pre> + + <p>The return value of the <code>handle()</code> function indicates whether + parsing should continue if possible. The error with the fatal severity + level terminates the parsing process regardless of the returned value. + At the end of the parsing process with an error that was reported via + the <code>error_handler</code> object, an empty + <code>xml_schema::parsing</code> exception is thrown to indicate + the failure to the caller. 
You can alter this behavior by throwing + your own exception from the <code>handle()</code> function.</p> + + + <!-- Appendix A --> + + + <h1><a name="A">Appendix A — Supported XML Schema Constructs</a></h1> + + <p>The C++/Parser mapping supports validation of the following W3C XML + Schema constructs in the generated code.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="features" border="1"> + <tr><th>Construct</th><th>Notes</th></tr> + <tr><th colspan="2">Structure</th></tr> + + <tr><td>element</td><td></td></tr> + <tr><td>attribute</td><td></td></tr> + + <tr><td>any</td><td></td></tr> + <tr><td>anyAttribute</td><td></td></tr> + + <tr><td>all</td><td></td></tr> + <tr><td>sequence</td><td></td></tr> + <tr><td>choice</td><td></td></tr> + + <tr><td>complex type, empty content</td><td></td></tr> + <tr><td>complex type, mixed content</td><td></td></tr> + <tr><td>complex type, simple content extension</td><td></td></tr> + <tr><td>complex type, simple content restriction</td> + <td>Simple type facets are not validated.</td></tr> + <tr><td>complex type, complex content extension</td><td></td></tr> + <tr><td>complex type, complex content restriction</td><td></td></tr> + + <tr><td>list</td><td></td></tr> + + <tr><th colspan="2">Datatypes</th></tr> + + <tr><td>byte</td><td></td></tr> + <tr><td>unsignedByte</td><td></td></tr> + <tr><td>short</td><td></td></tr> + <tr><td>unsignedShort</td><td></td></tr> + <tr><td>int</td><td></td></tr> + <tr><td>unsignedInt</td><td></td></tr> + <tr><td>long</td><td></td></tr> + <tr><td>unsignedLong</td><td></td></tr> + <tr><td>integer</td><td></td></tr> + <tr><td>nonPositiveInteger</td><td></td></tr> + <tr><td>nonNegativeInteger</td><td></td></tr> + <tr><td>positiveInteger</td><td></td></tr> + <tr><td>negativeInteger</td><td></td></tr> + + <tr><td>boolean</td><td></td></tr> + + <tr><td>float</td><td></td></tr> + <tr><td>double</td><td></td></tr> + <tr><td>decimal</td><td></td></tr> + + <tr><td>string</td><td></td></tr> + 
<tr><td>normalizedString</td><td></td></tr> + <tr><td>token</td><td></td></tr> + <tr><td>Name</td><td></td></tr> + <tr><td>NMTOKEN</td><td></td></tr> + <tr><td>NCName</td><td></td></tr> + <tr><td>language</td><td></td></tr> + <tr><td>anyURI</td><td></td></tr> + + <tr><td>ID</td><td>Identity constraint is not enforced.</td></tr> + <tr><td>IDREF</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>NMTOKENS</td><td></td></tr> + <tr><td>IDREFS</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>QName</td><td></td></tr> + + <tr><td>base64Binary</td><td></td></tr> + <tr><td>hexBinary</td><td></td></tr> + + <tr><td>date</td><td></td></tr> + <tr><td>dateTime</td><td></td></tr> + <tr><td>duration</td><td></td></tr> + <tr><td>gDay</td><td></td></tr> + <tr><td>gMonth</td><td></td></tr> + <tr><td>gMonthDay</td><td></td></tr> + <tr><td>gYear</td><td></td></tr> + <tr><td>gYearMonth</td><td></td></tr> + <tr><td>time</td><td></td></tr> + </table> + + + </div> +</div> + +</body> +</html> diff --git a/doc/cxx/parser/guide/index.xhtml.in b/doc/cxx/parser/guide/index.xhtml.in new file mode 100644 index 0000000..119f421 --- /dev/null +++ b/doc/cxx/parser/guide/index.xhtml.in @@ -0,0 +1,4163 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Parser Mapping Getting Started Guide</title> + + <meta name="copyright" content="© @copyright@"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parser,validation"/> + <meta name="description" content="C++/Parser Mapping Getting Started Guide"/> + + <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + 
font-size: 200%; + line-height: 1.2em; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 140%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. */ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage .title { + font-weight: bold; + font-size: 200%; + text-align: center; + } + + #titlepage #first-title { + padding: 1em 0 0.4em 0; + } + + #titlepage #second-title { + padding: 0.4em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + ol.steps { + padding-left : 1.8em; + } + + ol.steps li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + + div.img { + text-align: center; + padding: 2em 0 2em 0; + } + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + /* XML Schema features table. 
*/ + #features { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #features th, #features td { + border: 1px solid; + padding : 0.6em 0.6em 0.6em 0.6em; + } + + #features th { + background : #cde8f6; + } + + #features td { + text-align: left; + } + + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div class="title" id="first-title">C++/Parser Mapping</div> + <div class="title" id="second-title">Getting Started Guide</div> + + <p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. 
+ </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/parser/guide/cxx-parser-guide.ps">PostScript</a>.</p> + + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a> + <table class="toc"> + <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> + <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>2</th><td><a href="#2">Hello World Example</a> + <table class="toc"> + <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> + <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> + <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> + <tr><th>2.4</th><td><a href="#2.4">Compiling and Running</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Parser Skeletons</a> + <table class="toc"> + <tr><th>3.1</th><td><a href="#3.1">Implementing the Gender Parser</a></td></tr> + <tr><th>3.2</th><td><a href="#3.2">Implementing the Person Parser</a></td></tr> + <tr><th>3.3</th><td><a href="#3.3">Implementing the People Parser</a></td></tr> + <tr><th>3.4</th><td><a href="#3.4">Connecting the Parsers Together</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Type Maps</a> + <table class="toc"> + <tr><th>4.1</th><td><a href="#4.1">Object Model</a></td></tr> + <tr><th>4.2</th><td><a href="#4.2">Type Map 
File Format</a></td></tr> + <tr><th>4.3</th><td><a href="#4.3">Parser Implementations</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Mapping Configuration</a> + <table class="toc"> + <tr><th>5.1</th><td><a href="#5.1">C++ Standard</a></td></tr> + <tr><th>5.2</th><td><a href="#5.2">Character Type and Encoding</a></td></tr> + <tr><th>5.3</th><td><a href="#5.3">Underlying XML Parser</a></td></tr> + <tr><th>5.4</th><td><a href="#5.4">XML Schema Validation</a></td></tr> + <tr><th>5.5</th><td><a href="#5.5">Support for Polymorphism</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>6</th><td><a href="#6">Built-In XML Schema Type Parsers</a> + <table class="toc"> + <tr><th>6.1</th><td><a href="#6.1"><code>QName</code> Parser</a></td></tr> + <tr><th>6.2</th><td><a href="#6.2"><code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></td></tr> + <tr><th>6.3</th><td><a href="#6.3"><code>base64Binary</code> and <code>hexBinary</code> Parsers</a></td></tr> + <tr><th>6.4</th><td><a href="#6.4">Time Zone Representation</a></td></tr> + <tr><th>6.5</th><td><a href="#6.5"><code>date</code> Parser</a></td></tr> + <tr><th>6.6</th><td><a href="#6.6"><code>dateTime</code> Parser</a></td></tr> + <tr><th>6.7</th><td><a href="#6.7"><code>duration</code> Parser</a></td></tr> + <tr><th>6.8</th><td><a href="#6.8"><code>gDay</code> Parser</a></td></tr> + <tr><th>6.9</th><td><a href="#6.9"><code>gMonth</code> Parser</a></td></tr> + <tr><th>6.10</th><td><a href="#6.10"><code>gMonthDay</code> Parser</a></td></tr> + <tr><th>6.11</th><td><a href="#6.11"><code>gYear</code> Parser</a></td></tr> + <tr><th>6.12</th><td><a href="#6.12"><code>gYearMonth</code> Parser</a></td></tr> + <tr><th>6.13</th><td><a href="#6.13"><code>time</code> Parser</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>7</th><td><a href="#7">Document Parser and Error Handling</a> + <table class="toc"> + <tr><th>7.1</th><td><a href="#7.1">Xerces-C++ Document Parser</a></td></tr> + 
<tr><th>7.2</th><td><a href="#7.2">Expat Document Parser</a></td></tr> + <tr><th>7.3</th><td><a href="#7.3">Error Handling</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th></th><td><a href="#A">Appendix A — Supported XML Schema Constructs</a></td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>The goal of this document is to provide you with an understanding of + the C++/Parser programming model and allow you to efficiently evaluate + XSD against your project's technical requirements. As such, this + document is intended for C++ developers and software architects + who are looking for an XML processing solution. Prior experience + with XML and C++ is required to understand this document. Basic + understanding of XML Schema is advantageous but not expected + or required. + </p> + + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this guide, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/parser/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is the place to ask technical questions about XSD and the C++/Parser mapping. 
+ Furthermore, the <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + + </ul> + + <!-- Introduction --> + + <h1><a name="1">1 Introduction</a></h1> + + <p>Welcome to CodeSynthesis XSD and the C++/Parser mapping. XSD is a + cross-platform W3C XML Schema to C++ data binding compiler. C++/Parser + is a W3C XML Schema to C++ mapping that represents an XML vocabulary + as a set of parser skeletons which you can implement to perform XML + processing as required by your application logic. + </p> + + <h2><a name="1.1">1.1 Mapping Overview</a></h2> + + <p>The C++/Parser mapping provides event-driven, stream-oriented + XML parsing, XML Schema validation, and C++ data binding. It was + specifically designed and optimized for high performance and + small footprint. Based on the static analysis of the schemas, XSD + generates compact, highly-optimized hierarchical state machines + that combine data extraction, validation, and even dispatching + in a single step. As a result, the generated code is typically + 2-10 times faster than general-purpose validating XML parsers + while maintaining the lowest static and dynamic memory footprints. + </p> + + <p>To speed up application development, the C++/Parser mapping + can be instructed to generate sample parser implementations + and a test driver which can then be filled with the application + logic code. The mapping also provides a wide range of + mechanisms for controlling and customizing the generated code.</p> + + <p>The next chapter shows how to create a simple application that uses + the C++/Parser mapping to parse, validate, and extract data from a + simple XML document. 
The following chapters show how to + use the C++/Parser mapping in more detail.</p> + + <h2><a name="1.2">1.2 Benefits</a></h2> + + <p>Traditional XML access APIs such as Document Object Model (DOM) + or Simple API for XML (SAX) have a number of drawbacks that + make them less suitable for creating robust and maintainable + XML processing applications. These drawbacks include: + </p> + + <ul class="list"> + <li>Generic representation of XML in terms of elements, attributes, + and text forces an application developer to write a substantial + amount of bridging code that identifies and transforms pieces + of information encoded in XML to a representation more suitable + for consumption by the application logic.</li> + + <li>String-based flow control defers error detection to runtime. + It also reduces code readability and maintainability.</li> + + <li>Lack of type safety because the data is represented + as text.</li> + + <li>Resulting applications are hard to debug, change, and + maintain.</li> + </ul> + + <p>In contrast, statically-typed, vocabulary-specific parser + skeletons produced by the C++/Parser mapping allow you to + operate in your domain terms instead of the generic elements, + attributes, and text. Static typing helps catch errors at + compile-time rather than at run-time. Automatic code generation + frees you for more interesting tasks (such as doing something + useful with the information stored in the XML documents) and + minimizes the effort needed to adapt your applications to + changes in the document structure. To summarize, the C++/Parser + mapping has the following key advantages over generic XML + access APIs:</p> + + <ul class="list"> + <li><b>Ease of use.</b> The generated code hides all the complexity + associated with recreating the document structure, maintaining the + dispatch state, and converting the data from the text representation + to data types suitable for manipulation by the application logic. 
+ Parser skeletons also provide a convenient mechanism for building + custom in-memory representations.</li> + + <li><b>Natural representation.</b> The generated parser skeletons + implement parser callbacks as virtual functions with names + corresponding to elements and attributes in XML. As a result, + you process the XML data using your domain vocabulary instead + of generic elements, attributes, and text. + </li> + + <li><b>Concise code.</b> With a separate parser skeleton for each + XML Schema type, the application implementation is + simpler and thus easier to read and understand.</li> + + <li><b>Safety.</b> The XML data is delivered to parser callbacks as + statically typed objects. The parser callbacks themselves are virtual + functions. This helps catch programming errors at compile-time + rather than at runtime.</li> + + <li><b>Maintainability.</b> Automatic code generation minimizes the + effort needed to adapt the application to changes in the + document structure. With static typing, the C++ compiler + can pin-point the places in the application code that need to be + changed.</li> + + <li><b>Efficiency.</b> The generated parser skeletons combine + data extraction, validation, and even dispatching in a single + step. This makes them much more efficient than traditional + architectures with separate stages for validation and data + extraction/dispatch.</li> + </ul> + + <!-- Hello World Parser --> + + + <h1><a name="2">2 Hello World Example</a></h1> + + <p>In this chapter we will examine how to parse a very simple XML + document using the XSD-generated C++/Parser skeletons. 
+ The code presented in this chapter is based on the <code>hello</code> + example which can be found in the <code>cxx/parser/</code> directory in + the <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + + <p>First, we need to get an idea about the structure + of the XML documents we are going to process. Our + <code>hello.xml</code>, for example, could look like this:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + <p>Then we can write a description of the above XML in the + XML Schema language and save it into <code>hello.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello"> + <xs:sequence> + <xs:element name="greeting" type="xs:string"/> + <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello"/> + +</xs:schema> + </pre> + + <p>Even if you are not familiar with XML Schema, it + should be easy to connect declarations in <code>hello.xsd</code> + to elements in <code>hello.xml</code>. The <code>hello</code> type + is defined as a sequence of the nested <code>greeting</code> and + <code>name</code> elements. Note that the term sequence in XML + Schema means that elements should appear in a particular order + as opposed to appearing multiple times. The <code>name</code> + element has its <code>maxOccurs</code> property set to + <code>unbounded</code> which means it can appear multiple times + in an XML document. Finally, the globally-defined <code>hello</code> + element prescribes the root element for our vocabulary. 
For an + easily-approachable introduction to XML Schema refer to + <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: + Primer</a>.</p> + + <p>The above schema is a specification of our XML vocabulary; it tells + everybody what valid documents of our XML-based language should look + like. The next step is to compile this schema to generate + the C++ parser skeletons.</p> + + <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + + <p>Now we are ready to translate our <code>hello.xsd</code> to C++ parser + skeletons. To do this we invoke the XSD compiler from a terminal + (UNIX) or a command prompt (Windows): + </p> + + <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat hello.xsd + </pre> + + <p>The <code>--xml-parser</code> option indicates that we want to + use Expat as the underlying XML parser (see <a href="#5.3">Section + 5.3, "Underlying XML Parser"</a>). The XSD compiler produces two + C++ files: <code>hello-pskel.hxx</code> and <code>hello-pskel.cxx</code>. + The following code fragment is taken from <code>hello-pskel.hxx</code>; + it should give you an idea about what gets generated: + </p> + + <pre class="c++"> +class hello_pskel +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + greeting (const std::string&); + + virtual void + name (const std::string&); + + virtual void + post_hello (); + + // Parser construction API. + // + void + greeting_parser (xml_schema::string_pskel&); + + void + name_parser (xml_schema::string_pskel&); + + void + parsers (xml_schema::string_pskel& /* greeting */, + xml_schema::string_pskel& /* name */); + +private: + ... +}; + </pre> + + <p>The first four member functions shown above are called parser + callbacks. You would normally override them in your implementation + of the parser to do something useful. 
Let's go through all of + them one by one.</p> + + <p>The <code>pre()</code> function is an initialization callback. It is + called when a new element of type <code>hello</code> is about + to be parsed. You would normally use this function to allocate a new + instance of the resulting type or clear accumulators that are used + to gather information during parsing. The default implementation + of this function does nothing.</p> + + <p>The <code>post_hello()</code> function is a finalization callback. Its + name is constructed by adding the parser skeleton name to the + <code>post_</code> prefix. The finalization callback is called when + parsing of the element is complete and the result, if any, should + be returned. Note that in our case the return type of + <code>post_hello()</code> is <code>void</code> which means there + is nothing to return. More on parser return types later. + </p> + + <p>You may be wondering why the finalization callback is called + <code>post_hello()</code> instead of <code>post()</code> just + like <code>pre()</code>. The reason for this is that + finalization callbacks can have different return types and + result in function signature clashes across inheritance + hierarchies. To prevent this the signatures of finalization + callbacks are made unique by adding the type name to their names.</p> + + <p>The <code>greeting()</code> and <code>name()</code> functions are + called when the <code>greeting</code> and <code>name</code> elements + have been parsed, respectively. Their arguments are of type + <code>std::string</code> and contain the data extracted from XML.</p> + + <p>The last three functions are for connecting parsers to each other. + For example, there is a predefined parser for built-in XML Schema type + <code>string</code> in the XSD runtime. 
We will be using + it to parse the contents of <code>greeting</code> and + <code>name</code> elements, as shown in the next section.</p> + + <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + + <p>At this point we have all the parts we need to do something useful + with the information stored in our XML document. The first step is + to implement the parser: + </p> + + <pre class="c++"> +#include <iostream> +#include "hello-pskel.hxx" + +class hello_pimpl: public hello_pskel +{ +public: + virtual void + greeting (const std::string& g) + { + greeting_ = g; + } + + virtual void + name (const std::string& n) + { + std::cout << greeting_ << ", " << n << "!" << std::endl; + } + +private: + std::string greeting_; +}; + </pre> + + <p>We left both <code>pre()</code> and <code>post_hello()</code> with the + default implementations; we don't have anything to initialize or + return. The rest is pretty straightforward: we store the greeting + in a member variable and later, when parsing names, use it to + say hello.</p> + + <p>An observant reader may ask what happens if the <code>name</code> + element comes before <code>greeting</code>? Don't we need to + make sure <code>greeting_</code> was initialized and report + an error otherwise? The answer is no, we don't have to do + any of this. The <code>hello_pskel</code> parser skeleton + performs validation of XML according to the schema from which + it was generated. As a result, it will check the order + of the <code>greeting</code> and <code>name</code> elements + and report an error if it is violated.</p> + + <p>Now it is time to put this parser implementation to work:</p> + + <pre class="c++"> +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + // Construct the parser. + // + xml_schema::string_pimpl string_p; + hello_pimpl hello_p; + + hello_p.greeting_parser (string_p); + hello_p.name_parser (string_p); + + // Parse the XML instance. 
+ // + xml_schema::document doc_p (hello_p, "hello"); + + hello_p.pre (); + doc_p.parse (argv[1]); + hello_p.post_hello (); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>The first part of this code snippet instantiates individual parsers + and assembles them into a complete vocabulary parser. + <code>xml_schema::string_pimpl</code> is an implementation of a parser + for built-in XML Schema type <code>string</code>. It is provided by + the XSD runtime along with parsers for other built-in types (for + more information on the built-in parsers see <a href="#6">Chapter 6, + "Built-In XML Schema Type Parsers"</a>). We use <code>string_pimpl</code> + to parse the <code>greeting</code> and <code>name</code> elements as + indicated by the calls to <code>greeting_parser()</code> and + <code>name_parser()</code>. + </p> + + <p>Then we instantiate a document parser (<code>doc_p</code>). The + first argument to its constructor is the parser for + the root element (<code>hello_p</code> in our case). The + second argument is the root element name. + </p> + + <p>The final piece is the calls to <code>pre()</code>, <code>parse()</code>, + and <code>post_hello()</code>. The call to <code>parse()</code> + performs the actual XML parsing while the calls to <code>pre()</code> and + <code>post_hello()</code> make sure that the parser for the root + element can perform proper initialization and cleanup.</p> + + <p>While our parser implementation and test driver are pretty small and + easy to write by hand, for bigger XML vocabularies it can be a + substantial effort. To help with this task XSD can automatically + generate sample parser implementations and a test driver from your + schemas. You can request the generation of a sample implementation with + empty function bodies by specifying the <code>--generate-noop-impl</code> + option. 
Or you can generate a sample implementation that prints the + data stored in XML by using the <code>--generate-print-impl</code> + option. To request the generation of a test driver you can use the + <code>--generate-test-driver</code> option. For more information + on these options refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. The <code>'generated'</code> example + in the <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + shows the sample implementation generation feature in action.</p> + + + <h2><a name="2.4">2.4 Compiling and Running</a></h2> + + <p>After saving all the parts from the previous section in + <code>driver.cxx</code>, we are ready to compile our first + application and run it on the test XML document. On a UNIX + system this can be done with the following commands: + </p> + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx hello-pskel.cxx +$ c++ -std=c++11 -o driver driver.o hello-pskel.o -lexpat +$ ./driver hello.xml +Hello, sun! +Hello, moon! +Hello, world! + </pre> + + <p>Here <code>.../libxsd</code> represents the path to the + <a href="https://cppget.org/libxsd">libxsd</a> package root + directory. We can also test the error handling. To test XML + well-formedness checking, we can try to parse + <code>hello-pskel.hxx</code>:</p> + + <pre class="terminal"> +$ ./driver hello-pskel.hxx +hello-pskel.hxx:1:0: not well-formed (invalid token) + </pre> + + <p>We can also try to parse a valid XML but not from our + vocabulary, for example <code>hello.xsd</code>:</p> + + <pre class="terminal"> +$ ./driver hello.xsd +hello.xsd:2:0: expected element 'hello' instead of +'http://www.w3.org/2001/XMLSchema#schema' + </pre> + + + <!-- Chapter 3 --> + + + <h1><a name="3">3 Parser Skeletons</a></h1> + + <p>As we have seen in the previous chapter, the XSD compiler generates + a parser skeleton class for each type defined in XML Schema. 
In + this chapter we will take a closer look at different functions + that comprise a parser skeleton as well as the way to connect + our implementations of these parser skeletons to create a complete + parser.</p> + + <p>In this and subsequent chapters we will use the following schema + that describes a collection of person records. We save it in + <code>people.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:simpleType name="gender"> + <xs:restriction base="xs:string"> + <xs:enumeration value="male"/> + <xs:enumeration value="female"/> + </xs:restriction> + </xs:simpleType> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="gender" type="gender"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> + </xs:complexType> + + <xs:complexType name="people"> + <xs:sequence> + <xs:element name="person" type="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="people" type="people"/> + +</xs:schema> + </pre> + + <p>A sample XML instance to go along with this schema is saved + in <code>people.xml</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + <p>Compiling <code>people.xsd</code> with the XSD compiler results + in three parser skeletons being generated: <code>gender_pskel</code>, + <code>person_pskel</code>, and <code>people_pskel</code>. 
We are going + to examine and implement each of them in the subsequent sections.</p> + + <h2><a name="3.1">3.1 Implementing the Gender Parser</a></h2> + + <p>The generated <code>gender_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class gender_pskel: public virtual xml_schema::string_pskel +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + post_gender (); +}; + </pre> + + <p>Notice that <code>gender_pskel</code> inherits from + <code>xml_schema::string_pskel</code> which is a parser skeleton + for built-in XML Schema type <code>string</code> and is + predefined in the XSD runtime library. This is an example + of the general rule that parser skeletons follow: if a type + in XML Schema inherits from another then there will be an + equivalent inheritance between the corresponding parser + skeleton classes.</p> + + <p>The <code>pre()</code> and <code>post_gender()</code> callbacks + should look familiar from the previous chapter. Let's now + implement the parser. Our implementation will simply print + the gender to <code>cout</code>:</p> + + + <pre class="c++"> +class gender_pimpl: public gender_pskel, + public xml_schema::string_pimpl +{ +public: + virtual void + post_gender () + { + std::string s = post_string (); + cout << "gender: " << s << endl; + } +}; + </pre> + + <p>While the code is quite short, there is a lot going on. First, + notice that we are inheriting from <code>gender_pskel</code> <em>and</em> + from <code>xml_schema::string_pimpl</code>. We've encountered + <code>xml_schema::string_pimpl</code> already; it is an + implementation of the <code>xml_schema::string_pskel</code> parser + skeleton for built-in XML Schema type <code>string</code>.</p> + + <p>This is another common theme in the C++/Parser programming model: + reusing implementations of the base parsers in the derived ones with + the C++ mixin idiom. 
In our case, <code>string_pimpl</code> will + do all the dirty work of extracting the data and we can just get + it at the end with the call to <code>post_string()</code>.</p> + + <p>In case you are curious, here is what + <code>xml_schema::string_pskel</code> and + <code>xml_schema::string_pimpl</code> look like:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_pskel: public simple_content + { + public: + virtual std::string + post_string () = 0; + }; + + class string_pimpl: public virtual string_pskel + { + public: + virtual void + _pre (); + + virtual void + _characters (const xml_schema::ro_string&); + + virtual std::string + post_string (); + + protected: + std::string str_; + }; +} + </pre> + + <p>There are three new pieces in this code that we haven't seen yet. + They are the <code>simple_content</code> class as well as + the <code>_pre()</code> and <code>_characters()</code> functions. + The <code>simple_content</code> class is defined in the XSD + runtime and is a base class for all parser skeletons that conform + to the simple content model in XML Schema. Types with the + simple content model cannot have nested elements—only text + and attributes. There is also the <code>complex_content</code> + class which corresponds to the complex content model (types with + nested elements, for example, <code>person</code> from + <code>people.xsd</code>).</p> + + <p>The <code>_pre()</code> function is a parser callback. Remember we + talked about the <code>pre()</code> and <code>post_*()</code> callbacks + in the previous chapter? There are actually two more callbacks + with similar roles: <code>_pre()</code> and <code>_post()</code>. + As a result, each parser skeleton has four special callbacks:</p> + + <pre class="c++"> + virtual void + pre (); + + virtual void + _pre (); + + virtual void + _post (); + + virtual void + post_name (); + </pre> + + <p><code>pre()</code> and <code>_pre()</code> are initialization + callbacks. 
They get called in that order before a new instance of the type + is about to be parsed. The difference between <code>pre()</code> and + <code>_pre()</code> is conventional: <code>pre()</code> can + be completely overridden by a derived parser. The derived + parser can also override <code>_pre()</code> but has to always call + the original version. This allows you to partition initialization + into customizable and required parts.</p> + + <p>Similarly, <code>_post()</code> and <code>post_name()</code> are + finalization callbacks with exactly the same semantics: + <code>post_name()</code> can be completely overridden by the derived + parser while the original <code>_post()</code> should always be called. + </p> + + <p>The final bit we need to discuss in this section is the + <code>_characters()</code> function. As you might have guessed, it + is also a callback. A low-level one that delivers raw character content + for the type being parsed. You will seldom need to use this callback + directly. Using implementations for the built-in parsers provided by + the XSD runtime is usually a simpler and more convenient + alternative.</p> + + <p>At this point you might be wondering why some <code>post_*()</code> + callbacks, for example <code>post_string()</code>, return some data + while others, for example <code>post_gender()</code>, have + <code>void</code> as a return type. This is a valid concern + and it will be addressed in the next chapter.</p> + + <h2><a name="3.2">3.2 Implementing the Person Parser</a></h2> + + <p>The generated <code>person_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class person_pskel: public xml_schema::complex_content +{ +public: + // Parser callbacks. Override them in your implementation. 
+ // + virtual void + pre (); + + virtual void + first_name (const std::string&); + + virtual void + last_name (const std::string&); + + virtual void + gender (); + + virtual void + age (short); + + virtual void + post_person (); + + // Parser construction API. + // + void + first_name_parser (xml_schema::string_pskel&); + + void + last_name_parser (xml_schema::string_pskel&); + + void + gender_parser (gender_pskel&); + + void + age_parser (xml_schema::short_pskel&); + + void + parsers (xml_schema::string_pskel& /* first-name */, + xml_schema::string_pskel& /* last-name */, + gender_pskel& /* gender */, + xml_schema::short_pskel& /* age */); +}; + </pre> + + + <p>As you can see, we have a parser callback for each of the nested + elements found in the <code>person</code> XML Schema type. + The implementation of this parser is straightforward:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + cout << "first: " << f << endl; + } + + virtual void + last_name (const std::string& l) + { + cout << "last: " << l << endl; + } + + virtual void + age (short a) + { + cout << "age: " << a << endl; + } +}; + </pre> + + <p>Notice that we didn't override the <code>gender()</code> callback + because all the printing is done by <code>gender_pimpl</code>.</p> + + + <h2><a name="3.3">3.3 Implementing the People Parser</a></h2> + + <p>The generated <code>people_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class people_pskel: public xml_schema::complex_content +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + person (); + + virtual void + post_people (); + + // Parser construction API. + // + void + person_parser (person_pskel&); + + void + parsers (person_pskel& /* person */); +}; + </pre> + + <p>The <code>person()</code> callback will be called after parsing each + <code>person</code> element. 
While <code>person_pimpl</code> does + all the printing, one useful thing we can do in this callback is to + print an extra newline after each person record so that our + output is more readable:</p> + + <pre class="c++"> +class people_pimpl: public people_pskel +{ +public: + virtual void + person () + { + cout << endl; + } +}; + </pre> + + <p>Now it is time to put everything together.</p> + + + <h2><a name="3.4">3.4 Connecting the Parsers Together</a></h2> + + <p>At this point we have all the individual parsers implemented + and can proceed to assemble them into a complete parser + for our XML vocabulary. The first step is to instantiate + all the individual parsers that we will need:</p> + + <pre class="c++"> +xml_schema::short_pimpl short_p; +xml_schema::string_pimpl string_p; + +gender_pimpl gender_p; +person_pimpl person_p; +people_pimpl people_p; + </pre> + + <p>Notice that our schema uses two built-in XML Schema types: + <code>string</code> for the <code>first-name</code> and + <code>last-name</code> elements as well as <code>short</code> + for <code>age</code>. We will use predefined parsers that + come with the XSD runtime to handle these types. The next + step is to connect all the individual parsers. We do this + with the help of functions defined in the parser + skeletons and marked with the "Parser Construction API" + comment. One way to do it is to connect each individual + parser by calling the <code>*_parser()</code> functions:</p> + + <pre class="c++"> +person_p.first_name_parser (string_p); +person_p.last_name_parser (string_p); +person_p.gender_parser (gender_p); +person_p.age_parser (short_p); + +people_p.person_parser (person_p); + </pre> + + <p>You might be wondering what happens if you do not provide + a parser by not calling one of the <code>*_parser()</code> functions. + In that case the corresponding XML content will be skipped, + including validation. 
This is an efficient way to ignore parts + of the document that you are not interested in.</p> + + + <p>An alternative, shorter, way to connect the parsers is by using + the <code>parsers()</code> function, which connects all the parsers + for a given type at once:</p> + + <pre class="c++"> +person_p.parsers (string_p, string_p, gender_p, short_p); +people_p.parsers (person_p); + </pre> + + <p>The following figure illustrates the resulting connections. Notice + the correspondence between return types of the <code>post_*()</code> + functions and argument types of element callbacks that are connected + by the arrows.</p> + + <!-- align=center is needed for html2ps --> + <div class="img" align="center"><img src="figure-1.png"/></div> + + <p>The last step is the construction of the document parser and + invocation of the complete parser on our sample XML instance:</p> + + <pre class="c++"> +xml_schema::document doc_p (people_p, "people"); + +people_p.pre (); +doc_p.parse ("people.xml"); +people_p.post_people (); + </pre> + + <p>Let's consider <code>xml_schema::document</code> in + more detail. While the exact definition of this class + varies depending on the underlying parser selected, + here is the common part:</p> + + <pre class="c++"> +namespace xml_schema +{ + class document + { + public: + document (xml_schema::parser_base&, + const std::string& root_element_name, + bool polymorphic = false); + + document (xml_schema::parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + void + parse (const std::string& file); + + void + parse (std::istream&); + + ... + + }; +} + </pre> + + <p><code>xml_schema::document</code> is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element (<code>people_pimpl</code> + in our case). 
Because a type parser is only concerned with + the element's content and not with the element's name, we need + to specify the root element's name somewhere. That's + what is passed as the second and third arguments to the + <code>document</code>'s constructors.</p> + + <p>There are also two overloaded <code>parse()</code> functions + defined in the <code>document</code> class (there are actually + more but the others are specific to the underlying XML parser). + The first version parses a local file identified by a name. The + second version reads the data from an input stream. For more + information on the <code>xml_schema::document</code> class + refer to <a href="#7">Chapter 7, "Document Parser and Error + Handling"</a>.</p> + + <p>Let's now consider a step-by-step list of actions that happen + as we parse through <code>people.xml</code>. The content of + <code>people.xml</code> is repeated below for convenience.</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + + <ol class="steps"> + <li><code>people_p.pre()</code> is called from + <code>main()</code>. We did not provide any implementation + for this callback so this call is a no-op.</li> + + <li><code>doc_p.parse("people.xml")</code> is called from + <code>main()</code>. The parser opens the file and starts + parsing its content.</li> + + <li>The parser encounters the root element. <code>doc_p</code> + verifies that the root element is correct and calls + <code>_pre()</code> on <code>people_p</code> which is also + a no-op. Parsing is now delegated to <code>people_p</code>.</li> + + <li>The parser encounters the <code>person</code> element. 
+ <code>people_p</code> determines that <code>person_p</code> + is responsible for parsing this element. <code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>person_p</code>. + Parsing is now delegated to <code>person_p</code>.</li> + + <li>The parser encounters the <code>first-name</code> element. + <code>person_p</code> determines that <code>string_p</code> + is responsible for parsing this element. <code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>string_p</code>. + Parsing is now delegated to <code>string_p</code>.</li> + + <li>The parser encounters character content consisting of + <code>"John"</code>. The <code>_characters()</code> callback is + called on <code>string_p</code>.</li> + + <li>The parser encounters the end of <code>first-name</code> + element. The <code>_post()</code> and <code>post_string()</code> + callbacks are called on <code>string_p</code>. The + <code>first_name()</code> callback is called on <code>person_p</code> + with the return value of <code>post_string()</code>. The + <code>first_name()</code> implementation prints + <code>"first: John"</code> to <code>cout</code>. + Parsing is now returned to <code>person_p</code>.</li> + + <li>Steps analogous to 5-7 are performed for the <code>last-name</code>, + <code>gender</code>, and <code>age</code> elements.</li> + + <li>The parser encounters the end of <code>person</code> + element. The <code>_post()</code> and <code>post_person()</code> + callbacks are called on <code>person_p</code>. The + <code>person()</code> callback is called on <code>people_p</code>. + The <code>person()</code> implementation prints a new line + to <code>cout</code>. Parsing is now returned to + <code>people_p</code>.</li> + + <li>Steps 4-9 are performed for the second <code>person</code> + element.</li> + + <li>The parser encounters the end of <code>people</code> + element. The <code>_post()</code> callback is called on + <code>people_p</code>. 
The <code>doc_p.parse("people.xml")</code> + call returns to <code>main()</code>.</li> + + <li><code>people_p.post_people()</code> is called from + <code>main()</code> which is a no-op.</li> + + </ol> + + + <!-- Chpater 4 --> + + + <h1><a name="4">4 Type Maps</a></h1> + + <p>There are many useful things you can do inside parser callbacks as they + are right now. There are, however, times when you want to propagate + some information from one parser to another or to the caller of the + parser. One common task that would greatly benefit from such a + possibility is building a tree-like in-memory object model of the + data stored in XML. During execution, each individual sub-parser + would create a sub-tree and return it to its <em>parent</em> parser + which can then incorporate this sub-tree into the whole tree.</p> + + <p>In this chapter we will discuss the mechanisms offered by the + C++/Parser mapping for returning information from individual + parsers and see how to use them to build an object model + of our people vocabulary.</p> + + <h2><a name="4.1">4.1 Object Model</a></h2> + + <p>An object model for our person record example could + look like this (saved in the <code>people.hxx</code> file):</p> + + <pre class="c++"> +#include <string> +#include <vector> + +enum gender +{ + male, + female +}; + +class person +{ +public: + person (const std::string& first, + const std::string& last, + ::gender gender, + short age) + : first_ (first), last_ (last), + gender_ (gender), age_ (age) + { + } + + const std::string& + first () const + { + return first_; + } + + const std::string& + last () const + { + return last_; + } + + ::gender + gender () const + { + return gender_; + } + + short + age () const + { + return age_; + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +typedef std::vector<person> people; + </pre> + + <p>While it is clear which parser is responsible for which part of + the object model, it is not exactly 
clear how, for + example, <code>gender_pimpl</code> will deliver <code>gender</code> + to <code>person_pimpl</code>. You might have noticed that + <code>string_pimpl</code> manages to deliver its value to the + <code>first_name()</code> callback of <code>person_pimpl</code>. Let's + see how we can utilize the same mechanism to propagate our + own data.</p> + + <p>There is a way to tell the XSD compiler that you want to + exchange data between parsers. More precisely, for each + type defined in XML Schema, you can tell the compiler two things. + First, the return type of the <code>post_*()</code> callback + in the parser skeleton generated for this type. And, second, + the argument type for callbacks corresponding to elements and + attributes of this type. For example, for XML Schema type + <code>gender</code> we can specify the return type for + <code>post_gender()</code> in the <code>gender_pskel</code> + skeleton and the argument type for the <code>gender()</code> callback + in the <code>person_pskel</code> skeleton. As you might have guessed, + the generated code will then pass the return value from the + <code>post_*()</code> callback as an argument to the element or + attribute callback.</p> + + <p>The way to tell the XSD compiler about these XML Schema to + C++ mappings is with type map files. Here is a simple type + map for the <code>gender</code> type from the previous paragraph:</p> + + <pre class="type-map"> +include "people.hxx"; +gender ::gender ::gender; + </pre> + + <p>The first line indicates that the generated code must include + <code>people.hxx</code> in order to get the definition for the + <code>gender</code> type. The second line specifies that both + argument and return types for the <code>gender</code> + XML Schema type should be the <code>::gender</code> C++ enum + (we use fully-qualified C++ names to avoid name clashes). + The next section will describe the type map format in detail. 
+ We save this type map in <code>people.map</code> and + then translate our schemas with the <code>--type-map</code> + option to let the XSD compiler know about our type map:</p> + + <pre class="terminal"> +$ xsd cxx-parser --type-map people.map people.xsd + </pre> + + <p>If we now look at the generated <code>people-pskel.hxx</code>, + we will see the following changes in the <code>gender_pskel</code> and + <code>person_pskel</code> skeletons:</p> + + <pre class="c++"> +#include "people.hxx" + +class gender_pskel: public virtual xml_schema::string_pskel +{ + virtual ::gender + post_gender () = 0; + + ... +}; + +class person_pskel: public xml_schema::complex_content +{ + virtual void + gender (::gender); + + ... +}; + </pre> + + <p>Notice that <code>#include "people.hxx"</code> was added to + the generated header file from the type map to provide the + definition for the <code>gender</code> enum.</p> + + <h2><a name="4.2">4.2 Type Map File Format</a></h2> + + <p>Type map files are used to define a mapping between XML Schema + and C++ types. The compiler uses this information + to determine return types of <code>post_*()</code> + callbacks in parser skeletons corresponding to XML Schema + types as well as argument types for callbacks corresponding + to elements and attributes of these types.</p> + + <p>The compiler has a set of predefined mapping rules that map + the built-in XML Schema types to suitable C++ types (discussed + below) and all other types to <code>void</code>. + By providing your own type maps you can override these predefined + rules. 
The format of the type map file is presented below: + </p> + + <pre class="type-map"> +namespace <schema-namespace> [<cxx-namespace>] +{ + (include <file-name>;)* + ([type] <schema-type> <cxx-ret-type> [<cxx-arg-type>];)* +} + </pre> + + <p>Both <code><i><schema-namespace></i></code> and + <code><i><schema-type></i></code> are regex patterns while + <code><i><cxx-namespace></i></code>, + <code><i><cxx-ret-type></i></code>, and + <code><i><cxx-arg-type></i></code> are regex pattern + substitutions. All names can be optionally enclosed in + <code>" "</code>, for example, to include white-spaces.</p> + + <p><code><i><schema-namespace></i></code> determines XML + Schema namespace. Optional <code><i><cxx-namespace></i></code> + is prefixed to every C++ type name in this namespace declaration. + <code><i><cxx-ret-type></i></code> is a C++ type name that is + used as a return type for the <code>post_*()</code> callback. + Optional <code><i><cxx-arg-type></i></code> is an argument + type for callbacks corresponding to elements and attributes + of this type. If <code><i><cxx-arg-type></i></code> is not + specified, it defaults to <code><i><cxx-ret-type></i></code> + if <code><i><cxx-ret-type></i></code> ends with <code>*</code> or + <code>&</code> (that is, it is a pointer or a reference) and + <code>const <i><cxx-ret-type></i>&</code> + otherwise. + <code><i><file-name></i></code> is a file name either in the + <code>" "</code> or <code>< ></code> format + and is added with the <code>#include</code> directive to + the generated code.</p> + + <p>The <code><b>#</b></code> character starts a comment that ends + with a new line or end of file. To specify a name that contains + <code><b>#</b></code> enclose it in <code><b>" "</b></code>. + For example:</p> + + <pre> +namespace http://www.example.com/xmlns/my my +{ + include "my.hxx"; + + # Pass apples by value. + # + apple apple; + + # Pass oranges as pointers. 
+ # + orange orange_t*; +} + </pre> + + <p>In the example above, for the + <code>http://www.example.com/xmlns/my#orange</code> + XML Schema type, the <code>my::orange_t*</code> C++ type will + be used as both return and argument types.</p> + + <p>Several namespace declarations can be specified in a single + file. The namespace declaration can also be completely + omitted to map types in a schema without a namespace. For + instance:</p> + + <pre class="type-map"> +include "my.hxx"; +apple apple; + +namespace http://www.example.com/xmlns/my +{ + orange "const orange_t*"; +} + </pre> + + <p>The compiler has a number of predefined mapping rules for + the built-in XML Schema types which can be presented as the + following map files. The string-based XML Schema types are + mapped to either <code>std::string</code> or + <code>std::wstring</code> depending on the character type + selected (see <a href="#5.2"> Section 5.2, "Character Type and + Encoding"</a> for more information). The binary XML Schema + types are mapped to either <code>std::unique_ptr<xml_schema::buffer></code> + or <code>std::auto_ptr<xml_schema::buffer></code> + depending on the C++ standard selected (C++11 or C++98, + respectively; refer to the <code>--std</code> XSD compiler + command line option for details).</p> + + <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ + boolean bool bool; + + byte "signed char" "signed char"; + unsignedByte "unsigned char" "unsigned char"; + + short short short; + unsignedShort "unsigned short" "unsigned short"; + + int int int; + unsignedInt "unsigned int" "unsigned int"; + + long "long long" "long long"; + unsignedLong "unsigned long long" "unsigned long long"; + + integer "long long" "long long"; + + negativeInteger "long long" "long long"; + nonPositiveInteger "long long" "long long"; + + positiveInteger "unsigned long long" "unsigned long long"; + nonNegativeInteger "unsigned long long" "unsigned long long"; + + float float float; + double 
double double; + decimal double double; + + string std::string; + normalizedString std::string; + token std::string; + Name std::string; + NMTOKEN std::string; + NCName std::string; + ID std::string; + IDREF std::string; + language std::string; + anyURI std::string; + + NMTOKENS xml_schema::string_sequence; + IDREFS xml_schema::string_sequence; + + QName xml_schema::qname; + + base64Binary std::[unique|auto]_ptr<xml_schema::buffer> + std::[unique|auto]_ptr<xml_schema::buffer>; + hexBinary std::[unique|auto]_ptr<xml_schema::buffer> + std::[unique|auto]_ptr<xml_schema::buffer>; + + date xml_schema::date; + dateTime xml_schema::date_time; + duration xml_schema::duration; + gDay xml_schema::gday; + gMonth xml_schema::gmonth; + gMonthDay xml_schema::gmonth_day; + gYear xml_schema::gyear; + gYearMonth xml_schema::gyear_month; + time xml_schema::time; +} + </pre> + + <p>For more information about the mapping of the built-in XML Schema types + to C++ types refer to <a href="#6">Chapter 6, "Built-In XML Schema Type + Parsers"</a>. The last predefined rule maps anything that wasn't + mapped by previous rules to <code>void</code>:</p> + + <pre class="type-map"> +namespace .* +{ + .* void void; +} + </pre> + + + <p>When you provide your own type maps with the + <code>--type-map</code> option, they are evaluated first. This + allows you to selectively override any of the predefined rules. + Note also that if you change the mapping + of a built-in XML Schema type then it becomes your responsibility + to provide the corresponding parser skeleton and implementation + in the <code>xml_schema</code> namespace. 
You can include the + custom definitions into the generated header file using the + <code>--hxx-prologue-*</code> options.</p> + + <h2><a name="4.3">4.3 Parser Implementations</a></h2> + + <p>With the knowledge from the previous section, we can proceed + with creating a type map that maps types in the <code>people.xsd</code> + schema to our object model classes in + <code>people.hxx</code>. In fact, we already have the beginning + of our type map file in <code>people.map</code>. Let's extend + it with the rest of the types:</p> + + <pre class="type-map"> +include "people.hxx"; + +gender ::gender ::gender; +person ::person; +people ::people; + </pre> + + <p>There are a few things to note about this type map. We did not + provide the argument types for <code>person</code> and + <code>people</code> because the default constant reference is + exactly what we need. We also did not provide any mappings + for built-in XML Schema types <code>string</code> and + <code>short</code> because they are handled by the predefined + rules and we are happy with the result. Note also that + all C++ types are fully qualified. This is done to avoid + potential name conflicts in the generated code. Now we can + recompile our schema and move on to implementing the parsers:</p> + + <pre class="terminal"> +$ xsd cxx-parser --xml-parser expat --type-map people.map people.xsd + </pre> + + <p>Here is the implementation of our three parsers in full. One + way to save typing when implementing your own parsers is + to open the generated code and copy the signatures of parser + callbacks into your code. Or you could always auto generate the + sample implementations and fill them with your code.</p> + + + <pre class="c++"> +#include "people-pskel.hxx" + +class gender_pimpl: public gender_pskel, + public xml_schema::string_pimpl +{ +public: + virtual ::gender + post_gender () + { + return post_string () == "male" ? 
male : female; + } +}; + +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + first_ = f; + } + + virtual void + last_name (const std::string& l) + { + last_ = l; + } + + virtual void + gender (::gender g) + { + gender_ = g; + } + + virtual void + age (short a) + { + age_ = a; + } + + virtual ::person + post_person () + { + return ::person (first_, last_, gender_, age_); + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +class people_pimpl: public people_pskel +{ +public: + virtual void + person (const ::person& p) + { + people_.push_back (p); + } + + virtual ::people + post_people () + { + ::people r; + r.swap (people_); + return r; + } + +private: + ::people people_; +}; + </pre> + + <p>This code fragment should look familiar by now. Just note that + all the <code>post_*()</code> callbacks now have return types instead + of <code>void</code>. Here is the implementation of the test + driver for this example:</p> + + <pre class="c++"> +#include <iostream> + +using namespace std; + +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::short_pimpl short_p; + xml_schema::string_pimpl string_p; + + gender_pimpl gender_p; + person_pimpl person_p; + people_pimpl people_p; + + person_p.parsers (string_p, string_p, gender_p, short_p); + people_p.parsers (person_p); + + // Parse the document to obtain the object model. + // + xml_schema::document doc_p (people_p, "people"); + + people_p.pre (); + doc_p.parse (argv[1]); + people ppl = people_p.post_people (); + + // Print the object model. + // + for (people::iterator i (ppl.begin ()); i != ppl.end (); ++i) + { + cout << "first: " << i->first () << endl + << "last: " << i->last () << endl + << "gender: " << (i->gender () == male ? 
"male" : "female") << endl + << "age: " << i->age () << endl + << endl; + } +} + </pre> + + <p>The parser creation and assembly part is exactly the same as in + the previous chapter. The parsing part is a bit different: + <code>post_people()</code> now has a return value which is the + complete object model. We store it in the + <code>ppl</code> variable. The last bit of the code simply iterates + over the <code>people</code> vector and prints the information + for each person. We save the last two code fragments to + <code>driver.cxx</code> and proceed to compile and test + our new application:</p> + + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx people-pskel.cxx +$ c++ -std=c++11 -o driver driver.o people-pskel.o -lexpat +$ ./driver people.xml +first: John +last: Doe +gender: male +age: 32 + +first: Jane +last: Doe +gender: female +age: 28 + </pre> + + + <!-- Mapping Configuration --> + + + <h1><a name="5">5 Mapping Configuration</a></h1> + + <p>The C++/Parser mapping has a number of configuration parameters that + determine the overall properties and behavior of the generated code. + Configuration parameters are specified with the XSD command line + options and include the C++ standard, the character type that is used + by the generated code, the underlying XML parser, whether the XML Schema + validation is performed in the generated code, and support for XML Schema + polymorphism. This chapter describes these configuration + parameters in more detail. For more ways to configure the generated + code refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. + </p> + + <h2><a name="5.1">5.1 C++ Standard</a></h2> + + <p>The C++/Parser mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. 
While the majority of the examples in this guide use + C++11, the document explains the C++11/98 usage difference and so + they can easily be converted to C++98.</p> + + <h2><a name="5.2">5.2 Character Type and Encoding</a></h2> + + <p>The C++/Parser mapping has built-in support for two character types: + <code>char</code> and <code>wchar_t</code>. You can select the + character type with the <code>--char-type</code> command line + option. The default character type is <code>char</code>. The + string-based built-in XML Schema types are returned as either + <code>std::string</code> or <code>std::wstring</code> depending + on the character type selected.</p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings. You can select which encoding should be used + in the object model with the <code>--char-encoding</code> command + line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <p>Note also that the character encoding that is used in the object model + is independent of the encodings used in input and output XML. In fact, + all three (object model, input XML, and output XML) can have different + encodings.</p> + + <h2><a name="5.3">5.3 Underlying XML Parser</a></h2> + + <p>The C++/Parser mapping can be used with either Xerces-C++ or Expat + as the underlying XML parser.
You can select the XML parser with + the <code>--xml-parser</code> command line option. Valid values + for this option are <code>xerces</code> and <code>expat</code>. + The default XML parser is Xerces-C++.</p> + + <p>The generated code is identical for both parsers except for the + <code>xml_schema::document</code> class in which some of the + <code>parse()</code> functions are parser-specific as described + in <a href="#7">Chapter 7, "Document Parser and Error Handling"</a>.</p> + + + <h2><a name="5.4">5.4 XML Schema Validation</a></h2> + + <p>The C++/Parser mapping provides support for validating a + commonly-used subset of W3C XML Schema in the generated code. + For the list of supported XML Schema constructs refer to + <a href="#A">Appendix A, "Supported XML Schema Constructs"</a>.</p> + + <p>By default validation in the generated code is disabled if + the underlying XML parser is validating (Xerces-C++) and + enabled otherwise (Expat). See <a href="#5.3">Section 5.3, + "Underlying XML Parser"</a> for more information about + the underlying XML parser. You can override the default + behavior with the <code>--generate-validation</code> + and <code>--suppress-validation</code> command line options.</p> + + + <h2><a name="5.5">5.5 Support for Polymorphism</a></h2> + + <p>By default the XSD compiler generates non-polymorphic code. If your + vocabulary uses XML Schema polymorphism in the form of <code>xsi:type</code> + and/or substitution groups, then you will need to compile your schemas + with the <code>--generate-polymorphic</code> option to produce + polymorphism-aware code as well as pass <code>true</code> as the last + argument to the <code>xml_schema::document</code>'s constructors.</p> + + <p>When using the polymorphism-aware generated code, you can specify + several parsers for a single element by passing a parser map + instead of an individual parser to the parser connection function + for the element. 
One of the parsers will then be looked up and used + depending on the <code>xsi:type</code> attribute value or an element + name from a substitution group. Consider the following schema as an + example:</p> + + <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="name" type="xs:string"/> + </xs:sequence> + </xs:complexType> + + <!-- substitution group root --> + <xs:element name="person" type="person"/> + + <xs:complexType name="superman"> + <xs:complexContent> + <xs:extension base="person"> + <xs:attribute name="can-fly" type="xs:boolean"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="superman" + type="superman" + substitutionGroup="person"/> + + <xs:complexType name="batman"> + <xs:complexContent> + <xs:extension base="superman"> + <xs:attribute name="wing-span" type="xs:unsignedInt"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="batman" + type="batman" + substitutionGroup="superman"/> + + <xs:complexType name="supermen"> + <xs:sequence> + <xs:element ref="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="supermen" type="supermen"/> + +</xs:schema> + </pre> + + <p>Conforming XML documents can use the <code>superman</code> + and <code>batman</code> types in place of the <code>person</code> + type either by specifying the type with the <code>xsi:type</code> + attributes or by using the elements from the substitution + group, for instance:</p> + + + <pre class="xml"> +<supermen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <person> + <name>John Doe</name> + </person> + + <superman can-fly="false"> + <name>James "007" Bond</name> + </superman> + + <superman can-fly="true" wing-span="10" xsi:type="batman"> + <name>Bruce Wayne</name> + </superman> + +</supermen> + </pre> + + <p>To print the data stored in such XML documents we can implement + the 
parsers as follows:</p> + + <pre class="c++"> +class person_pimpl: public virtual person_pskel +{ +public: + virtual void + pre () + { + cout << "starting to parse person" << endl; + } + + virtual void + name (const std::string& v) + { + cout << "name: " << v << endl; + } + + virtual void + post_person () + { + cout << "finished parsing person" << endl; + } +}; + +class superman_pimpl: public virtual superman_pskel, + public person_pimpl +{ +public: + virtual void + pre () + { + cout << "starting to parse superman" << endl; + } + + virtual void + can_fly (bool v) + { + cout << "can-fly: " << v << endl; + } + + virtual void + post_person () + { + post_superman (); + } + + virtual void + post_superman () + { + cout << "finished parsing superman" << endl; + } +}; + +class batman_pimpl: public virtual batman_pskel, + public superman_pimpl +{ +public: + virtual void + pre () + { + cout << "starting to parse batman" << endl; + } + + virtual void + wing_span (unsigned int v) + { + cout << "wing-span: " << v << endl; + } + + virtual void + post_superman () + { + post_batman (); + } + + virtual void + post_batman () + { + cout << "finished parsing batman" << endl; + } +}; + </pre> + + <p>Note that because the derived type parsers (<code>superman_pskel</code> + and <code>batman_pskel</code>) are called via the <code>person_pskel</code> + interface, we have to override the <code>post_person()</code> + virtual function in <code>superman_pimpl</code> to call + <code>post_superman()</code> and the <code>post_superman()</code> + virtual function in <code>batman_pimpl</code> to call + <code>post_batman()</code>.</p> + + <p>The following code fragment shows how to connect the parsers together.
+ Notice that for the <code>person</code> element in the <code>supermen_p</code> + parser we specify a parser map instead of a specific parser and we pass + <code>true</code> as the last argument to the document parser constructor + to indicate that we are parsing potentially-polymorphic XML documents:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::string_pimpl string_p; + xml_schema::boolean_pimpl boolean_p; + xml_schema::unsigned_int_pimpl unsigned_int_p; + + person_pimpl person_p; + superman_pimpl superman_p; + batman_pimpl batman_p; + + xml_schema::parser_map_impl person_map; + supermen_pimpl supermen_p; + + person_p.parsers (string_p); + superman_p.parsers (string_p, boolean_p); + batman_p.parsers (string_p, boolean_p, unsigned_int_p); + + // Here we are specifying a parser map which contains several + // parsers that can be used to parse the person element. + // + person_map.insert (person_p); + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_map); + + // Parse the XML document. The last argument to the document's + // constructor indicates that we are parsing polymorphic XML + // documents. + // + xml_schema::document doc_p (supermen_p, "supermen", true); + + supermen_p.pre (); + doc_p.parse (argv[1]); + supermen_p.post_supermen (); +} + </pre> + + <p>When polymorphism-aware code is generated, each element's + <code>*_parser()</code> function is overloaded to also accept + an object of the <code>xml_schema::parser_map</code> type. + For example, the <code>supermen_pskel</code> class from the + above example looks like this:</p> + + <pre class="c++"> +class supermen_pskel: public xml_schema::parser_complex_content +{ +public: + + ... + + // Parser construction API. + // + void + parsers (person_pskel&); + + // Individual element parsers.
+ // + void + person_parser (person_pskel&); + + void + person_parser (const xml_schema::parser_map&); + + ... +}; + </pre> + + <p>Note that you can specify both the individual (static) parser and + the parser map. The individual parser will be used when the static + element type and the dynamic type of the object being parsed are + the same. This is the case, for example, when there is no + <code>xsi:type</code> attribute and the element hasn't been + substituted. Because the individual parser for an element is + cached and no map lookup is necessary, it makes sense to specify + both the individual parser and the parser map when most of the + objects being parsed are of the static type and optimal + performance is important. The following code fragment shows + how to change the above example to set both the individual + parser and the parser map:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + ... + + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_p); + supermen_p.person_parser (person_map); + + ... +} + </pre> + + + <p>The <code>xml_schema::parser_map</code> interface and the + <code>xml_schema::parser_map_impl</code> default implementation + are presented below:</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_map + { + public: + virtual parser_base* + find (const ro_string* type) const = 0; + }; + + class parser_map_impl: public parser_map + { + public: + void + insert (parser_base&); + + virtual parser_base* + find (const ro_string* type) const; + + private: + parser_map_impl (const parser_map_impl&); + + parser_map_impl& + operator= (const parser_map_impl&); + + ... 
+ }; +} + </pre> + + <p>The <code>type</code> argument in the <code>find()</code> virtual + function is the type name and namespace from the <code>xsi:type</code> attribute + (the namespace prefix is resolved to the actual XML namespace) + or the type of an element from the substitution group in the form + <code>"<name> <namespace>"</code> with the space and the + namespace part absent if the type does not have a namespace. + You can obtain a parser's dynamic type in the same format + using the <code>_dynamic_type()</code> function. The static + type can be obtained by calling the static <code>_static_type()</code> + function, for example <code>person_pskel::_static_type()</code>. + Both functions return a C string (<code>const char*</code> or + <code>const wchar_t*</code>, depending on the character type + used) which is valid for as long as the application is running. + The following example shows how we can implement our own parser + map using <code>std::map</code>:</p> + + + <pre class="c++"> +#include <map> +#include <string> + +class parser_map: public xml_schema::parser_map +{ +public: + void + insert (xml_schema::parser_base& p) + { + map_[p._dynamic_type ()] = &p; + } + + virtual xml_schema::parser_base* + find (const xml_schema::ro_string* type) const + { + map::const_iterator i = map_.find (type); + return i != map_.end () ? i->second : 0; + } + +private: + typedef std::map<std::string, xml_schema::parser_base*> map; + map map_; +}; + </pre> + + <p>Most of the code presented in this section is taken from the + <code>polymorphism</code> example which can be found in the + <code>cxx/parser/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package.
+ Handling of <code>xsi:type</code> and substitution groups when used on + root elements requires a number of special actions as shown in + the <code>polyroot</code> example.</p> + + + <!-- Built-in XML Schema Type Parsers --> + + + <h1><a name="6">6 Built-In XML Schema Type Parsers</a></h1> + + <p>The XSD runtime provides parser implementations for all built-in + XML Schema types as summarized in the following table. Declarations + for these types are automatically included into each generated + header file. As a result you don't need to include any headers + to gain access to these parser implementations. Note that some + parsers return either <code>std::string</code> or + <code>std::wstring</code> depending on the character type selected.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Parser implementation in the <code>xml_schema</code> namespace</th> + <th>Parser return type</th> + </tr> + + <tr> + <th colspan="3">anyType and anySimpleType types</th> + </tr> + <tr> + <td><code>anyType</code></td> + <td><code>any_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + <tr> + <td><code>anySimpleType</code></td> + <td><code>any_simple_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte_pimpl</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte_pimpl</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_pimpl</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short_pimpl</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_pimpl</code></td> + 
<td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int_pimpl</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + <td><code>integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer_pimpl</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer_pimpl</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean_pimpl</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_pimpl</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string-based types</th> + </tr> + <tr> + <td><code>string</code></td> + 
<td><code>string_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <td><code>language</code></td> + <td><code>language_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname_pimpl</code></td> + <td><code>xml_schema::qname</code><br/><a href="#6.1">Section 6.1, + "<code>QName</code> Parser"</a></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">list types</th> + </tr> + <tr> + <td><code>NMTOKENS</code></td> + <td><code>nmtokens_pimpl</code></td> + <td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs_pimpl</code></td> + 
<td><code>xml_schema::string_sequence</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri_pimpl</code></td> + <td><code>std::string</code> or <code>std::wstring</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary_pimpl</code></td> + <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary_pimpl</code></td> + <td><code>std::[unique|auto]_ptr< xml_schema::buffer></code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date_pimpl</code></td> + <td><code>xml_schema::date</code><br/><a href="#6.5">Section 6.5, + "<code>date</code> Parser"</a></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time_pimpl</code></td> + <td><code>xml_schema::date_time</code><br/><a href="#6.6">Section 6.6, + "<code>dateTime</code> Parser"</a></td> + </tr> + <tr> + <td><code>duration</code></td> + <td><code>duration_pimpl</code></td> + <td><code>xml_schema::duration</code><br/><a href="#6.7">Section 6.7, + "<code>duration</code> Parser"</a></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday_pimpl</code></td> + <td><code>xml_schema::gday</code><br/><a href="#6.8">Section 6.8, + "<code>gDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth_pimpl</code></td> + <td><code>xml_schema::gmonth</code><br/><a href="#6.9">Section 6.9, + "<code>gMonth</code> Parser"</a></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + 
<td><code>gmonth_day_pimpl</code></td> + <td><code>xml_schema::gmonth_day</code><br/><a href="#6.10">Section 6.10, + "<code>gMonthDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYear</code></td> + <td><code>gyear_pimpl</code></td> + <td><code>xml_schema::gyear</code><br/><a href="#6.11">Section 6.11, + "<code>gYear</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month_pimpl</code></td> + <td><code>xml_schema::gyear_month</code><br/><a href="#6.12">Section + 6.12, "<code>gYearMonth</code> Parser"</a></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time_pimpl</code></td> + <td><code>xml_schema::time</code><br/><a href="#6.13">Section 6.13, + "<code>time</code> Parser"</a></td> + </tr> + + </table> + + <h2><a name="6.1">6.1 <code>QName</code> Parser</a></h2> + + <p>The return type of the <code>qname_pimpl</code> parser implementation + is <code>xml_schema::qname</code> which represents an XML qualified + name. Its interface is presented below. + Note that the <code>std::string</code> type in the interface becomes + <code>std::wstring</code> if the selected character type is + <code>wchar_t</code>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class qname + { + public: + explicit + qname (const std::string& name); + qname (const std::string& prefix, const std::string& name); + + const std::string& + prefix () const; + + void + prefix (const std::string&); + + const std::string& + name () const; + + void + name (const std::string&); + }; + + bool + operator== (const qname&, const qname&); + + bool + operator!= (const qname&, const qname&); +} + </pre> + + + <h2><a name="6.2">6.2 <code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></h2> + + <p>The return type of the <code>nmtokens_pimpl</code> and + <code>idrefs_pimpl</code> parser implementations is + <code>xml_schema::string_sequence</code> which represents a + sequence of strings. Its interface is presented below. 
+ Note that the <code>std::string</code> type in the interface becomes + <code>std::wstring</code> if the selected character type is + <code>wchar_t</code>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence: public std::vector<std::string> + { + public: + string_sequence (); + + explicit + string_sequence (std::vector<std::string>::size_type n, + const std::string& x = std::string ()); + + template <typename I> + string_sequence (const I& begin, const I& end); + }; + + bool + operator== (const string_sequence&, const string_sequence&); + + bool + operator!= (const string_sequence&, const string_sequence&); +} + </pre> + + + <h2><a name="6.3">6.3 <code>base64Binary</code> and <code>hexBinary</code> Parsers</a></h2> + + <p>The return type of the <code>base64_binary_pimpl</code> and + <code>hex_binary_pimpl</code> parser implementations is either + <code>std::unique_ptr<xml_schema::buffer></code> (C++11) or + <code>std::auto_ptr<xml_schema::buffer></code> (C++98), + depending on the C++ standard selected (<code>--std</code> XSD + compiler option). The <code>xml_schema::buffer</code> type + represents a binary buffer and its interface is presented below.</p> + + <pre class="c++"> +namespace xml_schema +{ + class buffer + { + public: + typedef std::size_t size_t; + + class bounds {}; // Out of bounds exception. 
+ + public: + explicit + buffer (size_t size = 0); + buffer (size_t size, size_t capacity); + buffer (const void* data, size_t size); + buffer (const void* data, size_t size, size_t capacity); + buffer (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + + public: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + void + swap (buffer&); + + public: + size_t + capacity () const; + + bool + capacity (size_t); + + public: + size_t + size () const; + + bool + size (size_t); + + public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); + }; + + bool + operator== (const buffer&, const buffer&); + + bool + operator!= (const buffer&, const buffer&); +} + </pre> + + <p>If the <code>assume_ownership</code> argument to the constructor + is <code>true</code>, the instance assumes the ownership of the + memory block pointed to by the <code>data</code> argument and will + eventually release it by calling <code>operator delete()</code>. The + <code>capacity()</code> and <code>size()</code> modifier functions + return <code>true</code> if the underlying buffer has moved. + </p> + + <p>The <code>bounds</code> exception is thrown if the constructor + arguments violate the <code>(size <= capacity)</code> + constraint.</p> + + + <h2><a name="6.4">6.4 Time Zone Representation</a></h2> + + <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, + <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, + <code>gYearMonth</code>, and <code>time</code> XML Schema built-in + types all include an optional time zone component. 
The following + <code>xml_schema::time_zone</code> base class is used to represent + this information:</p> + + <pre class="c++"> +namespace xml_schema +{ + class time_zone + { + public: + time_zone (); + time_zone (short hours, short minutes); + + bool + zone_present () const; + + void + zone_reset (); + + short + zone_hours () const; + + void + zone_hours (short); + + short + zone_minutes () const; + + void + zone_minutes (short); + }; + + bool + operator== (const time_zone&, const time_zone&); + + bool + operator!= (const time_zone&, const time_zone&); +} + </pre> + + <p>The <code>zone_present()</code> accessor function returns <code>true</code> + if the time zone is specified. The <code>zone_reset()</code> modifier + function resets the time zone object to the <em>not specified</em> + state. If the time zone offset is negative then both hours and + minutes components are represented as negative integers.</p> + + + <h2><a name="6.5">6.5 <code>date</code> Parser</a></h2> + + <p>The return type of the <code>date_pimpl</code> parser implementation + is <code>xml_schema::date</code> which represents a year, a month, and a day + with an optional time zone. Its interface is presented below.
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date + { + public: + date (int year, unsigned short month, unsigned short day); + date (int year, unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const date&, const date&); + + bool + operator!= (const date&, const date&); +} + </pre> + + <h2><a name="6.6">6.6 <code>dateTime</code> Parser</a></h2> + + <p>The return type of the <code>date_time_pimpl</code> parser implementation + is <code>xml_schema::date_time</code> which represents a year, a month, a day, + hours, minutes, and seconds with an optional time zone. Its interface + is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date_time + { + public: + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const date_time&, const date_time&); + + bool + operator!= (const date_time&, const date_time&); +} + </pre> + + <h2><a name="6.7">6.7 <code>duration</code> Parser</a></h2> + + <p>The return type of the <code>duration_pimpl</code> parser implementation + is <code>xml_schema::duration</code> which represents a potentially + negative duration in the form of years, months, days, hours, minutes, + and seconds. 
Its interface is presented below.</p> + + <pre class="c++"> +namespace xml_schema +{ + class duration + { + public: + duration (bool negative, + unsigned int years, unsigned int months, unsigned int days, + unsigned int hours, unsigned int minutes, double seconds); + + bool + negative () const; + + void + negative (bool); + + unsigned int + years () const; + + void + years (unsigned int); + + unsigned int + months () const; + + void + months (unsigned int); + + unsigned int + days () const; + + void + days (unsigned int); + + unsigned int + hours () const; + + void + hours (unsigned int); + + unsigned int + minutes () const; + + void + minutes (unsigned int); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const duration&, const duration&); + + bool + operator!= (const duration&, const duration&); +} + </pre> + + + <h2><a name="6.8">6.8 <code>gDay</code> Parser</a></h2> + + <p>The return type of the <code>gday_pimpl</code> parser implementation + is <code>xml_schema::gday</code> which represents a day of the month with + an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gday + { + public: + explicit + gday (unsigned short day); + gday (unsigned short day, short zone_hours, short zone_minutes); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gday&, const gday&); + + bool + operator!= (const gday&, const gday&); +} + </pre> + + <h2><a name="6.9">6.9 <code>gMonth</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_pimpl</code> parser implementation + is <code>xml_schema::gmonth</code> which represents a month of the year + with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth + { + public: + explicit + gmonth (unsigned short month); + gmonth (unsigned short month, short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gmonth&, const gmonth&); + + bool + operator!= (const gmonth&, const gmonth&); +} + </pre> + + <h2><a name="6.10">6.10 <code>gMonthDay</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_day_pimpl</code> parser implementation + is <code>xml_schema::gmonth_day</code> which represents a day and a month + of the year with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth_day + { + public: + gmonth_day (unsigned short month, unsigned short day); + gmonth_day (unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gmonth_day&, const gmonth_day&); + + bool + operator!= (const gmonth_day&, const gmonth_day&); +} + </pre> + + <h2><a name="6.11">6.11 <code>gYear</code> Parser</a></h2> + + <p>The return type of the <code>gyear_pimpl</code> parser implementation + is <code>xml_schema::gyear</code> which represents a year with + an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear + { + public: + explicit + gyear (int year); + gyear (int year, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + }; + + bool + operator== (const gyear&, const gyear&); + + bool + operator!= (const gyear&, const gyear&); +} + </pre> + + <h2><a name="6.12">6.12 <code>gYearMonth</code> Parser</a></h2> + + <p>The return type of the <code>gyear_month_pimpl</code> parser implementation + is <code>xml_schema::gyear_month</code> which represents a year and a month + with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear_month + { + public: + gyear_month (int year, unsigned short month); + gyear_month (int year, unsigned short month, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gyear_month&, const gyear_month&); + + bool + operator!= (const gyear_month&, const gyear_month&); +} + </pre> + + + <h2><a name="6.13">6.13 <code>time</code> Parser</a></h2> + + <p>The return type of the <code>time_pimpl</code> parser implementation + is <code>xml_schema::time</code> which represents hours, minutes, + and seconds with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class time + { + public: + time (unsigned short hours, unsigned short minutes, double seconds); + time (unsigned short hours, unsigned short minutes, double seconds, + short zone_hours, short zone_minutes); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const time&, const time&); + + bool + operator!= (const time&, const time&); +} + </pre> + + + <!-- Error Handling --> + + + <h1><a name="7">7 Document Parser and Error Handling</a></h1> + + <p>In this chapter we will discuss the <code>xml_schema::document</code> + type as well as the error handling mechanisms provided by the mapping + in more detail. As mentioned in <a href="#3.4">Section 3.4, + "Connecting the Parsers Together"</a>, the interface of + <code>xml_schema::document</code> depends on the underlying XML + parser selected (<a href="#5.3">Section 5.3, "Underlying XML + Parser"</a>). The following sections describe the + <code>document</code> type interface for Xerces-C++ and + Expat as underlying parsers.</p> + + <h2><a name="7.1">7.1 Xerces-C++ Document Parser</a></h2> + + <p>When Xerces-C++ is used as the underlying XML parser, the + <code>document</code> type has the following interface. Note that + if the character type is <code>wchar_t</code>, then the string type + in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_base; + class error_handler; + + class flags + { + public: + // Do not validate XML documents with the Xerces-C++ validator. 
+ // + static const unsigned long dont_validate; + + // Do not initialize the Xerces-C++ runtime. + // + static const unsigned long dont_initialize; + + // Disable handling of subsequent imports for the same namespace + // in Xerces-C++ 3.1.0 and later. + // + static const unsigned long no_multiple_imports; + }; + + class properties + { + public: + // Add a location for a schema with a target namespace. + // + void + schema_location (const std::string& namespace_, + const std::string& location); + + // Add a location for a schema without a target namespace. + // + void + no_namespace_schema_location (const std::string& location); + }; + + class document + { + public: + document (parser_base& root, + const std::string& root_element_name, + bool polymorphic = false); + + document (parser_base& root, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + public: + // Parse URI or a local file. + // + void + parse (const std::string& uri, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file with a user-provided error_handler + // object. + // + void + parse (const std::string& uri, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file with a user-provided ErrorHandler + // object. Note that you must initialize the Xerces-C++ runtime + // before calling this function. + // + void + parse (const std::string& uri, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse URI or a local file using a user-provided SAX2XMLReader + // object. Note that you must initialize the Xerces-C++ runtime + // before calling this function. + // + void + parse (const std::string& uri, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream. 
+ // + void + parse (std::istream&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a user-provided error_handler object. + // + void + parse (std::istream&, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a user-provided ErrorHandler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (std::istream&, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream using a user-provided SAX2XMLReader object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (std::istream&, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream with a system id. + // + void + parse (std::istream&, + const std::string& system_id, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id and a user-provided + // ErrorHandler object. Note that you must initialize the + // Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with a system id using a user-provided + // SAX2XMLReader object. Note that you must initialize the + // Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse std::istream with system and public ids. 
+ // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids and a user-provided + // ErrorHandler object. Note that you must initialize the Xerces-C++ + // runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse std::istream with system and public ids using a user- + // provided SAX2XMLReader object. Note that you must initialize + // the Xerces-C++ runtime before calling this function. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + + public: + // Parse InputSource. Note that you must initialize the Xerces-C++ + // runtime before calling this function. + // + void + parse (const xercesc::InputSource&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource with a user-provided error_handler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (const xercesc::InputSource&, + error_handler&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource with a user-provided ErrorHandler object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. 
+ // + void + parse (const xercesc::InputSource&, + xercesc::ErrorHandler&, + flags = 0, + const properties& = properties ()); + + // Parse InputSource using a user-provided SAX2XMLReader object. + // Note that you must initialize the Xerces-C++ runtime before + // calling this function. + // + void + parse (const xercesc::InputSource&, + xercesc::SAX2XMLReader&, + flags = 0, + const properties& = properties ()); + }; +} + </pre> + + <p>The <code>document</code> class is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element. The <code>parser_base</code> + class is the base type for all parser skeletons. The second and + third arguments to the <code>document</code>'s constructors are + the root element's name and namespace. The last argument, + <code>polymorphic</code>, specifies whether the XML documents + being parsed use polymorphism. For more information on support + for XML Schema polymorphism in the C++/Parser mapping refer + to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + + <p>The rest of the <code>document</code> interface consists of overloaded + <code>parse()</code> functions. The last two arguments in each of these + functions are <code>flags</code> and <code>properties</code>. The + <code>flags</code> argument allows you to modify the default behavior + of the parsing functions. The <code>properties</code> argument allows + you to override the schema location attributes specified in XML + documents. Note that the schema location paths are relative to an + XML document unless they are complete URIs. For example if you want + to use a local schema file then you will need to use a URI in the + form <code>file:///absolute/path/to/your/schema</code>.</p> + + <p>A number of overloaded <code>parse()</code> functions have the + <code>system_id</code> and <code>public_id</code> arguments. 
The + system id is a <em>system</em> identifier of the resource being + parsed (for example, URI or a full file path). The public id is a + <em>public</em> identifier of the resource (for example, an + application-specific name or a relative file path). The system id + is used to resolve relative paths (for example, schema paths). In + diagnostics messages the public id is used if it is available. + Otherwise the system id is used.</p> + + <p>The error handling mechanisms employed by the <code>document</code> + parser are described in <a href="#7.3">Section 7.3, "Error + Handling"</a>.</p> + + <h2><a name="7.2">7.2 Expat Document Parser</a></h2> + + <p>When Expat is used as the underlying XML parser, the + <code>document</code> type has the following interface. Note that + if the character type is <code>wchar_t</code>, then the string type + in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_base; + class error_handler; + + class document + { + public: + document (parser_base&, + const std::string& root_element_name, + bool polymorphic = false); + + document (parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name, + bool polymorphic = false); + + public: + // Parse a local file. The file is accessed with std::ifstream + // in binary mode. The std::ios_base::failure exception is used + // to report io errors (badbit and failbit). + // + void + parse (const std::string& file); + + // Parse a local file with a user-provided error_handler + // object. The file is accessed with std::ifstream in binary + // mode. The std::ios_base::failure exception is used to report + // io errors (badbit and failbit). + // + void + parse (const std::string& file, error_handler&); + + public: + // Parse std::istream. 
+ // + void + parse (std::istream&); + + // Parse std::istream with a user-provided error_handler object. + // + void + parse (std::istream&, error_handler&); + + // Parse std::istream with a system id. + // + void + parse (std::istream&, const std::string& system_id); + + // Parse std::istream with a system id and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + error_handler&); + + // Parse std::istream with system and public ids. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id); + + // Parse std::istream with system and public ids and a user-provided + // error_handler object. + // + void + parse (std::istream&, + const std::string& system_id, + const std::string& public_id, + error_handler&); + + public: + // Parse a chunk of input. You can call these functions multiple + // times with the last call having the last argument true. + // + void + parse (const void* data, std::size_t size, bool last); + + void + parse (const void* data, std::size_t size, bool last, + error_handler&); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + error_handler&); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + const std::string& public_id); + + void + parse (const void* data, std::size_t size, bool last, + const std::string& system_id, + const std::string& public_id, + error_handler&); + + public: + // Low-level Expat-specific parsing API. 
+ // + void + parse_begin (XML_Parser); + + void + parse_begin (XML_Parser, const std::string& public_id); + + void + parse_begin (XML_Parser, error_handler&); + + void + parse_begin (XML_Parser, + const std::string& public_id, + error_handler&); + + void + parse_end (); + }; +} + </pre> + + <p>The <code>document</code> class is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element. The <code>parser_base</code> + class is the base type for all parser skeletons. The second and + third arguments to the <code>document</code>'s constructors are + the root element's name and namespace. The last argument, + <code>polymorphic</code>, specifies whether the XML documents + being parsed use polymorphism. For more information on support + for XML Schema polymorphism in the C++/Parser mapping refer + to <a href="#5.5">Section 5.5, "Support for Polymorphism"</a>.</p> + + <p>A number of overloaded <code>parse()</code> functions have the + <code>system_id</code> and <code>public_id</code> arguments. The + system id is a <em>system</em> identifier of the resource being + parsed (for example, URI or a full file path). The public id is a + <em>public</em> identifier of the resource (for example, an + application-specific name or a relative file path). The system id + is used to resolve relative paths. In diagnostics messages the + public id is used if it is available. Otherwise the system id + is used.</p> + + <p>The <code>parse_begin()</code> and <code>parse_end()</code> functions + present a low-level, Expat-specific parsing API for maximum control. + A typical use-case would look like this (pseudo-code):</p> + + <pre class="c++"> +xxx_pimpl root_p; +document doc_p (root_p, "root"); + +root_p.pre (); +doc_p.parse_begin (xml_parser, "file.xml"); + +while (more_data_to_parse) +{ + // Call XML_Parse or XML_ParseBuffer. 
+ + if (status == XML_STATUS_ERROR) + break; +} + +// Call parse_end even in case of an error to translate +// XML and Schema errors to exceptions or error_handler +// calls. +// +doc_p.parse_end (); +result_type result (root_p.post_xxx ()); + </pre> + + <p>Note that if your vocabulary uses XML namespaces, the + <code>XML_ParserCreateNS()</code> function should be used to create + the XML parser. Space (<code>XML_Char (' ')</code>) should be used + as a separator (the second argument to <code>XML_ParserCreateNS()</code>). + </p> + + <p>The error handling mechanisms employed by the <code>document</code> + parser are described in <a href="#7.3">Section 7.3, "Error + Handling"</a>.</p> + + + <h2><a name="7.3">7.3 Error Handling</a></h2> + + <p>There are three categories of errors that can result from running + a parser on an XML document: System, XML, and Application. + The System category contains memory allocation and file/stream + operation errors. The XML category covers XML parsing and + well-formedness checking as well as XML Schema validation errors. + Finally, the Application category is for application logic errors + that you may want to propagate from parser implementations to the + caller of the parser. + </p> + + <p>The System errors are mapped to the standard exceptions. The + out of memory condition is indicated by throwing an instance + of <code>std::bad_alloc</code>. The stream operation errors + are reported either by throwing an instance of + <code>std::ios_base::failure</code> if exceptions are enabled + or by setting the stream state.</p> + + <p>Note that if you are parsing <code>std::istream</code> on + which exceptions are not enabled, then you will need to + check the stream state before calling the <code>post()</code> + callback, as shown in the following example:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + ... 
+ + std::ifstream ifs (argv[1]); + + if (ifs.fail ()) + { + cerr << argv[1] << ": unable to open" << endl; + return 1; + } + + root_p.pre (); + doc_p.parse (ifs); + + if (ifs.fail ()) + { + cerr << argv[1] << ": io failure" << endl; + return 1; + } + + result_type result (root_p.post_xxx ()); +} + </pre> + + <p>The above example can be rewritten to use exceptions + as shown below:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + try + { + ... + + std::ifstream ifs; + ifs.exceptions (std::ifstream::badbit | std::ifstream::failbit); + ifs.open (argv[1]); + + root_p.pre (); + doc_p.parse (ifs); + result_type result (root_p.post_xxx ()); + } + catch (const std::ifstream::failure&) + { + cerr << argv[1] << ": unable to open or io failure" << endl; + return 1; + } +} + </pre> + + + <p>For reporting application errors from parsing callbacks, you + can throw any exceptions of your choice. They are propagated to + the caller of the parser without any alterations.</p> + + <p>The XML errors can be reported either by throwing the + <code>xml_schema::parsing</code> exception or by a callback + to the <code>xml_schema::error_handler</code> object (and + <code>xercesc::ErrorHandler</code> object in case of Xerces-C++).</p> + + <p>The <code>xml_schema::parsing</code> exception contains + a list of warnings and errors that were accumulated during + parsing. Note that this exception is thrown only if there + was an error. This makes it impossible to obtain warnings + from an otherwise successful parsing using this mechanism. + The following listing shows the definition of + <code>xml_schema::parsing</code> exception. 
Note that if the + character type is <code>wchar_t</code>, then the string type + and output stream type in the definition become + <code>std::wstring</code> and <code>std::wostream</code>, + respectively (see <a href="#5.2">Section 5.2, "Character Type + and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class exception: public std::exception + { + protected: + virtual void + print (std::ostream&) const = 0; + }; + + inline std::ostream& + operator<< (std::ostream& os, const exception& e) + { + e.print (os); + return os; + } + + + class severity + { + public: + enum value + { + warning, + error + }; + }; + + + class error + { + public: + error (xml_schema::severity, + const std::string& id, + unsigned long line, + unsigned long column, + const std::string& message); + + xml_schema::severity + severity () const; + + const std::string& + id () const; + + unsigned long + line () const; + + unsigned long + column () const; + + const std::string& + message () const; + }; + + std::ostream& + operator<< (std::ostream&, const error&); + + + class diagnostics: public std::vector<error> + { + }; + + std::ostream& + operator<< (std::ostream&, const diagnostics&); + + + class parsing: public exception + { + public: + parsing (); + parsing (const xml_schema::diagnostics&); + + const xml_schema::diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); + + protected: + virtual void + print (std::ostream&) const; + }; +} + </pre> + + <p>The following example shows how we can catch and print this + exception. The code will print diagnostics messages one per line + in case of an error.</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + try + { + // Parse. + } + catch (const xml_schema::parsing& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>With the <code>error_handler</code> approach the diagnostics + messages are delivered as parsing progresses. 
The following + listing presents the definition of the <code>error_handler</code> + interface. Note that if the character type is <code>wchar_t</code>, + then the string type in the interface becomes <code>std::wstring</code> + (see <a href="#5.2">Section 5.2, "Character Type and Encoding"</a>).</p> + + <pre class="c++"> +namespace xml_schema +{ + class error_handler + { + public: + class severity + { + public: + enum value + { + warning, + error, + fatal + }; + }; + + virtual bool + handle (const std::string& id, + unsigned long line, + unsigned long column, + severity, + const std::string& message) = 0; + }; +} + </pre> + + <p>The return value of the <code>handle()</code> function indicates whether + parsing should continue if possible. The error with the fatal severity + level terminates the parsing process regardless of the returned value. + At the end of the parsing process with an error that was reported via + the <code>error_handler</code> object, an empty + <code>xml_schema::parsing</code> exception is thrown to indicate + the failure to the caller. 
You can alter this behavior by throwing + your own exception from the <code>handle()</code> function.</p> + + + <!-- Appendix A --> + + + <h1><a name="A">Appendix A — Supported XML Schema Constructs</a></h1> + + <p>The C++/Parser mapping supports validation of the following W3C XML + Schema constructs in the generated code.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="features" border="1"> + <tr><th>Construct</th><th>Notes</th></tr> + <tr><th colspan="2">Structure</th></tr> + + <tr><td>element</td><td></td></tr> + <tr><td>attribute</td><td></td></tr> + + <tr><td>any</td><td></td></tr> + <tr><td>anyAttribute</td><td></td></tr> + + <tr><td>all</td><td></td></tr> + <tr><td>sequence</td><td></td></tr> + <tr><td>choice</td><td></td></tr> + + <tr><td>complex type, empty content</td><td></td></tr> + <tr><td>complex type, mixed content</td><td></td></tr> + <tr><td>complex type, simple content extension</td><td></td></tr> + <tr><td>complex type, simple content restriction</td> + <td>Simple type facets are not validated.</td></tr> + <tr><td>complex type, complex content extension</td><td></td></tr> + <tr><td>complex type, complex content restriction</td><td></td></tr> + + <tr><td>list</td><td></td></tr> + + <tr><th colspan="2">Datatypes</th></tr> + + <tr><td>byte</td><td></td></tr> + <tr><td>unsignedByte</td><td></td></tr> + <tr><td>short</td><td></td></tr> + <tr><td>unsignedShort</td><td></td></tr> + <tr><td>int</td><td></td></tr> + <tr><td>unsignedInt</td><td></td></tr> + <tr><td>long</td><td></td></tr> + <tr><td>unsignedLong</td><td></td></tr> + <tr><td>integer</td><td></td></tr> + <tr><td>nonPositiveInteger</td><td></td></tr> + <tr><td>nonNegativeInteger</td><td></td></tr> + <tr><td>positiveInteger</td><td></td></tr> + <tr><td>negativeInteger</td><td></td></tr> + + <tr><td>boolean</td><td></td></tr> + + <tr><td>float</td><td></td></tr> + <tr><td>double</td><td></td></tr> + <tr><td>decimal</td><td></td></tr> + + <tr><td>string</td><td></td></tr> + 
<tr><td>normalizedString</td><td></td></tr> + <tr><td>token</td><td></td></tr> + <tr><td>Name</td><td></td></tr> + <tr><td>NMTOKEN</td><td></td></tr> + <tr><td>NCName</td><td></td></tr> + <tr><td>language</td><td></td></tr> + <tr><td>anyURI</td><td></td></tr> + + <tr><td>ID</td><td>Identity constraint is not enforced.</td></tr> + <tr><td>IDREF</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>NMTOKENS</td><td></td></tr> + <tr><td>IDREFS</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>QName</td><td></td></tr> + + <tr><td>base64Binary</td><td></td></tr> + <tr><td>hexBinary</td><td></td></tr> + + <tr><td>date</td><td></td></tr> + <tr><td>dateTime</td><td></td></tr> + <tr><td>duration</td><td></td></tr> + <tr><td>gDay</td><td></td></tr> + <tr><td>gMonth</td><td></td></tr> + <tr><td>gMonthDay</td><td></td></tr> + <tr><td>gYear</td><td></td></tr> + <tr><td>gYearMonth</td><td></td></tr> + <tr><td>time</td><td></td></tr> + </table> + + + </div> +</div> + +</body> +</html> diff --git a/doc/cxx/tree/guide/guide.html2ps.in b/doc/cxx/tree/guide/guide.html2ps.in new file mode 100644 index 0000000..461ffde --- /dev/null +++ b/doc/cxx/tree/guide/guide.html2ps.in @@ -0,0 +1,65 @@ +@@html2ps { + option { + toc: hb; + colour: 1; + hyphenate: 1; + titlepage: 1; + } + + datefmt: "%B %Y"; + + titlepage { + content: " +<div align=center> + <h1><big>C++/Tree Mapping</big></h1> + <h1><big>Getting Started Guide</big></h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> +</div> + <p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href='https://www.codesynthesis.com/licenses/fdl-1.2.txt'>GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. 
+ </p> + + <p>This document is available in the following formats: + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/index.xhtml'>XHTML</a>, + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.pdf'>PDF</a>, and + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.ps'>PostScript</a>.</p>"; + } + + toc { + indent: 2em; + } + + header { + odd-right: $H; + even-left: $H; + } + + footer { + odd-left: $D; + odd-center: $T; + odd-right: $N; + + even-left: $N; + even-center: $T; + even-right: $D; + } +} + +body { + font-size: 12pt; + text-align: justify; +} + +pre { + font-size: 10pt; +} diff --git a/doc/cxx/tree/guide/index.xhtml b/doc/cxx/tree/guide/index.xhtml new file mode 100644 index 0000000..fdaaa45 --- /dev/null +++ b/doc/cxx/tree/guide/index.xhtml @@ -0,0 +1,2736 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Tree Mapping Getting Started Guide</title> + + <meta name="copyright" content="© 2005-2023 Code Synthesis"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parsing,serialization,validation"/> + <meta name="description" content="C++/Tree Mapping Getting Started Guide"/> + + <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + line-height: 1.2em; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 140%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. 
*/ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage .title { + font-weight: bold; + font-size: 200%; + text-align: center; + } + + #titlepage #first-title { + padding: 1em 0 0.4em 0; + } + + #titlepage #second-title { + padding: 0.4em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + div.img { + text-align: center; + padding: 2em 0 2em 0; + } + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div class="title" id="first-title">C++/Tree Mapping</div> + <div class="title" id="second-title">Getting Started Guide</div> + + <p>Copyright © 2005-2023 Code Synthesis.</p> + + <p>Permission is 
granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. + </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.ps">PostScript</a>.</p> + + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a> + <table class="toc"> + <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> + <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>2</th><td><a href="#2">Hello World Example</a> + <table class="toc"> + <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> + <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> + <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> + <tr><th>2.4</th><td><a href="#2.4">Compiling and Running</a></td></tr> + <tr><th>2.5</th><td><a href="#2.5">Adding Serialization</a></td></tr> + <tr><th>2.6</th><td><a href="#2.6">Selecting Naming Convention</a></td></tr> + <tr><th>2.7</th><td><a href="#2.7">Generating Documentation</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Overall Mapping Configuration</a> + <table class="toc"> + <tr><th>3.1</th><td><a href="#3.1">C++ 
Standard</a></td></tr> + <tr><th>3.2</th><td><a href="#3.2">Character Type and Encoding</a></td></tr> + <tr><th>3.3</th><td><a href="#3.3">Support for Polymorphism </a></td></tr> + <tr><th>3.4</th><td><a href="#3.4">Namespace Mapping</a></td></tr> + <tr><th>3.5</th><td><a href="#3.5">Thread Safety</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Working with Object Models</a> + <table class="toc"> + <tr><th>4.1</th><td><a href="#4.1">Attribute and Element Cardinalities</a></td></tr> + <tr><th>4.2</th><td><a href="#4.2">Accessing the Object Model</a></td></tr> + <tr><th>4.3</th><td><a href="#4.3">Modifying the Object Model</a></td></tr> + <tr><th>4.4</th><td><a href="#4.4">Creating the Object Model from Scratch</a></td></tr> + <tr><th>4.5</th><td><a href="#4.5">Mapping for the Built-in XML Schema Types</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Parsing</a> + <table class="toc"> + <tr><th>5.1</th><td><a href="#5.1">XML Schema Validation and Searching</a></td></tr> + <tr><th>5.2</th><td><a href="#5.2">Error Handling</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>6</th><td><a href="#6">Serialization</a> + <table class="toc"> + <tr><th>6.1</th><td><a href="#6.1">Namespace and Schema Information</a></td></tr> + <tr><th>6.2</th><td><a href="#6.2">Error Handling</a></td></tr> + </table> + </td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>The goal of this document is to provide you with an understanding of + the C++/Tree programming model and allow you to efficiently evaluate + XSD against your project's technical requirements. As such, this + document is intended for C++ developers and software architects + who are looking for an XML processing solution. 
For a more in-depth + description of the C++/Tree mapping refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/">C++/Tree + Mapping User Manual</a>.</p> + + <p>Prior experience with XML and C++ is required to understand this + document. Basic understanding of XML Schema is advantageous but + not expected or required. + </p> + + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this guide, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/">C++/Tree + Mapping User Manual</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree + Mapping Frequently Asked Questions (FAQ)</a></li> + + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is the place to ask technical questions about XSD and the C++/Tree mapping. + Furthermore, the <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + </ul> + + <!-- Introduction --> + + <h1><a name="1">1 Introduction</a></h1> + + <p>Welcome to CodeSynthesis XSD and the C++/Tree mapping. XSD is a + cross-platform W3C XML Schema to C++ data binding compiler. 
C++/Tree + is a W3C XML Schema to C++ mapping that represents the data stored + in XML as a statically-typed, vocabulary-specific object model. + </p> + + <h2><a name="1.1">1.1 Mapping Overview</a></h2> + + <p>Based on a formal description of an XML vocabulary (schema), the + C++/Tree mapping produces a tree-like data structure suitable for + in-memory processing. The core of the mapping consists of C++ + classes that constitute the object model and are derived from + types defined in XML Schema as well as XML parsing and + serialization code.</p> + + <p>Besides the core features, C++/Tree provides a number of additional + mapping elements that can be useful in some applications. These + include serialization and extraction to/from formats other than + XML, such as unstructured text (useful for debugging) and binary + representations such as XDR and CDR for high-speed data processing + as well as automatic documentation generation. The C++/Tree mapping + also provides a wide range of mechanisms for controlling and + customizing the generated code.</p> + + <p>A typical application that uses C++/Tree for XML processing usually + performs the following three steps: it first reads (parses) an XML + document to an in-memory object model, it then performs some useful + computations on that object model which may involve modification + of the model, and finally it may write (serialize) the modified + object model back to XML.</p> + + <p>The next chapter presents a simple application that performs these + three steps. The following chapters show how to use the C++/Tree + mapping in more detail.</p> + + <h2><a name="1.2">1.2 Benefits</a></h2> + + <p>Traditional XML access APIs such as Document Object Model (DOM) + or Simple API for XML (SAX) have a number of drawbacks that + make them less suitable for creating robust and maintainable + XML processing applications. 
These drawbacks include: + </p> + + <ul class="list"> + <li>Generic representation of XML in terms of elements, attributes, + and text forces an application developer to write a substantial + amount of bridging code that identifies and transforms pieces + of information encoded in XML to a representation more suitable + for consumption by the application logic.</li> + + <li>String-based flow control defers error detection to runtime. + It also reduces code readability and maintainability.</li> + + <li>Lack of type safety because the data is represented as text.</li> + + <li>Resulting applications are hard to debug, change, and + maintain.</li> + </ul> + + <p>In contrast, statically-typed, vocabulary-specific object model + produced by the C++/Tree mapping allows you to operate in your + domain terms instead of the generic elements, attributes, and + text. Static typing helps catch errors at compile-time rather + than at run-time. Automatic code generation frees you for more + interesting tasks (such as doing something useful with the + information stored in the XML documents) and minimizes the + effort needed to adapt your applications to changes in the + document structure. To summarize, the C++/Tree object model has + the following key advantages over generic XML access APIs:</p> + + <ul class="list"> + <li><b>Ease of use.</b> The generated code hides all the complexity + associated with parsing and serializing XML. 
This includes navigating + the structure and converting between the text representation and + data types suitable for manipulation by the application + logic.</li> + + <li><b>Natural representation.</b> The object representation allows + you to access the XML data using your domain vocabulary instead + of generic elements, attributes, and text.</li> + + <li><b>Concise code.</b> With the object representation the + application implementation is simpler and thus easier + to read and understand.</li> + + <li><b>Safety.</b> The generated object model is statically + typed and uses functions instead of strings to access the + information. This helps catch programming errors at compile-time + rather than at runtime.</li> + + <li><b>Maintainability.</b> Automatic code generation minimizes the + effort needed to adapt the application to changes in the + document structure. With static typing, the C++ compiler + can pin-point the places in the client code that need to be + changed.</li> + + <li><b>Compatibility.</b> Sequences of elements are represented in + the object model as containers conforming to the standard C++ + sequence requirements. This makes it possible to use standard + C++ algorithms on the object representation and frees you from + learning yet another container interface, as is the case with + DOM.</li> + + <li><b>Efficiency.</b> If the application makes repetitive use + of the data extracted from XML, then the C++/Tree object model + is more efficient because the navigation is performed using + function calls rather than string comparisons and the XML + data is extracted only once. 
Furthermore, the runtime memory + usage is reduced due to more efficient data storage + (for instance, storing numeric data as integers instead of + strings) as well as the static knowledge of cardinality + constraints.</li> + </ul> + + + <!-- Hello World Parser --> + + + <h1><a name="2">2 Hello World Example</a></h1> + + <p>In this chapter we will examine how to parse, access, modify, and + serialize a very simple XML document using the XSD-generated + C++/Tree object model. The code presented in this chapter is + based on the <code>hello</code> example which can be found in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package.</p> + + <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + + <p>First, we need to get an idea about the structure + of the XML documents we are going to process. Our + <code>hello.xml</code>, for example, could look like this:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + <p>Then we can write a description of the above XML in the + XML Schema language and save it into <code>hello.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello_t"> + <xs:sequence> + <xs:element name="greeting" type="xs:string"/> + <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello_t"/> + +</xs:schema> + </pre> + + <p>Even if you are not familiar with XML Schema, it + should be easy to connect declarations in <code>hello.xsd</code> + to elements in <code>hello.xml</code>. The <code>hello_t</code> type + is defined as a sequence of the nested <code>greeting</code> and + <code>name</code> elements. 
Note that the term sequence in XML + Schema means that elements should appear in a particular order + as opposed to appearing multiple times. The <code>name</code> + element has its <code>maxOccurs</code> property set to + <code>unbounded</code> which means it can appear multiple times + in an XML document. Finally, the globally-defined <code>hello</code> + element prescribes the root element for our vocabulary. For an + easily-approachable introduction to XML Schema refer to + <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: + Primer</a>.</p> + + <p>The above schema is a specification of our XML vocabulary; it tells + everybody what valid documents of our XML-based language should look + like. We can also update our <code>hello.xml</code> to include the + information about the schema so that XML parsers can validate + our document:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + + <p>The next step is to compile the schema to generate the object + model and parsing functions.</p> + + <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + + <p>Now we are ready to translate our <code>hello.xsd</code> to C++. + To do this we invoke the XSD compiler from a terminal (UNIX) or + a command prompt (Windows): + </p> + + <pre class="terminal"> +$ xsd cxx-tree hello.xsd + </pre> + + <p>The XSD compiler produces two C++ files: <code>hello.hxx</code> and + <code>hello.cxx</code>. 
The following code fragment is taken from + <code>hello.hxx</code>; it should give you an idea about what gets + generated: + </p> + + <pre class="c++"> +class hello_t +{ +public: + // greeting + // + typedef xml_schema::string greeting_type; + + const greeting_type& + greeting () const; + + greeting_type& + greeting (); + + void + greeting (const greeting_type& x); + + // name + // + typedef xml_schema::string name_type; + typedef xsd::sequence<name_type> name_sequence; + typedef name_sequence::iterator name_iterator; + typedef name_sequence::const_iterator name_const_iterator; + + const name_sequence& + name () const; + + name_sequence& + name (); + + void + name (const name_sequence& s); + + // Constructor. + // + hello_t (const greeting_type&); + + ... + +}; + +std::unique_ptr<hello_t> +hello (const std::string& uri); + +std::unique_ptr<hello_t> +hello (std::istream&); + </pre> + + <p>The <code>hello_t</code> C++ class corresponds to the + <code>hello_t</code> XML Schema type. For each element + in this type a set of C++ type definitions as well as + accessor and modifier functions are generated inside the + <code>hello_t</code> class. Note that the type definitions + and member functions for the <code>greeting</code> and + <code>name</code> elements are different because of the + cardinality differences between these two elements + (<code>greeting</code> is a required single element and + <code>name</code> is a sequence of elements).</p> + + <p>The <code>xml_schema::string</code> type used in the type + definitions is a C++ class provided by the XSD runtime + that corresponds to built-in XML Schema type + <code>string</code>. The <code>xml_schema::string</code> + is based on <code>std::string</code> and can be used as + such. Similarly, the <code>sequence</code> class template + that is used in the <code>name_sequence</code> type + definition is based on and has the same interface as + <code>std::vector</code>. 
The mapping between the built-in + XML Schema types and C++ types is described in more detail in + <a href="#4.5">Section 4.5, "Mapping for the Built-in XML Schema + Types"</a>. The <code>hello_t</code> class also includes a + constructor with an initializer for the required + <code>greeting</code> element as its argument.</p> + + <p>The <code>hello</code> overloaded global functions correspond + to the <code>hello</code> global element in XML Schema. A + global element in XML Schema is a valid document root. + By default XSD generates a set of parsing functions for each + global element defined in XML Schema (this can be overridden + with the <code>--root-element-*</code> options). Parsing + functions return a dynamically allocated object model as an + automatic pointer. The actual pointer used depends on the + C++ standard selected. For C++11 it is <code>std::unique_ptr</code> + as shown above. For C++98 it is <code>std::auto_ptr</code>. + For example, if we modify our XSD compiler invocation to + select C++98:</p> + + <pre class="terminal"> +$ xsd cxx-tree --std c++98 hello.xsd + </pre> + + <p>Then the parsing function signatures will become:</p> + + <pre class="c++"> +std::auto_ptr<hello_t> +hello (const std::string& uri); + +std::auto_ptr<hello_t> +hello (std::istream&); + </pre> + + <p>For more information on parsing functions see <a href="#5">Chapter 5, + "Parsing"</a>.</p> + + <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + + <p>At this point we have all the parts we need to do something useful + with the information stored in our XML document: + </p> + + <pre class="c++"> +#include <iostream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + unique_ptr<hello_t> h (hello (argv[1])); + + for (hello_t::name_const_iterator i (h->name ().begin ()); + i != h->name ().end (); + ++i) + { + cerr << h->greeting () << ", " << *i << "!" 
<< endl; + } + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>The first part of our application calls one of the parsing + functions to parse an XML file specified on the command line. + We then use the returned object model to iterate over names + and print a greeting line for each of them. Finally, we + catch and print the <code>xml_schema::exception</code> + exception in case something goes wrong. This exception + is the root of the exception hierarchy used by the + XSD-generated code. + </p> + + + <h2><a name="2.4">2.4 Compiling and Running</a></h2> + + <p>After saving our application from the previous section in + <code>driver.cxx</code>, we are ready to compile our first + program and run it on the test XML document. On a UNIX + system this can be done with the following commands: + </p> + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx hello.cxx +$ c++ -std=c++11 -o driver driver.o hello.o -lxerces-c +$ ./driver hello.xml +Hello, sun! +Hello, moon! +Hello, world! + </pre> + + <p>Here <code>.../libxsd</code> represents the path to the + <a href="https://cppget.org/libxsd">libxsd</a> package root + directory. Note also that we are required to link our + application with the Xerces-C++ library because the generated + code uses it as the underlying XML parser.</p> + + <h2><a name="2.5">2.5 Adding Serialization</a></h2> + + <p>While parsing and accessing the XML data may be everything + you need, there are applications that require creating new + or modifying existing XML documents. By default XSD does + not produce serialization code. 
We will need to request + it with the <code>--generate-serialization</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --generate-serialization hello.xsd + </pre> + + <p>If we now examine the generated <code>hello.hxx</code> file, + we will find a set of overloaded serialization functions, + including the following version:</p> + + <pre class="c++"> +void +hello (std::ostream&, + const hello_t&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap ()); + + </pre> + + <p>Just like with parsing functions, XSD generates serialization + functions for each global element unless instructed otherwise + with one of the <code>--root-element-*</code> options. For more + information on serialization functions see <a href="#6">Chapter 6, + "Serialization"</a>.</p> + + <p>We first examine an application that modifies an existing + object model and serializes it back to XML:</p> + + <pre class="c++"> +#include <iostream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + unique_ptr<hello_t> h (hello (argv[1])); + + // Change the greeting phrase. + // + h->greeting ("Hi"); + + // Add another entry to the name sequence. + // + h->name ().push_back ("mars"); + + // Serialize the modified object model to XML. + // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "hello.xsd"; + + hello (cout, *h, map); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>First, our application parses an XML document and obtains its + object model as in the previous example. Then it changes the + greeting string and adds another entry to the list of names. + Finally, it serializes the object model back to XML by calling + the serialization function.</p> + + <p>The first argument we pass to the serialization function is + <code>cout</code> which results in the XML being written to + the standard output for us to inspect. 
We could have also + written the result to a file or memory buffer by creating an + instance of <code>std::ofstream</code> or <code>std::ostringstream</code> + and passing it instead of <code>cout</code>. The second argument is the + object model we want to serialize. The final argument is an optional + namespace information map for our vocabulary. It captures information + such as namespaces, namespace prefixes to which they should be mapped, + and schemas associated with these namespaces. If we don't provide + this argument then generic namespace prefixes (<code>p1</code>, + <code>p2</code>, etc.) will be automatically assigned to XML namespaces + and no schema information will be added to the resulting document + (see <a href="#6">Chapter 6, "Serialization"</a> for details). + In our case, the prefix (map key) and namespace name are empty + because our vocabulary does not use XML namespaces.</p> + + <p>If we now compile and run this application we will see the + output as shown in the following listing:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hi</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + <name>mars</name> + +</hello> + </pre> + + <p>We can also create and serialize an object model from scratch + as shown in the following example:</p> + + <pre class="c++"> +#include <iostream> +#include <fstream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + hello_t h ("Hi"); + + hello_t::name_sequence& ns (h.name ()); + + ns.push_back ("Jane"); + ns.push_back ("John"); + + // Serialize the object model to XML. 
+ // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "hello.xsd"; + + std::ofstream ofs (argv[1]); + hello (ofs, h, map); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>In this example we used the generated constructor to create + an instance of type <code>hello_t</code>. To reduce typing, + we obtained a reference to the name sequence which we then + used to add a few names. The serialization part is identical + to the previous example except this time we are writing to + a file. If we compile and run this program, it produces the + following XML file:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hi</greeting> + + <name>Jane</name> + <name>John</name> + +</hello> + </pre> + + <h2><a name="2.6">2.6 Selecting Naming Convention</a></h2> + + <p>By default XSD uses the so-called K&R (Kernighan and Ritchie) + identifier naming convention in the generated code. In this + convention both type and function names are in lower case and + words are separated by underscores. If your application code or + schemas use a different notation, you may want to change the + naming convention used in the generated code for consistency. + XSD supports a set of widely-used naming conventions + that you can select with the <code>--type-naming</code> and + <code>--function-naming</code> options. You can also further + refine one of the predefined conventions or create a completely + custom naming scheme by using the <code>--*-regex</code> options.</p> + + <p>As an example, let's assume that our "Hello World" application + uses the so-called upper-camel-case naming convention for types + (that is, each word in a type name is capitalized) and the K&R + convention for function names. 
Since K&R is the default + convention for both type and function names, we only need to + change the type naming scheme:</p> + + <pre class="terminal"> +$ xsd cxx-tree --type-naming ucc hello.xsd + </pre> + + <p>The <code>ucc</code> argument to the <code>--type-naming</code> + options stands for upper-camel-case. If we now examine the + generated <code>hello.hxx</code>, we will see the following + changes compared to the declarations shown in the previous + sections:</p> + + <pre class="c++"> +class Hello_t +{ +public: + // greeting + // + typedef xml_schema::String GreetingType; + + const GreetingType& + greeting () const; + + GreetingType& + greeting (); + + void + greeting (const GreetingType& x); + + // name + // + typedef xml_schema::String NameType; + typedef xsd::sequence<NameType> NameSequence; + typedef NameSequence::iterator NameIterator; + typedef NameSequence::const_iterator NameConstIterator; + + const NameSequence& + name () const; + + NameSequence& + name (); + + void + name (const NameSequence& s); + + // Constructor. + // + Hello_t (const GreetingType&); + + ... + +}; + +std::unique_ptr<Hello_t> +hello (const std::string& uri); + +std::unique_ptr<Hello_t> +hello (std::istream&); + </pre> + + <p>Notice that the type names in the <code>xml_schema</code> namespace, + for example <code>xml_schema::String</code>, now also use the + upper-camel-case naming convention. The only thing that we may + be unhappy about in the above code is the <code>_t</code> + suffix in <code>Hello_t</code>. 
If we are not in a position + to change the schema, we can <em>touch-up</em> the <code>ucc</code> + convention with a custom translation rule using the + <code>--type-regex</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --type-naming ucc --type-regex '/ (.+)_t/\u$1/' hello.xsd + </pre> + + <p>This results in the following changes to the generated code:</p> + + <pre class="c++"> +class Hello +{ +public: + // greeting + // + typedef xml_schema::String GreetingType; + + const GreetingType& + greeting () const; + + GreetingType& + greeting (); + + void + greeting (const GreetingType& x); + + // name + // + typedef xml_schema::String NameType; + typedef xsd::sequence<NameType> NameSequence; + typedef NameSequence::iterator NameIterator; + typedef NameSequence::const_iterator NameConstIterator; + + const NameSequence& + name () const; + + NameSequence& + name (); + + void + name (const NameSequence& s); + + // Constructor. + // + Hello (const GreetingType&); + + ... + +}; + +std::unique_ptr<Hello> +hello (const std::string& uri); + +std::unique_ptr<Hello> +hello (std::istream&); + </pre> + + <p>For more detailed information on the <code>--type-naming</code>, + <code>--function-naming</code>, <code>--type-regex</code>, and + other <code>--*-regex</code> options refer to the NAMING + CONVENTION section in the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + <h2><a name="2.7">2.7 Generating Documentation</a></h2> + + <p>While our object model is quite simple, real-world vocabularies + can be quite complex with hundreds of types, elements, and + attributes. For such vocabularies figuring out which types + provide which member functions by studying the generated + source code or schemas can be a daunting task. 
To provide + application developers with a more accessible way of + understanding the generated object models, the XSD compiler + can be instructed to produce source code with documentation + comments in the Doxygen format. Then the source code can be + processed with the <a href="http://www.doxygen.org">Doxygen</a> + documentation system to extract this information and produce + documentation in various formats. + </p> + + <p>In this section we will see how to generate documentation + for our "Hello World" vocabulary. To showcase the full power + of the XSD documentation facilities, we will first document + our schema. The XSD compiler will then transfer + this information from the schema to the generated code and + then to the object model documentation. Note that the + documentation in the schema is not required for XSD to + generate useful documentation. Below you will find + our <code>hello.xsd</code> with added documentation:</p> + + <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello_t"> + + <xs:annotation> + <xs:documentation> + The hello_t type consists of a greeting phrase and a + collection of names to which this greeting applies. + </xs:documentation> + </xs:annotation> + + <xs:sequence> + + <xs:element name="greeting" type="xs:string"> + <xs:annotation> + <xs:documentation> + The greeting element contains the greeting phrase + for this hello object. + </xs:documentation> + </xs:annotation> + </xs:element> + + <xs:element name="name" type="xs:string" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation> + The name elements contains names to be greeted. + </xs:documentation> + </xs:annotation> + </xs:element> + + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello_t"> + <xs:annotation> + <xs:documentation> + The hello element is a root of the Hello XML vocabulary. + Every conforming document should start with this element. 
+ </xs:documentation> + </xs:annotation> + </xs:element> + +</xs:schema> + </pre> + + <p>The first step in obtaining the documentation is to recompile + our schema with the <code>--generate-doxygen</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --generate-serialization --generate-doxygen hello.xsd + </pre> + + <p>Now the generated <code>hello.hxx</code> file contains comments + in the Doxygen format. The next step is to process this file + with the Doxygen documentation system. If your project does + not use Doxygen then you first need to create a configuration + file for your project:</p> + + <pre class="terminal"> +$ doxygen -g hello.doxygen + </pre> + + <p>You only need to perform this step once. Now we can generate + the documentation by executing the following command in the + directory with the generated source code:</p> + + <pre class="terminal"> +$ doxygen hello.doxygen + </pre> + + <p>While the generated documentation can be useful as is, we can + go one step further and link (using the Doxygen tags mechanism) + the documentation for our object model with the documentation + for the XSD runtime library which defines C++ classes for the + built-in XML Schema types. This way we can seamlessly browse + between documentation for the <code>hello_t</code> class which + is generated by the XSD compiler and the <code>xml_schema::string</code> + class which is defined in the XSD runtime library. The Doxygen + configuration file for the XSD runtime is provided with the XSD + distribution.</p> + + <p>You can view the result of the steps described in this section + on the <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/hello/html/annotated.html">Hello + Example Documentation</a> page.</p> + + <!-- Chapter 3 --> + + + <h1><a name="3">3 Overall Mapping Configuration</a></h1> + + <p>The C++/Tree mapping has a number of configuration parameters that + determine the overall properties and behavior of the generated code. 
+ Configuration parameters are specified with the XSD command line + options. This chapter describes configuration aspects that are most + commonly encountered by application developers. These include: the + C++ standard, the character type that is used by the generated code, + handling of vocabularies that use XML Schema polymorphism, XML Schema + to C++ namespace mapping, and thread safety. For more ways to configure + the generated code refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. + </p> + + <h2><a name="3.1">3.1 C++ Standard</a></h2> + + <p>The C++/Tree mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. While the majority of the examples in this guide use + C++11, the document explains the C++11/98 usage difference and so + they can easily be converted to C++98.</p> + + <h2><a name="3.2">3.2 Character Type and Encoding</a></h2> + + <p>The C++/Tree mapping has built-in support for two character types: + <code>char</code> and <code>wchar_t</code>. You can select the + character type with the <code>--char-type</code> command line + option. The default character type is <code>char</code>. The + character type affects all string and string-based types that + are used in the mapping. These include the string-based built-in + XML Schema types, exception types, stream types, etc.</p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings. 
You can select which encoding should be used + in the object model with the <code>--char-encoding</code> command + line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <p>Note also that the character encoding that is used in the object model + is independent of the encodings used in input and output XML. In fact, + all three (object model, input XML, and output XML) can have different + encodings.</p> + + <h2><a name="3.3">3.3 Support for Polymorphism</a></h2> + + <p>By default XSD generates non-polymorphic code. If your vocabulary + uses XML Schema polymorphism in the form of <code>xsi:type</code> + and/or substitution groups, then you will need to compile + your schemas with the <code>--generate-polymorphic</code> option + to produce polymorphism-aware code. For more information on + working with polymorphic object models, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.11">Section 2.11, + "Mapping for <code>xsi:type</code> and Substitution Groups"</a> in + the C++/Tree Mapping User Manual.</p> + + <h2><a name="3.4">3.4 Namespace Mapping</a></h2> + + <p>XSD maps XML namespaces specified in the <code>targetNamespace</code> + attribute in XML Schema to one or more nested C++ namespaces. 
By + default, a namespace URI is mapped to a sequence of C++ namespace + names by removing the protocol and host parts and splitting the + rest into a sequence of names with <code>'/'</code> as the name + separator.</p> + + <p>The default mapping of namespace URIs to C++ namespaces + can be altered using the <code>--namespace-map</code> and + <code>--namespace-regex</code> compiler options. For example, + to map namespace URI <code>https://www.codesynthesis.com/my</code> to + C++ namespace <code>cs::my</code>, we can use the following option:</p> + + <pre class="terminal"> +--namespace-map https://www.codesynthesis.com/my=cs::my + </pre> + + <p>A vocabulary without a namespace is mapped to the global scope. This + also can be altered with the above options by using an empty name + for the XML namespace:</p> + + <pre class="terminal"> +--namespace-map =cs + </pre> + + <h2><a name="3.5">3.5 Thread Safety</a></h2> + + <p>XSD-generated code is thread-safe in the sense that you can + use different instantiations of the object model in several + threads concurrently. This is possible due to the generated + code not relying on any writable global variables. If you need + to share the same object between several threads then you will + need to provide some form of synchronization. One approach would + be to use the generated code customization mechanisms to embed + synchronization primitives into the generated C++ classes. For more + information on generated code customization refer to the + <a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a>.</p> + + <p>If you also would like to call parsing and/or serialization + functions from several threads potentially concurrently, then + you will need to make sure the Xerces-C++ runtime is initialized + and terminated only once. 
The easiest way to do this is to + initialize/terminate Xerces-C++ from <code>main()</code> when + there are no threads yet/anymore:</p> + + <pre class="c++"> +#include <xercesc/util/PlatformUtils.hpp> + +int +main () +{ + xercesc::XMLPlatformUtils::Initialize (); + + { + // Start/terminate threads and parse/serialize here. + } + + xercesc::XMLPlatformUtils::Terminate (); +} + </pre> + + <p>Because you initialize the Xerces-C++ runtime yourself you should + also pass the <code>xml_schema::flags::dont_initialize</code> flag + to parsing and serialization functions. See <a href="#5">Chapter 5, + "Parsing"</a> and <a href="#6">Chapter 6, "Serialization"</a> for + more information.</p> + + + <!-- Chapater 4 --> + + + <h1><a name="4">4 Working with Object Models</a></h1> + + <p>As we have seen in the previous chapters, the XSD compiler generates + a C++ class for each type defined in XML Schema. Together these classes + constitute an object model for an XML vocabulary. In this chapter we + will take a closer look at different elements that comprise an + object model class as well as how to create, access, and modify + object models.</p> + + <p>In this and subsequent chapters we will use the following schema + that describes a collection of person records. 
We save it in + <code>people.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:simpleType name="gender_t"> + <xs:restriction base="xs:string"> + <xs:enumeration value="male"/> + <xs:enumeration value="female"/> + </xs:restriction> + </xs:simpleType> + + <xs:complexType name="person_t"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="middle-name" type="xs:string" minOccurs="0"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="gender" type="gender_t"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> + <xs:attribute name="id" type="xs:unsignedInt" use="required"/> + </xs:complexType> + + <xs:complexType name="people_t"> + <xs:sequence> + <xs:element name="person" type="person_t" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="people" type="people_t"/> + +</xs:schema> + </pre> + + <p>A sample XML instance to go along with this schema is saved + in <code>people.xml</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> + +</people> + </pre> + + <p>Compiling <code>people.xsd</code> with the XSD compiler results + in three generated C++ classes: <code>gender_t</code>, + <code>person_t</code>, and <code>people_t</code>. + The <code>gender_t</code> class is modelled after the C++ + <code>enum</code> type. 
Its definition is presented below:</p> + + <pre class="c++"> +class gender_t: public xml_schema::string +{ +public: + enum value + { + male, + female + }; + + gender_t (value); + gender_t (const xml_schema::string&); + + gender_t& + operator= (value); + + operator value () const; +}; + </pre> + + <p>The following listing shows how we can use this type:</p> + + <pre class="c++"> +gender_t m (gender_t::male); +gender_t f ("female"); + +if (m == "female" || f == gender_t::male) +{ + ... +} + +switch (m) +{ +case gender_t::male: + { + ... + } +case gender_t::female: + { + ... + } +} + </pre> + + <p>The other two classes will be examined in detail in the subsequent + sections.</p> + + <h2><a name="4.1">4.1 Attribute and Element Cardinalities</a></h2> + + <p>As we have seen in the previous chapters, XSD generates a different + set of type definitions and member functions for elements with + different cardinalities. The C++/Tree mapping divides all the possible + element and attribute cardinalities into three cardinality classes: + <em>one</em>, <em>optional</em>, and <em>sequence</em>.</p> + + <p>The <em>one</em> cardinality class covers all elements that should + occur exactly once as well as required attributes. In our + example, the <code>first-name</code>, <code>last-name</code>, + <code>gender</code>, and <code>age</code> elements as well as + the <code>id</code> attribute belong to this cardinality class. + The following code fragment shows type definitions as well as the + accessor and modifier functions that are generated for the + <code>gender</code> element in the <code>person_t</code> class:</p> + + <pre class="c++"> +class person_t +{ + // gender + // + typedef gender_t gender_type; + + const gender_type& + gender () const; + + gender_type& + gender (); + + void + gender (const gender_type&); +}; + </pre> + + <p>The <code>gender_type</code> type is an alias for the element's type. 
+ The first two accessor functions return read-only (constant) and + read-write references to the element's value, respectively. The + modifier function sets the new value for the element.</p> + + <p>The <em>optional</em> cardinality class covers all elements that + can occur zero or one time as well as optional attributes. In our + example, the <code>middle-name</code> element belongs to this + cardinality class. The following code fragment shows the type + definitions as well as the accessor and modifier functions that + are generated for this element in the <code>person_t</code> class:</p> + + <pre class="c++"> +class person_t +{ + // middle-name + // + typedef xml_schema::string middle_name_type; + typedef xsd::optional<middle_name_type> middle_name_optional; + + const middle_name_optional& + middle_name () const; + + middle_name_optional& + middle_name (); + + void + middle_name (const middle_name_type&); + + void + middle_name (const middle_name_optional&); +}; + </pre> + + <p>As with the <code>gender</code> element, <code>middle_name_type</code> + is an alias for the element's type. The <code>middle_name_optional</code> + type is a container for the element's optional value. It can be queried + for the presence of the value using the <code>present()</code> function. + The value itself can be retrieved using the <code>get()</code> + accessor and set using the <code>set()</code> modifier. The container + can be reverted to the value not present state with the call to the + <code>reset()</code> function. The following example shows how we + can use this container:</p> + + <pre class="c++"> +person_t::middle_name_optional n ("John"); + +if (n.present ()) +{ + cout << n.get () << endl; +} + +n.set ("Jane"); +n.reset (); + </pre> + + + <p>Unlike the <em>one</em> cardinality class, the accessor functions + for the <em>optional</em> class return read-only (constant) and + read-write references to the container instead of the element's + value directly. 
The modifier functions set the new value for the + element.</p> + + <p>Finally, the <em>sequence</em> cardinality class covers all elements + that can occur more than once. In our example, the + <code>person</code> element in the <code>people_t</code> type + belongs to this cardinality class. The following code fragment shows + the type definitions as well as the accessor and modifier functions + that are generated for this element in the <code>people_t</code> + class:</p> + + <pre class="c++"> +class people_t +{ + // person + // + typedef person_t person_type; + typedef xsd::sequence<person_type> person_sequence; + typedef person_sequence::iterator person_iterator; + typedef person_sequence::const_iterator person_const_iterator; + + const person_sequence& + person () const; + + person_sequence& + person (); + + void + person (const person_sequence&); +}; + </pre> + + <p>Identical to the other cardinality classes, <code>person_type</code> + is an alias for the element's type. The <code>person_sequence</code> + type is a sequence container for the element's values. It is based + on and has the same interface as <code>std::vector</code> and + therefore can be used in similar ways. The <code>person_iterator</code> + and <code>person_const_iterator</code> types are read-write + and read-only (constant) iterators, respectively, for the <code>person_sequence</code> + container.</p> + + <p>Similar to the <em>optional</em> cardinality class, the + accessor functions for the <em>sequence</em> class return + read-only (constant) and read-write references to the sequence + container. The modifier function copies the entries from + the passed sequence.</p> + + <p>C++/Tree is a "flattening" mapping in the sense that many levels of + nested compositors (<code>choice</code> and <code>sequence</code>), + all potentially with their own cardinalities, are in the end mapped + to a flat set of elements with one of the three cardinality classes + discussed above. 
While this results in a simple and easy to use API + for most types, in certain cases, the order of elements in the actual + XML documents is not preserved once parsed into the object model. To + overcome this limitation we can mark certain schema types, for which + content order is not sufficiently preserved, as ordered. For more + information on this functionality refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.8.4">Section + 2.8.4, "Element Order"</a> in the C++/Tree Mapping User Manual.</p> + + <p>For complex schemas with many levels of nested compositors + (<code>choice</code> and <code>sequence</code>) it can also + be hard to deduce the cardinality class of a particular element. + The generated Doxygen documentation can greatly help with + this task. For each element and attribute the documentation + clearly identifies its cardinality class. Alternatively, you + can study the generated header files to find out the cardinality + class of a particular attribute or element.</p> + + <p>In the next sections we will examine how to access and modify + information stored in an object model using accessor and modifier + functions described in this section.</p> + + <h2><a name="4.2">4.2 Accessing the Object Model</a></h2> + + <p>In this section we will learn how to get to the information + stored in the object model for our person records vocabulary. + The following application accesses and prints the contents + of the <code>people.xml</code> file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + unique_ptr<people_t> ppl (people ("people.xml")); + + // Iterate over individual person records. + // + people_t::person_sequence& ps (ppl->person ()); + + for (people_t::person_iterator i (ps.begin ()); i != ps.end (); ++i) + { + person_t& p (*i); + + // Print names: first-name and last-name are required elements, + // middle-name is optional. 
+ // + cout << "name: " << p.first_name () << " "; + + if (p.middle_name ().present ()) + cout << p.middle_name ().get () << " "; + + cout << p.last_name () << endl; + + // Print gender, age, and id which are all required. + // + cout << "gender: " << p.gender () << endl + << "age: " << p.age () << endl + << "id: " << p.id () << endl + << endl; + } +} + </pre> + + <p>This code shows common patterns of accessing elements and attributes + with different cardinality classes. For the sequence element + (<code>person</code> in <code>people_t</code>) we first obtain a + reference to the container and then iterate over individual + records. The values of elements and attributes with the + <em>one</em> cardinality class (<code>first-name</code>, + <code>last-name</code>, <code>gender</code>, <code>age</code>, + and <code>id</code>) can be obtained directly by calling the + corresponding accessor functions. For the optional element + <code>middle-name</code> we first check if the value is present + and only then call <code>get()</code> to retrieve it.</p> + + <p>Note that when we want to reduce typing by creating a variable + representing a fragment of the object model that we are currently + working with (<code>ps</code> and <code>p</code> above), we obtain + a reference to that fragment instead of making a potentially + expensive copy. This is generally a good rule to follow when + creating high-performance applications.</p> + + <p>If we run the above application on our sample + <code>people.xml</code>, the output looks as follows:</p> + + <pre class="terminal"> +name: John Doe +gender: male +age: 32 +id: 1 + +name: Jane Mary Doe +gender: female +age: 28 +id: 2 + </pre> + + + <h2><a name="4.3">4.3 Modifying the Object Model</a></h2> + + <p>In this section we will learn how to modify the information + stored in the object model for our person records vocabulary. 
+ The following application changes the contents of the + <code>people.xml</code> file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + unique_ptr<people_t> ppl (people ("people.xml")); + + // Iterate over individual person records and increment + // the age. + // + people_t::person_sequence& ps (ppl->person ()); + + for (people_t::person_iterator i (ps.begin ()); i != ps.end (); ++i) + { + // Alternative way: i->age ()++; + // + i->age (i->age () + 1); + } + + // Add middle-name to the first record and remove it from + // the second. + // + person_t& john (ps[0]); + person_t& jane (ps[1]); + + john.middle_name ("Mary"); + jane.middle_name ().reset (); + + // Add another John record. + // + ps.push_back (john); + + // Serialize the modified object model to XML. + // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "people.xsd"; + + people (cout, *ppl, map); +} + </pre> + + <p>The first modification the above application performs is iterating + over person records and incrementing the age value. This code + fragment shows how to modify the value of a required attribute + or element. The next modification shows how to set a new value + for the optional <code>middle-name</code> element as well + as clear its value. Finally the example adds a copy of the + John Doe record to the <code>person</code> element sequence.</p> + + <p>Note that in this case using references for the <code>ps</code>, + <code>john</code>, and <code>jane</code> variables is no longer + a performance improvement but a requirement for the application + to function correctly. 
If we hadn't used references, all our changes + would have been made on copies without affecting the object model.</p> + + <p>If we run the above application on our sample <code>people.xml</code>, + the output looks as follows:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>33</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>29</age> + </person> + + <person id="1"> + <first-name>John</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>33</age> + </person> + +</people> + </pre> + + + <h2><a name="4.4">4.4 Creating the Object Model from Scratch</a></h2> + + <p>In this section we will learn how to create a new object model + for our person records vocabulary. The following application + recreates the content of the original <code>people.xml</code> + file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + people_t ppl; + people_t::person_sequence& ps (ppl.person ()); + + // Add the John Doe record. + // + ps.push_back ( + person_t ("John", // first-name + "Doe", // last-name + gender_t::male, // gender + 32, // age + 1)); + + // Add the Jane Doe record. + // + ps.push_back ( + person_t ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2)); // id + + // Add middle name to the Jane Doe record. + // + person_t& jane (ps.back ()); + jane.middle_name ("Mary"); + + // Serialize the object model to XML. 
+ // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "people.xsd"; + + people (cout, ppl, map); +} + </pre> + + <p>The only new part in the above application is the calls + to the <code>people_t</code> and <code>person_t</code> + constructors. As a general rule, for each C++ class + XSD generates a constructor with initializers + for each element and attribute belonging to the <em>one</em> + cardinality class. For our vocabulary, the following + constructors are generated:</p> + + <pre class="c++"> +class person_t +{ + person_t (const first_name_type&, + const last_name_type&, + const gender_type&, + const age_type&, + const id_type&); +}; + +class people_t +{ + people_t (); +}; + </pre> + + <p>Note also that we set the <code>middle-name</code> element + on the Jane Doe record by obtaining a reference to that record + in the object model and setting the <code>middle-name</code> + value on it. This is a general rule that should be followed + in order to obtain the best performance: if possible, + direct modifications to the object model should be preferred + to modifications on temporaries with subsequent copying. The + following code fragment shows a semantically equivalent but + slightly slower version:</p> + + <pre class="c++"> +// Add the Jane Doe record. +// +person_t jane ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2); // id + +jane.middle_name ("Mary"); + +ps.push_back (jane); + </pre> + + <p>We can also go one step further to reduce copying and improve + the performance of our application by using the non-copying + <code>push_back()</code> function which assumes ownership + of the passed objects:</p> + + <pre class="c++"> +// Add the Jane Doe record. 
C++11 version +// +unique_ptr<person_t> jane_p ( + new person_t ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2)); // id +ps.push_back (std::move (jane_p)); // assumes ownership + +// Add the John Doe record. C++98 version. +// +auto_ptr<person_t> john_p ( + new person_t ("John", // first-name + "Doe", // last-name + gender_t::male, // gender + 32, // age + 1)); +ps.push_back (john_p); // assumes ownership + </pre> + + <p>For more information on the non-copying modifier functions refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.8">Section + 2.8, "Mapping for Local Elements and Attributes"</a> in the C++/Tree Mapping + User Manual. The above application produces the following output:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> + +</people> + </pre> + + <h2><a name="4.5">4.5 Mapping for the Built-in XML Schema Types</a></h2> + + <p>Our person record vocabulary uses several built-in XML Schema + types: <code>string</code>, <code>short</code>, and + <code>unsignedInt</code>. Until now we haven't talked about + the mapping of built-in XML Schema types to C++ types and how + to work with them. This section provides an overview + of the built-in types. For more detailed information refer + to <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.5">Section + 2.5, "Mapping for Built-in Data Types"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>In XML Schema, built-in types are defined in the XML Schema namespace. 
+ By default, the C++/Tree mapping maps this namespace to C++ + namespace <code>xml_schema</code> (this mapping can be altered + with the <code>--namespace-map</code> option). The following table + summarizes the mapping of XML Schema built-in types to C++ types:</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Alias in the <code>xml_schema</code> namespace</th> + <th>C++ type</th> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_</code></td> + <td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + <td><code>integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer</code></td> + <td><code>unsigned long 
long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string types</th> + </tr> + <tr> + <td><code>string</code></td> + <td><code>string</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string</code></td> + <td>type derived from <code>string</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token</code></td> + <td>type derived from <code>normalized_string</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKENS</code></td> + <td><code>nmtokens</code></td> + <td>type derived from <code>sequence<nmtoken></code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>language</code></td> + <td><code>language</code></td> + 
<td>type derived from <code>token</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname</code></td> + <td><code>xml_schema::qname</code></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs</code></td> + <td>type derived from <code>sequence<idref></code></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary</code></td> + <td><code>xml_schema::base64_binary</code></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary</code></td> + <td><code>xml_schema::hex_binary</code></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date</code></td> + <td><code>xml_schema::date</code></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time</code></td> + <td><code>xml_schema::date_time</code></td> + </tr> + <tr> + <td><code>duration</code></td> + <td><code>duration</code></td> + <td><code>xml_schema::duration</code></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday</code></td> + <td><code>xml_schema::gday</code></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth</code></td> + <td><code>xml_schema::gmonth</code></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + <td><code>gmonth_day</code></td> + <td><code>xml_schema::gmonth_day</code></td> + </tr> + <tr> + <td><code>gYear</code></td> + 
<td><code>gyear</code></td> + <td><code>xml_schema::gyear</code></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month</code></td> + <td><code>xml_schema::gyear_month</code></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time</code></td> + <td><code>xml_schema::time</code></td> + </tr> + + <tr> + <th colspan="3">entity types</th> + </tr> + <tr> + <td><code>ENTITY</code></td> + <td><code>entity</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>ENTITIES</code></td> + <td><code>entities</code></td> + <td>type derived from <code>sequence<entity></code></td> + </tr> + </table> + + <p>As you can see from the table above a number of built-in + XML Schema types are mapped to fundamental C++ types such + as <code>int</code> or <code>bool</code>. All string-based + XML Schema types are mapped to C++ types that are derived + from either <code>std::string</code> or + <code>std::wstring</code>, depending on the character + type selected. For access and modification purposes these + types can be treated as <code>std::string</code>. A number + of built-in types, such as <code>qname</code>, the binary + types, and the date/time types do not have suitable + fundamental or standard C++ types to map to. As a result, + these types are implemented from scratch in the XSD runtime. + For more information on their interfaces refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.5">Section + 2.5, "Mapping for Built-in Data Types"</a> in the C++/Tree Mapping + User Manual.</p> + + + <!-- Chapater 5 --> + + + <h1><a name="5">5 Parsing</a></h1> + + <p>We have already seen how to parse XML to an object model in this guide + before. In this chapter we will discuss the parsing topic in more + detail.</p> + + <p>By default, the C++/Tree mapping provides a total of 14 overloaded + parsing functions. 
They differ in the input methods used to + read XML as well as the error reporting mechanisms. It is also possible + to generate types for root elements instead of parsing and serialization + functions. This may be useful if your XML vocabulary has multiple + root elements. For more information on element types refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.9">Section + 2.9, "Mapping for Global Elements"</a> in the C++/Tree Mapping User + Manual.</p> + + + <p>In this section we will discuss the most commonly used versions of + the parsing functions. For a comprehensive description of parsing + refer to <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#3">Chapter + 3, "Parsing"</a> in the C++/Tree Mapping User Manual. For the <code>people</code> + global element from our person record vocabulary, we will concentrate + on the following three parsing functions:</p> + + <pre class="c++"> +std::[unique|auto]_ptr<people_t> +people (const std::string& uri, + xml_schema::flags f = 0, + const xml_schema::properties& p = xml_schema::properties ()); + +std::[unique|auto]_ptr<people_t> +people (std::istream& is, + xml_schema::flags f = 0, + const xml_schema::properties& p = xml_schema::properties ()); + +std::[unique|auto]_ptr<people_t> +people (std::istream& is, + const std::string& resource_id, + xml_schema::flags f = 0, + const xml_schema::properties& p = ::xml_schema::properties ()); + </pre> + + <p>The first function parses a local file or a URI. We have already + used this parsing function in the previous chapters. The second + and third functions read XML from a standard input stream. The + last function also requires a resource id. 
This id is used to + identify the XML document being parsed in diagnostics messages + as well as to resolve relative paths to other documents (for example, + schemas) that might be referenced from the XML document.</p> + + <p>The last two arguments to all three parsing functions are parsing + flags and properties. The flags argument provides a number of ways + to fine-tune the parsing process. The properties argument allows you + to pass additional information to the parsing functions. We will + use these two arguments in <a href="#5.1">Section 5.1, "XML Schema + Validation and Searching"</a> below. All three functions return + the object model as either <code>std::unique_ptr</code> (C++11) or + <code>std::auto_ptr</code> (C++98), depending on the C++ standard + selected (<code>--std</code> XSD compiler option). The following + example shows how we can use the above parsing functions:</p> + + <pre class="c++"> +using std::unique_ptr; + +// Parse a local file or URI. +// +unique_ptr<people_t> p1 (people ("people.xml")); +unique_ptr<people_t> p2 (people ("http://example.com/people.xml")); + +// Parse a local file via ifstream. +// +std::ifstream ifs ("people.xml"); +unique_ptr<people_t> p3 (people (ifs, "people.xml")); + +// Parse an XML string. +// +std::string str ("..."); // XML in a string. +std::istringstream iss (str); +unique_ptr<people_t> p4 (people (iss)); + </pre> + + + <h2><a name="5.1">5.1 XML Schema Validation and Searching</a></h2> + + <p>The C++/Tree mapping relies on the underlying Xerces-C++ XML + parser for full XML document validation. 
The XML Schema + validation is enabled by default and can be disabled by + passing the <code>xml_schema::flags::dont_validate</code> + flag to the parsing functions, for example:</p> + + <pre class="c++"> +unique_ptr<people_t> p ( + people ("people.xml", xml_schema::flags::dont_validate)); + </pre> + + <p>Even when XML Schema validation is disabled, the generated + code still performs a number of checks to prevent + construction of an inconsistent object model (for example, an + object model with missing required attributes or elements).</p> + + <p>When XML Schema validation is enabled, the XML parser needs + to locate a schema to validate against. There are several + methods to provide the schema location information to the + parser. The easiest and most commonly used method is to + specify schema locations in the XML document itself + with the <code>schemaLocation</code> or + <code>noNamespaceSchemaLocation</code> attributes, for example:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd" + xsi:schemaLocation="http://www.w3.org/XML/1998/namespace xml.xsd"> + </pre> + + <p>As you might have noticed, we used this method in all the sample XML + documents presented in this guide up until now. Note that the + schema locations specified with these two attributes are relative + to the document's path unless they are absolute URIs (that is + start with <code>http://</code>, <code>file://</code>, etc.). 
+ In particular, if you specify just file names as your schema + locations, as we did above, then the schemas should reside in + the same directory as the XML document itself.</p> + + <p>Another method of providing the schema location information + is via the <code>xml_schema::properties</code> argument, as + shown in the following example:</p> + + <pre class="c++"> +xml_schema::properties props; +props.no_namespace_schema_location ("people.xsd"); +props.schema_location ("http://www.w3.org/XML/1998/namespace", "xml.xsd"); + +unique_ptr<people_t> p (people ("people.xml", 0, props)); + </pre> + + <p>The schema locations provided with this method override + those specified in the XML document. As with the previous + method, the schema locations specified this way are + relative to the document's path unless they are absolute URIs. + In particular, if you want to use local schemas that are + not related to the document being parsed, then you will + need to use the <code>file://</code> URI. The following + example shows how to use schemas that reside in the current + working directory:</p> + + <pre class="c++"> +#include <unistd.h> // getcwd +#include <limits.h> // PATH_MAX + +char cwd[PATH_MAX]; +if (getcwd (cwd, PATH_MAX) == 0) +{ + // Buffer too small? +} + +xml_schema::properties props; + +props.no_namespace_schema_location ( + "file:///" + std::string (cwd) + "/people.xsd"); + +props.schema_location ( + "http://www.w3.org/XML/1998/namespace", + "file:///" + std::string (cwd) + "/xml.xsd"); + +unique_ptr<people_t> p (people ("people.xml", 0, props)); + </pre> + + <p>A third method is the most useful if you are planning to parse + several XML documents of the same vocabulary. In that case + it may be beneficial to pre-parse and cache the schemas in + the XML parser which can then be used to parse all documents + without re-parsing the schemas. 
For more information on + this method refer to the <code>caching</code> example in the + <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package. + It is also possible to convert the schemas into a pre-compiled + binary representation and embed this representation directly into + the application executable. With this approach your application can + perform XML Schema validation without depending on any external + schema files. For more information on how to achieve this refer to + the <code>embedded</code> example in the <code>cxx/tree/</code> + directory in the <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <p>When the XML parser cannot locate a schema for the + XML document, the validation fails and XML document + elements and attributes for which schema definitions could + not be located are reported in the diagnostics. For + example, if we remove the <code>noNamespaceSchemaLocation</code> + attribute in <code>people.xml</code> from the previous chapter, + then we will get the following diagnostics if we try to parse + this file with validation enabled:</p> + + <pre class="terminal"> +people.xml:2:63 error: no declaration found for element 'people' +people.xml:4:18 error: no declaration found for element 'person' +people.xml:4:18 error: attribute 'id' is not declared for element 'person' +people.xml:5:17 error: no declaration found for element 'first-name' +people.xml:6:18 error: no declaration found for element 'middle-name' +people.xml:7:16 error: no declaration found for element 'last-name' +people.xml:8:13 error: no declaration found for element 'gender' +people.xml:9:10 error: no declaration found for element 'age' + </pre> + + <h2><a name="5.2">5.2 Error Handling</a></h2> + + <p>The parsing functions offer a number of ways to handle error conditions + with the C++ exceptions being the most commonly used mechanism. 
All + C++/Tree exceptions derive from common base <code>xml_schema::exception</code> + which in turn derives from <code>std::exception</code>. The easiest + way to uniformly handle all possible C++/Tree exceptions and print + detailed information about the error is to catch and print + <code>xml_schema::exception</code>, as shown in the following + example:</p> + + <pre class="c++"> +try +{ + unique_ptr<people_t> p (people ("people.xml")); +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>Each individual C++/Tree exception also allows you to obtain + error details programmatically. For example, the + <code>xml_schema::parsing</code> exception is thrown when + the XML parsing and validation in the underlying XML parser + fails. It encapsulates various diagnostics information + such as the file name, line and column numbers, as well as the + error or warning message for each entry. For more information + about this and other exceptions that can be thrown during + parsing, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#3.3">Section + 3.3, "Error Handling"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>Note that if you are parsing <code>std::istream</code> on which + exceptions are not enabled, then you will need to check the + stream state after the call to the parsing function in order + to detect any possible stream failures, for example:</p> + + <pre class="c++"> +std::ifstream ifs ("people.xml"); + +if (ifs.fail ()) +{ + cerr << "people.xml: unable to open" << endl; + return 1; +} + +unique_ptr<people_t> p (people (ifs, "people.xml")); + +if (ifs.fail ()) +{ + cerr << "people.xml: read error" << endl; + return 1; +} + </pre> + + <p>The above example can be rewritten to use exceptions as + shown below:</p> + + <pre class="c++"> +try +{ + std::ifstream ifs; + ifs.exceptions (std::ifstream::badbit | std::ifstream::failbit); + ifs.open ("people.xml"); + + unique_ptr<people_t> p 
(people (ifs, "people.xml")); +} +catch (const std::ifstream::failure&) +{ + cerr << "people.xml: unable to open or read error" << endl; + return 1; +} + </pre> + + + <!-- Chapater 6 --> + + + <h1><a name="6">6 Serialization</a></h1> + + <p>We have already seen how to serialize an object model back to XML + in this guide before. In this chapter we will discuss the + serialization topic in more detail.</p> + + <p>By default, the C++/Tree mapping provides a total of 8 overloaded + serialization functions. They differ in the output methods used to write + XML as well as the error reporting mechanisms. It is also possible to + generate types for root elements instead of parsing and serialization + functions. This may be useful if your XML vocabulary has multiple + root elements. For more information on element types refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.9">Section + 2.9, "Mapping for Global Elements"</a> in the C++/Tree Mapping User + Manual.</p> + + + <p>In this section we will discuss the most commonly + used version of serialization functions. For a comprehensive description + of serialization refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#4">Chapter + 4, "Serialization"</a> in the C++/Tree Mapping User Manual. For the + <code>people</code> global element from our person record vocabulary, + we will concentrate on the following serialization function:</p> + + <pre class="c++"> +void +people (std::ostream& os, + const people_t& x, + const xml_schema::namespace_infomap& map = + xml_schema::namespace_infomap (), + const std::string& encoding = "UTF-8", + xml_schema::flags f = 0); + </pre> + + <p>This function serializes the object model passed as the second + argument to the standard output stream passed as the first + argument. The third argument is a namespace information map + which we will discuss in more detail in the next section. 
+ The fourth argument is a character encoding that the resulting + XML document should be in. Possible valid values for this + argument are "US-ASCII", "ISO8859-1", "UTF-8", "UTF-16BE", + "UTF-16LE", "UCS-4BE", and "UCS-4LE". Finally, the flags + argument allows fine-tuning of the serialization process. + The following example shows how we can use the above serialization + function:</p> + + <pre class="c++"> +people_t& p = ... + +xml_schema::namespace_infomap map; +map[""].schema = "people.xsd"; + +// Serialize to stdout. +// +people (std::cout, p, map); + +// Serialize to a file. +// +std::ofstream ofs ("people.xml"); +people (ofs, p, map); + +// Serialize to a string. +// +std::ostringstream oss; +people (oss, p, map); +std::string xml (oss.str ()); + </pre> + + + <h2><a name="6.1">6.1 Namespace and Schema Information</a></h2> + + <p>While XML serialization can be done just from the object + model alone, it is often desirable to assign meaningful + prefixes to XML namespaces used in the vocabulary as + well as to provide the schema location information. + This is accomplished by passing the namespace information + map to the serialization function. The key in this map is + a namespace prefix that should be assigned to an XML namespace + specified in the <code>name</code> variable of the + map value. You can also assign an optional schema location for + this namespace in the <code>schema</code> variable. Based + on each key-value entry in this map, the serialization + function adds two attributes to the resulting XML document: + the namespace-prefix mapping attribute and schema location + attribute. The empty prefix indicates that the namespace + should be mapped without a prefix. 
For example, the following + map:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = "http://www.example.com/example"; +map[""].schema = "example.xsd"; + +map["x"].name = "http://www.w3.org/XML/1998/namespace"; +map["x"].schema = "xml.xsd"; + </pre> + + <p>Results in the following XML document:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<example + xmlns="http://www.example.com/example" + xmlns:x="http://www.w3.org/XML/1998/namespace" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.example.com/example example.xsd + http://www.w3.org/XML/1998/namespace xml.xsd"> + </pre> + + <p>The empty namespace indicates that the vocabulary has no target + namespace. For example, the following map results in only the + <code>noNamespaceSchemaLocation</code> attribute being added:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = ""; +map[""].schema = "example.xsd"; + </pre> + + <h2><a name="6.2">6.2 Error Handling</a></h2> + + <p>Similar to the parsing functions, the serialization functions offer a + number of ways to handle error conditions with the C++ exceptions being + the most commonly used mechanism. As with parsing, the easiest way to + uniformly handle all possible serialization exceptions and print + detailed information about the error is to catch and print + <code>xml_schema::exception</code>:</p> + + <pre class="c++"> +try +{ + people_t& p = ... + + xml_schema::namespace_infomap map; + map[""].schema = "people.xsd"; + + people (std::cout, p, map); +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>The most commonly encountered serialization exception is + <code>xml_schema::serialization</code>. It is thrown + when the XML serialization in the underlying XML writer + fails. It encapsulates various diagnostics information + such as the file name, line and column numbers, as well as the + error or warning message for each entry. 
For more information + about this and other exceptions that can be thrown during + serialization, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#4.4">Section + 4.4, "Error Handling"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>Note that if you are serializing to <code>std::ostream</code> on + which exceptions are not enabled, then you will need to check the + stream state after the call to the serialization function in order + to detect any possible stream failures, for example:</p> + + <pre class="c++"> +std::ofstream ofs ("people.xml"); + +if (ofs.fail ()) +{ + cerr << "people.xml: unable to open" << endl; + return 1; +} + +people (ofs, p, map); + +if (ofs.fail ()) +{ + cerr << "people.xml: write error" << endl; + return 1; +} + </pre> + + <p>The above example can be rewritten to use exceptions as + shown below:</p> + + <pre class="c++"> +try +{ + std::ofstream ofs; + ofs.exceptions (std::ofstream::badbit | std::ofstream::failbit); + ofs.open ("people.xml"); + + people (ofs, p, map); +} +catch (const std::ofstream::failure&) +{ + cerr << "people.xml: unable to open or write error" << endl; + return 1; +} + </pre> + + </div> +</div> + +</body> +</html> diff --git a/doc/cxx/tree/guide/index.xhtml.in b/doc/cxx/tree/guide/index.xhtml.in new file mode 100644 index 0000000..2f7f1e2 --- /dev/null +++ b/doc/cxx/tree/guide/index.xhtml.in @@ -0,0 +1,2736 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Tree Mapping Getting Started Guide</title> + + <meta name="copyright" content="© @copyright@"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parsing,serialization,validation"/> + <meta name="description" content="C++/Tree Mapping Getting Started Guide"/> + + <link rel="stylesheet" 
type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + line-height: 1.2em; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 140%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. */ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage .title { + font-weight: bold; + font-size: 200%; + text-align: center; + } + + #titlepage #first-title { + padding: 1em 0 0.4em 0; + } + + #titlepage #second-title { + padding: 0.4em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + div.img { + text-align: center; + padding: 2em 0 2em 0; + } + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * 
table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div class="title" id="first-title">C++/Tree Mapping</div> + <div class="title" id="second-title">Getting Started Guide</div> + + <p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. + </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/cxx-tree-guide.ps">PostScript</a>.</p> + + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a> + <table class="toc"> + <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> + <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>2</th><td><a href="#2">Hello World Example</a> + <table class="toc"> + <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> + <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> + <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> + <tr><th>2.4</th><td><a 
href="#2.4">Compiling and Running</a></td></tr> + <tr><th>2.5</th><td><a href="#2.5">Adding Serialization</a></td></tr> + <tr><th>2.6</th><td><a href="#2.6">Selecting Naming Convention</a></td></tr> + <tr><th>2.7</th><td><a href="#2.7">Generating Documentation</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Overall Mapping Configuration</a> + <table class="toc"> + <tr><th>3.1</th><td><a href="#3.1">C++ Standard</a></td></tr> + <tr><th>3.2</th><td><a href="#3.2">Character Type and Encoding</a></td></tr> + <tr><th>3.3</th><td><a href="#3.3">Support for Polymorphism </a></td></tr> + <tr><th>3.4</th><td><a href="#3.4">Namespace Mapping</a></td></tr> + <tr><th>3.5</th><td><a href="#3.5">Thread Safety</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Working with Object Models</a> + <table class="toc"> + <tr><th>4.1</th><td><a href="#4.1">Attribute and Element Cardinalities</a></td></tr> + <tr><th>4.2</th><td><a href="#4.2">Accessing the Object Model</a></td></tr> + <tr><th>4.3</th><td><a href="#4.3">Modifying the Object Model</a></td></tr> + <tr><th>4.4</th><td><a href="#4.4">Creating the Object Model from Scratch</a></td></tr> + <tr><th>4.5</th><td><a href="#4.5">Mapping for the Built-in XML Schema Types</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Parsing</a> + <table class="toc"> + <tr><th>5.1</th><td><a href="#5.1">XML Schema Validation and Searching</a></td></tr> + <tr><th>5.2</th><td><a href="#5.2">Error Handling</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>6</th><td><a href="#6">Serialization</a> + <table class="toc"> + <tr><th>6.1</th><td><a href="#6.1">Namespace and Schema Information</a></td></tr> + <tr><th>6.2</th><td><a href="#6.2">Error Handling</a></td></tr> + </table> + </td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>The goal of this document is to provide you with an 
understanding of + the C++/Tree programming model and allow you to efficiently evaluate + XSD against your project's technical requirements. As such, this + document is intended for C++ developers and software architects + who are looking for an XML processing solution. For a more in-depth + description of the C++/Tree mapping refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/">C++/Tree + Mapping User Manual</a>.</p> + + <p>Prior experience with XML and C++ is required to understand this + document. Basic understanding of XML Schema is advantageous but + not expected or required. + </p> + + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this guide, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/">C++/Tree + Mapping User Manual</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree + Mapping Frequently Asked Questions (FAQ)</a></li> + + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is the place to ask technical questions about XSD and the C++/Tree mapping. 
+ Furthermore, the <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + </ul> + + <!-- Introduction --> + + <h1><a name="1">1 Introduction</a></h1> + + <p>Welcome to CodeSynthesis XSD and the C++/Tree mapping. XSD is a + cross-platform W3C XML Schema to C++ data binding compiler. C++/Tree + is a W3C XML Schema to C++ mapping that represents the data stored + in XML as a statically-typed, vocabulary-specific object model. + </p> + + <h2><a name="1.1">1.1 Mapping Overview</a></h2> + + <p>Based on a formal description of an XML vocabulary (schema), the + C++/Tree mapping produces a tree-like data structure suitable for + in-memory processing. The core of the mapping consists of C++ + classes that constitute the object model and are derived from + types defined in XML Schema as well as XML parsing and + serialization code.</p> + + <p>Besides the core features, C++/Tree provides a number of additional + mapping elements that can be useful in some applications. These + include serialization and extraction to/from formats other than + XML, such as unstructured text (useful for debugging) and binary + representations such as XDR and CDR for high-speed data processing + as well as automatic documentation generation. The C++/Tree mapping + also provides a wide range of mechanisms for controlling and + customizing the generated code.</p> + + <p>A typical application that uses C++/Tree for XML processing usually + performs the following three steps: it first reads (parses) an XML + document to an in-memory object model, it then performs some useful + computations on that object model which may involve modification + of the model, and finally it may write (serialize) the modified + object model back to XML.</p> + + <p>The next chapter presents a simple application that performs these + three steps. 
The following chapters show how to use the C++/Tree + mapping in more detail.</p> + + <h2><a name="1.2">1.2 Benefits</a></h2> + + <p>Traditional XML access APIs such as Document Object Model (DOM) + or Simple API for XML (SAX) have a number of drawbacks that + make them less suitable for creating robust and maintainable + XML processing applications. These drawbacks include: + </p> + + <ul class="list"> + <li>Generic representation of XML in terms of elements, attributes, + and text forces an application developer to write a substantial + amount of bridging code that identifies and transforms pieces + of information encoded in XML to a representation more suitable + for consumption by the application logic.</li> + + <li>String-based flow control defers error detection to runtime. + It also reduces code readability and maintainability.</li> + + <li>Lack of type safety because the data is represented as text.</li> + + <li>Resulting applications are hard to debug, change, and + maintain.</li> + </ul> + + <p>In contrast, statically-typed, vocabulary-specific object model + produced by the C++/Tree mapping allows you to operate in your + domain terms instead of the generic elements, attributes, and + text. Static typing helps catch errors at compile-time rather + than at run-time. Automatic code generation frees you for more + interesting tasks (such as doing something useful with the + information stored in the XML documents) and minimizes the + effort needed to adapt your applications to changes in the + document structure. To summarize, the C++/Tree object model has + the following key advantages over generic XML access APIs:</p> + + <ul class="list"> + <li><b>Ease of use.</b> The generated code hides all the complexity + associated with parsing and serializing XML. 
This includes navigating + the structure and converting between the text representation and + data types suitable for manipulation by the application + logic.</li> + + <li><b>Natural representation.</b> The object representation allows + you to access the XML data using your domain vocabulary instead + of generic elements, attributes, and text.</li> + + <li><b>Concise code.</b> With the object representation the + application implementation is simpler and thus easier + to read and understand.</li> + + <li><b>Safety.</b> The generated object model is statically + typed and uses functions instead of strings to access the + information. This helps catch programming errors at compile-time + rather than at runtime.</li> + + <li><b>Maintainability.</b> Automatic code generation minimizes the + effort needed to adapt the application to changes in the + document structure. With static typing, the C++ compiler + can pin-point the places in the client code that need to be + changed.</li> + + <li><b>Compatibility.</b> Sequences of elements are represented in + the object model as containers conforming to the standard C++ + sequence requirements. This makes it possible to use standard + C++ algorithms on the object representation and frees you from + learning yet another container interface, as is the case with + DOM.</li> + + <li><b>Efficiency.</b> If the application makes repetitive use + of the data extracted from XML, then the C++/Tree object model + is more efficient because the navigation is performed using + function calls rather than string comparisons and the XML + data is extracted only once. 
Furthermore, the runtime memory + usage is reduced due to more efficient data storage + (for instance, storing numeric data as integers instead of + strings) as well as the static knowledge of cardinality + constraints.</li> + </ul> + + + <!-- Hello World Parser --> + + + <h1><a name="2">2 Hello World Example</a></h1> + + <p>In this chapter we will examine how to parse, access, modify, and + serialize a very simple XML document using the XSD-generated + C++/Tree object model. The code presented in this chapter is + based on the <code>hello</code> example which can be found in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package.</p> + + <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + + <p>First, we need to get an idea about the structure + of the XML documents we are going to process. Our + <code>hello.xml</code>, for example, could look like this:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + <p>Then we can write a description of the above XML in the + XML Schema language and save it into <code>hello.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello_t"> + <xs:sequence> + <xs:element name="greeting" type="xs:string"/> + <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello_t"/> + +</xs:schema> + </pre> + + <p>Even if you are not familiar with XML Schema, it + should be easy to connect declarations in <code>hello.xsd</code> + to elements in <code>hello.xml</code>. The <code>hello_t</code> type + is defined as a sequence of the nested <code>greeting</code> and + <code>name</code> elements. 
Note that the term sequence in XML + Schema means that elements should appear in a particular order + as opposed to appearing multiple times. The <code>name</code> + element has its <code>maxOccurs</code> property set to + <code>unbounded</code> which means it can appear multiple times + in an XML document. Finally, the globally-defined <code>hello</code> + element prescribes the root element for our vocabulary. For an + easily-approachable introduction to XML Schema refer to + <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: + Primer</a>.</p> + + <p>The above schema is a specification of our XML vocabulary; it tells + everybody what valid documents of our XML-based language should look + like. We can also update our <code>hello.xml</code> to include the + information about the schema so that XML parsers can validate + our document:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + +</hello> + </pre> + + + <p>The next step is to compile the schema to generate the object + model and parsing functions.</p> + + <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + + <p>Now we are ready to translate our <code>hello.xsd</code> to C++. + To do this we invoke the XSD compiler from a terminal (UNIX) or + a command prompt (Windows): + </p> + + <pre class="terminal"> +$ xsd cxx-tree hello.xsd + </pre> + + <p>The XSD compiler produces two C++ files: <code>hello.hxx</code> and + <code>hello.cxx</code>. 
The following code fragment is taken from + <code>hello.hxx</code>; it should give you an idea about what gets + generated: + </p> + + <pre class="c++"> +class hello_t +{ +public: + // greeting + // + typedef xml_schema::string greeting_type; + + const greeting_type& + greeting () const; + + greeting_type& + greeting (); + + void + greeting (const greeting_type& x); + + // name + // + typedef xml_schema::string name_type; + typedef xsd::sequence<name_type> name_sequence; + typedef name_sequence::iterator name_iterator; + typedef name_sequence::const_iterator name_const_iterator; + + const name_sequence& + name () const; + + name_sequence& + name (); + + void + name (const name_sequence& s); + + // Constructor. + // + hello_t (const greeting_type&); + + ... + +}; + +std::unique_ptr<hello_t> +hello (const std::string& uri); + +std::unique_ptr<hello_t> +hello (std::istream&); + </pre> + + <p>The <code>hello_t</code> C++ class corresponds to the + <code>hello_t</code> XML Schema type. For each element + in this type a set of C++ type definitions as well as + accessor and modifier functions are generated inside the + <code>hello_t</code> class. Note that the type definitions + and member functions for the <code>greeting</code> and + <code>name</code> elements are different because of the + cardinality differences between these two elements + (<code>greeting</code> is a required single element and + <code>name</code> is a sequence of elements).</p> + + <p>The <code>xml_schema::string</code> type used in the type + definitions is a C++ class provided by the XSD runtime + that corresponds to built-in XML Schema type + <code>string</code>. The <code>xml_schema::string</code> + is based on <code>std::string</code> and can be used as + such. Similarly, the <code>sequence</code> class template + that is used in the <code>name_sequence</code> type + definition is based on and has the same interface as + <code>std::vector</code>. 
The mapping between the built-in + XML Schema types and C++ types is described in more detail in + <a href="#4.5">Section 4.5, "Mapping for the Built-in XML Schema + Types"</a>. The <code>hello_t</code> class also includes a + constructor with an initializer for the required + <code>greeting</code> element as its argument.</p> + + <p>The <code>hello</code> overloaded global functions correspond + to the <code>hello</code> global element in XML Schema. A + global element in XML Schema is a valid document root. + By default XSD generates a set of parsing functions for each + global element defined in XML Schema (this can be overridden + with the <code>--root-element-*</code> options). Parsing + functions return a dynamically allocated object model as an + automatic pointer. The actual pointer used depends on the + C++ standard selected. For C++11 it is <code>std::unique_ptr</code> + as shown above. For C++98 it is <code>std::auto_ptr</code>. + For example, if we modify our XSD compiler invocation to + select C++98:</p> + + <pre class="terminal"> +$ xsd cxx-tree --std c++98 hello.xsd + </pre> + + <p>Then the parsing function signatures will become:</p> + + <pre class="c++"> +std::auto_ptr<hello_t> +hello (const std::string& uri); + +std::auto_ptr<hello_t> +hello (std::istream&); + </pre> + + <p>For more information on parsing functions see <a href="#5">Chapter 5, + "Parsing"</a>.</p> + + <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + + <p>At this point we have all the parts we need to do something useful + with the information stored in our XML document: + </p> + + <pre class="c++"> +#include <iostream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + unique_ptr<hello_t> h (hello (argv[1])); + + for (hello_t::name_const_iterator i (h->name ().begin ()); + i != h->name ().end (); + ++i) + { + cerr << h->greeting () << ", " << *i << "!" 
<< endl; + } + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>The first part of our application calls one of the parsing + functions to parse an XML file specified on the command line. + We then use the returned object model to iterate over names + and print a greeting line for each of them. Finally, we + catch and print the <code>xml_schema::exception</code> + exception in case something goes wrong. This exception + is the root of the exception hierarchy used by the + XSD-generated code. + </p> + + + <h2><a name="2.4">2.4 Compiling and Running</a></h2> + + <p>After saving our application from the previous section in + <code>driver.cxx</code>, we are ready to compile our first + program and run it on the test XML document. On a UNIX + system this can be done with the following commands: + </p> + + <pre class="terminal"> +$ c++ -std=c++11 -I.../libxsd -c driver.cxx hello.cxx +$ c++ -std=c++11 -o driver driver.o hello.o -lxerces-c +$ ./driver hello.xml +Hello, sun! +Hello, moon! +Hello, world! + </pre> + + <p>Here <code>.../libxsd</code> represents the path to the + <a href="https://cppget.org/libxsd">libxsd</a> package root + directory. Note also that we are required to link our + application with the Xerces-C++ library because the generated + code uses it as the underlying XML parser.</p> + + <h2><a name="2.5">2.5 Adding Serialization</a></h2> + + <p>While parsing and accessing the XML data may be everything + you need, there are applications that require creating new + or modifying existing XML documents. By default XSD does + not produce serialization code. 
We will need to request + it with the <code>--generate-serialization</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --generate-serialization hello.xsd + </pre> + + <p>If we now examine the generated <code>hello.hxx</code> file, + we will find a set of overloaded serialization functions, + including the following version:</p> + + <pre class="c++"> +void +hello (std::ostream&, + const hello_t&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap ()); + + </pre> + + <p>Just like with parsing functions, XSD generates serialization + functions for each global element unless instructed otherwise + with one of the <code>--root-element-*</code> options. For more + information on serialization functions see <a href="#6">Chapter 6, + "Serialization"</a>.</p> + + <p>We first examine an application that modifies an existing + object model and serializes it back to XML:</p> + + <pre class="c++"> +#include <iostream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + unique_ptr<hello_t> h (hello (argv[1])); + + // Change the greeting phrase. + // + h->greeting ("Hi"); + + // Add another entry to the name sequence. + // + h->name ().push_back ("mars"); + + // Serialize the modified object model to XML. + // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "hello.xsd"; + + hello (cout, *h, map); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>First, our application parses an XML document and obtains its + object model as in the previous example. Then it changes the + greeting string and adds another entry to the list of names. + Finally, it serializes the object model back to XML by calling + the serialization function.</p> + + <p>The first argument we pass to the serialization function is + <code>cout</code> which results in the XML being written to + the standard output for us to inspect. 
We could have also + written the result to a file or memory buffer by creating an + instance of <code>std::ofstream</code> or <code>std::ostringstream</code> + and passing it instead of <code>cout</code>. The second argument is the + object model we want to serialize. The final argument is an optional + namespace information map for our vocabulary. It captures information + such as namespaces, namespace prefixes to which they should be mapped, + and schemas associated with these namespaces. If we don't provide + this argument then generic namespace prefixes (<code>p1</code>, + <code>p2</code>, etc.) will be automatically assigned to XML namespaces + and no schema information will be added to the resulting document + (see <a href="#6">Chapter 6, "Serialization"</a> for details). + In our case, the prefix (map key) and namespace name are empty + because our vocabulary does not use XML namespaces.</p> + + <p>If we now compile and run this application we will see the + output as shown in the following listing:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hi</greeting> + + <name>sun</name> + <name>moon</name> + <name>world</name> + <name>mars</name> + +</hello> + </pre> + + <p>We can also create and serialize an object model from scratch + as shown in the following example:</p> + + <pre class="c++"> +#include <iostream> +#include <fstream> +#include "hello.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + hello_t h ("Hi"); + + hello_t::name_sequence& ns (h.name ()); + + ns.push_back ("Jane"); + ns.push_back ("John"); + + // Serialize the object model to XML. 
+ // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "hello.xsd"; + + std::ofstream ofs (argv[1]); + hello (ofs, h, map); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + return 1; + } +} + </pre> + + <p>In this example we used the generated constructor to create + an instance of type <code>hello_t</code>. To reduce typing, + we obtained a reference to the name sequence which we then + used to add a few names. The serialization part is identical + to the previous example except this time we are writing to + a file. If we compile and run this program, it produces the + following XML file:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="hello.xsd"> + + <greeting>Hi</greeting> + + <name>Jane</name> + <name>John</name> + +</hello> + </pre> + + <h2><a name="2.6">2.6 Selecting Naming Convention</a></h2> + + <p>By default XSD uses the so-called K&R (Kernighan and Ritchie) + identifier naming convention in the generated code. In this + convention both type and function names are in lower case and + words are separated by underscores. If your application code or + schemas use a different notation, you may want to change the + naming convention used in the generated code for consistency. + XSD supports a set of widely-used naming conventions + that you can select with the <code>--type-naming</code> and + <code>--function-naming</code> options. You can also further + refine one of the predefined conventions or create a completely + custom naming scheme by using the <code>--*-regex</code> options.</p> + + <p>As an example, let's assume that our "Hello World" application + uses the so-called upper-camel-case naming convention for types + (that is, each word in a type name is capitalized) and the K&R + convention for function names. 
Since K&R is the default + convention for both type and function names, we only need to + change the type naming scheme:</p> + + <pre class="terminal"> +$ xsd cxx-tree --type-naming ucc hello.xsd + </pre> + + <p>The <code>ucc</code> argument to the <code>--type-naming</code> + option stands for upper-camel-case. If we now examine the + generated <code>hello.hxx</code>, we will see the following + changes compared to the declarations shown in the previous + sections:</p> + + <pre class="c++"> +class Hello_t +{ +public: + // greeting + // + typedef xml_schema::String GreetingType; + + const GreetingType& + greeting () const; + + GreetingType& + greeting (); + + void + greeting (const GreetingType& x); + + // name + // + typedef xml_schema::String NameType; + typedef xsd::sequence<NameType> NameSequence; + typedef NameSequence::iterator NameIterator; + typedef NameSequence::const_iterator NameConstIterator; + + const NameSequence& + name () const; + + NameSequence& + name (); + + void + name (const NameSequence& s); + + // Constructor. + // + Hello_t (const GreetingType&); + + ... + +}; + +std::unique_ptr<Hello_t> +hello (const std::string& uri); + +std::unique_ptr<Hello_t> +hello (std::istream&); + </pre> + + <p>Notice that the type names in the <code>xml_schema</code> namespace, + for example <code>xml_schema::String</code>, now also use the + upper-camel-case naming convention. The only thing that we may + be unhappy about in the above code is the <code>_t</code> + suffix in <code>Hello_t</code>. 
If we are not in a position + to change the schema, we can <em>touch-up</em> the <code>ucc</code> + convention with a custom translation rule using the + <code>--type-regex</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --type-naming ucc --type-regex '/ (.+)_t/\u$1/' hello.xsd + </pre> + + <p>This results in the following changes to the generated code:</p> + + <pre class="c++"> +class Hello +{ +public: + // greeting + // + typedef xml_schema::String GreetingType; + + const GreetingType& + greeting () const; + + GreetingType& + greeting (); + + void + greeting (const GreetingType& x); + + // name + // + typedef xml_schema::String NameType; + typedef xsd::sequence<NameType> NameSequence; + typedef NameSequence::iterator NameIterator; + typedef NameSequence::const_iterator NameConstIterator; + + const NameSequence& + name () const; + + NameSequence& + name (); + + void + name (const NameSequence& s); + + // Constructor. + // + Hello (const GreetingType&); + + ... + +}; + +std::unique_ptr<Hello> +hello (const std::string& uri); + +std::unique_ptr<Hello> +hello (std::istream&); + </pre> + + <p>For more detailed information on the <code>--type-naming</code>, + <code>--function-naming</code>, <code>--type-regex</code>, and + other <code>--*-regex</code> options refer to the NAMING + CONVENTION section in the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + <h2><a name="2.7">2.7 Generating Documentation</a></h2> + + <p>While our object model is quite simple, real-world vocabularies + can be quite complex with hundreds of types, elements, and + attributes. For such vocabularies figuring out which types + provide which member functions by studying the generated + source code or schemas can be a daunting task. 
To provide + application developers with a more accessible way of + understanding the generated object models, the XSD compiler + can be instructed to produce source code with documentation + comments in the Doxygen format. Then the source code can be + processed with the <a href="http://www.doxygen.org">Doxygen</a> + documentation system to extract this information and produce + documentation in various formats. + </p> + + <p>In this section we will see how to generate documentation + for our "Hello World" vocabulary. To showcase the full power + of the XSD documentation facilities, we will first document + our schema. The XSD compiler will then transfer + this information from the schema to the generated code and + then to the object model documentation. Note that the + documentation in the schema is not required for XSD to + generate useful documentation. Below you will find + our <code>hello.xsd</code> with added documentation:</p> + + <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello_t"> + + <xs:annotation> + <xs:documentation> + The hello_t type consists of a greeting phrase and a + collection of names to which this greeting applies. + </xs:documentation> + </xs:annotation> + + <xs:sequence> + + <xs:element name="greeting" type="xs:string"> + <xs:annotation> + <xs:documentation> + The greeting element contains the greeting phrase + for this hello object. + </xs:documentation> + </xs:annotation> + </xs:element> + + <xs:element name="name" type="xs:string" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation> + The name elements contains names to be greeted. + </xs:documentation> + </xs:annotation> + </xs:element> + + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello_t"> + <xs:annotation> + <xs:documentation> + The hello element is a root of the Hello XML vocabulary. + Every conforming document should start with this element. 
+ </xs:documentation> + </xs:annotation> + </xs:element> + +</xs:schema> + </pre> + + <p>The first step in obtaining the documentation is to recompile + our schema with the <code>--generate-doxygen</code> option:</p> + + <pre class="terminal"> +$ xsd cxx-tree --generate-serialization --generate-doxygen hello.xsd + </pre> + + <p>Now the generated <code>hello.hxx</code> file contains comments + in the Doxygen format. The next step is to process this file + with the Doxygen documentation system. If your project does + not use Doxygen then you first need to create a configuration + file for your project:</p> + + <pre class="terminal"> +$ doxygen -g hello.doxygen + </pre> + + <p>You only need to perform this step once. Now we can generate + the documentation by executing the following command in the + directory with the generated source code:</p> + + <pre class="terminal"> +$ doxygen hello.doxygen + </pre> + + <p>While the generated documentation can be useful as is, we can + go one step further and link (using the Doxygen tags mechanism) + the documentation for our object model with the documentation + for the XSD runtime library which defines C++ classes for the + built-in XML Schema types. This way we can seamlessly browse + between documentation for the <code>hello_t</code> class which + is generated by the XSD compiler and the <code>xml_schema::string</code> + class which is defined in the XSD runtime library. The Doxygen + configuration file for the XSD runtime is provided with the XSD + distribution.</p> + + <p>You can view the result of the steps described in this section + on the <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/hello/html/annotated.html">Hello + Example Documentation</a> page.</p> + + <!-- Chapter 3 --> + + + <h1><a name="3">3 Overall Mapping Configuration</a></h1> + + <p>The C++/Tree mapping has a number of configuration parameters that + determine the overall properties and behavior of the generated code. 
+ Configuration parameters are specified with the XSD command line + options. This chapter describes configuration aspects that are most + commonly encountered by application developers. These include: the + C++ standard, the character type that is used by the generated code, + handling of vocabularies that use XML Schema polymorphism, XML Schema + to C++ namespace mapping, and thread safety. For more ways to configure + the generated code refer to the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>. + </p> + + <h2><a name="3.1">3.1 C++ Standard</a></h2> + + <p>The C++/Tree mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. While the majority of the examples in this guide use + C++11, the document explains the C++11/98 usage difference and so + they can easily be converted to C++98.</p> + + <h2><a name="3.2">3.2 Character Type and Encoding</a></h2> + + <p>The C++/Tree mapping has built-in support for two character types: + <code>char</code> and <code>wchar_t</code>. You can select the + character type with the <code>--char-type</code> command line + option. The default character type is <code>char</code>. The + character type affects all string and string-based types that + are used in the mapping. These include the string-based built-in + XML Schema types, exception types, stream types, etc.</p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings. 
You can select which encoding should be used + in the object model with the <code>--char-encoding</code> command + line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <p>Note also that the character encoding that is used in the object model + is independent of the encodings used in input and output XML. In fact, + all three (object model, input XML, and output XML) can have different + encodings.</p> + + <h2><a name="3.3">3.3 Support for Polymorphism</a></h2> + + <p>By default XSD generates non-polymorphic code. If your vocabulary + uses XML Schema polymorphism in the form of <code>xsi:type</code> + and/or substitution groups, then you will need to compile + your schemas with the <code>--generate-polymorphic</code> option + to produce polymorphism-aware code. For more information on + working with polymorphic object models, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.11">Section 2.11, + "Mapping for <code>xsi:type</code> and Substitution Groups"</a> in + the C++/Tree Mapping User Manual.</p> + + <h2><a name="3.4">3.4 Namespace Mapping</a></h2> + + <p>XSD maps XML namespaces specified in the <code>targetNamespace</code> + attribute in XML Schema to one or more nested C++ namespaces. 
By + default, a namespace URI is mapped to a sequence of C++ namespace + names by removing the protocol and host parts and splitting the + rest into a sequence of names with <code>'/'</code> as the name + separator.</p> + + <p>The default mapping of namespace URIs to C++ namespaces + can be altered using the <code>--namespace-map</code> and + <code>--namespace-regex</code> compiler options. For example, + to map namespace URI <code>https://www.codesynthesis.com/my</code> to + C++ namespace <code>cs::my</code>, we can use the following option:</p> + + <pre class="terminal"> +--namespace-map https://www.codesynthesis.com/my=cs::my + </pre> + + <p>A vocabulary without a namespace is mapped to the global scope. This + also can be altered with the above options by using an empty name + for the XML namespace:</p> + + <pre class="terminal"> +--namespace-map =cs + </pre> + + <h2><a name="3.5">3.5 Thread Safety</a></h2> + + <p>XSD-generated code is thread-safe in the sense that you can + use different instantiations of the object model in several + threads concurrently. This is possible due to the generated + code not relying on any writable global variables. If you need + to share the same object between several threads then you will + need to provide some form of synchronization. One approach would + be to use the generated code customization mechanisms to embed + synchronization primitives into the generated C++ classes. For more + information on generated code customization refer to the + <a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a>.</p> + + <p>If you also would like to call parsing and/or serialization + functions from several threads potentially concurrently, then + you will need to make sure the Xerces-C++ runtime is initialized + and terminated only once. 
The easiest way to do this is to + initialize/terminate Xerces-C++ from <code>main()</code> when + there are no threads yet/anymore:</p> + + <pre class="c++"> +#include <xercesc/util/PlatformUtils.hpp> + +int +main () +{ + xercesc::XMLPlatformUtils::Initialize (); + + { + // Start/terminate threads and parse/serialize here. + } + + xercesc::XMLPlatformUtils::Terminate (); +} + </pre> + + <p>Because you initialize the Xerces-C++ runtime yourself you should + also pass the <code>xml_schema::flags::dont_initialize</code> flag + to parsing and serialization functions. See <a href="#5">Chapter 5, + "Parsing"</a> and <a href="#6">Chapter 6, "Serialization"</a> for + more information.</p> + + + <!-- Chapter 4 --> + + + <h1><a name="4">4 Working with Object Models</a></h1> + + <p>As we have seen in the previous chapters, the XSD compiler generates + a C++ class for each type defined in XML Schema. Together these classes + constitute an object model for an XML vocabulary. In this chapter we + will take a closer look at different elements that comprise an + object model class as well as how to create, access, and modify + object models.</p> + + <p>In this and subsequent chapters we will use the following schema + that describes a collection of person records. 
We save it in + <code>people.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:simpleType name="gender_t"> + <xs:restriction base="xs:string"> + <xs:enumeration value="male"/> + <xs:enumeration value="female"/> + </xs:restriction> + </xs:simpleType> + + <xs:complexType name="person_t"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="middle-name" type="xs:string" minOccurs="0"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="gender" type="gender_t"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> + <xs:attribute name="id" type="xs:unsignedInt" use="required"/> + </xs:complexType> + + <xs:complexType name="people_t"> + <xs:sequence> + <xs:element name="person" type="person_t" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="people" type="people_t"/> + +</xs:schema> + </pre> + + <p>A sample XML instance to go along with this schema is saved + in <code>people.xml</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> + +</people> + </pre> + + <p>Compiling <code>people.xsd</code> with the XSD compiler results + in three generated C++ classes: <code>gender_t</code>, + <code>person_t</code>, and <code>people_t</code>. + The <code>gender_t</code> class is modelled after the C++ + <code>enum</code> type. 
Its definition is presented below:</p> + + <pre class="c++"> +class gender_t: public xml_schema::string +{ +public: + enum value + { + male, + female + }; + + gender_t (value); + gender_t (const xml_schema::string&); + + gender_t& + operator= (value); + + operator value () const; +}; + </pre> + + <p>The following listing shows how we can use this type:</p> + + <pre class="c++"> +gender_t m (gender_t::male); +gender_t f ("female"); + +if (m == "female" || f == gender_t::male) +{ + ... +} + +switch (m) +{ +case gender_t::male: + { + ... + } +case gender_t::female: + { + ... + } +} + </pre> + + <p>The other two classes will be examined in detail in the subsequent + sections.</p> + + <h2><a name="4.1">4.1 Attribute and Element Cardinalities</a></h2> + + <p>As we have seen in the previous chapters, XSD generates a different + set of type definitions and member functions for elements with + different cardinalities. The C++/Tree mapping divides all the possible + element and attribute cardinalities into three cardinality classes: + <em>one</em>, <em>optional</em>, and <em>sequence</em>.</p> + + <p>The <em>one</em> cardinality class covers all elements that should + occur exactly once as well as required attributes. In our + example, the <code>first-name</code>, <code>last-name</code>, + <code>gender</code>, and <code>age</code> elements as well as + the <code>id</code> attribute belong to this cardinality class. + The following code fragment shows type definitions as well as the + accessor and modifier functions that are generated for the + <code>gender</code> element in the <code>person_t</code> class:</p> + + <pre class="c++"> +class person_t +{ + // gender + // + typedef gender_t gender_type; + + const gender_type& + gender () const; + + gender_type& + gender (); + + void + gender (const gender_type&); +}; + </pre> + + <p>The <code>gender_type</code> type is an alias for the element's type. 
+ The first two accessor functions return read-only (constant) and + read-write references to the element's value, respectively. The + modifier function sets the new value for the element.</p> + + <p>The <em>optional</em> cardinality class covers all elements that + can occur zero or one time as well as optional attributes. In our + example, the <code>middle-name</code> element belongs to this + cardinality class. The following code fragment shows the type + definitions as well as the accessor and modifier functions that + are generated for this element in the <code>person_t</code> class:</p> + + <pre class="c++"> +class person_t +{ + // middle-name + // + typedef xml_schema::string middle_name_type; + typedef xsd::optional<middle_name_type> middle_name_optional; + + const middle_name_optional& + middle_name () const; + + middle_name_optional& + middle_name (); + + void + middle_name (const middle_name_type&); + + void + middle_name (const middle_name_optional&); +}; + </pre> + + <p>As with the <code>gender</code> element, <code>middle_name_type</code> + is an alias for the element's type. The <code>middle_name_optional</code> + type is a container for the element's optional value. It can be queried + for the presence of the value using the <code>present()</code> function. + The value itself can be retrieved using the <code>get()</code> + accessor and set using the <code>set()</code> modifier. The container + can be reverted to the value not present state with the call to the + <code>reset()</code> function. The following example shows how we + can use this container:</p> + + <pre class="c++"> +person_t::middle_name_optional n ("John"); + +if (n.present ()) +{ + cout << n.get () << endl; +} + +n.set ("Jane"); +n.reset (); + </pre> + + + <p>Unlike the <em>one</em> cardinality class, the accessor functions + for the <em>optional</em> class return read-only (constant) and + read-write references to the container instead of the element's + value directly. 
The modifier functions set the new value for the + element.</p> + + <p>Finally, the <em>sequence</em> cardinality class covers all elements + that can occur more than once. In our example, the + <code>person</code> element in the <code>people_t</code> type + belongs to this cardinality class. The following code fragment shows + the type definitions as well as the accessor and modifier functions + that are generated for this element in the <code>people_t</code> + class:</p> + + <pre class="c++"> +class people_t +{ + // person + // + typedef person_t person_type; + typedef xsd::sequence<person_type> person_sequence; + typedef person_sequence::iterator person_iterator; + typedef person_sequence::const_iterator person_const_iterator; + + const person_sequence& + person () const; + + person_sequence& + person (); + + void + person (const person_sequence&); +}; + </pre> + + <p>Identical to the other cardinality classes, <code>person_type</code> + is an alias for the element's type. The <code>person_sequence</code> + type is a sequence container for the element's values. It is based + on and has the same interface as <code>std::vector</code> and + therefore can be used in similar ways. The <code>person_iterator</code> + and <code>person_const_iterator</code> types are read-write + and read-only (constant) iterators for the <code>person_sequence</code> + container.</p> + + <p>Similar to the <em>optional</em> cardinality class, the + accessor functions for the <em>sequence</em> class return + read-only (constant) and read-write references to the sequence + container. The modifier function copies the entries from + the passed sequence.</p> + + <p>C++/Tree is a "flattening" mapping in the sense that many levels of + nested compositors (<code>choice</code> and <code>sequence</code>), + all potentially with their own cardinalities, are in the end mapped + to a flat set of elements with one of the three cardinality classes + discussed above. 
While this results in a simple and easy to use API + for most types, in certain cases, the order of elements in the actual + XML documents is not preserved once parsed into the object model. To + overcome this limitation we can mark certain schema types, for which + content order is not sufficiently preserved, as ordered. For more + information on this functionality refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.8.4">Section + 2.8.4, "Element Order"</a> in the C++/Tree Mapping User Manual.</p> + + <p>For complex schemas with many levels of nested compositors + (<code>choice</code> and <code>sequence</code>) it can also + be hard to deduce the cardinality class of a particular element. + The generated Doxygen documentation can greatly help with + this task. For each element and attribute the documentation + clearly identifies its cardinality class. Alternatively, you + can study the generated header files to find out the cardinality + class of a particular attribute or element.</p> + + <p>In the next sections we will examine how to access and modify + information stored in an object model using accessor and modifier + functions described in this section.</p> + + <h2><a name="4.2">4.2 Accessing the Object Model</a></h2> + + <p>In this section we will learn how to get to the information + stored in the object model for our person records vocabulary. + The following application accesses and prints the contents + of the <code>people.xml</code> file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + unique_ptr<people_t> ppl (people ("people.xml")); + + // Iterate over individual person records. + // + people_t::person_sequence& ps (ppl->person ()); + + for (people_t::person_iterator i (ps.begin ()); i != ps.end (); ++i) + { + person_t& p (*i); + + // Print names: first-name and last-name are required elements, + // middle-name is optional. 
+ // + cout << "name: " << p.first_name () << " "; + + if (p.middle_name ().present ()) + cout << p.middle_name ().get () << " "; + + cout << p.last_name () << endl; + + // Print gender, age, and id which are all required. + // + cout << "gender: " << p.gender () << endl + << "age: " << p.age () << endl + << "id: " << p.id () << endl + << endl; + } +} + </pre> + + <p>This code shows common patterns of accessing elements and attributes + with different cardinality classes. For the sequence element + (<code>person</code> in <code>people_t</code>) we first obtain a + reference to the container and then iterate over individual + records. The values of elements and attributes with the + <em>one</em> cardinality class (<code>first-name</code>, + <code>last-name</code>, <code>gender</code>, <code>age</code>, + and <code>id</code>) can be obtained directly by calling the + corresponding accessor functions. For the optional element + <code>middle-name</code> we first check if the value is present + and only then call <code>get()</code> to retrieve it.</p> + + <p>Note that when we want to reduce typing by creating a variable + representing a fragment of the object model that we are currently + working with (<code>ps</code> and <code>p</code> above), we obtain + a reference to that fragment instead of making a potentially + expensive copy. This is generally a good rule to follow when + creating high-performance applications.</p> + + <p>If we run the above application on our sample + <code>people.xml</code>, the output looks as follows:</p> + + <pre class="terminal"> +name: John Doe +gender: male +age: 32 +id: 1 + +name: Jane Mary Doe +gender: female +age: 28 +id: 2 + </pre> + + + <h2><a name="4.3">4.3 Modifying the Object Model</a></h2> + + <p>In this section we will learn how to modify the information + stored in the object model for our person records vocabulary. 
+ The following application changes the contents of the + <code>people.xml</code> file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + unique_ptr<people_t> ppl (people ("people.xml")); + + // Iterate over individual person records and increment + // the age. + // + people_t::person_sequence& ps (ppl->person ()); + + for (people_t::person_iterator i (ps.begin ()); i != ps.end (); ++i) + { + // Alternative way: i->age ()++; + // + i->age (i->age () + 1); + } + + // Add middle-name to the first record and remove it from + // the second. + // + person_t& john (ps[0]); + person_t& jane (ps[1]); + + john.middle_name ("Mary"); + jane.middle_name ().reset (); + + // Add another John record. + // + ps.push_back (john); + + // Serialize the modified object model to XML. + // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "people.xsd"; + + people (cout, *ppl, map); +} + </pre> + + <p>The first modification the above application performs is iterating + over person records and incrementing the age value. This code + fragment shows how to modify the value of a required attribute + or element. The next modification shows how to set a new value + for the optional <code>middle-name</code> element as well + as clear its value. Finally the example adds a copy of the + John Doe record to the <code>person</code> element sequence.</p> + + <p>Note that in this case using references for the <code>ps</code>, + <code>john</code>, and <code>jane</code> variables is no longer + a performance improvement but a requirement for the application + to function correctly. 
If we hadn't used references, all our changes + would have been made on copies without affecting the object model.</p> + + <p>If we run the above application on our sample <code>people.xml</code>, + the output looks as follows:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>33</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>29</age> + </person> + + <person id="1"> + <first-name>John</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>33</age> + </person> + +</people> + </pre> + + + <h2><a name="4.4">4.4 Creating the Object Model from Scratch</a></h2> + + <p>In this section we will learn how to create a new object model + for our person records vocabulary. The following application + recreates the content of the original <code>people.xml</code> + file:</p> + + <pre class="c++"> +#include <iostream> +#include "people.hxx" + +using namespace std; + +int +main () +{ + people_t ppl; + people_t::person_sequence& ps (ppl.person ()); + + // Add the John Doe record. + // + ps.push_back ( + person_t ("John", // first-name + "Doe", // last-name + gender_t::male, // gender + 32, // age + 1)); + + // Add the Jane Doe record. + // + ps.push_back ( + person_t ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2)); // id + + // Add middle name to the Jane Doe record. + // + person_t& jane (ps.back ()); + jane.middle_name ("Mary"); + + // Serialize the object model to XML. 
+ // + xml_schema::namespace_infomap map; + map[""].name = ""; + map[""].schema = "people.xsd"; + + people (cout, ppl, map); +} + </pre> + + <p>The only new part in the above application is the calls + to the <code>people_t</code> and <code>person_t</code> + constructors. As a general rule, for each C++ class + XSD generates a constructor with initializers + for each element and attribute belonging to the <em>one</em> + cardinality class. For our vocabulary, the following + constructors are generated:</p> + + <pre class="c++"> +class person_t +{ + person_t (const first_name_type&, + const last_name_type&, + const gender_type&, + const age_type&, + const id_type&); +}; + +class people_t +{ + people_t (); +}; + </pre> + + <p>Note also that we set the <code>middle-name</code> element + on the Jane Doe record by obtaining a reference to that record + in the object model and setting the <code>middle-name</code> + value on it. This is a general rule that should be followed + in order to obtain the best performance: if possible, + direct modifications to the object model should be preferred + to modifications on temporaries with subsequent copying. The + following code fragment shows a semantically equivalent but + slightly slower version:</p> + + <pre class="c++"> +// Add the Jane Doe record. +// +person_t jane ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2); // id + +jane.middle_name ("Mary"); + +ps.push_back (jane); + </pre> + + <p>We can also go one step further to reduce copying and improve + the performance of our application by using the non-copying + <code>push_back()</code> function which assumes ownership + of the passed objects:</p> + + <pre class="c++"> +// Add the Jane Doe record. 
C++11 version +// +unique_ptr<person_t> jane_p ( + new person_t ("Jane", // first-name + "Doe", // last-name + gender_t::female, // gender + 28, // age + 2)); // id +ps.push_back (std::move (jane_p)); // assumes ownership + +// Add the John Doe record. C++98 version. +// +auto_ptr<person_t> john_p ( + new person_t ("John", // first-name + "Doe", // last-name + gender_t::male, // gender + 32, // age + 1)); +ps.push_back (john_p); // assumes ownership + </pre> + + <p>For more information on the non-copying modifier functions refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.8">Section + 2.8, "Mapping for Local Elements and Attributes"</a> in the C++/Tree Mapping + User Manual. The above application produces the following output:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd"> + + <person id="1"> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + + <person id="2"> + <first-name>Jane</first-name> + <middle-name>Mary</middle-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> + +</people> + </pre> + + <h2><a name="4.5">4.5 Mapping for the Built-in XML Schema Types</a></h2> + + <p>Our person record vocabulary uses several built-in XML Schema + types: <code>string</code>, <code>short</code>, and + <code>unsignedInt</code>. Until now we haven't talked about + the mapping of built-in XML Schema types to C++ types and how + to work with them. This section provides an overview + of the built-in types. For more detailed information refer + to <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.5">Section + 2.5, "Mapping for Built-in Data Types"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>In XML Schema, built-in types are defined in the XML Schema namespace. 
+ By default, the C++/Tree mapping maps this namespace to C++ + namespace <code>xml_schema</code> (this mapping can be altered + with the <code>--namespace-map</code> option). The following table + summarizes the mapping of XML Schema built-in types to C++ types:</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Alias in the <code>xml_schema</code> namespace</th> + <th>C++ type</th> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_</code></td> + <td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + <td><code>integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer</code></td> + <td><code>unsigned long 
long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string types</th> + </tr> + <tr> + <td><code>string</code></td> + <td><code>string</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string</code></td> + <td>type derived from <code>string</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token</code></td> + <td>type derived from <code>normalized_string</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKENS</code></td> + <td><code>nmtokens</code></td> + <td>type derived from <code>sequence<nmtoken></code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>language</code></td> + <td><code>language</code></td> + 
<td>type derived from <code>token</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname</code></td> + <td><code>xml_schema::qname</code></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs</code></td> + <td>type derived from <code>sequence<idref></code></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary</code></td> + <td><code>xml_schema::base64_binary</code></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary</code></td> + <td><code>xml_schema::hex_binary</code></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date</code></td> + <td><code>xml_schema::date</code></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time</code></td> + <td><code>xml_schema::date_time</code></td> + </tr> + <tr> + <td><code>duration</code></td> + <td><code>duration</code></td> + <td><code>xml_schema::duration</code></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday</code></td> + <td><code>xml_schema::gday</code></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth</code></td> + <td><code>xml_schema::gmonth</code></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + <td><code>gmonth_day</code></td> + <td><code>xml_schema::gmonth_day</code></td> + </tr> + <tr> + <td><code>gYear</code></td> + 
<td><code>gyear</code></td> + <td><code>xml_schema::gyear</code></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month</code></td> + <td><code>xml_schema::gyear_month</code></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time</code></td> + <td><code>xml_schema::time</code></td> + </tr> + + <tr> + <th colspan="3">entity types</th> + </tr> + <tr> + <td><code>ENTITY</code></td> + <td><code>entity</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>ENTITIES</code></td> + <td><code>entities</code></td> + <td>type derived from <code>sequence<entity></code></td> + </tr> + </table> + + <p>As you can see from the table above a number of built-in + XML Schema types are mapped to fundamental C++ types such + as <code>int</code> or <code>bool</code>. All string-based + XML Schema types are mapped to C++ types that are derived + from either <code>std::string</code> or + <code>std::wstring</code>, depending on the character + type selected. For access and modification purposes these + types can be treated as <code>std::string</code>. A number + of built-in types, such as <code>qname</code>, the binary + types, and the date/time types do not have suitable + fundamental or standard C++ types to map to. As a result, + these types are implemented from scratch in the XSD runtime. + For more information on their interfaces refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.5">Section + 2.5, "Mapping for Built-in Data Types"</a> in the C++/Tree Mapping + User Manual.</p> + + + <!-- Chapater 5 --> + + + <h1><a name="5">5 Parsing</a></h1> + + <p>We have already seen how to parse XML to an object model in this guide + before. In this chapter we will discuss the parsing topic in more + detail.</p> + + <p>By default, the C++/Tree mapping provides a total of 14 overloaded + parsing functions. 
They differ in the input methods used to + read XML as well as the error reporting mechanisms. It is also possible + to generate types for root elements instead of parsing and serialization + functions. This may be useful if your XML vocabulary has multiple + root elements. For more information on element types refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.9">Section + 2.9, "Mapping for Global Elements"</a> in the C++/Tree Mapping User + Manual.</p> + + + <p>In this section we will discuss the most commonly used versions of + the parsing functions. For a comprehensive description of parsing + refer to <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#3">Chapter + 3, "Parsing"</a> in the C++/Tree Mapping User Manual. For the <code>people</code> + global element from our person record vocabulary, we will concentrate + on the following three parsing functions:</p> + + <pre class="c++"> +std::[unique|auto]_ptr<people_t> +people (const std::string& uri, + xml_schema::flags f = 0, + const xml_schema::properties& p = xml_schema::properties ()); + +std::[unique|auto]_ptr<people_t> +people (std::istream& is, + xml_schema::flags f = 0, + const xml_schema::properties& p = xml_schema::properties ()); + +std::[unique|auto]_ptr<people_t> +people (std::istream& is, + const std::string& resource_id, + xml_schema::flags f = 0, + const xml_schema::properties& p = ::xml_schema::properties ()); + </pre> + + <p>The first function parses a local file or a URI. We have already + used this parsing function in the previous chapters. The second + and third functions read XML from a standard input stream. The + last function also requires a resource id. 
This id is used to + identify the XML document being parsed in diagnostics messages + as well as to resolve relative paths to other documents (for example, + schemas) that might be referenced from the XML document.</p> + + <p>The last two arguments to all three parsing functions are parsing + flags and properties. The flags argument provides a number of ways + to fine-tune the parsing process. The properties argument allows us + to pass additional information to the parsing functions. We will + use these two arguments in <a href="#5.1">Section 5.1, "XML Schema + Validation and Searching"</a> below. All three functions return + the object model as either <code>std::unique_ptr</code> (C++11) or + <code>std::auto_ptr</code> (C++98), depending on the C++ standard + selected (<code>--std</code> XSD compiler option). The following + example shows how we can use the above parsing functions:</p> + + <pre class="c++"> +using std::unique_ptr; + +// Parse a local file or URI. +// +unique_ptr<people_t> p1 (people ("people.xml")); +unique_ptr<people_t> p2 (people ("http://example.com/people.xml")); + +// Parse a local file via ifstream. +// +std::ifstream ifs ("people.xml"); +unique_ptr<people_t> p3 (people (ifs, "people.xml")); + +// Parse an XML string. +// +std::string str ("..."); // XML in a string. +std::istringstream iss (str); +unique_ptr<people_t> p4 (people (iss)); + </pre> + + + <h2><a name="5.1">5.1 XML Schema Validation and Searching</a></h2> + + <p>The C++/Tree mapping relies on the underlying Xerces-C++ XML + parser for full XML document validation. 
The XML Schema + validation is enabled by default and can be disabled by + passing the <code>xml_schema::flags::dont_validate</code> + flag to the parsing functions, for example:</p> + + <pre class="c++"> +unique_ptr<people_t> p ( + people ("people.xml", xml_schema::flags::dont_validate)); + </pre> + + <p>Even when XML Schema validation is disabled, the generated + code still performs a number of checks to prevent + construction of an inconsistent object model (for example, an + object model with missing required attributes or elements).</p> + + <p>When XML Schema validation is enabled, the XML parser needs + to locate a schema to validate against. There are several + methods to provide the schema location information to the + parser. The easiest and most commonly used method is to + specify schema locations in the XML document itself + with the <code>schemaLocation</code> or + <code>noNamespaceSchemaLocation</code> attributes, for example:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="people.xsd" + xsi:schemaLocation="http://www.w3.org/XML/1998/namespace xml.xsd"> + </pre> + + <p>As you might have noticed, we used this method in all the sample XML + documents presented in this guide up until now. Note that the + schema locations specified with these two attributes are relative + to the document's path unless they are absolute URIs (that is + start with <code>http://</code>, <code>file://</code>, etc.). 
+ In particular, if you specify just file names as your schema + locations, as we did above, then the schemas should reside in + the same directory as the XML document itself.</p> + + <p>Another method of providing the schema location information + is via the <code>xml_schema::properties</code> argument, as + shown in the following example:</p> + + <pre class="c++"> +xml_schema::properties props; +props.no_namespace_schema_location ("people.xsd"); +props.schema_location ("http://www.w3.org/XML/1998/namespace", "xml.xsd"); + +unique_ptr<people_t> p (people ("people.xml", 0, props)); + </pre> + + <p>The schema locations provided with this method override + those specified in the XML document. As with the previous + method, the schema locations specified this way are + relative to the document's path unless they are absolute URIs. + In particular, if you want to use local schemas that are + not related to the document being parsed, then you will + need to use the <code>file://</code> URI. The following + example shows how to use schemas that reside in the current + working directory:</p> + + <pre class="c++"> +#include <unistd.h> // getcwd +#include <limits.h> // PATH_MAX + +char cwd[PATH_MAX]; +if (getcwd (cwd, PATH_MAX) == 0) +{ + // Buffer too small? +} + +xml_schema::properties props; + +props.no_namespace_schema_location ( + "file:///" + std::string (cwd) + "/people.xsd"); + +props.schema_location ( + "http://www.w3.org/XML/1998/namespace", + "file:///" + std::string (cwd) + "/xml.xsd"); + +unique_ptr<people_t> p (people ("people.xml", 0, props)); + </pre> + + <p>A third method is the most useful if you are planning to parse + several XML documents of the same vocabulary. In that case + it may be beneficial to pre-parse and cache the schemas in + the XML parser which can then be used to parse all documents + without re-parsing the schemas. 
For more information on + this method refer to the <code>caching</code> example in the + <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package. + It is also possible to convert the schemas into a pre-compiled + binary representation and embed this representation directly into + the application executable. With this approach your application can + perform XML Schema validation without depending on any external + schema files. For more information on how to achieve this refer to + the <code>embedded</code> example in the <code>cxx/tree/</code> + directory in the <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <p>When the XML parser cannot locate a schema for the + XML document, the validation fails and XML document + elements and attributes for which schema definitions could + not be located are reported in the diagnostics. For + example, if we remove the <code>noNamespaceSchemaLocation</code> + attribute in <code>people.xml</code> from the previous chapter, + then we will get the following diagnostics if we try to parse + this file with validation enabled:</p> + + <pre class="terminal"> +people.xml:2:63 error: no declaration found for element 'people' +people.xml:4:18 error: no declaration found for element 'person' +people.xml:4:18 error: attribute 'id' is not declared for element 'person' +people.xml:5:17 error: no declaration found for element 'first-name' +people.xml:6:18 error: no declaration found for element 'middle-name' +people.xml:7:16 error: no declaration found for element 'last-name' +people.xml:8:13 error: no declaration found for element 'gender' +people.xml:9:10 error: no declaration found for element 'age' + </pre> + + <h2><a name="5.2">5.2 Error Handling</a></h2> + + <p>The parsing functions offer a number of ways to handle error conditions + with the C++ exceptions being the most commonly used mechanism. 
All + C++/Tree exceptions derive from common base <code>xml_schema::exception</code> + which in turn derives from <code>std::exception</code>. The easiest + way to uniformly handle all possible C++/Tree exceptions and print + detailed information about the error is to catch and print + <code>xml_schema::exception</code>, as shown in the following + example:</p> + + <pre class="c++"> +try +{ + unique_ptr<people_t> p (people ("people.xml")); +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>Each individual C++/Tree exception also allows you to obtain + error details programmatically. For example, the + <code>xml_schema::parsing</code> exception is thrown when + the XML parsing and validation in the underlying XML parser + fails. It encapsulates various diagnostics information + such as the file name, line and column numbers, as well as the + error or warning message for each entry. For more information + about this and other exceptions that can be thrown during + parsing, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#3.3">Section + 3.3, "Error Handling"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>Note that if you are parsing <code>std::istream</code> on which + exceptions are not enabled, then you will need to check the + stream state after the call to the parsing function in order + to detect any possible stream failures, for example:</p> + + <pre class="c++"> +std::ifstream ifs ("people.xml"); + +if (ifs.fail ()) +{ + cerr << "people.xml: unable to open" << endl; + return 1; +} + +unique_ptr<people_t> p (people (ifs, "people.xml")); + +if (ifs.fail ()) +{ + cerr << "people.xml: read error" << endl; + return 1; +} + </pre> + + <p>The above example can be rewritten to use exceptions as + shown below:</p> + + <pre class="c++"> +try +{ + std::ifstream ifs; + ifs.exceptions (std::ifstream::badbit | std::ifstream::failbit); + ifs.open ("people.xml"); + + unique_ptr<people_t> p 
(people (ifs, "people.xml")); +} +catch (const std::ifstream::failure&) +{ + cerr << "people.xml: unable to open or read error" << endl; + return 1; +} + </pre> + + + <!-- Chapater 6 --> + + + <h1><a name="6">6 Serialization</a></h1> + + <p>We have already seen how to serialize an object model back to XML + in this guide before. In this chapter we will discuss the + serialization topic in more detail.</p> + + <p>By default, the C++/Tree mapping provides a total of 8 overloaded + serialization functions. They differ in the output methods used to write + XML as well as the error reporting mechanisms. It is also possible to + generate types for root elements instead of parsing and serialization + functions. This may be useful if your XML vocabulary has multiple + root elements. For more information on element types refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#2.9">Section + 2.9, "Mapping for Global Elements"</a> in the C++/Tree Mapping User + Manual.</p> + + + <p>In this section we will discuss the most commonly + used version of serialization functions. For a comprehensive description + of serialization refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#4">Chapter + 4, "Serialization"</a> in the C++/Tree Mapping User Manual. For the + <code>people</code> global element from our person record vocabulary, + we will concentrate on the following serialization function:</p> + + <pre class="c++"> +void +people (std::ostream& os, + const people_t& x, + const xml_schema::namespace_infomap& map = + xml_schema::namespace_infomap (), + const std::string& encoding = "UTF-8", + xml_schema::flags f = 0); + </pre> + + <p>This function serializes the object model passed as the second + argument to the standard output stream passed as the first + argument. The third argument is a namespace information map + which we will discuss in more detail in the next section. 
+ The fourth argument is a character encoding that the resulting + XML document should be in. Possible valid values for this + argument are "US-ASCII", "ISO8859-1", "UTF-8", "UTF-16BE", + "UTF-16LE", "UCS-4BE", and "UCS-4LE". Finally, the flags + argument allows fine-tuning of the serialization process. + The following example shows how we can use the above serialization + function:</p> + + <pre class="c++"> +people_t& p = ... + +xml_schema::namespace_infomap map; +map[""].schema = "people.xsd"; + +// Serialize to stdout. +// +people (std::cout, p, map); + +// Serialize to a file. +// +std::ofstream ofs ("people.xml"); +people (ofs, p, map); + +// Serialize to a string. +// +std::ostringstream oss; +people (oss, p, map); +std::string xml (oss.str ()); + </pre> + + + <h2><a name="6.1">6.1 Namespace and Schema Information</a></h2> + + <p>While XML serialization can be done just from the object + model alone, it is often desirable to assign meaningful + prefixes to XML namespaces used in the vocabulary as + well as to provide the schema location information. + This is accomplished by passing the namespace information + map to the serialization function. The key in this map is + a namespace prefix that should be assigned to an XML namespace + specified in the <code>name</code> variable of the + map value. You can also assign an optional schema location for + this namespace in the <code>schema</code> variable. Based + on each key-value entry in this map, the serialization + function adds two attributes to the resulting XML document: + the namespace-prefix mapping attribute and schema location + attribute. The empty prefix indicates that the namespace + should be mapped without a prefix. 
For example, the following + map:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = "http://www.example.com/example"; +map[""].schema = "example.xsd"; + +map["x"].name = "http://www.w3.org/XML/1998/namespace"; +map["x"].schema = "xml.xsd"; + </pre> + + <p>Results in the following XML document:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<example + xmlns="http://www.example.com/example" + xmlns:x="http://www.w3.org/XML/1998/namespace" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.example.com/example example.xsd + http://www.w3.org/XML/1998/namespace xml.xsd"> + </pre> + + <p>The empty namespace indicates that the vocabulary has no target + namespace. For example, the following map results in only the + <code>noNamespaceSchemaLocation</code> attribute being added:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = ""; +map[""].schema = "example.xsd"; + </pre> + + <h2><a name="6.2">6.2 Error Handling</a></h2> + + <p>Similar to the parsing functions, the serialization functions offer a + number of ways to handle error conditions with the C++ exceptions being + the most commonly used mechanism. As with parsing, the easiest way to + uniformly handle all possible serialization exceptions and print + detailed information about the error is to catch and print + <code>xml_schema::exception</code>:</p> + + <pre class="c++"> +try +{ + people_t& p = ... + + xml_schema::namespace_infomap map; + map[""].schema = "people.xsd"; + + people (std::cout, p, map); +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>The most commonly encountered serialization exception is + <code>xml_schema::serialization</code>. It is thrown + when the XML serialization in the underlying XML writer + fails. It encapsulates various diagnostics information + such as the file name, line and column numbers, as well as the + error or warning message for each entry. 
For more information + about this and other exceptions that can be thrown during + serialization, refer to + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/#4.4">Section + 4.4, "Error Handling"</a> in the C++/Tree Mapping + User Manual.</p> + + <p>Note that if you are serializing to <code>std::ostream</code> on + which exceptions are not enabled, then you will need to check the + stream state after the call to the serialization function in order + to detect any possible stream failures, for example:</p> + + <pre class="c++"> +std::ofstream ofs ("people.xml"); + +if (ofs.fail ()) +{ + cerr << "people.xml: unable to open" << endl; + return 1; +} + +people (ofs, p, map); + +if (ofs.fail ()) +{ + cerr << "people.xml: write error" << endl; + return 1; +} + </pre> + + <p>The above example can be rewritten to use exceptions as + shown below:</p> + + <pre class="c++"> +try +{ + std::ofstream ofs; + ofs.exceptions (std::ofstream::badbit | std::ofstream::failbit); + ofs.open ("people.xml"); + + people (ofs, p, map); +} +catch (const std::ofstream::failure&) +{ + cerr << "people.xml: unable to open or write error" << endl; + return 1; +} + </pre> + + </div> +</div> + +</body> +</html> diff --git a/doc/cxx/tree/manual/index.xhtml b/doc/cxx/tree/manual/index.xhtml new file mode 100644 index 0000000..f455bff --- /dev/null +++ b/doc/cxx/tree/manual/index.xhtml @@ -0,0 +1,6826 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Tree Mapping User Manual</title> + + <meta name="copyright" content="© 2005-2023 Code Synthesis"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,tree,serialization,guide,manual,examples"/> + <meta name="description" content="C++/Tree Mapping User Manual"/> + <meta name="revision" content="4.1.0"/> + 
+ <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 130%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. */ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage #title { + font-weight: bold; + font-size: 200%; + text-align: center; + padding: 1em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + + /* default-fixed */ + #default-fixed { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #default-fixed th, #default-fixed td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #default-fixed th { + background : #cde8f6; + } + + #default-fixed td { + text-align: center; + } + + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; 
+ } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div id="title">C++/Tree Mapping User Manual</div> + + <p>Copyright © 2005-2023 Code Synthesis.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. + </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.ps">PostScript</a>.</p> + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a></td> + </tr> + + <tr> + <th>2</th><td><a href="#2">C++/Tree Mapping</a> + <table class="toc"> + <tr> + <th>2.1</th><td><a href="#2.1">Preliminary Information</a> + <table class="toc"> + <tr><th>2.1.1</th><td><a href="#2.1.1">C++ Standard</a></td></tr> + <tr><th>2.1.2</th><td><a href="#2.1.2">Identifiers</a></td></tr> + <tr><th>2.1.3</th><td><a 
href="#2.1.3">Character Type and Encoding</a></td></tr> + <tr><th>2.1.4</th><td><a href="#2.1.4">XML Schema Namespace</a></td></tr> + <tr><th>2.1.5</th><td><a href="#2.1.5">Anonymous Types</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.2</th><td><a href="#2.2">Error Handling</a> + <table class="toc"> + <tr><th>2.2.1</th><td><a href="#2.2.1"><code>xml_schema::duplicate_id</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.3</th><td><a href="#2.3">Mapping for <code>import</code> and <code>include</code></a> + <table class="toc"> + <tr><th>2.3.1</th><td><a href="#2.3.1">Import</a></td></tr> + <tr><th>2.3.2</th><td><a href="#2.3.2">Inclusion with Target Namespace</a></td></tr> + <tr><th>2.3.3</th><td><a href="#2.3.3">Inclusion without Target Namespace</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.4</th><td><a href="#2.4">Mapping for Namespaces</a></td> + </tr> + <tr> + <th>2.5</th><td><a href="#2.5">Mapping for Built-in Data Types</a> + <table class="toc"> + <tr><th>2.5.1</th><td><a href="#2.5.1">Inheritance from Built-in Data Types</a></td></tr> + <tr><th>2.5.2</th><td><a href="#2.5.2">Mapping for <code>anyType</code></a></td></tr> + <tr><th>2.5.3</th><td><a href="#2.5.3">Mapping for <code>anySimpleType</code></a></td></tr> + <tr><th>2.5.4</th><td><a href="#2.5.4">Mapping for <code>QName</code></a></td></tr> + <tr><th>2.5.5</th><td><a href="#2.5.5">Mapping for <code>IDREF</code></a></td></tr> + <tr><th>2.5.6</th><td><a href="#2.5.6">Mapping for <code>base64Binary</code> and <code>hexBinary</code></a></td></tr> + <tr><th>2.5.7</th><td><a href="#2.5.7">Time Zone Representation</a></td></tr> + <tr><th>2.5.8</th><td><a href="#2.5.8">Mapping for <code>date</code></a></td></tr> + <tr><th>2.5.9</th><td><a href="#2.5.9">Mapping for <code>dateTime</code></a></td></tr> + <tr><th>2.5.10</th><td><a href="#2.5.10">Mapping for <code>duration</code></a></td></tr> + <tr><th>2.5.11</th><td><a href="#2.5.11">Mapping for <code>gDay</code></a></td></tr> + 
<tr><th>2.5.12</th><td><a href="#2.5.12">Mapping for <code>gMonth</code></a></td></tr> + <tr><th>2.5.13</th><td><a href="#2.5.13">Mapping for <code>gMonthDay</code></a></td></tr> + <tr><th>2.5.14</th><td><a href="#2.5.14">Mapping for <code>gYear</code></a></td></tr> + <tr><th>2.5.15</th><td><a href="#2.5.15">Mapping for <code>gYearMonth</code></a></td></tr> + <tr><th>2.5.16</th><td><a href="#2.5.16">Mapping for <code>time</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.6</th><td><a href="#2.6">Mapping for Simple Types</a> + <table class="toc"> + <tr><th>2.6.1</th><td><a href="#2.6.1">Mapping for Derivation by Restriction</a></td></tr> + <tr><th>2.6.2</th><td><a href="#2.6.2">Mapping for Enumerations</a></td></tr> + <tr><th>2.6.3</th><td><a href="#2.6.3">Mapping for Derivation by List</a></td></tr> + <tr><th>2.6.4</th><td><a href="#2.6.4">Mapping for Derivation by Union</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.7</th><td><a href="#2.7">Mapping for Complex Types</a> + <table class="toc"> + <tr><th>2.7.1</th><td><a href="#2.7.1">Mapping for Derivation by Extension</a></td></tr> + <tr><th>2.7.2</th><td><a href="#2.7.2">Mapping for Derivation by Restriction</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.8</th><td><a href="#2.8">Mapping for Local Elements and Attributes</a> + <table class="toc"> + <tr><th>2.8.1</th><td><a href="#2.8.1">Mapping for Members with the One Cardinality Class</a></td></tr> + <tr><th>2.8.2</th><td><a href="#2.8.2">Mapping for Members with the Optional Cardinality Class</a></td></tr> + <tr><th>2.8.3</th><td><a href="#2.8.3">Mapping for Members with the Sequence Cardinality Class</a></td></tr> + <tr><th>2.8.4</th><td><a href="#2.8.4">Element Order</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.9</th><td><a href="#2.9">Mapping for Global Elements</a> + <table class="toc"> + <tr><th>2.9.1</th><td><a href="#2.9.1">Element Types</a></td></tr> + <tr><th>2.9.2</th><td><a href="#2.9.2">Element 
Map</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.10</th><td><a href="#2.10">Mapping for Global Attributes</a></td> + </tr> + <tr> + <th>2.11</th><td><a href="#2.11">Mapping for <code>xsi:type</code> and Substitution Groups</a></td> + </tr> + <tr> + <th>2.12</th><td><a href="#2.12">Mapping for <code>any</code> and <code>anyAttribute</code></a> + <table class="toc"> + <tr><th>2.12.1</th><td><a href="#2.12.1">Mapping for <code>any</code> with the One Cardinality Class</a></td></tr> + <tr><th>2.12.2</th><td><a href="#2.12.2">Mapping for <code>any</code> with the Optional Cardinality Class</a></td></tr> + <tr><th>2.12.3</th><td><a href="#2.12.3">Mapping for <code>any</code> with the Sequence Cardinality Class</a></td></tr> + <tr><th>2.12.4</th><td><a href="#2.12.4">Element Wildcard Order</a></td></tr> + <tr><th>2.12.5</th><td><a href="#2.12.5">Mapping for <code>anyAttribute</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.13</th><td><a href="#2.13">Mapping for Mixed Content Models</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Parsing</a> + <table class="toc"> + <tr> + <th>3.1</th><td><a href="#3.1">Initializing the Xerces-C++ Runtime</a></td> + </tr> + <tr> + <th>3.2</th><td><a href="#3.2">Flags and Properties</a></td> + </tr> + <tr> + <th>3.3</th><td><a href="#3.3">Error Handling</a> + <table class="toc"> + <tr><th>3.3.1</th><td><a href="#3.3.1"><code>xml_schema::parsing</code></a></td></tr> + <tr><th>3.3.2</th><td><a href="#3.3.2"><code>xml_schema::expected_element</code></a></td></tr> + <tr><th>3.3.3</th><td><a href="#3.3.3"><code>xml_schema::unexpected_element</code></a></td></tr> + <tr><th>3.3.4</th><td><a href="#3.3.4"><code>xml_schema::expected_attribute</code></a></td></tr> + <tr><th>3.3.5</th><td><a href="#3.3.5"><code>xml_schema::unexpected_enumerator</code></a></td></tr> + <tr><th>3.3.6</th><td><a href="#3.3.6"><code>xml_schema::expected_text_content</code></a></td></tr> + <tr><th>3.3.7</th><td><a 
href="#3.3.7"><code>xml_schema::no_type_info</code></a></td></tr> + <tr><th>3.3.8</th><td><a href="#3.3.8"><code>xml_schema::not_derived</code></a></td></tr> + <tr><th>3.3.9</th><td><a href="#3.3.9"><code>xml_schema::no_prefix_mapping</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>3.4</th><td><a href="#3.4">Reading from a Local File or URI</a></td> + </tr> + <tr> + <th>3.5</th><td><a href="#3.5">Reading from <code>std::istream</code></a></td> + </tr> + <tr> + <th>3.6</th><td><a href="#3.6">Reading from <code>xercesc::InputSource</code></a></td> + </tr> + <tr> + <th>3.7</th><td><a href="#3.7">Reading from DOM</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Serialization</a> + <table class="toc"> + <tr> + <th>4.1</th><td><a href="#4.1">Initializing the Xerces-C++ Runtime</a></td> + </tr> + <tr> + <th>4.2</th><td><a href="#4.2">Namespace Infomap and Character Encoding</a></td> + </tr> + <tr> + <th>4.3</th><td><a href="#4.3">Flags</a></td> + </tr> + <tr> + <th>4.4</th><td><a href="#4.4">Error Handling</a> + <table class="toc"> + <tr><th>4.4.1</th><td><a href="#4.4.1"><code>xml_schema::serialization</code></a></td></tr> + <tr><th>4.4.2</th><td><a href="#4.4.2"><code>xml_schema::unexpected_element</code></a></td></tr> + <tr><th>4.4.3</th><td><a href="#4.4.3"><code>xml_schema::no_type_info</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>4.5</th><td><a href="#4.5">Serializing to <code>std::ostream</code></a></td> + </tr> + <tr> + <th>4.6</th><td><a href="#4.6">Serializing to <code>xercesc::XMLFormatTarget</code></a></td> + </tr> + <tr> + <th>4.7</th><td><a href="#4.7">Serializing to DOM</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Additional Functionality</a> + <table class="toc"> + <tr> + <th>5.1</th><td><a href="#5.1">DOM Association</a></td> + </tr> + <tr> + <th>5.2</th><td><a href="#5.2">Binary Serialization</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + 
<th></th><td><a href="#A">Appendix A — Default and Fixed Values</a></td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>This document describes the mapping of W3C XML Schema + to the C++ programming language as implemented by + <a href="https://www.codesynthesis.com/products/xsd">CodeSynthesis + XSD</a> - an XML Schema to C++ data binding compiler. The mapping + represents information stored in XML instance documents as a + statically-typed, tree-like in-memory data structure and is + called C++/Tree. + </p> + + <p>Revision 4.1.0<br/> <!-- Remember to change revision in other places --> + This revision of the manual describes the C++/Tree + mapping as implemented by CodeSynthesis XSD version 4.1.0. + </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.ps">PostScript</a>.</p> + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this manual, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/">C++/Tree + Mapping Getting Started Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree + Mapping Frequently Asked Questions (FAQ)</a></li> + + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + 
contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is a place to ask questions. Furthermore the + <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + </ul> + + + <h1><a name="1">1 Introduction</a></h1> + + <p>C++/Tree is a W3C XML Schema to C++ mapping that represents the + data stored in XML as a statically-typed, vocabulary-specific + object model. Based on a formal description of an XML vocabulary + (schema), the C++/Tree mapping produces a tree-like data structure + suitable for in-memory processing as well as XML parsing and + serialization code.</p> + + <p>A typical application that processes XML documents usually + performs the following three steps: it first reads (parses) an XML + instance document to an object model, it then performs + some useful computations on that model which may involve + modification of the model, and finally it may write (serialize) + the modified object model back to XML. + </p> + + <p>The C++/Tree mapping consists of C++ types that represent the + given vocabulary (<a href="#2">Chapter 2, "C++/Tree Mapping"</a>), + a set of parsing functions that convert XML documents to + a tree-like in-memory data structure (<a href="#3">Chapter 3, + "Parsing"</a>), and a set of serialization functions that convert + the object model back to XML (<a href="#4">Chapter 4, + "Serialization"</a>). Furthermore, the mapping provides a number + of additional features, such as DOM association and binary + serialization, that can be useful in some applications + (<a href="#5">Chapter 5, "Additional Functionality"</a>). 
+ </p> + + + <!-- Chapter 2 --> + + + <h1><a name="2">2 C++/Tree Mapping</a></h1> + + <h2><a name="2.1">2.1 Preliminary Information</a></h2> + + <h3><a name="2.1.1">2.1.1 C++ Standard</a></h3> + + <p>The C++/Tree mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. While the majority of the examples in this manual use + C++11, the document explains the C++11/98 usage difference and so + they can easily be converted to C++98.</p> + + <h3><a name="2.1.2">2.1.2 Identifiers</a></h3> + + <p>XML Schema names may happen to be reserved C++ keywords or contain + characters that are illegal in C++ identifiers. To avoid C++ compilation + problems, such names are changed (escaped) when mapped to C++. If an + XML Schema name is a C++ keyword, the "_" suffix is added to it. All + characters of an XML Schema name that are not allowed in C++ identifiers + are replaced with "_". + </p> + + <p>For example, XML Schema name <code>try</code> will be mapped to + C++ identifier <code>try_</code>. Similarly, XML Schema name + <code>strange.na-me</code> will be mapped to C++ identifier + <code>strange_na_me</code>. + </p> + + <p>Furthermore, conflicts between type names and function names in the + same scope are resolved using name escaping. 
Such conflicts include + both a global element (which is mapped to a set of parsing and/or + serialization functions or element types, see <a href="#2.9">Section + 2.9, "Mapping for Global Elements"</a>) and a global type sharing the + same name as well as a local element or attribute inside a type having + the same name as the type itself.</p> + + <p>For example, if we had a global type <code>catalog</code> + and a global element with the same name then the type would be + mapped to a C++ class with name <code>catalog</code> while the + parsing functions corresponding to the global element would have + their names escaped as <code>catalog_</code>. + </p> + + <p>By default the mapping uses the so-called K&R (Kernighan and + Ritchie) identifier naming convention which is also used throughout + this manual. In this convention both type and function names are in + lower case and words are separated by underscores. If your application + code or schemas use a different notation, you may want to change the + naming convention used by the mapping for consistency. + The compiler supports a set of widely-used naming conventions + that you can select with the <code>--type-naming</code> and + <code>--function-naming</code> options. You can also further + refine one of the predefined conventions or create a completely + custom naming scheme by using the <code>--*-regex</code> options. + For more detailed information on these options refer to the NAMING + CONVENTION section in the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + <h3><a name="2.1.3">2.1.3 Character Type and Encoding</a></h3> + + <p>The code that implements the mapping, depending on the + <code>--char-type</code> option, is generated using either + <code>char</code> or <code>wchar_t</code> as the character + type. 
In this document code samples use symbol <code>C</code> + to refer to the character type you have selected when translating + your schemas, for example <code>std::basic_string<C></code>. + </p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings; these can be selected with the + <code>--char-encoding</code> command line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <h3><a name="2.1.4">2.1.4 XML Schema Namespace</a></h3> + + <p>The mapping relies on some predefined types, classes, and functions + that are logically defined in the XML Schema namespace reserved for + the XML Schema language (<code>http://www.w3.org/2001/XMLSchema</code>). + By default, this namespace is mapped to C++ namespace + <code>xml_schema</code>. It is automatically accessible + from a C++ compilation unit that includes a header file generated + from an XML Schema definition. + </p> + + <p>Note that, if desired, the default mapping of this namespace can be + changed as described in <a href="#2.4">Section 2.4, "Mapping for + Namespaces"</a>. + </p> + + + <h3><a name="2.1.5">2.1.5 Anonymous Types</a></h3> + + <p>For the purpose of code generation, anonymous types defined in + XML Schema are automatically assigned names that are derived + from enclosing attributes and elements. 
Otherwise, such types + follow standard mapping rules for simple and complex type + definitions (see <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a> + and <a href="#2.7">Section 2.7, "Mapping for Complex Types"</a>). + For example, in the following schema fragment: + </p> + + <pre class="xml"> +<element name="object"> + <complexType> + ... + </complexType> +</element> + </pre> + + <p>The anonymous type defined inside element <code>object</code> will + be given name <code>object</code>. The compiler has a number of + options that control the process of anonymous type naming. For more + information refer to the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + + <h2><a name="2.2">2.2 Error Handling</a></h2> + + <p>The mapping uses the C++ exception handling mechanism as a primary way + of reporting error conditions. All exceptions that are specified in + this mapping derive from <code>xml_schema::exception</code> which + itself is derived from <code>std::exception</code>: + </p> + + <pre class="c++"> +struct exception: virtual std::exception +{ + friend + std::basic_ostream<C>& + operator<< (std::basic_ostream<C>& os, const exception& e) + { + e.print (os); + return os; + } + +protected: + virtual void + print (std::basic_ostream<C>&) const = 0; +}; + </pre> + + <p>The exception hierarchy supports "virtual" <code>operator<<</code> + which allows you to obtain diagnostics corresponding to the thrown + exception using the base exception interface. For example:</p> + + <pre class="c++"> +try +{ + ... +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>The following sub-sections describe exceptions thrown by the + types that constitute the object model. + <a href="#3.3">Section 3.3, "Error Handling"</a> of + <a href="#3">Chapter 3, "Parsing"</a> describes exceptions + and error handling mechanisms specific to the parsing functions. 
+ <a href="#4.4">Section 4.4, "Error Handling"</a> of + <a href="#4">Chapter 4, "Serialization"</a> describes exceptions + and error handling mechanisms specific to the serialization functions. + </p> + + + <h3><a name="2.2.1">2.2.1 <code>xml_schema::duplicate_id</code></a></h3> + + <pre class="c++"> +struct duplicate_id: virtual exception +{ + duplicate_id (const std::basic_string<C>& id); + + const std::basic_string<C>& + id () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::duplicate_id</code> is thrown when + a conflicting instance of <code>xml_schema::id</code> (see + <a href="#2.5">Section 2.5, "Mapping for Built-in Data Types"</a>) + is added to a tree. The offending ID value can be obtained using + the <code>id</code> function. + </p> + + <h2><a name="2.3">2.3 Mapping for <code>import</code> and <code>include</code></a></h2> + + <h3><a name="2.3.1">2.3.1 Import</a></h3> + + <p>The XML Schema <code>import</code> element is mapped to the C++ + Preprocessor <code>#include</code> directive. The value of + the <code>schemaLocation</code> attribute is used to derive + the name of the header file that appears in the <code>#include</code> + directive. For instance: + </p> + + <pre class="xml"> +<import namespace="https://www.codesynthesis.com/test" + schemaLocation="test.xsd"/> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +#include "test.hxx" + </pre> + + <p>Note that you will need to compile imported schemas separately + in order to produce corresponding header files.</p> + + <h3><a name="2.3.2">2.3.2 Inclusion with Target Namespace</a></h3> + + <p>The XML Schema <code>include</code> element which refers to a schema + with a target namespace or appears in a schema without a target namespace + follows the same mapping rules as the <code>import</code> element, + see <a href="#2.3.1">Section 2.3.1, "Import"</a>. 
+ </p> + + <h3><a name="2.3.3">2.3.3 Inclusion without Target Namespace</a></h3> + + <p>For the XML Schema <code>include</code> element which refers to a schema + without a target namespace and appears in a schema with a target + namespace (such inclusion is sometimes called "chameleon inclusion"), + declarations and definitions from the included schema are generated + in-line in the namespace of the including schema as if they were + declared and defined there verbatim. For example, consider the + following two schemas: + </p> + + <pre class="xml"> +<!-- common.xsd --> +<schema> + <complexType name="type"> + ... + </complexType> +</schema> + +<!-- test.xsd --> +<schema targetNamespace="https://www.codesynthesis.com/test"> + <include schemaLocation="common.xsd"/> +</schema> + </pre> + + <p>The fragment of interest from the generated header file for + <code>test.xsd</code> would look like this:</p> + + <pre class="c++"> +// test.hxx +namespace test +{ + class type + { + ... + }; +} + </pre> + + <h2><a name="2.4">2.4 Mapping for Namespaces</a></h2> + + <p>An XML Schema namespace is mapped to one or more nested C++ + namespaces. XML Schema namespaces are identified by URIs. + By default, a namespace URI is mapped to a sequence of + C++ namespace names by removing the protocol and host parts + and splitting the rest into a sequence of names with '<code>/</code>' + as the name separator. For instance: + </p> + + <pre class="xml"> +<schema targetNamespace="https://www.codesynthesis.com/system/test"> + ... +</schema> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +namespace system +{ + namespace test + { + ... + } +} + </pre> + + <p>The default mapping of namespace URIs to C++ namespace names can be + altered using the <code>--namespace-map</code> and + <code>--namespace-regex</code> options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information. 
+ </p> + + <h2><a name="2.5">2.5 Mapping for Built-in Data Types</a></h2> + + <p>The mapping of XML Schema built-in data types to C++ types is + summarized in the table below.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Alias in the <code>xml_schema</code> namespace</th> + <th>C++ type</th> + </tr> + + <tr> + <th colspan="3">anyType and anySimpleType types</th> + </tr> + <tr> + <td><code>anyType</code></td> + <td><code>type</code></td> + <td><a href="#2.5.2">Section 2.5.2, "Mapping for <code>anyType</code>"</a></td> + </tr> + <tr> + <td><code>anySimpleType</code></td> + <td><code>simple_type</code></td> + <td><a href="#2.5.3">Section 2.5.3, "Mapping for <code>anySimpleType</code>"</a></td> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_</code></td> + <td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + 
<td><code>integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string types</th> + </tr> + <tr> + <td><code>string</code></td> + <td><code>string</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string</code></td> + <td>type derived from <code>string</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token</code></td> + <td>type derived from <code>normalized_string</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + 
<td><code>NMTOKENS</code></td> + <td><code>nmtokens</code></td> + <td>type derived from <code>sequence<nmtoken></code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>language</code></td> + <td><code>language</code></td> + <td>type derived from <code>token</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname</code></td> + <td><a href="#2.5.4">Section 2.5.4, "Mapping for <code>QName</code>"</a></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref</code></td> + <td><a href="#2.5.5">Section 2.5.5, "Mapping for <code>IDREF</code>"</a></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs</code></td> + <td>type derived from <code>sequence<idref></code></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary</code></td> + <td rowspan="2"><a href="#2.5.6">Section 2.5.6, "Mapping for + <code>base64Binary</code> and <code>hexBinary</code>"</a></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary</code></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date</code></td> + <td><a href="#2.5.8">Section 2.5.8, "Mapping for + <code>date</code>"</a></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time</code></td> + <td><a href="#2.5.9">Section 2.5.9, "Mapping for + <code>dateTime</code>"</a></td> + </tr> + <tr> + 
<td><code>duration</code></td> + <td><code>duration</code></td> + <td><a href="#2.5.10">Section 2.5.10, "Mapping for + <code>duration</code>"</a></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday</code></td> + <td><a href="#2.5.11">Section 2.5.11, "Mapping for + <code>gDay</code>"</a></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth</code></td> + <td><a href="#2.5.12">Section 2.5.12, "Mapping for + <code>gMonth</code>"</a></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + <td><code>gmonth_day</code></td> + <td><a href="#2.5.13">Section 2.5.13, "Mapping for + <code>gMonthDay</code>"</a></td> + </tr> + <tr> + <td><code>gYear</code></td> + <td><code>gyear</code></td> + <td><a href="#2.5.14">Section 2.5.14, "Mapping for + <code>gYear</code>"</a></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month</code></td> + <td><a href="#2.5.15">Section 2.5.15, "Mapping for + <code>gYearMonth</code>"</a></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time</code></td> + <td><a href="#2.5.16">Section 2.5.16, "Mapping for + <code>time</code>"</a></td> + </tr> + + <tr> + <th colspan="3">entity types</th> + </tr> + <tr> + <td><code>ENTITY</code></td> + <td><code>entity</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>ENTITIES</code></td> + <td><code>entities</code></td> + <td>type derived from <code>sequence<entity></code></td> + </tr> + </table> + + <p>All XML Schema built-in types are mapped to C++ classes that are + derived from the <code>xml_schema::simple_type</code> class except + where the mapping is to a fundamental C++ type.</p> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). 
+ Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. One notable extension + to the standard interface that is available only for + sequences of non-fundamental C++ types is the addition of + the overloaded <code>push_back</code> and <code>insert</code> + member functions which instead of the constant reference + to the element type accept automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to the element type. These functions assume ownership + of the pointed to object and reset the passed automatic pointer. + </p> + + <h3><a name="2.5.1">2.5.1 Inheritance from Built-in Data Types</a></h3> + + <p>In cases where the mapping calls for an inheritance from a built-in + type which is mapped to a fundamental C++ type, a proxy type is + used instead of the fundamental C++ type (C++ does not allow + inheritance from fundamental types). For instance:</p> + + <pre class="xml"> +<simpleType name="my_int"> + <restriction base="int"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class my_int: public fundamental_base<int> +{ + ... +}; + </pre> + + <p>The <code>fundamental_base</code> class template provides a close + emulation (though not exact) of a fundamental C++ type. 
+ It is defined in an implementation-specific namespace and has the + following interface:</p> + + <pre class="c++"> +template <typename X> +class fundamental_base: public simple_type +{ +public: + fundamental_base (); + fundamental_base (X); + fundamental_base (const fundamental_base&); + +public: + fundamental_base& + operator= (const X&); + +public: + operator const X & () const; + operator X& (); + + template <typename Y> + operator Y () const; + + template <typename Y> + operator Y (); +}; + </pre> + + <h3><a name="2.5.2">2.5.2 Mapping for <code>anyType</code></a></h3> + + <p>The XML Schema <code>anyType</code> built-in data type is mapped to the + <code>xml_schema::type</code> C++ class:</p> + + <pre class="c++"> +class type +{ +public: + virtual + ~type (); + + type (); + type (const type&); + + type& + operator= (const type&); + + virtual type* + _clone () const; + + // anyType DOM content. + // +public: + typedef element_optional dom_content_optional; + + const dom_content_optional& + dom_content () const; + + dom_content_optional& + dom_content (); + + void + dom_content (const xercesc::DOMElement&); + + void + dom_content (xercesc::DOMElement*); + + void + dom_content (const dom_content_optional&); + + const xercesc::DOMDocument& + dom_content_document () const; + + xercesc::DOMDocument& + dom_content_document (); + + bool + null_content () const; + + // DOM association. + // +public: + const xercesc::DOMNode* + _node () const; + + xercesc::DOMNode* + _node (); +}; + </pre> + + <p>When <code>xml_schema::type</code> is used to create an instance + (as opposed to being a base of a derived type), it represents + the XML Schema <code>anyType</code> type. <code>anyType</code> + allows any attributes and any content in any order. 
In the + C++/Tree mapping this content can be represented as a DOM + fragment, similar to XML Schema wildcards (<a href="#2.12">Section + 2.12, "Mapping for <code>any</code> and + <code>anyAttribute</code>"</a>).</p> + + <p>To enable automatic extraction of <code>anyType</code> content + during parsing, the <code>--generate-any-type</code> option must be + specified. Because the DOM API is used to access such content, the + Xerces-C++ runtime should be initialized by the application prior to + parsing and should remain initialized for the lifetime of objects + with the DOM content. For more information on the Xerces-C++ runtime + initialization see <a href="#3.1">Section 3.1, "Initializing the + Xerces-C++ Runtime"</a>.</p> + + <p>The DOM content is stored as the optional DOM element container + and the DOM content accessors and modifiers presented above are + identical to those generated for an optional element wildcard. + Refer to <a href="#2.12.2">Section 2.12.2, "Mapping for <code>any</code> + with the Optional Cardinality Class"</a> for details on their + semantics.</p> + + <p>The <code>dom_content_document()</code> function returns the + DOM document used to store the raw XML content corresponding + to the <code>anyType</code> instance. 
 It is equivalent to the + <code>dom_document()</code> function generated for types + with wildcards.</p> + + <p>The <code>null_content()</code> accessor is an optimization function + that allows us to check for the lack of content without actually + creating its empty representation, that is, an empty DOM document for + <code>anyType</code> or an empty string for <code>anySimpleType</code> + (see the following section for details on <code>anySimpleType</code>).</p> + + <p>For more information on DOM association refer to + <a href="#5.1">Section 5.1, "DOM Association"</a>.</p> + + <h3><a name="2.5.3">2.5.3 Mapping for <code>anySimpleType</code></a></h3> + + <p>The XML Schema <code>anySimpleType</code> built-in data type is mapped + to the <code>xml_schema::simple_type</code> C++ class:</p> + + <pre class="c++"> +class simple_type: public type +{ +public: + simple_type (); + simple_type (const C*); + simple_type (const std::basic_string<C>&); + + simple_type (const simple_type&); + + simple_type& + operator= (const simple_type&); + + virtual simple_type* + _clone () const; + + // anySimpleType text content. + // +public: + const std::basic_string<C>& + text_content () const; + + std::basic_string<C>& + text_content (); + + void + text_content (const std::basic_string<C>&); +}; + </pre> + + <p>When <code>xml_schema::simple_type</code> is used to create an instance + (as opposed to being a base of a derived type), it represents + the XML Schema <code>anySimpleType</code> type. <code>anySimpleType</code> + allows any simple content. 
 In the C++/Tree mapping this content can + be represented as a string and accessed or modified with the + <code>text_content()</code> functions shown above.</p> + + <h3><a name="2.5.4">2.5.4 Mapping for <code>QName</code></a></h3> + + <p>The XML Schema <code>QName</code> built-in data type is mapped to the + <code>xml_schema::qname</code> C++ class:</p> + + <pre class="c++"> +class qname: public simple_type +{ +public: + qname (const ncname&); + qname (const uri&, const ncname&); + qname (const qname&); + +public: + qname& + operator= (const qname&); + +public: + virtual qname* + _clone () const; + +public: + bool + qualified () const; + + const uri& + namespace_ () const; + + const ncname& + name () const; +}; + </pre> + + <p>The <code>qualified</code> accessor function can be used to determine + if the name is qualified.</p> + + <h3><a name="2.5.5">2.5.5 Mapping for <code>IDREF</code></a></h3> + + <p>The XML Schema <code>IDREF</code> built-in data type is mapped to the + <code>xml_schema::idref</code> C++ class. This class implements the + smart pointer C++ idiom:</p> + + <pre class="c++"> +class idref: public ncname +{ +public: + idref (const C* s); + idref (const C* s, std::size_t n); + idref (std::size_t n, C c); + idref (const std::basic_string<C>&); + idref (const std::basic_string<C>&, + std::size_t pos, + std::size_t n = npos); + +public: + idref (const idref&); + +public: + virtual idref* + _clone () const; + +public: + idref& + operator= (C c); + + idref& + operator= (const C* s); + + idref& + operator= (const std::basic_string<C>&); + + idref& + operator= (const idref&); + +public: + const type* + operator-> () const; + + type* + operator-> (); + + const type& + operator* () const; + + type& + operator* (); + + const type* + get () const; + + type* + get (); + + // Conversion to bool. 
+ // +public: + typedef void (idref::*bool_convertible)(); + operator bool_convertible () const; +}; + </pre> + + <p>The object that an <code>idref</code> instance refers to is the immediate + container of the matching <code>id</code> instance. For example, + with the following instance document and schema: + </p> + + + <pre class="xml"> +<!-- test.xml --> +<root> + <object id="obj-1" text="hello"/> + <reference>obj-1</reference> +</root> + +<!-- test.xsd --> +<schema> + <complexType name="object_type"> + <attribute name="id" type="ID"/> + <attribute name="text" type="string"/> + </complexType> + + <complexType name="root_type"> + <sequence> + <element name="object" type="object_type"/> + <element name="reference" type="IDREF"/> + </sequence> + </complexType> + + <element name="root" type="root_type"/> +</schema> + </pre> + + <p>The <code>ref</code> instance in the code below will refer to + an object of type <code>object_type</code>:</p> + + <pre class="c++"> +root_type& root = ...; +xml_schema::idref& ref (root.reference ()); +object_type& obj (dynamic_cast<object_type&> (*ref)); +cout << obj.text () << endl; + </pre> + + <p>The smart pointer interface of the <code>idref</code> class always + returns a pointer or reference to <code>xml_schema::type</code>. + This means that you will need to manually cast such a pointer or + reference to its real (dynamic) type before you can use it (unless + all you need is the base interface provided by + <code>xml_schema::type</code>). As a special extension to the XML + Schema language, the mapping supports static typing of <code>idref</code> + references by employing the <code>refType</code> extension attribute. + The following example illustrates this mechanism: + </p> + + <pre class="xml"> +<!-- test.xsd --> +<schema + xmlns:xse="https://www.codesynthesis.com/xmlns/xml-schema-extension"> + + ... + + <element name="reference" type="IDREF" xse:refType="object_type"/> + + ... 
+ +</schema> + </pre> + + <p>With this modification we do not need to do manual casting anymore: + </p> + + <pre class="c++"> +root_type& root = ...; +root_type::reference_type& ref (root.reference ()); +object_type& obj (*ref); +cout << ref->text () << endl; + </pre> + + + <h3><a name="2.5.6">2.5.6 Mapping for <code>base64Binary</code> and + <code>hexBinary</code></a></h3> + + <p>The XML Schema <code>base64Binary</code> and <code>hexBinary</code> + built-in data types are mapped to the + <code>xml_schema::base64_binary</code> and + <code>xml_schema::hex_binary</code> C++ classes, respectively. The + <code>base64_binary</code> and <code>hex_binary</code> classes + support a simple buffer abstraction by inheriting from the + <code>xml_schema::buffer</code> class: + </p> + + <pre class="c++"> +class bounds: public virtual exception +{ +public: + virtual const char* + what () const throw (); +}; + +class buffer +{ +public: + typedef std::size_t size_t; + +public: + buffer (size_t size = 0); + buffer (size_t size, size_t capacity); + buffer (const void* data, size_t size); + buffer (const void* data, size_t size, size_t capacity); + buffer (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + void + swap (buffer&); + +public: + size_t + capacity () const; + + bool + capacity (size_t); + +public: + size_t + size () const; + + bool + size (size_t); + +public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); +}; + </pre> + + <p>The last overloaded constructor reuses an existing data buffer instead + of making a copy. If the <code>assume_ownership</code> argument is + <code>true</code>, the instance assumes ownership of the + memory block pointed to by the <code>data</code> argument and will + eventually release it by calling <code>operator delete</code>. 
The + <code>capacity</code> and <code>size</code> modifier functions return + <code>true</code> if the underlying buffer has moved. + </p> + + <p>The <code>bounds</code> exception is thrown if the constructor + arguments violate the <code>(size <= capacity)</code> + constraint.</p> + + <p>The <code>base64_binary</code> and <code>hex_binary</code> classes + support the <code>buffer</code> interface and perform automatic + decoding/encoding from/to the Base64 and Hex formats, respectively: + </p> + + <pre class="c++"> +class base64_binary: public simple_type, public buffer +{ +public: + base64_binary (size_t size = 0); + base64_binary (size_t size, size_t capacity); + base64_binary (const void* data, size_t size); + base64_binary (const void* data, size_t size, size_t capacity); + base64_binary (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + base64_binary (const base64_binary&); + + base64_binary& + operator= (const base64_binary&); + + virtual base64_binary* + _clone () const; + +public: + std::basic_string<C> + encode () const; +}; + </pre> + + <pre class="c++"> +class hex_binary: public simple_type, public buffer +{ +public: + hex_binary (size_t size = 0); + hex_binary (size_t size, size_t capacity); + hex_binary (const void* data, size_t size); + hex_binary (const void* data, size_t size, size_t capacity); + hex_binary (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + hex_binary (const hex_binary&); + + hex_binary& + operator= (const hex_binary&); + + virtual hex_binary* + _clone () const; + +public: + std::basic_string<C> + encode () const; +}; + </pre> + + + <h2><a name="2.5.7">2.5.7 Time Zone Representation</a></h2> + + <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, + <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, + <code>gYearMonth</code>, and <code>time</code> XML Schema built-in + types all include an optional time zone component. 
 The following + <code>xml_schema::time_zone</code> base class is used to represent + this information:</p> + + <pre class="c++"> +class time_zone +{ +public: + time_zone (); + time_zone (short hours, short minutes); + + bool + zone_present () const; + + void + zone_reset (); + + short + zone_hours () const; + + void + zone_hours (short); + + short + zone_minutes () const; + + void + zone_minutes (short); +}; + +bool +operator== (const time_zone&, const time_zone&); + +bool +operator!= (const time_zone&, const time_zone&); + </pre> + + <p>The <code>zone_present()</code> accessor function returns <code>true</code> + if the time zone is specified. The <code>zone_reset()</code> modifier + function resets the time zone object to the <em>not specified</em> + state. If the time zone offset is negative then both the hours and + minutes components are represented as negative integers.</p> + + + <h2><a name="2.5.8">2.5.8 Mapping for <code>date</code></a></h2> + + <p>The XML Schema <code>date</code> built-in data type is mapped to the + <code>xml_schema::date</code> C++ class which represents a year, a month, + and a day with an optional time zone. Its interface is presented + below. 
For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class date: public simple_type, public time_zone +{ +public: + date (int year, unsigned short month, unsigned short day); + date (int year, unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + +public: + date (const date&); + + date& + operator= (const date&); + + virtual date* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const date&, const date&); + +bool +operator!= (const date&, const date&); + </pre> + + <h2><a name="2.5.9">2.5.9 Mapping for <code>dateTime</code></a></h2> + + <p>The XML Schema <code>dateTime</code> built-in data type is mapped to the + <code>xml_schema::date_time</code> C++ class which represents a year, a month, + a day, hours, minutes, and seconds with an optional time zone. Its interface + is presented below. 
For more information on the base + <code>xml_schema::time_zone</code> class refer to <a href="#2.5.7">Section + 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class date_time: public simple_type, public time_zone +{ +public: + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds, short zone_hours, short zone_minutes); +public: + date_time (const date_time&); + + date_time& + operator= (const date_time&); + + virtual date_time* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const date_time&, const date_time&); + +bool +operator!= (const date_time&, const date_time&); + </pre> + + + <h2><a name="2.5.10">2.5.10 Mapping for <code>duration</code></a></h2> + + <p>The XML Schema <code>duration</code> built-in data type is mapped to the + <code>xml_schema::duration</code> C++ class which represents a potentially + negative duration in the form of years, months, days, hours, minutes, + and seconds. 
Its interface is presented below.</p> + + <pre class="c++"> +class duration: public simple_type +{ +public: + duration (bool negative, + unsigned int years, unsigned int months, unsigned int days, + unsigned int hours, unsigned int minutes, double seconds); +public: + duration (const duration&); + + duration& + operator= (const duration&); + + virtual duration* + _clone () const; + +public: + bool + negative () const; + + void + negative (bool); + + unsigned int + years () const; + + void + years (unsigned int); + + unsigned int + months () const; + + void + months (unsigned int); + + unsigned int + days () const; + + void + days (unsigned int); + + unsigned int + hours () const; + + void + hours (unsigned int); + + unsigned int + minutes () const; + + void + minutes (unsigned int); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const duration&, const duration&); + +bool +operator!= (const duration&, const duration&); + </pre> + + + <h2><a name="2.5.11">2.5.11 Mapping for <code>gDay</code></a></h2> + + <p>The XML Schema <code>gDay</code> built-in data type is mapped to the + <code>xml_schema::gday</code> C++ class which represents a day of the + month with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gday: public simple_type, public time_zone +{ +public: + explicit + gday (unsigned short day); + gday (unsigned short day, short zone_hours, short zone_minutes); + +public: + gday (const gday&); + + gday& + operator= (const gday&); + + virtual gday* + _clone () const; + +public: + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const gday&, const gday&); + +bool +operator!= (const gday&, const gday&); + </pre> + + + <h2><a name="2.5.12">2.5.12 Mapping for <code>gMonth</code></a></h2> + + <p>The XML Schema <code>gMonth</code> built-in data type is mapped to the + <code>xml_schema::gmonth</code> C++ class which represents a month of the + year with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gmonth: public simple_type, public time_zone +{ +public: + explicit + gmonth (unsigned short month); + gmonth (unsigned short month, + short zone_hours, short zone_minutes); + +public: + gmonth (const gmonth&); + + gmonth& + operator= (const gmonth&); + + virtual gmonth* + _clone () const; + +public: + unsigned short + month () const; + + void + month (unsigned short); +}; + +bool +operator== (const gmonth&, const gmonth&); + +bool +operator!= (const gmonth&, const gmonth&); + </pre> + + + <h2><a name="2.5.13">2.5.13 Mapping for <code>gMonthDay</code></a></h2> + + <p>The XML Schema <code>gMonthDay</code> built-in data type is mapped to the + <code>xml_schema::gmonth_day</code> C++ class which represents a day and + a month of the year with an optional time zone. Its interface is presented + below. 
For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gmonth_day: public simple_type, public time_zone +{ +public: + gmonth_day (unsigned short month, unsigned short day); + gmonth_day (unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + +public: + gmonth_day (const gmonth_day&); + + gmonth_day& + operator= (const gmonth_day&); + + virtual gmonth_day* + _clone () const; + +public: + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const gmonth_day&, const gmonth_day&); + +bool +operator!= (const gmonth_day&, const gmonth_day&); + </pre> + + + <h2><a name="2.5.14">2.5.14 Mapping for <code>gYear</code></a></h2> + + <p>The XML Schema <code>gYear</code> built-in data type is mapped to the + <code>xml_schema::gyear</code> C++ class which represents a year with + an optional time zone. Its interface is presented below. For more + information on the base <code>xml_schema::time_zone</code> class refer + to <a href="#2.5.7">Section 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class gyear: public simple_type, public time_zone +{ +public: + explicit + gyear (int year); + gyear (int year, short zone_hours, short zone_minutes); + +public: + gyear (const gyear&); + + gyear& + operator= (const gyear&); + + virtual gyear* + _clone () const; + +public: + int + year () const; + + void + year (int); +}; + +bool +operator== (const gyear&, const gyear&); + +bool +operator!= (const gyear&, const gyear&); + </pre> + + + <h2><a name="2.5.15">2.5.15 Mapping for <code>gYearMonth</code></a></h2> + + <p>The XML Schema <code>gYearMonth</code> built-in data type is mapped to + the <code>xml_schema::gyear_month</code> C++ class which represents + a year and a month with an optional time zone. 
Its interface is presented + below. For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gyear_month: public simple_type, public time_zone +{ +public: + gyear_month (int year, unsigned short month); + gyear_month (int year, unsigned short month, + short zone_hours, short zone_minutes); +public: + gyear_month (const gyear_month&); + + gyear_month& + operator= (const gyear_month&); + + virtual gyear_month* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); +}; + +bool +operator== (const gyear_month&, const gyear_month&); + +bool +operator!= (const gyear_month&, const gyear_month&); + </pre> + + + <h2><a name="2.5.16">2.5.16 Mapping for <code>time</code></a></h2> + + <p>The XML Schema <code>time</code> built-in data type is mapped to + the <code>xml_schema::time</code> C++ class which represents hours, + minutes, and seconds with an optional time zone. Its interface is + presented below. 
For more information on the base + <code>xml_schema::time_zone</code> class refer to + <a href="#2.5.7">Section 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class time: public simple_type, public time_zone +{ +public: + time (unsigned short hours, unsigned short minutes, double seconds); + time (unsigned short hours, unsigned short minutes, double seconds, + short zone_hours, short zone_minutes); + +public: + time (const time&); + + time& + operator= (const time&); + + virtual time* + _clone () const; + +public: + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const time&, const time&); + +bool +operator!= (const time&, const time&); + </pre> + + + <!-- Mapping for Simple Types --> + + <h2><a name="2.6">2.6 Mapping for Simple Types</a></h2> + + <p>An XML Schema simple type is mapped to a C++ class with the same + name as the simple type. The class defines a public copy constructor, + a public copy assignment operator, and a public virtual + <code>_clone</code> function. The <code>_clone</code> function is + declared <code>const</code>, does not take any arguments, and returns + a pointer to a complete copy of the instance allocated in the free + store. The <code>_clone</code> function shall be used to make copies + when static type and dynamic type of the instance may differ (see + <a href="#2.11">Section 2.11, "Mapping for <code>xsi:type</code> + and Substitution Groups"</a>). For instance:</p> + + <pre class="xml"> +<simpleType name="object"> + ... +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: ... +{ +public: + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; + + ... 
+ +}; + </pre> + + <p>The base class specification and the rest of the class definition + depend on the type of derivation used to define the simple type. </p> + + + <h3><a name="2.6.1">2.6.1 Mapping for Derivation by Restriction</a></h3> + + <p>XML Schema derivation by restriction is mapped to C++ public + inheritance. The base type of the restriction becomes the base + type for the resulting C++ class. In addition to the members described + in <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a>, the + resulting C++ class defines a public constructor with the base type + as its single argument. For instance:</p> + + <pre class="xml"> +<simpleType name="object"> + <restriction base="base"> + ... + </restriction> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public base +{ +public: + object (const base&); + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; +}; + </pre> + + + <h3><a name="2.6.2">2.6.2 Mapping for Enumerations</a></h3> + +<p>XML Schema restriction by enumeration is mapped to a C++ class + with semantics similar to C++ <code>enum</code>. Each XML Schema + enumeration element is mapped to a C++ enumerator with the + name derived from the <code>value</code> attribute and defined + in the class scope. 
In addition to the members + described in <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a>, + the resulting C++ class defines a public constructor that can be called + with one of the enumerators as its single argument, a public constructor + that can be called with enumeration's base value as its single + argument, a public assignment operator that can be used to assign the + value of one of the enumerators, and a public implicit conversion + operator to the underlying C++ enum type.</p> + +<p>Furthermore, for string-based enumeration types, the resulting C++ + class defines a public constructor with a single argument of type + <code>const C*</code> and a public constructor with a single + argument of type <code>const std::basic_string<C>&</code>. + For instance:</p> + + <pre class="xml"> +<simpleType name="color"> + <restriction base="string"> + <enumeration value="red"/> + <enumeration value="green"/> + <enumeration value="blue"/> + </restriction> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class color: public xml_schema::string +{ +public: + enum value + { + red, + green, + blue + }; + +public: + color (value); + color (const C*); + color (const std::basic_string<C>&); + color (const xml_schema::string&); + color (const color&); + +public: + color& + operator= (value); + + color& + operator= (const color&); + +public: + virtual color* + _clone () const; + +public: + operator value () const; +}; + </pre> + + <h3><a name="2.6.3">2.6.3 Mapping for Derivation by List</a></h3> + + <p>XML Schema derivation by list is mapped to C++ public + inheritance from <code>xml_schema::simple_type</code> + (<a href="#2.5.3">Section 2.5.3, "Mapping for + <code>anySimpleType</code>"</a>) and a suitable sequence type. + The list item type becomes the element type of the sequence. 
+ In addition to the members described in <a href="#2.6">Section 2.6, + "Mapping for Simple Types"</a>, the resulting C++ class defines + a public default constructor, a public constructor + with the first argument of type <code>size_type</code> and + the second argument of list item type that creates + a list object with the specified number of copies of the specified + element value, and a public constructor with the two arguments + of an input iterator type that creates a list object from an + iterator range. For instance: + </p> + + <pre class="xml"> +<simpleType name="int_list"> + <list itemType="int"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class int_list: public simple_type, + public sequence<int> +{ +public: + int_list (); + int_list (size_type n, int x); + + template <typename I> + int_list (const I& begin, const I& end); + int_list (const int_list&); + +public: + int_list& + operator= (const int_list&); + +public: + virtual int_list* + _clone () const; +}; + </pre> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). + Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. One notable extension + to the standard interface that is available only for + sequences of non-fundamental C++ types is the addition of + the overloaded <code>push_back</code> and <code>insert</code> + member functions which instead of the constant reference + to the element type accept automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to the element type. These functions assume ownership + of the pointed to object and reset the passed automatic pointer. 
+ </p> + + <h3><a name="2.6.4">2.6.4 Mapping for Derivation by Union</a></h3> + + <p>XML Schema derivation by union is mapped to C++ public + inheritance from <code>xml_schema::simple_type</code> + (<a href="#2.5.3">Section 2.5.3, "Mapping for + <code>anySimpleType</code>"</a>) and <code>std::basic_string<C></code>. + In addition to the members described in <a href="#2.6">Section 2.6, + "Mapping for Simple Types"</a>, the resulting C++ class defines a + public constructor with a single argument of type <code>const C*</code> + and a public constructor with a single argument of type + <code>const std::basic_string<C>&</code>. For instance: + </p> + + <pre class="xml"> +<simpleType name="int_string_union"> + <xsd:union memberTypes="xsd:int xsd:string"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class int_string_union: public simple_type, + public std::basic_string<C> +{ +public: + int_string_union (const C*); + int_string_union (const std::basic_string<C>&); + int_string_union (const int_string_union&); + +public: + int_string_union& + operator= (const int_string_union&); + +public: + virtual int_string_union* + _clone () const; +}; + </pre> + + <h2><a name="2.7">2.7 Mapping for Complex Types</a></h2> + + <p>An XML Schema complex type is mapped to a C++ class with the same + name as the complex type. The class defines a public copy constructor, + a public copy assignment operator, and a public virtual + <code>_clone</code> function. The <code>_clone</code> function is + declared <code>const</code>, does not take any arguments, and returns + a pointer to a complete copy of the instance allocated in the free + store. 
The <code>_clone</code> function shall be used to make copies + when static type and dynamic type of the instance may differ (see + <a href="#2.11">Section 2.11, "Mapping for <code>xsi:type</code> + and Substitution Groups"</a>).</p> + + <p>Additionally, the resulting C++ class + defines two public constructors that take an initializer for each + member of the complex type and all its base types that belongs to + the One cardinality class (see <a href="#2.8">Section 2.8, "Mapping + for Local Elements and Attributes"</a>). In the first constructor, + the arguments are passed as constant references and the newly created + instance is initialized with copies of the passed objects. In the + second constructor, arguments that are complex types (that is, + they themselves contain elements or attributes) are passed as + either <code>std::unique_ptr</code> (C++11) or <code>std::auto_ptr</code> + (C++98), depending on the C++ standard selected. In this case the newly + created instance is directly initialized with and assumes ownership + of the pointed to objects and the <code>std::[unique|auto]_ptr</code> + arguments are reset to <code>0</code>. For instance:</p> + + <pre class="xml"> +<complexType name="complex"> + <sequence> + <element name="a" type="int"/> + <element name="b" type="string"/> + </sequence> +</complexType> + +<complexType name="object"> + <sequence> + <element name="s-one" type="boolean"/> + <element name="c-one" type="complex"/> + <element name="optional" type="int" minOccurs="0"/> + <element name="sequence" type="string" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class complex: public xml_schema::type +{ +public: + complex (const int& a, const xml_schema::string& b); + complex (const complex&); + +public: + complex& + operator= (const complex&); + +public: + virtual complex* + _clone () const; + + ... 
+ +}; + +class object: public xml_schema::type +{ +public: + object (const bool& s_one, const complex& c_one); + object (const bool& s_one, std::[unique|auto]_ptr<complex> c_one); + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; + + ... + +}; + </pre> + + <p>Notice that the generated <code>complex</code> class does not + have the second (<code>std::[unique|auto]_ptr</code>) version of the + constructor since all its required members are of simple types.</p> + + <p>If an XML Schema complex type has an ultimate base which is an XML + Schema simple type then the resulting C++ class also defines a public + constructor that takes an initializer for the base type as well as + for each member of the complex type and all its base types that + belongs to the One cardinality class. For instance:</p> + + <pre class="xml"> +<complexType name="object"> + <simpleContent> + <extension base="date"> + <attribute name="lang" type="language" use="required"/> + </extension> + </simpleContent> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::date +{ +public: + object (const xml_schema::language& lang); + + object (const xml_schema::date& base, + const xml_schema::language& lang); + + ... + +}; + </pre> + + <p>Furthermore, for string-based XML Schema complex types, the resulting C++ + class also defines two public constructors with the first arguments + of type <code>const C*</code> and <code>const std::basic_string<C>&</code>, + respectively, followed by arguments for each member of the complex + type and all its base types that belongs to the One cardinality + class. For enumeration-based complex types the resulting C++ + class also defines a public constructor with the first argument + of the underlying enum type followed by arguments for each member + of the complex type and all its base types that belongs to the One + cardinality class. 
For instance:</p> + + <pre class="xml"> +<simpleType name="color"> + <restriction base="string"> + <enumeration value="red"/> + <enumeration value="green"/> + <enumeration value="blue"/> + </restriction> +</simpleType> + +<complexType name="object"> + <simpleContent> + <extension base="color"> + <attribute name="lang" type="language" use="required"/> + </extension> + </simpleContent> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class color: public xml_schema::string +{ +public: + enum value + { + red, + green, + blue + }; + +public: + color (value); + color (const C*); + color (const std::basic_string<C>&); + + ... + +}; + +class object: public color +{ +public: + object (const color& base, + const xml_schema::language& lang); + + object (const color::value& base, + const xml_schema::language& lang); + + object (const C* base, + const xml_schema::language& lang); + + object (const std::basic_string<C>& base, + const xml_schema::language& lang); + + ... + +}; + </pre> + + <p>Additional constructors can be requested with the + <code>--generate-default-ctor</code> and + <code>--generate-from-base-ctor</code> options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for details.</p> + + <p>If an XML Schema complex type is not explicitly derived from any type, + the resulting C++ class is derived from <code>xml_schema::type</code>. + In cases where an XML Schema complex type is defined using derivation + by extension or restriction, the resulting C++ base class specification + depends on the type of derivation and is described in the subsequent + sections. + </p> + + <p>The mapping for elements and attributes that are defined in a complex + type is described in <a href="#2.8">Section 2.8, "Mapping for Local + Elements and Attributes"</a>. 
+ </p> + + <h3><a name="2.7.1">2.7.1 Mapping for Derivation by Extension</a></h3> + + <p>XML Schema derivation by extension is mapped to C++ public + inheritance. The base type of the extension becomes the base + type for the resulting C++ class. + </p> + + <h3><a name="2.7.2">2.7.2 Mapping for Derivation by Restriction</a></h3> + + <p>XML Schema derivation by restriction is mapped to C++ public + inheritance. The base type of the restriction becomes the base + type for the resulting C++ class. XML Schema elements and + attributes defined within restriction do not result in any + definitions in the resulting C++ class. Instead, corresponding + (unrestricted) definitions are inherited from the base class. + In future versions of this mapping, such elements and + attributes may result in redefinitions of accessors and + modifiers to reflect their restricted semantics. + </p> + + <!-- 2.8 Mapping for Local Elements and Attributes --> + + <h2><a name="2.8">2.8 Mapping for Local Elements and Attributes</a></h2> + + <p>XML Schema element and attribute definitions are called local + if they appear within a complex type definition, an element group + definition, or an attribute group definition. + </p> + + <p>Local XML Schema element and attribute definitions have the same + C++ mapping. Therefore, in this section, local elements and + attributes are collectively called members. 
+ </p> + + <p>While there are many different member cardinality combinations + (determined by the <code>use</code> attribute for attributes and + the <code>minOccurs</code> and <code>maxOccurs</code> attributes + for elements), the mapping divides all possible cardinality + combinations into three cardinality classes: + </p> + + <dl> + <dt><i>one</i></dt> + <dd>attributes: <code>use == "required"</code></dd> + <dd>attributes: <code>use == "optional"</code> and has default or fixed value</dd> + <dd>elements: <code>minOccurs == "1"</code> and <code>maxOccurs == "1"</code></dd> + + <dt><i>optional</i></dt> + <dd>attributes: <code>use == "optional"</code> and doesn't have default or fixed value</dd> + <dd>elements: <code>minOccurs == "0"</code> and <code>maxOccurs == "1"</code></dd> + + <dt><i>sequence</i></dt> + <dd>elements: <code>maxOccurs > "1"</code></dd> + </dl> + + <p>An optional attribute with a default or fixed value acquires this value + if the attribute hasn't been specified in an instance document (see + <a href="#A">Appendix A, "Default and Fixed Values"</a>). This + mapping places such optional attributes to the One cardinality + class.</p> + + <p>A member is mapped to a set of public type definitions + (<code>typedef</code>s) and a set of public accessor and modifier + functions. Type definitions have names derived from the member's + name. The accessor and modifier functions have the same name as the + member. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + typedef xml_schema::string member_type; + + const member_type& + member () const; + + ... + +}; + </pre> + + <p>In addition, if a member has a default or fixed value, a static + accessor function is generated that returns this value. 
For + example:</p> + +<pre class="xml"> +<complexType name="object"> + <attribute name="data" type="string" default="test"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + typedef xml_schema::string data_type; + + const data_type& + data () const; + + static const data_type& + data_default_value (); + + ... + +}; + </pre> + + <p>Names and semantics of type definitions for the member as well + as signatures of the accessor and modifier functions depend on + the member's cardinality class and are described in the following + sub-sections. + </p> + + + <h3><a name="2.8.1">2.8.1 Mapping for Members with the One Cardinality Class</a></h3> + + <p>For the One cardinality class, the type definitions consist of + an alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name. + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + member and can be used for read-only access. The non-constant + version returns an unrestricted reference to the member and can + be used for read-write access. + </p> + + <p>The first modifier function expects an argument of type reference to + constant of the member's type. It makes a deep copy of its argument. + Except for member's types that are mapped to fundamental C++ types, + the second modifier function is provided that expects an argument + of type automatic pointer (<code>std::unique_ptr</code> or + <code>std::auto_ptr</code>, depending on the C++ standard selected) + to the member's type. It assumes ownership of the pointed to object + and resets the passed automatic pointer. 
For instance:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef xml_schema::string member_type; + + // Accessors. + // + const member_type& + member () const; + + member_type& + member (); + + // Modifiers. + // + void + member (const member_type&); + + void + member (std::[unique|auto]_ptr<member_type>); + ... + +}; + </pre> + + <p>In addition, if requested by specifying the <code>--generate-detach</code> + option and only for members of non-fundamental C++ types, the mapping + provides a detach function that returns an automatic pointer to the + member's type, for example:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + ... + + std::[unique|auto]_ptr<member_type> + detach_member (); + ... + +}; + </pre> + + <p>This function detaches the value from the tree leaving the member + value uninitialized. Accessing such an uninitialized value prior to + re-initializing it results in undefined behavior.</p> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + string s (o.member ()); // get + object::member_type& sr (o.member ()); // get + + o.member ("hello"); // set, deep copy + o.member () = "hello"; // set, deep copy + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + o.member (std::move (p)); // set, assumes ownership + p = o.detach_member (); // detach, member is uninitialized + o.member (std::move (p)); // re-attach + + // C++98 version. 
+ // + std::auto_ptr<string> p (new string ("hello")); + o.member (p); // set, assumes ownership + p = o.detach_member (); // detach, member is uninitialized + o.member (p); // re-attach +} + </pre> + + +<h3><a name="2.8.2">2.8.2 Mapping for Members with the Optional Cardinality Class</a></h3> + + <p>For the Optional cardinality class, the type definitions consist of + an alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name and an alias for + the container type with the name created by appending the + <code>_optional</code> suffix to the member's name. + </p> + + <p>Unlike accessor functions for the One cardinality class, accessor + functions for the Optional cardinality class return references to + corresponding containers rather than directly to members. The + accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to + the container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container + and can be used for read-write access. + </p> + + <p>The modifier functions are overloaded for the member's + type and the container type. The first modifier function + expects an argument of type reference to constant of the + member's type. It makes a deep copy of its argument. + Except for member's types that are mapped to fundamental C++ types, + the second modifier function is provided that expects an argument + of type automatic pointer (<code>std::unique_ptr</code> or + <code>std::auto_ptr</code>, depending on the C++ standard selected) + to the member's type. It assumes ownership of the pointed to object + and resets the passed automatic pointer. The last modifier function + expects an argument of type reference to constant of the container + type. It makes a deep copy of its argument. 
For instance: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string" minOccurs="0"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef xml_schema::string member_type; + typedef optional<member_type> member_optional; + + // Accessors. + // + const member_optional& + member () const; + + member_optional& + member (); + + // Modifiers. + // + void + member (const member_type&); + + void + member (std::[unique|auto]_ptr<member_type>); + + void + member (const member_optional&); + + ... + +}; + </pre> + + + <p>The <code>optional</code> class template is defined in an + implementation-specific namespace and has the following + interface. The <code>[unique|auto]_ptr</code>-based constructor + and modifier function are only available if the template + argument is not a fundamental C++ type. + </p> + + <pre class="c++"> +template <typename X> +class optional +{ +public: + optional (); + + // Makes a deep copy. + // + explicit + optional (const X&); + + // Assumes ownership. + // + explicit + optional (std::[unique|auto]_ptr<X>); + + optional (const optional&); + +public: + optional& + operator= (const X&); + + optional& + operator= (const optional&); + + // Pointer-like interface. + // +public: + const X* + operator-> () const; + + X* + operator-> (); + + const X& + operator* () const; + + X& + operator* (); + + typedef void (optional::*bool_convertible) (); + operator bool_convertible () const; + + // Get/set interface. + // +public: + bool + present () const; + + const X& + get () const; + + X& + get (); + + // Makes a deep copy. + // + void + set (const X&); + + // Assumes ownership. + // + void + set (std::[unique|auto]_ptr<X>); + + // Detach and return the contained value. 
+ // + std::[unique|auto]_ptr<X> + detach (); + + void + reset (); +}; + +template <typename X> +bool +operator== (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator!= (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator< (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator> (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator<= (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator>= (const optional<X>&, const optional<X>&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + if (o.member ().present ()) // test + { + string& s (o.member ().get ()); // get + o.member ("hello"); // set, deep copy + o.member ().set ("hello"); // set, deep copy + o.member ().reset (); // reset + } + + // Same as above but using pointer notation: + // + if (o.member ()) // test + { + string& s (*o.member ()); // get + o.member ("hello"); // set, deep copy + *o.member () = "hello"; // set, deep copy + o.member ().reset (); // reset + } + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + o.member (std::move (p)); // set, assumes ownership + + p.reset (new string ("hello")); + o.member ().set (std::move (p)); // set, assumes ownership + + p = o.member ().detach (); // detach, member is reset + o.member ().set (std::move (p)); // re-attach + + // C++98 version. 
+ // + std::auto_ptr<string> p (new string ("hello")); + o.member (p); // set, assumes ownership + + p.reset (new string ("hello")); + o.member ().set (p); // set, assumes ownership + + p = o.member ().detach (); // detach, member is reset + o.member ().set (p); // re-attach +} + </pre> + + + <h3><a name="2.8.3">2.8.3 Mapping for Members with the Sequence Cardinality Class</a></h3> + + <p>For the Sequence cardinality class, the type definitions consist of an + alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name, an alias of + the container type with the name created by appending the + <code>_sequence</code> suffix to the member's name, an alias of + the iterator type with the name created by appending the + <code>_iterator</code> suffix to the member's name, and an alias + of the constant iterator type with the name created by appending the + <code>_const_iterator</code> suffix to the member's name. + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. + </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function + makes a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. 
+ // + typedef xml_schema::string member_type; + typedef sequence<member_type> member_sequence; + typedef member_sequence::iterator member_iterator; + typedef member_sequence::const_iterator member_const_iterator; + + // Accessors. + // + const member_sequence& + member () const; + + member_sequence& + member (); + + // Modifier. + // + void + member (const member_sequence&); + + ... + +}; + </pre> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). + Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. Two notable extensions + to the standard interface that are available only for + sequences of non-fundamental C++ types are the addition of + the overloaded <code>push_back</code> and <code>insert</code> + as well as the <code>detach_back</code> and <code>detach</code> + member functions. The additional <code>push_back</code> and + <code>insert</code> functions accept an automatic pointer + (<code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected) to the + element type instead of the constant reference. They assume + ownership of the pointed to object and reset the passed + automatic pointer. The <code>detach_back</code> and + <code>detach</code> functions detach the element + value from the sequence container and, by default, remove + the element from the sequence. These additional functions + have the following signatures:</p> + + <pre class="c++"> +template <typename X> +class sequence +{ +public: + ... + + void + push_back (std::[unique|auto]_ptr<X>) + + iterator + insert (iterator position, std::[unique|auto]_ptr<X>) + + std::[unique|auto]_ptr<X> + detach_back (bool pop = true); + + iterator + detach (iterator position, + std::[unique|auto]_ptr<X>& result, + bool erase = true) + + ... 
+} + </pre> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + object::member_sequence& s (o.member ()); + + // Iteration. + // + for (object::member_iterator i (s.begin ()); i != s.end (); ++i) + { + string& value (*i); + } + + // Modification. + // + s.push_back ("hello"); // deep copy + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + s.push_back (std::move (p)); // assumes ownership + p = s.detach_back (); // detach and pop + s.push_back (std::move (p)); // re-append + + // C++98 version. + // + std::auto_ptr<string> p (new string ("hello")); + s.push_back (p); // assumes ownership + p = s.detach_back (); // detach and pop + s.push_back (p); // re-append + + // Setting a new container. + // + object::member_sequence n; + n.push_back ("one"); + n.push_back ("two"); + o.member (n); // deep copy +} + </pre> + + <h3><a name="2.8.4">2.8.4 Element Order</a></h3> + + <p>C++/Tree is a "flattening" mapping in a sense that many levels of + nested compositors (<code>choice</code> and <code>sequence</code>), + all potentially with their own cardinalities, are in the end mapped + to a flat set of elements with one of the three cardinality classes + discussed in the previous sections. While this results in a simple + and easy to use API for most types, in certain cases, the order of + elements in the actual XML documents is not preserved once parsed + into the object model. And sometimes such order has + application-specific significance. 
As an example, consider a schema + that defines a batch of bank transactions:</p> + + <pre class="xml"> +<complexType name="withdraw"> + <sequence> + <element name="account" type="unsignedInt"/> + <element name="amount" type="unsignedInt"/> + </sequence> +</complexType> + +<complexType name="deposit"> + <sequence> + <element name="account" type="unsignedInt"/> + <element name="amount" type="unsignedInt"/> + </sequence> +</complexType> + +<complexType name="batch"> + <choice minOccurs="0" maxOccurs="unbounded"> + <element name="withdraw" type="withdraw"/> + <element name="deposit" type="deposit"/> + </choice> +</complexType> + </pre> + + <p>The batch can contain any number of transactions in any order + but the order of transactions in each actual batch is significant. + For instance, consider what could happen if we reorder the + transactions and apply all the withdrawals before deposits.</p> + + <p>For the <code>batch</code> schema type defined above the default + C++/Tree mapping will produce a C++ class that contains a pair of + sequence containers, one for each of the two elements. While this + will capture the content (transactions), the order of this content + as it appears in XML will be lost. Also, if we try to serialize the + batch we just loaded back to XML, all the withdrawal transactions + will appear before deposits.</p> + + <p>To overcome this limitation of a flattening mapping, C++/Tree + allows us to mark certain XML Schema types, for which content + order is important, as ordered.</p> + + <p>There are several command line options that control which + schema types are treated as ordered. To make an individual + type ordered, we use the <code>--ordered-type</code> option, + for example:</p> + + <pre class="term"> +--ordered-type batch + </pre> + + <p>To automatically treat all the types that are derived from an ordered + type also ordered, we use the <code>--ordered-type-derived</code> + option. 
This is primarily useful if you would like to iterate + over the complete hierarchy's content using the content order + sequence (discussed below).</p> + + <p>Ordered types are also useful for handling mixed content. To + automatically mark all the types with mixed content as ordered + we use the <code>--ordered-type-mixed</code> option. For more + information on handling mixed content see <a href="#2.13">Section + 2.13, "Mapping for Mixed Content Models"</a>.</p> + + <p>Finally, we can mark all the types in the schema we are + compiling as ordered with the <code>--ordered-type-all</code> option. + You should only resort to this option if all the types in + your schema truly suffer from the loss of content + order since, as we will discuss shortly, ordered types + require extra effort to access and, especially, modify. + See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information on + these options.</p> + + <p>Once a type is marked ordered, C++/Tree alters its mapping + in several ways. Firstly, for each local element, element + wildcard (<a href="#2.12.4">Section 2.12.4, "Element Wildcard + Order"</a>), and mixed content text (<a href="#2.13">Section + 2.13, "Mapping for Mixed Content Models"</a>) in this type, a + content id constant is generated. Secondly, an additional sequence + is added to the class that captures the content order. 
Here + is how the mapping of our <code>batch</code> class changes + once we make it ordered:</p> + + <pre class="c++"> +class batch: public xml_schema::type +{ +public: + // withdraw + // + typedef withdraw withdraw_type; + typedef sequence<withdraw_type> withdraw_sequence; + typedef withdraw_sequence::iterator withdraw_iterator; + typedef withdraw_sequence::const_iterator withdraw_const_iterator; + + static const std::size_t withdraw_id = 1; + + const withdraw_sequence& + withdraw () const; + + withdraw_sequence& + withdraw (); + + void + withdraw (const withdraw_sequence&); + + // deposit + // + typedef deposit deposit_type; + typedef sequence<deposit_type> deposit_sequence; + typedef deposit_sequence::iterator deposit_iterator; + typedef deposit_sequence::const_iterator deposit_const_iterator; + + static const std::size_t deposit_id = 2; + + const deposit_sequence& + deposit () const; + + deposit_sequence& + deposit (); + + void + deposit (const deposit_sequence&); + + // content_order + // + typedef xml_schema::content_order content_order_type; + typedef std::vector<content_order_type> content_order_sequence; + typedef content_order_sequence::iterator content_order_iterator; + typedef content_order_sequence::const_iterator content_order_const_iterator; + + const content_order_sequence& + content_order () const; + + content_order_sequence& + content_order (); + + void + content_order (const content_order_sequence&); + + ... +}; + </pre> + + <p>Notice the <code>withdraw_id</code> and <code>deposit_id</code> + content ids as well as the extra <code>content_order</code> + sequence that does not correspond to any element in the + schema definition. The other changes to the mapping for ordered + types have to do with XML parsing and serialization code. During + parsing the content order is captured in the <code>content_order</code> + sequence while during serialization this sequence is used to + determine the order in which content is serialized. 
The + <code>content_order</code> sequence is also copied during + copy construction and assigned during copy assignment. It is also + taken into account during comparison.</p> + + <p>The entry type of the <code>content_order</code> sequence is the + <code>xml_schema::content_order</code> type that has the following + interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + struct content_order + { + content_order (std::size_t id, std::size_t index = 0); + + std::size_t id; + std::size_t index; + }; + + bool + operator== (const content_order&, const content_order&); + + bool + operator!= (const content_order&, const content_order&); + + bool + operator< (const content_order&, const content_order&); +} + </pre> + + <p>The <code>content_order</code> sequence describes the order of + content (elements, including wildcards, as well as mixed content + text). Each entry in this sequence consists of the content id + (for example, <code>withdraw_id</code> or <code>deposit_id</code> + in our case) as well as, for elements of the sequence cardinality + class, an index into the corresponding sequence container (the + index is unused for the one and optional cardinality classes). + For example, in our case, if the content id is <code>withdraw_id</code>, + then the index will point into the <code>withdraw</code> element + sequence.</p> + + <p>With all this information we can now examine how to iterate over + transactions in the batch in content order:</p> + + <pre class="c++"> +batch& b = ... 
+ +for (batch::content_order_const_iterator i (b.content_order ().begin ()); + i != b.content_order ().end (); + ++i) +{ + switch (i->id) + { + case batch::withdraw_id: + { + const withdraw& t (b.withdraw ()[i->index]); + cerr << t.account () << " withdraw " << t.amount () << endl; + break; + } + case batch::deposit_id: + { + const deposit& t (b.deposit ()[i->index]); + cerr << t.account () << " deposit " << t.amount () << endl; + break; + } + default: + { + assert (false); // Unknown content id. + } + } +} + </pre> + + <p>If we serialized our batch back to XML, we would also see that the + order of transactions in the output is exactly the same as in the + input rather than all the withdrawals first followed by all the + deposits.</p> + + <p>The most complex aspect of working with ordered types is + modifications. Now we not only need to change the content, + but also remember to update the order information corresponding + to this change. As a first example, we add a deposit transaction + to the batch:</p> + + <pre class="c++"> +using xml_schema::content_order; + +batch::deposit_sequence& d (b.deposit ()); +batch::withdraw_sequence& w (b.withdraw ()); +batch::content_order_sequence& co (b.content_order ()); + +d.push_back (deposit (123456789, 100000)); +co.push_back (content_order (batch::deposit_id, d.size () - 1)); + </pre> + + <p>In the above example we first added the content (deposit + transaction) and then updated the content order information + by adding an entry with <code>deposit_id</code> content + id and the index of the just added deposit transaction.</p> + + <p>Removing the last transaction can be easy if we know which + transaction (deposit or withdrawal) is last:</p> + + <pre class="c++"> +d.pop_back (); +co.pop_back (); + </pre> + + <p>If, however, we do not know which transaction is last, then + things get a bit more complicated:</p> + + <pre class="c++"> +switch (co.back ().id) +{ +case batch::withdraw_id: + { + w.pop_back (); + break; + } +case 
batch::deposit_id: + { + d.pop_back (); + break; + } +} + +co.pop_back (); + </pre> + + <p>The following example shows how to add a transaction at the + beginning of the batch:</p> + + <pre class="c++"> +w.push_back (withdraw (123456789, 100000)); +co.insert (co.begin (), + content_order (batch::withdraw_id, w.size () - 1)); + </pre> + + <p>Note also that when we merely modify the content of one + of the elements in place, we do not need to update its + order since it doesn't change. For example, here is how + we can change the amount in the first withdrawal:</p> + + <pre class="c++"> +w[0].amount (10000); + </pre> + + <p>For the complete working code shown in this section refer to the + <code>order/element</code> example in the + <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <p>If both the base and derived types are ordered, then the + content order sequence is only added to the base and the content + ids are unique within the whole hierarchy. In this case + the content order sequence for the derived type contains + ordering information for both base and derived content.</p> + + <p>In some applications we may need to perform more complex + content processing. For example, in our case, we may need + to remove all the withdrawal transactions. The default + container, <code>std::vector</code>, is not particularly + suitable for such operations. What may be required by + some applications is a multi-index container that not + only allows us to iterate in content order similar to + <code>std::vector</code> but also search by the content + id as well as the content id and index pair.</p> + + <p>While C++/Tree does not provide this functionality by + default, it allows us to specify a custom container + type for content order with the <code>--order-container</code> + command line option. 
The only requirement from the + generated code side for such a container is to provide + the <code>vector</code>-like <code>push_back()</code>, + <code>size()</code>, and const iteration interfaces.</p> + + <p>As an example, here is how we can use the Boost Multi-Index + container for content order. First we create the + <code>content-order-container.hxx</code> header with the + following definition:</p> + + <pre class="c++"> +#ifndef CONTENT_ORDER_CONTAINER +#define CONTENT_ORDER_CONTAINER + +#include <cstddef> // std::size_t + +#include <boost/multi_index_container.hpp> +#include <boost/multi_index/member.hpp> +#include <boost/multi_index/identity.hpp> +#include <boost/multi_index/ordered_index.hpp> +#include <boost/multi_index/random_access_index.hpp> + +struct by_id {}; +struct by_id_index {}; + +template <typename T> +using content_order_container = + boost::multi_index::multi_index_container< + T, + boost::multi_index::indexed_by< + boost::multi_index::random_access<>, + boost::multi_index::ordered_unique< + boost::multi_index::tag<by_id_index>, + boost::multi_index::identity<T> + >, + boost::multi_index::ordered_non_unique< + boost::multi_index::tag<by_id>, + boost::multi_index::member<T, std::size_t, &T::id> + > + > + >; + +#endif + </pre> + + <p>Next we add the following two XSD compiler options to include + this header into every generated header file and to use the + custom container type (see the XSD compiler command line manual + for more information on shell quoting for the first option):</p> + + <pre class="term"> +--hxx-prologue '#include "content-order-container.hxx"' +--order-container content_order_container + </pre> + + <p>With these changes we can now use the multi-index functionality, + for example, to search for a specific content id:</p> + + <pre class="c++"> +typedef batch::content_order_sequence::index<by_id>::type id_set; +typedef id_set::iterator id_iterator; + +const id_set& ids (b.content_order ().get<by_id> ()); + 
+std::pair<id_iterator, id_iterator> r ( + ids.equal_range (std::size_t (batch::deposit_id))); + +for (id_iterator i (r.first); i != r.second; ++i) +{ + const deposit& t (b.deposit ()[i->index]); + cerr << t.account () << " deposit " << t.amount () << endl; +} + </pre> + + <h2><a name="2.9">2.9 Mapping for Global Elements</a></h2> + + <p>An XML Schema element definition is called global if it appears + directly under the <code>schema</code> element. + A global element is a valid root of an instance document. By + default, a global element is mapped to a set of overloaded + parsing and, optionally, serialization functions with the + same name as the element. It is also possible to generate types + for root elements instead of parsing and serialization functions. + This is primarily useful to distinguish object models with the + same root type but with different root elements. See + <a href="#2.9.1">Section 2.9.1, "Element Types"</a> for details. + It is also possible to request the generation of an element map + which allows uniform parsing and serialization of multiple root + elements. See <a href="#2.9.2">Section 2.9.2, "Element Map"</a> + for details. + </p> + + <p>The parsing functions read XML instance documents and return + corresponding object models as an automatic pointer + (<code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected). Their signatures + have the following pattern (<code>type</code> denotes + element's type and <code>name</code> denotes element's + name): + </p> + + <pre class="c++"> +std::[unique|auto]_ptr<type> +name (....); + </pre> + + <p>The process of parsing, including the exact signatures of the parsing + functions, is the subject of <a href="#3">Chapter 3, "Parsing"</a>. + </p> + + <p>The serialization functions write object models back to XML instance + documents. 
Their signatures have the following pattern: + </p> + + <pre class="c++"> +void +name (<stream type>&, const type&, ....); + </pre> + + <p>The process of serialization, including the exact signatures of the + serialization functions, is the subject of <a href="#4">Chapter 4, + "Serialization"</a>. + </p> + + + <h3><a name="2.9.1">2.9.1 Element Types</a></h3> + + <p>The generation of element types is requested with the + <code>--generate-element-type</code> option. With this option + each global element is mapped to a C++ class with the + same name as the element. Such a class is derived from + <code>xml_schema::element_type</code> and contains the same set + of type definitions, constructors, and member functions as would a + type containing a single element with the One cardinality class + named <code>"value"</code>. In addition, the element type also + contains a set of member functions for accessing the element + name and namespace as well as its value in a uniform manner. + For example:</p> + + <pre class="xml"> +<complexType name="type"> + <sequence> + ... + </sequence> +</complexType> + +<element name="root" type="type"/> + </pre> + +<p>is mapped to:</p> + + <pre class="c++"> +class type +{ + ... +}; + +class root: public xml_schema::element_type +{ +public: + // Element value. + // + typedef type value_type; + + const value_type& + value () const; + + value_type& + value (); + + void + value (const value_type&); + + void + value (std::[unique|auto]_ptr<value_type>); + + // Constructors. + // + root (const value_type&); + + root (std::[unique|auto]_ptr<value_type>); + + root (const xercesc::DOMElement&, xml_schema::flags = 0); + + root (const root&, xml_schema::flags = 0); + + virtual root* + _clone (xml_schema::flags = 0) const; + + // Element name and namespace. 
+ // + static const std::string& + name (); + + static const std::string& + namespace_ (); + + virtual const std::string& + _name () const; + + virtual const std::string& + _namespace () const; + + // Element value as xml_schema::type. + // + virtual const xml_schema::type* + _value () const; + + virtual xml_schema::type* + _value (); +}; + +void +operator<< (xercesc::DOMElement&, const root&); + </pre> + + <p>The <code>xml_schema::element_type</code> class is a common + base type for all element types and is defined as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class element_type + { + public: + virtual + ~element_type (); + + virtual element_type* + _clone (flags f = 0) const = 0; + + virtual const std::basic_string<C>& + _name () const = 0; + + virtual const std::basic_string<C>& + _namespace () const = 0; + + virtual xml_schema::type* + _value () = 0; + + virtual const xml_schema::type* + _value () const = 0; + }; +} + </pre> + + <p>The <code>_value()</code> member function returns a pointer to + the element value or 0 if the element is of a fundamental C++ + type and therefore is not derived from <code>xml_schema::type</code>. + </p> + + <p>Unlike parsing and serialization functions, element types + are only capable of parsing and serializing from/to a + <code>DOMElement</code> object. This means that the application + will need to perform its own XML-to-DOM parsing and DOM-to-XML + serialization. The following section describes a mechanism + provided by the mapping to uniformly parse and serialize + multiple root elements.</p> + + + <h3><a name="2.9.2">2.9.2 Element Map</a></h3> + + <p>When element types are generated for root elements it is also + possible to request the generation of an element map with the + <code>--generate-element-map</code> option. The element map + allows uniform parsing and serialization of multiple root + elements via the common <code>xml_schema::element_type</code> + base type. 
The <code>xml_schema::element_map</code> class is + defined as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class element_map + { + public: + static std::[unique|auto]_ptr<xml_schema::element_type> + parse (const xercesc::DOMElement&, flags = 0); + + static void + serialize (xercesc::DOMElement&, const element_type&); + }; +} + </pre> + + <p>The <code>parse()</code> function creates the corresponding + element type object based on the element name and namespace + and returns it as an automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to <code>xml_schema::element_type</code>. + The <code>serialize()</code> function serializes the passed element + object to <code>DOMElement</code>. Note that in case of + <code>serialize()</code>, the <code>DOMElement</code> object + should have the correct name and namespace. If no element type is + available for an element, both functions throw the + <code>xml_schema::no_element_info</code> exception:</p> + + <pre class="c++"> +struct no_element_info: virtual exception +{ + no_element_info (const std::basic_string<C>& element_name, + const std::basic_string<C>& element_namespace); + + const std::basic_string<C>& + element_name () const; + + const std::basic_string<C>& + element_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The application can discover the actual type of the element + object returned by <code>parse()</code> either using + <code>dynamic_cast</code> or by comparing element names and + namespaces. The following code fragments illustrate how the + element map can be used:</p> + + <pre class="c++"> +// Parsing. +// +DOMElement& e = ... // Parse XML to DOM. + +unique_ptr<xml_schema::element_type> r ( + xml_schema::element_map::parse (e)); + +if (root1* r1 = dynamic_cast<root1*> (r.get ())) +{ + ... 
+} +else if (r->_name () == root2::name () && + r->_namespace () == root2::namespace_ ()) +{ + root2& r2 (static_cast<root2&> (*r)); + + ... +} + </pre> + + <pre class="c++"> +// Serialization. +// +xml_schema::element_type& r = ... + +string name (r._name ()); +string ns (r._namespace ()); + +DOMDocument& doc = ... // Create a new DOMDocument with name and ns. +DOMElement& e (*doc.getDocumentElement ()); + +xml_schema::element_map::serialize (e, r); + +// Serialize DOMDocument to XML. + </pre> + + <!-- --> + + <h2><a name="2.10">2.10 Mapping for Global Attributes</a></h2> + + <p>An XML Schema attribute definition is called global if it appears + directly under the <code>schema</code> element. A global + attribute does not have any mapping. + </p> + + <!-- + When it is referenced from + a local attribute definition (using the <code>ref</code> attribute) + it is treated as a local attribute (see Section 2.8, "Mapping for + Local Elements and Attributes"). + --> + + <h2><a name="2.11">2.11 Mapping for <code>xsi:type</code> and Substitution + Groups</a></h2> + + <p>The mapping provides optional support for the XML Schema polymorphism + features (<code>xsi:type</code> and substitution groups) which can + be requested with the <code>--generate-polymorphic</code> option. + When used, the dynamic type of a member may be different from + its static type. 
Consider the following schema definition and + instance document: + </p> + + <pre class="xml"> +<!-- test.xsd --> +<schema> + <complexType name="base"> + <attribute name="text" type="string"/> + </complexType> + + <complexType name="derived"> + <complexContent> + <extension base="base"> + <attribute name="extra-text" type="string"/> + </extension> + </complexContent> + </complexType> + + <complexType name="root_type"> + <sequence> + <element name="item" type="base" maxOccurs="unbounded"/> + </sequence> + </complexType> + + <element name="root" type="root_type"/> +</schema> + +<!-- test.xml --> +<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <item text="hello"/> + <item text="hello" extra-text="world" xsi:type="derived"/> +</root> + </pre> + + <p>In the resulting object model, the container for + the <code>root::item</code> member will have two elements: + the first element's type will be <code>base</code> while + the second element's (dynamic) type will be + <code>derived</code>. This can be discovered using the + <code>dynamic_cast</code> operator as shown in the following + example: + </p> + + <pre class="c++"> +void +f (root& r) +{ + for (root::item_const_iterator i (r.item ().begin ()); + i != r.item ().end (); + ++i) + { + if (const derived* d = dynamic_cast<const derived*> (&(*i))) + { + // derived + } + else + { + // base + } + } +} + </pre> + + <p>The <code>_clone</code> virtual function should be used instead of + copy constructors to make copies of members that might use + polymorphism: + </p> + + <pre class="c++"> +void +f (root& r) +{ + for (root::item_const_iterator i (r.item ().begin ()); + i != r.item ().end (); + ++i) + { + std::unique_ptr<base> c (i->_clone ()); + } +} + </pre> + + <p>The mapping can often automatically determine which types are + polymorphic based on the substitution group declarations. 
However, + if your XML vocabulary is not using substitution groups or if + substitution groups are defined in a separate schema, then you will + need to use the <code>--polymorphic-type</code> option to specify + which types are polymorphic. When using this option you only need + to specify the root of a polymorphic type hierarchy and the mapping + will assume that all the derived types are also polymorphic. + Also note that you need to specify this option when compiling every + schema file that references the polymorphic type. Consider the following + two schemas as an example:</p> + + <pre class="xml"> +<!-- base.xsd --> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="base"> + <xs:sequence> + <xs:element name="b" type="xs:int"/> + </xs:sequence> + </xs:complexType> + + <!-- substitution group root --> + <xs:element name="base" type="base"/> + +</xs:schema> + </pre> + + <pre class="xml"> +<!-- derived.xsd --> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:include schemaLocation="base.xsd"/> + + <xs:complexType name="derived"> + <xs:complexContent> + <xs:extension base="base"> + <xs:sequence> + <xs:element name="d" type="xs:string"/> + </xs:sequence> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="derived" type="derived" substitutionGroup="base"/> + +</xs:schema> + </pre> + + <p>In this example we need to specify "<code>--polymorphic-type base</code>" + when compiling both schemas because the substitution group is declared + in a schema other than the one defining type <code>base</code>.</p> + + <p>You can also indicate that all types should be treated as polymorphic + with the <code>--polymorphic-type-all</code> option. 
However, this may result + in slower generated code with a greater footprint.</p> + + + <!-- Mapping for any and anyAttribute --> + + + <h2><a name="2.12">2.12 Mapping for <code>any</code> and <code>anyAttribute</code></a></h2> + + <p>For the XML Schema <code>any</code> and <code>anyAttribute</code> + wildcards an optional mapping can be requested with the + <code>--generate-wildcard</code> option. The mapping represents + the content matched by wildcards as DOM fragments. Because the + DOM API is used to access such content, the Xerces-C++ runtime + should be initialized by the application prior to parsing and + should remain initialized for the lifetime of objects with + the wildcard content. For more information on the Xerces-C++ + runtime initialization see <a href="#3.1">Section 3.1, + "Initializing the Xerces-C++ Runtime"</a>. + </p> + + <p>The mapping for <code>any</code> is similar to the mapping for + local elements (see <a href="#2.8">Section 2.8, "Mapping for Local + Elements and Attributes"</a>) except that the type used in the + wildcard mapping is <code>xercesc::DOMElement</code>. As with local + elements, the mapping divides all possible cardinality combinations + into three cardinality classes: <i>one</i>, <i>optional</i>, and + <i>sequence</i>. + </p> + + <p>The mapping for <code>anyAttribute</code> represents the attributes + matched by this wildcard as a set of <code>xercesc::DOMAttr</code> + objects with a key being the attribute's name and namespace.</p> + + <p>Similar to local elements and attributes, the <code>any</code> and + <code>anyAttribute</code> wildcards are mapped to a set of public type + definitions (typedefs) and a set of public accessor and modifier + functions. Type definitions have names derived from <code>"any"</code> + for the <code>any</code> wildcard and <code>"any_attribute"</code> + for the <code>anyAttribute</code> wildcard. 
The accessor and modifier + functions are named <code>"any"</code> for the <code>any</code> wildcard + and <code>"any_attribute"</code> for the <code>anyAttribute</code> + wildcard. Subsequent wildcards in the same type have escaped names + such as <code>"any1"</code> or <code>"any_attribute1"</code>. + </p> + + <p>Because Xerces-C++ DOM nodes always belong to a <code>DOMDocument</code>, + each type with a wildcard has an associated <code>DOMDocument</code> + object. The reference to this object can be obtained using the accessor + function called <code>dom_document</code>. The access to the document + object from the application code may be necessary to create or modify + the wildcard content. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other"/> + </sequence> + <anyAttribute namespace="##other"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // any + // + const xercesc::DOMElement& + any () const; + + void + any (const xercesc::DOMElement&); + + ... + + // any_attribute + // + typedef attribute_set any_attribute_set; + typedef any_attribute_set::iterator any_attribute_iterator; + typedef any_attribute_set::const_iterator any_attribute_const_iterator; + + const any_attribute_set& + any_attribute () const; + + any_attribute_set& + any_attribute (); + + ... + + // DOMDocument object for wildcard content. + // + const xercesc::DOMDocument& + dom_document () const; + + xercesc::DOMDocument& + dom_document (); + + ... +}; + </pre> + + + <p>Names and semantics of type definitions for the wildcards as well + as signatures of the accessor and modifier functions depend on the + wildcard type as well as the cardinality class for the <code>any</code> + wildcard. They are described in the following sub-sections. 
+ </p> + + + <h3><a name="2.12.1">2.12.1 Mapping for <code>any</code> with the One Cardinality Class</a></h3> + + <p>For <code>any</code> with the One cardinality class, + there are no type definitions. The accessor functions come in + constant and non-constant versions. The constant accessor function + returns a constant reference to <code>xercesc::DOMElement</code> and + can be used for read-only access. The non-constant version returns + an unrestricted reference to <code>xercesc::DOMElement</code> and can + be used for read-write access. + </p> + + <p>The first modifier function expects an argument of type reference + to constant <code>xercesc::DOMElement</code> and makes a deep copy + of its argument. The second modifier function expects an argument of + type pointer to <code>xercesc::DOMElement</code>. This modifier + function assumes ownership of its argument and expects the element + object to be created using the DOM document associated with this + instance. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Accessors. + // + const xercesc::DOMElement& + any () const; + + xercesc::DOMElement& + any (); + + // Modifiers. + // + void + any (const xercesc::DOMElement&); + + void + any (xercesc::DOMElement*); + + ... 
+ +}; + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + DOMElement& e1 (o.any ()); // get + o.any (e); // set, deep copy + DOMDocument& doc (o.dom_document ()); + o.any (doc.createElement (...)); // set, assumes ownership +} + </pre> + + <h3><a name="2.12.2">2.12.2 Mapping for <code>any</code> with the Optional Cardinality Class</a></h3> + + <p>For <code>any</code> with the Optional cardinality class, the type + definitions consist of an alias for the container type with name + <code>any_optional</code> (or <code>any1_optional</code>, etc., for + subsequent wildcards in the type definition). + </p> + + <p>Unlike accessor functions for the One cardinality class, accessor + functions for the Optional cardinality class return references to + corresponding containers rather than directly to <code>DOMElement</code>. + The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to + the container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container + and can be used for read-write access. + </p> + + <p>The modifier functions are overloaded for <code>xercesc::DOMElement</code> + and the container type. The first modifier function expects an argument of + type reference to constant <code>xercesc::DOMElement</code> and + makes a deep copy of its argument. The second modifier function + expects an argument of type pointer to <code>xercesc::DOMElement</code>. + This modifier function assumes ownership of its argument and expects + the element object to be created using the DOM document associated + with this instance. The third modifier function expects an argument + of type reference to constant of the container type and makes a + deep copy of its argument. 
For instance: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other" minOccurs="0"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef element_optional any_optional; + + // Accessors. + // + const any_optional& + any () const; + + any_optional& + any (); + + // Modifiers. + // + void + any (const xercesc::DOMElement&); + + void + any (xercesc::DOMElement*); + + void + any (const any_optional&); + + ... + +}; + </pre> + + + <p>The <code>element_optional</code> container is a + specialization of the <code>optional</code> class template described + in <a href="#2.8.2">Section 2.8.2, "Mapping for Members with the Optional + Cardinality Class"</a>. Its interface is presented below: + </p> + + <pre class="c++"> +class element_optional +{ +public: + explicit + element_optional (xercesc::DOMDocument&); + + // Makes a deep copy. + // + element_optional (const xercesc::DOMElement&, xercesc::DOMDocument&); + + // Assumes ownership. + // + element_optional (xercesc::DOMElement*, xercesc::DOMDocument&); + + element_optional (const element_optional&, xercesc::DOMDocument&); + +public: + element_optional& + operator= (const xercesc::DOMElement&); + + element_optional& + operator= (const element_optional&); + + // Pointer-like interface. + // +public: + const xercesc::DOMElement* + operator-> () const; + + xercesc::DOMElement* + operator-> (); + + const xercesc::DOMElement& + operator* () const; + + xercesc::DOMElement& + operator* (); + + typedef void (element_optional::*bool_convertible) (); + operator bool_convertible () const; + + // Get/set interface. + // +public: + bool + present () const; + + const xercesc::DOMElement& + get () const; + + xercesc::DOMElement& + get (); + + // Makes a deep copy. + // + void + set (const xercesc::DOMElement&); + + // Assumes ownership. 
+ // + void + set (xercesc::DOMElement*); + + void + reset (); +}; + +bool +operator== (const element_optional&, const element_optional&); + +bool +operator!= (const element_optional&, const element_optional&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + DOMDocument& doc (o.dom_document ()); + + if (o.any ().present ()) // test + { + DOMElement& e1 (o.any ().get ()); // get + o.any ().set (e); // set, deep copy + o.any ().set (doc.createElement (...)); // set, assumes ownership + o.any ().reset (); // reset + } + + // Same as above but using pointer notation: + // + if (o.any ()) // test + { + DOMElement& e1 (*o.any ()); // get + o.any (e); // set, deep copy + o.any (doc.createElement (...)); // set, assumes ownership + o.any ().reset (); // reset + } +} + </pre> + + + + <h3><a name="2.12.3">2.12.3 Mapping for <code>any</code> with the Sequence Cardinality Class</a></h3> + + <p>For <code>any</code> with the Sequence cardinality class, the type + definitions consist of an alias of the container type with name + <code>any_sequence</code> (or <code>any1_sequence</code>, etc., for + subsequent wildcards in the type definition), an alias of the iterator + type with name <code>any_iterator</code> (or <code>any1_iterator</code>, + etc., for subsequent wildcards in the type definition), and an alias + of the constant iterator type with name <code>any_const_iterator</code> + (or <code>any1_const_iterator</code>, etc., for subsequent wildcards + in the type definition). + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. 
+ </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function makes + a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef element_sequence any_sequence; + typedef any_sequence::iterator any_iterator; + typedef any_sequence::const_iterator any_const_iterator; + + // Accessors. + // + const any_sequence& + any () const; + + any_sequence& + any (); + + // Modifier. + // + void + any (const any_sequence&); + + ... + +}; + </pre> + + <p>The <code>element_sequence</code> container is a + specialization of the <code>sequence</code> class template described + in <a href="#2.8.3">Section 2.8.3, "Mapping for Members with the + Sequence Cardinality Class"</a>. Its interface is similar to + the sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences") and is + presented below: + </p> + + <pre class="c++"> +class element_sequence +{ +public: + typedef xercesc::DOMElement value_type; + typedef xercesc::DOMElement* pointer; + typedef const xercesc::DOMElement* const_pointer; + typedef xercesc::DOMElement& reference; + typedef const xercesc::DOMElement& const_reference; + + typedef <implementation-defined> iterator; + typedef <implementation-defined> const_iterator; + typedef <implementation-defined> reverse_iterator; + typedef <implementation-defined> const_reverse_iterator; + + typedef <implementation-defined> size_type; + typedef <implementation-defined> difference_type; + typedef <implementation-defined> allocator_type; + +public: + explicit + element_sequence (xercesc::DOMDocument&); + + // DOMElement cannot be default-constructed. 
+ // + // explicit + // element_sequence (size_type n); + + element_sequence (size_type n, + const xercesc::DOMElement&, + xercesc::DOMDocument&); + + template <typename I> + element_sequence (const I& begin, + const I& end, + xercesc::DOMDocument&); + + element_sequence (const element_sequence&, xercesc::DOMDocument&); + + element_sequence& + operator= (const element_sequence&); + +public: + void + assign (size_type n, const xercesc::DOMElement&); + + template <typename I> + void + assign (const I& begin, const I& end); + +public: + // This version of resize can only be used to shrink the + // sequence because DOMElement cannot be default-constructed. + // + void + resize (size_type); + + void + resize (size_type, const xercesc::DOMElement&); + +public: + size_type + size () const; + + size_type + max_size () const; + + size_type + capacity () const; + + bool + empty () const; + + void + reserve (size_type); + + void + clear (); + +public: + const_iterator + begin () const; + + const_iterator + end () const; + + iterator + begin (); + + iterator + end (); + + const_reverse_iterator + rbegin () const; + + const_reverse_iterator + rend () const; + + reverse_iterator + rbegin (); + + reverse_iterator + rend (); + +public: + xercesc::DOMElement& + operator[] (size_type); + + const xercesc::DOMElement& + operator[] (size_type) const; + + xercesc::DOMElement& + at (size_type); + + const xercesc::DOMElement& + at (size_type) const; + + xercesc::DOMElement& + front (); + + const xercesc::DOMElement& + front () const; + + xercesc::DOMElement& + back (); + + const xercesc::DOMElement& + back () const; + +public: + // Makes a deep copy. + // + void + push_back (const xercesc::DOMElement&); + + // Assumes ownership. + // + void + push_back (xercesc::DOMElement*); + + void + pop_back (); + + // Makes a deep copy. + // + iterator + insert (iterator position, const xercesc::DOMElement&); + + // Assumes ownership. 
+ // + iterator + insert (iterator position, xercesc::DOMElement*); + + void + insert (iterator position, size_type n, const xercesc::DOMElement&); + + template <typename I> + void + insert (iterator position, const I& begin, const I& end); + + iterator + erase (iterator position); + + iterator + erase (iterator begin, iterator end); + +public: + // Note that the DOMDocument object of the two sequences being + // swapped should be the same. + // + void + swap (element_sequence& x); +}; + +inline bool +operator== (const element_sequence&, const element_sequence&); + +inline bool +operator!= (const element_sequence&, const element_sequence&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + object::any_sequence& s (o.any ()); + + // Iteration. + // + for (object::any_iterator i (s.begin ()); i != s.end (); ++i) + { + DOMElement& e (*i); + } + + // Modification. + // + s.push_back (e); // deep copy + DOMDocument& doc (o.dom_document ()); + s.push_back (doc.createElement (...)); // assumes ownership +} + </pre> + + <h3><a name="2.12.4">2.12.4 Element Wildcard Order</a></h3> + + <p>Similar to elements, element wildcards in ordered types + (<a href="#2.8.4">Section 2.8.4, "Element Order"</a>) are assigned + content ids and are included in the content order sequence. + Continuing with the bank transactions example started in Section + 2.8.4, we can extend the batch by allowing custom transactions:</p> + + <pre class="xml"> +<complexType name="batch"> + <choice minOccurs="0" maxOccurs="unbounded"> + <element name="withdraw" type="withdraw"/> + <element name="deposit" type="deposit"/> + <any namespace="##other" processContents="lax"/> + </choice> +</complexType> + </pre> + + <p>This will lead to the following changes in the generated + <code>batch</code> C++ class:</p> + + <pre class="c++"> +class batch: public xml_schema::type +{ +public: + ... 
+ + // any + // + typedef element_sequence any_sequence; + typedef any_sequence::iterator any_iterator; + typedef any_sequence::const_iterator any_const_iterator; + + static const std::size_t any_id = 3UL; + + const any_sequence& + any () const; + + any_sequence& + any (); + + void + any (const any_sequence&); + + ... +}; + </pre> + + <p>With this change we also need to update the iteration code to handle + the new content id:</p> + + <pre class="c++"> +for (batch::content_order_const_iterator i (b.content_order ().begin ()); + i != b.content_order ().end (); + ++i) +{ + switch (i->id) + { + ... + + case batch::any_id: + { + const DOMElement& e (b.any ()[i->index]); + ... + break; + } + + ... + } +} + </pre> + + <p>For the complete working code that shows the use of wildcards in + ordered types refer to the <code>order/element</code> example in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <h3><a name="2.12.5">2.12.5 Mapping for <code>anyAttribute</code></a></h3> + + <p>For <code>anyAttribute</code> the type definitions consist of an alias + of the container type with name <code>any_attribute_set</code> + (or <code>any1_attribute_set</code>, etc., for subsequent wildcards + in the type definition), an alias of the iterator type with name + <code>any_attribute_iterator</code> (or <code>any1_attribute_iterator</code>, + etc., for subsequent wildcards in the type definition), and an alias + of the constant iterator type with name <code>any_attribute_const_iterator</code> + (or <code>any1_attribute_const_iterator</code>, etc., for subsequent + wildcards in the type definition). + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. 
+ </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function makes + a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + ... + </sequence> + <anyAttribute namespace="##other"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef attribute_set any_attribute_set; + typedef any_attribute_set::iterator any_attribute_iterator; + typedef any_attribute_set::const_iterator any_attribute_const_iterator; + + // Accessors. + // + const any_attribute_set& + any_attribute () const; + + any_attribute_set& + any_attribute (); + + // Modifier. + // + void + any_attribute (const any_attribute_set&); + + ... + +}; + </pre> + + <p>The <code>attribute_set</code> class is an associative container + similar to the <code>std::set</code> class template as defined by + the ISO/ANSI Standard for C++ (ISO/IEC 14882:1998, Section 23.3.3, + "Class template set") with the key being the attribute's name + and namespace. Unlike <code>std::set</code>, <code>attribute_set</code> + allows searching using names and namespaces instead of + <code>xercesc::DOMAttr</code> objects. 
It is defined in an + implementation-specific namespace and its interface is presented + below: + </p> + + <pre class="c++"> +class attribute_set +{ +public: + typedef xercesc::DOMAttr key_type; + typedef xercesc::DOMAttr value_type; + typedef xercesc::DOMAttr* pointer; + typedef const xercesc::DOMAttr* const_pointer; + typedef xercesc::DOMAttr& reference; + typedef const xercesc::DOMAttr& const_reference; + + typedef <implementation-defined> iterator; + typedef <implementation-defined> const_iterator; + typedef <implementation-defined> reverse_iterator; + typedef <implementation-defined> const_reverse_iterator; + + typedef <implementation-defined> size_type; + typedef <implementation-defined> difference_type; + typedef <implementation-defined> allocator_type; + +public: + attribute_set (xercesc::DOMDocument&); + + template <typename I> + attribute_set (const I& begin, const I& end, xercesc::DOMDocument&); + + attribute_set (const attribute_set&, xercesc::DOMDocument&); + + attribute_set& + operator= (const attribute_set&); + +public: + const_iterator + begin () const; + + const_iterator + end () const; + + iterator + begin (); + + iterator + end (); + + const_reverse_iterator + rbegin () const; + + const_reverse_iterator + rend () const; + + reverse_iterator + rbegin (); + + reverse_iterator + rend (); + +public: + size_type + size () const; + + size_type + max_size () const; + + bool + empty () const; + + void + clear (); + +public: + // Makes a deep copy. + // + std::pair<iterator, bool> + insert (const xercesc::DOMAttr&); + + // Assumes ownership. + // + std::pair<iterator, bool> + insert (xercesc::DOMAttr*); + + // Makes a deep copy. + // + iterator + insert (iterator position, const xercesc::DOMAttr&); + + // Assumes ownership. 
+ // + iterator + insert (iterator position, xercesc::DOMAttr*); + + template <typename I> + void + insert (const I& begin, const I& end); + +public: + void + erase (iterator position); + + size_type + erase (const std::basic_string<C>& name); + + size_type + erase (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name); + + size_type + erase (const XMLCh* name); + + size_type + erase (const XMLCh* namespace_, const XMLCh* name); + + void + erase (iterator begin, iterator end); + +public: + size_type + count (const std::basic_string<C>& name) const; + + size_type + count (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name) const; + + size_type + count (const XMLCh* name) const; + + size_type + count (const XMLCh* namespace_, const XMLCh* name) const; + + iterator + find (const std::basic_string<C>& name); + + iterator + find (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name); + + iterator + find (const XMLCh* name); + + iterator + find (const XMLCh* namespace_, const XMLCh* name); + + const_iterator + find (const std::basic_string<C>& name) const; + + const_iterator + find (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name) const; + + const_iterator + find (const XMLCh* name) const; + + const_iterator + find (const XMLCh* namespace_, const XMLCh* name) const; + +public: + // Note that the DOMDocument object of the two sets being + // swapped should be the same. + // + void + swap (attribute_set&); +}; + +bool +operator== (const attribute_set&, const attribute_set&); + +bool +operator!= (const attribute_set&, const attribute_set&); + </pre> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMAttr& a) +{ + using namespace xercesc; + + object::any_attribute_set& s (o.any_attribute ()); + + // Iteration. 
+ // + for (object::any_attribute_iterator i (s.begin ()); i != s.end (); ++i) + { + DOMAttr& a (*i); + } + + // Modification. + // + s.insert (a); // deep copy + DOMDocument& doc (o.dom_document ()); + s.insert (doc.createAttribute (...)); // assumes ownership + + // Searching. + // + object::any_attribute_iterator i (s.find ("name")); + i = s.find ("http://www.w3.org/XML/1998/namespace", "lang"); +} + </pre> + + <!-- Mapping for Mixed Content Models --> + + <h2><a name="2.13">2.13 Mapping for Mixed Content Models</a></h2> + + <p>For XML Schema types with mixed content models C++/Tree provides + mapping support only if the type is marked as ordered + (<a href="#2.8.4">Section 2.8.4, "Element Order"</a>). Use the + <code>--ordered-type-mixed</code> XSD compiler option to + automatically mark all types with mixed content as ordered.</p> + + <p>For an ordered type with mixed content, C++/Tree adds an extra + text content sequence that is used to store the text fragments. + This text content sequence is also assigned the content id and + its entries are included in the content order sequence, just + like elements. 
As a result, it is possible to capture the order + between elements and text fragments.</p> + + <p>As an example, consider the following schema that describes text + with embedded links:</p> + + <pre class="xml"> +<complexType name="anchor"> + <simpleContent> + <extension base="string"> + <attribute name="href" type="anyURI" use="required"/> + </extension> + </simpleContent> +</complexType> + +<complexType name="text" mixed="true"> + <sequence> + <element name="a" type="anchor" minOccurs="0" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>The generated <code>text</code> C++ class will provide the following + API (assuming it is marked as ordered):</p> + + <pre class="c++"> +class text: public xml_schema::type +{ +public: + // a + // + typedef anchor a_type; + typedef sequence<a_type> a_sequence; + typedef a_sequence::iterator a_iterator; + typedef a_sequence::const_iterator a_const_iterator; + + static const std::size_t a_id = 1UL; + + const a_sequence& + a () const; + + a_sequence& + a (); + + void + a (const a_sequence&); + + // text_content + // + typedef xml_schema::string text_content_type; + typedef sequence<text_content_type> text_content_sequence; + typedef text_content_sequence::iterator text_content_iterator; + typedef text_content_sequence::const_iterator text_content_const_iterator; + + static const std::size_t text_content_id = 2UL; + + const text_content_sequence& + text_content () const; + + text_content_sequence& + text_content (); + + void + text_content (const text_content_sequence&); + + // content_order + // + typedef xml_schema::content_order content_order_type; + typedef std::vector<content_order_type> content_order_sequence; + typedef content_order_sequence::iterator content_order_iterator; + typedef content_order_sequence::const_iterator content_order_const_iterator; + + const content_order_sequence& + content_order () const; + + content_order_sequence& + content_order (); + + void + content_order (const 
content_order_sequence&); + + ... +}; + </pre> + + <p>Given this interface we can iterate over both link elements + and text in content order. The following code fragment converts + our format to plain text with references.</p> + + <pre class="c++"> +const text& t = ... + +for (text::content_order_const_iterator i (t.content_order ().begin ()); + i != t.content_order ().end (); + ++i) +{ + switch (i->id) + { + case text::a_id: + { + const anchor& a (t.a ()[i->index]); + cerr << a << "[" << a.href () << "]"; + break; + } + case text::text_content_id: + { + const xml_schema::string& s (t.text_content ()[i->index]); + cerr << s; + break; + } + default: + { + assert (false); // Unknown content id. + } + } +} + </pre> + + <p>For the complete working code that shows the use of mixed content + in ordered types refer to the <code>order/mixed</code> example in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <!-- Parsing --> + + + <h1><a name="3">3 Parsing</a></h1> + + <p>This chapter covers various aspects of parsing XML instance + documents in order to obtain corresponding tree-like object + model. + </p> + + <p>Each global XML Schema element in the form:</p> + + <pre class="xml"> +<element name="name" type="type"/> + </pre> + + <p>is mapped to 14 overloaded C++ functions in the form:</p> + + <pre class="c++"> +// Read from a URI or a local file. 
+// + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from std::istream. +// + +std::[unique|auto]_ptr<type> +name (std::istream&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from InputSource. 
+// + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from DOM. +// + +std::[unique|auto]_ptr<type> +name (const xercesc::DOMDocument&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xml_schema::dom::[unique|auto]_ptr<xercesc::DOMDocument>, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + </pre> + + <p>You can choose between reading an XML instance from a local file, + URI, <code>std::istream</code>, <code>xercesc::InputSource</code>, + or a pre-parsed DOM instance in the form of + <code>xercesc::DOMDocument</code>. All the parsing functions + return a dynamically allocated object model as either + <code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected. Each of these parsing + functions is discussed in more detail in the following sections. + </p> + + <h2><a name="3.1">3.1 Initializing the Xerces-C++ Runtime</a></h2> + + <p>Some parsing functions expect you to initialize the Xerces-C++ + runtime while others initialize and terminate it as part of their + work. The general rule is as follows: if a function has any arguments + or returns a value that is an instance of a Xerces-C++ type, then + this function expects you to initialize the Xerces-C++ runtime. + Otherwise, the function initializes and terminates the runtime for + you. 
Note that it is legal to have nested calls to the Xerces-C++ + initialize and terminate functions as long as the calls are balanced. + </p> + + <p>You can instruct parsing functions that initialize and terminate + the runtime not to do so by passing the + <code>xml_schema::flags::dont_initialize</code> flag (see + <a href="#3.2">Section 3.2, "Flags and Properties"</a>). + </p> + + + <h2><a name="3.2">3.2 Flags and Properties</a></h2> + + <p>Parsing flags and properties are the last two arguments of every + parsing function. They allow you to fine-tune the process of + instance validation and parsing. Both arguments are optional. + </p> + + + <p>The following flags are recognized by the parsing functions:</p> + + <dl> + <dt><code>xml_schema::flags::keep_dom</code></dt> + <dd>Keep association between DOM nodes and the resulting + object model nodes. For more information about DOM association + refer to <a href="#5.1">Section 5.1, "DOM Association"</a>.</dd> + + <dt><code>xml_schema::flags::own_dom</code></dt> + <dd>Assume ownership of the DOM document passed. This flag only + makes sense together with the <code>keep_dom</code> flag in + the call to the parsing function with the + <code>xml_schema::dom::[unique|auto]_ptr<DOMDocument></code> + argument.</dd> + + <dt><code>xml_schema::flags::dont_validate</code></dt> + <dd>Do not validate instance documents against schemas.</dd> + + <dt><code>xml_schema::flags::dont_initialize</code></dt> + <dd>Do not initialize the Xerces-C++ runtime.</dd> + </dl> + + <p>You can pass several flags by combining them using the bit-wise OR + operator. For example:</p> + + <pre class="c++"> +using xml_schema::flags; + +std::unique_ptr<type> r ( + name ("test.xml", flags::keep_dom | flags::dont_validate)); + </pre> + + <p>By default, validation of instance documents is turned on even + though parsers generated by XSD do not assume instance + documents are valid. 
They include a number of checks that prevent + construction of inconsistent object models. This, + however, does not mean that an instance document that was + successfully parsed by the XSD-generated parsers is + valid per the corresponding schema. If an instance document is not + "valid enough" for the generated parsers to construct a consistent + object model, one of the exceptions defined in + <code>xml_schema</code> namespace is thrown (see + <a href="#3.3">Section 3.3, "Error Handling"</a>). + </p> + + <p>For more information on the Xerces-C++ runtime initialization + refer to <a href="#3.1">Section 3.1, "Initializing the Xerces-C++ + Runtime"</a>. + </p> + + <p>The <code>xml_schema::properties</code> class allows you to + programmatically specify schema locations to be used instead + of those specified with the <code>xsi:schemaLocation</code> + and <code>xsi:noNamespaceSchemaLocation</code> attributes + in instance documents. The interface of the <code>properties</code> + class is presented below: + </p> + + <pre class="c++"> +class properties +{ +public: + void + schema_location (const std::basic_string<C>& namespace_, + const std::basic_string<C>& location); + void + no_namespace_schema_location (const std::basic_string<C>& location); +}; + </pre> + + <p>Note that all locations are relative to an instance document unless + they are URIs. For example, if you want to use a local file as your + schema, then you will need to pass + <code>file:///absolute/path/to/your/schema</code> as the location + argument. + </p> + + <h2><a name="3.3">3.3 Error Handling</a></h2> + + <p>As discussed in <a href="#2.2">Section 2.2, "Error Handling"</a>, + the mapping uses the C++ exception handling mechanism as its primary + way of reporting error conditions. 
However, to handle recoverable + parsing and validation errors and warnings, a callback interface may be + preferred by the application.</p> + + <p>To better understand error handling and reporting strategies employed + by the parsing functions, it is useful to know that the + transformation of an XML instance document to a statically-typed + tree happens in two stages. The first stage, performed by Xerces-C++, + consists of parsing an XML document into a DOM instance. For short, + we will call this stage the XML-DOM stage. Validation, if not disabled, + happens during this stage. The second stage, + performed by the generated parsers, consists of parsing the DOM + instance into the statically-typed tree. We will call this stage + the DOM-Tree stage. Additional checks are performed during this + stage in order to prevent construction of an inconsistent tree which + could otherwise happen when validation is disabled, for example.</p> + + <p>All parsing functions except the one that operates on a DOM instance + come in overloaded triples. The first function in such a triple + reports error conditions exclusively by throwing exceptions. It + accumulates all the parsing and validation errors of the XML-DOM + stage and throws them in a single instance of the + <code>xml_schema::parsing</code> exception (described below). + The second and the third functions in the triple use callback + interfaces to report parsing and validation errors and warnings. + The two callback interfaces are <code>xml_schema::error_handler</code> + and <code>xercesc::DOMErrorHandler</code>. For more information + on the <code>xercesc::DOMErrorHandler</code> interface refer to + the Xerces-C++ documentation. 
The <code>xml_schema::error_handler</code> + interface is presented below: + </p> + + <pre class="c++"> +class error_handler +{ +public: + struct severity + { + enum value + { + warning, + error, + fatal + }; + }; + + virtual bool + handle (const std::basic_string<C>& id, + unsigned long line, + unsigned long column, + severity, + const std::basic_string<C>& message) = 0; + + virtual + ~error_handler (); +}; + </pre> + + <p>The <code>id</code> argument of the <code>error_handler::handle</code> + function identifies the resource being parsed (e.g., a file name or + URI). + </p> + + <p>By returning <code>true</code> from the <code>handle</code> function + you instruct the parser to recover and continue parsing. Returning + <code>false</code> results in termination of the parsing process. + An error with the <code>fatal</code> severity level results in + termination of the parsing process no matter what is returned from + the <code>handle</code> function. It is safe to throw an exception + from the <code>handle</code> function. + </p> + + <p>The DOM-Tree stage reports error conditions exclusively by throwing + exceptions. Individual exceptions thrown by the parsing functions + are described in the following sub-sections. 
+ </p> + + + <h3><a name="3.3.1">3.3.1 <code>xml_schema::parsing</code></a></h3> + + <pre class="c++"> +struct severity +{ + enum value + { + warning, + error + }; + + severity (value); + operator value () const; +}; + +struct error +{ + error (severity, + const std::basic_string<C>& id, + unsigned long line, + unsigned long column, + const std::basic_string<C>& message); + + severity + severity () const; + + const std::basic_string<C>& + id () const; + + unsigned long + line () const; + + unsigned long + column () const; + + const std::basic_string<C>& + message () const; +}; + +std::basic_ostream<C>& +operator<< (std::basic_ostream<C>&, const error&); + +struct diagnostics: std::vector<error> +{ +}; + +std::basic_ostream<C>& +operator<< (std::basic_ostream<C>&, const diagnostics&); + +struct parsing: virtual exception +{ + parsing (); + parsing (const diagnostics&); + + const diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::parsing</code> exception is thrown if there + were parsing or validation errors reported during the XML-DOM stage. + If no callback interface was provided to the parsing function, the + exception contains a list of errors and warnings accessible using + the <code>diagnostics</code> function. The usual conditions when + this exception is thrown include malformed XML instances and, if + validation is turned on, invalid instance documents. 
+ </p> + + <h3><a name="3.3.2">3.3.2 <code>xml_schema::expected_element</code></a></h3> + + <pre class="c++"> +struct expected_element: virtual exception +{ + expected_element (const std::basic_string<C>& name, + const std::basic_string<C>& namespace_); + + + const std::basic_string<C>& + name () const; + + const std::basic_string<C>& + namespace_ () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_element</code> exception is thrown + when an expected element is not encountered by the DOM-Tree stage. + The name and namespace of the expected element can be obtained using + the <code>name</code> and <code>namespace_</code> functions respectively. + </p> + + + <h3><a name="3.3.3">3.3.3 <code>xml_schema::unexpected_element</code></a></h3> + + <pre class="c++"> +struct unexpected_element: virtual exception +{ + unexpected_element (const std::basic_string<C>& encountered_name, + const std::basic_string<C>& encountered_namespace, + const std::basic_string<C>& expected_name, + const std::basic_string<C>& expected_namespace); + + + const std::basic_string<C>& + encountered_name () const; + + const std::basic_string<C>& + encountered_namespace () const; + + + const std::basic_string<C>& + expected_name () const; + + const std::basic_string<C>& + expected_namespace () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::unexpected_element</code> exception is thrown + when an unexpected element is encountered by the DOM-Tree stage. + The name and namespace of the encountered element can be obtained + using the <code>encountered_name</code> and + <code>encountered_namespace</code> functions respectively. If an + element was expected instead of the encountered one, its name + and namespace can be obtained using the <code>expected_name</code> and + <code>expected_namespace</code> functions respectively. Otherwise + these functions return empty strings. 
+ </p> + + <h3><a name="3.3.4">3.3.4 <code>xml_schema::expected_attribute</code></a></h3> + + <pre class="c++"> +struct expected_attribute: virtual exception +{ + expected_attribute (const std::basic_string<C>& name, + const std::basic_string<C>& namespace_); + + + const std::basic_string<C>& + name () const; + + const std::basic_string<C>& + namespace_ () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_attribute</code> exception is thrown + when an expected attribute is not encountered by the DOM-Tree stage. + The name and namespace of the expected attribute can be obtained using + the <code>name</code> and <code>namespace_</code> functions respectively. + </p> + + + <h3><a name="3.3.5">3.3.5 <code>xml_schema::unexpected_enumerator</code></a></h3> + + <pre class="c++"> +struct unexpected_enumerator: virtual exception +{ + unexpected_enumerator (const std::basic_string<C>& enumerator); + + const std::basic_string<C>& + enumerator () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::unexpected_enumerator</code> exception is thrown + when an unexpected enumerator is encountered by the DOM-Tree stage. + The enumerator can be obtained using the <code>enumerator</code> + function. + </p> + + <h3><a name="3.3.6">3.3.6 <code>xml_schema::expected_text_content</code></a></h3> + + <pre class="c++"> +struct expected_text_content: virtual exception +{ + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_text_content</code> exception is thrown + when a content other than text is encountered and the text content was + expected by the DOM-Tree stage. 
+ </p> + + <h3><a name="3.3.7">3.3.7 <code>xml_schema::no_type_info</code></a></h3> + + <pre class="c++"> +struct no_type_info: virtual exception +{ + no_type_info (const std::basic_string<C>& type_name, + const std::basic_string<C>& type_namespace); + + const std::basic_string<C>& + type_name () const; + + const std::basic_string<C>& + type_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::no_type_info</code> exception is thrown + when there is no type information associated with a type specified + by the <code>xsi:type</code> attribute. This exception is thrown + by the DOM-Tree stage. The name and namespace of the type in question + can be obtained using the <code>type_name</code> and + <code>type_namespace</code> functions respectively. Usually, catching + this exception means that you haven't linked the code generated + from the schema defining the type in question with your application + or this schema has been compiled without the + <code>--generate-polymorphic</code> option. + </p> + + + <h3><a name="3.3.8">3.3.8 <code>xml_schema::not_derived</code></a></h3> + + <pre class="c++"> +struct not_derived: virtual exception +{ + not_derived (const std::basic_string<C>& base_type_name, + const std::basic_string<C>& base_type_namespace, + const std::basic_string<C>& derived_type_name, + const std::basic_string<C>& derived_type_namespace); + + const std::basic_string<C>& + base_type_name () const; + + const std::basic_string<C>& + base_type_namespace () const; + + + const std::basic_string<C>& + derived_type_name () const; + + const std::basic_string<C>& + derived_type_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::not_derived</code> exception is thrown + when a type specified by the <code>xsi:type</code> attribute is + not derived from the expected base type. This exception is thrown + by the DOM-Tree stage. 
The name and namespace of the expected + base type can be obtained using the <code>base_type_name</code> and + <code>base_type_namespace</code> functions respectively. The name + and namespace of the offending type can be obtained using the + <code>derived_type_name</code> and + <code>derived_type_namespace</code> functions respectively. + </p> + + <h3><a name="3.3.9">3.3.9 <code>xml_schema::no_prefix_mapping</code></a></h3> + + <pre class="c++"> +struct no_prefix_mapping: virtual exception +{ + no_prefix_mapping (const std::basic_string<C>& prefix); + + const std::basic_string<C>& + prefix () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::no_prefix_mapping</code> exception is thrown + during the DOM-Tree stage if a namespace prefix is encountered for + which a prefix-namespace mapping hasn't been provided. The namespace + prefix in question can be obtained using the <code>prefix</code> + function. + </p> + + <h2><a name="3.4">3.4 Reading from a Local File or URI</a></h2> + + <p>Using a local file or URI is the simplest way to parse an XML instance. + For example:</p> + + <pre class="c++"> +using std::unique_ptr; + +unique_ptr<type> r1 (name ("test.xml")); +unique_ptr<type> r2 (name ("https://www.codesynthesis.com/test.xml")); + </pre> + + <p>Or, in the C++98 mode:</p> + + <pre class="c++"> +using std::auto_ptr; + +auto_ptr<type> r1 (name ("test.xml")); +auto_ptr<type> r2 (name ("https://www.codesynthesis.com/test.xml")); + </pre> + + <h2><a name="3.5">3.5 Reading from <code>std::istream</code></a></h2> + + <p>When using an <code>std::istream</code> instance, you may also + pass an optional resource id. This id is used to identify the + resource (for example in error messages) as well as to resolve + relative paths. 
For instance:</p> + + <pre class="c++"> +using std::unique_ptr; + +{ + std::ifstream ifs ("test.xml"); + unique_ptr<type> r (name (ifs, "test.xml")); +} + +{ + std::string str ("..."); // Some XML fragment. + std::istringstream iss (str); + unique_ptr<type> r (name (iss)); +} + </pre> + + <h2><a name="3.6">3.6 Reading from <code>xercesc::InputSource</code></a></h2> + + <p>Reading from a <code>xercesc::InputSource</code> instance + is similar to the <code>std::istream</code> case except + the resource id is maintained by the <code>InputSource</code> + object. For instance:</p> + + <pre class="c++"> +xercesc::StdInInputSource is; +std::unique_ptr<type> r (name (is)); + </pre> + + <h2><a name="3.7">3.7 Reading from DOM</a></h2> + + <p>Reading from a <code>xercesc::DOMDocument</code> instance allows + you to set up a custom XML-DOM stage. Things like DOM + parser reuse, schema pre-parsing, and schema caching can be achieved + with this approach. For more information on how to obtain DOM + representation from an XML instance refer to the Xerces-C++ + documentation. In addition, the + <a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree Mapping + FAQ</a> shows how to parse an XML instance to a Xerces-C++ + DOM document using the XSD runtime utilities. + </p> + + <p>The last parsing function is useful when you would like to perform + your own XML-to-DOM parsing and associate the resulting DOM document + with the object model nodes. The automatic <code>DOMDocument</code> + pointer is reset and the resulting object model assumes ownership + of the DOM document passed. For example:</p> + + <pre class="c++"> +// C++11 version. +// +xml_schema::dom::unique_ptr<xercesc::DOMDocument> doc = ... + +std::unique_ptr<type> r ( + name (std::move (doc), + xml_schema::flags::keep_dom | xml_schema::flags::own_dom)); + +// At this point doc is reset to 0. + +// C++98 version. +// +xml_schema::dom::auto_ptr<xercesc::DOMDocument> doc = ... 
+ +std::auto_ptr<type> r ( + name (doc, xml_schema::flags::keep_dom | xml_schema::flags::own_dom)); + +// At this point doc is reset to 0. + </pre> + + <h1><a name="4">4 Serialization</a></h1> + + <p>This chapter covers various aspects of serializing a + tree-like object model to DOM or XML. + In this regard, serialization is complementary to the reverse + process of parsing a DOM or XML instance into an object model + which is discussed in <a href="#3">Chapter 3, + "Parsing"</a>. Note that the generation of the serialization code + is optional and should be explicitly requested with the + <code>--generate-serialization</code> option. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information. + </p> + + <p>Each global XML Schema element in the form: + </p> + + + <pre class="xml"> +<xsd:element name="name" type="type"/> + </pre> + + <p>is mapped to 8 overloaded C++ functions in the form:</p> + + <pre class="c++"> +// Serialize to std::ostream. +// +void +name (std::ostream&, + const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (std::ostream&, + const type&, + xml_schema::error_handler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (std::ostream&, + const type&, + xercesc::DOMErrorHandler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + + +// Serialize to XMLFormatTarget. 
+// +void +name (xercesc::XMLFormatTarget&, + const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (xercesc::XMLFormatTarget&, + const type&, + xml_schema::error_handler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (xercesc::XMLFormatTarget&, + const type&, + xercesc::DOMErrorHandler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + + +// Serialize to DOM. +// +xml_schema::dom::[unique|auto]_ptr<xercesc::DOMDocument> +name (const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + xml_schema::flags = 0); + +void +name (xercesc::DOMDocument&, + const type&, + xml_schema::flags = 0); + </pre> + + <p>You can choose between writing XML to <code>std::ostream</code> or + <code>xercesc::XMLFormatTarget</code> and creating a DOM instance + in the form of <code>xercesc::DOMDocument</code>. Serialization + to <code>ostream</code> or <code>XMLFormatTarget</code> requires + considerably less work while serialization to DOM provides + for greater flexibility. Each of these serialization functions + is discussed in more detail in the following sections. + </p> + + + <h2><a name="4.1">4.1 Initializing the Xerces-C++ Runtime</a></h2> + + <p>Some serialization functions expect you to initialize the Xerces-C++ + runtime while others initialize and terminate it as part of their + work. The general rule is as follows: if a function has any arguments + or returns a value that is an instance of a Xerces-C++ type, then + this function expects you to initialize the Xerces-C++ runtime. + Otherwise, the function initializes and terminates the runtime for + you. 
Note that it is legal to have nested calls to the Xerces-C++ + initialize and terminate functions as long as the calls are balanced. + </p> + + <p>You can instruct serialization functions that initialize and terminate + the runtime not to do so by passing the + <code>xml_schema::flags::dont_initialize</code> flag (see + <a href="#4.3">Section 4.3, "Flags"</a>). + </p> + + <h2><a name="4.2">4.2 Namespace Infomap and Character Encoding</a></h2> + + <p>When a document being serialized uses XML namespaces, custom + prefix-namespace associations can be established. If custom + prefix-namespace mapping is not provided then generic prefixes + (<code>p1</code>, <code>p2</code>, etc.) are automatically assigned + to namespaces as needed. Also, if + you would like the resulting instance document to contain the + <code>schemaLocation</code> or <code>noNamespaceSchemaLocation</code> + attributes, you will need to provide namespace-schema associations. + The <code>xml_schema::namespace_infomap</code> class is used + to capture this information:</p> + + <pre class="c++"> +struct namespace_info +{ + namespace_info (); + namespace_info (const std::basic_string<C>& name, + const std::basic_string<C>& schema); + + std::basic_string<C> name; + std::basic_string<C> schema; +}; + +// Map of namespace prefix to namespace_info. 
+// +struct namespace_infomap: public std::map<std::basic_string<C>, + namespace_info> +{ +}; + </pre> + + <p>Consider the following associations as an example:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + </pre> + + <p>This map, if passed to one of the serialization functions, + could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<t:name xmlns:t="https://www.codesynthesis.com/test" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + <p>As you can see, the serialization function automatically added namespace + mapping for the <code>xsi</code> prefix. You can change this by + providing your own prefix:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map["xsn"].name = "http://www.w3.org/2001/XMLSchema-instance"; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + </pre> + + <p>This could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<t:name xmlns:t="https://www.codesynthesis.com/test" + xmlns:xsn="http://www.w3.org/2001/XMLSchema-instance" + xsn:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + <p>To specify the location of a schema without a namespace you can use + an empty prefix as in the example below: </p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].schema = "test.xsd"; + </pre> + + <p>This would result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<name xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="test.xsd"> + </pre> + + <p>To make a particular namespace default you can use an empty + prefix, for example:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = 
"https://www.codesynthesis.com/test"; +map[""].schema = "test.xsd"; + </pre> + + <p>This could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<name xmlns="https://www.codesynthesis.com/test" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + + <p>Another bit of information that you can pass to the serialization + functions is the character encoding method that you would like to use. + Common values for this argument are <code>"US-ASCII"</code>, + <code>"ISO8859-1"</code>, <code>"UTF-8"</code>, + <code>"UTF-16BE"</code>, <code>"UTF-16LE"</code>, + <code>"UCS-4BE"</code>, and <code>"UCS-4LE"</code>. The default + encoding is <code>"UTF-8"</code>. For more information on + encoding methods see the + "<a href="http://en.wikipedia.org/wiki/Character_code">Character + Encoding</a>" article from Wikipedia. + </p> + + <h2><a name="4.3">4.3 Flags</a></h2> + + <p>Serialization flags are the last argument of every serialization + function. They allow you to fine-tune the process of serialization. + The flags argument is optional. + </p> + + + <p>The following flags are recognized by the serialization + functions:</p> + + <dl> + <dt><code>xml_schema::flags::dont_initialize</code></dt> + <dd>Do not initialize the Xerces-C++ runtime.</dd> + + <dt><code>xml_schema::flags::dont_pretty_print</code></dt> + <dd>Do not add extra spaces or new lines that make the resulting XML + slightly bigger but easier to read.</dd> + + <dt><code>xml_schema::flags::no_xml_declaration</code></dt> + <dd>Do not write XML declaration (<?xml ... ?>).</dd> + </dl> + + <p>You can pass several flags by combining them using the bit-wise OR + operator. For example:</p> + + <pre class="c++"> +std::unique_ptr<type> r = ... 
+std::ofstream ofs ("test.xml"); +xml_schema::namespace_infomap map; +name (ofs, + *r, + map, + "UTF-8", + xml_schema::flags::no_xml_declaration | + xml_schema::flags::dont_pretty_print); + </pre> + + <p>For more information on the Xerces-C++ runtime initialization + refer to <a href="#4.1">Section 4.1, "Initializing the Xerces-C++ + Runtime"</a>. + </p> + + <h2><a name="4.4">4.4 Error Handling</a></h2> + + <p>As with the parsing functions (see <a href="#3.3">Section 3.3, + "Error Handling"</a>), to better understand error handling and + reporting strategies employed by the serialization functions, it + is useful to know that the transformation of a statically-typed + tree to an XML instance document happens in two stages. The first + stage, performed by the generated code, consists of building a DOM + instance from the statically-typed tree. For short, we will call + this stage the Tree-DOM stage. The second stage, performed by + Xerces-C++, consists of serializing the DOM instance into the XML + document. We will call this stage the DOM-XML stage. + </p> + + <p>All serialization functions except the two that serialize into + a DOM instance come in overloaded triples. The first function + in such a triple reports error conditions exclusively by throwing + exceptions. It accumulates all the serialization errors of the + DOM-XML stage and throws them in a single instance of the + <code>xml_schema::serialization</code> exception (described below). + The second and the third functions in the triple use callback + interfaces to report serialization errors and warnings. The two + callback interfaces are <code>xml_schema::error_handler</code> and + <code>xercesc::DOMErrorHandler</code>. The + <code>xml_schema::error_handler</code> interface is described in + <a href="#3.3">Section 3.3, "Error Handling"</a>. For more information + on the <code>xercesc::DOMErrorHandler</code> interface refer to the + Xerces-C++ documentation. 
+ </p> + + <p>The Tree-DOM stage reports error conditions exclusively by throwing + exceptions. Individual exceptions thrown by the serialization functions + are described in the following sub-sections. + </p> + + <h3><a name="4.4.1">4.4.1 <code>xml_schema::serialization</code></a></h3> + + <pre class="c++"> +struct serialization: virtual exception +{ + serialization (); + serialization (const diagnostics&); + + const diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::diagnostics</code> class is described in + <a href="#3.3.1">Section 3.3.1, "<code>xml_schema::parsing</code>"</a>. + The <code>xml_schema::serialization</code> exception is thrown if + there were serialization errors reported during the DOM-XML stage. + If no callback interface was provided to the serialization function, + the exception contains a list of errors and warnings accessible using + the <code>diagnostics</code> function. + </p> + + + <h3><a name="4.4.2">4.4.2 <code>xml_schema::unexpected_element</code></a></h3> + + <p>The <code>xml_schema::unexpected_element</code> exception is + described in <a href="#3.3.3">Section 3.3.3, + "<code>xml_schema::unexpected_element</code>"</a>. It is thrown + by the serialization functions during the Tree-DOM stage if the + root element name of the provided DOM instance does not match with + the name of the element this serialization function is for. + </p> + + <h3><a name="4.4.3">4.4.3 <code>xml_schema::no_type_info</code></a></h3> + + <p>The <code>xml_schema::no_type_info</code> exception is + described in <a href="#3.3.7">Section 3.3.7, + "<code>xml_schema::no_type_info</code>"</a>. It is thrown + by the serialization functions during the Tree-DOM stage when there + is no type information associated with a dynamic type of an + element. 
Usually, catching this exception means that you haven't + linked the code generated from the schema defining the type in + question with your application or this schema has been compiled + without the <code>--generate-polymorphic</code> option. + </p> + + <h2><a name="4.5">4.5 Serializing to <code>std::ostream</code></a></h2> + + <p>In order to serialize to <code>std::ostream</code> you will need + an object model, an output stream and, optionally, a namespace + infomap. For instance:</p> + + <pre class="c++"> +// Obtain the object model. +// +std::unique_ptr<type> r = ... + +// Prepare namespace mapping and schema location information. +// +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + +// Write it out. +// +name (std::cout, *r, map); + </pre> + + <p>Note that the output stream is treated as a binary stream. This + becomes important when you use a character encoding that is wider + than 8-bit <code>char</code>, for instance UTF-16 or UCS-4. For + example, things will most likely break if you try to serialize + to <code>std::ostringstream</code> with UTF-16 or UCS-4 as an + encoding. This is due to the special value, + <code>'\0'</code>, that will most likely occur as part of such + serialization and it won't have the special meaning assumed by + <code>std::ostringstream</code>. + </p> + + + <h2><a name="4.6">4.6 Serializing to <code>xercesc::XMLFormatTarget</code></a></h2> + + <p>Serializing to an <code>xercesc::XMLFormatTarget</code> instance + is similar to the <code>std::ostream</code> case. For instance: + </p> + + <pre class="c++"> +using std::unique_ptr; + +// Obtain the object model. +// +unique_ptr<type> r = ... + +// Prepare namespace mapping and schema location information. 
+// +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Choose a target. + // + unique_ptr<XMLFormatTarget> ft; + + if (argc != 2) + { + ft = unique_ptr<XMLFormatTarget> (new StdOutFormatTarget ()); + } + else + { + ft = unique_ptr<XMLFormatTarget> ( + new LocalFileFormatTarget (argv[1])); + } + + // Write it out. + // + name (*ft, *r, map); +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>Note that we had to initialize the Xerces-C++ runtime before we + could call this serialization function.</p> + + <h2><a name="4.7">4.7 Serializing to DOM</a></h2> + + <p>The mapping provides two overloaded functions that implement + serialization to a DOM instance. The first creates a DOM instance + for you and the second serializes to an existing DOM instance. + While serializing to a new DOM instance is similar to serializing + to <code>std::ostream</code> or <code>xercesc::XMLFormatTarget</code>, + serializing to an existing DOM instance requires quite a bit of work + from your side. You will need to set all the custom namespace mapping + attributes as well as the <code>schemaLocation</code> and/or + <code>noNamespaceSchemaLocation</code> attributes. The following + listing should give you an idea about what needs to be done: + </p> + + <pre class="c++"> +// Obtain the object model. +// +std::unique_ptr<type> r = ... + +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Create a DOM instance. Set custom namespace mapping and schema + // location attributes. + // + DOMDocument& doc = ... + + // Serialize to DOM. + // + name (doc, *r); + + // Serialize the DOM document to XML. + // + ... +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>For more information on how to create and serialize a DOM instance + refer to the Xerces-C++ documentation. 
In addition, the + <a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree Mapping + FAQ</a> shows how to implement these operations using the XSD + runtime utilities. + </p> + + <h1><a name="5">5 Additional Functionality</a></h1> + + <p>The C++/Tree mapping provides a number of optional features + that can be useful in certain situations. They are described + in the following sections.</p> + + <h2><a name="5.1">5.1 DOM Association</a></h2> + + <p>Normally, after parsing is complete, the DOM document which + was used to extract the data is discarded. However, the parsing + functions can be instructed to preserve the DOM document + and create an association between the DOM nodes and object model + nodes. When there is an association between the DOM and + object model nodes, you can obtain the corresponding DOM element + or attribute node from an object model node as well as perform + the reverse transition: obtain the corresponding object model + from a DOM element or attribute node.</p> + + <p>Maintaining DOM association is normally useful when the application + needs access to XML constructs that are not preserved in the + object model, for example, XML comments. + Another useful aspect of DOM association is the ability of the + application to navigate the document tree using the generic DOM + interface (for example, with the help of an XPath processor) + and then move back to the statically-typed object model. Note + also that while you can change the underlying DOM document, + these changes are not reflected in the object model and will + be ignored during serialization. 
If you need to not only access + but also modify some aspects of XML that are not preserved in + the object model, then type customization with custom parsing + constructors and serialization operators should be used instead.</p> + + <p>To request DOM association you will need to pass the + <code>xml_schema::flags::keep_dom</code> flag to one of the + parsing functions (see <a href="#3.2">Section 3.2, + "Flags and Properties"</a> for more information). In this case the + DOM document is retained and will be released when the object model + is deleted. Note that since DOM nodes "out-live" the parsing function + call, you need to initialize the Xerces-C++ runtime before calling + one of the parsing functions with the <code>keep_dom</code> flag and + terminate it after the object model is destroyed (see + <a href="#3.1">Section 3.1, "Initializing the Xerces-C++ Runtime"</a>).</p> + + <p>If the <code>keep_dom</code> flag is passed + as the second argument to the copy constructor and the copy + being made is of a complete tree, then the DOM association + is also maintained in the copy by cloning the underlying + DOM document and reestablishing the associations. For example:</p> + + <pre class="c++"> +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Parse XML to object model. + // + std::unique_ptr<type> r (root ( + "root.xml", + xml_schema::flags::keep_dom | + xml_schema::flags::dont_initialize)); + + // Copy without DOM association. + // + type copy1 (*r); + + // Copy with DOM association. + // + type copy2 (*r, xml_schema::flags::keep_dom); +} + +XMLPlatformUtils::Terminate (); + </pre> + + + <p>To obtain the corresponding DOM node from an object model node + you will need to call the <code>_node</code> accessor function + which returns a pointer to <code>DOMNode</code>. You can then query + this DOM node's type and cast it to either <code>DOMAttr*</code> + or <code>DOMElement*</code>. 
To obtain the corresponding object + model node from a DOM node, the DOM user data API is used. The + <code>xml_schema::dom::tree_node_key</code> variable contains + the key for object model nodes. The following schema and code + fragment show how to navigate from DOM to object model nodes + and in the opposite direction:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="a" type="string"/> + </sequence> +</complexType> + +<element name="root" type="object"/> + </pre> + + <pre class="c++"> +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Parse XML to object model. + // + std::unique_ptr<type> r (root ( + "root.xml", + xml_schema::flags::keep_dom | + xml_schema::flags::dont_initialize)); + + DOMNode* n = r->_node (); + assert (n->getNodeType () == DOMNode::ELEMENT_NODE); + DOMElement* re = static_cast<DOMElement*> (n); + + // Get the 'a' element. Note that it is not necessarily the + // first child node of 'root' since there could be whitespace + // nodes before it. + // + DOMElement* ae; + + for (n = re->getFirstChild (); n != 0; n = n->getNextSibling ()) + { + if (n->getNodeType () == DOMNode::ELEMENT_NODE) + { + ae = static_cast<DOMElement*> (n); + break; + } + } + + // Get from the 'a' DOM element to xml_schema::string object model + // node. + // + xml_schema::type& t ( + *reinterpret_cast<xml_schema::type*> ( + ae->getUserData (xml_schema::dom::tree_node_key))); + + xml_schema::string& a (dynamic_cast<xml_schema::string&> (t)); +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>The 'mixed' example which can be found in the XSD distribution + shows how to handle the mixed content using DOM association.</p> + + <h2><a name="5.2">5.2 Binary Serialization</a></h2> + + <p>Besides reading from and writing to XML, the C++/Tree mapping + also allows you to save the object model to and load it from a + number of predefined as well as custom data representation + formats. 
The predefined binary formats are CDR (Common Data + Representation) and XDR (eXternal Data Representation). A + custom format can easily be supported by providing + insertion and extraction operators for basic types.</p> + + <p>Binary serialization saves only the data without any meta + information or markup. As a result, saving to and loading + from a binary representation can be an order of magnitude + faster than parsing and serializing the same data in XML. + Furthermore, the resulting representation is normally several + times smaller than the equivalent XML representation. These + properties make binary serialization ideal for internal data + exchange and storage. A typical application that uses this + facility stores the data and communicates within the + system using a binary format and reads/writes the data + in XML when communicating with the outside world.</p> + + <p>In order to request the generation of insertion operators and + extraction constructors for a specific predefined or custom + data representation stream, you will need to use the + <code>--generate-insertion</code> and <code>--generate-extraction</code> + compiler options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information.</p> + + <p>Once the insertion operators and extraction constructors are + generated, you can use the <code>xml_schema::istream</code> + and <code>xml_schema::ostream</code> wrapper stream templates + to save the object model to and load it from a specific format. + The following code fragment shows how to do this using ACE + (Adaptive Communication Environment) CDR streams as an example:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="a" type="string"/> + <element name="b" type="int"/> + </sequence> +</complexType> + +<element name="root" type="object"/> + </pre> + + <pre class="c++"> +// Parse XML to object model. 
+// +std::unique_ptr<type> r (root ("root.xml")); + +// Save to a CDR stream. +// +ACE_OutputCDR ace_ocdr; +xml_schema::ostream<ACE_OutputCDR> ocdr (ace_ocdr); + +ocdr << *r; + +// Load from a CDR stream. +// +ACE_InputCDR ace_icdr (buf, size); +xml_schema::istream<ACE_InputCDR> icdr (ace_icdr); + +std::unique_ptr<object> copy (new object (icdr)); + +// Serialize to XML. +// +root (std::cout, *copy); + </pre> + + <p>The XSD distribution contains a number of examples that + show how to save the object model to and load it from + CDR, XDR, and a custom format.</p> + + <!-- Appendix A --> + + + <h1><a name="A">Appendix A — Default and Fixed Values</a></h1> + + <p>The following table summarizes the effect of default and fixed + values (specified with the <code>default</code> and <code>fixed</code> + attributes, respectively) on attribute and element values. The + <code>default</code> and <code>fixed</code> attributes are mutually + exclusive. It is also worthwhile to note that the fixed value semantics + is a superset of the default value semantics. 
+ </p> + + <!-- border="1" is necessary for html2ps --> + <table id="default-fixed" border="1"> + <tr> + <th></th> + <th></th> + <th colspan="2">default</th> + <th colspan="2">fixed</th> + </tr> + + <!-- element --> + + <tr> + <th rowspan="4">element</th> + <th rowspan="2">not present</th> + <th>optional</th> + <th>required</th> + <th>optional</th> + <th>required</th> + </tr> + <tr> + <td>not present</td> + <td>invalid instance</td> + <td>not present</td> + <td>invalid instance</td> + </tr> + + + <tr> + <th>empty</th> + <td colspan="2">default value is used</td> + <td colspan="2">fixed value is used</td> + </tr> + + <tr> + <th>value</th> + <td colspan="2">value is used</td> + <td colspan="2">value is used provided it's the same as fixed</td> + </tr> + + <!-- attribute --> + + <!-- element --> + + <tr> + <th rowspan="4">attribute</th> + <th rowspan="2">not present</th> + <th>optional</th> + <th>required</th> + <th>optional</th> + <th>required</th> + </tr> + <tr> + <td>default value is used</td> + <td>invalid schema</td> + <td>fixed value is used</td> + <td>invalid instance</td> + </tr> + + + <tr> + <th>empty</th> + <td colspan="2">empty value is used</td> + <td colspan="2">empty value is used provided it's the same as fixed</td> + </tr> + + <tr> + <th>value</th> + <td colspan="2">value is used</td> + <td colspan="2">value is used provided it's the same as fixed</td> + </tr> + + </table> + + </div> +</div> + + +</body> +</html> diff --git a/doc/cxx/tree/manual/index.xhtml.in b/doc/cxx/tree/manual/index.xhtml.in new file mode 100644 index 0000000..5a7240a --- /dev/null +++ b/doc/cxx/tree/manual/index.xhtml.in @@ -0,0 +1,6826 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>C++/Tree Mapping User Manual</title> + + <meta name="copyright" content="© @copyright@"/> + <meta 
name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,tree,serialization,guide,manual,examples"/> + <meta name="description" content="C++/Tree Mapping User Manual"/> + <meta name="revision" content="4.1.0"/> + + <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 130%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. */ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage #title { + font-weight: bold; + font-size: 200%; + text-align: center; + padding: 1em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + + /* default-fixed */ + #default-fixed { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #default-fixed th, #default-fixed td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #default-fixed th { + background : #cde8f6; + } + + #default-fixed td { + text-align: center; + } + + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } 
+ + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div id="title">C++/Tree Mapping User Manual</div> + + <p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="https://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. 
+ </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.ps">PostScript</a>.</p> + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a></td> + </tr> + + <tr> + <th>2</th><td><a href="#2">C++/Tree Mapping</a> + <table class="toc"> + <tr> + <th>2.1</th><td><a href="#2.1">Preliminary Information</a> + <table class="toc"> + <tr><th>2.1.1</th><td><a href="#2.1.1">C++ Standard</a></td></tr> + <tr><th>2.1.2</th><td><a href="#2.1.2">Identifiers</a></td></tr> + <tr><th>2.1.3</th><td><a href="#2.1.3">Character Type and Encoding</a></td></tr> + <tr><th>2.1.4</th><td><a href="#2.1.4">XML Schema Namespace</a></td></tr> + <tr><th>2.1.5</th><td><a href="#2.1.5">Anonymous Types</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.2</th><td><a href="#2.2">Error Handling</a> + <table class="toc"> + <tr><th>2.2.1</th><td><a href="#2.2.1"><code>xml_schema::duplicate_id</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.3</th><td><a href="#2.3">Mapping for <code>import</code> and <code>include</code></a> + <table class="toc"> + <tr><th>2.3.1</th><td><a href="#2.3.1">Import</a></td></tr> + <tr><th>2.3.2</th><td><a href="#2.3.2">Inclusion with Target Namespace</a></td></tr> + <tr><th>2.3.3</th><td><a href="#2.3.3">Inclusion without Target Namespace</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.4</th><td><a href="#2.4">Mapping for 
Namespaces</a></td> + </tr> + <tr> + <th>2.5</th><td><a href="#2.5">Mapping for Built-in Data Types</a> + <table class="toc"> + <tr><th>2.5.1</th><td><a href="#2.5.1">Inheritance from Built-in Data Types</a></td></tr> + <tr><th>2.5.2</th><td><a href="#2.5.2">Mapping for <code>anyType</code></a></td></tr> + <tr><th>2.5.3</th><td><a href="#2.5.3">Mapping for <code>anySimpleType</code></a></td></tr> + <tr><th>2.5.4</th><td><a href="#2.5.4">Mapping for <code>QName</code></a></td></tr> + <tr><th>2.5.5</th><td><a href="#2.5.5">Mapping for <code>IDREF</code></a></td></tr> + <tr><th>2.5.6</th><td><a href="#2.5.6">Mapping for <code>base64Binary</code> and <code>hexBinary</code></a></td></tr> + <tr><th>2.5.7</th><td><a href="#2.5.7">Time Zone Representation</a></td></tr> + <tr><th>2.5.8</th><td><a href="#2.5.8">Mapping for <code>date</code></a></td></tr> + <tr><th>2.5.9</th><td><a href="#2.5.9">Mapping for <code>dateTime</code></a></td></tr> + <tr><th>2.5.10</th><td><a href="#2.5.10">Mapping for <code>duration</code></a></td></tr> + <tr><th>2.5.11</th><td><a href="#2.5.11">Mapping for <code>gDay</code></a></td></tr> + <tr><th>2.5.12</th><td><a href="#2.5.12">Mapping for <code>gMonth</code></a></td></tr> + <tr><th>2.5.13</th><td><a href="#2.5.13">Mapping for <code>gMonthDay</code></a></td></tr> + <tr><th>2.5.14</th><td><a href="#2.5.14">Mapping for <code>gYear</code></a></td></tr> + <tr><th>2.5.15</th><td><a href="#2.5.15">Mapping for <code>gYearMonth</code></a></td></tr> + <tr><th>2.5.16</th><td><a href="#2.5.16">Mapping for <code>time</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.6</th><td><a href="#2.6">Mapping for Simple Types</a> + <table class="toc"> + <tr><th>2.6.1</th><td><a href="#2.6.1">Mapping for Derivation by Restriction</a></td></tr> + <tr><th>2.6.2</th><td><a href="#2.6.2">Mapping for Enumerations</a></td></tr> + <tr><th>2.6.3</th><td><a href="#2.6.3">Mapping for Derivation by List</a></td></tr> + <tr><th>2.6.4</th><td><a href="#2.6.4">Mapping 
for Derivation by Union</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.7</th><td><a href="#2.7">Mapping for Complex Types</a> + <table class="toc"> + <tr><th>2.7.1</th><td><a href="#2.7.1">Mapping for Derivation by Extension</a></td></tr> + <tr><th>2.7.2</th><td><a href="#2.7.2">Mapping for Derivation by Restriction</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.8</th><td><a href="#2.8">Mapping for Local Elements and Attributes</a> + <table class="toc"> + <tr><th>2.8.1</th><td><a href="#2.8.1">Mapping for Members with the One Cardinality Class</a></td></tr> + <tr><th>2.8.2</th><td><a href="#2.8.2">Mapping for Members with the Optional Cardinality Class</a></td></tr> + <tr><th>2.8.3</th><td><a href="#2.8.3">Mapping for Members with the Sequence Cardinality Class</a></td></tr> + <tr><th>2.8.4</th><td><a href="#2.8.4">Element Order</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.9</th><td><a href="#2.9">Mapping for Global Elements</a> + <table class="toc"> + <tr><th>2.9.1</th><td><a href="#2.9.1">Element Types</a></td></tr> + <tr><th>2.9.2</th><td><a href="#2.9.2">Element Map</a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.10</th><td><a href="#2.10">Mapping for Global Attributes</a></td> + </tr> + <tr> + <th>2.11</th><td><a href="#2.11">Mapping for <code>xsi:type</code> and Substitution Groups</a></td> + </tr> + <tr> + <th>2.12</th><td><a href="#2.12">Mapping for <code>any</code> and <code>anyAttribute</code></a> + <table class="toc"> + <tr><th>2.12.1</th><td><a href="#2.12.1">Mapping for <code>any</code> with the One Cardinality Class</a></td></tr> + <tr><th>2.12.2</th><td><a href="#2.12.2">Mapping for <code>any</code> with the Optional Cardinality Class</a></td></tr> + <tr><th>2.12.3</th><td><a href="#2.12.3">Mapping for <code>any</code> with the Sequence Cardinality Class</a></td></tr> + <tr><th>2.12.4</th><td><a href="#2.12.4">Element Wildcard Order</a></td></tr> + <tr><th>2.12.5</th><td><a href="#2.12.5">Mapping for 
<code>anyAttribute</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>2.13</th><td><a href="#2.13">Mapping for Mixed Content Models</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Parsing</a> + <table class="toc"> + <tr> + <th>3.1</th><td><a href="#3.1">Initializing the Xerces-C++ Runtime</a></td> + </tr> + <tr> + <th>3.2</th><td><a href="#3.2">Flags and Properties</a></td> + </tr> + <tr> + <th>3.3</th><td><a href="#3.3">Error Handling</a> + <table class="toc"> + <tr><th>3.3.1</th><td><a href="#3.3.1"><code>xml_schema::parsing</code></a></td></tr> + <tr><th>3.3.2</th><td><a href="#3.3.2"><code>xml_schema::expected_element</code></a></td></tr> + <tr><th>3.3.3</th><td><a href="#3.3.3"><code>xml_schema::unexpected_element</code></a></td></tr> + <tr><th>3.3.4</th><td><a href="#3.3.4"><code>xml_schema::expected_attribute</code></a></td></tr> + <tr><th>3.3.5</th><td><a href="#3.3.5"><code>xml_schema::unexpected_enumerator</code></a></td></tr> + <tr><th>3.3.6</th><td><a href="#3.3.6"><code>xml_schema::expected_text_content</code></a></td></tr> + <tr><th>3.3.7</th><td><a href="#3.3.7"><code>xml_schema::no_type_info</code></a></td></tr> + <tr><th>3.3.8</th><td><a href="#3.3.8"><code>xml_schema::not_derived</code></a></td></tr> + <tr><th>3.3.9</th><td><a href="#3.3.9"><code>xml_schema::no_prefix_mapping</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>3.4</th><td><a href="#3.4">Reading from a Local File or URI</a></td> + </tr> + <tr> + <th>3.5</th><td><a href="#3.5">Reading from <code>std::istream</code></a></td> + </tr> + <tr> + <th>3.6</th><td><a href="#3.6">Reading from <code>xercesc::InputSource</code></a></td> + </tr> + <tr> + <th>3.7</th><td><a href="#3.7">Reading from DOM</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Serialization</a> + <table class="toc"> + <tr> + <th>4.1</th><td><a href="#4.1">Initializing the Xerces-C++ Runtime</a></td> + </tr> + <tr> + <th>4.2</th><td><a 
href="#4.2">Namespace Infomap and Character Encoding</a></td> + </tr> + <tr> + <th>4.3</th><td><a href="#4.3">Flags</a></td> + </tr> + <tr> + <th>4.4</th><td><a href="#4.4">Error Handling</a> + <table class="toc"> + <tr><th>4.4.1</th><td><a href="#4.4.1"><code>xml_schema::serialization</code></a></td></tr> + <tr><th>4.4.2</th><td><a href="#4.4.2"><code>xml_schema::unexpected_element</code></a></td></tr> + <tr><th>4.4.3</th><td><a href="#4.4.3"><code>xml_schema::no_type_info</code></a></td></tr> + </table> + </td> + </tr> + <tr> + <th>4.5</th><td><a href="#4.5">Serializing to <code>std::ostream</code></a></td> + </tr> + <tr> + <th>4.6</th><td><a href="#4.6">Serializing to <code>xercesc::XMLFormatTarget</code></a></td> + </tr> + <tr> + <th>4.7</th><td><a href="#4.7">Serializing to DOM</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Additional Functionality</a> + <table class="toc"> + <tr> + <th>5.1</th><td><a href="#5.1">DOM Association</a></td> + </tr> + <tr> + <th>5.2</th><td><a href="#5.2">Binary Serialization</a></td> + </tr> + </table> + </td> + </tr> + + <tr> + <th></th><td><a href="#A">Appendix A — Default and Fixed Values</a></td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>This document describes the mapping of W3C XML Schema + to the C++ programming language as implemented by + <a href="https://www.codesynthesis.com/products/xsd">CodeSynthesis + XSD</a> - an XML Schema to C++ data binding compiler. The mapping + represents information stored in XML instance documents as a + statically-typed, tree-like in-memory data structure and is + called C++/Tree. + </p> + + <p>Revision 4.1.0<br/> <!-- Remember to change revision in other places --> + This revision of the manual describes the C++/Tree + mapping as implemented by CodeSynthesis XSD version 4.1.0. 
+ </p> + + <p>This document is available in the following formats: + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/index.xhtml">XHTML</a>, + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.pdf">PDF</a>, and + <a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.ps">PostScript</a>.</p> + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this manual, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/guide/">C++/Tree + Mapping Getting Started Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/Customization_guide">C++/Tree + Mapping Customization Guide</a></li> + + <li><a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree + Mapping Frequently Asked Questions (FAQ)</a></li> + + <li><a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a></li> + + <li>The <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + contains a collection of examples and a README file with an overview + of each example.</li> + + <li>The <code>README</code> file in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> package + explains how to build the examples.</li> + + <li>The <a href="https://www.codesynthesis.com/mailman/listinfo/xsd-users">xsd-users</a> + mailing list is a place to ask questions. Furthermore the + <a href="https://www.codesynthesis.com/pipermail/xsd-users/">archives</a> + may already have answers to some of your questions.</li> + </ul> + + + <h1><a name="1">1 Introduction</a></h1> + + <p>C++/Tree is a W3C XML Schema to C++ mapping that represents the + data stored in XML as a statically-typed, vocabulary-specific + object model. 
Based on a formal description of an XML vocabulary + (schema), the C++/Tree mapping produces a tree-like data structure + suitable for in-memory processing as well as XML parsing and + serialization code.</p> + + <p>A typical application that processes XML documents usually + performs the following three steps: it first reads (parses) an XML + instance document to an object model, it then performs + some useful computations on that model which may involve + modification of the model, and finally it may write (serialize) + the modified object model back to XML. + </p> + + <p>The C++/Tree mapping consists of C++ types that represent the + given vocabulary (<a href="#2">Chapter 2, "C++/Tree Mapping"</a>), + a set of parsing functions that convert XML documents to + a tree-like in-memory data structure (<a href="#3">Chapter 3, + "Parsing"</a>), and a set of serialization functions that convert + the object model back to XML (<a href="#4">Chapter 4, + "Serialization"</a>). Furthermore, the mapping provides a number + of additional features, such as DOM association and binary + serialization, that can be useful in some applications + (<a href="#5">Chapter 5, "Additional Functionality"</a>). + </p> + + + <!-- Chapter 2 --> + + + <h1><a name="2">2 C++/Tree Mapping</a></h1> + + <h2><a name="2.1">2.1 Preliminary Information</a></h2> + + <h3><a name="2.1.1">2.1.1 C++ Standard</a></h3> + + <p>The C++/Tree mapping provides support for ISO/IEC C++ 2011 (C++11) + and ISO/IEC C++ 1998/2003 (C++98). To select the C++ standard for the + generated code we use the <code>--std</code> XSD compiler command + line option. While the majority of the examples in this manual use + C++11, the document explains the C++11/98 usage differences so that + they can easily be converted to C++98.</p> + + <h3><a name="2.1.2">2.1.2 Identifiers</a></h3> + + <p>XML Schema names may happen to be reserved C++ keywords or contain + characters that are illegal in C++ identifiers. 
To avoid C++ compilation + problems, such names are changed (escaped) when mapped to C++. If an + XML Schema name is a C++ keyword, the "_" suffix is added to it. All + characters of an XML Schema name that are not allowed in C++ identifiers + are replaced with "_". + </p> + + <p>For example, XML Schema name <code>try</code> will be mapped to + C++ identifier <code>try_</code>. Similarly, XML Schema name + <code>strange.na-me</code> will be mapped to C++ identifier + <code>strange_na_me</code>. + </p> + + <p>Furthermore, conflicts between type names and function names in the + same scope are resolved using name escaping. Such conflicts include + both a global element (which is mapped to a set of parsing and/or + serialization functions or element types, see <a href="#2.9">Section + 2.9, "Mapping for Global Elements"</a>) and a global type sharing the + same name as well as a local element or attribute inside a type having + the same name as the type itself.</p> + + <p>For example, if we had a global type <code>catalog</code> + and a global element with the same name then the type would be + mapped to a C++ class with name <code>catalog</code> while the + parsing functions corresponding to the global element would have + their names escaped as <code>catalog_</code>. + </p> + + <p>By default the mapping uses the so-called K&R (Kernighan and + Ritchie) identifier naming convention which is also used throughout + this manual. In this convention both type and function names are in + lower case and words are separated by underscores. If your application + code or schemas use a different notation, you may want to change the + naming convention used by the mapping for consistency. + The compiler supports a set of widely-used naming conventions + that you can select with the <code>--type-naming</code> and + <code>--function-naming</code> options. 
You can also further + refine one of the predefined conventions or create a completely + custom naming scheme by using the <code>--*-regex</code> options. + For more detailed information on these options refer to the NAMING + CONVENTION section in the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + <h3><a name="2.1.3">2.1.3 Character Type and Encoding</a></h3> + + <p>The code that implements the mapping, depending on the + <code>--char-type</code> option, is generated using either + <code>char</code> or <code>wchar_t</code> as the character + type. In this document code samples use symbol <code>C</code> + to refer to the character type you have selected when translating + your schemas, for example <code>std::basic_string<C></code>. + </p> + + <p>Another aspect of the mapping that depends on the character type + is character encoding. For the <code>char</code> character type + the default encoding is UTF-8. Other supported encodings are + ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as + custom encodings; these can be selected with the + <code>--char-encoding</code> command line option.</p> + + <p>For the <code>wchar_t</code> character type the encoding is + automatically selected between UTF-16 and UTF-32/UCS-4 depending + on the size of the <code>wchar_t</code> type. On some platforms + (for example, Windows with Visual C++ and AIX with IBM XL C++) + <code>wchar_t</code> is 2 bytes long. For these platforms the + encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes + long and UTF-32/UCS-4 is used.</p> + + <h3><a name="2.1.4">2.1.4 XML Schema Namespace</a></h3> + + <p>The mapping relies on some predefined types, classes, and functions + that are logically defined in the XML Schema namespace reserved for + the XML Schema language (<code>http://www.w3.org/2001/XMLSchema</code>). + By default, this namespace is mapped to C++ namespace + <code>xml_schema</code>. 
It is automatically accessible + from a C++ compilation unit that includes a header file generated + from an XML Schema definition. + </p> + + <p>Note that, if desired, the default mapping of this namespace can be + changed as described in <a href="#2.4">Section 2.4, "Mapping for + Namespaces"</a>. + </p> + + + <h3><a name="2.1.5">2.1.5 Anonymous Types</a></h3> + + <p>For the purpose of code generation, anonymous types defined in + XML Schema are automatically assigned names that are derived + from enclosing attributes and elements. Otherwise, such types + follow standard mapping rules for simple and complex type + definitions (see <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a> + and <a href="#2.7">Section 2.7, "Mapping for Complex Types"</a>). + For example, in the following schema fragment: + </p> + + <pre class="xml"> +<element name="object"> + <complexType> + ... + </complexType> +</element> + </pre> + + <p>The anonymous type defined inside element <code>object</code> will + be given name <code>object</code>. The compiler has a number of + options that control the process of anonymous type naming. For more + information refer to the <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a>.</p> + + + <h2><a name="2.2">2.2 Error Handling</a></h2> + + <p>The mapping uses the C++ exception handling mechanism as a primary way + of reporting error conditions. 
All exceptions that are specified in + this mapping derive from <code>xml_schema::exception</code> which + itself is derived from <code>std::exception</code>: + </p> + + <pre class="c++"> +struct exception: virtual std::exception +{ + friend + std::basic_ostream<C>& + operator<< (std::basic_ostream<C>& os, const exception& e) + { + e.print (os); + return os; + } + +protected: + virtual void + print (std::basic_ostream<C>&) const = 0; +}; + </pre> + + <p>The exception hierarchy supports "virtual" <code>operator<<</code> + which allows you to obtain diagnostics corresponding to the thrown + exception using the base exception interface. For example:</p> + + <pre class="c++"> +try +{ + ... +} +catch (const xml_schema::exception& e) +{ + cerr << e << endl; +} + </pre> + + <p>The following sub-sections describe exceptions thrown by the + types that constitute the object model. + <a href="#3.3">Section 3.3, "Error Handling"</a> of + <a href="#3">Chapter 3, "Parsing"</a> describes exceptions + and error handling mechanisms specific to the parsing functions. + <a href="#4.4">Section 4.4, "Error Handling"</a> of + <a href="#4">Chapter 4, "Serialization"</a> describes exceptions + and error handling mechanisms specific to the serialization functions. + </p> + + + <h3><a name="2.2.1">2.2.1 <code>xml_schema::duplicate_id</code></a></h3> + + <pre class="c++"> +struct duplicate_id: virtual exception +{ + duplicate_id (const std::basic_string<C>& id); + + const std::basic_string<C>& + id () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::duplicate_id</code> is thrown when + a conflicting instance of <code>xml_schema::id</code> (see + <a href="#2.5">Section 2.5, "Mapping for Built-in Data Types"</a>) + is added to a tree. The offending ID value can be obtained using + the <code>id</code> function. 
+ </p> + + <h2><a name="2.3">2.3 Mapping for <code>import</code> and <code>include</code></a></h2> + + <h3><a name="2.3.1">2.3.1 Import</a></h3> + + <p>The XML Schema <code>import</code> element is mapped to the C++ + Preprocessor <code>#include</code> directive. The value of + the <code>schemaLocation</code> attribute is used to derive + the name of the header file that appears in the <code>#include</code> + directive. For instance: + </p> + + <pre class="xml"> +<import namespace="https://www.codesynthesis.com/test" + schemaLocation="test.xsd"/> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +#include "test.hxx" + </pre> + + <p>Note that you will need to compile imported schemas separately + in order to produce corresponding header files.</p> + + <h3><a name="2.3.2">2.3.2 Inclusion with Target Namespace</a></h3> + + <p>The XML Schema <code>include</code> element which refers to a schema + with a target namespace or appears in a schema without a target namespace + follows the same mapping rules as the <code>import</code> element, + see <a href="#2.3.1">Section 2.3.1, "Import"</a>. + </p> + + <h3><a name="2.3.3">2.3.3 Inclusion without Target Namespace</a></h3> + + <p>For the XML Schema <code>include</code> element which refers to a schema + without a target namespace and appears in a schema with a target + namespace (such inclusion is sometimes called "chameleon inclusion"), + declarations and definitions from the included schema are generated + in-line in the namespace of the including schema as if they were + declared and defined there verbatim. For example, consider the + following two schemas: + </p> + + <pre class="xml"> +<!-- common.xsd --> +<schema> + <complexType name="type"> + ... 
+ </complexType> +</schema> + +<!-- test.xsd --> +<schema targetNamespace="https://www.codesynthesis.com/test"> + <include schemaLocation="common.xsd"/> +</schema> + </pre> + + <p>The fragment of interest from the generated header file for + <code>test.xsd</code> would look like this:</p> + + <pre class="c++"> +// test.hxx +namespace test +{ + class type + { + ... + }; +} + </pre> + + <h2><a name="2.4">2.4 Mapping for Namespaces</a></h2> + + <p>An XML Schema namespace is mapped to one or more nested C++ + namespaces. XML Schema namespaces are identified by URIs. + By default, a namespace URI is mapped to a sequence of + C++ namespace names by removing the protocol and host parts + and splitting the rest into a sequence of names with '<code>/</code>' + as the name separator. For instance: + </p> + + <pre class="xml"> +<schema targetNamespace="https://www.codesynthesis.com/system/test"> + ... +</schema> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +namespace system +{ + namespace test + { + ... + } +} + </pre> + + <p>The default mapping of namespace URIs to C++ namespace names can be + altered using the <code>--namespace-map</code> and + <code>--namespace-regex</code> options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information. 
+ </p> + + <h2><a name="2.5">2.5 Mapping for Built-in Data Types</a></h2> + + <p>The mapping of XML Schema built-in data types to C++ types is + summarized in the table below.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Alias in the <code>xml_schema</code> namespace</th> + <th>C++ type</th> + </tr> + + <tr> + <th colspan="3">anyType and anySimpleType types</th> + </tr> + <tr> + <td><code>anyType</code></td> + <td><code>type</code></td> + <td><a href="#2.5.2">Section 2.5.2, "Mapping for <code>anyType</code>"</a></td> + </tr> + <tr> + <td><code>anySimpleType</code></td> + <td><code>simple_type</code></td> + <td><a href="#2.5.3">Section 2.5.3, "Mapping for <code>anySimpleType</code>"</a></td> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_</code></td> + <td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long</code></td> + <td><code>unsigned long long</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + 
<td><code>integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer</code></td> + <td><code>long long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer</code></td> + <td><code>unsigned long long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer</code></td> + <td><code>long long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string types</th> + </tr> + <tr> + <td><code>string</code></td> + <td><code>string</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string</code></td> + <td>type derived from <code>string</code></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token</code></td> + <td>type derived from <code>normalized_string</code></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken</code></td> + <td>type derived from <code>token</code></td> + </tr> + <tr> + 
<td><code>NMTOKENS</code></td> + <td><code>nmtokens</code></td> + <td>type derived from <code>sequence<nmtoken></code></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>language</code></td> + <td><code>language</code></td> + <td>type derived from <code>token</code></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname</code></td> + <td><a href="#2.5.4">Section 2.5.4, "Mapping for <code>QName</code>"</a></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id</code></td> + <td>type derived from <code>ncname</code></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref</code></td> + <td><a href="#2.5.5">Section 2.5.5, "Mapping for <code>IDREF</code>"</a></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + <td><code>idrefs</code></td> + <td>type derived from <code>sequence<idref></code></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri</code></td> + <td>type derived from <code>std::basic_string</code></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary</code></td> + <td rowspan="2"><a href="#2.5.6">Section 2.5.6, "Mapping for + <code>base64Binary</code> and <code>hexBinary</code>"</a></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary</code></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date</code></td> + <td><a href="#2.5.8">Section 2.5.8, "Mapping for + <code>date</code>"</a></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time</code></td> + <td><a href="#2.5.9">Section 2.5.9, "Mapping for + <code>dateTime</code>"</a></td> + </tr> + <tr> + 
<td><code>duration</code></td> + <td><code>duration</code></td> + <td><a href="#2.5.10">Section 2.5.10, "Mapping for + <code>duration</code>"</a></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday</code></td> + <td><a href="#2.5.11">Section 2.5.11, "Mapping for + <code>gDay</code>"</a></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth</code></td> + <td><a href="#2.5.12">Section 2.5.12, "Mapping for + <code>gMonth</code>"</a></td> + </tr> + <tr> + <td><code>gMonthDay</code></td> + <td><code>gmonth_day</code></td> + <td><a href="#2.5.13">Section 2.5.13, "Mapping for + <code>gMonthDay</code>"</a></td> + </tr> + <tr> + <td><code>gYear</code></td> + <td><code>gyear</code></td> + <td><a href="#2.5.14">Section 2.5.14, "Mapping for + <code>gYear</code>"</a></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month</code></td> + <td><a href="#2.5.15">Section 2.5.15, "Mapping for + <code>gYearMonth</code>"</a></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time</code></td> + <td><a href="#2.5.16">Section 2.5.16, "Mapping for + <code>time</code>"</a></td> + </tr> + + <tr> + <th colspan="3">entity types</th> + </tr> + <tr> + <td><code>ENTITY</code></td> + <td><code>entity</code></td> + <td>type derived from <code>name</code></td> + </tr> + <tr> + <td><code>ENTITIES</code></td> + <td><code>entities</code></td> + <td>type derived from <code>sequence<entity></code></td> + </tr> + </table> + + <p>All XML Schema built-in types are mapped to C++ classes that are + derived from the <code>xml_schema::simple_type</code> class except + where the mapping is to a fundamental C++ type.</p> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). 
+ Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. One notable extension + to the standard interface that is available only for + sequences of non-fundamental C++ types is the addition of + the overloaded <code>push_back</code> and <code>insert</code> + member functions which instead of the constant reference + to the element type accept automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to the element type. These functions assume ownership + of the pointed to object and reset the passed automatic pointer. + </p> + + <h3><a name="2.5.1">2.5.1 Inheritance from Built-in Data Types</a></h3> + + <p>In cases where the mapping calls for an inheritance from a built-in + type which is mapped to a fundamental C++ type, a proxy type is + used instead of the fundamental C++ type (C++ does not allow + inheritance from fundamental types). For instance:</p> + + <pre class="xml"> +<simpleType name="my_int"> + <restriction base="int"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class my_int: public fundamental_base<int> +{ + ... +}; + </pre> + + <p>The <code>fundamental_base</code> class template provides a close + emulation (though not exact) of a fundamental C++ type. 
+ It is defined in an implementation-specific namespace and has the + following interface:</p> + + <pre class="c++"> +template <typename X> +class fundamental_base: public simple_type +{ +public: + fundamental_base (); + fundamental_base (X); + fundamental_base (const fundamental_base&); + +public: + fundamental_base& + operator= (const X&); + +public: + operator const X & () const; + operator X& (); + + template <typename Y> + operator Y () const; + + template <typename Y> + operator Y (); +}; + </pre> + + <h3><a name="2.5.2">2.5.2 Mapping for <code>anyType</code></a></h3> + + <p>The XML Schema <code>anyType</code> built-in data type is mapped to the + <code>xml_schema::type</code> C++ class:</p> + + <pre class="c++"> +class type +{ +public: + virtual + ~type (); + + type (); + type (const type&); + + type& + operator= (const type&); + + virtual type* + _clone () const; + + // anyType DOM content. + // +public: + typedef element_optional dom_content_optional; + + const dom_content_optional& + dom_content () const; + + dom_content_optional& + dom_content (); + + void + dom_content (const xercesc::DOMElement&); + + void + dom_content (xercesc::DOMElement*); + + void + dom_content (const dom_content_optional&); + + const xercesc::DOMDocument& + dom_content_document () const; + + xercesc::DOMDocument& + dom_content_document (); + + bool + null_content () const; + + // DOM association. + // +public: + const xercesc::DOMNode* + _node () const; + + xercesc::DOMNode* + _node (); +}; + </pre> + + <p>When <code>xml_schema::type</code> is used to create an instance + (as opposed to being a base of a derived type), it represents + the XML Schema <code>anyType</code> type. <code>anyType</code> + allows any attributes and any content in any order. 
In the + C++/Tree mapping this content can be represented as a DOM + fragment, similar to XML Schema wildcards (<a href="#2.12">Section + 2.12, "Mapping for <code>any</code> and + <code>anyAttribute</code>"</a>).</p> + + <p>To enable automatic extraction of <code>anyType</code> content + during parsing, the <code>--generate-any-type</code> option must be + specified. Because the DOM API is used to access such content, the + Xerces-C++ runtime should be initialized by the application prior to + parsing and should remain initialized for the lifetime of objects + with the DOM content. For more information on the Xerces-C++ runtime + initialization see <a href="#3.1">Section 3.1, "Initializing the + Xerces-C++ Runtime"</a>.</p> + + <p>The DOM content is stored as the optional DOM element container + and the DOM content accessors and modifiers presented above are + identical to those generated for an optional element wildcard. + Refer to <a href="#2.12.2">Section 2.12.2, "Mapping for <code>any</code> + with the Optional Cardinality Class"</a> for details on their + semantics.</p> + + <p>The <code>dom_content_document()</code> function returns the + DOM document used to store the raw XML content corresponding + to the <code>anyType</code> instance. 
It is equivalent to the + <code>dom_document()</code> function generated for types + with wildcards.</p> + + <p>The <code>null_content()</code> accessor is an optimization function + that allows us to check for the lack of content without actually + creating its empty representation, that is, empty DOM document for + <code>anyType</code> or empty string for <code>anySimpleType</code> + (see the following section for details on <code>anySimpleType</code>).</p> + + <p>For more information on DOM association refer to + <a href="#5.1">Section 5.1, "DOM Association"</a>.</p> + + <h3><a name="2.5.3">2.5.3 Mapping for <code>anySimpleType</code></a></h3> + + <p>The XML Schema <code>anySimpleType</code> built-in data type is mapped + to the <code>xml_schema::simple_type</code> C++ class:</p> + + <pre class="c++"> +class simple_type: public type +{ +public: + simple_type (); + simple_type (const C*); + simple_type (const std::basic_string<C>&); + + simple_type (const simple_type&); + + simple_type& + operator= (const simple_type&); + + virtual simple_type* + _clone () const; + + // anySimpleType text content. + // +public: + const std::basic_string<C>& + text_content () const; + + std::basic_string<C>& + text_content (); + + void + text_content (const std::basic_string<C>&); +}; + </pre> + + <p>When <code>xml_schema::simple_type</code> is used to create an instance + (as opposed to being a base of a derived type), it represents + the XML Schema <code>anySimpleType</code> type. <code>anySimpleType</code> + allows any simple content. 
In the C++/Tree mapping this content can + be represented as a string and accessed or modified with the + <code>text_content()</code> functions shown above.</p> + + <h3><a name="2.5.4">2.5.4 Mapping for <code>QName</code></a></h3> + + <p>The XML Schema <code>QName</code> built-in data type is mapped to the + <code>xml_schema::qname</code> C++ class:</p> + + <pre class="c++"> +class qname: public simple_type +{ +public: + qname (const ncname&); + qname (const uri&, const ncname&); + qname (const qname&); + +public: + qname& + operator= (const qname&); + +public: + virtual qname* + _clone () const; + +public: + bool + qualified () const; + + const uri& + namespace_ () const; + + const ncname& + name () const; +}; + </pre> + + <p>The <code>qualified</code> accessor function can be used to determine + if the name is qualified.</p> + + <h3><a name="2.5.5">2.5.5 Mapping for <code>IDREF</code></a></h3> + + <p>The XML Schema <code>IDREF</code> built-in data type is mapped to the + <code>xml_schema::idref</code> C++ class. This class implements the + smart pointer C++ idiom:</p> + + <pre class="c++"> +class idref: public ncname +{ +public: + idref (const C* s); + idref (const C* s, std::size_t n); + idref (std::size_t n, C c); + idref (const std::basic_string<C>&); + idref (const std::basic_string<C>&, + std::size_t pos, + std::size_t n = npos); + +public: + idref (const idref&); + +public: + virtual idref* + _clone () const; + +public: + idref& + operator= (C c); + + idref& + operator= (const C* s); + + idref& + operator= (const std::basic_string<C>&); + + idref& + operator= (const idref&); + +public: + const type* + operator-> () const; + + type* + operator-> (); + + const type& + operator* () const; + + type& + operator* (); + + const type* + get () const; + + type* + get (); + + // Conversion to bool. 
+ // +public: + typedef void (idref::*bool_convertible)(); + operator bool_convertible () const; +}; + </pre> + + <p>The object that an <code>idref</code> instance refers to is the immediate + container of the matching <code>id</code> instance. For example, + with the following instance document and schema: + </p> + + + <pre class="xml"> +<!-- test.xml --> +<root> + <object id="obj-1" text="hello"/> + <reference>obj-1</reference> +</root> + +<!-- test.xsd --> +<schema> + <complexType name="object_type"> + <attribute name="id" type="ID"/> + <attribute name="text" type="string"/> + </complexType> + + <complexType name="root_type"> + <sequence> + <element name="object" type="object_type"/> + <element name="reference" type="IDREF"/> + </sequence> + </complexType> + + <element name="root" type="root_type"/> +</schema> + </pre> + + <p>The <code>ref</code> instance in the code below will refer to + an object of type <code>object_type</code>:</p> + + <pre class="c++"> +root_type& root = ...; +xml_schema::idref& ref (root.reference ()); +object_type& obj (dynamic_cast<object_type&> (*ref)); +cout << obj.text () << endl; + </pre> + + <p>The smart pointer interface of the <code>idref</code> class always + returns a pointer or reference to <code>xml_schema::type</code>. + This means that you will need to manually cast such a pointer or + reference to its real (dynamic) type before you can use it (unless + all you need is the base interface provided by + <code>xml_schema::type</code>). As a special extension to the XML + Schema language, the mapping supports static typing of <code>idref</code> + references by employing the <code>refType</code> extension attribute. + The following example illustrates this mechanism: + </p> + + <pre class="xml"> +<!-- test.xsd --> +<schema + xmlns:xse="https://www.codesynthesis.com/xmlns/xml-schema-extension"> + + ... + + <element name="reference" type="IDREF" xse:refType="object_type"/> + + ... 
+ +</schema> + </pre> + + <p>With this modification we do not need to do manual casting anymore: + </p> + + <pre class="c++"> +root_type& root = ...; +root_type::reference_type& ref (root.reference ()); +object_type& obj (*ref); +cout << ref->text () << endl; + </pre> + + + <h3><a name="2.5.6">2.5.6 Mapping for <code>base64Binary</code> and + <code>hexBinary</code></a></h3> + + <p>The XML Schema <code>base64Binary</code> and <code>hexBinary</code> + built-in data types are mapped to the + <code>xml_schema::base64_binary</code> and + <code>xml_schema::hex_binary</code> C++ classes, respectively. The + <code>base64_binary</code> and <code>hex_binary</code> classes + support a simple buffer abstraction by inheriting from the + <code>xml_schema::buffer</code> class: + </p> + + <pre class="c++"> +class bounds: public virtual exception +{ +public: + virtual const char* + what () const throw (); +}; + +class buffer +{ +public: + typedef std::size_t size_t; + +public: + buffer (size_t size = 0); + buffer (size_t size, size_t capacity); + buffer (const void* data, size_t size); + buffer (const void* data, size_t size, size_t capacity); + buffer (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + void + swap (buffer&); + +public: + size_t + capacity () const; + + bool + capacity (size_t); + +public: + size_t + size () const; + + bool + size (size_t); + +public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); +}; + </pre> + + <p>The last overloaded constructor reuses an existing data buffer instead + of making a copy. If the <code>assume_ownership</code> argument is + <code>true</code>, the instance assumes ownership of the + memory block pointed to by the <code>data</code> argument and will + eventually release it by calling <code>operator delete</code>. 
The + <code>capacity</code> and <code>size</code> modifier functions return + <code>true</code> if the underlying buffer has moved. + </p> + + <p>The <code>bounds</code> exception is thrown if the constructor + arguments violate the <code>(size <= capacity)</code> + constraint.</p> + + <p>The <code>base64_binary</code> and <code>hex_binary</code> classes + support the <code>buffer</code> interface and perform automatic + decoding/encoding from/to the Base64 and Hex formats, respectively: + </p> + + <pre class="c++"> +class base64_binary: public simple_type, public buffer +{ +public: + base64_binary (size_t size = 0); + base64_binary (size_t size, size_t capacity); + base64_binary (const void* data, size_t size); + base64_binary (const void* data, size_t size, size_t capacity); + base64_binary (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + base64_binary (const base64_binary&); + + base64_binary& + operator= (const base64_binary&); + + virtual base64_binary* + _clone () const; + +public: + std::basic_string<C> + encode () const; +}; + </pre> + + <pre class="c++"> +class hex_binary: public simple_type, public buffer +{ +public: + hex_binary (size_t size = 0); + hex_binary (size_t size, size_t capacity); + hex_binary (const void* data, size_t size); + hex_binary (const void* data, size_t size, size_t capacity); + hex_binary (void* data, + size_t size, + size_t capacity, + bool assume_ownership); + +public: + hex_binary (const hex_binary&); + + hex_binary& + operator= (const hex_binary&); + + virtual hex_binary* + _clone () const; + +public: + std::basic_string<C> + encode () const; +}; + </pre> + + + <h2><a name="2.5.7">2.5.7 Time Zone Representation</a></h2> + + <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, + <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, + <code>gYearMonth</code>, and <code>time</code> XML Schema built-in + types all include an optional time zone component. 
The following + <code>xml_schema::time_zone</code> base class is used to represent + this information:</p> + + <pre class="c++"> +class time_zone +{ +public: + time_zone (); + time_zone (short hours, short minutes); + + bool + zone_present () const; + + void + zone_reset (); + + short + zone_hours () const; + + void + zone_hours (short); + + short + zone_minutes () const; + + void + zone_minutes (short); +}; + +bool +operator== (const time_zone&, const time_zone&); + +bool +operator!= (const time_zone&, const time_zone&); + </pre> + + <p>The <code>zone_present()</code> accessor function returns <code>true</code> + if the time zone is specified. The <code>zone_reset()</code> modifier + function resets the time zone object to the <em>not specified</em> + state. If the time zone offset is negative then both the hours and + minutes components are represented as negative integers.</p> + + + <h2><a name="2.5.8">2.5.8 Mapping for <code>date</code></a></h2> + + <p>The XML Schema <code>date</code> built-in data type is mapped to the + <code>xml_schema::date</code> C++ class which represents a year, a month, + and a day with an optional time zone. Its interface is presented + below. 
For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class date: public simple_type, public time_zone +{ +public: + date (int year, unsigned short month, unsigned short day); + date (int year, unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + +public: + date (const date&); + + date& + operator= (const date&); + + virtual date* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const date&, const date&); + +bool +operator!= (const date&, const date&); + </pre> + + <h2><a name="2.5.9">2.5.9 Mapping for <code>dateTime</code></a></h2> + + <p>The XML Schema <code>dateTime</code> built-in data type is mapped to the + <code>xml_schema::date_time</code> C++ class which represents a year, a month, + a day, hours, minutes, and seconds with an optional time zone. Its interface + is presented below. 
For more information on the base + <code>xml_schema::time_zone</code> class refer to <a href="#2.5.7">Section + 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class date_time: public simple_type, public time_zone +{ +public: + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds, short zone_hours, short zone_minutes); +public: + date_time (const date_time&); + + date_time& + operator= (const date_time&); + + virtual date_time* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const date_time&, const date_time&); + +bool +operator!= (const date_time&, const date_time&); + </pre> + + + <h2><a name="2.5.10">2.5.10 Mapping for <code>duration</code></a></h2> + + <p>The XML Schema <code>duration</code> built-in data type is mapped to the + <code>xml_schema::duration</code> C++ class which represents a potentially + negative duration in the form of years, months, days, hours, minutes, + and seconds. 
Its interface is presented below.</p> + + <pre class="c++"> +class duration: public simple_type +{ +public: + duration (bool negative, + unsigned int years, unsigned int months, unsigned int days, + unsigned int hours, unsigned int minutes, double seconds); +public: + duration (const duration&); + + duration& + operator= (const duration&); + + virtual duration* + _clone () const; + +public: + bool + negative () const; + + void + negative (bool); + + unsigned int + years () const; + + void + years (unsigned int); + + unsigned int + months () const; + + void + months (unsigned int); + + unsigned int + days () const; + + void + days (unsigned int); + + unsigned int + hours () const; + + void + hours (unsigned int); + + unsigned int + minutes () const; + + void + minutes (unsigned int); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const duration&, const duration&); + +bool +operator!= (const duration&, const duration&); + </pre> + + + <h2><a name="2.5.11">2.5.11 Mapping for <code>gDay</code></a></h2> + + <p>The XML Schema <code>gDay</code> built-in data type is mapped to the + <code>xml_schema::gday</code> C++ class which represents a day of the + month with an optional time zone. Its interface is presented below. 
+ For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gday: public simple_type, public time_zone +{ +public: + explicit + gday (unsigned short day); + gday (unsigned short day, short zone_hours, short zone_minutes); + +public: + gday (const gday&); + + gday& + operator= (const gday&); + + virtual gday* + _clone () const; + +public: + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const gday&, const gday&); + +bool +operator!= (const gday&, const gday&); + </pre> + + + <h2><a name="2.5.12">2.5.12 Mapping for <code>gMonth</code></a></h2> + + <p>The XML Schema <code>gMonth</code> built-in data type is mapped to the + <code>xml_schema::gmonth</code> C++ class which represents a month of the + year with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gmonth: public simple_type, public time_zone +{ +public: + explicit + gmonth (unsigned short month); + gmonth (unsigned short month, + short zone_hours, short zone_minutes); + +public: + gmonth (const gmonth&); + + gmonth& + operator= (const gmonth&); + + virtual gmonth* + _clone () const; + +public: + unsigned short + month () const; + + void + month (unsigned short); +}; + +bool +operator== (const gmonth&, const gmonth&); + +bool +operator!= (const gmonth&, const gmonth&); + </pre> + + + <h2><a name="2.5.13">2.5.13 Mapping for <code>gMonthDay</code></a></h2> + + <p>The XML Schema <code>gMonthDay</code> built-in data type is mapped to the + <code>xml_schema::gmonth_day</code> C++ class which represents a day and + a month of the year with an optional time zone. Its interface is presented + below. 
For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gmonth_day: public simple_type, public time_zone +{ +public: + gmonth_day (unsigned short month, unsigned short day); + gmonth_day (unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + +public: + gmonth_day (const gmonth_day&); + + gmonth_day& + operator= (const gmonth_day&); + + virtual gmonth_day* + _clone () const; + +public: + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); +}; + +bool +operator== (const gmonth_day&, const gmonth_day&); + +bool +operator!= (const gmonth_day&, const gmonth_day&); + </pre> + + + <h2><a name="2.5.14">2.5.14 Mapping for <code>gYear</code></a></h2> + + <p>The XML Schema <code>gYear</code> built-in data type is mapped to the + <code>xml_schema::gyear</code> C++ class which represents a year with + an optional time zone. Its interface is presented below. For more + information on the base <code>xml_schema::time_zone</code> class refer + to <a href="#2.5.7">Section 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class gyear: public simple_type, public time_zone +{ +public: + explicit + gyear (int year); + gyear (int year, short zone_hours, short zone_minutes); + +public: + gyear (const gyear&); + + gyear& + operator= (const gyear&); + + virtual gyear* + _clone () const; + +public: + int + year () const; + + void + year (int); +}; + +bool +operator== (const gyear&, const gyear&); + +bool +operator!= (const gyear&, const gyear&); + </pre> + + + <h2><a name="2.5.15">2.5.15 Mapping for <code>gYearMonth</code></a></h2> + + <p>The XML Schema <code>gYearMonth</code> built-in data type is mapped to + the <code>xml_schema::gyear_month</code> C++ class which represents + a year and a month with an optional time zone. 
Its interface is presented + below. For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#2.5.7">Section 2.5.7, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +class gyear_month: public simple_type, public time_zone +{ +public: + gyear_month (int year, unsigned short month); + gyear_month (int year, unsigned short month, + short zone_hours, short zone_minutes); +public: + gyear_month (const gyear_month&); + + gyear_month& + operator= (const gyear_month&); + + virtual gyear_month* + _clone () const; + +public: + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); +}; + +bool +operator== (const gyear_month&, const gyear_month&); + +bool +operator!= (const gyear_month&, const gyear_month&); + </pre> + + + <h2><a name="2.5.16">2.5.16 Mapping for <code>time</code></a></h2> + + <p>The XML Schema <code>time</code> built-in data type is mapped to + the <code>xml_schema::time</code> C++ class which represents hours, + minutes, and seconds with an optional time zone. Its interface is + presented below. 
For more information on the base + <code>xml_schema::time_zone</code> class refer to + <a href="#2.5.7">Section 2.5.7, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +class time: public simple_type, public time_zone +{ +public: + time (unsigned short hours, unsigned short minutes, double seconds); + time (unsigned short hours, unsigned short minutes, double seconds, + short zone_hours, short zone_minutes); + +public: + time (const time&); + + time& + operator= (const time&); + + virtual time* + _clone () const; + +public: + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); +}; + +bool +operator== (const time&, const time&); + +bool +operator!= (const time&, const time&); + </pre> + + + <!-- Mapping for Simple Types --> + + <h2><a name="2.6">2.6 Mapping for Simple Types</a></h2> + + <p>An XML Schema simple type is mapped to a C++ class with the same + name as the simple type. The class defines a public copy constructor, + a public copy assignment operator, and a public virtual + <code>_clone</code> function. The <code>_clone</code> function is + declared <code>const</code>, does not take any arguments, and returns + a pointer to a complete copy of the instance allocated in the free + store. The <code>_clone</code> function shall be used to make copies + when static type and dynamic type of the instance may differ (see + <a href="#2.11">Section 2.11, "Mapping for <code>xsi:type</code> + and Substitution Groups"</a>). For instance:</p> + + <pre class="xml"> +<simpleType name="object"> + ... +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: ... +{ +public: + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; + + ... 
+ +}; + </pre> + + <p>The base class specification and the rest of the class definition + depend on the type of derivation used to define the simple type. </p> + + + <h3><a name="2.6.1">2.6.1 Mapping for Derivation by Restriction</a></h3> + + <p>XML Schema derivation by restriction is mapped to C++ public + inheritance. The base type of the restriction becomes the base + type for the resulting C++ class. In addition to the members described + in <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a>, the + resulting C++ class defines a public constructor with the base type + as its single argument. For instance:</p> + + <pre class="xml"> +<simpleType name="object"> + <restriction base="base"> + ... + </restriction> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public base +{ +public: + object (const base&); + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; +}; + </pre> + + + <h3><a name="2.6.2">2.6.2 Mapping for Enumerations</a></h3> + +<p>XML Schema restriction by enumeration is mapped to a C++ class + with semantics similar to C++ <code>enum</code>. Each XML Schema + enumeration element is mapped to a C++ enumerator with the + name derived from the <code>value</code> attribute and defined + in the class scope. 
In addition to the members + described in <a href="#2.6">Section 2.6, "Mapping for Simple Types"</a>, + the resulting C++ class defines a public constructor that can be called + with one of the enumerators as its single argument, a public constructor + that can be called with enumeration's base value as its single + argument, a public assignment operator that can be used to assign the + value of one of the enumerators, and a public implicit conversion + operator to the underlying C++ enum type.</p> + +<p>Furthermore, for string-based enumeration types, the resulting C++ + class defines a public constructor with a single argument of type + <code>const C*</code> and a public constructor with a single + argument of type <code>const std::basic_string<C>&</code>. + For instance:</p> + + <pre class="xml"> +<simpleType name="color"> + <restriction base="string"> + <enumeration value="red"/> + <enumeration value="green"/> + <enumeration value="blue"/> + </restriction> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class color: public xml_schema::string +{ +public: + enum value + { + red, + green, + blue + }; + +public: + color (value); + color (const C*); + color (const std::basic_string<C>&); + color (const xml_schema::string&); + color (const color&); + +public: + color& + operator= (value); + + color& + operator= (const color&); + +public: + virtual color* + _clone () const; + +public: + operator value () const; +}; + </pre> + + <h3><a name="2.6.3">2.6.3 Mapping for Derivation by List</a></h3> + + <p>XML Schema derivation by list is mapped to C++ public + inheritance from <code>xml_schema::simple_type</code> + (<a href="#2.5.3">Section 2.5.3, "Mapping for + <code>anySimpleType</code>"</a>) and a suitable sequence type. + The list item type becomes the element type of the sequence. 
+ In addition to the members described in <a href="#2.6">Section 2.6, + "Mapping for Simple Types"</a>, the resulting C++ class defines + a public default constructor, a public constructor + with the first argument of type <code>size_type</code> and + the second argument of list item type that creates + a list object with the specified number of copies of the specified + element value, and a public constructor with the two arguments + of an input iterator type that creates a list object from an + iterator range. For instance: + </p> + + <pre class="xml"> +<simpleType name="int_list"> + <list itemType="int"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class int_list: public simple_type, + public sequence<int> +{ +public: + int_list (); + int_list (size_type n, int x); + + template <typename I> + int_list (const I& begin, const I& end); + int_list (const int_list&); + +public: + int_list& + operator= (const int_list&); + +public: + virtual int_list* + _clone () const; +}; + </pre> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). + Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. One notable extension + to the standard interface that is available only for + sequences of non-fundamental C++ types is the addition of + the overloaded <code>push_back</code> and <code>insert</code> + member functions which instead of the constant reference + to the element type accept automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to the element type. These functions assume ownership + of the pointed to object and reset the passed automatic pointer. 
+ </p> + + <h3><a name="2.6.4">2.6.4 Mapping for Derivation by Union</a></h3> + + <p>XML Schema derivation by union is mapped to C++ public + inheritance from <code>xml_schema::simple_type</code> + (<a href="#2.5.3">Section 2.5.3, "Mapping for + <code>anySimpleType</code>"</a>) and <code>std::basic_string<C></code>. + In addition to the members described in <a href="#2.6">Section 2.6, + "Mapping for Simple Types"</a>, the resulting C++ class defines a + public constructor with a single argument of type <code>const C*</code> + and a public constructor with a single argument of type + <code>const std::basic_string<C>&</code>. For instance: + </p> + + <pre class="xml"> +<simpleType name="int_string_union"> + <xsd:union memberTypes="xsd:int xsd:string"/> +</simpleType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class int_string_union: public simple_type, + public std::basic_string<C> +{ +public: + int_string_union (const C*); + int_string_union (const std::basic_string<C>&); + int_string_union (const int_string_union&); + +public: + int_string_union& + operator= (const int_string_union&); + +public: + virtual int_string_union* + _clone () const; +}; + </pre> + + <h2><a name="2.7">2.7 Mapping for Complex Types</a></h2> + + <p>An XML Schema complex type is mapped to a C++ class with the same + name as the complex type. The class defines a public copy constructor, + a public copy assignment operator, and a public virtual + <code>_clone</code> function. The <code>_clone</code> function is + declared <code>const</code>, does not take any arguments, and returns + a pointer to a complete copy of the instance allocated in the free + store. 
The <code>_clone</code> function shall be used to make copies + when static type and dynamic type of the instance may differ (see + <a href="#2.11">Section 2.11, "Mapping for <code>xsi:type</code> + and Substitution Groups"</a>).</p> + + <p>Additionally, the resulting C++ class + defines two public constructors that take an initializer for each + member of the complex type and all its base types that belongs to + the One cardinality class (see <a href="#2.8">Section 2.8, "Mapping + for Local Elements and Attributes"</a>). In the first constructor, + the arguments are passed as constant references and the newly created + instance is initialized with copies of the passed objects. In the + second constructor, arguments that are complex types (that is, + they themselves contain elements or attributes) are passed as + either <code>std::unique_ptr</code> (C++11) or <code>std::auto_ptr</code> + (C++98), depending on the C++ standard selected. In this case the newly + created instance is directly initialized with and assumes ownership + of the pointed to objects and the <code>std::[unique|auto]_ptr</code> + arguments are reset to <code>0</code>. For instance:</p> + + <pre class="xml"> +<complexType name="complex"> + <sequence> + <element name="a" type="int"/> + <element name="b" type="string"/> + </sequence> +</complexType> + +<complexType name="object"> + <sequence> + <element name="s-one" type="boolean"/> + <element name="c-one" type="complex"/> + <element name="optional" type="int" minOccurs="0"/> + <element name="sequence" type="string" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class complex: public xml_schema::type +{ +public: + complex (const int& a, const xml_schema::string& b); + complex (const complex&); + +public: + complex& + operator= (const complex&); + +public: + virtual complex* + _clone () const; + + ... 
+ +}; + +class object: public xml_schema::type +{ +public: + object (const bool& s_one, const complex& c_one); + object (const bool& s_one, std::[unique|auto]_ptr<complex> c_one); + object (const object&); + +public: + object& + operator= (const object&); + +public: + virtual object* + _clone () const; + + ... + +}; + </pre> + + <p>Notice that the generated <code>complex</code> class does not + have the second (<code>std::[unique|auto]_ptr</code>) version of the + constructor since all its required members are of simple types.</p> + + <p>If an XML Schema complex type has an ultimate base which is an XML + Schema simple type then the resulting C++ class also defines a public + constructor that takes an initializer for the base type as well as + for each member of the complex type and all its base types that + belongs to the One cardinality class. For instance:</p> + + <pre class="xml"> +<complexType name="object"> + <simpleContent> + <extension base="date"> + <attribute name="lang" type="language" use="required"/> + </extension> + </simpleContent> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::date +{ +public: + object (const xml_schema::language& lang); + + object (const xml_schema::date& base, + const xml_schema::language& lang); + + ... + +}; + </pre> + + <p>Furthermore, for string-based XML Schema complex types, the resulting C++ + class also defines two public constructors with the first arguments + of type <code>const C*</code> and <code>const std::basic_string<C>&</code>, + respectively, followed by arguments for each member of the complex + type and all its base types that belongs to the One cardinality + class. For enumeration-based complex types the resulting C++ + class also defines a public constructor with the first argument + of the underlying enum type followed by arguments for each member + of the complex type and all its base types that belongs to the One + cardinality class. 
For instance:</p> + + <pre class="xml"> +<simpleType name="color"> + <restriction base="string"> + <enumeration value="red"/> + <enumeration value="green"/> + <enumeration value="blue"/> + </restriction> +</simpleType> + +<complexType name="object"> + <simpleContent> + <extension base="color"> + <attribute name="lang" type="language" use="required"/> + </extension> + </simpleContent> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class color: public xml_schema::string +{ +public: + enum value + { + red, + green, + blue + }; + +public: + color (value); + color (const C*); + color (const std::basic_string<C>&); + + ... + +}; + +class object: public color +{ +public: + object (const color& base, + const xml_schema::language& lang); + + object (const color::value& base, + const xml_schema::language& lang); + + object (const C* base, + const xml_schema::language& lang); + + object (const std::basic_string<C>& base, + const xml_schema::language& lang); + + ... + +}; + </pre> + + <p>Additional constructors can be requested with the + <code>--generate-default-ctor</code> and + <code>--generate-from-base-ctor</code> options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for details.</p> + + <p>If an XML Schema complex type is not explicitly derived from any type, + the resulting C++ class is derived from <code>xml_schema::type</code>. + In cases where an XML Schema complex type is defined using derivation + by extension or restriction, the resulting C++ base class specification + depends on the type of derivation and is described in the subsequent + sections. + </p> + + <p>The mapping for elements and attributes that are defined in a complex + type is described in <a href="#2.8">Section 2.8, "Mapping for Local + Elements and Attributes"</a>. 
+ </p> + + <h3><a name="2.7.1">2.7.1 Mapping for Derivation by Extension</a></h3> + + <p>XML Schema derivation by extension is mapped to C++ public + inheritance. The base type of the extension becomes the base + type for the resulting C++ class. + </p> + + <h3><a name="2.7.2">2.7.2 Mapping for Derivation by Restriction</a></h3> + + <p>XML Schema derivation by restriction is mapped to C++ public + inheritance. The base type of the restriction becomes the base + type for the resulting C++ class. XML Schema elements and + attributes defined within restriction do not result in any + definitions in the resulting C++ class. Instead, corresponding + (unrestricted) definitions are inherited from the base class. + In future versions of this mapping, such elements and + attributes may result in redefinitions of accessors and + modifiers to reflect their restricted semantics. + </p> + + <!-- 2.8 Mapping for Local Elements and Attributes --> + + <h2><a name="2.8">2.8 Mapping for Local Elements and Attributes</a></h2> + + <p>XML Schema element and attribute definitions are called local + if they appear within a complex type definition, an element group + definition, or an attribute group definition. + </p> + + <p>Local XML Schema element and attribute definitions have the same + C++ mapping. Therefore, in this section, local elements and + attributes are collectively called members. 
+ </p> + + <p>While there are many different member cardinality combinations + (determined by the <code>use</code> attribute for attributes and + the <code>minOccurs</code> and <code>maxOccurs</code> attributes + for elements), the mapping divides all possible cardinality + combinations into three cardinality classes: + </p> + + <dl> + <dt><i>one</i></dt> + <dd>attributes: <code>use == "required"</code></dd> + <dd>attributes: <code>use == "optional"</code> and has default or fixed value</dd> + <dd>elements: <code>minOccurs == "1"</code> and <code>maxOccurs == "1"</code></dd> + + <dt><i>optional</i></dt> + <dd>attributes: <code>use == "optional"</code> and doesn't have default or fixed value</dd> + <dd>elements: <code>minOccurs == "0"</code> and <code>maxOccurs == "1"</code></dd> + + <dt><i>sequence</i></dt> + <dd>elements: <code>maxOccurs > "1"</code></dd> + </dl> + + <p>An optional attribute with a default or fixed value acquires this value + if the attribute hasn't been specified in an instance document (see + <a href="#A">Appendix A, "Default and Fixed Values"</a>). This + mapping places such optional attributes to the One cardinality + class.</p> + + <p>A member is mapped to a set of public type definitions + (<code>typedef</code>s) and a set of public accessor and modifier + functions. Type definitions have names derived from the member's + name. The accessor and modifier functions have the same name as the + member. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + typedef xml_schema::string member_type; + + const member_type& + member () const; + + ... + +}; + </pre> + + <p>In addition, if a member has a default or fixed value, a static + accessor function is generated that returns this value. 
For + example:</p> + +<pre class="xml"> +<complexType name="object"> + <attribute name="data" type="string" default="test"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + typedef xml_schema::string data_type; + + const data_type& + data () const; + + static const data_type& + data_default_value (); + + ... + +}; + </pre> + + <p>Names and semantics of type definitions for the member as well + as signatures of the accessor and modifier functions depend on + the member's cardinality class and are described in the following + sub-sections. + </p> + + + <h3><a name="2.8.1">2.8.1 Mapping for Members with the One Cardinality Class</a></h3> + + <p>For the One cardinality class, the type definitions consist of + an alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name. + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + member and can be used for read-only access. The non-constant + version returns an unrestricted reference to the member and can + be used for read-write access. + </p> + + <p>The first modifier function expects an argument of type reference to + constant of the member's type. It makes a deep copy of its argument. + Except for member's types that are mapped to fundamental C++ types, + the second modifier function is provided that expects an argument + of type automatic pointer (<code>std::unique_ptr</code> or + <code>std::auto_ptr</code>, depending on the C++ standard selected) + to the member's type. It assumes ownership of the pointed to object + and resets the passed automatic pointer. 
For instance:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef xml_schema::string member_type; + + // Accessors. + // + const member_type& + member () const; + + member_type& + member (); + + // Modifiers. + // + void + member (const member_type&); + + void + member (std::[unique|auto]_ptr<member_type>); + ... + +}; + </pre> + + <p>In addition, if requested by specifying the <code>--generate-detach</code> + option and only for members of non-fundamental C++ types, the mapping + provides a detach function that returns an automatic pointer to the + member's type, for example:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + ... + + std::[unique|auto]_ptr<member_type> + detach_member (); + ... + +}; + </pre> + + <p>This function detaches the value from the tree leaving the member + value uninitialized. Accessing such an uninitialized value prior to + re-initializing it results in undefined behavior.</p> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + string s (o.member ()); // get + object::member_type& sr (o.member ()); // get + + o.member ("hello"); // set, deep copy + o.member () = "hello"; // set, deep copy + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + o.member (std::move (p)); // set, assumes ownership + p = o.detach_member (); // detach, member is uninitialized + o.member (std::move (p)); // re-attach + + // C++98 version. 
+ // + std::auto_ptr<string> p (new string ("hello")); + o.member (p); // set, assumes ownership + p = o.detach_member (); // detach, member is uninitialized + o.member (p); // re-attach +} + </pre> + + +<h3><a name="2.8.2">2.8.2 Mapping for Members with the Optional Cardinality Class</a></h3> + + <p>For the Optional cardinality class, the type definitions consist of + an alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name and an alias for + the container type with the name created by appending the + <code>_optional</code> suffix to the member's name. + </p> + + <p>Unlike accessor functions for the One cardinality class, accessor + functions for the Optional cardinality class return references to + corresponding containers rather than directly to members. The + accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to + the container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container + and can be used for read-write access. + </p> + + <p>The modifier functions are overloaded for the member's + type and the container type. The first modifier function + expects an argument of type reference to constant of the + member's type. It makes a deep copy of its argument. + Except for member's types that are mapped to fundamental C++ types, + the second modifier function is provided that expects an argument + of type automatic pointer (<code>std::unique_ptr</code> or + <code>std::auto_ptr</code>, depending on the C++ standard selected) + to the member's type. It assumes ownership of the pointed to object + and resets the passed automatic pointer. The last modifier function + expects an argument of type reference to constant of the container + type. It makes a deep copy of its argument. 
For instance: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string" minOccurs="0"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef xml_schema::string member_type; + typedef optional<member_type> member_optional; + + // Accessors. + // + const member_optional& + member () const; + + member_optional& + member (); + + // Modifiers. + // + void + member (const member_type&); + + void + member (std::[unique|auto]_ptr<member_type>); + + void + member (const member_optional&); + + ... + +}; + </pre> + + + <p>The <code>optional</code> class template is defined in an + implementation-specific namespace and has the following + interface. The <code>[unique|auto]_ptr</code>-based constructor + and modifier function are only available if the template + argument is not a fundamental C++ type. + </p> + + <pre class="c++"> +template <typename X> +class optional +{ +public: + optional (); + + // Makes a deep copy. + // + explicit + optional (const X&); + + // Assumes ownership. + // + explicit + optional (std::[unique|auto]_ptr<X>); + + optional (const optional&); + +public: + optional& + operator= (const X&); + + optional& + operator= (const optional&); + + // Pointer-like interface. + // +public: + const X* + operator-> () const; + + X* + operator-> (); + + const X& + operator* () const; + + X& + operator* (); + + typedef void (optional::*bool_convertible) (); + operator bool_convertible () const; + + // Get/set interface. + // +public: + bool + present () const; + + const X& + get () const; + + X& + get (); + + // Makes a deep copy. + // + void + set (const X&); + + // Assumes ownership. + // + void + set (std::[unique|auto]_ptr<X>); + + // Detach and return the contained value. 
+ // + std::[unique|auto]_ptr<X> + detach (); + + void + reset (); +}; + +template <typename X> +bool +operator== (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator!= (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator< (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator> (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator<= (const optional<X>&, const optional<X>&); + +template <typename X> +bool +operator>= (const optional<X>&, const optional<X>&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + if (o.member ().present ()) // test + { + string& s (o.member ().get ()); // get + o.member ("hello"); // set, deep copy + o.member ().set ("hello"); // set, deep copy + o.member ().reset (); // reset + } + + // Same as above but using pointer notation: + // + if (o.member ()) // test + { + string& s (*o.member ()); // get + o.member ("hello"); // set, deep copy + *o.member () = "hello"; // set, deep copy + o.member ().reset (); // reset + } + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + o.member (std::move (p)); // set, assumes ownership + + p.reset (new string ("hello")); + o.member ().set (std::move (p)); // set, assumes ownership + + p = o.member ().detach (); // detach, member is reset + o.member ().set (std::move (p)); // re-attach + + // C++98 version. 
+ // + std::auto_ptr<string> p (new string ("hello")); + o.member (p); // set, assumes ownership + + p.reset (new string ("hello")); + o.member ().set (p); // set, assumes ownership + + p = o.member ().detach (); // detach, member is reset + o.member ().set (p); // re-attach +} + </pre> + + + <h3><a name="2.8.3">2.8.3 Mapping for Members with the Sequence Cardinality Class</a></h3> + + <p>For the Sequence cardinality class, the type definitions consist of an + alias for the member's type with the name created by appending + the <code>_type</code> suffix to the member's name, an alias of + the container type with the name created by appending the + <code>_sequence</code> suffix to the member's name, an alias of + the iterator type with the name created by appending the + <code>_iterator</code> suffix to the member's name, and an alias + of the constant iterator type with the name created by appending the + <code>_const_iterator</code> suffix to the member's name. + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. + </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function + makes a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="member" type="string" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. 
+ // + typedef xml_schema::string member_type; + typedef sequence<member_type> member_sequence; + typedef member_sequence::iterator member_iterator; + typedef member_sequence::const_iterator member_const_iterator; + + // Accessors. + // + const member_sequence& + member () const; + + member_sequence& + member (); + + // Modifier. + // + void + member (const member_sequence&); + + ... + +}; + </pre> + + <p>The <code>sequence</code> class template is defined in an + implementation-specific namespace. It conforms to the + sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences"). + Practically, this means that you can treat such a sequence + as if it was <code>std::vector</code>. Two notable extensions + to the standard interface that are available only for + sequences of non-fundamental C++ types are the addition of + the overloaded <code>push_back</code> and <code>insert</code> + as well as the <code>detach_back</code> and <code>detach</code> + member functions. The additional <code>push_back</code> and + <code>insert</code> functions accept an automatic pointer + (<code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected) to the + element type instead of the constant reference. They assume + ownership of the pointed to object and reset the passed + automatic pointer. The <code>detach_back</code> and + <code>detach</code> functions detach the element + value from the sequence container and, by default, remove + the element from the sequence. These additional functions + have the following signatures:</p> + + <pre class="c++"> +template <typename X> +class sequence +{ +public: + ... + + void + push_back (std::[unique|auto]_ptr<X>) + + iterator + insert (iterator position, std::[unique|auto]_ptr<X>) + + std::[unique|auto]_ptr<X> + detach_back (bool pop = true); + + iterator + detach (iterator position, + std::[unique|auto]_ptr<X>& result, + bool erase = true) + + ... 
+} + </pre> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o) +{ + using xml_schema::string; + + object::member_sequence& s (o.member ()); + + // Iteration. + // + for (object::member_iterator i (s.begin ()); i != s.end (); ++i) + { + string& value (*i); + } + + // Modification. + // + s.push_back ("hello"); // deep copy + + // C++11 version. + // + std::unique_ptr<string> p (new string ("hello")); + s.push_back (std::move (p)); // assumes ownership + p = s.detach_back (); // detach and pop + s.push_back (std::move (p)); // re-append + + // C++98 version. + // + std::auto_ptr<string> p (new string ("hello")); + s.push_back (p); // assumes ownership + p = s.detach_back (); // detach and pop + s.push_back (p); // re-append + + // Setting a new container. + // + object::member_sequence n; + n.push_back ("one"); + n.push_back ("two"); + o.member (n); // deep copy +} + </pre> + + <h3><a name="2.8.4">2.8.4 Element Order</a></h3> + + <p>C++/Tree is a "flattening" mapping in a sense that many levels of + nested compositors (<code>choice</code> and <code>sequence</code>), + all potentially with their own cardinalities, are in the end mapped + to a flat set of elements with one of the three cardinality classes + discussed in the previous sections. While this results in a simple + and easy to use API for most types, in certain cases, the order of + elements in the actual XML documents is not preserved once parsed + into the object model. And sometimes such order has + application-specific significance. 
As an example, consider a schema + that defines a batch of bank transactions:</p> + + <pre class="xml"> +<complexType name="withdraw"> + <sequence> + <element name="account" type="unsignedInt"/> + <element name="amount" type="unsignedInt"/> + </sequence> +</complexType> + +<complexType name="deposit"> + <sequence> + <element name="account" type="unsignedInt"/> + <element name="amount" type="unsignedInt"/> + </sequence> +</complexType> + +<complexType name="batch"> + <choice minOccurs="0" maxOccurs="unbounded"> + <element name="withdraw" type="withdraw"/> + <element name="deposit" type="deposit"/> + </choice> +</complexType> + </pre> + + <p>The batch can contain any number of transactions in any order + but the order of transactions in each actual batch is significant. + For instance, consider what could happen if we reorder the + transactions and apply all the withdrawals before deposits.</p> + + <p>For the <code>batch</code> schema type defined above the default + C++/Tree mapping will produce a C++ class that contains a pair of + sequence containers, one for each of the two elements. While this + will capture the content (transactions), the order of this content + as it appears in XML will be lost. Also, if we try to serialize the + batch we just loaded back to XML, all the withdrawal transactions + will appear before deposits.</p> + + <p>To overcome this limitation of a flattening mapping, C++/Tree + allows us to mark certain XML Schema types, for which content + order is important, as ordered.</p> + + <p>There are several command line options that control which + schema types are treated as ordered. To make an individual + type ordered, we use the <code>--ordered-type</code> option, + for example:</p> + + <pre class="term"> +--ordered-type batch + </pre> + + <p>To automatically treat all the types that are derived from an ordered + type also ordered, we use the <code>--ordered-type-derived</code> + option. 
This is primarily useful if you would like to iterate + over the complete hierarchy's content using the content order + sequence (discussed below).</p> + + <p>Ordered types are also useful for handling mixed content. To + automatically mark all the types with mixed content as ordered + we use the <code>--ordered-type-mixed</code> option. For more + information on handling mixed content see <a href="#2.13">Section + 2.13, "Mapping for Mixed Content Models"</a>.</p> + + <p>Finally, we can mark all the types in the schema we are + compiling as ordered with the <code>--ordered-type-all</code> option. + You should only resort to this option if all the types in + your schema truly suffer from the loss of content + order since, as we will discuss shortly, ordered types + require extra effort to access and, especially, modify. + See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information on + these options.</p> + + <p>Once a type is marked ordered, C++/Tree alters its mapping + in several ways. Firstly, for each local element, element + wildcard (<a href="#2.12.4">Section 2.12.4, "Element Wildcard + Order"</a>), and mixed content text (<a href="#2.13">Section + 2.13, "Mapping for Mixed Content Models"</a>) in this type, a + content id constant is generated. Secondly, an additional sequence + is added to the class that captures the content order. 
Here + is how the mapping of our <code>batch</code> class changes + once we make it ordered:</p> + + <pre class="c++"> +class batch: public xml_schema::type +{ +public: + // withdraw + // + typedef withdraw withdraw_type; + typedef sequence<withdraw_type> withdraw_sequence; + typedef withdraw_sequence::iterator withdraw_iterator; + typedef withdraw_sequence::const_iterator withdraw_const_iterator; + + static const std::size_t withdraw_id = 1; + + const withdraw_sequence& + withdraw () const; + + withdraw_sequence& + withdraw (); + + void + withdraw (const withdraw_sequence&); + + // deposit + // + typedef deposit deposit_type; + typedef sequence<deposit_type> deposit_sequence; + typedef deposit_sequence::iterator deposit_iterator; + typedef deposit_sequence::const_iterator deposit_const_iterator; + + static const std::size_t deposit_id = 2; + + const deposit_sequence& + deposit () const; + + deposit_sequence& + deposit (); + + void + deposit (const deposit_sequence&); + + // content_order + // + typedef xml_schema::content_order content_order_type; + typedef std::vector<content_order_type> content_order_sequence; + typedef content_order_sequence::iterator content_order_iterator; + typedef content_order_sequence::const_iterator content_order_const_iterator; + + const content_order_sequence& + content_order () const; + + content_order_sequence& + content_order (); + + void + content_order (const content_order_sequence&); + + ... +}; + </pre> + + <p>Notice the <code>withdraw_id</code> and <code>deposit_id</code> + content ids as well as the extra <code>content_order</code> + sequence that does not correspond to any element in the + schema definition. The other changes to the mapping for ordered + types have to do with XML parsing and serialization code. During + parsing the content order is captured in the <code>content_order</code> + sequence while during serialization this sequence is used to + determine the order in which content is serialized. 
The + <code>content_order</code> sequence is also copied during + copy construction and assigned during copy assignment. It is also + taken into account during comparison.</p> + + <p>The entry type of the <code>content_order</code> sequence is the + <code>xml_schema::content_order</code> type that has the following + interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + struct content_order + { + content_order (std::size_t id, std::size_t index = 0); + + std::size_t id; + std::size_t index; + }; + + bool + operator== (const content_order&, const content_order&); + + bool + operator!= (const content_order&, const content_order&); + + bool + operator< (const content_order&, const content_order&); +} + </pre> + + <p>The <code>content_order</code> sequence describes the order of + content (elements, including wildcards, as well as mixed content + text). Each entry in this sequence consists of the content id + (for example, <code>withdraw_id</code> or <code>deposit_id</code> + in our case) as well as, for elements of the sequence cardinality + class, an index into the corresponding sequence container (the + index is unused for the one and optional cardinality classes). + For example, in our case, if the content id is <code>withdraw_id</code>, + then the index will point into the <code>withdraw</code> element + sequence.</p> + + <p>With all this information we can now examine how to iterate over + transactions in the batch in content order:</p> + + <pre class="c++"> +batch& b = ... 
+ +for (batch::content_order_const_iterator i (b.content_order ().begin ()); + i != b.content_order ().end (); + ++i) +{ + switch (i->id) + { + case batch::withdraw_id: + { + const withdraw& t (b.withdraw ()[i->index]); + cerr << t.account () << " withdraw " << t.amount () << endl; + break; + } + case batch::deposit_id: + { + const deposit& t (b.deposit ()[i->index]); + cerr << t.account () << " deposit " << t.amount () << endl; + break; + } + default: + { + assert (false); // Unknown content id. + } + } +} + </pre> + + <p>If we serialized our batch back to XML, we would also see that the + order of transactions in the output is exactly the same as in the + input rather than all the withdrawals first followed by all the + deposits.</p> + + <p>The most complex aspect of working with ordered types is + modifications. Now we not only need to change the content, + but also remember to update the order information corresponding + to this change. As a first example, we add a deposit transaction + to the batch:</p> + + <pre class="c++"> +using xml_schema::content_order; + +batch::deposit_sequence& d (b.deposit ()); +batch::withdraw_sequence& w (b.withdraw ()); +batch::content_order_sequence& co (b.content_order ()); + +d.push_back (deposit (123456789, 100000)); +co.push_back (content_order (batch::deposit_id, d.size () - 1)); + </pre> + + <p>In the above example we first added the content (deposit + transaction) and then updated the content order information + by adding an entry with <code>deposit_id</code> content + id and the index of the just added deposit transaction.</p> + + <p>Removing the last transaction can be easy if we know which + transaction (deposit or withdrawal) is last:</p> + + <pre class="c++"> +d.pop_back (); +co.pop_back (); + </pre> + + <p>If, however, we do not know which transaction is last, then + things get a bit more complicated:</p> + + <pre class="c++"> +switch (co.back ().id) +{ +case batch::withdraw_id: + { + w.pop_back (); + break; + } +case
batch::deposit_id: + { + d.pop_back (); + break; + } +} + +co.pop_back (); + </pre> + + <p>The following example shows how to add a transaction at the + beginning of the batch:</p> + + <pre class="c++"> +w.push_back (withdraw (123456789, 100000)); +co.insert (co.begin (), + content_order (batch::withdraw_id, w.size () - 1)); + </pre> + + <p>Note also that when we merely modify the content of one + of the elements in place, we do not need to update its + order since it doesn't change. For example, here is how + we can change the amount in the first withdrawal:</p> + + <pre class="c++"> +w[0].amount (10000); + </pre> + + <p>For the complete working code shown in this section refer to the + <code>order/element</code> example in the + <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <p>If both the base and derived types are ordered, then the + content order sequence is only added to the base and the content + ids are unique within the whole hierarchy. In this case + the content order sequence for the derived type contains + ordering information for both base and derived content.</p> + + <p>In some applications we may need to perform more complex + content processing. For example, in our case, we may need + to remove all the withdrawal transactions. The default + container, <code>std::vector</code>, is not particularly + suitable for such operations. What may be required by + some applications is a multi-index container that not + only allows us to iterate in content order similar to + <code>std::vector</code> but also search by the content + id as well as the content id and index pair.</p> + + <p>While C++/Tree does not provide this functionality by + default, it allows us to specify a custom container + type for content order with the <code>--order-container</code> + command line option. 
The only requirement from the + generated code side for such a container is to provide + the <code>vector</code>-like <code>push_back()</code>, + <code>size()</code>, and const iteration interfaces.</p> + + <p>As an example, here is how we can use the Boost Multi-Index + container for content order. First we create the + <code>content-order-container.hxx</code> header with the + following definition:</p> + + <pre class="c++"> +#ifndef CONTENT_ORDER_CONTAINER +#define CONTENT_ORDER_CONTAINER + +#include <cstddef> // std::size_t + +#include <boost/multi_index_container.hpp> +#include <boost/multi_index/member.hpp> +#include <boost/multi_index/identity.hpp> +#include <boost/multi_index/ordered_index.hpp> +#include <boost/multi_index/random_access_index.hpp> + +struct by_id {}; +struct by_id_index {}; + +template <typename T> +using content_order_container = + boost::multi_index::multi_index_container< + T, + boost::multi_index::indexed_by< + boost::multi_index::random_access<>, + boost::multi_index::ordered_unique< + boost::multi_index::tag<by_id_index>, + boost::multi_index::identity<T> + >, + boost::multi_index::ordered_non_unique< + boost::multi_index::tag<by_id>, + boost::multi_index::member<T, std::size_t, &T::id> + > + > + >; + +#endif + </pre> + + <p>Next we add the following two XSD compiler options to include + this header into every generated header file and to use the + custom container type (see the XSD compiler command line manual + for more information on shell quoting for the first option):</p> + + <pre class="term"> +--hxx-prologue '#include "content-order-container.hxx"' +--order-container content_order_container + </pre> + + <p>With these changes we can now use the multi-index functionality, + for example, to search for a specific content id:</p> + + <pre class="c++"> +typedef batch::content_order_sequence::index<by_id>::type id_set; +typedef id_set::iterator id_iterator; + +const id_set& ids (b.content_order ().get<by_id> ()); + 
+std::pair<id_iterator, id_iterator> r ( + ids.equal_range (std::size_t (batch::deposit_id))); + +for (id_iterator i (r.first); i != r.second; ++i) +{ + const deposit& t (b.deposit ()[i->index]); + cerr << t.account () << " deposit " << t.amount () << endl; +} + </pre> + + <h2><a name="2.9">2.9 Mapping for Global Elements</a></h2> + + <p>An XML Schema element definition is called global if it appears + directly under the <code>schema</code> element. + A global element is a valid root of an instance document. By + default, a global element is mapped to a set of overloaded + parsing and, optionally, serialization functions with the + same name as the element. It is also possible to generate types + for root elements instead of parsing and serialization functions. + This is primarily useful to distinguish object models with the + same root type but with different root elements. See + <a href="#2.9.1">Section 2.9.1, "Element Types"</a> for details. + It is also possible to request the generation of an element map + which allows uniform parsing and serialization of multiple root + elements. See <a href="#2.9.2">Section 2.9.2, "Element Map"</a> + for details. + </p> + + <p>The parsing functions read XML instance documents and return + corresponding object models as an automatic pointer + (<code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected). Their signatures + have the following pattern (<code>type</code> denotes + element's type and <code>name</code> denotes element's + name): + </p> + + <pre class="c++"> +std::[unique|auto]_ptr<type> +name (....); + </pre> + + <p>The process of parsing, including the exact signatures of the parsing + functions, is the subject of <a href="#3">Chapter 3, "Parsing"</a>. + </p> + + <p>The serialization functions write object models back to XML instance + documents. 
Their signatures have the following pattern: + </p> + + <pre class="c++"> +void +name (<stream type>&, const type&, ....); + </pre> + + <p>The process of serialization, including the exact signatures of the + serialization functions, is the subject of <a href="#4">Chapter 4, + "Serialization"</a>. + </p> + + + <h3><a name="2.9.1">2.9.1 Element Types</a></h3> + + <p>The generation of element types is requested with the + <code>--generate-element-type</code> option. With this option + each global element is mapped to a C++ class with the + same name as the element. Such a class is derived from + <code>xml_schema::element_type</code> and contains the same set + of type definitions, constructors, and member functions as would a + type containing a single element with the One cardinality class + named <code>"value"</code>. In addition, the element type also + contains a set of member functions for accessing the element + name and namespace as well as its value in a uniform manner. + For example:</p> + + <pre class="xml"> +<complexType name="type"> + <sequence> + ... + </sequence> +</complexType> + +<element name="root" type="type"/> + </pre> + +<p>is mapped to:</p> + + <pre class="c++"> +class type +{ + ... +}; + +class root: public xml_schema::element_type +{ +public: + // Element value. + // + typedef type value_type; + + const value_type& + value () const; + + value_type& + value (); + + void + value (const value_type&); + + void + value (std::[unique|auto]_ptr<value_type>); + + // Constructors. + // + root (const value_type&); + + root (std::[unique|auto]_ptr<value_type>); + + root (const xercesc::DOMElement&, xml_schema::flags = 0); + + root (const root&, xml_schema::flags = 0); + + virtual root* + _clone (xml_schema::flags = 0) const; + + // Element name and namespace. 
+ // + static const std::string& + name (); + + static const std::string& + namespace_ (); + + virtual const std::string& + _name () const; + + virtual const std::string& + _namespace () const; + + // Element value as xml_schema::type. + // + virtual const xml_schema::type* + _value () const; + + virtual xml_schema::type* + _value (); +}; + +void +operator<< (xercesc::DOMElement&, const root&); + </pre> + + <p>The <code>xml_schema::element_type</code> class is a common + base type for all element types and is defined as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class element_type + { + public: + virtual + ~element_type (); + + virtual element_type* + _clone (flags f = 0) const = 0; + + virtual const std::basic_string<C>& + _name () const = 0; + + virtual const std::basic_string<C>& + _namespace () const = 0; + + virtual xml_schema::type* + _value () = 0; + + virtual const xml_schema::type* + _value () const = 0; + }; +} + </pre> + + <p>The <code>_value()</code> member function returns a pointer to + the element value or 0 if the element is of a fundamental C++ + type and therefore is not derived from <code>xml_schema::type</code>. + </p> + + <p>Unlike parsing and serialization functions, element types + are only capable of parsing and serializing from/to a + <code>DOMElement</code> object. This means that the application + will need to perform its own XML-to-DOM parsing and DOM-to-XML + serialization. The following section describes a mechanism + provided by the mapping to uniformly parse and serialize + multiple root elements.</p> + + + <h3><a name="2.9.2">2.9.2 Element Map</a></h3> + + <p>When element types are generated for root elements it is also + possible to request the generation of an element map with the + <code>--generate-element-map</code> option. The element map + allows uniform parsing and serialization of multiple root + elements via the common <code>xml_schema::element_type</code> + base type. 
The <code>xml_schema::element_map</code> class is + defined as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class element_map + { + public: + static std::[unique|auto]_ptr<xml_schema::element_type> + parse (const xercesc::DOMElement&, flags = 0); + + static void + serialize (xercesc::DOMElement&, const element_type&); + }; +} + </pre> + + <p>The <code>parse()</code> function creates the corresponding + element type object based on the element name and namespace + and returns it as an automatic pointer (<code>std::unique_ptr</code> + or <code>std::auto_ptr</code>, depending on the C++ standard + selected) to <code>xml_schema::element_type</code>. + The <code>serialize()</code> function serializes the passed element + object to <code>DOMElement</code>. Note that in case of + <code>serialize()</code>, the <code>DOMElement</code> object + should have the correct name and namespace. If no element type is + available for an element, both functions throw the + <code>xml_schema::no_element_info</code> exception:</p> + + <pre class="c++"> +struct no_element_info: virtual exception +{ + no_element_info (const std::basic_string<C>& element_name, + const std::basic_string<C>& element_namespace); + + const std::basic_string<C>& + element_name () const; + + const std::basic_string<C>& + element_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The application can discover the actual type of the element + object returned by <code>parse()</code> either using + <code>dynamic_cast</code> or by comparing element names and + namespaces. The following code fragments illustrate how the + element map can be used:</p> + + <pre class="c++"> +// Parsing. +// +DOMElement& e = ... // Parse XML to DOM. + +unique_ptr<xml_schema::element_type> r ( + xml_schema::element_map::parse (e)); + +if (root1* r1 = dynamic_cast<root1*> (r.get ())) +{ + ... 
+} +else if (r->_name () == root2::name () && + r->_namespace () == root2::namespace_ ()) +{ + root2& r2 (static_cast<root2&> (*r)); + + ... +} + </pre> + + <pre class="c++"> +// Serialization. +// +xml_schema::element_type& r = ... + +string name (r._name ()); +string ns (r._namespace ()); + +DOMDocument& doc = ... // Create a new DOMDocument with name and ns. +DOMElement& e (*doc.getDocumentElement ()); + +xml_schema::element_map::serialize (e, r); + +// Serialize DOMDocument to XML. + </pre> + + <!-- --> + + <h2><a name="2.10">2.10 Mapping for Global Attributes</a></h2> + + <p>An XML Schema attribute definition is called global if it appears + directly under the <code>schema</code> element. A global + attribute does not have any mapping. + </p> + + <!-- + When it is referenced from + a local attribute definition (using the <code>ref</code> attribute) + it is treated as a local attribute (see Section 2.8, "Mapping for + Local Elements and Attributes"). + --> + + <h2><a name="2.11">2.11 Mapping for <code>xsi:type</code> and Substitution + Groups</a></h2> + + <p>The mapping provides optional support for the XML Schema polymorphism + features (<code>xsi:type</code> and substitution groups) which can + be requested with the <code>--generate-polymorphic</code> option. + When used, the dynamic type of a member may be different from + its static type. 
Consider the following schema definition and + instance document: + </p> + + <pre class="xml"> +<!-- test.xsd --> +<schema> + <complexType name="base"> + <attribute name="text" type="string"/> + </complexType> + + <complexType name="derived"> + <complexContent> + <extension base="base"> + <attribute name="extra-text" type="string"/> + </extension> + </complexContent> + </complexType> + + <complexType name="root_type"> + <sequence> + <element name="item" type="base" maxOccurs="unbounded"/> + </sequence> + </complexType> + + <element name="root" type="root_type"/> +</schema> + +<!-- test.xml --> +<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <item text="hello"/> + <item text="hello" extra-text="world" xsi:type="derived"/> +</root> + </pre> + + <p>In the resulting object model, the container for + the <code>root::item</code> member will have two elements: + the first element's type will be <code>base</code> while + the second element's (dynamic) type will be + <code>derived</code>. This can be discovered using the + <code>dynamic_cast</code> operator as shown in the following + example: + </p> + + <pre class="c++"> +void +f (root& r) +{ + for (root::item_const_iterator i (r.item ().begin ()); + i != r.item ().end (); + ++i) + { + if (const derived* d = dynamic_cast<const derived*> (&(*i))) + { + // derived + } + else + { + // base + } + } +} + </pre> + + <p>The <code>_clone</code> virtual function should be used instead of + copy constructors to make copies of members that might use + polymorphism: + </p> + + <pre class="c++"> +void +f (root& r) +{ + for (root::item_const_iterator i (r.item ().begin ()); + i != r.item ().end (); + ++i) + { + std::unique_ptr<base> c (i->_clone ()); + } +} + </pre> + + <p>The mapping can often automatically determine which types are + polymorphic based on the substitution group declarations. 
However, + if your XML vocabulary is not using substitution groups or if + substitution groups are defined in a separate schema, then you will + need to use the <code>--polymorphic-type</code> option to specify + which types are polymorphic. When using this option you only need + to specify the root of a polymorphic type hierarchy and the mapping + will assume that all the derived types are also polymorphic. + Also note that you need to specify this option when compiling every + schema file that references the polymorphic type. Consider the following + two schemas as an example:</p> + + <pre class="xml"> +<!-- base.xsd --> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="base"> + <xs:sequence> + <xs:element name="b" type="xs:int"/> + </xs:sequence> + </xs:complexType> + + <!-- substitution group root --> + <xs:element name="base" type="base"/> + +</xs:schema> + </pre> + + <pre class="xml"> +<!-- derived.xsd --> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:include schemaLocation="base.xsd"/> + + <xs:complexType name="derived"> + <xs:complexContent> + <xs:extension base="base"> + <xs:sequence> + <xs:element name="d" type="xs:string"/> + </xs:sequence> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="derived" type="derived" substitutionGroup="base"/> + +</xs:schema> + </pre> + + <p>In this example we need to specify "<code>--polymorphic-type base</code>" + when compiling both schemas because the substitution group is declared + in a schema other than the one defining type <code>base</code>.</p> + + <p>You can also indicate that all types should be treated as polymorphic + with the <code>--polymorphic-type-all</code> option. 
However, this may result + in slower generated code with a greater footprint.</p> + + + <!-- Mapping for any and anyAttribute --> + + + <h2><a name="2.12">2.12 Mapping for <code>any</code> and <code>anyAttribute</code></a></h2> + + <p>For the XML Schema <code>any</code> and <code>anyAttribute</code> + wildcards an optional mapping can be requested with the + <code>--generate-wildcard</code> option. The mapping represents + the content matched by wildcards as DOM fragments. Because the + DOM API is used to access such content, the Xerces-C++ runtime + should be initialized by the application prior to parsing and + should remain initialized for the lifetime of objects with + the wildcard content. For more information on the Xerces-C++ + runtime initialization see <a href="#3.1">Section 3.1, + "Initializing the Xerces-C++ Runtime"</a>. + </p> + + <p>The mapping for <code>any</code> is similar to the mapping for + local elements (see <a href="#2.8">Section 2.8, "Mapping for Local + Elements and Attributes"</a>) except that the type used in the + wildcard mapping is <code>xercesc::DOMElement</code>. As with local + elements, the mapping divides all possible cardinality combinations + into three cardinality classes: <i>one</i>, <i>optional</i>, and + <i>sequence</i>. + </p> + + <p>The mapping for <code>anyAttribute</code> represents the attributes + matched by this wildcard as a set of <code>xercesc::DOMAttr</code> + objects with a key being the attribute's name and namespace.</p> + + <p>Similar to local elements and attributes, the <code>any</code> and + <code>anyAttribute</code> wildcards are mapped to a set of public type + definitions (typedefs) and a set of public accessor and modifier + functions. Type definitions have names derived from <code>"any"</code> + for the <code>any</code> wildcard and <code>"any_attribute"</code> + for the <code>anyAttribute</code> wildcard. 
The accessor and modifier + functions are named <code>"any"</code> for the <code>any</code> wildcard + and <code>"any_attribute"</code> for the <code>anyAttribute</code> + wildcard. Subsequent wildcards in the same type have escaped names + such as <code>"any1"</code> or <code>"any_attribute1"</code>. + </p> + + <p>Because Xerces-C++ DOM nodes always belong to a <code>DOMDocument</code>, + each type with a wildcard has an associated <code>DOMDocument</code> + object. The reference to this object can be obtained using the accessor + function called <code>dom_document</code>. The access to the document + object from the application code may be necessary to create or modify + the wildcard content. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other"/> + </sequence> + <anyAttribute namespace="##other"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // any + // + const xercesc::DOMElement& + any () const; + + void + any (const xercesc::DOMElement&); + + ... + + // any_attribute + // + typedef attribute_set any_attribute_set; + typedef any_attribute_set::iterator any_attribute_iterator; + typedef any_attribute_set::const_iterator any_attribute_const_iterator; + + const any_attribute_set& + any_attribute () const; + + any_attribute_set& + any_attribute (); + + ... + + // DOMDocument object for wildcard content. + // + const xercesc::DOMDocument& + dom_document () const; + + xercesc::DOMDocument& + dom_document (); + + ... +}; + </pre> + + + <p>Names and semantics of type definitions for the wildcards as well + as signatures of the accessor and modifier functions depend on the + wildcard type as well as the cardinality class for the <code>any</code> + wildcard. They are described in the following sub-sections. 
+ </p> + + + <h3><a name="2.12.1">2.12.1 Mapping for <code>any</code> with the One Cardinality Class</a></h3> + + <p>For <code>any</code> with the One cardinality class, + there are no type definitions. The accessor functions come in + constant and non-constant versions. The constant accessor function + returns a constant reference to <code>xercesc::DOMElement</code> and + can be used for read-only access. The non-constant version returns + an unrestricted reference to <code>xercesc::DOMElement</code> and can + be used for read-write access. + </p> + + <p>The first modifier function expects an argument of type reference + to constant <code>xercesc::DOMElement</code> and makes a deep copy + of its argument. The second modifier function expects an argument of + type pointer to <code>xercesc::DOMElement</code>. This modifier + function assumes ownership of its argument and expects the element + object to be created using the DOM document associated with this + instance. For example: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Accessors. + // + const xercesc::DOMElement& + any () const; + + xercesc::DOMElement& + any (); + + // Modifiers. + // + void + any (const xercesc::DOMElement&); + + void + any (xercesc::DOMElement*); + + ... 
+ +}; + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + DOMElement& e1 (o.any ()); // get + o.any (e); // set, deep copy + DOMDocument& doc (o.dom_document ()); + o.any (doc.createElement (...)); // set, assumes ownership +} + </pre> + + <h3><a name="2.12.2">2.12.2 Mapping for <code>any</code> with the Optional Cardinality Class</a></h3> + + <p>For <code>any</code> with the Optional cardinality class, the type + definitions consist of an alias for the container type with name + <code>any_optional</code> (or <code>any1_optional</code>, etc., for + subsequent wildcards in the type definition). + </p> + + <p>Unlike accessor functions for the One cardinality class, accessor + functions for the Optional cardinality class return references to + corresponding containers rather than directly to <code>DOMElement</code>. + The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to + the container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container + and can be used for read-write access. + </p> + + <p>The modifier functions are overloaded for <code>xercesc::DOMElement</code> + and the container type. The first modifier function expects an argument of + type reference to constant <code>xercesc::DOMElement</code> and + makes a deep copy of its argument. The second modifier function + expects an argument of type pointer to <code>xercesc::DOMElement</code>. + This modifier function assumes ownership of its argument and expects + the element object to be created using the DOM document associated + with this instance. The third modifier function expects an argument + of type reference to constant of the container type and makes a + deep copy of its argument. 
For instance: + </p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other" minOccurs="0"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef element_optional any_optional; + + // Accessors. + // + const any_optional& + any () const; + + any_optional& + any (); + + // Modifiers. + // + void + any (const xercesc::DOMElement&); + + void + any (xercesc::DOMElement*); + + void + any (const any_optional&); + + ... + +}; + </pre> + + + <p>The <code>element_optional</code> container is a + specialization of the <code>optional</code> class template described + in <a href="#2.8.2">Section 2.8.2, "Mapping for Members with the Optional + Cardinality Class"</a>. Its interface is presented below: + </p> + + <pre class="c++"> +class element_optional +{ +public: + explicit + element_optional (xercesc::DOMDocument&); + + // Makes a deep copy. + // + element_optional (const xercesc::DOMElement&, xercesc::DOMDocument&); + + // Assumes ownership. + // + element_optional (xercesc::DOMElement*, xercesc::DOMDocument&); + + element_optional (const element_optional&, xercesc::DOMDocument&); + +public: + element_optional& + operator= (const xercesc::DOMElement&); + + element_optional& + operator= (const element_optional&); + + // Pointer-like interface. + // +public: + const xercesc::DOMElement* + operator-> () const; + + xercesc::DOMElement* + operator-> (); + + const xercesc::DOMElement& + operator* () const; + + xercesc::DOMElement& + operator* (); + + typedef void (element_optional::*bool_convertible) (); + operator bool_convertible () const; + + // Get/set interface. + // +public: + bool + present () const; + + const xercesc::DOMElement& + get () const; + + xercesc::DOMElement& + get (); + + // Makes a deep copy. + // + void + set (const xercesc::DOMElement&); + + // Assumes ownership. 
+ // + void + set (xercesc::DOMElement*); + + void + reset (); +}; + +bool +operator== (const element_optional&, const element_optional&); + +bool +operator!= (const element_optional&, const element_optional&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + DOMDocument& doc (o.dom_document ()); + + if (o.any ().present ()) // test + { + DOMElement& e1 (o.any ().get ()); // get + o.any ().set (e); // set, deep copy + o.any ().set (doc.createElement (...)); // set, assumes ownership + o.any ().reset (); // reset + } + + // Same as above but using pointer notation: + // + if (o.any ()) // test + { + DOMElement& e1 (*o.any ()); // get + o.any (e); // set, deep copy + o.any (doc.createElement (...)); // set, assumes ownership + o.any ().reset (); // reset + } +} + </pre> + + + + <h3><a name="2.12.3">2.12.3 Mapping for <code>any</code> with the Sequence Cardinality Class</a></h3> + + <p>For <code>any</code> with the Sequence cardinality class, the type + definitions consist of an alias of the container type with name + <code>any_sequence</code> (or <code>any1_sequence</code>, etc., for + subsequent wildcards in the type definition), an alias of the iterator + type with name <code>any_iterator</code> (or <code>any1_iterator</code>, + etc., for subsequent wildcards in the type definition), and an alias + of the constant iterator type with name <code>any_const_iterator</code> + (or <code>any1_const_iterator</code>, etc., for subsequent wildcards + in the type definition). + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. 
+ </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function makes + a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + <any namespace="##other" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef element_sequence any_sequence; + typedef any_sequence::iterator any_iterator; + typedef any_sequence::const_iterator any_const_iterator; + + // Accessors. + // + const any_sequence& + any () const; + + any_sequence& + any (); + + // Modifier. + // + void + any (const any_sequence&); + + ... + +}; + </pre> + + <p>The <code>element_sequence</code> container is a + specialization of the <code>sequence</code> class template described + in <a href="#2.8.3">Section 2.8.3, "Mapping for Members with the + Sequence Cardinality Class"</a>. Its interface is similar to + the sequence interface as defined by the ISO/ANSI Standard for + C++ (ISO/IEC 14882:1998, Section 23.1.1, "Sequences") and is + presented below: + </p> + + <pre class="c++"> +class element_sequence +{ +public: + typedef xercesc::DOMElement value_type; + typedef xercesc::DOMElement* pointer; + typedef const xercesc::DOMElement* const_pointer; + typedef xercesc::DOMElement& reference; + typedef const xercesc::DOMElement& const_reference; + + typedef <implementation-defined> iterator; + typedef <implementation-defined> const_iterator; + typedef <implementation-defined> reverse_iterator; + typedef <implementation-defined> const_reverse_iterator; + + typedef <implementation-defined> size_type; + typedef <implementation-defined> difference_type; + typedef <implementation-defined> allocator_type; + +public: + explicit + element_sequence (xercesc::DOMDocument&); + + // DOMElement cannot be default-constructed. 
+ // + // explicit + // element_sequence (size_type n); + + element_sequence (size_type n, + const xercesc::DOMElement&, + xercesc::DOMDocument&); + + template <typename I> + element_sequence (const I& begin, + const I& end, + xercesc::DOMDocument&); + + element_sequence (const element_sequence&, xercesc::DOMDocument&); + + element_sequence& + operator= (const element_sequence&); + +public: + void + assign (size_type n, const xercesc::DOMElement&); + + template <typename I> + void + assign (const I& begin, const I& end); + +public: + // This version of resize can only be used to shrink the + // sequence because DOMElement cannot be default-constructed. + // + void + resize (size_type); + + void + resize (size_type, const xercesc::DOMElement&); + +public: + size_type + size () const; + + size_type + max_size () const; + + size_type + capacity () const; + + bool + empty () const; + + void + reserve (size_type); + + void + clear (); + +public: + const_iterator + begin () const; + + const_iterator + end () const; + + iterator + begin (); + + iterator + end (); + + const_reverse_iterator + rbegin () const; + + const_reverse_iterator + rend () const; + + reverse_iterator + rbegin (); + + reverse_iterator + rend (); + +public: + xercesc::DOMElement& + operator[] (size_type); + + const xercesc::DOMElement& + operator[] (size_type) const; + + xercesc::DOMElement& + at (size_type); + + const xercesc::DOMElement& + at (size_type) const; + + xercesc::DOMElement& + front (); + + const xercesc::DOMElement& + front () const; + + xercesc::DOMElement& + back (); + + const xercesc::DOMElement& + back () const; + +public: + // Makes a deep copy. + // + void + push_back (const xercesc::DOMElement&); + + // Assumes ownership. + // + void + push_back (xercesc::DOMElement*); + + void + pop_back (); + + // Makes a deep copy. + // + iterator + insert (iterator position, const xercesc::DOMElement&); + + // Assumes ownership. 
+ // + iterator + insert (iterator position, xercesc::DOMElement*); + + void + insert (iterator position, size_type n, const xercesc::DOMElement&); + + template <typename I> + void + insert (iterator position, const I& begin, const I& end); + + iterator + erase (iterator position); + + iterator + erase (iterator begin, iterator end); + +public: + // Note that the DOMDocument object of the two sequences being + // swapped should be the same. + // + void + swap (element_sequence& x); +}; + +inline bool +operator== (const element_sequence&, const element_sequence&); + +inline bool +operator!= (const element_sequence&, const element_sequence&); + </pre> + + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMElement& e) +{ + using namespace xercesc; + + object::any_sequence& s (o.any ()); + + // Iteration. + // + for (object::any_iterator i (s.begin ()); i != s.end (); ++i) + { + DOMElement& e (*i); + } + + // Modification. + // + s.push_back (e); // deep copy + DOMDocument& doc (o.dom_document ()); + s.push_back (doc.createElement (...)); // assumes ownership +} + </pre> + + <h3><a name="2.12.4">2.12.4 Element Wildcard Order</a></h3> + + <p>Similar to elements, element wildcards in ordered types + (<a href="#2.8.4">Section 2.8.4, "Element Order"</a>) are assigned + content ids and are included in the content order sequence. 
+ Continuing with the bank transactions example started in Section + 2.8.4, we can extend the batch by allowing custom transactions:</p> + + <pre class="xml"> +<complexType name="batch"> + <choice minOccurs="0" maxOccurs="unbounded"> + <element name="withdraw" type="withdraw"/> + <element name="deposit" type="deposit"/> + <any namespace="##other" processContents="lax"/> + </choice> +</complexType> + </pre> + + <p>This will lead to the following changes in the generated + <code>batch</code> C++ class:</p> + + <pre class="c++"> +class batch: public xml_schema::type +{ +public: + ... 
+ + // any + // + typedef element_sequence any_sequence; + typedef any_sequence::iterator any_iterator; + typedef any_sequence::const_iterator any_const_iterator; + + static const std::size_t any_id = 3UL; + + const any_sequence& + any () const; + + any_sequence& + any (); + + void + any (const any_sequence&); + + ... +}; + </pre> + + <p>With this change we also need to update the iteration code to handle + the new content id:</p> + + <pre class="c++"> +for (batch::content_order_const_iterator i (b.content_order ().begin ()); + i != b.content_order ().end (); + ++i) +{ + switch (i->id) + { + ... + + case batch::any_id: + { + const DOMElement& e (b.any ()[i->index]); + ... + break; + } + + ... + } +} + </pre> + + <p>For the complete working code that shows the use of wildcards in + ordered types refer to the <code>order/element</code> example in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <h3><a name="2.12.5">2.12.5 Mapping for <code>anyAttribute</code></a></h3> + + <p>For <code>anyAttribute</code> the type definitions consist of an alias + of the container type with name <code>any_attribute_set</code> + (or <code>any1_attribute_set</code>, etc., for subsequent wildcards + in the type definition), an alias of the iterator type with name + <code>any_attribute_iterator</code> (or <code>any1_attribute_iterator</code>, + etc., for subsequent wildcards in the type definition), and an alias + of the constant iterator type with name <code>any_attribute_const_iterator</code> + (or <code>any1_attribute_const_iterator</code>, etc., for subsequent + wildcards in the type definition). + </p> + + <p>The accessor functions come in constant and non-constant versions. + The constant accessor function returns a constant reference to the + container and can be used for read-only access. The non-constant + version returns an unrestricted reference to the container and can + be used for read-write access. 
+ </p> + + <p>The modifier function expects an argument of type reference to + constant of the container type. The modifier function makes + a deep copy of its argument. For instance: + </p> + + + <pre class="xml"> +<complexType name="object"> + <sequence> + ... + </sequence> + <anyAttribute namespace="##other"/> +</complexType> + </pre> + + <p>is mapped to:</p> + + <pre class="c++"> +class object: public xml_schema::type +{ +public: + // Type definitions. + // + typedef attribute_set any_attribute_set; + typedef any_attribute_set::iterator any_attribute_iterator; + typedef any_attribute_set::const_iterator any_attribute_const_iterator; + + // Accessors. + // + const any_attribute_set& + any_attribute () const; + + any_attribute_set& + any_attribute (); + + // Modifier. + // + void + any_attribute (const any_attribute_set&); + + ... + +}; + </pre> + + <p>The <code>attribute_set</code> class is an associative container + similar to the <code>std::set</code> class template as defined by + the ISO/ANSI Standard for C++ (ISO/IEC 14882:1998, Section 23.3.3, + "Class template set") with the key being the attribute's name + and namespace. Unlike <code>std::set</code>, <code>attribute_set</code> + allows searching using names and namespaces instead of + <code>xercesc::DOMAttr</code> objects. 
It is defined in an + implementation-specific namespace and its interface is presented + below: + </p> + + <pre class="c++"> +class attribute_set +{ +public: + typedef xercesc::DOMAttr key_type; + typedef xercesc::DOMAttr value_type; + typedef xercesc::DOMAttr* pointer; + typedef const xercesc::DOMAttr* const_pointer; + typedef xercesc::DOMAttr& reference; + typedef const xercesc::DOMAttr& const_reference; + + typedef <implementation-defined> iterator; + typedef <implementation-defined> const_iterator; + typedef <implementation-defined> reverse_iterator; + typedef <implementation-defined> const_reverse_iterator; + + typedef <implementation-defined> size_type; + typedef <implementation-defined> difference_type; + typedef <implementation-defined> allocator_type; + +public: + attribute_set (xercesc::DOMDocument&); + + template <typename I> + attribute_set (const I& begin, const I& end, xercesc::DOMDocument&); + + attribute_set (const attribute_set&, xercesc::DOMDocument&); + + attribute_set& + operator= (const attribute_set&); + +public: + const_iterator + begin () const; + + const_iterator + end () const; + + iterator + begin (); + + iterator + end (); + + const_reverse_iterator + rbegin () const; + + const_reverse_iterator + rend () const; + + reverse_iterator + rbegin (); + + reverse_iterator + rend (); + +public: + size_type + size () const; + + size_type + max_size () const; + + bool + empty () const; + + void + clear (); + +public: + // Makes a deep copy. + // + std::pair<iterator, bool> + insert (const xercesc::DOMAttr&); + + // Assumes ownership. + // + std::pair<iterator, bool> + insert (xercesc::DOMAttr*); + + // Makes a deep copy. + // + iterator + insert (iterator position, const xercesc::DOMAttr&); + + // Assumes ownership. 
+ // + iterator + insert (iterator position, xercesc::DOMAttr*); + + template <typename I> + void + insert (const I& begin, const I& end); + +public: + void + erase (iterator position); + + size_type + erase (const std::basic_string<C>& name); + + size_type + erase (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name); + + size_type + erase (const XMLCh* name); + + size_type + erase (const XMLCh* namespace_, const XMLCh* name); + + void + erase (iterator begin, iterator end); + +public: + size_type + count (const std::basic_string<C>& name) const; + + size_type + count (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name) const; + + size_type + count (const XMLCh* name) const; + + size_type + count (const XMLCh* namespace_, const XMLCh* name) const; + + iterator + find (const std::basic_string<C>& name); + + iterator + find (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name); + + iterator + find (const XMLCh* name); + + iterator + find (const XMLCh* namespace_, const XMLCh* name); + + const_iterator + find (const std::basic_string<C>& name) const; + + const_iterator + find (const std::basic_string<C>& namespace_, + const std::basic_string<C>& name) const; + + const_iterator + find (const XMLCh* name) const; + + const_iterator + find (const XMLCh* namespace_, const XMLCh* name) const; + +public: + // Note that the DOMDocument object of the two sets being + // swapped should be the same. + // + void + swap (attribute_set&); +}; + +bool +operator== (const attribute_set&, const attribute_set&); + +bool +operator!= (const attribute_set&, const attribute_set&); + </pre> + + <p>The following code shows how one could use this mapping:</p> + + <pre class="c++"> +void +f (object& o, const xercesc::DOMAttr& a) +{ + using namespace xercesc; + + object::any_attribute_set& s (o.any_attribute ()); + + // Iteration. 
+ // + for (object::any_attribute_iterator i (s.begin ()); i != s.end (); ++i) + { + DOMAttr& a (*i); + } + + // Modification. + // + s.insert (a); // deep copy + DOMDocument& doc (o.dom_document ()); + s.insert (doc.createAttribute (...)); // assumes ownership + + // Searching. + // + object::any_attribute_iterator i (s.find ("name")); + i = s.find ("http://www.w3.org/XML/1998/namespace", "lang"); +} + </pre> + + <!-- Mapping for Mixed Content Models --> + + <h2><a name="2.13">2.13 Mapping for Mixed Content Models</a></h2> + + <p>For XML Schema types with mixed content models C++/Tree provides + mapping support only if the type is marked as ordered + (<a href="#2.8.4">Section 2.8.4, "Element Order"</a>). Use the + <code>--ordered-type-mixed</code> XSD compiler option to + automatically mark all types with mixed content as ordered.</p> + + <p>For an ordered type with mixed content, C++/Tree adds an extra + text content sequence that is used to store the text fragments. + This text content sequence is also assigned the content id and + its entries are included in the content order sequence, just + like elements. 
As a result, it is possible to capture the order + between elements and text fragments.</p> + + <p>As an example, consider the following schema that describes text + with embedded links:</p> + + <pre class="xml"> +<complexType name="anchor"> + <simpleContent> + <extension base="string"> + <attribute name="href" type="anyURI" use="required"/> + </extension> + </simpleContent> +</complexType> + +<complexType name="text" mixed="true"> + <sequence> + <element name="a" type="anchor" minOccurs="0" maxOccurs="unbounded"/> + </sequence> +</complexType> + </pre> + + <p>The generated <code>text</code> C++ class will provide the following + API (assuming it is marked as ordered):</p> + + <pre class="c++"> +class text: public xml_schema::type +{ +public: + // a + // + typedef anchor a_type; + typedef sequence<a_type> a_sequence; + typedef a_sequence::iterator a_iterator; + typedef a_sequence::const_iterator a_const_iterator; + + static const std::size_t a_id = 1UL; + + const a_sequence& + a () const; + + a_sequence& + a (); + + void + a (const a_sequence&); + + // text_content + // + typedef xml_schema::string text_content_type; + typedef sequence<text_content_type> text_content_sequence; + typedef text_content_sequence::iterator text_content_iterator; + typedef text_content_sequence::const_iterator text_content_const_iterator; + + static const std::size_t text_content_id = 2UL; + + const text_content_sequence& + text_content () const; + + text_content_sequence& + text_content (); + + void + text_content (const text_content_sequence&); + + // content_order + // + typedef xml_schema::content_order content_order_type; + typedef std::vector<content_order_type> content_order_sequence; + typedef content_order_sequence::iterator content_order_iterator; + typedef content_order_sequence::const_iterator content_order_const_iterator; + + const content_order_sequence& + content_order () const; + + content_order_sequence& + content_order (); + + void + content_order (const 
content_order_sequence&); + + ... +}; + </pre> + + <p>Given this interface we can iterate over both link elements + and text in content order. The following code fragment converts + our format to plain text with references.</p> + + <pre class="c++"> +const text& t = ... + +for (text::content_order_const_iterator i (t.content_order ().begin ()); + i != t.content_order ().end (); + ++i) +{ + switch (i->id) + { + case text::a_id: + { + const anchor& a (t.a ()[i->index]); + cerr << a << "[" << a.href () << "]"; + break; + } + case text::text_content_id: + { + const xml_schema::string& s (t.text_content ()[i->index]); + cerr << s; + break; + } + default: + { + assert (false); // Unknown content id. + } + } +} + </pre> + + <p>For the complete working code that shows the use of mixed content + in ordered types refer to the <code>order/mixed</code> example in + the <code>cxx/tree/</code> directory in the + <a href="https://cppget.org/xsd-examples">xsd-examples</a> + package.</p> + + <!-- Parsing --> + + + <h1><a name="3">3 Parsing</a></h1> + + <p>This chapter covers various aspects of parsing XML instance + documents in order to obtain a corresponding tree-like object + model. + </p> + + <p>Each global XML Schema element in the form:</p> + + <pre class="xml"> +<element name="name" type="type"/> + </pre> + + <p>is mapped to 14 overloaded C++ functions in the form:</p> + + <pre class="c++"> +// Read from a URI or a local file.
+// + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (const std::basic_string<C>& uri, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from std::istream. +// + +std::[unique|auto]_ptr<type> +name (std::istream&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (std::istream&, + const std::basic_string<C>& id, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from InputSource. 
+// + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xml_schema::error_handler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xercesc::InputSource&, + xercesc::DOMErrorHandler&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + + +// Read from DOM. +// + +std::[unique|auto]_ptr<type> +name (const xercesc::DOMDocument&, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + +std::[unique|auto]_ptr<type> +name (xml_schema::dom::[unique|auto]_ptr<xercesc::DOMDocument>, + xml_schema::flags = 0, + const xml_schema::properties& = xml_schema::properties ()); + </pre> + + <p>You can choose between reading an XML instance from a local file, + URI, <code>std::istream</code>, <code>xercesc::InputSource</code>, + or a pre-parsed DOM instance in the form of + <code>xercesc::DOMDocument</code>. All the parsing functions + return a dynamically allocated object model as either + <code>std::unique_ptr</code> or <code>std::auto_ptr</code>, + depending on the C++ standard selected. Each of these parsing + functions is discussed in more detail in the following sections. + </p> + + <h2><a name="3.1">3.1 Initializing the Xerces-C++ Runtime</a></h2> + + <p>Some parsing functions expect you to initialize the Xerces-C++ + runtime while others initialize and terminate it as part of their + work. The general rule is as follows: if a function has any arguments + or returns a value that is an instance of a Xerces-C++ type, then + this function expects you to initialize the Xerces-C++ runtime. + Otherwise, the function initializes and terminates the runtime for + you.
Note that it is legal to have nested calls to the Xerces-C++ + initialize and terminate functions as long as the calls are balanced. + </p> + + <p>You can instruct parsing functions that initialize and terminate + the runtime not to do so by passing the + <code>xml_schema::flags::dont_initialize</code> flag (see + <a href="#3.2">Section 3.2, "Flags and Properties"</a>). + </p> + + + <h2><a name="3.2">3.2 Flags and Properties</a></h2> + + <p>Parsing flags and properties are the last two arguments of every + parsing function. They allow you to fine-tune the process of + instance validation and parsing. Both arguments are optional. + </p> + + + <p>The following flags are recognized by the parsing functions:</p> + + <dl> + <dt><code>xml_schema::flags::keep_dom</code></dt> + <dd>Keep association between DOM nodes and the resulting + object model nodes. For more information about DOM association + refer to <a href="#5.1">Section 5.1, "DOM Association"</a>.</dd> + + <dt><code>xml_schema::flags::own_dom</code></dt> + <dd>Assume ownership of the DOM document passed. This flag only + makes sense together with the <code>keep_dom</code> flag in + the call to the parsing function with the + <code>xml_schema::dom::[unique|auto]_ptr<DOMDocument></code> + argument.</dd> + + <dt><code>xml_schema::flags::dont_validate</code></dt> + <dd>Do not validate instance documents against schemas.</dd> + + <dt><code>xml_schema::flags::dont_initialize</code></dt> + <dd>Do not initialize the Xerces-C++ runtime.</dd> + </dl> + + <p>You can pass several flags by combining them using the bit-wise OR + operator. For example:</p> + + <pre class="c++"> +using xml_schema::flags; + +std::unique_ptr<type> r ( + name ("test.xml", flags::keep_dom | flags::dont_validate)); + </pre> + + <p>By default, validation of instance documents is turned on even + though parsers generated by XSD do not assume instance + documents are valid. 
They include a number of checks that prevent + construction of inconsistent object models. This, + however, does not mean that an instance document that was + successfully parsed by the XSD-generated parsers is + valid per the corresponding schema. If an instance document is not + "valid enough" for the generated parsers to construct a consistent + object model, one of the exceptions defined in the + <code>xml_schema</code> namespace is thrown (see + <a href="#3.3">Section 3.3, "Error Handling"</a>). + </p> + + <p>For more information on the Xerces-C++ runtime initialization + refer to <a href="#3.1">Section 3.1, "Initializing the Xerces-C++ + Runtime"</a>. + </p> + + <p>The <code>xml_schema::properties</code> class allows you to + programmatically specify schema locations to be used instead + of those specified with the <code>xsi:schemaLocation</code> + and <code>xsi:noNamespaceSchemaLocation</code> attributes + in instance documents. The interface of the <code>properties</code> + class is presented below: + </p> + + <pre class="c++"> +class properties +{ +public: + void + schema_location (const std::basic_string<C>& namespace_, + const std::basic_string<C>& location); + void + no_namespace_schema_location (const std::basic_string<C>& location); +}; + </pre> + + <p>Note that all locations are relative to an instance document unless + they are URIs. For example, if you want to use a local file as your + schema, then you will need to pass + <code>file:///absolute/path/to/your/schema</code> as the location + argument. + </p> + + <h2><a name="3.3">3.3 Error Handling</a></h2> + + <p>As discussed in <a href="#2.2">Section 2.2, "Error Handling"</a>, + the mapping uses the C++ exception handling mechanism as its primary + way of reporting error conditions.
However, to handle recoverable + parsing and validation errors and warnings, a callback interface may be + preferred by the application.</p> + + <p>To better understand error handling and reporting strategies employed + by the parsing functions, it is useful to know that the + transformation of an XML instance document to a statically-typed + tree happens in two stages. The first stage, performed by Xerces-C++, + consists of parsing an XML document into a DOM instance. For short, + we will call this stage the XML-DOM stage. Validation, if not disabled, + happens during this stage. The second stage, + performed by the generated parsers, consists of parsing the DOM + instance into the statically-typed tree. We will call this stage + the DOM-Tree stage. Additional checks are performed during this + stage in order to prevent construction of an inconsistent tree which + could otherwise happen when validation is disabled, for example.</p> + + <p>All parsing functions except the two that operate on a DOM instance + come in overloaded triples. The first function in such a triple + reports error conditions exclusively by throwing exceptions. It + accumulates all the parsing and validation errors of the XML-DOM + stage and throws them in a single instance of the + <code>xml_schema::parsing</code> exception (described below). + The second and the third functions in the triple use callback + interfaces to report parsing and validation errors and warnings. + The two callback interfaces are <code>xml_schema::error_handler</code> + and <code>xercesc::DOMErrorHandler</code>. For more information + on the <code>xercesc::DOMErrorHandler</code> interface refer to + the Xerces-C++ documentation.
The <code>xml_schema::error_handler</code> + interface is presented below: + </p> + + <pre class="c++"> +class error_handler +{ +public: + struct severity + { + enum value + { + warning, + error, + fatal + }; + }; + + virtual bool + handle (const std::basic_string<C>& id, + unsigned long line, + unsigned long column, + severity, + const std::basic_string<C>& message) = 0; + + virtual + ~error_handler (); +}; + </pre> + + <p>The <code>id</code> argument of the <code>error_handler::handle</code> + function identifies the resource being parsed (e.g., a file name or + URI). + </p> + + <p>By returning <code>true</code> from the <code>handle</code> function + you instruct the parser to recover and continue parsing. Returning + <code>false</code> results in termination of the parsing process. + An error with the <code>fatal</code> severity level results in + termination of the parsing process no matter what is returned from + the <code>handle</code> function. It is safe to throw an exception + from the <code>handle</code> function. + </p> + + <p>The DOM-Tree stage reports error conditions exclusively by throwing + exceptions. Individual exceptions thrown by the parsing functions + are described in the following sub-sections. 
+ </p> + + + <h3><a name="3.3.1">3.3.1 <code>xml_schema::parsing</code></a></h3> + + <pre class="c++"> +struct severity +{ + enum value + { + warning, + error + }; + + severity (value); + operator value () const; +}; + +struct error +{ + error (severity, + const std::basic_string<C>& id, + unsigned long line, + unsigned long column, + const std::basic_string<C>& message); + + severity + severity () const; + + const std::basic_string<C>& + id () const; + + unsigned long + line () const; + + unsigned long + column () const; + + const std::basic_string<C>& + message () const; +}; + +std::basic_ostream<C>& +operator<< (std::basic_ostream<C>&, const error&); + +struct diagnostics: std::vector<error> +{ +}; + +std::basic_ostream<C>& +operator<< (std::basic_ostream<C>&, const diagnostics&); + +struct parsing: virtual exception +{ + parsing (); + parsing (const diagnostics&); + + const diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::parsing</code> exception is thrown if there + were parsing or validation errors reported during the XML-DOM stage. + If no callback interface was provided to the parsing function, the + exception contains a list of errors and warnings accessible using + the <code>diagnostics</code> function. The usual conditions when + this exception is thrown include malformed XML instances and, if + validation is turned on, invalid instance documents. 
+ </p> + + <h3><a name="3.3.2">3.3.2 <code>xml_schema::expected_element</code></a></h3> + + <pre class="c++"> +struct expected_element: virtual exception +{ + expected_element (const std::basic_string<C>& name, + const std::basic_string<C>& namespace_); + + + const std::basic_string<C>& + name () const; + + const std::basic_string<C>& + namespace_ () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_element</code> exception is thrown + when an expected element is not encountered by the DOM-Tree stage. + The name and namespace of the expected element can be obtained using + the <code>name</code> and <code>namespace_</code> functions respectively. + </p> + + + <h3><a name="3.3.3">3.3.3 <code>xml_schema::unexpected_element</code></a></h3> + + <pre class="c++"> +struct unexpected_element: virtual exception +{ + unexpected_element (const std::basic_string<C>& encountered_name, + const std::basic_string<C>& encountered_namespace, + const std::basic_string<C>& expected_name, + const std::basic_string<C>& expected_namespace); + + + const std::basic_string<C>& + encountered_name () const; + + const std::basic_string<C>& + encountered_namespace () const; + + + const std::basic_string<C>& + expected_name () const; + + const std::basic_string<C>& + expected_namespace () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::unexpected_element</code> exception is thrown + when an unexpected element is encountered by the DOM-Tree stage. + The name and namespace of the encountered element can be obtained + using the <code>encountered_name</code> and + <code>encountered_namespace</code> functions respectively. If an + element was expected instead of the encountered one, its name + and namespace can be obtained using the <code>expected_name</code> and + <code>expected_namespace</code> functions respectively. Otherwise + these functions return empty strings.
+ </p> + + <h3><a name="3.3.4">3.3.4 <code>xml_schema::expected_attribute</code></a></h3> + + <pre class="c++"> +struct expected_attribute: virtual exception +{ + expected_attribute (const std::basic_string<C>& name, + const std::basic_string<C>& namespace_); + + + const std::basic_string<C>& + name () const; + + const std::basic_string<C>& + namespace_ () const; + + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_attribute</code> exception is thrown + when an expected attribute is not encountered by the DOM-Tree stage. + The name and namespace of the expected attribute can be obtained using + the <code>name</code> and <code>namespace_</code> functions respectively. + </p> + + + <h3><a name="3.3.5">3.3.5 <code>xml_schema::unexpected_enumerator</code></a></h3> + + <pre class="c++"> +struct unexpected_enumerator: virtual exception +{ + unexpected_enumerator (const std::basic_string<C>& enumerator); + + const std::basic_string<C>& + enumerator () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::unexpected_enumerator</code> exception is thrown + when an unexpected enumerator is encountered by the DOM-Tree stage. + The enumerator can be obtained using the <code>enumerator</code> + function. + </p> + + <h3><a name="3.3.6">3.3.6 <code>xml_schema::expected_text_content</code></a></h3> + + <pre class="c++"> +struct expected_text_content: virtual exception +{ + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::expected_text_content</code> exception is thrown + when content other than text is encountered and text content was + expected by the DOM-Tree stage.
+ </p> + + <h3><a name="3.3.7">3.3.7 <code>xml_schema::no_type_info</code></a></h3> + + <pre class="c++"> +struct no_type_info: virtual exception +{ + no_type_info (const std::basic_string<C>& type_name, + const std::basic_string<C>& type_namespace); + + const std::basic_string<C>& + type_name () const; + + const std::basic_string<C>& + type_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::no_type_info</code> exception is thrown + when there is no type information associated with a type specified + by the <code>xsi:type</code> attribute. This exception is thrown + by the DOM-Tree stage. The name and namespace of the type in question + can be obtained using the <code>type_name</code> and + <code>type_namespace</code> functions respectively. Usually, catching + this exception means that you haven't linked the code generated + from the schema defining the type in question with your application + or this schema has been compiled without the + <code>--generate-polymorphic</code> option. + </p> + + + <h3><a name="3.3.8">3.3.8 <code>xml_schema::not_derived</code></a></h3> + + <pre class="c++"> +struct not_derived: virtual exception +{ + not_derived (const std::basic_string<C>& base_type_name, + const std::basic_string<C>& base_type_namespace, + const std::basic_string<C>& derived_type_name, + const std::basic_string<C>& derived_type_namespace); + + const std::basic_string<C>& + base_type_name () const; + + const std::basic_string<C>& + base_type_namespace () const; + + + const std::basic_string<C>& + derived_type_name () const; + + const std::basic_string<C>& + derived_type_namespace () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::not_derived</code> exception is thrown + when a type specified by the <code>xsi:type</code> attribute is + not derived from the expected base type. This exception is thrown + by the DOM-Tree stage. 
The name and namespace of the expected + base type can be obtained using the <code>base_type_name</code> and + <code>base_type_namespace</code> functions respectively. The name + and namespace of the offending type can be obtained using the + <code>derived_type_name</code> and + <code>derived_type_namespace</code> functions respectively. + </p> + + <h3><a name="3.3.9">3.3.9 <code>xml_schema::no_prefix_mapping</code></a></h3> + + <pre class="c++"> +struct no_prefix_mapping: virtual exception +{ + no_prefix_mapping (const std::basic_string<C>& prefix); + + const std::basic_string<C>& + prefix () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::no_prefix_mapping</code> exception is thrown + during the DOM-Tree stage if a namespace prefix is encountered for + which a prefix-namespace mapping hasn't been provided. The namespace + prefix in question can be obtained using the <code>prefix</code> + function. + </p> + + <h2><a name="3.4">3.4 Reading from a Local File or URI</a></h2> + + <p>Using a local file or URI is the simplest way to parse an XML instance. + For example:</p> + + <pre class="c++"> +using std::unique_ptr; + +unique_ptr<type> r1 (name ("test.xml")); +unique_ptr<type> r2 (name ("https://www.codesynthesis.com/test.xml")); + </pre> + + <p>Or, in the C++98 mode:</p> + + <pre class="c++"> +using std::auto_ptr; + +auto_ptr<type> r1 (name ("test.xml")); +auto_ptr<type> r2 (name ("https://www.codesynthesis.com/test.xml")); + </pre> + + <h2><a name="3.5">3.5 Reading from <code>std::istream</code></a></h2> + + <p>When using an <code>std::istream</code> instance, you may also + pass an optional resource id. This id is used to identify the + resource (for example in error messages) as well as to resolve + relative paths. 
For instance:</p> + + <pre class="c++"> +using std::unique_ptr; + +{ + std::ifstream ifs ("test.xml"); + unique_ptr<type> r (name (ifs, "test.xml")); +} + +{ + std::string str ("..."); // Some XML fragment. + std::istringstream iss (str); + unique_ptr<type> r (name (iss)); +} + </pre> + + <h2><a name="3.6">3.6 Reading from <code>xercesc::InputSource</code></a></h2> + + <p>Reading from a <code>xercesc::InputSource</code> instance + is similar to the <code>std::istream</code> case except + the resource id is maintained by the <code>InputSource</code> + object. For instance:</p> + + <pre class="c++"> +xercesc::StdInInputSource is; +std::unique_ptr<type> r (name (is)); + </pre> + + <h2><a name="3.7">3.7 Reading from DOM</a></h2> + + <p>Reading from a <code>xercesc::DOMDocument</code> instance allows + you to set up a custom XML-DOM stage. Things like DOM + parser reuse, schema pre-parsing, and schema caching can be achieved + with this approach. For more information on how to obtain a DOM + representation from an XML instance refer to the Xerces-C++ + documentation. In addition, the + <a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree Mapping + FAQ</a> shows how to parse an XML instance to a Xerces-C++ + DOM document using the XSD runtime utilities. + </p> + + <p>The last parsing function is useful when you would like to perform + your own XML-to-DOM parsing and associate the resulting DOM document + with the object model nodes. The automatic <code>DOMDocument</code> + pointer is reset and the resulting object model assumes ownership + of the DOM document passed. For example:</p> + + <pre class="c++"> +// C++11 version. +// +xml_schema::dom::unique_ptr<xercesc::DOMDocument> doc = ... + +std::unique_ptr<type> r ( + name (std::move (doc), + xml_schema::flags::keep_dom | xml_schema::flags::own_dom)); + +// At this point doc is reset to 0. + +// C++98 version. +// +xml_schema::dom::auto_ptr<xercesc::DOMDocument> doc = ...
+ +std::auto_ptr<type> r ( + name (doc, xml_schema::flags::keep_dom | xml_schema::flags::own_dom)); + +// At this point doc is reset to 0. + </pre> + + <h1><a name="4">4 Serialization</a></h1> + + <p>This chapter covers various aspects of serializing a + tree-like object model to DOM or XML. + In this regard, serialization is complementary to the reverse + process of parsing a DOM or XML instance into an object model + which is discussed in <a href="#3">Chapter 3, + "Parsing"</a>. Note that the generation of the serialization code + is optional and should be explicitly requested with the + <code>--generate-serialization</code> option. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information. + </p> + + <p>Each global XML Schema element in the form: + </p> + + + <pre class="xml"> +<xsd:element name="name" type="type"/> + </pre> + + <p>is mapped to 8 overloaded C++ functions in the form:</p> + + <pre class="c++"> +// Serialize to std::ostream. +// +void +name (std::ostream&, + const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (std::ostream&, + const type&, + xml_schema::error_handler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (std::ostream&, + const type&, + xercesc::DOMErrorHandler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + + +// Serialize to XMLFormatTarget. 
+// +void +name (xercesc::XMLFormatTarget&, + const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (xercesc::XMLFormatTarget&, + const type&, + xml_schema::error_handler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + +void +name (xercesc::XMLFormatTarget&, + const type&, + xercesc::DOMErrorHandler&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + const std::basic_string<C>& encoding = "UTF-8", + xml_schema::flags = 0); + + +// Serialize to DOM. +// +xml_schema::dom::[unique|auto]_ptr<xercesc::DOMDocument> +name (const type&, + const xml_schema::namespace_infomap& = + xml_schema::namespace_infomap (), + xml_schema::flags = 0); + +void +name (xercesc::DOMDocument&, + const type&, + xml_schema::flags = 0); + </pre> + + <p>You can choose between writing XML to <code>std::ostream</code> or + <code>xercesc::XMLFormatTarget</code> and creating a DOM instance + in the form of <code>xercesc::DOMDocument</code>. Serialization + to <code>ostream</code> or <code>XMLFormatTarget</code> requires + considerably less work while serialization to DOM provides + for greater flexibility. Each of these serialization functions + is discussed in more detail in the following sections. + </p> + + + <h2><a name="4.1">4.1 Initializing the Xerces-C++ Runtime</a></h2> + + <p>Some serialization functions expect you to initialize the Xerces-C++ + runtime while others initialize and terminate it as part of their + work. The general rule is as follows: if a function has any arguments + or returns a value that is an instance of a Xerces-C++ type, then + this function expects you to initialize the Xerces-C++ runtime. + Otherwise, the function initializes and terminates the runtime for + you. 
Note that it is legal to have nested calls to the Xerces-C++ + initialize and terminate functions as long as the calls are balanced. + </p> + + <p>You can instruct serialization functions that initialize and terminate + the runtime not to do so by passing the + <code>xml_schema::flags::dont_initialize</code> flag (see + <a href="#4.3">Section 4.3, "Flags"</a>). + </p> + + <h2><a name="4.2">4.2 Namespace Infomap and Character Encoding</a></h2> + + <p>When a document being serialized uses XML namespaces, custom + prefix-namespace associations can be established. If custom + prefix-namespace mapping is not provided then generic prefixes + (<code>p1</code>, <code>p2</code>, etc.) are automatically assigned + to namespaces as needed. Also, if + you would like the resulting instance document to contain the + <code>schemaLocation</code> or <code>noNamespaceSchemaLocation</code> + attributes, you will need to provide namespace-schema associations. + The <code>xml_schema::namespace_infomap</code> class is used + to capture this information:</p> + + <pre class="c++"> +struct namespace_info +{ + namespace_info (); + namespace_info (const std::basic_string<C>& name, + const std::basic_string<C>& schema); + + std::basic_string<C> name; + std::basic_string<C> schema; +}; + +// Map of namespace prefix to namespace_info. 
+// +struct namespace_infomap: public std::map<std::basic_string<C>, + namespace_info> +{ +}; + </pre> + + <p>Consider the following associations as an example:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + </pre> + + <p>This map, if passed to one of the serialization functions, + could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<t:name xmlns:t="https://www.codesynthesis.com/test" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + <p>As you can see, the serialization function automatically added namespace + mapping for the <code>xsi</code> prefix. You can change this by + providing your own prefix:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map["xsn"].name = "http://www.w3.org/2001/XMLSchema-instance"; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + </pre> + + <p>This could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<t:name xmlns:t="https://www.codesynthesis.com/test" + xmlns:xsn="http://www.w3.org/2001/XMLSchema-instance" + xsn:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + <p>To specify the location of a schema without a namespace you can use + an empty prefix as in the example below: </p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].schema = "test.xsd"; + </pre> + + <p>This would result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<name xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="test.xsd"> + </pre> + + <p>To make a particular namespace default you can use an empty + prefix, for example:</p> + + <pre class="c++"> +xml_schema::namespace_infomap map; + +map[""].name = 
"https://www.codesynthesis.com/test"; +map[""].schema = "test.xsd"; + </pre> + + <p>This could result in the following XML fragment:</p> + + <pre class="xml"> +<?xml version="1.0" ?> +<name xmlns="https://www.codesynthesis.com/test" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://www.codesynthesis.com/test test.xsd"> + </pre> + + + <p>Another bit of information that you can pass to the serialization + functions is the character encoding method that you would like to use. + Common values for this argument are <code>"US-ASCII"</code>, + <code>"ISO8859-1"</code>, <code>"UTF-8"</code>, + <code>"UTF-16BE"</code>, <code>"UTF-16LE"</code>, + <code>"UCS-4BE"</code>, and <code>"UCS-4LE"</code>. The default + encoding is <code>"UTF-8"</code>. For more information on + encoding methods see the + "<a href="http://en.wikipedia.org/wiki/Character_code">Character + Encoding</a>" article from Wikipedia. + </p> + + <h2><a name="4.3">4.3 Flags</a></h2> + + <p>Serialization flags are the last argument of every serialization + function. They allow you to fine-tune the process of serialization. + The flags argument is optional. + </p> + + + <p>The following flags are recognized by the serialization + functions:</p> + + <dl> + <dt><code>xml_schema::flags::dont_initialize</code></dt> + <dd>Do not initialize the Xerces-C++ runtime.</dd> + + <dt><code>xml_schema::flags::dont_pretty_print</code></dt> + <dd>Do not add extra spaces or new lines that make the resulting XML + slightly bigger but easier to read.</dd> + + <dt><code>xml_schema::flags::no_xml_declaration</code></dt> + <dd>Do not write XML declaration (<?xml ... ?>).</dd> + </dl> + + <p>You can pass several flags by combining them using the bit-wise OR + operator. For example:</p> + + <pre class="c++"> +std::unique_ptr<type> r = ... 
+std::ofstream ofs ("test.xml"); +xml_schema::namespace_infomap map; +name (ofs, + *r, + map, + "UTF-8", + xml_schema::flags::no_xml_declaration | + xml_schema::flags::dont_pretty_print); + </pre> + + <p>For more information on the Xerces-C++ runtime initialization + refer to <a href="#4.1">Section 4.1, "Initializing the Xerces-C++ + Runtime"</a>. + </p> + + <h2><a name="4.4">4.4 Error Handling</a></h2> + + <p>As with the parsing functions (see <a href="#3.3">Section 3.3, + "Error Handling"</a>), to better understand error handling and + reporting strategies employed by the serialization functions, it + is useful to know that the transformation of a statically-typed + tree to an XML instance document happens in two stages. The first + stage, performed by the generated code, consists of building a DOM + instance from the statically-typed tree. For short, we will call + this stage the Tree-DOM stage. The second stage, performed by + Xerces-C++, consists of serializing the DOM instance into the XML + document. We will call this stage the DOM-XML stage. + </p> + + <p>All serialization functions except the two that serialize into + a DOM instance come in overloaded triples. The first function + in such a triple reports error conditions exclusively by throwing + exceptions. It accumulates all the serialization errors of the + DOM-XML stage and throws them in a single instance of the + <code>xml_schema::serialization</code> exception (described below). + The second and the third functions in the triple use callback + interfaces to report serialization errors and warnings. The two + callback interfaces are <code>xml_schema::error_handler</code> and + <code>xercesc::DOMErrorHandler</code>. The + <code>xml_schema::error_handler</code> interface is described in + <a href="#3.3">Section 3.3, "Error Handling"</a>. For more information + on the <code>xercesc::DOMErrorHandler</code> interface refer to the + Xerces-C++ documentation. 
+ </p> + + <p>The Tree-DOM stage reports error conditions exclusively by throwing + exceptions. Individual exceptions thrown by the serialization functions + are described in the following sub-sections. + </p> + + <h3><a name="4.4.1">4.4.1 <code>xml_schema::serialization</code></a></h3> + + <pre class="c++"> +struct serialization: virtual exception +{ + serialization (); + serialization (const diagnostics&); + + const diagnostics& + diagnostics () const; + + virtual const char* + what () const throw (); +}; + </pre> + + <p>The <code>xml_schema::diagnostics</code> class is described in + <a href="#3.3.1">Section 3.3.1, "<code>xml_schema::parsing</code>"</a>. + The <code>xml_schema::serialization</code> exception is thrown if + there were serialization errors reported during the DOM-XML stage. + If no callback interface was provided to the serialization function, + the exception contains a list of errors and warnings accessible using + the <code>diagnostics</code> function. + </p> + + + <h3><a name="4.4.2">4.4.2 <code>xml_schema::unexpected_element</code></a></h3> + + <p>The <code>xml_schema::unexpected_element</code> exception is + described in <a href="#3.3.3">Section 3.3.3, + "<code>xml_schema::unexpected_element</code>"</a>. It is thrown + by the serialization functions during the Tree-DOM stage if the + root element name of the provided DOM instance does not match with + the name of the element this serialization function is for. + </p> + + <h3><a name="4.4.3">4.4.3 <code>xml_schema::no_type_info</code></a></h3> + + <p>The <code>xml_schema::no_type_info</code> exception is + described in <a href="#3.3.7">Section 3.3.7, + "<code>xml_schema::no_type_info</code>"</a>. It is thrown + by the serialization functions during the Tree-DOM stage when there + is no type information associated with a dynamic type of an + element. 
Usually, catching this exception means that you haven't + linked the code generated from the schema defining the type in + question with your application or this schema has been compiled + without the <code>--generate-polymorphic</code> option. + </p> + + <h2><a name="4.5">4.5 Serializing to <code>std::ostream</code></a></h2> + + <p>In order to serialize to <code>std::ostream</code> you will need + an object model, an output stream and, optionally, a namespace + infomap. For instance:</p> + + <pre class="c++"> +// Obtain the object model. +// +std::unique_ptr<type> r = ... + +// Prepare namespace mapping and schema location information. +// +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + +// Write it out. +// +name (std::cout, *r, map); + </pre> + + <p>Note that the output stream is treated as a binary stream. This + becomes important when you use a character encoding that is wider + than 8-bit <code>char</code>, for instance UTF-16 or UCS-4. For + example, things will most likely break if you try to serialize + to <code>std::ostringstream</code> with UTF-16 or UCS-4 as an + encoding. This is due to the special value, + <code>'\0'</code>, that will most likely occur as part of such + serialization and it won't have the special meaning assumed by + <code>std::ostringstream</code>. + </p> + + + <h2><a name="4.6">4.6 Serializing to <code>xercesc::XMLFormatTarget</code></a></h2> + + <p>Serializing to an <code>xercesc::XMLFormatTarget</code> instance + is similar to the <code>std::ostream</code> case. For instance: + </p> + + <pre class="c++"> +using std::unique_ptr; + +// Obtain the object model. +// +unique_ptr<type> r = ... + +// Prepare namespace mapping and schema location information. 
+// +xml_schema::namespace_infomap map; + +map["t"].name = "https://www.codesynthesis.com/test"; +map["t"].schema = "test.xsd"; + +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Choose a target. + // + unique_ptr<XMLFormatTarget> ft; + + if (argc != 2) + { + ft = unique_ptr<XMLFormatTarget> (new StdOutFormatTarget ()); + } + else + { + ft = unique_ptr<XMLFormatTarget> ( + new LocalFileFormatTarget (argv[1])); + } + + // Write it out. + // + name (*ft, *r, map); +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>Note that we had to initialize the Xerces-C++ runtime before we + could call this serialization function.</p> + + <h2><a name="4.7">4.7 Serializing to DOM</a></h2> + + <p>The mapping provides two overloaded functions that implement + serialization to a DOM instance. The first creates a DOM instance + for you and the second serializes to an existing DOM instance. + While serializing to a new DOM instance is similar to serializing + to <code>std::ostream</code> or <code>xercesc::XMLFormatTarget</code>, + serializing to an existing DOM instance requires quite a bit of work + from your side. You will need to set all the custom namespace mapping + attributes as well as the <code>schemaLocation</code> and/or + <code>noNamespaceSchemaLocation</code> attributes. The following + listing should give you an idea about what needs to be done: + </p> + + <pre class="c++"> +// Obtain the object model. +// +std::unique_ptr<type> r = ... + +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Create a DOM instance. Set custom namespace mapping and schema + // location attributes. + // + DOMDocument& doc = ... + + // Serialize to DOM. + // + name (doc, *r); + + // Serialize the DOM document to XML. + // + ... +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>For more information on how to create and serialize a DOM instance + refer to the Xerces-C++ documentation. 
In addition, the + <a href="http://wiki.codesynthesis.com/Tree/FAQ">C++/Tree Mapping + FAQ</a> shows how to implement these operations using the XSD + runtime utilities. + </p> + + <h1><a name="5">5 Additional Functionality</a></h1> + + <p>The C++/Tree mapping provides a number of optional features + that can be useful in certain situations. They are described + in the following sections.</p> + + <h2><a name="5.1">5.1 DOM Association</a></h2> + + <p>Normally, after parsing is complete, the DOM document which + was used to extract the data is discarded. However, the parsing + functions can be instructed to preserve the DOM document + and create an association between the DOM nodes and object model + nodes. When there is an association between the DOM and + object model nodes, you can obtain the corresponding DOM element + or attribute node from an object model node as well as perform + the reverse transition: obtain the corresponding object model + from a DOM element or attribute node.</p> + + <p>Maintaining DOM association is normally useful when the application + needs access to XML constructs that are not preserved in the + object model, for example, XML comments. + Another useful aspect of DOM association is the ability of the + application to navigate the document tree using the generic DOM + interface (for example, with the help of an XPath processor) + and then move back to the statically-typed object model. Note + also that while you can change the underlying DOM document, + these changes are not reflected in the object model and will + be ignored during serialization. 
If you need to not only access + but also modify some aspects of XML that are not preserved in + the object model, then type customization with custom parsing + constructors and serialization operators should be used instead.</p> + + <p>To request DOM association you will need to pass the + <code>xml_schema::flags::keep_dom</code> flag to one of the + parsing functions (see <a href="#3.2">Section 3.2, + "Flags and Properties"</a> for more information). In this case the + DOM document is retained and will be released when the object model + is deleted. Note that since DOM nodes "out-live" the parsing function + call, you need to initialize the Xerces-C++ runtime before calling + one of the parsing functions with the <code>keep_dom</code> flag and + terminate it after the object model is destroyed (see + <a href="#3.1">Section 3.1, "Initializing the Xerces-C++ Runtime"</a>).</p> + + <p>If the <code>keep_dom</code> flag is passed + as the second argument to the copy constructor and the copy + being made is of a complete tree, then the DOM association + is also maintained in the copy by cloning the underlying + DOM document and reestablishing the associations. For example:</p> + + <pre class="c++"> +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Parse XML to object model. + // + std::unique_ptr<type> r (root ( + "root.xml", + xml_schema::flags::keep_dom | + xml_schema::flags::dont_initialize)); + + // Copy without DOM association. + // + type copy1 (*r); + + // Copy with DOM association. + // + type copy2 (*r, xml_schema::flags::keep_dom); +} + +XMLPlatformUtils::Terminate (); + </pre> + + + <p>To obtain the corresponding DOM node from an object model node + you will need to call the <code>_node</code> accessor function + which returns a pointer to <code>DOMNode</code>. You can then query + this DOM node's type and cast it to either <code>DOMAttr*</code> + or <code>DOMElement*</code>. 
To obtain the corresponding object + model node from a DOM node, the DOM user data API is used. The + <code>xml_schema::dom::tree_node_key</code> variable contains + the key for object model nodes. The following schema and code + fragment show how to navigate from DOM to object model nodes + and in the opposite direction:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="a" type="string"/> + </sequence> +</complexType> + +<element name="root" type="object"/> + </pre> + + <pre class="c++"> +using namespace xercesc; + +XMLPlatformUtils::Initialize (); + +{ + // Parse XML to object model. + // + std::unique_ptr<type> r (root ( + "root.xml", + xml_schema::flags::keep_dom | + xml_schema::flags::dont_initialize)); + + DOMNode* n = r->_node (); + assert (n->getNodeType () == DOMNode::ELEMENT_NODE); + DOMElement* re = static_cast<DOMElement*> (n); + + // Get the 'a' element. Note that it is not necessarily the + // first child node of 'root' since there could be whitespace + // nodes before it. + // + DOMElement* ae; + + for (n = re->getFirstChild (); n != 0; n = n->getNextSibling ()) + { + if (n->getNodeType () == DOMNode::ELEMENT_NODE) + { + ae = static_cast<DOMElement*> (n); + break; + } + } + + // Get from the 'a' DOM element to xml_schema::string object model + // node. + // + xml_schema::type& t ( + *reinterpret_cast<xml_schema::type*> ( + ae->getUserData (xml_schema::dom::tree_node_key))); + + xml_schema::string& a (dynamic_cast<xml_schema::string&> (t)); +} + +XMLPlatformUtils::Terminate (); + </pre> + + <p>The 'mixed' example which can be found in the XSD distribution + shows how to handle the mixed content using DOM association.</p> + + <h2><a name="5.2">5.2 Binary Serialization</a></h2> + + <p>Besides reading from and writing to XML, the C++/Tree mapping + also allows you to save the object model to and load it from a + number of predefined as well as custom data representation + formats. 
The predefined binary formats are CDR (Common Data + Representation) and XDR (eXternal Data Representation). A + custom format can easily be supported by providing + insertion and extraction operators for basic types.</p> + + <p>Binary serialization saves only the data without any meta + information or markup. As a result, saving to and loading + from a binary representation can be an order of magnitude + faster than parsing and serializing the same data in XML. + Furthermore, the resulting representation is normally several + times smaller than the equivalent XML representation. These + properties make binary serialization ideal for internal data + exchange and storage. A typical application that uses this + facility stores the data and communicates within the + system using a binary format and reads/writes the data + in XML when communicating with the outside world.</p> + + <p>In order to request the generation of insertion operators and + extraction constructors for a specific predefined or custom + data representation stream, you will need to use the + <code>--generate-insertion</code> and <code>--generate-extraction</code> + compiler options. See the + <a href="https://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD + Compiler Command Line Manual</a> for more information.</p> + + <p>Once the insertion operators and extraction constructors are + generated, you can use the <code>xml_schema::istream</code> + and <code>xml_schema::ostream</code> wrapper stream templates + to save the object model to and load it from a specific format. + The following code fragment shows how to do this using ACE + (Adaptive Communication Environment) CDR streams as an example:</p> + + <pre class="xml"> +<complexType name="object"> + <sequence> + <element name="a" type="string"/> + <element name="b" type="int"/> + </sequence> +</complexType> + +<element name="root" type="object"/> + </pre> + + <pre class="c++"> +// Parse XML to object model. 
+// +std::unique_ptr<type> r (root ("root.xml")); + +// Save to a CDR stream. +// +ACE_OutputCDR ace_ocdr; +xml_schema::ostream<ACE_OutputCDR> ocdr (ace_ocdr); + +ocdr << *r; + +// Load from a CDR stream. +// +ACE_InputCDR ace_icdr (buf, size); +xml_schema::istream<ACE_InputCDR> icdr (ace_icdr); + +std::unique_ptr<object> copy (new object (icdr)); + +// Serialize to XML. +// +root (std::cout, *copy); + </pre> + + <p>The XSD distribution contains a number of examples that + show how to save the object model to and load it from + CDR, XDR, and a custom format.</p> + + <!-- Appendix A --> + + + <h1><a name="A">Appendix A — Default and Fixed Values</a></h1> + + <p>The following table summarizes the effect of default and fixed + values (specified with the <code>default</code> and <code>fixed</code> + attributes, respectively) on attribute and element values. The + <code>default</code> and <code>fixed</code> attributes are mutually + exclusive. It is also worthwhile to note that the fixed value semantics + is a superset of the default value semantics. 
+ </p> + + <!-- border="1" is necessary for html2ps --> + <table id="default-fixed" border="1"> + <tr> + <th></th> + <th></th> + <th colspan="2">default</th> + <th colspan="2">fixed</th> + </tr> + + <!-- element --> + + <tr> + <th rowspan="4">element</th> + <th rowspan="2">not present</th> + <th>optional</th> + <th>required</th> + <th>optional</th> + <th>required</th> + </tr> + <tr> + <td>not present</td> + <td>invalid instance</td> + <td>not present</td> + <td>invalid instance</td> + </tr> + + + <tr> + <th>empty</th> + <td colspan="2">default value is used</td> + <td colspan="2">fixed value is used</td> + </tr> + + <tr> + <th>value</th> + <td colspan="2">value is used</td> + <td colspan="2">value is used provided it's the same as fixed</td> + </tr> + + <!-- attribute --> + + <!-- element --> + + <tr> + <th rowspan="4">attribute</th> + <th rowspan="2">not present</th> + <th>optional</th> + <th>required</th> + <th>optional</th> + <th>required</th> + </tr> + <tr> + <td>default value is used</td> + <td>invalid schema</td> + <td>fixed value is used</td> + <td>invalid instance</td> + </tr> + + + <tr> + <th>empty</th> + <td colspan="2">empty value is used</td> + <td colspan="2">empty value is used provided it's the same as fixed</td> + </tr> + + <tr> + <th>value</th> + <td colspan="2">value is used</td> + <td colspan="2">value is used provided it's the same as fixed</td> + </tr> + + </table> + + </div> +</div> + + +</body> +</html> diff --git a/doc/cxx/tree/manual/manual.html2ps.in b/doc/cxx/tree/manual/manual.html2ps.in new file mode 100644 index 0000000..5629122 --- /dev/null +++ b/doc/cxx/tree/manual/manual.html2ps.in @@ -0,0 +1,66 @@ +@@html2ps { + option { + toc: hb; + colour: 1; + hyphenate: 1; + titlepage: 1; + } + + datefmt: "%B %Y"; + + titlepage { + content: " +<div align=center> + <h1><big>C++/Tree Mapping User Manual</big></h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> + <h1> </h1> +</div> + <p>Revision $[revision] $D</p> + 
<p>Copyright © @copyright@.</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href='https://www.codesynthesis.com/licenses/fdl-1.2.txt'>GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. + </p> + + <p>This document is available in the following formats: + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/index.xhtml'>XHTML</a>, + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.pdf'>PDF</a>, and + <a href='https://www.codesynthesis.com/projects/xsd/documentation/cxx/tree/manual/cxx-tree-manual.ps'>PostScript</a>.</p>"; + } + + toc { + indent: 2em; + } + + header { + odd-right: $H; + even-left: $H; + } + + footer { + odd-left: $D; + odd-center: $T, v$[revision]; + odd-right: $N; + + even-left: $N; + even-center: $T, v$[revision]; + even-right: $D; + } +} + +body { + font-size: 12pt; + text-align: justify; +} + +pre { + font-size: 10pt; +} |