diff options
Diffstat (limited to 'libxsd-frontend/xsd-frontend/parser.cxx')
-rw-r--r-- | libxsd-frontend/xsd-frontend/parser.cxx | 5164 |
1 files changed, 5164 insertions, 0 deletions
diff --git a/libxsd-frontend/xsd-frontend/parser.cxx b/libxsd-frontend/xsd-frontend/parser.cxx new file mode 100644 index 0000000..6b582ab --- /dev/null +++ b/libxsd-frontend/xsd-frontend/parser.cxx @@ -0,0 +1,5164 @@ +// file : xsd-frontend/parser.cxx +// copyright : Copyright (c) 2005-2014 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#include <map> +#include <stack> +#include <vector> +#include <iostream> +#include <sstream> + +#include <cutl/compiler/type-id.hxx> + +#include <xsd-frontend/version.hxx> // Check Xerces-C++ version. +#include <xsd-frontend/xml.hxx> +#include <xsd-frontend/parser.hxx> +#include <xsd-frontend/schema-dom-parser.hxx> + +#include <xsd-frontend/semantic-graph.hxx> +#include <xsd-frontend/traversal.hxx> + +//@@ Do i need this? +// +#include <xercesc/dom/DOM.hpp> + +#include <xercesc/sax/ErrorHandler.hpp> +#include <xercesc/sax/SAXParseException.hpp> + +#include <xercesc/sax2/SAX2XMLReader.hpp> +#include <xercesc/sax2/XMLReaderFactory.hpp> + +#include <xercesc/util/XMLUniDefs.hpp> +#include <xercesc/util/XMLString.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/util/BinInputStream.hpp> +#include <xercesc/util/BinFileInputStream.hpp> + +#include <xercesc/validators/common/Grammar.hpp> + +#include <xercesc/sax/InputSource.hpp> +#include <xercesc/framework/LocalFileInputSource.hpp> +#include <xercesc/framework/Wrapper4InputSource.hpp> + +using namespace std; + +using cutl::compiler::type_id; + +namespace XSDFrontend +{ + namespace Xerces = XML::Xerces; + using namespace SemanticGraph; + + //@@ Port to tracing facility. + // + bool trace_ = false; + + String const xsd = L"http://www.w3.org/2001/XMLSchema"; + String const xse = L"http://www.codesynthesis.com/xmlns/xml-schema-extension"; + + namespace + { + // + // Exceptions. 
+ // + + struct NotNamespace + { + NotNamespace (String const& ns) + : ns_ (ns) + { + } + + String const& + ns () const + { + return ns_; + } + + private: + String ns_; + }; + + struct NotName + { + NotName (String const& ns, String const& name) + : ns_ (ns), name_ (name) + { + } + + String const& + ns () const + { + return ns_; + } + + String const& + name () const + { + return name_; + } + + private: + String ns_; + String name_; + }; + + // Trim leading and trailing whitespace. + // + template <typename C> + StringTemplate<C> + trim (StringTemplate<C> const& s) + { + typedef StringTemplate<C> String; + + size_t size (s.size ()); + + if (size == 0) + return s; + + C const* f (s.c_str ()); + C const* l (f + size); + + C const* of (f); + + while (f < l && + (*f == C (0x20) || *f == C (0x0A) || + *f == C (0x0D) || *f == C (0x09))) + ++f; + + --l; + + C const* ol (l); + + while (l > f && + (*l == C (0x20) || *l == C (0x0A) || + *l == C (0x0D) || *l == C (0x09))) + --l; + + if (f != of || l != ol) + return f <= l ? String (f, l - f + 1) : String (); + else + return s; + } + + // Name cache. We only support a maximum of two nodes with the same + // name in the cache (e.g., element and type). For (rare) cases + // where there are three or more names, there will be a cache miss. + // + struct CacheNodes + { + CacheNodes () : first (0), second (0) {} + + Nameable* first; + Nameable* second; + }; + + typedef std::map<String, CacheNodes> NodeMap; + typedef std::map<String, NodeMap> NamespaceMap; + typedef std::vector<SemanticGraph::Member*> DefaultValues; + + template <typename X> + X& + resolve (String const& ns_name, + String const& uq_name, + Schema& s_, + NamespaceMap& cache) + { + // First check the cache. 
+ // + NamespaceMap::iterator i (cache.find (ns_name)); + + if (i != cache.end ()) + { + NodeMap::iterator j (i->second.find (uq_name)); + + if (j != i->second.end ()) + { + X* x; + + if ((x = dynamic_cast<X*> (j->second.first)) || + (x = dynamic_cast<X*> (j->second.second))) + return *x; + } + } + + Scope::NamesIteratorPair nss (s_.find (ns_name)); + + if (nss.first == nss.second) + throw NotNamespace (ns_name); + + for (; nss.first != nss.second; ++nss.first) + { + Namespace& ns (dynamic_cast<Namespace&> (nss.first->named ())); + + Scope::NamesIteratorPair types (ns.find (uq_name)); + + for (; types.first != types.second; ++types.first) + { + if (X* x = dynamic_cast<X*> (&types.first->named ())) + { + if (trace_) + wcout << "successfully resolved '" << ns_name << '#' << uq_name + << "'" << endl; + + // Add to the cache if there are free slots. + // + NodeMap& m (i != cache.end () ? i->second : cache[ns_name]); + CacheNodes& n (m[uq_name]); + + if (n.first == 0) + n.first = x; + else if (n.second == 0) + n.second = x; + + return *x; + } + } + } + + throw NotName (ns_name, uq_name); + } + + // + // + typedef std::map<String, String> Facets; + + void + copy_facets (Restricts& r, Facets const& f) + { + for (Facets::const_iterator i (f.begin ()), e (f.end ()); i != e; ++i) + r.facet_insert (i->first, i->second); + } + + // + // + struct UnionMemberType + { + UnionMemberType (String const& ns, String const& uq) + : ns_name (ns), uq_name (uq) + { + } + + String ns_name; + String uq_name; + }; + + typedef std::vector<UnionMemberType> UnionMemberTypes; + + // + // + struct ElementGroupRef + { + ElementGroupRef (String const& uq_name_, String const& ns_name_, + unsigned long min_, unsigned long max_, + Compositor& compositor, Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_), + min (min_), max (max_) + { + contains_pos = compositor.contains_end (); + if (compositor.contains_begin () != contains_pos) + --contains_pos; + + names_pos = scope.names_end (); + if 
(scope.names_begin () != names_pos) + --names_pos; + } + + ElementGroupRef (String const& uq_name_, String const& ns_name_, + unsigned long min_, unsigned long max_, + Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_), + min (min_), max (max_) + { + names_pos = scope.names_end (); + if (scope.names_begin () != names_pos) + --names_pos; + } + + String uq_name; + String ns_name; + unsigned long min, max; + Compositor::ContainsIterator contains_pos; + Scope::NamesIterator names_pos; + }; + + typedef std::vector<ElementGroupRef> ElementGroupRefs; + + // + // + struct AttributeGroupRef + { + AttributeGroupRef (String const& uq_name_, + String const& ns_name_, + Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_) + { + names_pos = scope.names_end (); + if (scope.names_begin () != names_pos) + --names_pos; + } + + String uq_name; + String ns_name; + Scope::NamesIterator names_pos; + }; + + typedef std::vector<AttributeGroupRef> AttributeGroupRefs; + + + // + // + template <typename N, typename A> + struct NodeArgs + { + NodeArgs (N& node, A arg) + : node_ (node), arg_ (arg) + { + } + + operator N& () const + { + return node_; + } + + template <typename E> + void + add_edge_left (E& e) + { + node_.add_edge_left (e, arg_); + } + + template <typename E> + void + add_edge_right (E& e) + { + node_.add_edge_right (e, arg_); + } + + private: + N& node_; + A arg_; + }; + + + // + // + struct Resolver : Traversal::Element, + Traversal::Attribute, + Traversal::Fundamental::IdRef, + Traversal::Fundamental::IdRefs, + Traversal::List, + Traversal::Union, + Traversal::Complex, + Traversal::Enumeration, + Traversal::ElementGroup, + Traversal::AttributeGroup, + Traversal::Compositor + { + Resolver (Schema& s, + bool& valid, + NamespaceMap& cache, + DefaultValues& default_values) + : s_ (s), + valid_ (valid), + cache_ (cache), + default_values_ (default_values) + { + *this >> contains_compositor >> *this; + } + + void + traverse (SemanticGraph::Attribute& a) + { + // Avoid 
traversing attribute more than once. + // + if (!a.context ().count ("attribute-traversed")) + { + a.context ().set ("attribute-traversed", true); + SemanticGraph::Member& m (a); + resolve_member (m); + } + } + + void + traverse (SemanticGraph::Element& e) + { + resolve_element (e); + } + + void + resolve_element (SemanticGraph::Element& e) + { + // Avoid resolving element more than once. + // + if (e.context ().count ("element-resolved")) + return; + + e.context ().set ("element-resolved", true); + + { + SemanticGraph::Member& m (e); + resolve_member (m); + } + + if (e.context ().count ("substitution-ns-name")) + { + String ns_name (e.context ().get<String> ("substitution-ns-name")); + String uq_name (e.context ().get<String> ("substitution-uq-name")); + + e.context ().remove ("substitution-ns-name"); + e.context ().remove ("substitution-uq-name"); + + try + { + SemanticGraph::Element& root ( + resolve<SemanticGraph::Element> (ns_name, uq_name, s_, cache_)); + + s_.new_edge<Substitutes> (e, root); + + // See if we need to derive the type of this element from the + // one it substitutes. + // + if (!e.typed_p ()) + { + resolve_member (root); // Make sure the type is resolved. 
+ s_.new_edge<Belongs> (e, root.type ()); + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" <<endl; + abort (); + } + } + } + } + + void + resolve_member (SemanticGraph::Member& m) + { + using SemanticGraph::Member; + using SemanticGraph::Element; + using SemanticGraph::Attribute; + + try + { + String ns_name; + String uq_name; + + if (m.context ().count ("type-ns-name")) + { + ns_name = m.context ().get<String> ("type-ns-name"); + uq_name = m.context ().get<String> ("type-uq-name"); + + m.context ().remove ("type-ns-name"); + m.context ().remove ("type-uq-name"); + m.context ().remove ("edge-type-id"); + + s_.new_edge<Belongs> ( + m, resolve<SemanticGraph::Type> (ns_name, uq_name, s_, cache_)); + } + else if (m.context ().count ("instance-ns-name")) + { + ns_name = m.context ().get<String> ("instance-ns-name"); + uq_name = m.context ().get<String> ("instance-uq-name"); + + m.context ().remove ("instance-ns-name"); + m.context ().remove ("instance-uq-name"); + + // Resolve the name to the same type. It is legal to have + // an element and an attribute with the same name. + // + Member& ref ( + m.is_a<Element> () + ? static_cast<Member&> ( + resolve<Element> (ns_name, uq_name, s_, cache_)) + : static_cast<Member&> ( + resolve<Attribute> (ns_name, uq_name, s_, cache_))); + + // Make sure the referenced member is fully resolved. + // @@ Substitutes edge won't be resolved. + // + resolve_member (ref); + + + // Substitution group info. We have to test for both resolved + // and unresolved cases since we don't know whether it was + // resolved or not. 
+ // + if (ref.is_a<Element> ()) + { + Element& m_e (dynamic_cast<Element&> (m)); + Element& ref_e (dynamic_cast<Element&> (ref)); + + if (ref_e.substitutes_p ()) + { + s_.new_edge<Substitutes> (m_e, ref_e.substitutes ().root ()); + } + else if (ref_e.context ().count ("substitution-ns-name")) + { + m_e.context ().set ( + "substitution-ns-name", + ref_e.context ().get<String> ("substitution-ns-name")); + + m_e.context ().set ( + "substitution-uq-name", + ref_e.context ().get<String> ("substitution-uq-name")); + } + } + + // + // + s_.new_edge<BelongsToNamespace> (m, ref.namespace_ ()); + + // Transfer default and fixed values if we haven't already + // gotten them. + // + if (!m.default_p ()) + { + if (ref.fixed_p ()) + m.fixed (ref.value ()); + else if (ref.default_p ()) + { + // Default value applies only if the attribute is optional. + // + if (Attribute* a = dynamic_cast<Attribute*> (&m)) + { + if (a->optional_p ()) + m.default_ (ref.value ()); + } + else + m.default_ (ref.value ()); + } + + if (m.default_p ()) + { + m.context ().set ( + "dom-node", + ref.context ().get<Xerces::DOMElement*> ("dom-node")); + default_values_.push_back (&m); + } + } + + // Transfer annotation if we haven't already gotten it. + // + if (!m.annotated_p () && ref.annotated_p ()) + s_.new_edge<Annotates> (ref.annotation (), m); + + // Type info. Can be missing for a substitution group member. 
+ // + if (ref.typed_p ()) + s_.new_edge<Belongs> (m, ref.type ()); + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" <<endl; + abort (); + } + } + } + + void + traverse (SemanticGraph::Fundamental::IdRef& i) + { + ref_type (i); + } + + void + traverse (SemanticGraph::Fundamental::IdRefs& i) + { + ref_type (i); + } + + void + ref_type (SemanticGraph::Specialization& s) + { + if (s.context ().count ("type-ns-name")) + { + String ns_name (s.context ().get<String> ("type-ns-name")); + String uq_name (s.context ().get<String> ("type-uq-name")); + + s.context ().remove ("type-ns-name"); + s.context ().remove ("type-uq-name"); + s.context ().remove ("edge-type-id"); + + try + { + s_.new_edge<Arguments> ( + resolve<SemanticGraph::Type> (ns_name, uq_name, s_, cache_), s); + } + catch (NotName const& ex) + { + wcerr << s.file () << ":" << s.line () << ":" << s.column () << ": " + << "error: unable to resolve type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + } + + void + traverse (SemanticGraph::List& l) + { + if (l.context ().count ("type-ns-name")) + { + String ns_name (l.context ().get<String> ("type-ns-name")); + String uq_name (l.context ().get<String> ("type-uq-name")); + + l.context ().remove ("type-ns-name"); + l.context ().remove ("type-uq-name"); + l.context ().remove ("edge-type-id"); + + try + { + s_.new_edge<Arguments> ( + resolve<SemanticGraph::Type> (ns_name, uq_name, s_, cache_), l); + } + catch (NotName const& ex) + { + wcerr << l.file () << ":" << l.line () << ":" << l.column () << ": " + << "error: unable to resolve item type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + 
Traversal::List::traverse (l); + } + + void + traverse (SemanticGraph::Union& u) + { + using SemanticGraph::Union; + + if (u.context ().count ("union-member-types")) + { + UnionMemberTypes const& m ( + u.context ().get<UnionMemberTypes> ("union-member-types")); + + // Process it backwards so that we can just insert each + // edge in the front. + // + for (UnionMemberTypes::const_reverse_iterator i (m.rbegin ()); + i != m.rend (); i++) + { + try + { + NodeArgs<Union, Union::ArgumentedIterator> na ( + u, u.argumented_begin ()); + + s_.new_edge<Arguments> ( + resolve<SemanticGraph::Type> ( + i->ns_name, i->uq_name, s_, cache_), na); + } + catch (NotName const& ex) + { + wcerr << u.file () << ":" << u.line () << ":" << u.column () << ": " + << "error: unable to resolve item type '" << i->uq_name << "' " + << "in namespace '" << i->ns_name << "'" << endl; + + valid_ = false; + } + } + + u.context ().remove ("union-member-types"); + } + + Traversal::Union::traverse (u); + } + + void + traverse (SemanticGraph::Complex& c) + { + // Avoid traversing complex type more than once. + // + if (c.context ().count ("complex-type-resolved")) + return; + + c.context ().set ("complex-type-resolved", true); + + // Resolve base type if any. 
+ // + if (c.context ().count ("type-ns-name")) + { + String ns_name (c.context ().get<String> ("type-ns-name")); + String uq_name (c.context ().get<String> ("type-uq-name")); + type_id edge_id (c.context ().get<type_id> ("edge-type-id")); + + c.context ().remove ("type-ns-name"); + c.context ().remove ("type-uq-name"); + c.context ().remove ("edge-type-id"); + + try + { + if (edge_id == typeid (Extends)) + { + s_.new_edge<Extends> ( + c, resolve<SemanticGraph::Type> ( + ns_name, uq_name, s_, cache_)); + } + else if (edge_id == typeid (Restricts)) + { + Restricts& r ( + s_.new_edge<Restricts> ( + c, resolve<SemanticGraph::Type> ( + ns_name, uq_name, s_, cache_))); + + if (c.context ().count ("facets")) + { + Facets const& f (c.context ().get<Facets> ("facets")); + copy_facets (r, f); + c.context ().remove ("facets"); + } + } + else + assert (false); + } + catch (NotName const& ex) + { + wcerr << c.file () << ":" << c.line () << ":" << c.column () << ": " + << "error: unable to resolve base type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + // Resolve attribute-group-refs. Do it before element-group-refs + // so that if the scope was empty they end up at the end. + // + if (c.context ().count ("attribute-group-refs")) + { + AttributeGroupRefs& refs ( + c.context ().get<AttributeGroupRefs> ("attribute-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. + // + for (AttributeGroupRefs::reverse_iterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + clone_attribute_group_content (*i, c); + } + + c.context ().remove ("attribute-group-refs"); + } + + // Resolve element-group-ref if any. 
+ // + if (c.context ().count ("element-group-ref")) + { + using SemanticGraph::Compositor; + + ElementGroupRef& ref ( + c.context ().get<ElementGroupRef> ("element-group-ref")); + + Compositor* comp (clone_element_group_content (c, ref)); + + // Create ContainsCompositor edge. + // + if (comp) + s_.new_edge<ContainsCompositor> (c, *comp, ref.min, ref.max); + + c.context ().remove ("element-group-ref"); + } + + Traversal::Complex::traverse (c); + } + + void + traverse (SemanticGraph::Enumeration& e) + { + // Resolve base type if any. + // + if (e.context ().count ("type-ns-name")) + { + String ns_name (e.context ().get<String> ("type-ns-name")); + String uq_name (e.context ().get<String> ("type-uq-name")); + + e.context ().remove ("type-ns-name"); + e.context ().remove ("type-uq-name"); + e.context ().remove ("edge-type-id"); + + try + { + Restricts& r ( + s_.new_edge<Restricts> ( + e, resolve<SemanticGraph::Type> ( + ns_name, uq_name, s_, cache_))); + + if (e.context ().count ("facets")) + { + Facets const& f (e.context ().get<Facets> ("facets")); + copy_facets (r, f); + e.context ().remove ("facets"); + } + } + catch (NotName const& ex) + { + wcerr << e.file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve base type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + Traversal::Enumeration::traverse (e); + } + + void + traverse (SemanticGraph::ElementGroup& g) + { + // Avoid traversing groups more than once. + // + if (!g.context ().count ("element-group-traversed")) + { + g.context ().set ("element-group-traversed", true); + Traversal::ElementGroup::traverse (g); + + // Note that setting element-group-resolved after traversing + // the group allows for a recursive shallow resolution using + // resolve_element_group. 
+ // + g.context ().set ("element-group-resolved", true); + } + } + + // We need a "shallow" resolve to break possible recursing: + // group->element->complexType->group. + // + void + resolve_element_group (SemanticGraph::ElementGroup& g) + { + using SemanticGraph::Scope; + using SemanticGraph::Element; + + // Avoid resolving groups more than once. + // + if (!g.context ().count ("element-group-resolved")) + { + g.context ().set ("element-group-resolved", true); + + for (Scope::NamesIterator i (g.names_begin ()); + i != g.names_end (); ++i) + { + if (Element* e = dynamic_cast<Element*> (&i->named ())) + resolve_element (*e); + } + + traverse (g.contains_compositor ().compositor ()); + } + } + + void + traverse (SemanticGraph::AttributeGroup& g) + { + // Avoid traversing groups more than once. + // + if (g.context ().count ("attribute-group-resolved")) + return; + + g.context ().set ("attribute-group-resolved", true); + + // Resolve attribute-group-refs. + // + if (g.context ().count ("attribute-group-refs")) + { + AttributeGroupRefs& refs ( + g.context ().get<AttributeGroupRefs> ("attribute-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. + // + for (AttributeGroupRefs::reverse_iterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + clone_attribute_group_content (*i, g); + } + + g.context ().remove ("attribute-group-refs"); + } + + Traversal::AttributeGroup::traverse (g); + } + + void + traverse (SemanticGraph::Compositor& c) + { + using SemanticGraph::Compositor; + + // Resolve element-group-refs if any. + // + if (c.context ().count ("element-group-refs")) + { + using SemanticGraph::Scope; + + ElementGroupRefs& refs ( + c.context ().get<ElementGroupRefs> ("element-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. 
+ // + for (ElementGroupRefs::reverse_iterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + // Find our scope. + // + Compositor* j (&c); + + while(!j->contained_compositor_p ()) + j = &j->contained_particle ().compositor (); + + Compositor* comp ( + clone_element_group_content ( + dynamic_cast<Scope&> (j->contained_compositor ().container ()), + *i)); + + // Create ContainsParticle edge. + // + if (comp) + { + NodeArgs<Compositor, Compositor::ContainsIterator> na ( + c, i->contains_pos); + s_.new_edge<ContainsParticle> (na, *comp, i->min, i->max); + } + } + + c.context ().remove ("element-group-refs"); + } + + // Traverse recursively but only particles that are compositors. + // This way we won't trigger anonymous type traversal (via member) + // and therefore can call this function from resolve_element_group + // to completely resolve a group. + // + for (Compositor::ContainsIterator i (c.contains_begin ()), + e (c.contains_end ()); i != e; ++i) + { + SemanticGraph::Particle& p (i->particle ()); + + if (p.is_a<Compositor> ()) + dispatch (p); + } + + // Traversal::Compositor::traverse (c); + } + + SemanticGraph::Compositor* + clone_element_group_content (SemanticGraph::Scope& s, + ElementGroupRef const& ref) + { + using SemanticGraph::Scope; + using SemanticGraph::Compositor; + using SemanticGraph::ElementGroup; + + try + { + ElementGroup& g ( + resolve<ElementGroup> (ref.ns_name, ref.uq_name, s_, cache_)); + + // Make sure the group and all its content are fully resolved. 
+ // + resolve_element_group (g); + + Scope::NamesIterator pos (ref.names_pos); + Compositor& root (g.contains_compositor ().compositor ()); + Compositor& copy (clone_compositor (root, s, pos)); + + return &copy; + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl; + abort (); + } + } + + return 0; + } + + SemanticGraph::Compositor& + clone_compositor (SemanticGraph::Compositor& c, + SemanticGraph::Scope& scope, + SemanticGraph::Scope::NamesIterator& pos) + { + using SemanticGraph::Any; + using SemanticGraph::Element; + using SemanticGraph::Particle; + using SemanticGraph::Compositor; + + Compositor* tmp (0); + + if (c.is_a<All> ()) + tmp = &s_.new_node<All> (c.file (), c.line (), c.column ()); + else if (c.is_a<Choice> ()) + tmp = &s_.new_node<Choice> (c.file (), c.line (), c.column ()); + else if (c.is_a<Sequence> ()) + tmp = &s_.new_node<Sequence> (c.file (), c.line (), c.column ()); + else + assert (false); + + Compositor& copy (*tmp); + + // Copy annotation. 
+ // + if (c.annotated_p ()) + s_.new_edge<Annotates> (c.annotation (), copy); + + for (Compositor::ContainsIterator i (c.contains_begin ()); + i != c.contains_end (); ++i) + { + Particle& p (i->particle ()); + + if (p.is_a<Compositor> ()) + { + Compositor& c (dynamic_cast<Compositor&> (p)); + Compositor& cc (clone_compositor (c, scope, pos)); + + s_.new_edge<ContainsParticle> (copy, cc, i->min (), i->max ()); + } + else if (p.is_a<Element> ()) + { + Element& e (dynamic_cast<Element&> (p)); + Element& ec (clone_element (e)); + + s_.new_edge<ContainsParticle> (copy, ec, i->min (), i->max ()); + + NodeArgs<Scope, Scope::NamesIterator> na (scope, pos); + s_.new_edge<Names> (na, ec, e.name ()); + ++pos; + } + else if (p.is_a<Any> ()) + { + Any& a (dynamic_cast<Any&> (p)); + Any& ac ( + s_.new_node<Any> (a.file (), a.line (), a.column (), + a.namespace_begin (), a.namespace_end ())); + + ac.prototype (a); + + s_.new_edge<ContainsParticle> (copy, ac, i->min (), i->max ()); + + // Transfer annotation. + // + if (a.annotated_p ()) + s_.new_edge<Annotates> (a.annotation (), ac); + + // Any has no name so we have to come up with a fake one in + // order to put it into the scope. Note that we cannot reuse + // the name from the prototype. + + unsigned long count; + SemanticGraph::Context& ctx (scope.context ()); + + if (!ctx.count ("any-name-count")) + { + count = 0; + ctx.set ("any-name-count", count); + } + else + count = ++(ctx.get<unsigned long> ("any-name-count")); + + std::basic_ostringstream<wchar_t> os; + os << "any #" << count; + + NodeArgs<Scope, Scope::NamesIterator> na (scope, pos); + s_.new_edge<Names> (na, ac, os.str ()); + ++pos; + } + else + assert (false); + } + + return copy; + } + + // Clone a fully-resolved element. Note that it cannot be used as + // is to clone ref'ed element (default/fixed value, etc). 
+ // + SemanticGraph::Element& + clone_element (SemanticGraph::Element& e) + { + using SemanticGraph::Element; + + Element& copy ( + s_.new_node<Element> ( + e.file (), e.line (), e.column (), e.global_p (), e.qualified_p ())); + + if (e.qualified_p ()) + s_.new_edge<BelongsToNamespace> (copy, e.namespace_ ()); + + // Transfer default and fixed values. + // + if (e.fixed_p ()) + copy.fixed (e.value ()); + else if (e.default_p ()) + copy.default_ (e.value ()); + + if (copy.default_p ()) + { + copy.context ().set ( + "dom-node", + e.context ().get<Xerces::DOMElement*> ("dom-node")); + default_values_.push_back (&copy); + } + + // Transfer annotation. + // + if (e.annotated_p ()) + s_.new_edge<Annotates> (e.annotation (), copy); + + // Belongs edge. + // + if (e.typed_p ()) + s_.new_edge<Belongs> (copy, e.type ()); + else + assert (!valid_); + + // Substitutes edge. + // + if (e.substitutes_p ()) + s_.new_edge<Substitutes> (copy, e.substitutes ().root ()); + + return copy; + } + + void + clone_attribute_group_content (AttributeGroupRef& ref, + SemanticGraph::Scope& s) + { + using SemanticGraph::Scope; + using SemanticGraph::Attribute; + using SemanticGraph::AttributeGroup; + + try + { + AttributeGroup& g ( + resolve<AttributeGroup> (ref.ns_name, ref.uq_name, s_, cache_)); + + // Make sure the group and all its content are fully resolved. + // + traverse (g); + + Scope::NamesIterator pos (ref.names_pos); + + for (Scope::NamesIterator i (g.names_begin ()); + i != g.names_end (); ++i) + { + if (Attribute* p = dynamic_cast<Attribute*> (&i->named ())) + { + Attribute& a ( + s_.new_node<Attribute> (p->file (), + p->line (), + p->column (), + p->optional_p (), + p->global_p (), + p->qualified_p ())); + + NodeArgs<Scope, Scope::NamesIterator> na (s, pos); + s_.new_edge<Names> (na, a, p->name ()); + ++pos; + + if (p->qualified_p ()) + s_.new_edge<BelongsToNamespace> (a, p->namespace_ ()); + + // Transfer default and fixed values if any. 
+ // + if (p->fixed_p ()) + a.fixed (p->value ()); + else if (p->default_p ()) + a.default_ (p->value ()); + + if (a.default_p ()) + { + a.context ().set ( + "dom-node", + p->context ().get<Xerces::DOMElement*> ("dom-node")); + default_values_.push_back (&a); + } + + // Transfer annotation. + // + if (p->annotated_p ()) + s_.new_edge<Annotates> (p->annotation (), a); + + // Belongs edge. + // + if (p->typed_p ()) + s_.new_edge<Belongs> (a, p->type ()); + else + assert (!valid_); + } + else if ( + AnyAttribute* p = dynamic_cast<AnyAttribute*> (&i->named ())) + { + AnyAttribute& any ( + s_.new_node<AnyAttribute> (p->file (), + p->line (), + p->column (), + p->namespace_begin (), + p->namespace_end ())); + + any.prototype (*p); + + // Transfer annotation. + // + if (p->annotated_p ()) + s_.new_edge<Annotates> (p->annotation (), any); + + // AnyAttribute has no name so we have to come up with a fake + // one in order to put it into the scope. Note that we cannot + // reuse the name from the attribute group. 
+ + unsigned long count; + SemanticGraph::Context& ctx (s.context ()); + + if (!ctx.count ("any-attribute-name-count")) + { + count = 0; + ctx.set ("any-attribute-name-count", count); + } + else + count = ++(ctx.get<unsigned long> ("any-attribute-name-count")); + + std::basic_ostringstream<wchar_t> os; + os << "any-attribute #" << count; + + NodeArgs<Scope, Scope::NamesIterator> na (s, pos); + s_.new_edge<Names> (na, any, os.str ()); + ++pos; + } + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve attribute group name '" + << ex.name () << "' inside namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + } + + private: + Schema& s_; + bool& valid_; + NamespaceMap& cache_; + DefaultValues& default_values_; + + private: + //Traversal::ContainsParticle contains_particle; + Traversal::ContainsCompositor contains_compositor; + }; + } + + // Parser::Impl + // + + class Parser::Impl + { + Impl (Impl const&); + Impl& operator= (Impl const&); + + public: + ~Impl (); + + Impl (bool proper_restriction, + bool multiple_imports, + bool full_schema_check, + LocationTranslator*, + const WarningSet*); + + auto_ptr<Schema> + parse (Path const&); + + auto_ptr<Schema> + parse (Paths const&); + + auto_ptr<Schema> + xml_schema (Path const&); + + private: + void + fill_xml_schema (Schema&, Path const&); + + private: + XML::AutoPtr<Xerces::DOMDocument> + dom (SemanticGraph::Path const&, bool validate); + + void + schema (XML::Element const&); + + SemanticGraph::Annotation* + annotation (bool process); + + void + import (XML::Element const&); + + void + include (XML::Element const&); + + void + element_group (XML::Element const&, bool in_compositor); + + SemanticGraph::Type* + simple_type (XML::Element const&); + + SemanticGraph::Type* + list (XML::Element const& l, XML::Element const& 
type); + + SemanticGraph::Type* + union_ (XML::Element const& u, XML::Element const& type); + + SemanticGraph::Type* + restriction (XML::Element const& r, XML::Element const& type); + + void + enumeration (XML::Element const&); + + SemanticGraph::Type* + complex_type (XML::Element const&); + + All* + all (XML::Element const&); + + Choice* + choice (XML::Element const&, bool in_compositor); + + Sequence* + sequence (XML::Element const&, bool in_compositor); + + void + simple_content (XML::Element const&); + + void + complex_content (XML::Element const&, Complex&); + + void + simple_content_extension (XML::Element const&); + + void + simple_content_restriction (XML::Element const&); + + void + complex_content_extension (XML::Element const&, Complex&); + + void + complex_content_restriction (XML::Element const&, Complex&); + + void + element (XML::Element const&, bool global); + + void + attribute (XML::Element const&, bool global); + + void + attribute_group (XML::Element const&); + + void + any (XML::Element const&); + + void + any_attribute (XML::Element const&); + + private: + bool + is_disabled (char const* warning) + { + return disabled_warnings_all_ || + (disabled_warnings_ && + disabled_warnings_->find (warning) != disabled_warnings_->end ()); + } + + private: + bool + more () const + { + iterator const& it (iteration_state_.top ()); + + return it.l_->getLength () > it.i_; + } + + XML::Element + next () + { + iterator& it (iteration_state_.top ()); + + return XML::Element ( + dynamic_cast<Xerces::DOMElement*> (it.l_->item (it.i_++))); + } + + void + prev () + { + iterator& it (iteration_state_.top ()); + + if (it.i_) + --it.i_; + } + + void + push (XML::Element const& e) + { + iteration_state_.push (e.dom_element ()); + } + + void + pop () + { + iteration_state_.pop (); + } + + private: + void + push_scope (SemanticGraph::Scope& s) + { + scope_stack_.push (&s); + } + + void + pop_scope () + { + scope_stack_.pop (); + } + + SemanticGraph::Scope& + scope () 
const + { + return *(scope_stack_.top ()); + } + + private: + void + push_compositor (SemanticGraph::Compositor& c) + { + compositor_stack_.push (&c); + } + + void + pop_compositor () + { + assert (!compositor_stack_.empty ()); + compositor_stack_.pop (); + } + + SemanticGraph::Compositor& + compositor () const + { + assert (!compositor_stack_.empty ()); + return *(compositor_stack_.top ()); + } + + private: + static unsigned long const unbounded = ~static_cast<unsigned long> (0); + + unsigned long + parse_min (String const& m) + { + if (m.empty ()) + return 1; + + unsigned long v; + std::basic_istringstream<wchar_t> is (m); + + is >> v; + return v; + } + + unsigned long + parse_max (String const& m) + { + if (m.empty ()) + return 1; + + if (m == L"unbounded") + return unbounded; + + unsigned long v; + std::basic_istringstream<wchar_t> is (m); + + is >> v; + return v; + } + + private: + SemanticGraph::Namespace& + cur_ns () const + { + // Here I am using the fact that each Schema Names only one + // Namespace. + // + return dynamic_cast<Namespace&> (cur_->names_begin ()->named ()); + } + + private: + String + unqualified_name (String const& n) + { + return XML::uq_name (n); + } + + String + namespace_name (XML::Element const& e, String const& n) + { + try + { + String p (XML::prefix (n)); + + // If we are currently handling a chameleon-included schema then + // the empty prefix is logically translated into acquired target + // namespace. + // + if (cur_chameleon_ && p.empty ()) + return cur_ns ().name (); + + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). 
+ // + return XML::ns_name (e.dom_element (), p); + } + catch (XML::NoMapping const& ex) + { + if (ex.prefix ().empty ()) + return String (); + else + throw; + } + } + + SemanticGraph::Type& + ultimate_base (SemanticGraph::Type& t) + { + using namespace SemanticGraph; + + Complex* c = dynamic_cast<Complex*> (&t); + + if (c != 0 && c->inherits_p ()) + { + Type* b (&c->inherits ().base ()); + + while (true) + { + Complex* cb (dynamic_cast<Complex*> (b)); + + if (cb != 0 && cb->inherits_p ()) + { + b = &cb->inherits ().base (); + continue; + } + + break; + } + + return *b; + } + else + return t; + } + + private: + template <typename Edge, typename Node> + Edge* + set_type (String const& type, XML::Element const& e, Node& node); + + private: + XML::PtrVector<Xerces::DOMDocument>* dom_docs_; + + struct iterator + { + iterator (Xerces::DOMElement* e) + : l_ (e->getChildNodes ()), i_ (0) + { + } + + Xerces::DOMNodeList* l_; + size_t i_; + }; + + std::stack<iterator> iteration_state_; + SemanticGraph::Schema* s_; // root schema file + SemanticGraph::Schema* cur_; // current schema file + bool cur_chameleon_; // whethere cur_ is chameleon + + SemanticGraph::Schema* xml_schema_; // XML Schema file + SemanticGraph::Path xml_schema_path_; + + // + // + std::stack<SemanticGraph::Scope*> scope_stack_; + + // + // + std::stack<SemanticGraph::Compositor*> compositor_stack_; + + + // Map of absolute file path and namespace pair to a Schema node. + // + struct SchemaId + { + SchemaId (SemanticGraph::Path const& path, String const& ns) + : path_ (path), ns_ (ns) + { + } + + + friend bool + operator< (SchemaId const& x, SchemaId const& y) + { + return x.path_ < y.path_ || (x.path_ == y.path_ && x.ns_ < y.ns_); + } + + private: + SemanticGraph::Path path_; + String ns_; + }; + + + typedef std::map<SchemaId, SemanticGraph::Schema*> SchemaMap; + SchemaMap schema_map_; + + // Path stack for diagnostic. 
+ // + struct PathPair + { + PathPair (SemanticGraph::Path const& r, SemanticGraph::Path const& a) + : rel (r), abs (a) + { + } + + SemanticGraph::Path rel, abs; + }; + + std::stack<PathPair> file_stack_; + + SemanticGraph::Path const& + file () + { + return file_stack_.top ().rel; + } + + SemanticGraph::Path const& + abs_file () + { + return file_stack_.top ().abs; + } + + // Members with default/fixed values (needed for QName handling). + // + DefaultValues default_values_; + + private: + bool qualify_attribute_; + bool qualify_element_; + + bool valid_; + + bool proper_restriction_; + bool multiple_imports_; + bool full_schema_check_; + LocationTranslator* loc_translator_; + const WarningSet* disabled_warnings_; + bool disabled_warnings_all_; + + NamespaceMap* cache_; + }; + + + Parser::Impl:: + Impl (bool proper_restriction, + bool multiple_imports, + bool full_schema_check, + LocationTranslator* t, + const WarningSet* dw) + : s_ (0), + cur_ (0), + cur_chameleon_ (false), + xml_schema_path_ ("XMLSchema.xsd"), + qualify_attribute_ (false), + qualify_element_ (false), + proper_restriction_ (proper_restriction), + multiple_imports_ (multiple_imports), + full_schema_check_ (full_schema_check), + loc_translator_ (t), + disabled_warnings_ (dw), + disabled_warnings_all_ (false) + { + if (dw && dw->find ("all") != dw->end ()) + disabled_warnings_all_ = true; + + // Initialize the Xerces-C++ runtime. + // + Xerces::XMLPlatformUtils::Initialize (); + } + + Parser::Impl:: + ~Impl () + { + // Terminate the Xerces-C++ runtime. 
+ // + Xerces::XMLPlatformUtils::Terminate (); + } + + template<typename T> T& + add_type (Schema& s, Namespace& ns, String name) + { + Path path ("XMLSchema.xsd"); + T& node (s.new_node<T> (path, 0, 0)); + s.new_edge<Names> (ns, node, name); + + return node; + } + + void Parser::Impl:: + fill_xml_schema (Schema& s, Path const& path) + { + Namespace& ns (s.new_node<Namespace> (path, 1, 1)); + s.new_edge<Names> (s, ns, xsd); + + // anyType and & anySimpleType + // + AnyType& any_type ( + add_type<AnyType> (s, ns, L"anyType")); + add_type<AnySimpleType> (s, ns, L"anySimpleType"); + + // Integers. + // + add_type<Fundamental::Byte> (s, ns, L"byte"); + add_type<Fundamental::UnsignedByte> (s, ns, L"unsignedByte"); + add_type<Fundamental::Short> (s, ns, L"short"); + add_type<Fundamental::UnsignedShort> (s, ns, L"unsignedShort"); + add_type<Fundamental::Int> (s, ns, L"int"); + add_type<Fundamental::UnsignedInt> (s, ns, L"unsignedInt"); + add_type<Fundamental::Long> (s, ns, L"long"); + add_type<Fundamental::UnsignedLong> (s, ns, L"unsignedLong"); + add_type<Fundamental::Integer> (s, ns, L"integer"); + add_type<Fundamental::NonPositiveInteger> (s, ns, L"nonPositiveInteger"); + add_type<Fundamental::NonNegativeInteger> (s, ns, L"nonNegativeInteger"); + add_type<Fundamental::PositiveInteger> (s, ns, L"positiveInteger"); + add_type<Fundamental::NegativeInteger> (s, ns, L"negativeInteger"); + + // Boolean. + // + add_type<Fundamental::Boolean> (s, ns, L"boolean"); + + // Floats. 
+ // + add_type<Fundamental::Float> (s, ns, L"float"); + add_type<Fundamental::Double> (s, ns, L"double"); + add_type<Fundamental::Decimal> (s, ns, L"decimal"); + + // Strings + // + add_type<Fundamental::String> (s, ns, L"string"); + add_type<Fundamental::NormalizedString> (s, ns, L"normalizedString"); + add_type<Fundamental::Token> (s, ns, L"token"); + add_type<Fundamental::Name> (s, ns, L"Name"); + add_type<Fundamental::NameToken> (s, ns, L"NMTOKEN"); + add_type<Fundamental::NameTokens> (s, ns, L"NMTOKENS"); + add_type<Fundamental::NCName> (s, ns, L"NCName"); + add_type<Fundamental::Language> (s, ns, L"language"); + + // ID/IDREF. + // + add_type<Fundamental::Id> (s, ns, L"ID"); + + Fundamental::IdRef& id_ref ( + s.new_node<Fundamental::IdRef> (path, 0, 0)); + s.new_edge<Names> (ns, id_ref, L"IDREF"); + s.new_edge<Arguments> (any_type, id_ref); + + Fundamental::IdRefs& id_refs ( + s.new_node<Fundamental::IdRefs> (path, 0, 0)); + s.new_edge<Names> (ns, id_refs, L"IDREFS"); + s.new_edge<Arguments> (any_type, id_refs); + + // URI. + // + add_type<Fundamental::AnyURI> (s, ns, L"anyURI"); + + // Qualified name. + // + add_type<Fundamental::QName> (s, ns, L"QName"); + + // Binary. + // + add_type<Fundamental::Base64Binary> (s, ns, L"base64Binary"); + add_type<Fundamental::HexBinary> (s, ns, L"hexBinary"); + + // Date/time. + // + add_type<Fundamental::Date> (s, ns, L"date"); + add_type<Fundamental::DateTime> (s, ns, L"dateTime"); + add_type<Fundamental::Duration> (s, ns, L"duration"); + add_type<Fundamental::Day> (s, ns, L"gDay"); + add_type<Fundamental::Month> (s, ns, L"gMonth"); + add_type<Fundamental::MonthDay> (s, ns, L"gMonthDay"); + add_type<Fundamental::Year> (s, ns, L"gYear"); + add_type<Fundamental::YearMonth> (s, ns, L"gYearMonth"); + add_type<Fundamental::Time> (s, ns, L"time"); + + // Entity. + // + add_type<Fundamental::Entity> (s, ns, L"ENTITY"); + add_type<Fundamental::Entities> (s, ns, L"ENTITIES"); + + // Notation. 
+ // + add_type<Fundamental::Notation> (s, ns, L"NOTATION"); + } + + + auto_ptr<Schema> Parser::Impl:: + xml_schema (Path const& tu) + { + valid_ = true; + + auto_ptr<Schema> rs (new Schema (tu, 1, 1)); + fill_xml_schema (*rs, tu); + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + auto_ptr<Schema> Parser::Impl:: + parse (Path const& tu) + { + valid_ = true; + schema_map_.clear (); + default_values_.clear (); + + XML::PtrVector<Xerces::DOMDocument> dom_docs; + dom_docs_ = &dom_docs; + + NamespaceMap cache; + cache_ = &cache; + + XML::AutoPtr<Xerces::DOMDocument> d (dom (tu, true)); + + if (!d) + throw InvalidSchema (); + + XML::Element root (d->getDocumentElement ()); + String ns (trim (root["targetNamespace"])); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + auto_ptr<Schema> rs (new Schema (tu, root.line (), root.column ())); + + // Implied schema with fundamental types. + // + xml_schema_ = &rs->new_node<Schema> (xml_schema_path_, 1, 1); + rs->new_edge<Implies> (*rs, *xml_schema_, xml_schema_path_); + + fill_xml_schema (*xml_schema_, xml_schema_path_); + + // Parse. + // + { + // Enter the file into schema_map_. Do normalize() before + // complete() to avoid hitting system path limits with '..' + // directories. + // + Path abs_path (tu); + abs_path.normalize ().complete (); + schema_map_[SchemaId (abs_path, ns)] = rs.get (); + rs->context ().set ("absolute-path", abs_path); + + s_ = cur_ = rs.get (); + { + file_stack_.push (PathPair (tu, abs_path)); + + { + push_scope ( + s_->new_node<Namespace> ( + file (), root.line (), root.column ())); + s_->new_edge<Names> (*cur_, scope (), ns); + + { + schema (root); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + s_ = cur_ = 0; + } + + dom_docs_->push_back (d); + + // Second pass to resolve forward references to types, elements, + // attributes and groups. 
+ // + if (valid_) + { + Traversal::Schema schema; + + struct Uses: Traversal::Uses + { + virtual void + traverse (Type& u) + { + Schema& s (u.schema ()); + + if (!s.context ().count ("schema-resolved")) + { + s.context ().set ("schema-resolved", true); + Traversal::Uses::traverse (u); + } + } + } uses; + + Traversal::Names schema_names; + Traversal::Namespace ns; + Traversal::Names ns_names; + + schema >> uses >> schema; + schema >> schema_names >> ns >> ns_names; + + Resolver resolver (*rs, valid_, *cache_, default_values_); + + struct AnonymousMember: Traversal::Attribute, + Traversal::Element, + Traversal::Member + { + AnonymousMember (Traversal::NodeDispatcher& d) + { + belongs_.node_traverser (d); + } + + virtual void + traverse (SemanticGraph::Attribute& a) + { + traverse_member (a); + } + + virtual void + traverse (SemanticGraph::Element& e) + { + traverse_member (e); + } + + void + traverse_member (SemanticGraph::Member& m) + { + if (m.typed_p () && + !m.type ().named_p () && + !m.type ().context ().count ("seen")) + { + m.type().context ().set ("seen", true); + + Traversal::Member::belongs (m, belongs_); + + m.type ().context ().remove ("seen"); + } + } + + private: + Traversal::Belongs belongs_; + } anonymous_member (resolver); + + struct AnonymousBase: Traversal::Type + { + AnonymousBase (Traversal::NodeDispatcher& d) + : base_ (d) + { + } + + virtual void + traverse (SemanticGraph::Type& t) + { + if (!t.named_p ()) + base_.dispatch (t); + } + + private: + Traversal::NodeDispatcher& base_; + } anonymous_base (resolver); + + ns_names >> resolver; + ns_names >> anonymous_member; + + Traversal::Names names; + Traversal::Inherits inherits; + Traversal::Argumented argumented; + resolver >> names >> resolver; + names >> anonymous_member; + resolver >> inherits >> anonymous_base; + resolver >> argumented >> anonymous_base; + + if (trace_) + wcout << "starting resolution pass" << endl; + + schema.dispatch (*rs); + } + + // Resolve default/fixed values of QName 
type. + // + if (valid_) + { + for (DefaultValues::const_iterator i (default_values_.begin ()), + e (default_values_.end ()); i != e; ++i) + { + SemanticGraph::Member& m (**i); + SemanticGraph::Type& t (m.type ()); + SemanticGraph::Context& c (m.context ()); + + if (ultimate_base (t).is_a<SemanticGraph::Fundamental::QName> ()) + { + String v (m.value ()); + Xerces::DOMElement* e (c.get<Xerces::DOMElement*> ("dom-node")); + + try + { + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). + // + String ns (XML::ns_name (e, XML::prefix (v))); + + if (m.fixed_p ()) + m.fixed (ns + L'#' + v); + else + m.default_ (ns + L'#' + v); + } + catch (XML::NoMapping const& ex) + { + if (!ex.prefix ().empty ()) + { + wcerr << m.file () << ":" << m.line () << ":" << m.column () + << ": error: unable to resolve namespace for prefix '" + << ex.prefix () << "'" << endl; + + valid_ = false; + } + } + } + + c.remove ("dom-node"); + } + } + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + + auto_ptr<Schema> Parser::Impl:: + parse (Paths const& paths) + { + valid_ = true; + schema_map_.clear (); + default_values_.clear (); + + XML::PtrVector<Xerces::DOMDocument> dom_docs; + dom_docs_ = &dom_docs; + + NamespaceMap cache; + cache_ = &cache; + + auto_ptr<Schema> rs (new Schema (Path (), 0, 0)); + + // Implied schema with fundamental types. + // + xml_schema_ = &rs->new_node<Schema> (xml_schema_path_, 1, 1); + rs->new_edge<Implies> (*rs, *xml_schema_, xml_schema_path_); + + fill_xml_schema (*xml_schema_, xml_schema_path_); + + // Parse individual schemas. 
+ // + s_ = rs.get (); + + for (Paths::const_iterator i (paths.begin ()); i != paths.end (); ++i) + { + Path const& tu (*i); + XML::AutoPtr<Xerces::DOMDocument> d (dom (tu, true)); + + if (!d) + throw InvalidSchema (); + + XML::Element root (d->getDocumentElement ()); + String ns (trim (root["targetNamespace"])); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + // Check if we already have this schema. Do normalize() before + // complete() to avoid hitting system path limits with '..' + // directories. + // + Path abs_path (tu); + abs_path.normalize ().complete (); + SchemaId schema_id (abs_path, ns); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + continue; + + Schema& s (s_->new_node<Schema> (tu, root.line (), root.column ())); + s_->new_edge<Implies> (s, *xml_schema_, xml_schema_path_); + s_->new_edge<Imports> (*s_, s, tu); + + // Enter the file into schema_map_. + // + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + cur_ = &s; + + { + file_stack_.push (PathPair (tu, abs_path)); + + { + push_scope ( + s_->new_node<Namespace> ( + file (), root.line (), root.column ())); + s_->new_edge<Names> (*cur_, scope (), ns); + + { + schema (root); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_ = 0; + + dom_docs_->push_back (d); + + if (!valid_) + break; + } + + s_ = 0; + + // Second pass to resolve forward references to types, elements, + // attributes and groups. 
+ // + if (valid_) + { + Traversal::Schema schema; + + struct Uses: Traversal::Uses + { + virtual void + traverse (Type& u) + { + Schema& s (u.schema ()); + + if (!s.context ().count ("schema-resolved")) + { + s.context ().set ("schema-resolved", true); + Traversal::Uses::traverse (u); + } + } + } uses; + + Traversal::Names schema_names; + Traversal::Namespace ns; + Traversal::Names ns_names; + + schema >> uses >> schema; + schema >> schema_names >> ns >> ns_names; + + Resolver resolver (*rs, valid_, *cache_, default_values_); + + struct AnonymousMember: Traversal::Attribute, + Traversal::Element, + Traversal::Member + { + AnonymousMember (Traversal::NodeDispatcher& d) + { + belongs_.node_traverser (d); + } + + virtual void + traverse (SemanticGraph::Attribute& a) + { + traverse_member (a); + } + + virtual void + traverse (SemanticGraph::Element& e) + { + traverse_member (e); + } + + virtual void + traverse_member (SemanticGraph::Member& m) + { + if (m.typed_p () && + !m.type ().named_p () && + !m.type ().context ().count ("seen")) + { + m.type().context ().set ("seen", true); + + Traversal::Member::belongs (m, belongs_); + + m.type ().context ().remove ("seen"); + } + } + + private: + Traversal::Belongs belongs_; + } anonymous_member (resolver); + + struct AnonymousBase: Traversal::Type + { + AnonymousBase (Traversal::NodeDispatcher& d) + : base_ (d) + { + } + + virtual void + traverse (SemanticGraph::Type& t) + { + if (!t.named_p ()) + base_.dispatch (t); + } + + private: + Traversal::NodeDispatcher& base_; + } anonymous_base (resolver); + + ns_names >> resolver; + ns_names >> anonymous_member; + + Traversal::Names names; + Traversal::Inherits inherits; + Traversal::Argumented argumented; + resolver >> names >> resolver; + names >> anonymous_member; + resolver >> inherits >> anonymous_base; + resolver >> argumented >> anonymous_base; + + if (trace_) + wcout << "starting resolution pass" << endl; + + schema.dispatch (*rs); + } + + // Resolve default/fixed values 
of QName type. + // + if (valid_) + { + for (DefaultValues::const_iterator i (default_values_.begin ()), + e (default_values_.end ()); i != e; ++i) + { + SemanticGraph::Member& m (**i); + SemanticGraph::Type& t (m.type ()); + SemanticGraph::Context& c (m.context ()); + + if (ultimate_base (t).is_a<SemanticGraph::Fundamental::QName> ()) + { + String v (m.value ()); + Xerces::DOMElement* e (c.get<Xerces::DOMElement*> ("dom-node")); + + try + { + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). + // + String ns (XML::ns_name (e, XML::prefix (v))); + + if (m.fixed_p ()) + m.fixed (ns + L'#' + v); + else + m.default_ (ns + L'#' + v); + } + catch (XML::NoMapping const& ex) + { + if (!ex.prefix ().empty ()) + { + wcerr << m.file () << ":" << m.line () << ":" << m.column () + << ": error: unable to resolve namespace for prefix '" + << ex.prefix () << "'" << endl; + + valid_ = false; + } + } + } + + c.remove ("dom-node"); + } + } + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + + void Parser::Impl:: + schema (XML::Element const& s) + { + bool old_qa (qualify_attribute_); + bool old_qe (qualify_element_); + + if (String af = trim (s["attributeFormDefault"])) + qualify_attribute_ = af == L"qualified"; + else + qualify_attribute_ = false; + + if (String ef = trim (s["elementFormDefault"])) + qualify_element_ = ef == L"qualified"; + else + qualify_element_ = false; + + push (s); + + // Parse leading annotation if any and add it as an annotation for + // this schema. 
+ // + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, *cur_); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"import") import (e); else + if (name == L"include") include (e); else + if (name == L"element") element (e, true); else + if (name == L"attribute") attribute (e, true); else + if (name == L"simpleType") simple_type (e); else + if (name == L"annotation"); else + if (name == L"complexType") complex_type (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected top-level element: '" << name << "'" + << endl; + + valid_ = false; + } + } + + pop (); + + qualify_attribute_ = old_qa; + qualify_element_ = old_qe; + } + + void Parser::Impl:: + import (XML::Element const& i) + { + NarrowString loc ( + trim ( + XML::transcode_to_narrow ( + i.dom_element ()->getAttribute ( + XML::XMLChString ("schemaLocation").c_str ())))); + + if (loc_translator_) + loc = loc_translator_->translate (loc); + + String ins (trim (i["namespace"])); + + // Ignore empty <import>. + // + if (!loc && !ins) + return; + + Path path, rel_path, abs_path; + try + { + path = Path (loc); + + if (path.absolute ()) + { + abs_path = rel_path = path; + abs_path.normalize (); + } + else + { + // Do normalize() before complete() to avoid hitting system path + // limits with '..' directories. 
+ // + abs_path = rel_path = file ().directory () / path; + abs_path.normalize ().complete (); + } + } + catch (InvalidPath const&) + { + wcerr << file () << ":" << i.line () << ":" << i.column () << ": " + << "error: '" << loc.c_str () << "' is not a valid " + << "filesystem path" << endl; + + valid_ = false; + return; + } + + SchemaId schema_id (abs_path, ins); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + { + s_->new_edge<Imports> (*cur_, *schema_map_[schema_id], path); + return; + } + + if (trace_) + wcout << "importing " << rel_path << endl; + + if (XML::AutoPtr<Xerces::DOMDocument> d = dom (abs_path, false)) + { + XML::Element r (d->getDocumentElement ()); + String ns (trim (r["targetNamespace"])); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + Schema& s (s_->new_node<Schema> (rel_path, r.line (), r.column ())); + s_->new_edge<Implies> (s, *xml_schema_, xml_schema_path_); + s_->new_edge<Imports> (*cur_, s, path); + + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + Schema* old_cur (cur_); + bool old_cur_chameleon (cur_chameleon_); + cur_ = &s; + cur_chameleon_ = false; + + { + file_stack_.push (PathPair (rel_path, abs_path)); + + { + push_scope ( + s_->new_node<Namespace> (file (), r.line (), r.column ())); + s_->new_edge<Names> (*cur_, scope (), ns); + + { + schema (r); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_chameleon_ = old_cur_chameleon; + cur_ = old_cur; + + dom_docs_->push_back (d); + } + } + + void Parser::Impl:: + include (XML::Element const& i) + { + NarrowString loc ( + trim ( + XML::transcode_to_narrow ( + i.dom_element ()->getAttribute ( + XML::XMLChString ("schemaLocation").c_str ())))); + + if (loc_translator_) + loc = loc_translator_->translate (loc); + + Path path, rel_path, abs_path; + try + { + path = Path (loc); + + if (path.absolute ()) + { + abs_path = rel_path = path; + abs_path.normalize (); + } + else + { + // Do normalize() before complete() to 
avoid hitting system path + // limits with '..' directories. + // + abs_path = rel_path = file ().directory () / path; + abs_path.normalize ().complete (); + } + } + catch (InvalidPath const&) + { + wcerr << file () << ":" << i.line () << ":" << i.column () << ": " + << "error: '" << loc.c_str () << "' is not a valid " + << "filesystem path" << endl; + + valid_ = false; + return; + } + + // Included schema should have the same namespace as ours. + // + SchemaId schema_id (abs_path, cur_ns ().name ()); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + { + Schema& s (*schema_map_[schema_id]); + + // Chemeleon inclusion results in a new Schema node for every + // namespace. As a result, such a Schema node can only be + // Source'ed. I use this property to decide which edge to use. + // + + if (s.used_p () && s.used_begin ()->is_a<Sources> ()) + s_->new_edge<Sources> (*cur_, s, path); + else + s_->new_edge<Includes> (*cur_, s, path); + + return; + } + + if (trace_) + wcout << "including " << rel_path << endl; + + if (XML::AutoPtr<Xerces::DOMDocument> d = dom (abs_path, false)) + { + XML::Element r (d->getDocumentElement ()); + String ns (trim (r["targetNamespace"])), cur_ns; + + Schema& s (s_->new_node<Schema> (rel_path, r.line (), r.column ())); + s_->new_edge<Implies> (s, *xml_schema_, xml_schema_path_); + + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + bool chameleon (false); + + if (ns.empty () && !(cur_ns = (cur_->names_begin ())->name ()).empty ()) + { + // Chameleon. 
+ // + ns = cur_ns; + s_->new_edge<Sources> (*cur_, s, path); + chameleon = true; + + if (trace_) + wcout << "handling chameleon schema" << endl; + } + else + s_->new_edge<Includes> (*cur_, s, path); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + Schema* old_cur (cur_); + bool old_cur_chameleon (cur_chameleon_); + cur_ = &s; + cur_chameleon_ = chameleon; + + { + file_stack_.push (PathPair (rel_path, abs_path)); + + { + push_scope ( + s_->new_node<Namespace> (file (), r.line (), r.column ())); + s_->new_edge<Names> (*cur_, scope (), ns); + + { + schema (r); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_chameleon_ = old_cur_chameleon; + cur_ = old_cur; + + dom_docs_->push_back (d); + } + } + + void Parser::Impl:: + element_group (XML::Element const& g, bool in_compositor) + { + if (String name = trim (g["name"])) + { + ElementGroup& group ( + s_->new_node<ElementGroup> (file (), g.line (), g.column ())); + + s_->new_edge<Names> (scope (), group, name); + + push_scope (group); + push (g); + + annotation (false); + + XML::Element e (next ()); + + name = e.name (); + + if (trace_) + wcout << name << endl; + + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'all', 'choice' or 'sequence' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + + // Group's immediate compositor always has cardinality 1,1. + // + if (c) + s_->new_edge<ContainsCompositor> (group, *c, 1, 1); + + pop (); + pop_scope (); + } + else if (String ref = trim (g["ref"])) + { + if (trace_) + wcout << "element-group-ref " << ref << endl; + + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (g, ref)); + + // In order to avoid code duplication we are going to let the + // resolver handle this case. 
+ // + if (trace_) + wcout << "deferring resolution of group name '" << uq_name + << "' inside namespace '" << ns_name << "'" + << " until later" << endl; + + if (in_compositor) + { + Compositor& c (compositor ()); + + unsigned long min (parse_min (trim (g["minOccurs"]))); + unsigned long max (parse_max (trim (g["maxOccurs"]))); + + ElementGroupRef ref ( + uq_name, ns_name, + min, + max == unbounded ? 0 : max, + c, + scope ()); + + if (!c.context ().count ("element-group-refs")) + c.context ().set ("element-group-refs", ElementGroupRefs ()); + + c.context ().get<ElementGroupRefs> ( + "element-group-refs").push_back (ref); + } + else + { + // This is a group-ref directly in complexType. + // + + Scope& s (scope ()); + + unsigned long min (parse_min (trim (g["minOccurs"]))); + unsigned long max (parse_max (trim (g["maxOccurs"]))); + + ElementGroupRef ref ( + uq_name, ns_name, min, max == unbounded ? 0 : max, s); + + s.context ().set ("element-group-ref", ref); + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + + abort (); + } + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: unable to resolve namespace prefix '" << ex.prefix () + << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: 'name' or 'ref' attribute is missing in group " + << "declaration" << endl; + + valid_ = false; + + return; + } + } + + //@@ Need RAII for push/pop. 
+ // + + Type* Parser::Impl:: + simple_type (XML::Element const& t) + { + Type* r (0); + + push (t); + + Annotation* a (annotation (true)); + + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"list") r = list (e, t); else + if (name == L"union") r = union_ (e, t); else + if (name == L"restriction") r = restriction (e, t); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'list', 'union', or 'restriction' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + + if (r != 0 && a != 0) + s_->new_edge<Annotates> (*a, *r); + + pop (); + + return r; + } + + SemanticGraph::Type* Parser::Impl:: + list (XML::Element const& l, XML::Element const& t) + { + if (trace_) + wcout << "list" << endl; + + List& node (s_->new_node<List> (file (), t.line (), t.column ())); + + if (String item_type = trim (l["itemType"])) + { + if (trace_) + wcout << "item type: " << fq_name (l, item_type) << endl; + + set_type<Arguments> (item_type, l, node); + } + else + { + // Anonymous list item type. + // + push (l); + + annotation (false); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge<Arguments> (*t, node); + } + else + { + wcerr << file () << ":" << l.line () << ":" << l.column () << ": " + << "error: expected 'itemType' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + pop (); + } + + if (String name = trim (t["name"])) + s_->new_edge<Names> (scope (), node, name); + + return &node; + } + + namespace + { + // + // List parsing utility functions. + // + + // Find first non-space character. 
+ // + size_t + find_ns (const wchar_t* s, size_t size, size_t pos) + { + while (pos < size && + (s[pos] == 0x20 || // space + s[pos] == 0x0D || // carriage return + s[pos] == 0x09 || // tab + s[pos] == 0x0A)) + ++pos; + + return pos < size ? pos : String::npos; + } + + // Find first space character. + // + size_t + find_s (const wchar_t* s, size_t size, size_t pos) + { + while (pos < size && + s[pos] != 0x20 && // space + s[pos] != 0x0D && // carriage return + s[pos] != 0x09 && // tab + s[pos] != 0x0A) + ++pos; + + return pos < size ? pos : String::npos; + } + } + + SemanticGraph::Type* Parser::Impl:: + union_ (XML::Element const& u, XML::Element const& t) + { + if (trace_) + wcout << "union" << endl; + + Union& node (s_->new_node<Union> (file (), t.line (), t.column ())); + + bool has_members (false); + + if (String members = trim (u["memberTypes"])) + { + // Don't bother trying to resolve member types at this point + // since the order is important so we would have to insert + // the late resolutions into specific places. It is simpler + // to just do the whole resolution later. + // + const wchar_t* data (members.c_str ()); + size_t size (members.size ()); + + UnionMemberTypes* m (0); + + // Traverse the type list while logically collapsing spaces. + // + for (size_t i (find_ns (data, size, 0)); i != String::npos;) + { + String s; + size_t j (find_s (data, size, i)); + + if (j != String::npos) + { + s = String (data + i, j - i); + i = find_ns (data, size, j); + } + else + { + // Last item. 
+ // + s = String (data + i, size - i); + i = String::npos; + } + + if (trace_) + wcout << "member type: " << fq_name (u, s) << endl; + + if (m == 0) + { + node.context ().set ("union-member-types", UnionMemberTypes ()); + m = &node.context ().get<UnionMemberTypes> ("union-member-types"); + } + + try + { + m->push_back ( + UnionMemberType ( + namespace_name (u, s), unqualified_name (s))); + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << u.line () << ":" << u.column () << ": " + << "error: unable to resolve namespace prefix " + << "'" << ex.prefix () << "' in '" << s << "'" << endl; + + valid_ = false; + } + } + + has_members = (m != 0); + } + + // Handle anonymous members. + // + push (u); + + annotation (false); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge<Arguments> (*t, node); + } + + pop (); + + if (node.argumented_begin () == node.argumented_end () && !has_members) + { + wcerr << file () << ":" << u.line () << ":" << u.column () << ": " + << "error: expected 'memberTypes' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (String name = trim (t["name"])) + s_->new_edge<Names> (scope (), node, name); + + return &node; + } + + Type* Parser::Impl:: + restriction (XML::Element const& r, XML::Element const& t) + { + String base (trim (r["base"])); + Type* base_type (0); + + if (base) + { + if (trace_) + wcout << "restriction base: " << fq_name (r, base) << endl; + } + + Type* rv (0); + + push (r); + + annotation (false); + + bool enum_ (false); + + if (!base) + { + // Anonymous base type. 
+ // + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleType") base_type = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << r.line () << ":" << r.column () << ": " + << "error: expected 'base' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (!valid_) + { + pop (); + return 0; + } + } + + Facets facets; + Restricts* restricts (0); + String pattern; + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"enumeration") + { + // Enumeration + // + if (enum_) + enumeration (e); + else + { + // First + // + enum_ = true; + + Enumeration& node ( + s_->new_node<Enumeration> (file (), t.line (), t.column ())); + + if (base_type) + restricts = &s_->new_edge<Restricts> (node, *base_type); + else + restricts = set_type<Restricts> (base, r, node); + + if (String name = trim (t["name"])) + s_->new_edge<Names> (scope (), static_cast<Nameable&> (node), name); + + rv = &node; + push_scope (node); + enumeration (e); + } + } + else if (name == L"minExclusive" || + name == L"minInclusive" || + name == L"maxExclusive" || + name == L"maxInclusive" || + name == L"totalDigits" || + name == L"fractionDigits" || + name == L"length" || + name == L"minLength" || + name == L"maxLength" || + name == L"whiteSpace") + { + facets[name] = trim (e["value"]); + } + else if (name == L"pattern") + { + if (pattern) + pattern += L'|'; + + pattern += e["value"]; + } + else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "' in " + << "simple type restriction" << endl; + + valid_ = false; + } + } + + if (pattern) + facets[L"pattern"] = pattern; + + if (enum_) + pop_scope 
(); + else + { + Complex& node (s_->new_node<Complex> (file (), t.line (), t.column ())); + + if (base_type) + restricts = &s_->new_edge<Restricts> (node, *base_type); + else + restricts = set_type<Restricts> (base, r, node); + + if (String name = trim (t["name"])) + s_->new_edge<Names> (scope (), node, name); + + rv = &node; + } + + if (!facets.empty ()) + { + if (restricts) + copy_facets (*restricts, facets); + else + rv->context ().set ("facets", facets); + } + + pop (); + + return rv; + } + + void Parser::Impl:: + enumeration (XML::Element const& e) + { + String value (e["value"]); + + if (trace_) + wcout << "enumeration value: " << value << endl; + + push (e); + Annotation* a (annotation (true)); + pop (); + + Enumerator& node ( + s_->new_node<Enumerator> (file (), e.line (), e.column ())); + + s_->new_edge<Names> (scope (), node, value); + s_->new_edge<Belongs> (node, dynamic_cast<Type&>(scope ())); + + if (a != 0) + s_->new_edge<Annotates> (*a, node); + + } + + Type* Parser::Impl:: + complex_type (XML::Element const& t) + { + Type* r (0); + + Complex& node (s_->new_node<Complex> (file (), t.line (), t.column ())); + + if (String m = trim (t["mixed"])) + node.mixed_p (m == L"true" || m == L"1"); + + if (String name = trim (t["name"])) + s_->new_edge<Names> (scope (), node, name); + + r = &node; + + push_scope (node); + push (t); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleContent") simple_content (e); else + if (name == L"complexContent") complex_content (e, node); else + { + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") 
element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + { + unsigned long min (parse_min (trim (e["minOccurs"]))); + unsigned long max (parse_max (trim (e["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + s_->new_edge<ContainsCompositor> ( + node, *c, min, max == unbounded ? 0 : max); + } + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + } + + pop (); + pop_scope (); + + return r; + } + + All* Parser::Impl:: + all (XML::Element const& a) + { + // 'all' cannot be nested inside 'choice' or 'sequence', nor + // can it contain any of those. The only valid cardinality + // values for 'all' are min=0,1 and max=1. 
+ // + All& node (s_->new_node<All> (file (), a.line (), a.column ())); + + push_compositor (node); + push (a); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"element") element (e, false); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'element' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + Choice* Parser::Impl:: + choice (XML::Element const& c, bool in_compositor) + { + Choice& node (s_->new_node<Choice> (file (), c.line (), c.column ())); + + if (in_compositor) + { + unsigned long min (parse_min (trim (c["minOccurs"]))); + unsigned long max (parse_max (trim (c["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + s_->new_edge<ContainsParticle> ( + compositor (), node, min, max == unbounded ? 0 : max); + } + + push_compositor (node); + push (c); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"any") any (e); else + if (name == L"choice") choice (e, true); else + if (name == L"element") element (e, false); else + if (name == L"sequence") sequence (e, true); else + if (name == L"group") element_group (e, true); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'any', 'group', 'choice', 'sequence', " + << "or 'element' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + Sequence* Parser::Impl:: + sequence (XML::Element const& s, bool in_compositor) + { + Sequence& node (s_->new_node<Sequence> (file (), s.line (), s.column ())); + + if (in_compositor) + { + unsigned long min (parse_min (trim (s["minOccurs"]))); + unsigned long max (parse_max (trim 
(s["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + s_->new_edge<ContainsParticle> ( + compositor (), node, min, max == unbounded ? 0 : max); + } + + push_compositor (node); + push (s); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"any") any (e); else + if (name == L"choice") choice (e, true); else + if (name == L"element") element (e, false); else + if (name == L"sequence") sequence (e, true); else + if (name == L"group") element_group (e, true); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'any', 'group', 'choice', 'sequence', " + << "or 'element' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + void Parser::Impl:: + simple_content (XML::Element const& c) + { + push (c); + + annotation (false); + + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"extension") simple_content_extension (e); else + if (name == L"restriction") simple_content_restriction (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'extension' or 'restriction' instead of " + << "'" << name << "'" << endl; + + valid_ = false; + } + + pop (); + } + + void Parser::Impl:: + complex_content (XML::Element const& c, Complex& type) + { + if (String m = trim (c["mixed"])) + type.mixed_p (m == L"true" || m == L"1"); + + push (c); + + annotation (false); + + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"extension") complex_content_extension (e, type); else + if (name == L"restriction") complex_content_restriction (e, type); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'extension' or 'restriction' instead of " + << "'" << name << "'" << endl; + + valid_ = false; + } + + pop (); + } + + void 
Parser::Impl:: + simple_content_extension (XML::Element const& e) + { + if (trace_) + wcout << "extension base: " << fq_name (e, e["base"]) << endl; + + set_type<Extends> (trim (e["base"]), e, dynamic_cast<Complex&> (scope ())); + + push (e); + + annotation (false); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + } + + void Parser::Impl:: + simple_content_restriction (XML::Element const& r) + { + String base (trim (r["base"])); + + if (trace_ && base) + wcout << "restriction base: " << fq_name (r, base) << endl; + + push (r); + annotation (false); + + if (!base) + { + // Anonymous base type. + // + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleType") simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << r.line () << ":" << r.column () << ": " + << "error: expected 'base' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (!valid_) + { + pop (); + return; + } + } + + Facets facets; + String pattern; + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"simpleType") + { + // This is a "superimposed" restriction where the base + // content is restricted by specifying another simple + // type. The attributes are restricted in the ussual + // way. 
So in effect we have kind of two base classes. + // I guess the way to handle this one day would be to + // copy all the facets from the base-to-this-type + // part of the hierarchy (will need to "know" facets + // for the built-in type restrictions as well). For + // now just ignore it. + // + } + else if (name == L"enumeration") + { + // Right now our sementic graph cannot represent enumerations + // with attributes so we are going to ignore enumerators for + // now. + // + } + else if (name == L"minExclusive" || + name == L"minInclusive" || + name == L"maxExclusive" || + name == L"maxInclusive" || + name == L"totalDigits" || + name == L"fractionDigits" || + name == L"length" || + name == L"minLength" || + name == L"maxLength" || + name == L"whiteSpace") + { + facets[name] = trim (e["value"]); + } + else if (name == L"pattern") + { + if (pattern) + pattern += L'|'; + + pattern += e["value"]; + } + else if (name == L"attribute") + { + if (proper_restriction_) + attribute (e, false); + } + else if (name == L"anyAttribute") + { + if (proper_restriction_) + any_attribute (e); + } + else if (name == L"attributeGroup") + { + if (proper_restriction_) + attribute_group (e); + } + else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "' in " + << "simple content restriction" << endl; + + valid_ = false; + } + } + + if (pattern) + facets[L"pattern"] = pattern; + + Complex& type (dynamic_cast<Complex&> (scope ())); + Restricts* restricts = set_type<Restricts> (base, r, type); + + if (!facets.empty ()) + { + if (restricts) + copy_facets (*restricts, facets); + else + type.context ().set ("facets", facets); + } + + pop (); + } + + void Parser::Impl:: + complex_content_extension (XML::Element const& e, Complex& type) + { + if (trace_) + wcout << "extension base: " << fq_name (e, e["base"]) << endl; + + set_type<Extends> (trim (e["base"]), e, dynamic_cast<Complex&> (scope ())); + + push (e); + + annotation 
(false); + + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + { + unsigned long min (parse_min (trim (e["minOccurs"]))); + unsigned long max (parse_max (trim (e["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + s_->new_edge<ContainsCompositor> ( + type, *c, min, max == unbounded ? 0 : max); + } + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + + pop (); + } + + void Parser::Impl:: + complex_content_restriction (XML::Element const& e, Complex& type) + { + if (trace_) + wcout << "restriction base: " << fq_name (e, e["base"]) << endl; + + set_type<Restricts> ( + trim (e["base"]), + e, + dynamic_cast<Complex&> (scope ())); + + // @@ + // For now we simply skip the contents unless the base is anyType + // (or a trivial alias thereof). Checking for the trivial alias + // is further complicated by the fact that it might not be defined + // at this stage (forward inheritnace) so we will ignore that case + // as well for now. 
+ // + if (!proper_restriction_) + { + String base (trim (e["base"])); + String uq_name (unqualified_name (base)); + String ns_name (namespace_name (e, base)); + + if (ns_name != xsd || uq_name != L"anyType") + return; + } + + push (e); + + annotation (false); + + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + { + unsigned long min (parse_min (trim (e["minOccurs"]))); + unsigned long max (parse_max (trim (e["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + s_->new_edge<ContainsCompositor> ( + type, *c, min, max == unbounded ? 0 : max); + } + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + + pop (); + } + + void Parser::Impl:: + element (XML::Element const& e, bool global) + { + bool qualified (global ? 
true : qualify_element_); + + if (String form = trim (e["form"])) + qualified = form == L"qualified"; + + if (trace_) + wcout << "element qualified: " << qualified << endl; + + if (String name = trim (e["name"])) + { + if (trace_) + wcout << "element name '" << name << "'" << endl; + + Element& node ( + s_->new_node<Element> ( + file (), e.line (), e.column (), global, qualified)); + + if (!global) + { + unsigned long min (parse_min (trim (e["minOccurs"]))); + unsigned long max (parse_max (trim (e["maxOccurs"]))); + + if (!(min == 0 && max == 0)) + { + s_->new_edge<Names> (scope (), node, name); + + s_->new_edge<ContainsParticle> ( + compositor (), node, min, max == unbounded ? 0 : max); + } + } + else + s_->new_edge<Names> (scope (), node, name); + + if (qualified) + s_->new_edge<BelongsToNamespace> (node, cur_ns ()); + + // Default and fixed values are mutually exclusive. + // + if (e.attribute_p ("fixed")) + node.fixed (e.attribute ("fixed")); + else if (e.attribute_p ("default")) + node.default_ (e.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", e.dom_element ()); + default_values_.push_back (&node); + } + + bool subst (false); + if (global) + { + if (String sg = trim (e["substitutionGroup"])) + { + if (trace_) + wcout << "substitutes " << sg << endl; + + subst = true; + + try + { + String uq_name (unqualified_name (sg)); + String ns_name (namespace_name (e, sg)); + + node.context ().set ("substitution-ns-name", ns_name); + node.context ().set ("substitution-uq-name", uq_name); + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << sg << "'" << endl; + + valid_ = false; + } + } + } + + if (String type = trim (e["type"])) + { + if (trace_) + wcout << "element type " << fq_name (e, type) << endl; + + set_type<Belongs> (type, e, node); + + // Parse annotation. 
+ // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + pop (); + } + else + { + // Looks like an anonymous type. + // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + if (name == L"complexType") t = complex_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' or 'complexType' " + << "instead of '" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge<Belongs> (node, *t); + } + // By default the type is anyType unless this element is a + // member of a substitution group, in which case it has the + // same type as the element it substiutes. + // + else if (!subst) + { + if (!is_disabled ("F001")) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "warning F001: element '" << name << "' is implicitly " + << "of anyType" << endl; + + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "info: did you forget to specify 'type' attribute?" + << endl; + } + + String prefix (ns_prefix (e, xsd)); + type = prefix + (prefix.empty () ? L"" : L":") + L"anyType"; + + set_type<Belongs> (type, e, node); + } + + pop (); + } + } + else if (String ref = trim (e["ref"])) + { + Element& node ( + s_->new_node<Element> ( + file (), e.line (), e.column (), true, true)); + + unsigned long min (parse_min (trim (e["minOccurs"]))); + unsigned long max (parse_max (trim (e["maxOccurs"]))); + + // Default and fixed values are mutually exclusive. 
+ // + if (e.attribute_p ("fixed")) + node.fixed (e.attribute ("fixed")); + else if (e.attribute_p ("default")) + node.default_ (e.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", e.dom_element ()); + default_values_.push_back (&node); + } + + // Parse annotation. + // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge<Annotates> (*a, node); + + pop (); + + if (!(min == 0 && max == 0)) + { + // Ref can only be in compositor. + // + s_->new_edge<ContainsParticle> ( + compositor (), node, min, max == unbounded ? 0 : max); + + // Try to resolve the prototype. + // + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (e, ref)); + + s_->new_edge<Names> (scope (), node, uq_name); + + Element& prot (resolve<Element> (ns_name, uq_name, *s_, *cache_)); + s_->new_edge<BelongsToNamespace> (node, prot.namespace_ ()); + + // Copy substitution group information if any. + // + if (prot.context ().count ("substitution-ns-name")) + { + node.context ().set ( + "substitution-ns-name", + prot.context ().get<String> ("substitution-ns-name")); + + node.context ().set ( + "substitution-uq-name", + prot.context ().get<String> ("substitution-uq-name")); + } + + // Transfer default and fixed values if the ref declaration hasn't + // defined its own. + // + if (!node.default_p ()) + { + if (prot.fixed_p ()) + node.fixed (prot.value ()); + else if (prot.default_p ()) + node.default_ (prot.value ()); + + if (node.default_p ()) + { + node.context ().set ( + "dom-node", + prot.context ().get<Xerces::DOMElement*> ("dom-node")); + default_values_.push_back (&node); + } + } + + // Transfer annotation if the ref declaration hasn't defined its own. + // + if (!node.annotated_p () && prot.annotated_p ()) + s_->new_edge<Annotates> (prot.annotation (), node); + + // Set type information. 
+ // + if (prot.typed_p ()) + { + s_->new_edge<Belongs> (node, prot.type ()); + } + else if (prot.context ().count ("type-ns-name")) + { + String ns_name (prot.context ().get<String> ("type-ns-name")); + String uq_name (prot.context ().get<String> ("type-uq-name")); + + node.context ().set ("type-ns-name", ns_name); + node.context ().set ("type-uq-name", uq_name); + node.context ().set ("edge-type-id", type_id (typeid (Belongs))); + + if (trace_) + wcout << "element '" << ref << "' is not typed" << endl + << "deferring resolution until later" << endl; + } + else + { + // This could be a recursive reference to an element who's + // (anonymous) type is being defined. We are going to let + // resolver sort out this case. + // + node.context ().set ("instance-ns-name", ns_name); + node.context ().set ("instance-uq-name", uq_name); + + if (trace_) + wcout << "looks like a recursive reference to an element '" + << ns_name << "#" << uq_name << "' which is being " + << "defined" << endl + << "deferring resolution until later" << endl; + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + + abort (); + } + } + catch (NotName const& ex) + { + node.context ().set ("instance-ns-name", ex.ns ()); + node.context ().set ("instance-uq-name", ex.name ()); + + if (trace_) + wcout << "unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl + << "deferring resolution until later" << endl; + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + } + else + { + if (valid_) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: 'name' or 'ref' attribute is missing in element " + << 
"declaration" << endl; + } + } + } + + SemanticGraph::Annotation* Parser::Impl:: + annotation (bool process) + { + Annotation* r (0); + + if (more ()) + { + XML::Element e (next ()); + + if (e.name () == L"annotation") + { + if (process) + { + push (e); + + while (more ()) + { + XML::Element doc (next ()); + + if (doc.name () == L"documentation") + { + using Xerces::DOMNode; + using Xerces::DOMText; + using Xerces::DOMElement; + + // Use first non-structured (text only) documentation element. + // + String text; + bool struc (false); + DOMElement* de (doc.dom_element()); + + for (DOMNode* n (de->getFirstChild ()); + n != 0 && !struc; + n = n->getNextSibling ()) + { + switch (n->getNodeType ()) + { + case DOMNode::TEXT_NODE: + case DOMNode::CDATA_SECTION_NODE: + { + DOMText* t (static_cast<DOMText*> (n)); + text += XML::transcode (t->getData ()); + break; + } + case DOMNode::ELEMENT_NODE: + { + struc = true; + break; + } + default: + break; // ignore + } + } + + if (struc) + continue; + + r = &s_->new_node<Annotation> ( + file (), e.line (), e.column (), text); + break; + } + } + + pop (); + } + } + else + prev (); + } + + return r; + } + + + void Parser::Impl:: + attribute (XML::Element const& a, bool global) + { + bool optional (true); + + String use (trim (a["use"])); + + if (use == L"prohibited") + return; + else if (use == L"required") + optional = false; + + bool qualified (global ? true : qualify_attribute_); + + if (String form = trim (a["form"])) + qualified = form == L"qualified"; + + if (String name = trim (a["name"])) + { + if (trace_) + wcout << "attribute '" << name << "'" << endl; + + Attribute& node ( + s_->new_node<Attribute> ( + file (), a.line (), a.column (), optional, global, qualified)); + + s_->new_edge<Names> (scope (), node, name); + + if (qualified) + s_->new_edge<BelongsToNamespace> (node, cur_ns ()); + + + // Default and fixed values are mutually exclusive. 
+ // + if (a.attribute_p ("fixed")) + node.fixed (a.attribute ("fixed")); + else if (a.attribute_p ("default")) + node.default_ (a.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", a.dom_element ()); + default_values_.push_back (&node); + } + + if (String type = trim (a["type"])) + { + if (trace_) + wcout << "attribute type: '" << fq_name (a, type) << "'" << endl; + + set_type<Belongs> (type, a, node); + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge<Annotates> (*ann, node); + + pop (); + } + else + { + // Looks like an anonymous type. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge<Annotates> (*ann, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "error: expected 'simpleType' instead of '" << e.name () + << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge<Belongs> (node, *t); + } + else + { + if (!is_disabled ("F002")) + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "warning F002: attribute '" << name << "' is implicitly " + << "of anySimpleType" << endl; + + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "info: did you forget to specify 'type' attribute?" + << endl; + } + + // anySimpleType + // + String prefix (ns_prefix (a, xsd)); + type = prefix + (prefix.empty () ? L"" : L":") + L"anySimpleType"; + + set_type<Belongs> (type, a, node); + } + + pop (); + } + } + else if (String ref = trim (a["ref"])) + { + Attribute& node ( + s_->new_node<Attribute> ( + file (), a.line (), a.column (), optional, true, true)); + + + // Default and fixed values are mutually exclusive. 
+ // + if (a.attribute_p ("fixed")) + node.fixed (a.attribute ("fixed")); + else if (a.attribute_p ("default")) + node.default_ (a.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", a.dom_element ()); + default_values_.push_back (&node); + } + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge<Annotates> (*ann, node); + + pop (); + + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (a, ref)); + + s_->new_edge<Names> (scope (), node, uq_name); + + Attribute& prot (resolve<Attribute> (ns_name, uq_name, *s_, *cache_)); + s_->new_edge<BelongsToNamespace> (node, prot.namespace_ ()); + + // Transfer default and fixed values if the ref declaration hasn't + // defined its own. + // + if (!node.default_p ()) + { + // Default value applies only if this attribute is optional. + // + if (prot.fixed_p ()) + node.fixed (prot.value ()); + else if (optional && prot.default_p ()) + node.default_ (prot.value ()); + + if (node.default_p ()) + { + node.context ().set ( + "dom-node", + prot.context ().get<Xerces::DOMElement*> ("dom-node")); + default_values_.push_back (&node); + } + } + + // Transfer annotation if the ref declaration hasn't defined its own. + // + if (!node.annotated_p () && prot.annotated_p ()) + s_->new_edge<Annotates> (prot.annotation (), node); + + // Set type. 
+        //
+        if (prot.typed_p ())
+        {
+          s_->new_edge<Belongs> (node, prot.type ());
+        }
+        else if (prot.context ().count ("type-ns-name"))
+        {
+          String ns_name (prot.context ().get<String> ("type-ns-name"));
+          String uq_name (prot.context ().get<String> ("type-uq-name"));
+
+          node.context ().set ("type-ns-name", ns_name);
+          node.context ().set ("type-uq-name", uq_name);
+          node.context ().set ("edge-type-id", type_id (typeid (Belongs)));
+
+          if (trace_)
+            wcout << "attribute '" << ref << "' is not typed" << endl
+                  << "deferring resolution until later" << endl;
+        }
+        else
+        {
+          // This could be a recursive reference to an attribute whose
+          // (anonymous) type is being defined. We are going to let
+          // resolver sort out this case.
+          //
+          node.context ().set ("instance-ns-name", ns_name);
+          node.context ().set ("instance-uq-name", uq_name);
+
+          if (trace_)
+            wcout << "looks like a recursive reference to an attribute '"
+                  << ns_name << "#" << uq_name << "' which is being "
+                  << "defined" << endl
+                  << "deferring resolution until later" << endl;
+        }
+      }
+      catch (NotNamespace const& ex)
+      {
+        if (valid_)
+        {
+          wcerr << file () << ":" << a.line () << ":" << a.column () << ": "
+                << "ice: unable to resolve namespace '" << ex.ns () << "'"
+                << endl;
+          abort ();
+        }
+      }
+      catch (NotName const& ex)
+      {
+        node.context ().set ("instance-ns-name", ex.ns ());
+        node.context ().set ("instance-uq-name", ex.name ());
+
+        if (trace_)
+          wcout << "unable to resolve name '" << ex.name ()
+                << "' inside namespace '" << ex.ns () << "'" << endl
+                << "deferring resolution until later" << endl;
+      }
+      catch (XML::NoMapping const& ex)
+      {
+        wcerr << file () << ":" << a.line () << ":" << a.column () << ": "
+              << "error: unable to resolve namespace prefix '"
+              << ex.prefix () << "' in '" << ref << "'" << endl;
+
+        valid_ = false;
+      }
+    }
+    else
+    {
+      if (valid_)
+      {
+        wcerr << file () << ":" << a.line () << ":" << a.column () << ": "
+              << "error: 'name' or 'ref' attribute is missing in attribute "
+              << "declaration" << endl;
+      }
+    }
+  }
+
+  // Process an 'attributeGroup' element.
+  //
+  // If the trimmed 'name' attribute tests true, this is a definition: a
+  // new AttributeGroup node is created, named in the current scope, and
+  // the nested 'attribute', 'anyAttribute', and 'attributeGroup' children
+  // are parsed into it. Otherwise, if the trimmed 'ref' attribute tests
+  // true, this is a reference: an AttributeGroupRef is appended to the
+  // "attribute-group-refs" list kept in the current scope's context so
+  // that it can be resolved later. If neither is the case, an error is
+  // printed and valid_ is set to false.
+  //
+  void Parser::Impl::
+  attribute_group (XML::Element const& g)
+  {
+    if (String name = trim (g["name"]))
+    {
+      // Global definition.
+      //
+      if (trace_)
+        wcout << "attributeGroup '" << name << "'" << endl;
+
+      AttributeGroup& group (
+        s_->new_node<AttributeGroup> (file (), g.line (), g.column ()));
+      s_->new_edge<Names> (scope (), group, name);
+
+      // Make the new group the current scope and g the current element
+      // for the duration of the child traversal; both are undone below.
+      //
+      push_scope (group);
+      push (g);
+
+      // The return value of annotation() is not used here.
+      //
+      annotation (false);
+
+      while (more ())
+      {
+        XML::Element e (next ());
+
+        // Note that this name is the child element's name and shadows
+        // the group name declared in the if-condition above.
+        //
+        String name (e.name ());
+
+        if (trace_)
+          wcout << name << endl;
+
+        if (name == L"attribute") attribute (e, false); else
+        if (name == L"anyAttribute") any_attribute (e); else
+        if (name == L"attributeGroup") attribute_group (e); else
+        {
+          wcerr << file () << ":" << e.line () << ":" << e.column () << ": "
+                << "error: expected 'attribute', 'anyAttribute', or "
+                << "'attributeGroup' instead of '" << name << "'" << endl;
+
+          valid_ = false;
+        }
+      }
+
+      pop ();
+      pop_scope ();
+    }
+    else if (String ref = trim (g["ref"]))
+    {
+      if (trace_)
+        wcout << "attribute-group-ref " << ref << endl;
+
+      try
+      {
+        String uq_name (unqualified_name (ref));
+        String ns_name (namespace_name (g, ref));
+
+        // In order to avoid code duplication we are going to let the
+        // resolver handle this case.
+        //
+        if (trace_)
+          wcout << "deferring resolution of group name '" << uq_name
+                << "' inside namespace '" << ns_name << "'"
+                << " until later" << endl;
+
+        Scope& s (scope ());
+
+        // Within the try block this ref (the AttributeGroupRef) shadows
+        // the String ref declared in the else-if condition.
+        //
+        AttributeGroupRef ref (uq_name, ns_name, s);
+
+        // Create the list on first use, then append to it.
+        //
+        if (!s.context ().count ("attribute-group-refs"))
+          s.context ().set ("attribute-group-refs", AttributeGroupRefs ());
+
+        s.context ().get<AttributeGroupRefs> (
+          "attribute-group-refs").push_back (ref);
+      }
+      catch (NotNamespace const& ex)
+      {
+        // Reported as an internal compiler error ("ice") and only if no
+        // earlier error has already cleared valid_.
+        //
+        if (valid_)
+        {
+          wcerr << file () << ":" << g.line () << ":" << g.column () << ": "
+                << "ice: unable to resolve namespace '" << ex.ns () << "'"
+                << endl;
+          abort ();
+        }
+      }
+      catch (XML::NoMapping const& ex)
+      {
+        // Here ref is the String from the else-if condition again.
+        //
+        wcerr << file () << ":" << g.line () << ":" << g.column () << ": "
+              << "error: unable to resolve namespace prefix '"
+              << ex.prefix () << "' in '" << ref << "'" << endl;
+
+        valid_ = false;
+      }
+    }
+    else
+    {
+      wcerr << file () << ":" << g.line () << ":" << g.column () << ": "
+            << "error: 'name' or 'ref' attribute is missing in "
+            << "attributeGroup declaration" << endl;
+
+      valid_ = false;
+      return;
+    }
+  }
+
+  // Process an 'any' element.
+  //
+  // An Any node is created with the value of the 'namespace' attribute
+  // (L"##any" if the trimmed value tests false) and any annotation found
+  // is attached to it. Unless both minOccurs and maxOccurs parse to 0,
+  // the node is also added to the current compositor and entered into
+  // the current scope under a generated name of the form "any #N".
+  //
+  void Parser::Impl::
+  any (XML::Element const& a)
+  {
+    if (trace_)
+      wcout << "any" << endl;
+
+    String namespaces (trim (a["namespace"]));
+
+    if (!namespaces)
+      namespaces = L"##any";
+
+    Any& any (
+      s_->new_node<Any> (file (), a.line (), a.column (), namespaces));
+
+    unsigned long min (parse_min (trim (a["minOccurs"])));
+    unsigned long max (parse_max (trim (a["maxOccurs"])));
+
+    // Parse annotation.
+    //
+    push (a);
+
+    if (Annotation* ann = annotation (true))
+      s_->new_edge<Annotates> (*ann, any);
+
+    pop ();
+
+    // A wildcard with min == 0 and max == 0 is created above but is
+    // neither added to the compositor nor named in the scope.
+    //
+    if (!(min == 0 && max == 0))
+    {
+      // An unbounded maximum is passed to the edge as 0.
+      //
+      s_->new_edge<ContainsParticle> (
+        compositor (), any, min, max == unbounded ? 0 : max);
+
+      // Any has no name so we have to come up with a fake one in order to
+      // put it into the scope.
+      //
+      // The per-scope counter lives in the scope's context under
+      // "any-name-count": the first wildcard stores and uses 0, later
+      // ones pre-increment the value obtained from get() (presumably a
+      // reference to the stored value, so the increment persists).
+      //
+      unsigned long count;
+      SemanticGraph::Context& ctx (scope ().context ());
+
+      if (!ctx.count ("any-name-count"))
+      {
+        count = 0;
+        ctx.set ("any-name-count", count);
+      }
+      else
+        count = ++(ctx.get<unsigned long> ("any-name-count"));
+
+      std::basic_ostringstream<wchar_t> os;
+      os << "any #" << count;
+
+      s_->new_edge<Names> (scope (), any, os.str ());
+    }
+  }
+
+  // Process an 'anyAttribute' element.
+  //
+  // An AnyAttribute node is created with the value of the 'namespace'
+  // attribute (L"##any" if the trimmed value tests false), any annotation
+  // found is attached to it, and the node is entered into the current
+  // scope under a generated name of the form "any-attribute #N".
+  //
+  void Parser::Impl::
+  any_attribute (XML::Element const& a)
+  {
+    if (trace_)
+      wcout << "anyAttribute" << endl;
+
+    String namespaces (trim (a["namespace"]));
+
+    if (!namespaces)
+      namespaces = L"##any";
+
+    AnyAttribute& any (
+      s_->new_node<AnyAttribute> (
+        file (), a.line (), a.column (), namespaces));
+
+    // Parse annotation.
+    //
+    push (a);
+
+    if (Annotation* ann = annotation (true))
+      s_->new_edge<Annotates> (*ann, any);
+
+    pop ();
+
+    // AnyAttribute has no name so we have to come up with a fake one
+    // in order to put it into the scope.
+    //
+    // The per-scope counter is kept in the scope's context under
+    // "any-attribute-name-count", separately from the one used for 'any'.
+    //
+
+    unsigned long count;
+    SemanticGraph::Context& ctx (scope ().context ());
+
+    if (!ctx.count ("any-attribute-name-count"))
+    {
+      count = 0;
+      ctx.set ("any-attribute-name-count", count);
+    }
+    else
+      count = ++(ctx.get<unsigned long> ("any-attribute-name-count"));
+
+    std::basic_ostringstream<wchar_t> os;
+    os << "any-attribute #" << count;
+
+    s_->new_edge<Names> (scope (), any, os.str ());
+  }
+
+  // Some specializations to get edge orientations right.
+  //
+  // Orientation<Edge, Node>::set_edge() creates an edge of type Edge
+  // between node and type in schema s and returns it. The primary
+  // template passes the arguments as (node, type); the specialization
+  // for Arguments passes them as (type, node).
+  //
+
+  template <typename Edge, typename Node>
+  struct Orientation
+  {
+    static Edge&
+    set_edge (Schema& s, Node& node, Type& type)
+    {
+      // By default it is node->type.
+      //
+      return s.template new_edge<Edge> (node, type);
+    }
+  };
+
+  template <typename Node>
+  struct Orientation<Arguments, Node>
+  {
+    static Arguments&
+    set_edge (Schema& s, Node& node, Type& type)
+    {
+      // For Arguments it is type->node.
+      //
+      return s.template new_edge<Arguments> (type, node);
+    }
+  };
+
+  // Resolve the (possibly qualified) type name 'type', as written on
+  // element e, and connect node to the resulting type with an edge of
+  // type Edge (oriented via Orientation<Edge, Node>).
+  //
+  // Returns a pointer to the edge that was created, or 0 if none was:
+  //
+  //   - NotNamespace or XML::NoMapping: an error is printed and valid_
+  //     is set to false.
+  //
+  //   - NotName: the namespace, name, and edge type id are stored in the
+  //     node's context ("type-ns-name", "type-uq-name", "edge-type-id")
+  //     so that resolution can be retried later.
+  //
+  template <typename Edge, typename Node>
+  Edge* Parser::Impl::
+  set_type (String const& type, XML::Element const& e, Node& node)
+  {
+    Edge* r (0);
+
+    try
+    {
+      String uq_name (unqualified_name (type));
+      String ns_name (namespace_name (e, type));
+
+      Type& t (resolve<Type> (ns_name, uq_name, *s_, *cache_));
+
+      // See if it is an IDREF specialization.
+      //
+      if (ns_name == xsd && (uq_name == L"IDREF" || uq_name == L"IDREFS"))
+      {
+        // See if we've got 'xse:refType' attribute.
+        //
+        if (String ref_type = trim (e.attribute (xse, "refType")))
+        {
+          if (trace_)
+            wcout << "found refType attribute '" << ref_type << "'" << endl;
+
+          //@@ It is a bit wasteful to create a new specialization for
+          //   each refType. Instead we could lookup the target type
+          //   and then navigate through Arguments edges to see if this
+          //   type already has the arguments specialization that we are
+          //   interested in. But for now I will simplify the logic by
+          //   creating a new specialization every time.
+          //
+
+          Specialization* spec (0);
+
+          if (uq_name == L"IDREF")
+            spec = &s_->new_node<Fundamental::IdRef> (
+              file (), e.line (), e.column ());
+          else
+            spec = &s_->new_node<Fundamental::IdRefs> (
+              file (), e.line (), e.column ());
+
+          // The node is connected to the new specialization rather than
+          // to t; the specialization is then connected to the refType
+          // type with an Arguments edge by the recursive call.
+          //
+          r = &Orientation<Edge, Node>::set_edge (*s_, node, *spec);
+
+          set_type<Arguments> (ref_type, e, *spec);
+        }
+        else
+          r = &Orientation<Edge, Node>::set_edge (*s_, node, t);
+      }
+      else
+        r = &Orientation<Edge, Node>::set_edge (*s_, node, t);
+    }
+    catch (NotNamespace const& ex)
+    {
+      wcerr << file () << ":" << e.line () << ":" << e.column () << ": "
+            << "error: unable to resolve namespace '" << ex.ns () << "'"
+            << endl;
+
+      valid_ = false;
+
+    }
+    catch (NotName const& ex)
+    {
+      node.context ().set ("type-ns-name", ex.ns ());
+      node.context ().set ("type-uq-name", ex.name ());
+      node.context ().set ("edge-type-id", type_id (typeid (Edge)));
+
+      if (trace_)
+        wcout << "unable to resolve name '" << ex.name ()
+              << "' inside namespace '" << ex.ns () << "'" << endl
+              << "deferring resolution until later" << endl;
+    }
+    catch (XML::NoMapping const& ex)
+    {
+      wcerr << file () << ":" << e.line () << ":" << e.column () << ": "
+            << "error: unable to resolve namespace prefix "
+            << "'" << ex.prefix () << "' in '" << type << "'" << endl;
+
+      valid_ = false;
+    }
+
+    return r;
+  }
+
+  // Xerces has a provision to associate a public id with input streams
+  // that can later be used in diagnostics. Unfortunately, it doesn't
+  // work. So we will have to keep our own track.
+  //
+  // Context maps the absolute path of each schema file to the path that
+  // should be printed for it in diagnostics. It is not copyable (the
+  // copy constructor and assignment operator are declared private and
+  // not defined here).
+  //
+  struct Context
+  {
+    Context () {}
+
+    // File map for diagnostic.
+    //
+    // Return the path registered for abs with map_file() or, if there is
+    // none, abs itself. In the latter case the result refers to the
+    // argument, so the argument must outlive any use of the result.
+    //
+    Path const&
+    file (Path const& abs) const
+    {
+      FileMap::const_iterator i (file_map_.find (abs));
+
+      if (i != file_map_.end ())
+      {
+        return i->second;
+      }
+      else
+      {
+        return abs;
+      }
+    }
+
+    // Register (or overwrite) rel as the path to print for abs.
+    //
+    void
+    map_file (Path const& abs, Path const& rel)
+    {
+      file_map_[abs] = rel;
+    }
+
+  private:
+    Context (Context const&);
+    Context& operator= (Context const&);
+
+  private:
+    typedef std::map<Path, Path> FileMap;
+    FileMap file_map_;
+  };
+
+  // DOM error handler that prints Xerces diagnostics to wcerr in the
+  // <file>:<line>:<column>: <severity>: <message> form, translating the
+  // file URI reported by Xerces through Context::file().
+  //
+  // Anything other than DOM_SEVERITY_WARNING is printed as an error and
+  // sets the referenced valid flag to false.
+  //
+  class ErrorHandler : public Xerces::DOMErrorHandler
+  {
+  public:
+    ErrorHandler (bool& valid, XSDFrontend::Context const& ctx)
+        : valid_ (valid),
+          ctx_ (ctx)
+    {
+    }
+
+    // Always returns true.
+    //
+    virtual bool
+    handleError (Xerces::DOMError const& e)
+    {
+      // Xerces likes to say "Fatal error encountered during schema scan".
+      // We don't need this junk.
+      //
+      // So once an error has been recorded, messages located at line 0,
+      // column 0 are dropped.
+      //
+      if (!valid_
+          && e.getLocation ()->getLineNumber () == 0
+          && e.getLocation ()->getColumnNumber () == 0)
+        return true;
+
+
+      XSDFrontend::SemanticGraph::Path abs_path (
+        XML::transcode_to_narrow (e.getLocation ()->getURI ()));
+
+      XSDFrontend::SemanticGraph::Path rel_path (ctx_.file (abs_path));
+
+      wcerr << rel_path << ':'
+            << e.getLocation ()->getLineNumber () << ':'
+            << e.getLocation ()->getColumnNumber () << ": ";
+
+      switch (e.getSeverity ())
+      {
+      case Xerces::DOMError::DOM_SEVERITY_WARNING:
+        {
+          wcerr << "warning: ";
+          break;
+        }
+      default:
+        {
+          wcerr << "error: ";
+          valid_ = false;
+          break;
+        }
+      }
+
+      wcerr << e.getMessage () << endl;
+
+      return true;
+    }
+
+  private:
+    bool& valid_;
+    XSDFrontend::Context const& ctx_;
+  };
+
+
+  // Failed to open resource.
+  //
+  // Thrown by InputSource::makeStream() and by
+  // EntityResolver::resolveResource() after the diagnostic has been
+  // printed; caught in Parser::Impl::dom().
+  //
+  struct Open {};
+
+  // Xerces input source for a schema file on disk.
+  //
+  // abs is the path that is actually opened (it is set as the system
+  // id), rel is the path printed if the file cannot be opened, and base
+  // is the file the diagnostic is attributed to (after translation via
+  // ctx.file()). The paths are copied; ctx is held by reference.
+  //
+  class InputSource: public Xerces::InputSource
+  {
+  public:
+    InputSource (
+      Path const& abs,
+      Path const& rel,
+      Path const& base,
+      XSDFrontend::Context const& ctx,
+      Xerces::MemoryManager* mm = Xerces::XMLPlatformUtils::fgMemoryManager)
+        : Xerces::InputSource (mm),
+          abs_ (abs),
+          rel_ (rel),
+          base_ (base),
+          ctx_ (ctx)
+    {
+      setSystemId (XML::XMLChString (String (abs_.string ())).c_str ());
+    }
+
+    // Open the file named by the system id and return the stream. If it
+    // cannot be opened, the stream is deleted, an error is printed, and
+    // Open is thrown.
+    //
+    virtual Xerces::BinInputStream*
+    makeStream () const
+    {
+      using namespace Xerces;
+
+      BinFileInputStream* is (
+        new (getMemoryManager ())
+        BinFileInputStream (getSystemId (), getMemoryManager ()));
+
+      if (!is->getIsOpen ())
+      {
+        delete is;
+
+        wcerr << ctx_.file (base_) << ": error: "
+              << "'" << rel_ << "': unable to open in read mode"
+              << endl;
+
+        throw Open ();
+      }
+
+      return is;
+    }
+
+  private:
+    Path abs_;
+    Path rel_;
+    Path base_;
+    XSDFrontend::Context const& ctx_;
+  };
+
+
+  // Resource resolver that turns the system id of an included/imported
+  // schema into an InputSource (defined above) and records the
+  // absolute-to-printable path mapping in the Context.
+  //
+  // The LocationTranslator pointer may be 0, in which case the system id
+  // is used as is.
+  //
+  class EntityResolver: public Xerces::XMemory,
+                        public Xerces::DOMLSResourceResolver
+  {
+  public:
+    EntityResolver (XSDFrontend::Context& ctx, LocationTranslator* t)
+        : ctx_ (ctx), loc_translator_ (t)
+    {
+    }
+
+    // Only prv_id (the location of the schema to open) and base_uri (the
+    // file that refers to it) are used. Throws Open if prv_id is 0 and
+    // rethrows InvalidPath if the location is not a valid path; in both
+    // cases a diagnostic is printed first.
+    //
+    virtual Xerces::DOMLSInput*
+    resolveResource(XMLCh const* const,
+                    XMLCh const* const,
+                    XMLCh const* const /*pub_id*/,
+                    XMLCh const* const prv_id,
+                    XMLCh const* const base_uri)
+    {
+      /*
+      XMLCh empty[1];
+      empty[0] = 0;
+
+      wcerr << "resolve entity:" << endl
+            << " pub_id " << (pub_id ? pub_id : empty) << endl
+            << " prv_id " << (prv_id ? prv_id : empty) << endl
+            << " uri " << (base_uri ? base_uri : empty) << endl;
+      */
+
+      // base_uri should be a valid path by now.
+      //
+      Path base (XML::transcode_to_narrow (base_uri));
+
+      if (prv_id == 0)
+      {
+        //@@ How can I get the line/column numbers for this?
+        //
+        wcerr << ctx_.file (base) << ": error: "
+              << "unable to guess which schema to open"
+              << endl;
+
+        wcerr << ctx_.file (base) << ": info: "
+              << "did you forget to specify schemaLocation for import/include?"
+              << endl;
+
+        throw Open ();
+      }
+
+      NarrowString path_str (XML::transcode_to_narrow (prv_id));
+
+      if (loc_translator_)
+        path_str = loc_translator_->translate (path_str);
+
+      try
+      {
+        Path path (path_str);
+        Path base_dir (base.directory ());
+
+        Path abs_path, rel_path;
+
+        // A relative location is taken relative to the directory of the
+        // referring file: the real directory for the path to open and
+        // the printable (mapped) directory for the path to print.
+        //
+        if (path.absolute ())
+        {
+          abs_path = rel_path = path;
+        }
+        else
+        {
+          abs_path = base_dir / path;
+          rel_path = ctx_.file (base).directory () / path;
+        }
+
+        abs_path.normalize ();
+
+        ctx_.map_file (abs_path, rel_path);
+
+        using namespace Xerces;
+
+        InputSource* is (
+          new (XMLPlatformUtils::fgMemoryManager)
+          InputSource (abs_path, rel_path, base, ctx_));
+
+        // Note that I can't use XMLPlatformUtils::fgMemoryManager here
+        // since Wrapper4InputSource is-not-an XMemory.
+        //
+        // NOTE(review): no adopt flag is passed, so the wrapper
+        // presumably takes ownership of is (Xerces default) -- confirm.
+        //
+        return new Wrapper4InputSource (is);
+      }
+      catch (InvalidPath const&)
+      {
+        wcerr << ctx_.file (base) << ": error: "
+              << "'" << path_str.c_str () << "' is not a valid filesystem path"
+              << endl;
+        throw;
+      }
+
+      // Will never reach.
+      //
+      return 0;
+    }
+
+  private:
+    XSDFrontend::Context& ctx_;
+    LocationTranslator* loc_translator_;
+  };
+
+
+  // Load the schema file tu and return its DOM document, or a null
+  // AutoPtr on failure.
+  //
+  // If validate is true, the schema is first loaded as a grammar by a
+  // Xerces DOMLSParser with validation enabled; errors reported during
+  // this step set valid_ to false via ErrorHandler. If valid_ is false
+  // at that point (whether or not validation was requested), a null
+  // AutoPtr is returned. Otherwise the file is parsed with
+  // XML::SchemaDOMParser and the resulting document is returned.
+  //
+  // Xerces::XMLException and Xerces::DOMException are reported as
+  // internal compiler errors ("ice") followed by abort(). InvalidPath
+  // and Open set valid_ to false and yield a null AutoPtr.
+  //
+  XML::AutoPtr<Xerces::DOMDocument> Parser::Impl::
+  dom (Path const& tu, bool validate)
+  {
+    using namespace Xerces;
+
+    try
+    {
+      XSDFrontend::Context ctx;
+
+      // Do normalize() before complete() to avoid hitting system path
+      // limits with '..' directories.
+      //
+      Path abs_path (tu);
+      abs_path.normalize ().complete ();
+      ctx.map_file (abs_path, tu);
+
+      // The top-level file is its own base for diagnostics purposes.
+      //
+      InputSource input_source (abs_path, tu, abs_path, ctx);
+
+      // First validate the schema with Xerces.
+      //
+      if (validate)
+      {
+        // Instantiate the DOM parser.
+        //
+        XMLCh const gLS[] = {chLatin_L, chLatin_S, chNull };
+
+        // Get an implementation of the Load-Store (LS) interface.
+        //
+        DOMImplementationLS* impl (
+          static_cast<DOMImplementationLS*> (
+            DOMImplementationRegistry::getDOMImplementation (gLS)));
+
+        // Create a DOMBuilder.
+        //
+        XML::AutoPtr<DOMLSParser> parser (
+          impl->createLSParser (DOMImplementationLS::MODE_SYNCHRONOUS, 0));
+
+        DOMConfiguration* conf (parser->getDomConfig ());
+
+        conf->setParameter (XMLUni::fgDOMComments, false);
+        conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true);
+        conf->setParameter (XMLUni::fgDOMEntities, false);
+        conf->setParameter (XMLUni::fgDOMNamespaces, true);
+        conf->setParameter (XMLUni::fgDOMValidate, true);
+        conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false);
+        conf->setParameter (XMLUni::fgXercesSchema, true);
+
+        // Xerces-C++ 3.1.0 is the first version with working multi import
+        // support.
+        //
+#if _XERCES_VERSION >= 30100
+        conf->setParameter (XMLUni::fgXercesHandleMultipleImports, multiple_imports_);
+#endif
+
+        conf->setParameter (XMLUni::fgXercesSchemaFullChecking, full_schema_check_);
+        conf->setParameter (XMLUni::fgXercesValidationErrorAsFatal, true);
+
+        // The handler and the resolver are stack objects registered by
+        // address; they stay alive until the end of this block, that is,
+        // past the loadGrammar() call below.
+        //
+        ErrorHandler eh (valid_, ctx);
+        conf->setParameter (XMLUni::fgDOMErrorHandler, &eh);
+
+        EntityResolver er (ctx, loc_translator_);
+        conf->setParameter (XMLUni::fgDOMResourceResolver, &er);
+
+        // NOTE(review): the false argument is presumably the adopt flag,
+        // so that the wrapper does not delete the stack-allocated
+        // input_source -- confirm against the Xerces documentation.
+        //
+        Wrapper4InputSource wrap (&input_source, false);
+        parser->loadGrammar (&wrap, Grammar::SchemaGrammarType);
+      }
+
+      if (!valid_)
+        return XML::AutoPtr<DOMDocument> (0);
+
+      // Now do our own parsing.
+      //
+      std::auto_ptr<XML::SchemaDOMParser> xsd_parser (
+        new (XMLPlatformUtils::fgMemoryManager) XML::SchemaDOMParser ());
+
+      xsd_parser->parse (input_source);
+
+      XML::AutoPtr<DOMDocument> doc (xsd_parser->adoptDocument());
+
+      return doc;
+    }
+    catch (Xerces::XMLException const& e)
+    {
+      wcerr << tu << ": ice: Xerces::XMLException: " << e.getMessage ()
+            << endl;
+
+      abort ();
+    }
+    catch (Xerces::DOMException const& e)
+    {
+      // The buffer has room for size characters plus one more.
+      //
+      size_t const size = 2047;
+      XMLCh text[size + 1];
+
+      wcerr << tu << ": ice: Xerces::DOMException: ";
+
+      if (DOMImplementation::loadDOMExceptionMsg (e.code, text, size))
+        wcerr << text << endl;
+      else
+        wcerr << "no message available, error code: " << e.code << endl;
+
+      abort ();
+    }
+    catch (InvalidPath const&)
+    {
+      // Diagnostics have already been issued.
+      //
+      valid_ = false;
+    }
+    catch (Open const&)
+    {
+      // Diagnostics have already been issued.
+      //
+      valid_ = false;
+    }
+
+    return XML::AutoPtr<DOMDocument> (0);
+  }
+
+  // LocationTranslator
+  //
+  LocationTranslator::
+  ~LocationTranslator ()
+  {
+  }
+
+  // Parser
+  //
+  // Parser forwards all the work to its Impl object (impl_), which is
+  // created in the constructors below.
+  //
+  Parser::
+  ~Parser ()
+  {
+  }
+
+  // Create a parser without a location translator or a warning set (0 is
+  // passed to Impl for both).
+  //
+  Parser::
+  Parser (bool proper_restriction,
+          bool multiple_imports,
+          bool full_schema_check)
+      : impl_ (new Impl (proper_restriction,
+                         multiple_imports,
+                         full_schema_check,
+                         0,
+                         0))
+  {
+  }
+
+  // Create a parser with a location translator and a warning set. Their
+  // addresses are passed to Impl, so both arguments must outlive the
+  // parser if Impl keeps the pointers (as it does for the translator,
+  // see loc_translator_ in dom()).
+  //
+  Parser::
+  Parser (bool proper_restriction,
+          bool multiple_imports,
+          bool full_schema_check,
+          LocationTranslator& t,
+          const WarningSet& d)
+      : impl_ (new Impl (proper_restriction,
+                         multiple_imports,
+                         full_schema_check,
+                         &t,
+                         &d))
+  {
+  }
+
+  // Forward to Impl::parse() for a single schema file.
+  //
+  auto_ptr<SemanticGraph::Schema> Parser::
+  parse (SemanticGraph::Path const& path)
+  {
+    return impl_->parse (path);
+  }
+
+  // Forward to Impl::parse() for a list of schema files.
+  //
+  auto_ptr<SemanticGraph::Schema> Parser::
+  parse (SemanticGraph::Paths const& paths)
+  {
+    return impl_->parse (paths);
+  }
+
+  // Forward to Impl::xml_schema().
+  //
+  auto_ptr<SemanticGraph::Schema> Parser::
+  xml_schema (SemanticGraph::Path const& path)
+  {
+    return impl_->xml_schema (path);
+  }
+} |