root/livinglogic.python.xist/src/ll/xist/scripts/xml2xsc.py @ 4432:1733a94c86bf

Revision 4432:1733a94c86bf, 6.1 KB (checked in by Walter Doerwald <walter@…>, 8 years ago)

Finish documentation for xml2xsc.

Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/__init__.py for the license
10
11
12"""
13``xml2xsc`` is a script that generates an XIST namespace module from one or more
14XML files. ``xml2xsc`` will output an XIST element class for each element it
15encounters in any of the XML files. The attributes and model information
16``xml2xsc`` assigns to an element will be collected from each occurrence of the
17element in the XML files, so the XML files should cover as many different cases
18as possible.
19
20``xml2xsc`` supports the following options:
21
22    ``urls``
23        One or more URLs (or filenames) of XML files to be parsed
24
25    ``-p``, ``--parser`` : ``etree`` or ``lxml``
26        Which XML parser should be used for parsing the XML files? (``etree`` is
27        the default, ``lxml`` requires that lxml_ is installed)
28
29    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
30        Should attributes be shared among the elements? ``none`` means that each
31        element will have its own standalone :class:`Attrs` class directly derived
32        from :class:`ll.xist.Elements.Attrs`. For ``dupes`` each attribute that is
33        used by more than one element will be moved into its own :class:`Attrs`
34        class. For ``all`` this will be done for all attributes.
35
36    ``-m``, ``--model`` : ``none``, ``simple``, ``fullall``, ``fullonce``
37        Add model information to the namespace. ``none`` doesn't add any model
38        information. ``simple`` only adds ``model = False`` or ``model = True``
39        (i.e. only the information whether the element must be empty or not).
40        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
41        ``fullonce`` adds full model information too, but reuses model objects for
42        elements which have the same model.
43
44    ``-x``, ``--defaultxmlns``
45        The default namespace name. All elements that don't belong to any
46        namespace will be assigned to this namespace.
47
48    .. _lxml: http://lxml.de/
49"""
50
51
52import sys, argparse, cStringIO
53
54from ll import misc, url
55from ll.xist import xsc, xnd, sims
56
57
58__docformat__ = "reStructuredText"
59
60
def iterpath(node):
    """
    Generate a path for every item in the tree rooted at ``node``.

    Each path is a list that starts with ``node`` and ends with the item
    being visited. Items are yielded in document order and are either tree
    nodes or plain strings (the ``text``/``tail`` content of a node). The
    second to last entry of a path is the item's parent.

    Tail text (the text that follows a node's end tag) is reported as a
    child of the node's *parent*, because that is the element that actually
    contains it. The tail of ``node`` itself lies outside of the tree and is
    not reported.
    """
    yield [node]

    text = getattr(node, "text", None)
    if text:
        yield [node, text]

    # Iterate over the node directly instead of probing for ``getchildren``:
    # iteration is supported by every ElementTree flavour, whereas
    # ``getchildren`` is gone from newer ``xml.etree`` versions.
    try:
        children = iter(node)
    except TypeError:
        children = iter(())

    for child in children:
        for path in iterpath(child):
            yield [node] + path
        # The child's tail belongs to the content of ``node``, not to the child.
        tail = getattr(child, "tail", None)
        if tail:
            yield [node, tail]
71
72
def getelementname(node):
    """
    Split the tag of ``node`` into a ``(name, xmlns)`` tuple.

    Tags written as ``{namespace}name`` are split at the closing brace. For
    every other tag the namespace part of the result is ``None``.
    """
    tag = node.tag
    if not tag.startswith("{"):
        return (tag, None)
    (namespace, _, localname) = tag[1:].partition("}")
    return (localname, namespace)
79
80
def addetree2xnd(ns, node, elements):
    """
    Record the content of the tree rooted at ``node`` in ``ns``.

    ``ns`` is the namespace object that receives an ``xnd.Element`` for each
    new element name (including an ``xnd.Attr`` for each attribute without a
    namespace) and an ``xnd.ProcInst`` for each new processing instruction
    target. ``elements`` maps ``(name, xmlns)`` to the set of things found
    inside that element: ``xnd.Element`` objects and the markers
    ``"#comment"``, ``"#whitespace"`` and ``"#text"``. Both ``ns`` and
    ``elements`` are updated in place.
    """
    # Iterate through the tree and collect which elements are encountered and how they are nested
    for path in iterpath(node):
        current = path[-1]
        typename = type(current).__name__
        # Reset the content entry for every path, so that items that don't
        # produce one (processing instructions, unknown node types) never
        # register the entry left over from the previous path.
        xndnode = None
        if "Element" in typename:
            key = getelementname(current)
            if key in ns.elements:
                xndnode = ns.elements[key]
            else:
                (name, xmlns) = key
                xndnode = xnd.Element(name, xmlns=xmlns)
                ns += xndnode
                elements[key] = set()
            for attrname in current.keys():
                # Attributes that live in a namespace are skipped
                if not attrname.startswith("{") and attrname not in xndnode.attrs:
                    xndnode += xnd.Attr(attrname, type=xsc.TextAttr)
        elif "ProcessingInstruction" in typename:
            target = current.target
            if target not in ns.procinsts:
                ns += xnd.ProcInst(target)
        elif "Comment" in typename:
            xndnode = "#comment"
        elif isinstance(current, basestring):
            xndnode = "#whitespace" if current.isspace() else "#text"

        if xndnode is not None and len(path) >= 2:
            parent = path[-2]
            # Only elements have a content set (text inside comments etc. is ignored)
            if "Element" in type(parent).__name__:
                elements[getelementname(parent)].add(xndnode)
112
113
def makexnd(urls, parser="etree", shareattrs="dupes", model="simple", defaultxmlns=None):
    """
    Build an ``xnd.Module`` from the XML documents in ``urls`` and return it.

    ``urls`` is an iterable of sources: ``url.URL`` objects are opened for
    reading, ``str`` objects are treated as the XML source itself and
    everything else is handed to the parser unchanged.

    ``parser`` selects the parser (``"etree"`` or ``"lxml"``).

    ``shareattrs`` is ``"dupes"`` or ``"all"`` to share attributes among the
    elements (``ns.shareattrs`` is called with ``False`` or ``True``); any
    other value leaves the attributes untouched.

    ``model`` is ``"none"``, ``"simple"``, ``"fullall"`` or ``"fullonce"``
    and specifies which model information is attached to the elements.

    ``defaultxmlns`` is passed on to ``xnd.Module``.

    A ``ValueError`` is raised for an unknown ``parser`` or ``model``.
    """
    # Check the options and import the parser once, before any document is
    # opened, so that a bad option fails immediately.
    if parser == "etree":
        from xml.etree import cElementTree as etree
    elif parser == "lxml":
        from lxml import etree
    else:
        raise ValueError("unknown parser {!r}".format(parser))
    if model not in ("none", "simple", "fullall", "fullonce"):
        raise ValueError("unknown sims mode {!r}".format(model))

    elements = {} # maps (name, xmlns) to content set
    ns = xnd.Module(defaultxmlns=defaultxmlns, model=model)
    with url.Context():
        for u in urls:
            if isinstance(u, url.URL):
                stream = u.openread()
            elif isinstance(u, str):
                stream = cStringIO.StringIO(u)
            else:
                stream = u
            addetree2xnd(ns, etree.parse(stream).getroot(), elements)

    # Put sims info into the element definitions
    if model == "simple":
        for (fullname, modelset) in elements.iteritems():
            ns.elements[fullname].modeltype = bool(modelset)
    elif model in ("fullall", "fullonce"):
        for (fullname, modelset) in elements.iteritems():
            element = ns.elements[fullname]
            if not modelset:
                element.modeltype = "sims.Empty"
                continue
            # Use a separate name here: ``elements`` is the dictionary that
            # is being iterated and must not be rebound.
            children = [el for el in modelset if isinstance(el, xnd.Element)]
            hastext = "#text" in modelset
            if children:
                element.modeltype = "sims.ElementsOrText" if hastext else "sims.Elements"
                element.modelargs = children
            else:
                element.modeltype = "sims.NoElements" if hastext else "sims.NoElementsOrText"

    if shareattrs == "dupes":
        ns.shareattrs(False)
    elif shareattrs == "all":
        ns.shareattrs(True)
    return ns
165
166
def main(args=None):
    """
    Command line entry point: parse ``args`` and print the generated namespace.

    ``args`` is the list of command line arguments; ``None`` makes
    ``argparse`` fall back to ``sys.argv``. The parsed options are passed to
    :func:`makexnd` as keyword arguments and the result is printed to stdout.
    """
    argparser = argparse.ArgumentParser(description="Convert XML files to XIST namespace (on stdout)")
    argparser.add_argument(
        "urls",
        metavar="urls",
        type=url.URL,
        help="URLs of XML files to be parsed",
        nargs="+",
    )
    argparser.add_argument(
        "-p", "--parser",
        dest="parser",
        help="parser module to use for XML parsing (default: %(default)s)",
        choices=("etree", "lxml"),
        default="etree",
    )
    argparser.add_argument(
        "-s", "--shareattrs",
        dest="shareattrs",
        help="Should identical attributes be shared among elements? (default: %(default)s)",
        choices=("none", "dupes", "all"),
        default="dupes",
    )
    argparser.add_argument(
        "-m", "--model",
        dest="model",
        help="Create sims info? (default: %(default)s)",
        choices=("none", "simple", "fullall", "fullonce"),
        default="simple",
    )
    argparser.add_argument(
        "-x", "--defaultxmlns",
        dest="defaultxmlns",
        metavar="NAME",
        help="Force elements without a namespace into this namespace",
    )

    options = argparser.parse_args(args)
    # The option names match the parameter names of makexnd()
    print(makexnd(**vars(options)))
177
178
# Script entry point: main() has no return value, so sys.exit() receives
# ``None`` when the conversion finishes without an exception.
if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.