root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 4437:6f3d4e845072

Revision 4437:6f3d4e845072, 8.2 KB (checked in by Walter Doerwald <walter@…>, 8 years ago)

Fix typos in script documentation. Add examples. Bump version number.

Line 
1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/__init__.py for the license
10
11
12"""
13Purpose
14-------
15
16``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
17It reads one or more DTDs and outputs a skeleton namespace module.
18
19
20Options
21-------
22
23``dtd2xsc`` supports the following options:
24
25    ``urls``
26        Zero or more URLs (or filenames) of DTDs to be parsed. If no URL is
27        given stdin will be read.
28
29    ``-x``, ``--xmlns``
30        The default namespace name. All elements that don't belong to any
31        namespace will be assigned to this namespace.
32
33    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
34        Should attributes be shared among the elements? ``none`` means that each
35        element will have its own standalone :class:`Attrs` class directly derived
36        from :class:`ll.xist.xsc.Element.Attrs`. For ``dupes`` each attribute that is
37        used by more than one element will be moved into its own :class:`Attrs`
38        class. For ``all`` this will be done for all attributes.
39
40    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
41        Add model information to the namespace. ``no`` doesn't add any model
42        information. ``simple`` only adds ``model = False`` or ``model = True``
43        (i.e. only the information whether the element must be empty or not).
44        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
45        ``fullonce`` adds full model information too, but reuses model objects for
46        elements which have the same model.
47
48    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
49        Should default values for attributes specified in the DTD be added to the
50        XIST namespace (as the ``default`` specification in the attribute class)?
51
52    ``--duplicates`` : ``reject``, ``allow``, ``merge``
53        If more than one DTD is specified on the command line, some elements
54        might be specified in more than one DTD. ``--duplicates`` specifies how
55        to handle this case: ``reject`` doesn't allow multiple element
56        specifications. ``allow`` allows them, but only if both specifications
57        are identical (i.e. have the same attributes). ``merge`` allows them and
58        adds the attribute specification of all element specifications to the
59        resulting XIST namespace.
60
61Note that ``dtd2xsc`` requires xmlproc_ to work.
62
63    .. _xmlproc: http://www.garshol.priv.no/download/software/xmlproc/
64
65
66Example
67-------
68
69Suppose we have the following DTD file (named ``foo.dtd``)::
70
71    <?xml version="1.0" encoding="ISO-8859-1"?>
72    <!ELEMENT persons (person*)>
73    <!ELEMENT person (firstname?, lastname?)>
74    <!ATTLIST person id CDATA #REQUIRED>
75    <!ELEMENT firstname (#PCDATA)>
76    <!ELEMENT lastname (#PCDATA)>
77
78Then we can generate a skeleton XIST namespace from it with the following command::
79
80    dtd2xsc foo.dtd -xhttp://xmlns.example.org/ -mfullall
81
82The output will be::
83
84    # -*- coding: ascii -*-
85
86
87    from ll.xist import xsc, sims
88
89
90    xmlns = 'http://xmlns.example.org/'
91
92
93    class firstname(xsc.Element): xmlns = xmlns
94
95
96    class lastname(xsc.Element): xmlns = xmlns
97
98
99    class person(xsc.Element):
100        xmlns = xmlns
101        class Attrs(xsc.Element.Attrs):
102            class id(xsc.TextAttr): required = True
103
104
105    class persons(xsc.Element): xmlns = xmlns
106
107
108    person.model = sims.Elements(lastname, firstname)
109    persons.model = sims.Elements(person)
110    firstname.model = sims.NoElements()
111    lastname.model = sims.NoElements()
112"""
113
114
115__docformat__ = "reStructuredText"
116
117
118import sys, os.path, argparse, cStringIO
119
120try:
121    from xml.parsers.xmlproc import dtdparser
122except ImportError:
123    from xmlproc import dtdparser
124
125from ll import misc, url
126from ll.xist import xsc, parse, xnd
127
128
129__docformat__ = "reStructuredText"
130
131
def getxmlns(dtd):
    """
    Return the set of values of all ``#FIXED`` namespace declaration
    attributes found in :var:`dtd`.

    :var:`dtd` must provide ``get_elements()`` (element names),
    ``get_elem(name)`` (element object) and the element objects must provide
    ``get_attr_list()`` and ``get_attr(name)``; attribute objects are read
    through their ``decl`` and ``default`` attributes.

    Only attributes named ``xmlns`` or ``xmlns:<prefix>`` are considered.
    Other prefixed attributes (e.g. a fixed ``xml:space``) are not namespace
    declarations, so their values must not end up in the result.
    """
    found = set()
    for elemname in dtd.get_elements():
        element = dtd.get_elem(elemname)
        for attrname in element.get_attr_list():
            if attrname != "xmlns" and not attrname.startswith("xmlns:"):
                continue # not a namespace declaration
            attr = element.get_attr(attrname)
            if attr.decl == "#FIXED":
                found.add(attr.default)
    return found
144
145
def _attrtype(dtdtype):
    # Map the type of a DTD attribute to a tuple
    # (name of the XIST attribute class, list of allowed values)
    if dtdtype == "ID":
        return ("xsc.IDAttr", [])
    if isinstance(dtdtype, list):
        if len(dtdtype) > 1:
            # enumerated attribute: keep the list of allowed values
            return ("xsc.TextAttr", dtdtype)
        # enumeration with (at most) one value
        return ("xsc.BoolAttr", [])
    return ("xsc.TextAttr", [])


def _contentnames(model):
    # Flatten a (nested) content model into a dict whose keys are the names
    # mentioned in it. A model of length 3 is treated as a group whose second
    # item holds the sub-models; anything else is a leaf whose first item is
    # the name.
    if len(model) == 3:
        names = {}
        for cont in model[1]:
            names.update(_contentnames(cont))
        return names
    return {model[0]: None}


def adddtd2xnd(ns, dtd):
    """
    Append the information from the DTD source :var:`dtd` (a string) to
    :var:`ns` (the :class:`xnd.Module` object).

    This is done in three steps:

    1.  For each element declared in the DTD an :class:`xnd.Element` (with
        an :class:`xnd.Attr` for each of its attributes, except namespace
        declarations and prefixed attributes) is added to :var:`ns`. If the
        DTD contains exactly one fixed namespace name (see :func:`getxmlns`)
        this name is used as the namespace of all elements, otherwise
        ``None`` is used.

    2.  ``modeltype`` and ``modelargs`` are set on each of those elements.
        If ``ns.model`` is ``"simple"`` ``modeltype`` is reduced to a bool
        (true if the element must be empty) and ``modelargs`` to ``None``.

    3.  For each general entity (except the five predefined ones) an
        :class:`xnd.CharRef` is added, using the first character of the
        parsed entity value as the codepoint. Entities that reference
        unknown entities or have an empty value are skipped.
    """
    dtd = dtdparser.load_dtd_string(dtd)

    # try to guess the namespace name from the dtd
    namespaces = getxmlns(dtd)
    if len(namespaces) == 1:
        xmlns = next(iter(namespaces))
    else:
        xmlns = None

    # Add element info
    elements = sorted(dtd.get_elements())
    for elemname in elements:
        dtd_e = dtd.get_elem(elemname)
        e = xnd.Element(elemname, xmlns=xmlns)

        # Add attribute info for this element
        for attrname in sorted(dtd_e.get_attr_list()):
            if attrname == "xmlns" or u":" in attrname:
                continue # skip namespace declarations and global attributes
            dtd_a = dtd_e.get_attr(attrname)
            (attrtype, values) = _attrtype(dtd_a.type)
            required = True if dtd_a.decl == "#REQUIRED" else None
            e += xnd.Attr(name=attrname, type=attrtype, default=dtd_a.default, required=required, values=values)
        ns += e

    # Iterate through the elements a second time and add model information
    # (this needs a second pass, as content models reference other elements
    # via ``ns.elements``)
    for elemname in elements:
        model = dtd.get_elem(elemname).get_content_model()
        modelargs = None
        if model is None:
            modeltype = "sims.Any"
        elif model == ("", [], ""):
            modeltype = "sims.Empty"
        else:
            modeltype = "sims.Elements"
            modelargs = []
            for cont in _contentnames(model):
                if cont == "#PCDATA":
                    modeltype = "sims.ElementsOrText"
                elif cont == "EMPTY":
                    modeltype = "sims.Empty"
                else:
                    modelargs.append(ns.elements[(cont, xmlns)])
            if not modelargs:
                # No child elements allowed: only text or nothing at all
                if modeltype == "sims.ElementsOrText":
                    modeltype = "sims.NoElements"
                else:
                    modeltype = "sims.NoElementsOrText"
        e = ns.elements[(elemname, xmlns)]
        if ns.model == "simple":
            # only record whether the element must be empty
            modeltype = modeltype == "sims.Empty"
            modelargs = None
        e.modeltype = modeltype
        e.modelargs = modelargs

    # Add entities
    for entname in sorted(dtd.get_general_entities()):
        if entname in ("quot", "apos", "gt", "lt", "amp"):
            continue # predefined entities
        try:
            ent = parse.tree(dtd.resolve_ge(entname).value, parse.Encoder("utf-8"), parse.SGMLOP(encoding="utf-8"), parse.NS(), parse.Node())
        except xsc.IllegalEntityError:
            continue # entity value references an entity we don't know
        try:
            char = unicode(ent[0])[0]
        except IndexError:
            continue # empty entity value: there's no character to reference
        ns += xnd.CharRef(entname, codepoint=ord(char))
242
243
def urls2xnd(urls, shareattrs=None, **kwargs):
    """
    Create an :class:`xnd.Module` object from the DTDs in :var:`urls` and
    return it.

    Each item in :var:`urls` may be:

    *   a :class:`url.URL` object, which will be opened for reading (and
        closed again after the DTD has been read);

    *   a :class:`str` object, which is taken as the DTD source itself;

    *   any other object with a ``read`` method.

    If :var:`urls` is empty the DTD will be read from ``sys.stdin``.

    :var:`shareattrs` specifies whether attributes should be shared:
    for ``"dupes"`` ``ns.shareattrs(False)`` will be called, for ``"all"``
    ``ns.shareattrs(True)``; for any other value nothing is done.

    :var:`kwargs` will be passed on to the :class:`xnd.Module` constructor.
    """
    ns = xnd.Module(**kwargs)
    with url.Context():
        if not urls:
            urls = [sys.stdin]
        for u in urls:
            if isinstance(u, url.URL):
                # We've opened this resource, so we're responsible for closing it
                stream = u.openread()
                try:
                    adddtd2xnd(ns, stream.read())
                finally:
                    stream.close()
            else:
                if isinstance(u, str):
                    u = cStringIO.StringIO(u)
                adddtd2xnd(ns, u.read())

    if shareattrs == "dupes":
        ns.shareattrs(False)
    elif shareattrs == "all":
        ns.shareattrs(True)
    return ns
261
262
def main(args=None):
    """
    Script entry point: parse the command line arguments :var:`args`, convert
    the specified DTDs and print the resulting XIST namespace on stdout.

    :var:`args` is passed to :meth:`argparse.ArgumentParser.parse_args`, so
    ``None`` means that ``sys.argv[1:]`` will be used.
    """
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)")
    p.add_argument("urls", metavar="urls", type=url.URL, help="Zero or more URLs of DTDs to be parsed (default stdin)", nargs="*")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument(      "--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)
    # All parsed options are passed on as keyword arguments: ``urls`` and
    # ``shareattrs`` are consumed by urls2xnd() itself, the rest ends up in
    # the xnd.Module constructor.
    # Single-argument call form, so this gives the same output whether
    # ``print`` is a statement or a function.
    print(urls2xnd(**vars(args)))
274
275
if __name__ == "__main__":
    # Executed as a script: run main() and use its return value as the
    # exit status.
    exitstatus = main()
    sys.exit(exitstatus)
Note: See TracBrowser for help on using the browser.