root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 5490:20d5b84d7e03

Revision 5490:20d5b84d7e03, 8.1 KB (checked in by Walter Doerwald <walter@…>, 7 years ago)

ll.xist.xsc and ll.url will now be compiled with Cython, if Cython is available.

  • Property exe set to *
Line 
1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3# cython: language_level=3
4
5## Copyright 1999-2013 by LivingLogic AG, Bayreuth/Germany
6## Copyright 1999-2013 by Walter Dörwald
7##
8## All Rights Reserved
9##
10## See ll/xist/__init__.py for the license
11
12
13"""
14Purpose
15-------
16
17``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
18It reads one or more DTDs and outputs a skeleton namespace module.
19
20
21Options
22-------
23
24``dtd2xsc`` supports the following options:
25
26    ``urls``
27        Zero or more URLs (or filenames) of DTDs to be parsed. If no URL is
28        given stdin will be read.
29
30    ``-x``, ``--xmlns``
31        The default namespace name. All elements that don't belong to any
32        namespace will be assigned to this namespace.
33
34    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
35        Should attributes be shared among the elements? ``none`` means that each
36        element will have its own standalone :class:`Attrs` class directly derived
37        from :class:`ll.xist.xsc.Element.Attrs`. For ``dupes`` each attribute that is
38        used by more than one element will be moved into its own :class:`Attrs`
39        class. For ``all`` this will be done for all attributes.
40
41    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
42        Add model information to the namespace. ``no`` doesn't add any model
43        information. ``simple`` only adds ``model = False`` or ``model = True``
44        (i.e. only the information whether the element must be empty or not).
45        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
46        ``fullonce`` adds full model information too, but reuses model objects for
47        elements which have the same model.
48
49    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
50        Should default values for attributes specified in the DTD be added to the
51        XIST namespace (as the ``default`` specification in the attribute class)?
52
53    ``--duplicates`` : ``reject``, ``allow``, ``merge``
54        If more than one DTD is specified on the command line, some elements
55        might be specified in more than one DTD. ``--duplicates`` specifies how
56        to handle this case: ``reject`` doesn't allow multiple element
57        specifications. ``allow`` allows them, but only if both specifications
58        are identical (i.e. have the same attributes). ``merge`` allows them and
59        adds the attribute specification of all element specifications to the
60        resulting XIST namespace.
61
62Note that ``dtd2xsc`` requires lxml_ to work.
63
64    .. _lxml: http://lxml.de/
65
66
67Example
68-------
69
70Suppose we have the following DTD file (named ``foo.dtd``)::
71
72    <?xml version="1.0" encoding="ISO-8859-1"?>
73    <!ELEMENT persons (person*)>
74    <!ELEMENT person (firstname?, lastname?)>
75    <!ATTLIST person id CDATA #REQUIRED>
76    <!ELEMENT firstname (#PCDATA)>
77    <!ELEMENT lastname (#PCDATA)>
78
79Then we can generate a skeleton XIST namespace from it with the following command::
80
81    dtd2xsc foo.dtd -xhttp://xmlns.example.org/ -mfullall
82
83The output will be::
84
85    # -*- coding: ascii -*-
86
87
88    from ll.xist import xsc, sims
89
90
91    xmlns = 'http://xmlns.example.org/'
92
93
94    class firstname(xsc.Element): xmlns = xmlns
95
96
97    class lastname(xsc.Element): xmlns = xmlns
98
99
100    class person(xsc.Element):
101        xmlns = xmlns
102        class Attrs(xsc.Element.Attrs):
103            class id(xsc.TextAttr): required = True
104
105
106    class persons(xsc.Element): xmlns = xmlns
107
108
109    person.model = sims.Elements(lastname, firstname)
110    persons.model = sims.Elements(person)
111    firstname.model = sims.NoElements()
112    lastname.model = sims.NoElements()
113"""
114
115
116import sys, os.path, argparse, io, operator
117
118from ll import misc, url
119from ll.xist import xsc, parse, xnd
120
121
# Declares the markup used by the docstrings in this module
# (the ``__docformat__`` convention read by documentation tools).
__docformat__ = "reStructuredText"
123
124
def getxmlns(dtd):
    """
    Return the set of values of all fixed ``xmlns`` attributes in :obj:`dtd`.

    :obj:`dtd` must provide an ``iterelements()`` method; every element
    declaration it yields must provide ``iterattributes()``, and every
    attribute declaration the attributes ``name``, ``default`` and
    ``default_value``.

    An attribute declaration is picked up when its name is ``xmlns`` or
    contains a colon, and its ``default`` is ``"fixed"``. The result contains
    the ``default_value`` of each such declaration (duplicates collapse,
    since it is a set).
    """
    return {
        attrdecl.default_value
        for elemdecl in dtd.iterelements() # This requires ``lxml`` version 2.4
        for attrdecl in elemdecl.iterattributes()
        if (attrdecl.name == "xmlns" or ":" in attrdecl.name) and attrdecl.default == "fixed"
    }
135
136
def _contentnames(model):
    # Return the set of all element names referenced in the DTD content model
    # :obj:`model`. A content model node has ``name``, ``left`` and ``right``
    # attributes; ``None`` stands for "no node". The tree is walked with an
    # explicit stack, so nesting depth is not bounded by the recursion limit.
    names = set()
    pending = [model]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        if node.name is not None:
            names.add(node.name)
        pending.append(node.left)
        pending.append(node.right)
    return names


def adddtd2xnd(ns, dtd):
    """
    Append the information from the DTD :obj:`dtd` to :obj:`ns`.

    :obj:`ns` is the :class:`xnd.Module` object that gets extended;
    :obj:`dtd` is passed to ``lxml.etree.DTD`` unchanged (so this function
    requires lxml).

    Three things are added to :obj:`ns`:

    *   one :class:`xnd.Element` per element declaration, together with an
        :class:`xnd.Attr` for each of its attributes (``xmlns`` attributes and
        attributes with a prefix are skipped);
    *   ``modeltype``/``modelargs`` for each of those elements, derived from
        the declared content type (or a simple boolean if ``ns.model`` is
        ``"simple"``);
    *   an :class:`xnd.CharRef` for each entity whose content is exactly one
        character (except the five predefined XML entities).

    A :exc:`KeyError` is raised if a content model references an element that
    can't be found in ``ns.elements``.
    """
    from lxml import etree # This requires lxml (http://lxml.de/)
    dtd = etree.DTD(dtd)

    # Try to guess the namespace name from the DTD: the guess is only used
    # when the DTD contains exactly one distinct fixed ``xmlns`` value
    candidates = getxmlns(dtd)
    if len(candidates) == 1:
        xmlns = next(iter(candidates))
    else:
        xmlns = None

    namegetter = operator.attrgetter("name")

    # Add element info
    elements = sorted(dtd.iterelements(), key=namegetter)
    for elemdecl in elements:
        e = xnd.Element(xmlns, elemdecl.name)

        # Add attribute info for this element
        for attrdecl in sorted(elemdecl.iterattributes(), key=namegetter):
            if attrdecl.name == "xmlns" or attrdecl.prefix:
                continue # skip namespace declarations and global attributes
            values = []
            if attrdecl.type == "id":
                attrtype = "xsc.IDAttr"
            else:
                attrtype = "xsc.TextAttr"
                values = attrdecl.values()
                if len(values) == 1:
                    # An enumeration with a single allowed value is treated as a boolean attribute
                    attrtype = "xsc.BoolAttr"
                    values = None
                elif not values:
                    values = None
            required = True if attrdecl.default == "required" else None
            e += xnd.Attr(name=attrdecl.name, type=attrtype, default=attrdecl.default_value, required=required, values=values)
        ns += e

    # Iterate through the elements a second time and add model information
    # (this needs all elements to be registered in ``ns`` already)
    for elemdecl in elements:
        if elemdecl.type == "empty":
            modeltype = "sims.Empty"
            modelargs = None
        elif elemdecl.type == "any":
            modeltype = "sims.Any"
            modelargs = None
        else:
            mixed = elemdecl.type == "mixed"
            # Sort the names: iterating over the set directly would make the
            # order of the model arguments depend on string hashing, i.e. the
            # generated module could differ from run to run
            names = sorted(_contentnames(elemdecl.content))
            modelargs = [ns.elements[(xmlns, name)] for name in names]
            if names:
                modeltype = "sims.ElementsOrText" if mixed else "sims.Elements"
            else:
                modeltype = "sims.NoElements" if mixed else "sims.NoElementsOrText"
        e = ns.elements[(xmlns, elemdecl.name)]
        if ns.model == "simple":
            # The simple model only records whether the element must be empty
            modeltype = modeltype == "sims.Empty"
            modelargs = None
        e.modeltype = modeltype
        e.modelargs = modelargs

    # Add entities: only those that consist of a single character can be
    # turned into a character reference
    for entdecl in sorted(dtd.iterentities(), key=namegetter):
        if entdecl.name not in ("quot", "apos", "gt", "lt", "amp") and entdecl.content and len(entdecl.content) == 1:
            ns += xnd.CharRef(entdecl.name, codepoint=ord(entdecl.content))
221
222
def urls2xnd(urls, shareattrs=None, **kwargs):
    """
    Parse the DTDs in :obj:`urls` and return them as one :class:`xnd.Module`.

    :obj:`urls` is an iterable of DTD sources. A :class:`url.URL` object is
    opened for reading, a :class:`str` is wrapped in an :class:`io.StringIO`
    (i.e. it is treated as the DTD source text itself), anything else is
    handed on unchanged. If :obj:`urls` is empty, ``sys.stdin`` is used.

    :obj:`shareattrs` can be ``"dupes"`` or ``"all"``; in that case
    ``shareattrs()`` is called on the module with ``False`` or ``True``
    respectively. Any other value leaves the attributes alone.

    All other keyword arguments are passed to :class:`xnd.Module`.
    """
    ns = xnd.Module(**kwargs)
    with url.Context():
        for source in (urls or [sys.stdin]):
            if isinstance(source, url.URL):
                stream = source.openread()
            elif isinstance(source, str):
                stream = io.StringIO(source)
            else:
                stream = source
            adddtd2xnd(ns, stream)

    if shareattrs in ("dupes", "all"):
        ns.shareattrs(shareattrs == "all")
    return ns
240
241
def main(args=None):
    """
    Command line entry point: convert DTDs to an XIST namespace on stdout.

    :obj:`args` is the list of command line arguments; with ``None`` (the
    default) :mod:`argparse` falls back to ``sys.argv``. All parsed options
    are passed to :func:`urls2xnd` as keyword arguments and the resulting
    object is printed.
    """
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)", epilog="For more info see http://www.livinglogic.de/Python/xist/scripts/dtd2xsc.html")
    p.add_argument("urls", metavar="urls", type=url.URL, help="Zero or more URLs of DTDs to be parsed (default stdin)", nargs="*")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument(      "--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)
    # The option ``dest`` names double as the keyword argument names
    print(urls2xnd(**vars(args)))
253
254
# Run the converter when this file is executed as a script; the return value
# of ``main`` is handed to ``sys.exit``.
if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.