root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 4430:87d88dc1dee6

Revision 4430:87d88dc1dee6, 7.1 KB (checked in by Walter Doerwald <walter@…>, 9 years ago)

Whitespace.

Line 
1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/__init__.py for the license
10
11
12"""
13``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
14It reads one or more DTDs and outputs a skeleton namespace module.
15
16``dtd2xsc`` supports the following options:
17
18    ``urls``
19        One or more URLs (or filenames) of DTDs to be parsed
20
21    ``-x``, ``--xmlns``
22        The default namespace name. All elements that don't belong to any
23        namespace will be assigned to this namespace.
24
25    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
26        Should attributes be shared among the elements? ``none`` means that each
27        element will have its own standalone :class:`Attrs` class directly derived
28        from :class:`ll.xist.Elements.Attrs`. For ``dupes`` each attribute that is
29        used by more than one element will be moved into its own :class:`Attrs`
30        class. For ``all`` this will be done for all attributes.
31
32    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
33        Add model information to the namespace. ``no`` doesn't add any model
34        information. ``simple`` only adds ``model = False`` or ``model = True``
35        (i.e. only the information whether the element must be empty or not).
36        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
37        ``fullonce`` also adds full model information, but reuses model objects
38        for elements which have the same model.
39
40    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
41        Should default values for attributes specified in the DTD be added to the
42        XIST namespace (as the ``default`` specification in the attribute class)?
43
44    ``--duplicates`` : ``reject``, ``allow``, ``merge``
45        If more than one DTD is specified on the command line, some elements
46        might be specified in more than one DTD. ``--duplicates`` specifies how
47        to handle this case: ``reject`` doesn't allow multiple element
48        specifications. ``allow`` allows them, but only if both specifications
49        are identical (i.e. have the same attributes). ``merge`` allows them and
50        adds the attribute specification of all element specifications to the
51        resulting XIST namespace.
52
53Note that ``dtd2xsc`` requires xmlproc_ to work.
54
55    .. _xmlproc: http://www.garshol.priv.no/download/software/xmlproc/
56"""
57
58
59__docformat__ = "reStructuredText"
60
61
62import sys, os.path, argparse, cStringIO
63
64try:
65    from xml.parsers.xmlproc import dtdparser
66except ImportError:
67    from xmlproc import dtdparser
68
69from ll import misc, url
70from ll.xist import xsc, parse, xnd
71
72
73__docformat__ = "reStructuredText"
74
75
def getxmlns(dtd):
    """
    Return the set of values of all ``#FIXED`` namespace declarations in
    :var:`dtd`.

    :var:`dtd` must provide ``get_elements()`` and ``get_elem(name)``; the
    element objects must provide ``get_attr_list()`` and ``get_attr(name)``,
    and the attribute objects ``decl`` and ``default``.

    Only attributes named ``xmlns`` or ``xmlns:<prefix>`` count as namespace
    declarations. Other attributes that merely contain a colon (for example
    ``xml:lang`` or ``xlink:type``) are ignored, so that their fixed values
    are not mistaken for namespace names.
    """
    found = set()
    for elemname in dtd.get_elements():
        element = dtd.get_elem(elemname)
        for attrname in element.get_attr_list():
            # A colon alone is not enough: ``xlink:type #FIXED 'simple'``
            # is a prefixed attribute, not a namespace declaration.
            if attrname == "xmlns" or attrname.startswith("xmlns:"):
                attr = element.get_attr(attrname)
                if attr.decl == "#FIXED":
                    found.add(attr.default)
    return found
88
89
def adddtd2xnd(ns, dtd):
    """
    Parse the DTD source string :var:`dtd` and append the information found
    in it (elements with their attributes, content models and character
    entities) to the :class:`xnd.Module` object :var:`ns`.
    """
    def collectnames(node):
        # A 3-item node is a group whose second item holds the sub-models;
        # anything else is treated as a leaf whose first item is the name.
        if len(node) != 3:
            return {node[0]: None}
        names = {}
        for child in node[1]:
            names.update(collectnames(child))
        return names

    parsed = dtdparser.load_dtd_string(dtd)

    # Guess the namespace name from the DTD: only use it if it is unambiguous
    candidates = getxmlns(parsed)
    xmlns = next(iter(candidates)) if len(candidates) == 1 else None

    # First pass: create an element (including its attributes) per declaration
    elemnames = parsed.get_elements()
    elemnames.sort()
    for elemname in elemnames:
        elemdecl = parsed.get_elem(elemname)
        element = xnd.Element(elemname, xmlns=xmlns)

        attrnames = elemdecl.get_attr_list()
        if attrnames:
            attrnames.sort()
            for attrname in attrnames:
                attrdecl = elemdecl.get_attr(attrname)
                # Skip namespace declarations and global attributes
                if attrname == "xmlns" or u":" in attrname:
                    continue
                values = []
                attrtype = "xsc.TextAttr"
                if attrdecl.type == "ID":
                    attrtype = "xsc.IDAttr"
                elif isinstance(attrdecl.type, list):
                    if len(attrdecl.type) > 1:
                        # Several alternatives: keep them as allowed values
                        values = attrdecl.type
                    else:
                        # A list with at most one value becomes a boolean attribute
                        attrtype = "xsc.BoolAttr"
                required = True if attrdecl.decl == "#REQUIRED" else None
                element += xnd.Attr(
                    name=attrname,
                    type=attrtype,
                    default=attrdecl.default,
                    required=required,
                    values=values,
                )
        ns += element

    # Second pass: all elements are registered now, so content models can
    # refer to them
    for elemname in elemnames:
        content = parsed.get_elem(elemname).get_content_model()
        modelargs = None
        if content is None:
            modeltype = "sims.Any"
        elif content == ("", [], ""):
            modeltype = "sims.Empty"
        else:
            modeltype = "sims.Elements"
            modelargs = []
            for name in collectnames(content):
                if name == "#PCDATA":
                    modeltype = "sims.ElementsOrText"
                elif name == "EMPTY":
                    modeltype = "sims.Empty"
                else:
                    modelargs.append(ns.elements[(name, xmlns)])
            if not modelargs:
                # No child elements were collected
                if modeltype == "sims.ElementsOrText":
                    modeltype = "sims.NoElements"
                else:
                    modeltype = "sims.NoElementsOrText"
        target = ns.elements[(elemname, xmlns)]
        if ns.model == "simple":
            # In simple mode only record whether the element must be empty
            modeltype = (modeltype == "sims.Empty")
            modelargs = None
        target.modeltype = modeltype
        target.modelargs = modelargs

    # Finally add the general entities as character references
    entnames = parsed.get_general_entities()
    entnames.sort()
    for entname in entnames:
        if entname in ("quot", "apos", "gt", "lt", "amp"):
            continue
        try:
            node = parse.tree(parsed.resolve_ge(entname).value, parse.Encoder("utf-8"), parse.SGMLOP(encoding="utf-8"), parse.NS(), parse.Node())
        except xsc.IllegalEntityError:
            # Entities whose value can't be parsed are left out
            continue
        ns += xnd.CharRef(entname, codepoint=ord(unicode(node[0])[0]))
186
187
def urls2xnd(urls, shareattrs=None, **kwargs):
    """
    Create an :class:`xnd.Module` (passing :var:`kwargs` to its constructor),
    feed the DTDs from :var:`urls` into it and return it.

    Each item in :var:`urls` may be a :class:`url.URL` (which is opened for
    reading), a :class:`str` (which is wrapped in a ``StringIO`` and thus
    treated as the DTD source itself) or any other object with a ``read``
    method. If :var:`shareattrs` is ``"dupes"`` or ``"all"``,
    ``shareattrs(False)`` or ``shareattrs(True)`` is called on the module
    afterwards.
    """
    module = xnd.Module(**kwargs)
    with url.Context():
        for source in urls:
            if isinstance(source, url.URL):
                stream = source.openread()
            elif isinstance(source, str):
                stream = cStringIO.StringIO(source)
            else:
                stream = source
            adddtd2xnd(module, stream.read())

    if shareattrs in ("dupes", "all"):
        module.shareattrs(shareattrs == "all")
    return module
203
204
def main(args=None):
    """
    Command line entry point.

    Parses :var:`args` (or ``sys.argv[1:]`` if :var:`args` is ``None``),
    passes the DTD URLs and the remaining options to :func:`urls2xnd` and
    prints the result to stdout.
    """
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)")
    p.add_argument("urls", metavar="urls", type=url.URL, help="URLs of DTDs to be parsed", nargs="+")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    # The default must be one of the choices: argparse does not validate
    # string defaults against ``choices``, so a wrong value would slip through.
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument(      "--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)

    # ``urls`` is passed positionally, so it must be removed from the keyword
    # options; otherwise the call fails with "multiple values for 'urls'".
    options = dict(vars(args))
    urls = options.pop("urls")
    # With a single argument this form prints the same on Python 2 and 3.
    print(urls2xnd(urls, **options))
216
217
# Run the command line interface when this file is executed as a script.
if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.