root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 4435:972d763188e6

Revision 4435:972d763188e6, 7.2 KB (checked in by Walter Doerwald <walter@…>, 9 years ago)

Fix call to dt2xsc.urls2xnd().

Line 
1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/__init__.py for the license
10
11
12"""
13``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
14It reads one or more DTDs and outputs a skeleton namespace module.
15
16``dtd2xsc`` supports the following options:
17
18    ``urls``
19        Zero or more URLs (or filenames) of DTDs to be parsed. If no URL is
20        given stdin will be read.
21
22    ``-x``, ``--xmlns``
23        The default namespace name. All elements that don't belong to any
24        namespace will be assigned to this namespace.
25
26    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
27        Should attributes be shared among the elements? ``none`` means that each
28        element will have its own standalone :class:`Attrs` class directly derived
29        from :class:`ll.xist.Elements.Attrs`. For ``dupes`` each attribute that is
30        used by more than one element will be moved into its own :class:`Attrs`
31        class. For ``all`` this will be done for all attributes.
32
33    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
34        Add model information to the namespace. ``no`` doesn't add any model
35        information. ``simple`` only adds ``model = False`` or ``model = True``
36        (i.e. only the information whether the element must be empty or not).
37        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
38        ``fullonce`` adds full model information too, but reuses model objects for
39        elements which have the same model.
40
41    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
42        Should default values for attributes specified in the DTD be added to the
43        XIST namespace (as the ``default`` specification in the attribute class)?
44
45    ``--duplicates`` : ``reject``, ``allow``, ``merge``
46        If more than one DTD is specified on the command line, some elements
47        might be specified in more than one DTD. ``--duplicates`` specifies how
48        to handle this case: ``reject`` doesn't allow multiple element
49        specifications. ``allow`` allows them, but only if both specifications
50        are identical (i.e. have the same attributes). ``merge`` allows them and
51        adds the attribute specification of all element specifications to the
52        resulting XIST namespace.
53
54Note that ``dtd2xsc`` requires xmlproc_ to work.
55
56    .. _xmlproc: http://www.garshol.priv.no/download/software/xmlproc/
57"""
58
59
60__docformat__ = "reStructuredText"
61
62
63import sys, os.path, argparse, cStringIO
64
65try:
66    from xml.parsers.xmlproc import dtdparser
67except ImportError:
68    from xmlproc import dtdparser
69
70from ll import misc, url
71from ll.xist import xsc, parse, xnd
72
73
74__docformat__ = "reStructuredText"
75
76
def getxmlns(dtd):
    """
    Return the set of default values of all ``#FIXED`` namespace attributes
    declared in :var:`dtd`.

    :var:`dtd` must provide ``get_elements()`` (element names),
    ``get_elem(name)`` (an element declaration) and each element declaration
    must provide ``get_attr_list()`` and ``get_attr(name)``; attribute
    declarations are read through their ``decl`` and ``default`` attributes.

    An attribute is considered when it is named ``xmlns`` or when its name
    contains a colon. Note that this matches *any* prefixed attribute, not
    only ``xmlns:*`` ones.
    """
    found = set()
    for elemname in dtd.get_elements():
        element = dtd.get_elem(elemname)
        for attrname in element.get_attr_list():
            attr = element.get_attr(attrname)
            if attrname == "xmlns" or u":" in attrname:
                # Only fixed declarations carry a usable namespace name
                if attr.decl == "#FIXED":
                    found.add(attr.default)
    return found
89
90
def _extractcont(model):
    # Flatten a content model into a dict whose keys are the names that occur
    # anywhere inside it. A 3-item model is treated as a group whose second
    # item is the list of sub-models; anything else is treated as a leaf whose
    # first item is the name.
    if len(model) == 3:
        result = {}
        for cont in model[1]:
            result.update(_extractcont(cont))
        return result
    return {model[0]: None}


def adddtd2xnd(ns, dtd):
    """
    Append the information from the DTD source string :var:`dtd` to the
    :class:`xnd.Module` object :var:`ns`.

    The DTD is parsed with ``dtdparser.load_dtd_string``. Three things are
    added to :var:`ns`:

    *   one :class:`xnd.Element` per declared element, including its
        attributes (``xmlns`` and attributes containing a colon are skipped);
    *   ``modeltype``/``modelargs`` on each of those elements, derived from the
        element's content model (reduced to a boolean when ``ns.model`` is
        ``"simple"``);
    *   one :class:`xnd.CharRef` per general entity other than the five
        predefined XML entities.
    """
    dtd = dtdparser.load_dtd_string(dtd)

    # Try to guess the namespace name from the DTD: it is only used when the
    # fixed namespace attributes agree on exactly one value
    xmlns = getxmlns(dtd)
    xmlns = next(iter(xmlns)) if len(xmlns) == 1 else None

    # Add element info
    elements = sorted(dtd.get_elements())
    for elemname in elements:
        dtd_e = dtd.get_elem(elemname)
        e = xnd.Element(elemname, xmlns=xmlns)

        # Add attribute info for this element
        for attrname in sorted(dtd_e.get_attr_list()):
            if attrname == "xmlns" or u":" in attrname:
                continue # skip namespace declarations and global attributes
            dtd_a = dtd_e.get_attr(attrname)
            values = []
            if dtd_a.type == "ID":
                attrtype = "xsc.IDAttr"
            else:
                attrtype = "xsc.TextAttr"
                if isinstance(dtd_a.type, list):
                    if len(dtd_a.type) > 1:
                        # Several allowed values: keep them as the value list
                        values = dtd_a.type
                    else:
                        # A single allowed value is mapped to a boolean attribute
                        attrtype = "xsc.BoolAttr"
            required = True if dtd_a.decl == "#REQUIRED" else None
            e += xnd.Attr(name=attrname, type=attrtype, default=dtd_a.default, required=required, values=values)
        ns += e

    # Iterate through the elements a second time and add model information
    # (a second pass is needed because models reference other elements of ``ns``)
    for elemname in elements:
        model = dtd.get_elem(elemname).get_content_model()
        if model is None:
            modeltype = "sims.Any"
            modelargs = None
        elif model == ("", [], ""):
            modeltype = "sims.Empty"
            modelargs = None
        else:
            modeltype = "sims.Elements"
            modelargs = []
            for cont in _extractcont(model):
                if cont == "#PCDATA":
                    modeltype = "sims.ElementsOrText"
                elif cont == "EMPTY":
                    modeltype = "sims.Empty"
                else:
                    modelargs.append(ns.elements[(cont, xmlns)])
            if not modelargs:
                # No child elements at all: pick the matching "no elements" model
                # NOTE(review): this also overrides a "sims.Empty" set in the
                # loop above -- kept as in the original, confirm it is intended
                if modeltype == "sims.ElementsOrText":
                    modeltype = "sims.NoElements"
                else:
                    modeltype = "sims.NoElementsOrText"
        e = ns.elements[(elemname, xmlns)]
        if ns.model == "simple":
            # Simple mode only records whether the element must be empty
            modeltype = modeltype == "sims.Empty"
            modelargs = None
        e.modeltype = modeltype
        e.modelargs = modelargs

    # Add entities
    for entname in sorted(dtd.get_general_entities()):
        if entname in ("quot", "apos", "gt", "lt", "amp"):
            continue
        try:
            ent = parse.tree(dtd.resolve_ge(entname).value, parse.Encoder("utf-8"), parse.SGMLOP(encoding="utf-8"), parse.NS(), parse.Node())
        except xsc.IllegalEntityError:
            # The entity value could not be parsed; such entities are left out
            continue
        # The character reference uses the first character of the first parsed node
        ns += xnd.CharRef(entname, codepoint=ord(unicode(ent[0])[0]))
187
188
def urls2xnd(urls, shareattrs=None, **kwargs):
    """
    Create an :class:`xnd.Module` from the DTDs in :var:`urls` and return it.

    :var:`urls` is a sequence whose items may be:

    *   :class:`url.URL` objects, which are opened for reading, read
        completely and closed again;
    *   ``str`` objects, which are taken as the DTD source text itself;
    *   any other object with a ``read()`` method (it is read, but not closed,
        since it belongs to the caller).

    If :var:`urls` is empty, the DTD is read from ``sys.stdin``.

    :var:`shareattrs` selects attribute sharing: ``"dupes"`` calls
    ``ns.shareattrs(False)``, ``"all"`` calls ``ns.shareattrs(True)``; any
    other value leaves the module untouched. All remaining keyword arguments
    are passed on to :class:`xnd.Module`.
    """
    ns = xnd.Module(**kwargs)
    with url.Context():
        if not urls:
            urls = [sys.stdin]
        for u in urls:
            if isinstance(u, url.URL):
                resource = u.openread()
                try:
                    data = resource.read()
                finally:
                    # Release the resource even if reading fails
                    resource.close()
            elif isinstance(u, str):
                data = u
            else:
                data = u.read()
            adddtd2xnd(ns, data)

    if shareattrs == "dupes":
        ns.shareattrs(False)
    elif shareattrs == "all":
        ns.shareattrs(True)
    return ns
206
207
def main(args=None):
    """
    Command line entry point: parse :var:`args`, convert the given DTDs and
    print the resulting XIST namespace to stdout.

    :var:`args` is the list of command line arguments; with ``None`` argparse
    falls back to ``sys.argv``. All parsed options are passed on to
    :func:`urls2xnd` as keyword arguments.
    """
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)")
    p.add_argument("urls", metavar="urls", type=url.URL, help="Zero or more URLs of DTDs to be parsed (default stdin)", nargs="*")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    # argparse does not check defaults against ``choices``, so the default
    # must itself be one of the accepted values
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument(      "--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)
    # Single-argument ``print(...)`` behaves identically as a Python 2
    # statement and as a Python 3 function call
    print(urls2xnd(**vars(args)))
219
220
# Run the converter when the module is executed as a script
if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.