root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 4426:a63788f18c28

Revision 4426:a63788f18c28, 7.1 KB (checked in by Walter Doerwald <walter@…>, 9 years ago)

Full docstring for dtd2xsc. Remove docstrings from helper functions.

Line 
1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/__init__.py for the license
10
11
12"""
13``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
14It reads one or more DTDs and outputs a skeleton namespace module.
15
16``dtd2xsc`` supports the following options:
17
18    ``urls``
19        One or more URLs (or filenames) of DTDs to be parsed
20
21    ``-x``, ``--xmlns``
22        The default namespace name. All elements that don't belong to any
23        namespace will be assigned to this namespace.
24
25    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
26        Should attributes be shared among the elements? ``none`` means that each
27        element will have its own standalone :class:`Attrs` class directly derived
28        from :class:`ll.xist.Elements.Attrs`. For ``dupes`` each attribute that is
29        used by more than one element will be moved into its own :class:`Attrs`
30        class. For ``all`` this will be done for all attributes.
31
32    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
33        Add model information to the namespace. ``no`` doesn't add any model
34        information. ``simple`` only adds ``model = False`` or ``model = True``
35        (i.e. only the information whether the element must be empty or not).
36        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
37        ``fullonce`` adds full model information too, but reuses model objects for
38        elements which have the same model.
39
40    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
41        Should default values for attributes specified in the DTD be added to the
42        XIST namespace (as the ``default`` specification in the attribute class)?
43
44    ``--duplicates`` : ``reject``, ``allow``, ``merge``
45        If more than one DTD is specified on the command line, some elements
46        might be specified in more than one DTD. ``--duplicates`` specifies how
47        to handle this case: ``reject`` doesn't allow multiple element
48        specifications. ``allow`` allows them, but only if both specifications
49        are identical (i.e. have the same attributes). ``merge`` allows them and
50        adds the attribute specification of all element specifications to the
51        resulting XIST namespace.
52
53Note that ``dtd2xsc`` requires xmlproc_ to work.
54
55    .. _xmlproc: http://www.garshol.priv.no/download/software/xmlproc/
56
57"""
58
59
60__docformat__ = "reStructuredText"
61
62
63import sys, os.path, argparse, cStringIO
64
65try:
66    from xml.parsers.xmlproc import dtdparser
67except ImportError:
68    from xmlproc import dtdparser
69
70from ll import misc, url
71from ll.xist import xsc, parse, xnd
72
73
74__docformat__ = "reStructuredText"
75
76
def getxmlns(dtd):
    # Return the set of default values of all ``#FIXED`` attributes that are
    # either named ``xmlns`` or have a colon in their name, collected over
    # every element of :var:`dtd`.
    namespaces = set()
    for name in dtd.get_elements():
        dtd_elem = dtd.get_elem(name)
        # Pair each attribute name with its declaration object
        declarations = (
            (attrname, dtd_elem.get_attr(attrname))
            for attrname in dtd_elem.get_attr_list()
        )
        namespaces.update(
            decl.default
            for (attrname, decl) in declarations
            if (attrname == "xmlns" or u":" in attrname) and decl.decl == "#FIXED"
        )
    return namespaces
89
90
def _extractcont(model):
    # Flatten a (possibly nested) content model into a dict whose keys are the
    # names found in it (all values are ``None``). A model of length 3 is
    # treated as a group whose second item holds the nested models; anything
    # else is treated as a leaf whose first item is the name.
    if len(model) != 3:
        return {model[0]: None}
    names = {}
    for cont in model[1]:
        names.update(_extractcont(cont))
    return names


def _modelinfo(ns, xmlns, model):
    # Translate the content model of one element into a ``(modeltype,
    # modelargs)`` pair: ``modeltype`` is the name of a :mod:`sims` class and
    # ``modelargs`` is either ``None`` or the list of entries from
    # ``ns.elements`` for the element names mentioned in :var:`model`.
    if model is None:
        return ("sims.Any", None)
    if model == ("", [], ""):
        return ("sims.Empty", None)
    modeltype = "sims.Elements"
    modelargs = []
    for cont in _extractcont(model):
        if cont == "#PCDATA":
            modeltype = "sims.ElementsOrText"
        elif cont == "EMPTY":
            modeltype = "sims.Empty"
        else:
            # NOTE(review): this lookup presumably fails for a content model
            # naming an element that the DTD never declares -- confirm.
            modelargs.append(ns.elements[(cont, xmlns)])
    if not modelargs:
        # No element names were found: switch to the ``NoElements*`` variants
        if modeltype == "sims.ElementsOrText":
            modeltype = "sims.NoElements"
        else:
            modeltype = "sims.NoElementsOrText"
    return (modeltype, modelargs)


def adddtd2xnd(ns, dtd):
    # Appends DTD information from :var:`dtd` to the :class:`xnd.Module` object
    # :var:`ns`. :var:`dtd` is the source text of the DTD; it is parsed with
    # ``dtdparser.load_dtd_string``. Elements (with their attributes), model
    # information and character entities are added to :var:`ns` in that order.
    dtd = dtdparser.load_dtd_string(dtd)

    # try to guess the namespace name from the dtd: it is only used if the
    # fixed ``xmlns`` attributes agree on exactly one value
    xmlns = getxmlns(dtd)
    xmlns = next(iter(xmlns)) if len(xmlns) == 1 else None

    # Add element info
    elements = dtd.get_elements()
    elements.sort()
    for elemname in elements:
        dtd_e = dtd.get_elem(elemname)
        e = xnd.Element(elemname, xmlns=xmlns)

        # Add attribute info for this element
        attrs = dtd_e.get_attr_list()
        attrs.sort()
        for attrname in attrs:
            dtd_a = dtd_e.get_attr(attrname)
            if attrname == "xmlns" or u":" in attrname:
                continue # skip namespace declarations and global attributes
            values = []
            if dtd_a.type == "ID":
                attrtype = "xsc.IDAttr"
            elif isinstance(dtd_a.type, list):
                # A list type with more than one entry becomes a text attribute
                # restricted to these values; a single entry becomes a boolean
                # attribute
                if len(dtd_a.type) > 1:
                    attrtype = "xsc.TextAttr"
                    values = dtd_a.type
                else:
                    attrtype = "xsc.BoolAttr"
            else:
                attrtype = "xsc.TextAttr"
            required = True if dtd_a.decl == "#REQUIRED" else None
            e += xnd.Attr(name=attrname, type=attrtype, default=dtd_a.default, required=required, values=values)
        ns += e

    # Iterate through the elements a second time and add model information
    # (this needs all elements to be registered in ``ns.elements`` already)
    for elemname in elements:
        (modeltype, modelargs) = _modelinfo(ns, xmlns, dtd.get_elem(elemname).get_content_model())
        if ns.model == "simple":
            # Simple models only record whether the element must be empty
            modeltype = modeltype == "sims.Empty"
            modelargs = None
        e = ns.elements[(elemname, xmlns)]
        e.modeltype = modeltype
        e.modelargs = modelargs

    # Add entities (except the five predefined XML entities)
    ents = dtd.get_general_entities()
    ents.sort()
    for entname in ents:
        if entname in ("quot", "apos", "gt", "lt", "amp"):
            continue
        try:
            ent = parse.tree(dtd.resolve_ge(entname).value, parse.Encoder("utf-8"), parse.SGMLOP(encoding="utf-8"), parse.NS(), parse.Node())
        except xsc.IllegalEntityError:
            # Deliberately best effort: entities whose replacement text can't
            # be parsed are left out of the namespace
            continue
        # The codepoint is taken from the first character of the first parsed node
        ns += xnd.CharRef(entname, codepoint=ord(unicode(ent[0])[0]))
187
188
def urls2xnd(urls, shareattrs=None, **kwargs):
    # Create an :class:`xnd.Module` (configured via :var:`kwargs`) and feed the
    # DTD read from each item in :var:`urls` into it. An item may be an
    # :class:`url.URL` (which is opened for reading), a ``str`` (which is
    # wrapped in a ``StringIO``) or any other object with a ``read`` method.
    module = xnd.Module(**kwargs)
    with url.Context():
        for source in urls:
            if isinstance(source, url.URL):
                stream = source.openread()
            elif isinstance(source, str):
                stream = cStringIO.StringIO(source)
            else:
                stream = source
            adddtd2xnd(module, stream.read())

    # ``shareattrs`` is only called for "dupes" (with ``False``) and "all"
    # (with ``True``); any other value leaves the module as it is
    if shareattrs in ("dupes", "all"):
        module.shareattrs(shareattrs == "all")
    return module
204
205
def main(args=None):
    # Command line entry point: parse :var:`args` (``None`` lets ``argparse``
    # fall back to ``sys.argv``), convert the given DTDs with :func:`urls2xnd`
    # and print the resulting namespace module to stdout.
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)")
    p.add_argument("urls", metavar="urls", type=url.URL, help="URLs of DTDs to be parsed", nargs="+")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    # The default must be one of the choices: ``argparse`` doesn't validate
    # defaults, so an invalid one would be passed on silently
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument("--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)
    # The parsed options already contain ``urls``; passing it positionally as
    # well would raise a ``TypeError`` (multiple values for ``urls``)
    print(urls2xnd(**vars(args)))
217
218
if __name__ == "__main__":
    # Run the script and use the return value of ``main`` as the exit status
    exitcode = main()
    sys.exit(exitcode)
Note: See TracBrowser for help on using the browser.