root/livinglogic.python.xist/src/ll/xist/scripts/dtd2xsc.py @ 4435:972d763188e6

Revision 4435:972d763188e6, 7.2 KB (checked in by Walter Doerwald <walter@…>, 8 years ago)

Fix call to dt2xsc.urls2xnd().

RevLine 
[2522]1#! /usr/bin/env python
[2903]2# -*- coding: utf-8 -*-
[2522]3
[4422]4## Copyright 1999-2011 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2011 by Walter Dörwald
[2522]6##
7## All Rights Reserved
8##
[3263]9## See ll/__init__.py for the license
[2522]10
[2789]11
[2522]12"""
[4426]13``dtd2xsc`` is a script that helps create XIST namespace modules from DTDs.
[4423]14It reads one or more DTDs and outputs a skeleton namespace module.
[3180]15
[4426]16``dtd2xsc`` supports the following options:
[4423]17
[4426]18    ``urls``
[4434]19        Zero or more URLs (or filenames) of DTDs to be parsed. If no URL is
20        given stdin will be read.
[4423]21
[4426]22    ``-x``, ``--xmlns``
23        The default namespace name. All elements that don't belong to any
24        namespace will be assigned to this namespace.
[3180]25
[4426]26    ``-s``, ``--shareattrs`` : ``none``, ``dupes``, ``all``
27        Should attributes be shared among the elements? ``none`` means that each
28        element will have its own standalone :class:`Attrs` class directly derived
29        from :class:`ll.xist.Elements.Attrs`. For ``dupes`` each attribute that is
30        used by more than one element will be moved into its own :class:`Attrs`
31        class. For ``all`` this will be done for all attributes.
[3180]32
[4426]33    ``-m``, ``--model`` : ``no``, ``simple``, ``fullall``, ``fullonce``
34        Add model information to the namespace. ``no`` doesn't add any model
35        information. ``simple`` only adds ``model = False`` or ``model = True``
36        (i.e. only the information whether the element must be empty or not).
37        ``fullall`` adds a :mod:`ll.xist.sims` model object to each element class.
38        ``fullonce`` adds full model information too, but reuses model objects
39        for elements which have the same model.
40
41    ``-d``, ``--defaults`` : ``false``, ``no``, ``0``, ``true``, ``yes`` or ``1``
42        Should default values for attributes specified in the DTD be added to the
43        XIST namespace (as the ``default`` specification in the attribute class)?
44
45    ``--duplicates`` : ``reject``, ``allow``, ``merge``
46        If more than one DTD is specified on the command line, some elements
47        might be specified in more than one DTD. ``--duplicates`` specifies how
48        to handle this case: ``reject`` doesn't allow multiple element
49        specifications. ``allow`` allows them, but only if both specifications
50        are identical (i.e. have the same attributes). ``merge`` allows them and
51        adds the attribute specification of all element specifications to the
52        resulting XIST namespace.
53
54Note that ``dtd2xsc`` requires xmlproc_ to work.
55
56    .. _xmlproc: http://www.garshol.priv.no/download/software/xmlproc/
[2522]57"""
58
59
[3180]60__docformat__ = "reStructuredText"
61
62
[4295]63import sys, os.path, argparse, cStringIO
[2522]64
[3857]65try:
66    from xml.parsers.xmlproc import dtdparser
67except ImportError:
68    from xmlproc import dtdparser
[2522]69
[4288]70from ll import misc, url
[4052]71from ll.xist import xsc, parse, xnd
[2522]72
73
[3180]74__docformat__ = "reStructuredText"
[3109]75
76
def getxmlns(dtd):
    """
    Return the set of default values of all ``#FIXED`` namespace-like
    attributes declared in :var:`dtd`.

    An attribute is considered here if its name is ``xmlns`` or contains a
    colon. :var:`dtd` must provide ``get_elements()`` and ``get_elem()``; the
    element objects must provide ``get_attr_list()`` and ``get_attr()``.
    """
    fixedvalues = set()
    for elementname in dtd.get_elements():
        declaration = dtd.get_elem(elementname)
        for name in declaration.get_attr_list():
            # Only namespace declarations and prefixed attributes are of interest
            if name != "xmlns" and u":" not in name:
                continue
            attribute = declaration.get_attr(name)
            if attribute.decl == "#FIXED":
                fixedvalues.add(attribute.default)
    return fixedvalues
89
90
def _extractcont(model):
    """
    Flatten the content model :var:`model` into a dictionary whose keys are
    the names of all content particles mentioned anywhere in the model (the
    values are always ``None``).
    """
    if len(model) != 3:
        # Leaf: the first item is the name of the particle
        return {model[0]: None}
    # Group: the second item is the list of nested models
    names = {}
    for submodel in model[1]:
        names.update(_extractcont(submodel))
    return names


def adddtd2xnd(ns, dtd):
    """
    Append the information from the DTD source :var:`dtd` (a string that is
    parsed with ``dtdparser.load_dtd_string``) to the :class:`xnd.Module`
    object :var:`ns`.

    Three kinds of information are added:

    *   one :class:`xnd.Element` per element declaration, including its
        attributes (``xmlns`` and attributes with a colon in their name are
        skipped);
    *   content model information (``modeltype``/``modelargs``) for each
        element;
    *   one :class:`xnd.CharRef` for each general entity (except the five
        predefined XML entities) whose replacement text can be parsed.
    """
    dtd = dtdparser.load_dtd_string(dtd)

    # Try to guess the namespace name from the DTD: it is only used if the
    # fixed namespace attributes agree on exactly one value
    xmlns = getxmlns(dtd)
    xmlns = next(iter(xmlns)) if len(xmlns) == 1 else None

    # Add element info
    elemnames = sorted(dtd.get_elements())
    for elemname in elemnames:
        dtd_e = dtd.get_elem(elemname)
        e = xnd.Element(elemname, xmlns=xmlns)

        # Add attribute info for this element
        for attrname in sorted(dtd_e.get_attr_list()):
            dtd_a = dtd_e.get_attr(attrname)
            if attrname == "xmlns" or u":" in attrname:
                continue # skip namespace declarations and global attributes
            values = []
            if dtd_a.type == "ID":
                attrtype = "xsc.IDAttr"
            elif isinstance(dtd_a.type, list):
                # A list is an enumeration of allowed values: a single
                # allowed value is treated as a boolean attribute
                if len(dtd_a.type) > 1:
                    attrtype = "xsc.TextAttr"
                    values = dtd_a.type
                else:
                    attrtype = "xsc.BoolAttr"
            else:
                attrtype = "xsc.TextAttr"
            required = True if dtd_a.decl == "#REQUIRED" else None
            e += xnd.Attr(name=attrname, type=attrtype, default=dtd_a.default, required=required, values=values)
        ns += e

    # Iterate through the elements a second time and add model information
    # (this needs all elements to be registered in ``ns.elements`` already)
    for elemname in elemnames:
        model = dtd.get_elem(elemname).get_content_model()
        modelargs = None
        if model is None:
            modeltype = "sims.Any"
        elif model == ("", [], ""):
            modeltype = "sims.Empty"
        else:
            modeltype = "sims.Elements"
            modelargs = []
            for cont in _extractcont(model):
                if cont == "#PCDATA":
                    modeltype = "sims.ElementsOrText"
                elif cont == "EMPTY":
                    modeltype = "sims.Empty"
                else:
                    modelargs.append(ns.elements[(cont, xmlns)])
            if not modelargs:
                # No child elements allowed: switch to the "no elements" models
                if modeltype == "sims.ElementsOrText":
                    modeltype = "sims.NoElements"
                else:
                    modeltype = "sims.NoElementsOrText"
        xnd_e = ns.elements[(elemname, xmlns)]
        if ns.model == "simple":
            # Simple mode only records whether the element must be empty
            modeltype = (modeltype == "sims.Empty")
            modelargs = None
        xnd_e.modeltype = modeltype
        xnd_e.modelargs = modelargs

    # Add entities
    for entname in sorted(dtd.get_general_entities()):
        if entname in ("quot", "apos", "gt", "lt", "amp"):
            continue # predefined XML entities
        try:
            ent = parse.tree(dtd.resolve_ge(entname).value, parse.Encoder("utf-8"), parse.SGMLOP(encoding="utf-8"), parse.NS(), parse.Node())
        except xsc.IllegalEntityError:
            continue # replacement text references an unknown entity
        # An entity with empty replacement text has no code point: skip it
        # instead of failing with an :exc:`IndexError`
        text = unicode(ent[0]) if len(ent) else u""
        if text:
            ns += xnd.CharRef(entname, codepoint=ord(text[0]))
[2989]187
[4278]188
def urls2xnd(urls, shareattrs=None, **kwargs):
    """
    Create an :class:`xnd.Module` from the DTDs in :var:`urls` and return it.

    :var:`urls` is a sequence of DTD sources. Each item may be

    *   a :class:`url.URL` object, which is opened for reading;
    *   a :class:`str`, which is treated as the DTD source itself;
    *   any other object with a ``read()`` method.

    If :var:`urls` is empty, the DTD is read from ``sys.stdin``.

    :var:`shareattrs` may be ``"dupes"`` or ``"all"`` to call
    ``ns.shareattrs(False)`` or ``ns.shareattrs(True)`` on the finished
    module; any other value leaves the attributes untouched. All remaining
    keyword arguments are passed to :class:`xnd.Module`.
    """
    ns = xnd.Module(**kwargs)
    with url.Context():
        # Fall back to stdin if no source has been specified
        for u in (urls or [sys.stdin]):
            if isinstance(u, url.URL):
                stream = u.openread()
                opened = True
            elif isinstance(u, str):
                stream = cStringIO.StringIO(u)
                opened = True
            else:
                stream = u
                opened = False
            try:
                adddtd2xnd(ns, stream.read())
            finally:
                # Only close what has been opened here; streams supplied by
                # the caller (e.g. stdin) stay open
                if opened:
                    stream.close()

    if shareattrs == "dupes":
        ns.shareattrs(False)
    elif shareattrs == "all":
        ns.shareattrs(True)
    return ns
206
207
def main(args=None):
    """
    Command line entry point: parse the command line arguments :var:`args`
    (which are passed on to ``ArgumentParser.parse_args``), convert the
    specified DTDs with :func:`urls2xnd` and print the result to stdout.
    """
    p = argparse.ArgumentParser(description="Convert DTDs to XIST namespace (on stdout)")
    p.add_argument("urls", metavar="urls", type=url.URL, help="Zero or more URLs of DTDs to be parsed (default stdin)", nargs="*")
    p.add_argument("-x", "--xmlns", dest="defaultxmlns", metavar="NAME", help="the namespace name for this module")
    p.add_argument("-s", "--shareattrs", dest="shareattrs", help="Should identical attributes be shared among elements? (default: %(default)s)", choices=("none", "dupes", "all"), default="dupes")
    # argparse doesn't check defaults against ``choices``, so the default must
    # itself be one of the supported values
    p.add_argument("-m", "--model", dest="model", default="fullonce", help="Add sims information to the namespace (default: %(default)s)", choices=("no", "simple", "fullall", "fullonce"))
    p.add_argument("-d", "--defaults", dest="defaults", help="Output default values for attributes? (default: %(default)s)", action=misc.FlagAction, default=False)
    p.add_argument(      "--duplicates", dest="duplicates", help="How to handle duplicate elements from multiple DTDs (default: %(default)s)", choices=("reject", "allow", "merge"), default="reject")

    args = p.parse_args(args)
    # Every parsed option is passed to urls2xnd() as a keyword argument
    print(urls2xnd(**vars(args)))
[2522]219
220
# Run the converter when the module is executed as a script; the return
# value of main() is passed to sys.exit()
if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.