Changeset 2230:3b3126b39ec1 in livinglogic.python.xist

Show
Ignore:
Timestamp:
12/17/04 18:59:08 (15 years ago)
Author:
Walter Doerwald <walter@…>
Branch:
default
Message:

Add item and slice operators.

Make operators chainable.

Drop slice support from xfind.item().

Add tests for the new operator stuff.

Files:
4 modified

Legend:

Unmodified
Added
Removed
  • NEWS.xml

    r2217 r2230  
    77<item><class>ll.xist.utils.findAttr</class> has been renamed to 
    88<class>ll.xist.utils.findattr</class>.</item> 
     9<item><class>ll.xist.xfind.item</class> no longer handles slices.</item> 
     10<item>XFind has been enhanced to support item and slice operators, i.e. 
     11if <lit><rep>foo</rep></lit> is an XFind operator, <lit><rep>foo</rep>[0]</lit> 
     12is an operator that will produce the first node from <lit><rep>foo</rep></lit> 
     13(if there is one). Negative values and slices are supported too.</item> 
     14<item>Operators can be chained via division: <lit>html.a/html.b</lit> is an 
     15operator that can be applied to a node.</item> 
    916</ulist> 
    1017</section> 
  • _xist/xfind.py

    r2229 r2230  
    1717 
    1818 
     19import ll 
     20 
     21 
     22### 
     23### General iterator utilities 
     24### 
     25 
     26_defaultitem = object() 
     27 
     28def item(iterator, index, default=_defaultitem): 
     29    """ 
     30    <par>Return the <arg>index</arg>th item from the iterator <arg>iterator</arg>. 
     31    <arg>index</arg> must be an integer (negative integers are relative to the 
     32    end (i.e. the last item produced by the iterator)).</par> 
     33 
     34    <par>If <arg>default</arg> is given, this will be the default value when 
     35    the iterator doesn't contain an item at this position. Otherwise an 
     36    <class>IndexError</class> will be raised.</par> 
     37    """ 
     38    i = index 
     39    if i>=0: 
     40        for item in iterator: 
     41            if not i: 
     42                return item 
     43            i -= 1 
     44    else: 
     45        i = -index 
     46        cache = [] 
     47        for item in iterator: 
     48            cache.append(item) 
     49            if len(cache)>i: 
     50                cache.pop(0) 
     51        if len(cache)==i: 
     52            return cache[0] 
     53    if default is _defaultitem: 
     54        raise IndexError(index) 
     55    else: 
     56        return default 
     57 
     58 
     59def first(iterator, default=_defaultitem): 
     60    """ 
     61    <par>Return the first object produced by the iterator <arg>iterator</arg> or 
     62    <arg>default</arg> if the iterator didn't produce any items.</par> 
     63    <par>Calling this function will consume one item from the iterator.</par> 
     64    """ 
     65    return item(iterator, 0, default) 
     66 
     67 
     68def last(iterator, default=_defaultitem): 
     69    """ 
     70    <par>Return the last object from the iterator <arg>iterator</arg> or 
     71    <arg>default</arg> if the iterator didn't produce any items.</par> 
     72    <par>Calling this function will exhaust the iterator.</par> 
     73    """ 
     74    return item(iterator, -1, default) 
     75 
     76 
     77def count(iterator): 
     78    """ 
     79    <par>Return the number of items produced by the iterator <arg>iterator</arg>.</par> 
     80    <par>Calling this function will exhaust the iterator.</par> 
     81    """ 
     82    count = 0 
     83    for node in iterator: 
     84        count += 1 
     85    return count 
     86 
     87 
    1988def iterone(node): 
    2089    """ 
     
    2493 
    2594 
    26 class Operator(object): 
    27     """ 
    28     The base class of all XFind operators. 
    29     """ 
    30     def xfind(self, iterator, *operators): 
    31         """ 
    32         Apply <self/> to the nodes produced by <arg>iterator</arg> first, and 
    33         apply the operators in <arg>operators</arg> in sequence to the result. 
    34         Return an iterator. This method may be overwritten by iterators that need 
    35         to know the other operators after themselves in the XFind expression. All 
    36         others should overwrite <pyref method="xwalk"><method>xwalk</method></pyref>. 
    37         """ 
    38         # we have to resolve the iterator here 
    39         return iter(Finder(self.xwalk(iterator), *operators)) 
    40  
    41     def xwalk(self, iterator): 
    42         """ 
    43         Apply <self/> to the nodes produced by <arg>iterator</arg> and return 
    44         an iterator for the result. 
    45         """ 
    46         pass 
    47  
    48  
    49 class Finder(object): 
    50     """ 
    51     A <class>Finder</class> object is a <z>parsed</z> XFind expression. 
     95### 
     96### XFind expression 
     97### 
     98 
     99class Expr(object): 
     100    """ 
     101    An <class>Expr</class> object is a <z>parsed</z> XFind expression. 
    52102    The expression <lit><rep>a</rep>/<rep>b</rep></lit> will return an 
    53103    <class>Expr</class> object if <lit><rep>a</rep></lit> is either a 
     
    57107    or the instances of <pyref class="Operator"><class>Operator</class></pyref>. 
    58108    """ 
    59     __slots__ = ("iterator", "operators") 
     109    __slots__ = ("iterator", "operator") 
    60110 
    61111    def __init__(self, iterator, *operators): 
     
    64114            iterator = iterone(iterator) 
    65115        self.iterator = iterator 
    66         newoperators = [] 
    67         for operator in operators: 
    68             if not isinstance(operator, Operator): 
    69                 operator = Walker(operator) 
    70             newoperators.append(operator) 
    71         self.operators = tuple(newoperators) 
     116        self.operator = OperatorChain(*operators) 
    72117 
    73118    def next(self): 
     
    75120 
    76121    def __iter__(self): 
    77         if self.operators: 
    78             return self.operators[0].xfind(self.iterator, *self.operators[1:]) 
    79         else: 
    80             return self 
     122        if self.operator.operators: 
     123            return self.operator.xfind(self.iterator) 
     124        else: 
     125            return self.iterator 
    81126 
    82127    def __getitem__(self, index): 
     
    91136 
    92137    def __div__(self, other): 
    93         return Finder(self.iterator, *(self.operators + (other,))) 
     138        return Expr(self.iterator, self.operator/other) 
    94139 
    95140    def __floordiv__(self, other): 
    96         return Finder(self.iterator, *(self.operators + (all, other))) 
    97  
    98     def __repr__(self): 
    99         if self.operators: 
    100             ops = "/" + "/".join(repr(op) for op in self.operators) 
     141        return Expr(self.iterator, self.operator//other) 
     142 
     143    def __repr__(self): 
     144        if self.operator.operators: 
     145            ops = "/" + "/".join(repr(op) for op in self.operator.operators) 
    101146        else: 
    102147            ops = "" 
     
    104149 
    105150 
    106 _defaultitem = object() 
    107  
    108 def item(iterator, index, default=_defaultitem): 
    109     """ 
    110     <par>Return the <arg>index</arg>th item from the iterator <arg>iterator</arg>. 
    111     <arg>index</arg> may be an integer (negative integers are relative to the 
    112     end (i.e. the last item produced by the iterator)) or a <class>slice</class>.</par> 
    113  
    114     <par>Calling this function will partially or totally exhaust the iterator.</par> 
    115  
    116     <par>If <arg>default</arg> is given, this will be the default value when 
    117     the iterator doesn't contain an item at this position. Otherwise an 
    118     <class>IndexError</class> will be raised.</par> 
    119     """ 
    120     if isinstance(index, slice): 
    121         return list(iterator)[index] # fall back to materializing the list 
    122     else: 
    123         i = index 
    124         if i>=0: 
    125             for item in iterator: 
    126                 if not i: 
    127                     return item 
    128                 i -= 1 
    129         else: 
    130             i = -index 
    131             cache = [] 
    132             for item in iterator: 
    133                 cache.append(item) 
    134                 if len(cache)>i: 
    135                     cache.pop(0) 
    136             if len(cache)==i: 
    137                 return cache[0] 
    138         if default is _defaultitem: 
    139             raise IndexError(index) 
    140         else: 
    141             return default 
    142  
    143  
    144 def first(iterator, default=_defaultitem): 
    145     """ 
    146     <par>Return the first object produced by the iterator <arg>iterator</arg> or 
    147     <arg>default</arg> if the iterator didn't produce any items.</par> 
    148     <par>Calling this function will consume one item from the iterator.</par> 
    149     """ 
    150     return item(iterator, 0, default) 
    151  
    152  
    153 def last(iterator, default=_defaultitem): 
    154     """ 
    155     <par>Return the last object from the iterator <arg>iterator</arg> or 
    156     <arg>default</arg> if the iterator didn't produce any items.</par> 
    157     <par>Calling this function will exhaust the iterator.</par> 
    158     """ 
    159     return item(iterator, -1, default) 
    160  
    161  
    162 def count(iterator): 
    163     """ 
    164     <par>Return the number of items produced by the iterator <arg>iterator</arg>.</par> 
    165     <par>Calling this function will exhaust the iterator.</par> 
    166     """ 
    167     count = 0 
    168     for node in iterator: 
    169         count += 1 
    170     return count 
     151### 
     152### XFind operators 
     153### 
     154 
     155class Operator(object): 
     156    """ 
     157    The base class of all XFind operators. 
     158    """ 
     159    def xfind(self, iterator, *operators): 
     160        """ 
     161        Apply <self/> to the nodes produced by <arg>iterator</arg> first, and 
     162        apply the operators in <arg>operators</arg> in sequence to the result. 
     163        Return an iterator. This method may be overridden by operators that need 
     164        to know the other operators after themselves in the XFind expression. All 
     165        others should override <pyref method="xwalk"><method>xwalk</method></pyref>. 
     166        """ 
     167        # we have to resolve the iterator here 
     168        return iter(Expr(self.xwalk(iterator), *operators)) 
     169 
     170    @ll.notimplemented 
     171    def xwalk(self, iterator): 
     172        """ 
     173        Apply <self/> to the nodes produced by <arg>iterator</arg> and return 
     174        an iterator for the result. 
     175        """ 
     176        pass 
     177 
     178    def __div__(self, other): 
     179        """ 
     180        Return an operator chain that applies <self/> first and then <arg>other</arg>. 
     181        """ 
     182        if isinstance(other, OperatorChain): 
     183            return OperatorChain(self, *other.operators) 
     184        else: 
     185            return OperatorChain(self, other) 
     186 
     187    def __floordiv__(self, other): 
     188        """ 
     189        Return an operator chain that applies <self/>, then <lit>all</lit>, and then <arg>other</arg>. 
     190        """ 
     191        if isinstance(other, OperatorChain): 
     192            return OperatorChain(self, all, *other.operators) 
     193        else: 
     194            return OperatorChain(self, all, other) 
     195 
     196    def __getitem__(self, index): 
     197        """ 
     198        Return an operator that applies <self/>, but only yields the <arg>index</arg>th 
     199        node (or, for a <class>slice</class>, the selected nodes) from the result. 
     200        """ 
     201        if isinstance(index, slice): 
     202            return SliceOperator(self, index) 
     203        else: 
     204            return ItemOperator(self, index) 
     205 
     206    def __getslice__(self, index1, index2): 
     207        """ 
     208        Return an operator that applies <self/>, but only yields the nodes from 
     209        the specified slice. 
     210        """ 
     211        return SliceOperator(self, slice(index1, index2)) 
     212 
     213 
     214class ItemOperator(Operator): 
     215    """ 
     216    """ 
     217    def __init__(self, operator, index): 
     218        self.operator = operator 
     219        self.index = index 
     220 
     221    def xwalk(self, iterator): 
     222        for child in iterator: 
     223            node = item(child/self.operator, self.index, None) 
     224            if node is not None: 
     225                yield node 
     226 
     227 
     228class SliceOperator(Operator): 
     229    """ 
     230    """ 
     231    def __init__(self, operator, slice): 
     232        self.operator = operator 
     233        self.slice = slice 
     234 
     235    def xwalk(self, iterator): 
     236        for child in iterator: 
     237            for subchild in list(child/self.operator)[self.slice]: # materialize the iterator 
     238                yield subchild 
     239 
     240 
     241class OperatorChain(Operator): 
     242    """ 
     243    """ 
     244    def __init__(self, *operators): 
     245        newoperators = [] 
     246        for operator in operators: 
     247            if isinstance(operator, OperatorChain): 
     248                newoperators.extend(operator.operators) 
     249            else: 
     250                if not isinstance(operator, Operator): 
     251                    operator = Walker(operator) 
     252                newoperators.append(operator) 
     253        self.operators = newoperators 
     254 
     255    def xwalk(self, iterator): 
     256        if self.operators: 
     257            return iter(Expr(self.operators[0].xfind(iterator, *self.operators[1:]))) 
     258        else: 
     259            return iterator 
     260 
     261    def __div__(self, other): 
     262        if isinstance(other, OperatorChain): 
     263            return OperatorChain(*(self.operators + other.operators)) 
     264        else: 
     265            return OperatorChain(*(self.operators + [other])) 
     266 
     267    def __floordiv__(self, other): 
     268        if isinstance(other, OperatorChain): 
     269            return OperatorChain(*(self.operators + [all] + other.operators)) 
     270        else: 
     271            return OperatorChain(*(self.operators + [all, other])) 
     272 
     273    def __repr__(self): 
     274        if self.operators: 
     275            ops = "/".join(repr(op) for op in self.operators) 
     276        else: 
     277            ops = "" 
     278        return "<%s.%s for %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, ops, id(self)) 
    171279 
    172280 
  • _xist/xsc.py

    r2228 r2230  
    702702 
    703703    def __div__(self, other): 
    704         return xfind.Finder(self, other) 
     704        return xfind.Expr(self, other) 
    705705         
    706706    def __floordiv__(self, other): 
    707         return xfind.Finder(self, xfind.all, other) 
     707        return xfind.Expr(self, xfind.all, other) 
    708708         
    709709    def compact(self): 
  • test/test.py

    r2198 r2230  
    22142214 
    22152215    def checkids(self, expr, ids): 
    2216         self.assertEqual("".join([str(e["id"]) for e in expr]), ids) 
     2216        self.assertEqual("".join(str(e["id"]) for e in expr), ids) 
    22172217 
    22182218    def test_all(self): 
     
    23192319class XFindTestMisc(unittest.TestCase): 
    23202320    def checkids(self, expr, ids): 
    2321         self.assertEqual("".join([str(e["id"]) for e in expr]), ids) 
     2321        self.assertEqual("".join(str(e["id"]) for e in expr), ids) 
    23222322 
    23232323    def test_frag(self): 
     
    23452345 
    23462346 
     2347class XFindTestItemSlice(unittest.TestCase): 
     2348    def checkids(self, expr, ids): 
     2349        self.assertEqual("".join(str(e["id"]) for e in expr), ids) 
     2350 
     2351    def test_itemsslices(self): 
     2352        #        ____0____ 
     2353        #       /    |    \ 
     2354        #     _1_   _2_   _3_ 
     2355        #    /   \ /   \ /   \ 
     2356        #   4     5     6     7 
     2357        ds = [html.div(id=id) for id in xrange(8)] 
     2358        ds[0].append(ds[1], ds[2], ds[3]) 
     2359        ds[1].append(ds[4], ds[5]) 
     2360        ds[2].append(ds[5], ds[6]) 
     2361        ds[3].append(ds[6], ds[7]) 
     2362 
     2363        self.checkids(ds[0]/html.div[0]/html.div[-1], "5") 
     2364        self.checkids(ds[0]/html.div/html.div[-1], "567") 
     2365        self.checkids(ds[0]/html.div[-1]/html.div, "67") 
     2366        self.checkids(ds[0]/(html.div/html.div), "455667") # we get 5 and 6 twice 
     2367        self.checkids(ds[0]/(html.div/html.div)[2], "5") # we get 5 and 6 twice 
     2368        self.checkids(ds[0]/html.div[:]/html.div[:], "455667") 
     2369        self.checkids(ds[0]/html.div/html.p[0], "") 
     2370        self.checkids(ds[0]/html.p[0]/html.p[0], "") 
     2371 
     2372        # The following might be a surprise, but is perfectly normal: 
     2373        # each node is visited and the div children are yielded. 
     2374        # div(id=0) does have div children and those will be yielded. 
     2375        # This is why the sequence starts with "12" and not "14" 
     2376        self.checkids(ds[0]//html.div, "123455667") 
     2377 
     2378        self.checkids(ds[0]/html.div[1:2], "2") 
     2379        self.checkids(ds[0]/html.div[1:-1]/html.div[1:-1], "") 
     2380        self.checkids(ds[0]/html.div[1:-1]/html.div[-1:], "6") 
     2381 
     2382 
    23472383def test_main(): 
    23482384    unittest.main()