Source code for soupsavvy.selectors.combinators

"""
Module with combinators, which are composite selectors, that define a relationship
between multiple selectors related to their position in the document.

Most of them are counterpart of CSS combinators, but others extend their functionality.

Classes
-------
- `ChildCombinator` - counterpart of CSS child combinator (>)
- `NextSiblingCombinator` - counterpart of CSS adjacent sibling combinator (+)
- `SubsequentSiblingCombinator` - counterpart of CSS subsequent sibling combinator (~)
- `DescendantCombinator` - counterpart of CSS descendant combinator (" ")
- `ParentCombinator` - matches parent of preceding selector
- `AncestorCombinator` - matches ancestor of preceding selector

Notes
-----
For more information on CSS combinators, see:

https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Selectors/Combinators
"""

from abc import abstractmethod
from functools import reduce
from typing import Optional, Type

from typing_extensions import deprecated

from soupsavvy.base import CompositeSoupSelector, SoupSelector
from soupsavvy.interfaces import IElement
from soupsavvy.selectors.logical import SelectorList as _SelectorList
from soupsavvy.selectors.relative import (
    RelativeAncestor,
    RelativeChild,
    RelativeDescendant,
    RelativeNextSibling,
    RelativeParent,
    RelativeSelector,
    RelativeSubsequentSibling,
)
from soupsavvy.utils.selector_utils import TagIterator, TagResultSet


@deprecated("`SelectorList` was moved to `soupsavvy.selectors.logical` module.")
class SelectorList(_SelectorList):
    """
    Deprecated alias of `SelectorList` from `soupsavvy.selectors.logical`,
    kept in this module only for backward compatibility.

    It adds no behavior on top of the class it inherits from.
    """
class BaseCombinator(CompositeSoupSelector):
    """
    Base class for all combinators, which are composite selectors,
    that define a relationship between multiple selectors
    and apply it to search for elements in the document.

    Subclasses only need to provide the `_selector` property, which returns
    the relative selector type used to move from the elements matched
    by one step to the candidates of the next step.
    """

    # order of selectors is relevant in context of results
    COMMUTATIVE = False

    def __init__(
        self,
        selector1: SoupSelector,
        selector2: SoupSelector,
        /,
        *selectors: SoupSelector,
    ) -> None:
        """
        Initializes Combinator object with provided positional arguments.
        At least two `SoupSelector` objects are required to create Combinator.

        Parameters
        ----------
        selector1: SoupSelector
            First step of the combinator (positional-only).
        selector2: SoupSelector
            Second step of the combinator (positional-only).
        selectors: SoupSelector
            Any further `SoupSelector` objects, accepted as positional arguments.

        Notes
        -----
        Object can be initialized with more than two `SoupSelector` objects,
        which would be equal to chaining multiple combinators of the same type.
        For example, chaining child combinator in css:

        Example
        -------
        >>> div > a > span

        translated to `soupsavvy` would be:

        Example
        -------
        >>> ChildCombinator(TypeSelector("div"), TypeSelector("a"), TypeSelector("span"))

        Raises
        ------
        NotSoupSelectorException
            If any of provided parameters is not an instance of SoupSelector.
        """
        super().__init__([selector1, selector2, *selectors])

    @property
    @abstractmethod
    def _selector(self) -> Type[RelativeSelector]:
        """
        Returns type of the relative selector that is used to perform
        a single step search in the combinator selector.

        Returns
        -------
        Type[RelativeSelector]
            Type of the relative selector that is used in the combinator.
            Selector instance of this type is initialized
            with each step in the combinator.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} is a base class "
            "and does not implement '_selector' property."
        )

    def _find_first_step(
        self, step: SoupSelector, tag: IElement, recursive: bool
    ) -> TagResultSet:
        """
        Returns results of the first step in the combinator selector,
        given element that is being searched and recursive behavior
        passed to find method by user.

        Parameters
        ----------
        step: SoupSelector
            First selector of the combinator.
        tag: IElement
            Element that's being searched by first step in the combinator.
        recursive: bool
            Recursive behavior passed to find method by user.

        Returns
        -------
        TagResultSet
            Results of the first step in the combinator selector.
        """
        return TagResultSet(step.find_all(tag, recursive=recursive))

    def _order_results(
        self, results: TagResultSet, tag: IElement, recursive: bool
    ) -> TagResultSet:
        """
        Orders results of find_all method of the combinator selector,
        given initial element that was passed to find_all method
        and recursive behavior.

        Parameters
        ----------
        results: TagResultSet
            Results of the combinator selector.
        tag: IElement
            Initial element that was passed to find_all method.
        recursive: bool
            Recursive behavior passed to find method by user.
            Not used by this base implementation.

        Returns
        -------
        TagResultSet
            Ordered results of the combinator selector.
        """
        # all descendants are always iterated here, whatever `recursive` is;
        # presumably because later steps can match elements at any depth
        return TagResultSet(list(TagIterator(tag, recursive=True))) & results

    def find_all(
        self,
        tag: IElement,
        recursive: bool = True,
        limit: Optional[int] = None,
    ) -> list[IElement]:
        """
        Finds all elements matching the chain of selectors of this combinator.

        The first selector is searched with `_find_first_step`. Every next
        selector is wrapped in the relative selector type from `_selector`
        and searched from each element matched by the previous step.

        Parameters
        ----------
        tag: IElement
            Element to search within.
        recursive: bool
            Recursive behavior, forwarded to `_find_first_step`
            and `_order_results`. Defaults to True.
        limit: Optional[int]
            Passed to `TagResultSet.fetch` on the final results.
            Defaults to None.

        Returns
        -------
        list[IElement]
            Matching elements fetched from the ordered results.
        """
        results = TagResultSet()

        for i, step in enumerate(self.selectors):
            if i == 0:
                results |= self._find_first_step(
                    step=step, tag=tag, recursive=recursive
                )
                continue

            if not results:
                # previous step matched nothing, so no later step can match
                break

            selector = self._selector(step)
            # flatten per-element matches in a single pass; repeated list
            # concatenation would copy the accumulated list on every step
            results = TagResultSet(
                [
                    match
                    # each relative selector has defined recursive behavior
                    for element in results.fetch()
                    for match in selector.find_all(element)
                ]
            )

        if results:
            # ordering walks the tree under `tag`, skip it when nothing matched
            results = self._order_results(
                results=results, tag=tag, recursive=recursive
            )

        return results.fetch(limit)
class BaseAncestorCombinator(BaseCombinator):
    """
    Base class for combinators that walk up the tree of elements
    after the first step, instead of down like the other combinators.

    - The first step is searched among all descendants of the element,
    regardless of `recursive` parameter.
    - `recursive` is applied when ordering the final results, so with
    `recursive=False` only children of the element remain in the results.
    """

    def _find_first_step(
        self, step: SoupSelector, tag: IElement, recursive: bool
    ) -> TagResultSet:
        # first step ignores `recursive` and always searches all descendants
        first_matches = step.find_all(tag, recursive=True)
        return TagResultSet(first_matches)

    def _order_results(
        self, results: TagResultSet, tag: IElement, recursive: bool
    ) -> TagResultSet:
        # candidates are limited by `recursive`, which filters the results
        candidates = TagResultSet(list(TagIterator(tag, recursive=recursive)))
        return candidates & results
class ChildCombinator(BaseCombinator):
    """
    Counterpart of CSS child combinator (`>`).

    Defines a relationship between selectors, where each next
    matching element must be a direct child of the previous one.

    Example
    -------
    >>> ChildCombinator(TypeSelector("div"), TypeSelector("a"))

    selects every 'a' element whose parent is a 'div' element.

    Example
    -------
    >>> <div class="widget"><a>Hello World</a></div> ✔️
    >>> <div class="widget"><span></span><a>Hello World</a></div> ✔️
    >>> <span class="widget"><a>Hello World</a></span> ❌
    >>> <div class="menu"><span>Hello World</span></div> ❌

    The same object is created by the `greater than` operator `>`
    applied to `SoupSelector` objects:

    Example
    -------
    >>> TypeSelector("div") > TypeSelector("a")

    The equivalent CSS selector is:

    Example
    -------
    >>> div > a { color: red; }

    Notes
    -----
    For more information on child combinator, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Child_combinator
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # each step is searched among children of the previous matches
        relative_type: Type[RelativeSelector] = RelativeChild
        return relative_type
class NextSiblingCombinator(BaseCombinator):
    """
    Counterpart of CSS next sibling combinator (`+`).

    Defines a relationship between selectors, where each next
    matching element must be the sibling that immediately follows
    the previous one.

    Example
    -------
    >>> NextSiblingCombinator(TypeSelector("div"), TypeSelector("a"))

    selects every 'a' element placed directly after a 'div' element,
    both being children of the same parent element.

    Example
    -------
    >>> <div class="widget"></div><a>Hello World</a> ✔️
    >>> <div class="widget"><a>Hello World</a></div> ❌
    >>> <div class="widget"></div><span></span><a>Hello World</a> ❌

    The same object is created by the `plus` operator `+`
    applied to `SoupSelector` objects:

    Example
    -------
    >>> TypeSelector("div") + TypeSelector("a")

    The equivalent CSS selector is:

    Example
    -------
    >>> div + a

    Notes
    -----
    This is also known as the `adjacent sibling combinator` in CSS.
    For more information on next sibling combinator, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Next-sibling_combinator
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # each step is searched in the next sibling of the previous matches
        relative_type: Type[RelativeSelector] = RelativeNextSibling
        return relative_type
class SubsequentSiblingCombinator(BaseCombinator):
    """
    Counterpart of CSS subsequent sibling combinator.

    Represents the relationship between selectors, where every next
    matching element is a sibling following the previous one,
    but not necessarily immediately.

    Example
    -------
    >>> SubsequentSiblingCombinator(TypeSelector("div"), TypeSelector("a"))

    matches all 'a' elements that follow 'div' elements
    and share the same parent element.

    Example
    -------
    >>> <div class="widget"></div><a>Hello World</a> ✔️
    >>> <div class="widget"></div><span></span><a>Hello World</a> ✔️
    >>> <span class="widget"><a>Hello World</a></span> ❌
    >>> <a>Hello World</a><div class="menu"></div> ❌

    Object can be created as well by using `multiplication` operator `*`
    on `SoupSelector` objects.

    Example
    -------
    >>> TypeSelector("div") * TypeSelector("a")

    Which is equivalent to the first example.

    CSS counterpart can be represented as:

    Example
    -------
    >>> div ~ a

    Notes
    -----
    This combinator is also known as `general sibling combinator` in CSS.
    For more information on subsequent sibling combinator, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Subsequent-sibling_combinator
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # relative selector type applied to every step after the first one
        return RelativeSubsequentSibling
class DescendantCombinator(BaseCombinator):
    """
    Counterpart of CSS descendant combinator.

    Represents the relationship between selectors, where every next
    matching element is a descendant of the previous one.

    Example
    -------
    >>> DescendantCombinator(TypeSelector("div"), ClassSelector("widget"))

    matches all descendants of 'div' element with 'widget' class.

    Example
    -------
    >>> <div><a class="widget"></a></div> ✔️
    >>> <div><div><a class="widget"></a></div></div> ✔️
    >>> <div><a id="widget"></a></div> ❌
    >>> <span><a class="widget"></a></span> ❌
    >>> <a class="widget"></a> ❌

    Object can be created as well by using `right shift` operator `>>`
    on `SoupSelector` objects.

    Example
    -------
    >>> TypeSelector("div") >> ClassSelector("widget")

    Which is equivalent to the first example.

    CSS counterpart can be represented as:

    Example
    -------
    >>> div .widget

    Notes
    -----
    For more information on descendant combinator, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Descendant_combinator
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # relative selector type applied to every step after the first one
        return RelativeDescendant
class ParentCombinator(BaseAncestorCombinator):
    """
    Defines a relationship between selectors, where every next
    matching element is a parent of the previous one.

    Example
    -------
    >>> ParentCombinator(TypeSelector("a"), TypeSelector("div"))

    The given selector matches all 'div' elements
    that are parents of 'a' elements.

    Example
    -------
    >>> <div><a href="/shop"></a></div> ✔️
    >>> <div><span><a href="/shop"></a></span></div> ❌
    >>> <span><a href="/shop"></a></span> ❌

    Object can be created as well by using `lt` operator `<`
    on `SoupSelector` objects.

    Example
    -------
    >>> TypeSelector("a") < TypeSelector("div")

    Which is equivalent to the first example.

    Although this combinator does not have its counterpart in CSS,
    it can be represented as:

    Example
    -------
    >>> div:has(> a)
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # relative selector type applied to every step after the first one
        return RelativeParent
class AncestorCombinator(BaseAncestorCombinator):
    """
    Defines a relationship between selectors, where every next
    matching element is an ancestor of the previous one.

    Example
    -------
    >>> AncestorCombinator(TypeSelector("a"), TypeSelector("div"))

    The given selector matches all 'div' elements
    that are ancestors of 'a' elements.

    Example
    -------
    >>> <div><span><a href="/shop"></a></span></div> ✔️
    >>> <div><a href="/shop"></a></div> ✔️
    >>> <div><span class="menu"></span></div> ❌
    >>> <span><a class="menu"></a></span> ❌

    Object can be created as well by using `left shift` operator `<<`
    on `SoupSelector` objects.

    Example
    -------
    >>> TypeSelector("a") << TypeSelector("div")

    Which is equivalent to the first example.

    Although this combinator does not have its counterpart in CSS,
    it can be represented as:

    Example
    -------
    >>> div:has(a)
    """

    @property
    def _selector(self) -> Type[RelativeSelector]:
        # relative selector type applied to every step after the first one
        return RelativeAncestor