Source code for soupsavvy.selectors.xpath.selectors

"""
Selector for finding elements based on XPath,
that allows any supported XPath expressions
to be used with other `soupsavvy` components.

Classes
-------
- XPathSelector
"""

from typing import Any, Optional

from soupsavvy.base import SoupSelector
from soupsavvy.interfaces import IElement
from soupsavvy.utils.selector_utils import TagIterator, TagResultSet


class XPathSelector(SoupSelector):
    """
    Selector that locates elements by means of an XPath expression.

    Expressions must target elements, not attributes or text content.

    Examples
    --------
    Using a plain string expression:

    >>> selector = XPathSelector("//p[@class='menu']")
    ... selector.find(soup)

    Using a pre-compiled lxml `XPath` object:

    >>> from lxml.etree import XPath
    ... selector = XPathSelector(XPath("//p[@class='menu']", smart_strings=False))
    ... selector.find(soup)

    Expression that targets attributes instead of elements:

    >>> selector = XPathSelector("//div//@href")
    ... selector.find(soup)
    None

    Notes
    -----
    Two selectors are considered equal when their `xpath` attributes
    compare equal; nothing else takes part in the comparison. For compiled
    lxml `XPath` objects this relies on whatever `==` does for that type,
    which does not implement a more specific `__eq__` method.
    """

    def __init__(self, xpath: Any) -> None:
        """
        Creates `XPathSelector` for the given XPath expression.

        Parameters
        ----------
        xpath : str | lxml.etree.XPath
            XPath expression as a string or an already compiled `XPath`
            object. It needs to target elements, not attributes
            or text content.

        Notes
        -----
        The expression is stored unchanged; nothing is compiled or
        validated in the constructor.

        NOTE(review): earlier documentation listed `InvalidXPathSelector`
        as raised for strings that cannot be compiled. No such check
        happens here - presumably it surfaces when the expression is
        handed to `tag.xpath` inside `find_all`; confirm against
        the `IElement` implementation.
        """
        self.xpath = xpath

    def find_all(
        self,
        tag: IElement,
        recursive: bool = True,
        limit: Optional[int] = None,
    ) -> list[IElement]:
        """
        Finds elements under `tag` that are selected by the XPath expression.

        Parameters
        ----------
        tag : IElement
            Element on which the expression is evaluated.
        recursive : bool, optional
            Forwarded to `TagIterator` to decide which elements of `tag`
            are candidates for the result. Default is True.
        limit : int, optional
            Forwarded to `TagResultSet.fetch`. Default is None.

        Returns
        -------
        list[IElement]
            Outcome of combining (`&`) the elements yielded by `TagIterator`
            with the elements selected by the expression, then fetched
            with `limit`.
        """
        # Evaluate the expression first, exactly as the element API exposes it.
        xpath_matches = tag.xpath(self.xpath).select(tag)

        # Candidates are restricted by `recursive`; they form the left operand
        # of `&`, so operand order matches the original implementation.
        candidates = TagResultSet(list(TagIterator(tag, recursive=recursive)))
        matched = TagResultSet(xpath_matches)

        return (candidates & matched).fetch(limit)

    def __eq__(self, other: Any) -> bool:
        """Compares selectors of the same class by their `xpath` attribute."""
        if isinstance(other, self.__class__):
            return self.xpath == other.xpath
        # Let Python try the reflected comparison for unrelated types.
        return NotImplemented

    def __repr__(self) -> str:
        """Returns representation in the form `ClassName(<xpath repr>)`."""
        class_name = self.__class__.__name__
        return f"{class_name}({self.xpath!r})"