"""
Module with `lxml` implementation of `IElement`.
`LXMLElement` class is an adapter making `lxml` tree,
compatible with `IElement` interface and usable across the library.
"""
from __future__ import annotations
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Pattern, Union
import lxml.etree as etree
from lxml.etree import _Element as LXMLNode
from typing_extensions import Self
from soupsavvy.interfaces import IElement
from soupsavvy.selectors.css.api import CSSSelectApi
from soupsavvy.selectors.xpath.api import LXMLXpathApi
[docs]
class LXMLElement(IElement[LXMLNode]):
"""
Implementation of `IElement` for `lxml` tree.
Adapter for `lxml` objects, that makes them usable across the library.
Example
-------
>>> from soupsavvy.implementation.lxml import LXMLElement
... from lxml.etree import fromstring
... node = fromstring("<html><body><div>example</div></body></html>")
... element = LXMLElement(node)
"""
_NODE_TYPE = LXMLNode
[docs]
def find_all(
self,
name: Optional[str] = None,
attrs: Optional[dict[str, Union[str, Pattern[str]]]] = None,
recursive: bool = True,
limit: Optional[int] = None,
) -> list[Self]:
iterator = (
self.node.iterdescendants(None)
if recursive
else self.node.iterchildren(None)
)
generator = (
element
for element in iterator
if self._match(element, name=name, attrs=attrs or {})
)
return list(islice(self._map(generator), limit))
def _match(
self,
element: LXMLNode,
name: Optional[str],
attrs: dict[str, Union[str, Pattern[str]]],
) -> bool:
for attr, value in attrs.items():
attribute = element.attrib.get(attr)
if attribute is None:
return False
if isinstance(value, Pattern):
if not value.search(attribute):
return False
else:
if value not in attribute.split():
return False
if name is not None and element.tag != name:
return False
return True
[docs]
def find_subsequent_siblings(self, limit: Optional[int] = None) -> list[Self]:
iterator = self.node.itersiblings(None)
return list(islice(self._map(iterator), limit))
[docs]
def find_ancestors(self, limit: Optional[int] = None) -> list[Self]:
iterator = self.node.iterancestors(None)
return list(islice(self._map(iterator), limit))
[docs]
def get_attribute(self, name: str) -> Optional[str]:
return self.node.attrib.get(name)
[docs]
def css(self, selector: str) -> CSSSelectApi:
return CSSSelectApi(selector)
[docs]
def xpath(self, selector) -> LXMLXpathApi:
return LXMLXpathApi(selector)
@property
def children(self) -> Iterable[Self]:
iterator = self.node.iterchildren(None)
return self._map(iterator)
@property
def descendants(self) -> Iterable[Self]:
iterator = self.node.iterdescendants(None)
return self._map(iterator)
@property
def parent(self) -> Optional[Self]:
parent = self.node.getparent()
return self.from_node(parent) if parent is not None else None
@property
def name(self) -> str:
return self.node.tag
@property
def text(self) -> str:
texts = (text for text in self.node.itertext() if text is not None)
return "".join(texts) # type: ignore
def __str__(self) -> str:
return etree.tostring(self.node, method="html", with_tail=False).decode("utf-8")