"""
Module with selectors that search for elements based on their attributes.

Classes
-------
- `AttributeSelector` - Selects elements based on any attribute value.
- `IdSelector` - Selects elements based on 'id' attribute value.
- `ClassSelector` - Selects elements based on 'class' attribute value.
"""
import re
from dataclasses import dataclass
from typing import Optional, Pattern
import soupsavvy.selectors.namespace as ns
from soupsavvy.base import SoupSelector
from soupsavvy.interfaces import IElement
from soupsavvy.selectors.namespace import PatternType
@dataclass
class AttributeSelector(SoupSelector):
    """
    Selector for searching element based on its attribute value.

    Counterpart of css attribute selectors, that extends its capability
    with regex pattern matching.

    Example
    -------
    >>> AttributeSelector(name="role", value="widget")

    matches all elements that have 'role' attribute with value "widget".

    Example
    -------
    >>> <div role="widget">Hello World</div> ✔️
    >>> <div class="menu">Hello World</div> ❌
    >>> <div role="menu">Hello World</div> ❌

    CSS counterpart can be represented as:

    Example
    -------
    >>> [role="widget"]

    In case of using regex pattern, `re.search` is used to match the attribute value.

    Example
    -------
    >>> AttributeSelector(name="href", value=re.compile(r"wikipedia"))

    Parameters
    ----------
    name : str
        HTML element attribute name ex. "class", "href"
    value : str | Pattern, optional
        Value of the attribute to match.
        By default None, if not provided, default pattern matching any sequence
        of characters is used.

    Notes
    -----
    The pattern used for searching is derived from `value` once,
    in `__post_init__`, and stored as `_pattern`. Equality of two selectors
    is based on that pattern and on `name`.

    For more information about attribute selectors, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
    """

    name: str
    value: Optional[PatternType] = None

    def __post_init__(self) -> None:
        """Sets pattern attribute used in `SoupSelector` find operations."""
        self._pattern = self._parse_pattern()

    def _parse_pattern(self) -> PatternType:
        """
        Parses pattern used in find methods based on provided init parameters.

        Returns
        -------
        str | Pattern
            - compiled `ns.DEFAULT_PATTERN`, if `value` is None,
            - `value` itself, if it is already a compiled regex pattern,
            - `str(value)` in any other case.
        """
        # if value was not provided, fall back to default pattern
        if self.value is None:
            return re.compile(ns.DEFAULT_PATTERN)

        # `re.Pattern` is the concrete class of compiled patterns;
        # `typing.Pattern` is only a deprecated alias of it (since Python 3.9)
        if isinstance(self.value, re.Pattern):
            # value is already a compiled regex pattern
            return self.value

        # cast value to string if not a regex pattern
        return str(self.value)

    def find_all(
        self,
        tag: IElement,
        recursive: bool = True,
        limit: Optional[int] = None,
    ) -> list[IElement]:
        """
        Finds elements whose attribute `name` matches the parsed pattern.

        The search is delegated to `tag.find_all`, called with
        a single-entry `attrs` mapping of `{name: pattern}`.

        Parameters
        ----------
        tag : IElement
            Element on which `find_all` is called.
        recursive : bool, optional
            Forwarded unchanged to `tag.find_all`, by default True.
        limit : int, optional
            Forwarded unchanged to `tag.find_all`, by default None.

        Returns
        -------
        list[IElement]
            Whatever `tag.find_all` returns for given parameters.
        """
        params = {self.name: self._pattern}
        return tag.find_all(attrs=params, recursive=recursive, limit=limit)

    def __eq__(self, other: object) -> bool:
        """
        Checks equality with another selector.

        Returns `NotImplemented` if `other` is not an instance of the class
        of `self`, which lets python try the reflected comparison.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented

        # pattern is what is used in find methods
        return self._pattern == other._pattern and self.name == other.name
class SpecificAttributeSelector(AttributeSelector):
    """
    Common parent of selectors that are bound to one fixed attribute name.

    It wraps `AttributeSelector`, so that only the value to match
    needs to be passed by the user. The attribute name is read from
    the `_NAME` class attribute, which child classes should define.
    """

    # attribute name forwarded to `AttributeSelector`, defined by child classes
    _NAME: str

    def __init__(self, value: Optional[PatternType] = None) -> None:
        """
        Initializes specific attribute selector with default attribute name.

        Parameters
        ----------
        value : str | Pattern, optional
            Value of the attribute to match.
            By default None, if not provided, default pattern matching any sequence
            of characters is used.
        """
        attribute_name = self._NAME
        super().__init__(name=attribute_name, value=value)

    def __repr__(self) -> str:
        """Returns representation with class name and `value` only."""
        # attribute name is implied by the class, so it is left out
        class_name = self.__class__.__name__
        return f"{class_name}({self.value!r})"
class IdSelector(SpecificAttributeSelector):
    """
    Specific `AttributeSelector` for matching elements based on 'id' attribute value.

    Example
    -------
    >>> IdSelector("main")

    matches all elements that have 'id' attribute with value "main".

    Example
    -------
    >>> <div id="main">Hello World</div> ✔️
    >>> <div id="content">Hello World</div> ❌

    `IdSelector` is a convenience wrapper for `AttributeSelector`,
    thus example above is equivalent to using:

    >>> AttributeSelector(name="id", value="main")

    CSS counterpart can be represented as:

    Example
    -------
    >>> #main

    In case of using regex pattern, `re.search` is used to match the attribute value.

    Example
    -------
    >>> IdSelector(re.compile(r"content[0-9]+"))

    Notes
    -----
    For more information about id attribute, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/ID_selectors
    """

    # attribute name passed to `AttributeSelector` by the parent initializer
    _NAME = "id"
class ClassSelector(SpecificAttributeSelector):
    """
    Specific `AttributeSelector` for matching elements based on 'class' attribute value.

    Example
    -------
    >>> ClassSelector("widget")

    matches all elements that have 'class' attribute with value "widget".

    Example
    -------
    >>> <div class="widget">Hello World</div> ✔️
    >>> <div class="content">Hello World</div> ❌

    `ClassSelector` is a convenience wrapper for `AttributeSelector`,
    thus example above is equivalent to using:

    >>> AttributeSelector(name="class", value="widget")

    CSS counterpart can be represented as:

    Example
    -------
    >>> .widget

    In case of using regex pattern, `re.search` is used to match the attribute value.

    Example
    -------
    >>> ClassSelector(re.compile(r"nav"))

    Notes
    -----
    For more information about class attribute, see:

    https://developer.mozilla.org/en-US/docs/Web/CSS/Class_selectors
    """

    # attribute name passed to `AttributeSelector` by the parent initializer
    _NAME = "class"