"""
Module provides `nth-of-type` selector implementations for `SoupSelector`.
It allows you to search for the nth occurrence of an element,
similar to how the CSS `nth-of-type` pseudo-class works.
However, instead of being limited to css selectors, it works with any `SoupSelector` instance.
Classes
-------
- `NthOfSelector` - Selects nth element matching given selector
- `NthLastOfSelector` - Selects nth last element matching given selector
- `OnlyOfSelector` - Selects only element matching given selector
"""
from typing import Optional
from soupsavvy.base import SoupSelector, check_selector
from soupsavvy.interfaces import IElement
from soupsavvy.selectors.nth.nth_utils import parse_nth
from soupsavvy.utils.selector_utils import TagIterator, TagResultSet
class BaseNthOfSelector(SoupSelector):
    """
    Base class for nth-of-selector and nth-last-of-selector
    that implements general logic for finding matching elements.

    Subclasses provide the `_slice` class attribute, which is applied to the
    list of elements matched under each parent before nth indices are picked.
    """

    # slice for modification of list of elements matching selector
    _slice: slice

    def __init__(self, selector: SoupSelector, nth: str) -> None:
        """
        Initializes nth selector instance.

        Parameters
        ----------
        selector : SoupSelector
            Any `SoupSelector` instance used to match elements.
        nth : str
            CSS nth selector string. Accepts all valid css nth formulas.

        Raises
        ------
        NotSoupSelectorException
            If selector is not an instance of `SoupSelector`.
        """
        self._selector = check_selector(selector)
        self.nth_selector = parse_nth(nth)

    @property
    def selector(self) -> SoupSelector:
        """
        Returns selector instance used for matching elements in this nth selector.

        Returns
        -------
        SoupSelector
            Selector used in this nth selector.
        """
        return self._selector

    def find_all(
        self,
        tag: IElement,
        recursive: bool = True,
        limit: Optional[int] = None,
    ) -> list[IElement]:
        """
        Finds elements that are at nth position among elements matched
        by the wrapped selector under the same parent.

        Parameters
        ----------
        tag : IElement
            Element whose content is searched.
        recursive : bool, optional
            If True (default), `tag` and every element yielded by
            `TagIterator` are used as parents; otherwise only `tag` is.
        limit : int, optional
            Passed to `TagResultSet.fetch` to cap the number of results.

        Returns
        -------
        list[IElement]
            Elements selected by the nth formula.
        """
        # if recursive is False, check only children of element itself
        parents = (
            TagIterator(tag, recursive=recursive, include_self=True)
            if recursive
            else iter([tag])
        )

        matches: list[IElement] = []

        for parent in parents:
            # non-recursive search, so nth positions are counted per parent;
            # `_slice` optionally reverses the order (nth-last variant)
            matching = self.selector.find_all(tag=parent, recursive=False)[
                self._slice
            ]
            # generated indices are used as 1-based positions, hence `- 1`
            matches.extend(
                matching[index - 1]
                for index in self.nth_selector.generate(len(matching))
            )

        # keep order of tags and limit
        results = TagResultSet(
            list(TagIterator(tag, recursive=recursive))
        ) & TagResultSet(matches)
        return results.fetch(limit)

    # NOTE(review): defining `__eq__` without `__hash__` in this class body
    # makes Python set `__hash__` to None here - confirm this is intended.
    def __eq__(self, other: object) -> bool:
        # only instances of the same (sub)class are comparable
        if not isinstance(other, self.__class__):
            return NotImplemented

        return (
            self.selector == other.selector and self.nth_selector == other.nth_selector
        )

    def __repr__(self) -> str:
        cls = self.__class__.__name__
        return f"{cls}(selector={self.selector}, nth={self.nth_selector})"

    def __str__(self) -> str:
        return repr(self)
class NthOfSelector(BaseNthOfSelector):
    """
    Selector for finding nth-of elements in the soup among elements that match
    provided `SoupSelector` instance.

    Example
    -------
    >>> selector = NthOfSelector(ClassSelector("item"), "2n+1")

    matches all odd elements with class "item".

    Example
    -------
    >>> <div class="item">1</div> ✔️
    ... <div id="item"></div> ❌
    ... <div class="item">2</div> ❌
    ... <div class="item">3</div> ✔️
    ... <div class="widget"></div> ❌
    ... <div class="item">4</div> ❌

    Notes
    -----
    For more information about standard :nth-of-type pseudo-class, visit:
    https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-of-type
    """

    # keep initial order of matching elements
    _slice = slice(None)
class NthLastOfSelector(BaseNthOfSelector):
    """
    Selector for finding nth-last-of elements in the soup among elements that match
    provided `SoupSelector` instance.

    Example
    -------
    >>> selector = NthLastOfSelector(ClassSelector("item"), "2n+1")

    matches all odd elements with class "item" starting from the last element.

    Example
    -------
    >>> <div class="item">1</div> ❌
    ... <div id="item"></div> ❌
    ... <div class="item">2</div> ✔️
    ... <div class="item">3</div> ❌
    ... <div class="widget"></div> ❌
    ... <div class="item">4</div> ✔️

    Notes
    -----
    For more information about standard :nth-last-of-type pseudo-class, visit:
    https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-last-of-type
    """

    # reverse order of matching elements
    _slice = slice(None, None, -1)
class OnlyOfSelector(SoupSelector):
    """
    Selector for finding the only element,
    that matches provided `SoupSelector` instance among its siblings.

    Example
    -------
    >>> selector = OnlyOfSelector(ClassSelector("item"))

    matches all elements with class "item" that are the only child of their parent
    that matches the selector.

    Example
    -------
    >>> <div><div class="item"></div><a class="item"></a></div> ❌
    >>> <div><div class="item"></div><a class="widget"></a></div> ✔️
    >>> <div><div class="item"></div></div> ✔️
    >>> <div><div class="widget"></div></div> ❌

    Notes
    -----
    For more information about standard :only-of-type pseudo-class, visit:
    https://developer.mozilla.org/en-US/docs/Web/CSS/:only-of-type
    """

    def __init__(self, selector: SoupSelector) -> None:
        """
        Initializes `OnlyOfSelector` instance.

        Parameters
        ----------
        selector : SoupSelector
            Any `SoupSelector` instance used to match elements.

        Raises
        ------
        NotSoupSelectorException
            If selector is not an instance of `SoupSelector`.
        """
        self.selector = check_selector(selector)

    def find_all(
        self,
        tag: IElement,
        recursive: bool = True,
        limit: Optional[int] = None,
    ) -> list[IElement]:
        """
        Finds elements that are the single match of the wrapped selector
        under their parent.

        Parameters
        ----------
        tag : IElement
            Element whose content is searched.
        recursive : bool, optional
            If True (default), `tag` and every element yielded by
            `TagIterator` are used as parents; otherwise only `tag` is.
        limit : int, optional
            Passed to `TagResultSet.fetch` to cap the number of results.

        Returns
        -------
        list[IElement]
            Elements that are the only match under their parent.
        """
        parents = (
            TagIterator(tag, recursive=recursive, include_self=True)
            if recursive
            else iter([tag])
        )

        # one list of non-recursive matches per parent
        matching = [
            self.selector.find_all(tag=parent, recursive=False) for parent in parents
        ]
        # keep an element only if it is the sole match under its parent
        matches = [elements[0] for elements in matching if len(elements) == 1]

        # keep order of tags and limit
        results = TagResultSet(
            list(TagIterator(tag, recursive=recursive))
        ) & TagResultSet(matches)
        return results.fetch(limit)

    # NOTE(review): defining `__eq__` without `__hash__` in this class body
    # makes Python set `__hash__` to None here - confirm this is intended.
    def __eq__(self, other: object) -> bool:
        # only instances of the same (sub)class are comparable
        if not isinstance(other, self.__class__):
            return NotImplemented

        return self.selector == other.selector

    def __repr__(self) -> str:
        cls = self.__class__.__name__
        return f"{cls}(selector={self.selector})"