"""
Module with wrappers for selectors used mostly in model schemas.
Wrappers are used to control behavior and define how fields of the models are defined.
Classes
-------
- `All` - Wrapper to find all information matching criteria.
- `Default` - Wrapper to set default value if information is not found.
- `Required` - Wrapper to enforce that information must be found.
"""
from typing import Any, Optional
import soupsavvy.exceptions as exc
from soupsavvy.base import check_operation, check_tag_searcher
from soupsavvy.interfaces import (
Comparable,
IElement,
JSONSerializable,
TagSearcher,
TagSearcherType,
)
from soupsavvy.operations.selection_pipeline import SelectionPipeline
[docs]
class FieldWrapper(TagSearcher, Comparable):
"""
A wrapper for `TagSearcher` valid objects, that acts as a higher order searcher,
which controls behavior of the wrapped searcher.
Used as field in defined model.
Subclasses must implement `find` method with their specific behavior.
"""
[docs]
def __init__(self, selector: TagSearcherType) -> None:
"""
Initializes wrapper with a `TagSearcher` valid object.
Parameters
----------
selector : TagSearcher
The `TagSearcher` valid object to be wrapped.
Raises
------
NotTagSearcherException
If provided object is not a valid `TagSearcher`.
"""
self._selector = check_tag_searcher(selector)
@property
def selector(self) -> TagSearcher:
"""
Returns searcher, that is wrapped by this wrapper.
Returns
-------
TagSearcher
`TagSearcher` instance wrapped by this wrapper.
"""
return self._selector
[docs]
def find_all(
self,
tag: IElement,
recursive: bool = True,
limit: Optional[int] = None,
) -> list[Any]:
"""
Find all matching element using the wrapped selector.
Used for compatibility with `TagSearcher` interface,
delegates to wrapped selector.
Parameters
----------
tag : IElement
Any `IElement` to search within.
recursive : bool, optional
Whether to search recursively, by default True.
limit : int, optional
Limit the number of results, by default None.
Returns
-------
list[Any]
A list of found results.
"""
return self.selector.find_all(tag, recursive=recursive, limit=limit)
def __eq__(self, x: Any) -> bool:
"""
Check if two `FieldWrapper` instances are equal.
They need to be of the same class and wrap the same selector.
Parameters
----------
x : Any
The object to compare with.
Returns
-------
bool
True if the instances are equal, False otherwise.
"""
if not isinstance(x, self.__class__):
return NotImplemented
return self.selector == x.selector
def __or__(self, x: Any) -> SelectionPipeline:
"""
Overrides `__or__` method called also by pipe operator '|'.
Combines this `FieldWrapper` with an operation.
Parameters
----------
x : BaseOperation
The operation to be combined with this `FieldWrapper`.
Returns
-------
SelectionPipeline
New `SelectionPipeline` object created from
combining selector and operation.
Raises
------
NotOperationException
If provided object is not an instance of `BaseOperation`.
"""
x = check_operation(x)
return SelectionPipeline(selector=self, operation=x)
def __str__(self) -> str:
return f"{self.__class__.__name__}({self.selector})"
def __repr__(self) -> str:
return str(self)
[docs]
class FieldList(list, JSONSerializable):
"""
Convenience wrapper around list to provide JSON serialization for lists
containing JSON serializable items. Useful as field value to represent multiple
matched elements.
"""
[docs]
def json(self) -> list[Any]:
"""
Serializes the FieldList to a JSON-compatible list.
Returns
-------
list[Any]
A list containing JSON-serializable representations of the items.
"""
return [
item.json() if isinstance(item, JSONSerializable) else item for item in self
]
[docs]
class All(FieldWrapper):
"""
Field wrapper for selecting multiple elements matching the selector.
Forces find method to fall back to find_all method and return all matches.
Example
-------
>>> from soupsavvy.models import All
... from soupsavvy import TypeSelector
... selector = All(TypeSelector("div"))
... selector.find(tag)
[element1, element2, element3]
"""
[docs]
def find(
self, tag: IElement, strict: bool = False, recursive: bool = True
) -> list[Any]:
"""
Find all matching tags using the wrapped selector,
enforcing the use of find_all method.
Parameters
----------
tag : IElement
Any `IElement` to search within.
strict : bool, optional
Ignored, as this method always falls back to `find_all`.
recursive : bool, optional
Whether to search recursively, by default True.
Returns
-------
list[Any]
A list of matching results.
"""
return FieldList(self.selector.find_all(tag, recursive=recursive))
[docs]
class Required(FieldWrapper):
"""
Field wrapper for enforcing matched element not to be None.
Raises an exception if searcher does not find any matches.
Example
-------
>>> from soupsavvy.models import Required
... from soupsavvy import TypeSelector
... selector = Required(TypeSelector("div"))
... selector.find(tag)
RequiredConstraintException
"""
[docs]
def find(self, tag: IElement, strict: bool = False, recursive: bool = True) -> Any:
"""
Finds a required element using the wrapped selector,
enforcing matched element not to be None.
If any exception is raised during the search, it's propagated to the caller.
Parameters
----------
tag : IElement
Any `IElement` to search within.
strict : bool, optional
If True, raises an exception if no matches are found, by default False.
recursive : bool, optional
Whether to search recursively, by default True.
Returns
-------
Any
The found element.
Raises
------
RequiredConstraintException
If selector returns None, indicating that required element was not found.
"""
result = self.selector.find(tag=tag, strict=strict, recursive=recursive)
if result is None:
raise exc.RequiredConstraintException("Required element was not found")
return result
[docs]
class Default(FieldWrapper):
"""
Field wrapper for returning a default value if no match is found.
Example
-------
>>> from soupsavvy.models import Default
... from soupsavvy import TypeSelector
... selector = Default(TypeSelector("div"), default="1234")
... selector.find(tag)
"1234"
"""
[docs]
def __init__(self, selector: TagSearcherType, default: Any) -> None:
"""
Initializes `Default` field wrapper.
Parameters
----------
selector : TagSearcher
Object compatible with `TagSearcher` interface to be wrapped.
default : Any
The default value to return if no match is found.
"""
super().__init__(selector)
self.default = default
[docs]
def find(self, tag: IElement, strict: bool = False, recursive: bool = True):
"""
Finds an element, returning a default value if None was returned
by wrapped selector. Any exception raised during the search is propagated.
Parameters
----------
tag : IElement
Any `IElement` to search within.
strict : bool, optional
If True, raises an exception if no matches are found, by default False.
recursive : bool, optional
Whether to search recursively, by default True.
Returns
-------
Any
The found element or the default value if not found.
"""
result = self.selector.find(tag=tag, strict=strict, recursive=recursive)
return self.default if result is None else result
def __str__(self) -> str:
return f"{self.__class__.__name__}({self.selector}, default={self.default})"