CSS Selectors

The soupsavvy.selectors.css subpackage provides a set of CSS-based selectors, built as wrappers around the soupsieve library — ‘a modern CSS selector implementation for BeautifulSoup’. These selectors can be seamlessly combined with other soupsavvy selectors, allowing for flexible use of pure CSS and common pseudo-classes.

Child Selectors

Child selectors target elements based on their position among siblings within a parent element. While nth-child can handle any position-based selection, soupsavvy offers convenient wrappers for several frequently used CSS pseudo-classes.

FirstChild

The FirstChild selector selects every element that is the first child of its parent.

:first-child
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import FirstChild
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>First</p>
        <div>
            <span>First</span>
            <span>
                <a>First</a>
            </span>
        </div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = FirstChild()
selector.find_all(element)
[SoupElement(<p>First</p>),
 SoupElement(<span>First</span>),
 SoupElement(<a>First</a>)]

LastChild

The LastChild selector selects every element that is the last child of its parent.

:last-child
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import LastChild
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p></p>
        <div></div>
        <div>
            <span>
                <a>Last</a>
            </span>
            <span>Last</span>
        </div>
        <div>Last</div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = LastChild()
selector.find_all(element)
[SoupElement(<a>Last</a>),
 SoupElement(<span>Last</span>),
 SoupElement(<div>Last</div>)]

NthChild

The NthChild selector allows you to target elements based on their position among siblings, using a CSS-like nth-child expression.

:nth-child(3)

It, along with other nth-based selectors, fully supports all valid CSS nth parameter values, letting you select elements using the same syntax as CSS.

:nth-child(2n)
:nth-child(odd)
:nth-child(even)
:nth-child(-n+2)
NthChild('2n')
NthChild('odd')
NthChild('even')
NthChild('-n+2')
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import NthChild
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>1</p>
        <p>2</p>
        <p>3</p>
        <p>4</p>
        <p>5</p>
        <p>6</p>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = NthChild("2n")
selector.find_all(element)
[SoupElement(<p>2</p>), SoupElement(<p>4</p>), SoupElement(<p>6</p>)]

NthLastChild

The NthLastChild selector allows you to select elements based on their position among their siblings, counting from the last child of the parent element.

:nth-last-child(3)
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import NthLastChild
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>1</p>
        <p>2</p>
        <p>3</p>
        <p>4</p>
        <p>5</p>
        <p>6</p>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = NthLastChild("odd")
selector.find_all(element)
[SoupElement(<p>2</p>), SoupElement(<p>4</p>), SoupElement(<p>6</p>)]

OnlyChild

The OnlyChild selector matches elements that are the only child of their parent.

:only-child
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import OnlyChild
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>Text</p>
        <div>
            <span></span>
            <span>Text</span>
        </div>
        <div><p>Only child</p></div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = OnlyChild()
selector.find(element)
SoupElement(<p>Only child</p>)

Type selectors

Type selectors are used to select elements based on their position among sibling elements of the same type.

FirstOfType

Selects every element that is the first of its type among its siblings.

:first-of-type
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import FirstOfType
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>First p</p>
        <div>First div</div>
        <div>
            <span>First span</span>
            <span>
                <a>First a</a>
            </span>
        </div>
        <p></p>
        <div></div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = FirstOfType()
selector.find_all(element)
[SoupElement(<p>First p</p>),
 SoupElement(<div>First div</div>),
 SoupElement(<span>First span</span>),
 SoupElement(<a>First a</a>)]

LastOfType

Selects every element that is the last of its type among its siblings.

:last-of-type
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import LastOfType
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>Last p</p>
        <div>
            <span>
                <a>Last a</a>
            </span>
            <span>Last span</span>
        </div>
        <div>Last div</div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = LastOfType()
selector.find_all(element)
[SoupElement(<p>Last p</p>),
 SoupElement(<a>Last a</a>),
 SoupElement(<span>Last span</span>),
 SoupElement(<div>Last div</div>)]

NthOfType

Selects every element that is the nth of its type among its siblings.

:nth-of-type(n)
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import NthOfType
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>1</p>
        <span>1</span>
        <p>2</p>
        <span>2</span>
        <p>3</p>
        <span>3</span>
        <p>4</p>
        <span>4</span>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = NthOfType("2n+2")
selector.find_all(element)
[SoupElement(<p>2</p>),
 SoupElement(<span>2</span>),
 SoupElement(<p>4</p>),
 SoupElement(<span>4</span>)]

NthLastOfType

Selects every element that is the nth of its type among its siblings, counting from the last sibling.

:nth-last-of-type(n)
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import NthLastOfType
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>1</p>
        <span>1</span>
        <p>2</p>
        <span>2</span>
        <p>3</p>
        <span>3</span>
        <p>4</p>
        <span>4</span>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = NthLastOfType("-n+2")
selector.find_all(element)
[SoupElement(<p>3</p>),
 SoupElement(<span>3</span>),
 SoupElement(<p>4</p>),
 SoupElement(<span>4</span>)]

OnlyOfType

Selects every element that is the only one of its type among its siblings.

:only-of-type
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import OnlyOfType
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <div>
            <span>First span</span>
            <span>Second span</span>
        </div>
        <p>Only p</p>
        <div>
            <span>Only span</span>
            <a>Only a</a>
        </div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = OnlyOfType()
selector.find_all(element)
[SoupElement(<p>Only p</p>),
 SoupElement(<span>Only span</span>),
 SoupElement(<a>Only a</a>)]

Other selectors

Empty

Selects every element that has no children and no text content.

CSS Example:

:empty
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import Empty
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <p>Text</p>
        <div>
            <span>
                <a>Text</a>
            </span>
            <span></span>
        </div>
        <div><a>Text</a></div>
        <p></p>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = Empty()
selector.find_all(element)
[SoupElement(<span></span>), SoupElement(<p></p>)]

CSS

A wrapper for any CSS selector. It uses soupsieve under the hood, so the supported syntax depends on the installed soupsieve version. It is a convenience class for searching by CSS selector; the results of the search are equivalent to those of the BeautifulSoup.select method.

Using BeautifulSoup:

soup.select_one('div > p')

Using soupsieve:

soupsieve.select_one('div > p', soup)

Using soupsavvy:

CSS('div > p').find(element)
from bs4 import BeautifulSoup

from soupsavvy.selectors.css import CSS
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <div class="foo">Not span</div>
        <span class="foo">Not first</span>
        <div><span class="goo">Not .foo</span></div>
        <div><span class="foo">Found</span></div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = CSS("span.foo:first-child")
selector.find(element)
SoupElement(<span class="foo">Found</span>)

Combining selectors

CSS-based selectors can be combined with other soupsavvy selectors to create composite selectors. For example, to select all elements that are not empty and are children of a div element, the following selector can be used:

from bs4 import BeautifulSoup

from soupsavvy import TypeSelector, to_soupsavvy
from soupsavvy.selectors.css import Empty


soup = BeautifulSoup(
    """
        <p>Text</p>
        <div>
            <span><a></a></span>
            <span>Text</span>
            <span></span>
            <p></p>
        </div>
        <p></p>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = TypeSelector("div") > (~Empty())
selector.find_all(element)
[SoupElement(<span><a></a></span>), SoupElement(<span>Text</span>)]

To find elements that have exactly one child and are the last child of their parent, the following selector can be used:

from bs4 import BeautifulSoup

from soupsavvy import Anchor, HasSelector, to_soupsavvy
from soupsavvy.selectors.css import LastChild, OnlyChild

soup = BeautifulSoup(
    """
        <p>Text</p>
        <div>
            <span></span>
            <span>Text</span>
        </div>
        <div><span>Only Child</span></div>
        <div><span>Only Child - Last</span></div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

only_child = Anchor > OnlyChild()
selector = HasSelector(only_child) & LastChild()
selector.find(element)
SoupElement(<div><span>Only Child - Last</span></div>)

Recursivity

Unlike BeautifulSoup.select, soupsavvy allows non-recursive searches: setting recursive=False in the find methods ensures that only direct children matching the selector are returned.

from bs4 import BeautifulSoup

from soupsavvy.selectors.css import CSS
from soupsavvy import to_soupsavvy

soup = BeautifulSoup(
    """
        <span class="foo"></span>
        <div class="goo">
            <div class="foo"></div>
        </div>
        <div class="foo">Child</div>
    """,
    features="html.parser",
)
element = to_soupsavvy(soup)

selector = CSS("div.foo")
selector.find(element, recursive=False)
SoupElement(<div class="foo">Child</div>)

Conclusion

soupsavvy offers an easy way to select elements using CSS selectors. It includes wrappers for commonly used pseudo-classes that share the same interface as other selectors, allowing them to be easily combined.

Enjoy soupsavvy and leave us feedback!
Happy scraping!