"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" import warnings try: import soupsieve except ImportError as e: soupsieve = None warnings.warn( 'The soupsieve package is not installed. CSS selectors cannot be used.' ) class CSS(object): """A proxy object against the soupsieve library, to simplify its CSS selector API. Acquire this object through the .css attribute on the BeautifulSoup object, or on the Tag you want to use as the starting point for a CSS selector. The main advantage of doing this is that the tag to be selected against doesn't need to be explicitly specified in the function calls, since it's already scoped to a tag. """ def __init__(self, tag, api=soupsieve): """Constructor. You don't need to instantiate this class yourself; instead, access the .css attribute on the BeautifulSoup object, or on the Tag you want to use as the starting point for your CSS selector. :param tag: All CSS selectors will use this as their starting point. :param api: A plug-in replacement for the soupsieve module, designed mainly for use in tests. """ if api is None: raise NotImplementedError( "Cannot execute CSS selectors because the soupsieve package is not installed." ) self.api = api self.tag = tag def escape(self, ident): """Escape a CSS identifier. This is a simple wrapper around soupselect.escape(). See the documentation for that function for more information. """ if soupsieve is None: raise NotImplementedError( "Cannot escape CSS identifiers because the soupsieve package is not installed." ) return self.api.escape(ident) def _ns(self, ns, select): """Normalize a dictionary of namespaces.""" if not isinstance(select, self.api.SoupSieve) and ns is None: # If the selector is a precompiled pattern, it already has # a namespace context compiled in, which cannot be # replaced. ns = self.tag._namespaces return ns def _rs(self, results): """Normalize a list of results to a Resultset. A ResultSet is more consistent with the rest of Beautiful Soup's API, and ResultSet.__getattr__ has a helpful error message if you try to treat a list of results as a single result (a common mistake). """ # Import here to avoid circular import from bs4.element import ResultSet return ResultSet(None, results) def compile(self, select, namespaces=None, flags=0, **kwargs): """Pre-compile a selector and return the compiled object. :param selector: A CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will use the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's soupsieve.compile() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.compile() method. :return: A precompiled selector object. :rtype: soupsieve.SoupSieve """ return self.api.compile( select, self._ns(namespaces, select), flags, **kwargs ) def select_one(self, select, namespaces=None, flags=0, **kwargs): """Perform a CSS selection operation on the current Tag and return the first result. This uses the Soup Sieve library. For more information, see that library's documentation for the soupsieve.select_one() method. :param selector: A CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will use the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's soupsieve.select_one() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select_one() method. :return: A Tag, or None if the selector has no match. :rtype: bs4.element.Tag """ return self.api.select_one( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) def select(self, select, namespaces=None, limit=0, flags=0, **kwargs): """Perform a CSS selection operation on the current Tag. This uses the Soup Sieve library. For more information, see that library's documentation for the soupsieve.select() method. :param selector: A string containing a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param limit: After finding this number of results, stop looking. :param flags: Flags to be passed into Soup Sieve's soupsieve.select() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select() method. :return: A ResultSet of Tag objects. :rtype: bs4.element.ResultSet """ if limit is None: limit = 0 return self._rs( self.api.select( select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs ) ) def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs): """Perform a CSS selection operation on the current Tag. This uses the Soup Sieve library. For more information, see that library's documentation for the soupsieve.iselect() method. It is the same as select(), but it returns a generator instead of a list. :param selector: A string containing a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param limit: After finding this number of results, stop looking. :param flags: Flags to be passed into Soup Sieve's soupsieve.iselect() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.iselect() method. :return: A generator :rtype: types.GeneratorType """ return self.api.iselect( select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs ) def closest(self, select, namespaces=None, flags=0, **kwargs): """Find the Tag closest to this one that matches the given selector. This uses the Soup Sieve library. For more information, see that library's documentation for the soupsieve.closest() method. :param selector: A string containing a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's soupsieve.closest() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.closest() method. :return: A Tag, or None if there is no match. :rtype: bs4.Tag """ return self.api.closest( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) def match(self, select, namespaces=None, flags=0, **kwargs): """Check whether this Tag matches the given CSS selector. This uses the Soup Sieve library. For more information, see that library's documentation for the soupsieve.match() method. :param: a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's soupsieve.match() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.match() method. :return: True if this Tag matches the selector; False otherwise. :rtype: bool """ return self.api.match( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) def filter(self, select, namespaces=None, flags=0, **kwargs): """Filter this Tag's direct children based on the given CSS selector. This uses the Soup Sieve library. It works the same way as passing this Tag into that library's soupsieve.filter() method. More information, for more information see the documentation for soupsieve.filter(). :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's soupsieve.filter() method. :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.filter() method. :return: A ResultSet of Tag objects. :rtype: bs4.element.ResultSet """ return self._rs( self.api.filter( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) )