from bs4.dammit import EntitySubstitution class Formatter(EntitySubstitution): """Describes a strategy to use when outputting a parse tree to a string. Some parts of this strategy come from the distinction between HTML4, HTML5, and XML. Others are configurable by the user. Formatters are passed in as the `formatter` argument to methods like `PageElement.encode`. Most people won't need to think about formatters, and most people who need to think about them can pass in one of these predefined strings as `formatter` rather than making a new Formatter object: For HTML documents: * 'html' - HTML entity substitution for generic HTML documents. (default) * 'html5' - HTML entity substitution for HTML5 documents, as well as some optimizations in the way tags are rendered. * 'minimal' - Only make the substitutions necessary to guarantee valid HTML. * None - Do not perform any substitution. This will be faster but may result in invalid markup. For XML documents: * 'html' - Entity substitution for XHTML documents. * 'minimal' - Only make the substitutions necessary to guarantee valid XML. (default) * None - Do not perform any substitution. This will be faster but may result in invalid markup. """ # Registries of XML and HTML formatters. XML_FORMATTERS = {} HTML_FORMATTERS = {} HTML = 'html' XML = 'xml' HTML_DEFAULTS = dict( cdata_containing_tags=set(["script", "style"]), ) def _default(self, language, value, kwarg): if value is not None: return value if language == self.XML: return set() return self.HTML_DEFAULTS[kwarg] def __init__( self, language=None, entity_substitution=None, void_element_close_prefix='/', cdata_containing_tags=None, empty_attributes_are_booleans=False, indent=1, ): """Constructor. :param language: This should be Formatter.XML if you are formatting XML markup and Formatter.HTML if you are formatting HTML markup. :param entity_substitution: A function to call to replace special characters with XML/HTML entities. For examples, see bs4.dammit.EntitySubstitution.substitute_html and substitute_xml. :param void_element_close_prefix: By default, void elements are represented as (XML rules) rather than (HTML rules). To get , pass in the empty string. :param cdata_containing_tags: The list of tags that are defined as containing CDATA in this dialect. For example, in HTML,