You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			126 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			126 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
from bleach.linkifier import (
 | 
						|
    DEFAULT_CALLBACKS,
 | 
						|
    Linker,
 | 
						|
)
 | 
						|
from bleach.sanitizer import (
 | 
						|
    ALLOWED_ATTRIBUTES,
 | 
						|
    ALLOWED_PROTOCOLS,
 | 
						|
    ALLOWED_TAGS,
 | 
						|
    Cleaner,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
# Release date of this version, formatted as yyyymmdd.
__releasedate__ = "20241029"
# Package version: x.y.z or x.y.z.dev0 -- semver.
__version__ = "6.2.0"


# Names exported by ``from <package> import *``.
__all__ = ["clean", "linkify"]
 | 
						|
 | 
						|
 | 
						|
def clean(
    text,
    tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    protocols=ALLOWED_PROTOCOLS,
    strip=False,
    strip_comments=True,
    css_sanitizer=None,
):
    """Clean an HTML fragment of malicious content and return it

    This function is a security-focused function whose sole purpose is to
    remove malicious content from a string such that it can be displayed as
    content in a web page.

    This function is not designed to transform content for use in
    non-web-page contexts.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       If you're cleaning a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.sanitizer.Cleaner` instance.

    :arg str text: the text to clean

    :arg set tags: set of allowed tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
        sanitizing style attribute values and style text; defaults to None

    :returns: cleaned text as unicode

    """
    # A new Cleaner is built on every call; every option is forwarded by
    # keyword exactly as received.
    cleaner = Cleaner(
        tags=tags,
        attributes=attributes,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
        css_sanitizer=css_sanitizer,
    )
    return cleaner.clean(text)
 | 
						|
 | 
						|
 | 
						|
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider using
       the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    # A new Linker is built on every call; every option is forwarded by
    # keyword exactly as received.
    linker = Linker(callbacks=callbacks, skip_tags=skip_tags, parse_email=parse_email)
    return linker.linkify(text)
 |