You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			1178 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			1178 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
:mod:``pandas.io.xml`` is a module for reading XML.
 | 
						|
"""
 | 
						|
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
import io
 | 
						|
from os import PathLike
 | 
						|
from typing import (
 | 
						|
    TYPE_CHECKING,
 | 
						|
    Any,
 | 
						|
    Callable,
 | 
						|
)
 | 
						|
import warnings
 | 
						|
 | 
						|
from pandas._libs import lib
 | 
						|
from pandas.compat._optional import import_optional_dependency
 | 
						|
from pandas.errors import (
 | 
						|
    AbstractMethodError,
 | 
						|
    ParserError,
 | 
						|
)
 | 
						|
from pandas.util._decorators import doc
 | 
						|
from pandas.util._exceptions import find_stack_level
 | 
						|
from pandas.util._validators import check_dtype_backend
 | 
						|
 | 
						|
from pandas.core.dtypes.common import is_list_like
 | 
						|
 | 
						|
from pandas.core.shared_docs import _shared_docs
 | 
						|
 | 
						|
from pandas.io.common import (
 | 
						|
    file_exists,
 | 
						|
    get_handle,
 | 
						|
    infer_compression,
 | 
						|
    is_file_like,
 | 
						|
    is_fsspec_url,
 | 
						|
    is_url,
 | 
						|
    stringify_path,
 | 
						|
)
 | 
						|
from pandas.io.parsers import TextParser
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from collections.abc import Sequence
 | 
						|
    from xml.etree.ElementTree import Element
 | 
						|
 | 
						|
    from lxml import etree
 | 
						|
 | 
						|
    from pandas._typing import (
 | 
						|
        CompressionOptions,
 | 
						|
        ConvertersArg,
 | 
						|
        DtypeArg,
 | 
						|
        DtypeBackend,
 | 
						|
        FilePath,
 | 
						|
        ParseDatesArg,
 | 
						|
        ReadBuffer,
 | 
						|
        StorageOptions,
 | 
						|
        XMLParsers,
 | 
						|
    )
 | 
						|
 | 
						|
    from pandas import DataFrame
 | 
						|
 | 
						|
 | 
						|
@doc(
 | 
						|
    storage_options=_shared_docs["storage_options"],
 | 
						|
    decompression_options=_shared_docs["decompression_options"] % "path_or_buffer",
 | 
						|
)
 | 
						|
class _XMLFrameParser:
 | 
						|
    """
 | 
						|
    Internal subclass to parse XML into DataFrames.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    path_or_buffer : a valid JSON ``str``, path object or file-like object
 | 
						|
        Any valid string path is acceptable. The string could be a URL. Valid
 | 
						|
        URL schemes include http, ftp, s3, and file.
 | 
						|
 | 
						|
    xpath : str or regex
 | 
						|
        The ``XPath`` expression to parse required set of nodes for
 | 
						|
        migration to :class:`~pandas.DataFrame`. ``etree`` supports limited ``XPath``.
 | 
						|
 | 
						|
    namespaces : dict
 | 
						|
        The namespaces defined in XML document (``xmlns:namespace='URI'``)
 | 
						|
        as dicts with key being namespace and value the URI.
 | 
						|
 | 
						|
    elems_only : bool
 | 
						|
        Parse only the child elements at the specified ``xpath``.
 | 
						|
 | 
						|
    attrs_only : bool
 | 
						|
        Parse only the attributes at the specified ``xpath``.
 | 
						|
 | 
						|
    names : list
 | 
						|
        Column names for :class:`~pandas.DataFrame` of parsed XML data.
 | 
						|
 | 
						|
    dtype : dict
 | 
						|
        Data type for data or columns. E.g. {{'a': np.float64,
 | 
						|
        'b': np.int32, 'c': 'Int64'}}
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    converters : dict, optional
 | 
						|
        Dict of functions for converting values in certain columns. Keys can
 | 
						|
        either be integers or column labels.
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    parse_dates : bool or list of int or names or list of lists or dict
 | 
						|
        Converts either index or select columns to datetimes
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    encoding : str
 | 
						|
        Encoding of xml object or document.
 | 
						|
 | 
						|
    stylesheet : str or file-like
 | 
						|
        URL, file, file-like object, or a raw string containing XSLT,
 | 
						|
        ``etree`` does not support XSLT but retained for consistency.
 | 
						|
 | 
						|
    iterparse : dict, optional
 | 
						|
        Dict with row element as key and list of descendant elements
 | 
						|
        and/or attributes as value to be retrieved in iterparsing of
 | 
						|
        XML document.
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    {decompression_options}
 | 
						|
 | 
						|
        .. versionchanged:: 1.4.0 Zstandard support.
 | 
						|
 | 
						|
    {storage_options}
 | 
						|
 | 
						|
    See also
 | 
						|
    --------
 | 
						|
    pandas.io.xml._EtreeFrameParser
 | 
						|
    pandas.io.xml._LxmlFrameParser
 | 
						|
 | 
						|
    Notes
 | 
						|
    -----
 | 
						|
    To subclass this class effectively you must override the following methods:`
 | 
						|
        * :func:`parse_data`
 | 
						|
        * :func:`_parse_nodes`
 | 
						|
        * :func:`_iterparse_nodes`
 | 
						|
        * :func:`_parse_doc`
 | 
						|
        * :func:`_validate_names`
 | 
						|
        * :func:`_validate_path`
 | 
						|
 | 
						|
 | 
						|
    See each method's respective documentation for details on their
 | 
						|
    functionality.
 | 
						|
    """
 | 
						|
 | 
						|
    def __init__(
 | 
						|
        self,
 | 
						|
        path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
 | 
						|
        xpath: str,
 | 
						|
        namespaces: dict[str, str] | None,
 | 
						|
        elems_only: bool,
 | 
						|
        attrs_only: bool,
 | 
						|
        names: Sequence[str] | None,
 | 
						|
        dtype: DtypeArg | None,
 | 
						|
        converters: ConvertersArg | None,
 | 
						|
        parse_dates: ParseDatesArg | None,
 | 
						|
        encoding: str | None,
 | 
						|
        stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
 | 
						|
        iterparse: dict[str, list[str]] | None,
 | 
						|
        compression: CompressionOptions,
 | 
						|
        storage_options: StorageOptions,
 | 
						|
    ) -> None:
 | 
						|
        self.path_or_buffer = path_or_buffer
 | 
						|
        self.xpath = xpath
 | 
						|
        self.namespaces = namespaces
 | 
						|
        self.elems_only = elems_only
 | 
						|
        self.attrs_only = attrs_only
 | 
						|
        self.names = names
 | 
						|
        self.dtype = dtype
 | 
						|
        self.converters = converters
 | 
						|
        self.parse_dates = parse_dates
 | 
						|
        self.encoding = encoding
 | 
						|
        self.stylesheet = stylesheet
 | 
						|
        self.iterparse = iterparse
 | 
						|
        self.is_style = None
 | 
						|
        self.compression: CompressionOptions = compression
 | 
						|
        self.storage_options = storage_options
 | 
						|
 | 
						|
    def parse_data(self) -> list[dict[str, str | None]]:
 | 
						|
        """
 | 
						|
        Parse xml data.
 | 
						|
 | 
						|
        This method will call the other internal methods to
 | 
						|
        validate ``xpath``, names, parse and return specific nodes.
 | 
						|
        """
 | 
						|
 | 
						|
        raise AbstractMethodError(self)
 | 
						|
 | 
						|
    def _parse_nodes(self, elems: list[Any]) -> list[dict[str, str | None]]:
 | 
						|
        """
 | 
						|
        Parse xml nodes.
 | 
						|
 | 
						|
        This method will parse the children and attributes of elements
 | 
						|
        in ``xpath``, conditionally for only elements, only attributes
 | 
						|
        or both while optionally renaming node names.
 | 
						|
 | 
						|
        Raises
 | 
						|
        ------
 | 
						|
        ValueError
 | 
						|
            * If only elements and only attributes are specified.
 | 
						|
 | 
						|
        Notes
 | 
						|
        -----
 | 
						|
        Namespace URIs will be removed from return node values. Also,
 | 
						|
        elements with missing children or attributes compared to siblings
 | 
						|
        will have optional keys filled with None values.
 | 
						|
        """
 | 
						|
 | 
						|
        dicts: list[dict[str, str | None]]
 | 
						|
 | 
						|
        if self.elems_only and self.attrs_only:
 | 
						|
            raise ValueError("Either element or attributes can be parsed not both.")
 | 
						|
        if self.elems_only:
 | 
						|
            if self.names:
 | 
						|
                dicts = [
 | 
						|
                    {
 | 
						|
                        **(
 | 
						|
                            {el.tag: el.text}
 | 
						|
                            if el.text and not el.text.isspace()
 | 
						|
                            else {}
 | 
						|
                        ),
 | 
						|
                        **{
 | 
						|
                            nm: ch.text if ch.text else None
 | 
						|
                            for nm, ch in zip(self.names, el.findall("*"))
 | 
						|
                        },
 | 
						|
                    }
 | 
						|
                    for el in elems
 | 
						|
                ]
 | 
						|
            else:
 | 
						|
                dicts = [
 | 
						|
                    {ch.tag: ch.text if ch.text else None for ch in el.findall("*")}
 | 
						|
                    for el in elems
 | 
						|
                ]
 | 
						|
 | 
						|
        elif self.attrs_only:
 | 
						|
            dicts = [
 | 
						|
                {k: v if v else None for k, v in el.attrib.items()} for el in elems
 | 
						|
            ]
 | 
						|
 | 
						|
        elif self.names:
 | 
						|
            dicts = [
 | 
						|
                {
 | 
						|
                    **el.attrib,
 | 
						|
                    **({el.tag: el.text} if el.text and not el.text.isspace() else {}),
 | 
						|
                    **{
 | 
						|
                        nm: ch.text if ch.text else None
 | 
						|
                        for nm, ch in zip(self.names, el.findall("*"))
 | 
						|
                    },
 | 
						|
                }
 | 
						|
                for el in elems
 | 
						|
            ]
 | 
						|
 | 
						|
        else:
 | 
						|
            dicts = [
 | 
						|
                {
 | 
						|
                    **el.attrib,
 | 
						|
                    **({el.tag: el.text} if el.text and not el.text.isspace() else {}),
 | 
						|
                    **{ch.tag: ch.text if ch.text else None for ch in el.findall("*")},
 | 
						|
                }
 | 
						|
                for el in elems
 | 
						|
            ]
 | 
						|
 | 
						|
        dicts = [
 | 
						|
            {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} for d in dicts
 | 
						|
        ]
 | 
						|
 | 
						|
        keys = list(dict.fromkeys([k for d in dicts for k in d.keys()]))
 | 
						|
        dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts]
 | 
						|
 | 
						|
        if self.names:
 | 
						|
            dicts = [dict(zip(self.names, d.values())) for d in dicts]
 | 
						|
 | 
						|
        return dicts
 | 
						|
 | 
						|
    def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
 | 
						|
        """
 | 
						|
        Iterparse xml nodes.
 | 
						|
 | 
						|
        This method will read in local disk, decompressed XML files for elements
 | 
						|
        and underlying descendants using iterparse, a method to iterate through
 | 
						|
        an XML tree without holding entire XML tree in memory.
 | 
						|
 | 
						|
        Raises
 | 
						|
        ------
 | 
						|
        TypeError
 | 
						|
            * If ``iterparse`` is not a dict or its dict value is not list-like.
 | 
						|
        ParserError
 | 
						|
            * If ``path_or_buffer`` is not a physical file on disk or file-like object.
 | 
						|
            * If no data is returned from selected items in ``iterparse``.
 | 
						|
 | 
						|
        Notes
 | 
						|
        -----
 | 
						|
        Namespace URIs will be removed from return node values. Also,
 | 
						|
        elements with missing children or attributes in submitted list
 | 
						|
        will have optional keys filled with None values.
 | 
						|
        """
 | 
						|
 | 
						|
        dicts: list[dict[str, str | None]] = []
 | 
						|
        row: dict[str, str | None] | None = None
 | 
						|
 | 
						|
        if not isinstance(self.iterparse, dict):
 | 
						|
            raise TypeError(
 | 
						|
                f"{type(self.iterparse).__name__} is not a valid type for iterparse"
 | 
						|
            )
 | 
						|
 | 
						|
        row_node = next(iter(self.iterparse.keys())) if self.iterparse else ""
 | 
						|
        if not is_list_like(self.iterparse[row_node]):
 | 
						|
            raise TypeError(
 | 
						|
                f"{type(self.iterparse[row_node])} is not a valid type "
 | 
						|
                "for value in iterparse"
 | 
						|
            )
 | 
						|
 | 
						|
        if (not hasattr(self.path_or_buffer, "read")) and (
 | 
						|
            not isinstance(self.path_or_buffer, (str, PathLike))
 | 
						|
            or is_url(self.path_or_buffer)
 | 
						|
            or is_fsspec_url(self.path_or_buffer)
 | 
						|
            or (
 | 
						|
                isinstance(self.path_or_buffer, str)
 | 
						|
                and self.path_or_buffer.startswith(("<?xml", "<"))
 | 
						|
            )
 | 
						|
            or infer_compression(self.path_or_buffer, "infer") is not None
 | 
						|
        ):
 | 
						|
            raise ParserError(
 | 
						|
                "iterparse is designed for large XML files that are fully extracted on "
 | 
						|
                "local disk and not as compressed files or online sources."
 | 
						|
            )
 | 
						|
 | 
						|
        iterparse_repeats = len(self.iterparse[row_node]) != len(
 | 
						|
            set(self.iterparse[row_node])
 | 
						|
        )
 | 
						|
 | 
						|
        for event, elem in iterparse(self.path_or_buffer, events=("start", "end")):
 | 
						|
            curr_elem = elem.tag.split("}")[1] if "}" in elem.tag else elem.tag
 | 
						|
 | 
						|
            if event == "start":
 | 
						|
                if curr_elem == row_node:
 | 
						|
                    row = {}
 | 
						|
 | 
						|
            if row is not None:
 | 
						|
                if self.names and iterparse_repeats:
 | 
						|
                    for col, nm in zip(self.iterparse[row_node], self.names):
 | 
						|
                        if curr_elem == col:
 | 
						|
                            elem_val = elem.text if elem.text else None
 | 
						|
                            if elem_val not in row.values() and nm not in row:
 | 
						|
                                row[nm] = elem_val
 | 
						|
 | 
						|
                        if col in elem.attrib:
 | 
						|
                            if elem.attrib[col] not in row.values() and nm not in row:
 | 
						|
                                row[nm] = elem.attrib[col]
 | 
						|
                else:
 | 
						|
                    for col in self.iterparse[row_node]:
 | 
						|
                        if curr_elem == col:
 | 
						|
                            row[col] = elem.text if elem.text else None
 | 
						|
                        if col in elem.attrib:
 | 
						|
                            row[col] = elem.attrib[col]
 | 
						|
 | 
						|
            if event == "end":
 | 
						|
                if curr_elem == row_node and row is not None:
 | 
						|
                    dicts.append(row)
 | 
						|
                    row = None
 | 
						|
 | 
						|
                elem.clear()
 | 
						|
                if hasattr(elem, "getprevious"):
 | 
						|
                    while (
 | 
						|
                        elem.getprevious() is not None and elem.getparent() is not None
 | 
						|
                    ):
 | 
						|
                        del elem.getparent()[0]
 | 
						|
 | 
						|
        if dicts == []:
 | 
						|
            raise ParserError("No result from selected items in iterparse.")
 | 
						|
 | 
						|
        keys = list(dict.fromkeys([k for d in dicts for k in d.keys()]))
 | 
						|
        dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts]
 | 
						|
 | 
						|
        if self.names:
 | 
						|
            dicts = [dict(zip(self.names, d.values())) for d in dicts]
 | 
						|
 | 
						|
        return dicts
 | 
						|
 | 
						|
    def _validate_path(self) -> list[Any]:
 | 
						|
        """
 | 
						|
        Validate ``xpath``.
 | 
						|
 | 
						|
        This method checks for syntax, evaluation, or empty nodes return.
 | 
						|
 | 
						|
        Raises
 | 
						|
        ------
 | 
						|
        SyntaxError
 | 
						|
            * If xpah is not supported or issues with namespaces.
 | 
						|
 | 
						|
        ValueError
 | 
						|
            * If xpah does not return any nodes.
 | 
						|
        """
 | 
						|
 | 
						|
        raise AbstractMethodError(self)
 | 
						|
 | 
						|
    def _validate_names(self) -> None:
 | 
						|
        """
 | 
						|
        Validate names.
 | 
						|
 | 
						|
        This method will check if names is a list-like and aligns
 | 
						|
        with length of parse nodes.
 | 
						|
 | 
						|
        Raises
 | 
						|
        ------
 | 
						|
        ValueError
 | 
						|
            * If value is not a list and less then length of nodes.
 | 
						|
        """
 | 
						|
        raise AbstractMethodError(self)
 | 
						|
 | 
						|
    def _parse_doc(
 | 
						|
        self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
 | 
						|
    ) -> Element | etree._Element:
 | 
						|
        """
 | 
						|
        Build tree from path_or_buffer.
 | 
						|
 | 
						|
        This method will parse XML object into tree
 | 
						|
        either from string/bytes or file location.
 | 
						|
        """
 | 
						|
        raise AbstractMethodError(self)
 | 
						|
 | 
						|
 | 
						|
class _EtreeFrameParser(_XMLFrameParser):
 | 
						|
    """
 | 
						|
    Internal class to parse XML into DataFrames with the Python
 | 
						|
    standard library XML module: `xml.etree.ElementTree`.
 | 
						|
    """
 | 
						|
 | 
						|
    def parse_data(self) -> list[dict[str, str | None]]:
 | 
						|
        from xml.etree.ElementTree import iterparse
 | 
						|
 | 
						|
        if self.stylesheet is not None:
 | 
						|
            raise ValueError(
 | 
						|
                "To use stylesheet, you need lxml installed and selected as parser."
 | 
						|
            )
 | 
						|
 | 
						|
        if self.iterparse is None:
 | 
						|
            self.xml_doc = self._parse_doc(self.path_or_buffer)
 | 
						|
            elems = self._validate_path()
 | 
						|
 | 
						|
        self._validate_names()
 | 
						|
 | 
						|
        xml_dicts: list[dict[str, str | None]] = (
 | 
						|
            self._parse_nodes(elems)
 | 
						|
            if self.iterparse is None
 | 
						|
            else self._iterparse_nodes(iterparse)
 | 
						|
        )
 | 
						|
 | 
						|
        return xml_dicts
 | 
						|
 | 
						|
    def _validate_path(self) -> list[Any]:
 | 
						|
        """
 | 
						|
        Notes
 | 
						|
        -----
 | 
						|
        ``etree`` supports limited ``XPath``. If user attempts a more complex
 | 
						|
        expression syntax error will raise.
 | 
						|
        """
 | 
						|
 | 
						|
        msg = (
 | 
						|
            "xpath does not return any nodes or attributes. "
 | 
						|
            "Be sure to specify in `xpath` the parent nodes of "
 | 
						|
            "children and attributes to parse. "
 | 
						|
            "If document uses namespaces denoted with "
 | 
						|
            "xmlns, be sure to define namespaces and "
 | 
						|
            "use them in xpath."
 | 
						|
        )
 | 
						|
        try:
 | 
						|
            elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces)
 | 
						|
            children = [ch for el in elems for ch in el.findall("*")]
 | 
						|
            attrs = {k: v for el in elems for k, v in el.attrib.items()}
 | 
						|
 | 
						|
            if elems is None:
 | 
						|
                raise ValueError(msg)
 | 
						|
 | 
						|
            if elems is not None:
 | 
						|
                if self.elems_only and children == []:
 | 
						|
                    raise ValueError(msg)
 | 
						|
                if self.attrs_only and attrs == {}:
 | 
						|
                    raise ValueError(msg)
 | 
						|
                if children == [] and attrs == {}:
 | 
						|
                    raise ValueError(msg)
 | 
						|
 | 
						|
        except (KeyError, SyntaxError):
 | 
						|
            raise SyntaxError(
 | 
						|
                "You have used an incorrect or unsupported XPath "
 | 
						|
                "expression for etree library or you used an "
 | 
						|
                "undeclared namespace prefix."
 | 
						|
            )
 | 
						|
 | 
						|
        return elems
 | 
						|
 | 
						|
    def _validate_names(self) -> None:
 | 
						|
        children: list[Any]
 | 
						|
 | 
						|
        if self.names:
 | 
						|
            if self.iterparse:
 | 
						|
                children = self.iterparse[next(iter(self.iterparse))]
 | 
						|
            else:
 | 
						|
                parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces)
 | 
						|
                children = parent.findall("*") if parent is not None else []
 | 
						|
 | 
						|
            if is_list_like(self.names):
 | 
						|
                if len(self.names) < len(children):
 | 
						|
                    raise ValueError(
 | 
						|
                        "names does not match length of child elements in xpath."
 | 
						|
                    )
 | 
						|
            else:
 | 
						|
                raise TypeError(
 | 
						|
                    f"{type(self.names).__name__} is not a valid type for names"
 | 
						|
                )
 | 
						|
 | 
						|
    def _parse_doc(
 | 
						|
        self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
 | 
						|
    ) -> Element:
 | 
						|
        from xml.etree.ElementTree import (
 | 
						|
            XMLParser,
 | 
						|
            parse,
 | 
						|
        )
 | 
						|
 | 
						|
        handle_data = get_data_from_filepath(
 | 
						|
            filepath_or_buffer=raw_doc,
 | 
						|
            encoding=self.encoding,
 | 
						|
            compression=self.compression,
 | 
						|
            storage_options=self.storage_options,
 | 
						|
        )
 | 
						|
 | 
						|
        with preprocess_data(handle_data) as xml_data:
 | 
						|
            curr_parser = XMLParser(encoding=self.encoding)
 | 
						|
            document = parse(xml_data, parser=curr_parser)
 | 
						|
 | 
						|
        return document.getroot()
 | 
						|
 | 
						|
 | 
						|
class _LxmlFrameParser(_XMLFrameParser):
 | 
						|
    """
 | 
						|
    Internal class to parse XML into :class:`~pandas.DataFrame` with third-party
 | 
						|
    full-featured XML library, ``lxml``, that supports
 | 
						|
    ``XPath`` 1.0 and XSLT 1.0.
 | 
						|
    """
 | 
						|
 | 
						|
    def parse_data(self) -> list[dict[str, str | None]]:
 | 
						|
        """
 | 
						|
        Parse xml data.
 | 
						|
 | 
						|
        This method will call the other internal methods to
 | 
						|
        validate ``xpath``, names, optionally parse and run XSLT,
 | 
						|
        and parse original or transformed XML and return specific nodes.
 | 
						|
        """
 | 
						|
        from lxml.etree import iterparse
 | 
						|
 | 
						|
        if self.iterparse is None:
 | 
						|
            self.xml_doc = self._parse_doc(self.path_or_buffer)
 | 
						|
 | 
						|
            if self.stylesheet:
 | 
						|
                self.xsl_doc = self._parse_doc(self.stylesheet)
 | 
						|
                self.xml_doc = self._transform_doc()
 | 
						|
 | 
						|
            elems = self._validate_path()
 | 
						|
 | 
						|
        self._validate_names()
 | 
						|
 | 
						|
        xml_dicts: list[dict[str, str | None]] = (
 | 
						|
            self._parse_nodes(elems)
 | 
						|
            if self.iterparse is None
 | 
						|
            else self._iterparse_nodes(iterparse)
 | 
						|
        )
 | 
						|
 | 
						|
        return xml_dicts
 | 
						|
 | 
						|
    def _validate_path(self) -> list[Any]:
 | 
						|
        msg = (
 | 
						|
            "xpath does not return any nodes or attributes. "
 | 
						|
            "Be sure to specify in `xpath` the parent nodes of "
 | 
						|
            "children and attributes to parse. "
 | 
						|
            "If document uses namespaces denoted with "
 | 
						|
            "xmlns, be sure to define namespaces and "
 | 
						|
            "use them in xpath."
 | 
						|
        )
 | 
						|
 | 
						|
        elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces)
 | 
						|
        children = [ch for el in elems for ch in el.xpath("*")]
 | 
						|
        attrs = {k: v for el in elems for k, v in el.attrib.items()}
 | 
						|
 | 
						|
        if elems == []:
 | 
						|
            raise ValueError(msg)
 | 
						|
 | 
						|
        if elems != []:
 | 
						|
            if self.elems_only and children == []:
 | 
						|
                raise ValueError(msg)
 | 
						|
            if self.attrs_only and attrs == {}:
 | 
						|
                raise ValueError(msg)
 | 
						|
            if children == [] and attrs == {}:
 | 
						|
                raise ValueError(msg)
 | 
						|
 | 
						|
        return elems
 | 
						|
 | 
						|
    def _validate_names(self) -> None:
 | 
						|
        children: list[Any]
 | 
						|
 | 
						|
        if self.names:
 | 
						|
            if self.iterparse:
 | 
						|
                children = self.iterparse[next(iter(self.iterparse))]
 | 
						|
            else:
 | 
						|
                children = self.xml_doc.xpath(
 | 
						|
                    self.xpath + "[1]/*", namespaces=self.namespaces
 | 
						|
                )
 | 
						|
 | 
						|
            if is_list_like(self.names):
 | 
						|
                if len(self.names) < len(children):
 | 
						|
                    raise ValueError(
 | 
						|
                        "names does not match length of child elements in xpath."
 | 
						|
                    )
 | 
						|
            else:
 | 
						|
                raise TypeError(
 | 
						|
                    f"{type(self.names).__name__} is not a valid type for names"
 | 
						|
                )
 | 
						|
 | 
						|
    def _parse_doc(
 | 
						|
        self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
 | 
						|
    ) -> etree._Element:
 | 
						|
        from lxml.etree import (
 | 
						|
            XMLParser,
 | 
						|
            fromstring,
 | 
						|
            parse,
 | 
						|
        )
 | 
						|
 | 
						|
        handle_data = get_data_from_filepath(
 | 
						|
            filepath_or_buffer=raw_doc,
 | 
						|
            encoding=self.encoding,
 | 
						|
            compression=self.compression,
 | 
						|
            storage_options=self.storage_options,
 | 
						|
        )
 | 
						|
 | 
						|
        with preprocess_data(handle_data) as xml_data:
 | 
						|
            curr_parser = XMLParser(encoding=self.encoding)
 | 
						|
 | 
						|
            if isinstance(xml_data, io.StringIO):
 | 
						|
                if self.encoding is None:
 | 
						|
                    raise TypeError(
 | 
						|
                        "Can not pass encoding None when input is StringIO."
 | 
						|
                    )
 | 
						|
 | 
						|
                document = fromstring(
 | 
						|
                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
 | 
						|
                )
 | 
						|
            else:
 | 
						|
                document = parse(xml_data, parser=curr_parser)
 | 
						|
 | 
						|
        return document
 | 
						|
 | 
						|
    def _transform_doc(self) -> etree._XSLTResultTree:
 | 
						|
        """
 | 
						|
        Transform original tree using stylesheet.
 | 
						|
 | 
						|
        This method will transform original xml using XSLT script into
 | 
						|
        am ideally flatter xml document for easier parsing and migration
 | 
						|
        to Data Frame.
 | 
						|
        """
 | 
						|
        from lxml.etree import XSLT
 | 
						|
 | 
						|
        transformer = XSLT(self.xsl_doc)
 | 
						|
        new_doc = transformer(self.xml_doc)
 | 
						|
 | 
						|
        return new_doc
 | 
						|
 | 
						|
 | 
						|
def get_data_from_filepath(
 | 
						|
    filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
 | 
						|
    encoding: str | None,
 | 
						|
    compression: CompressionOptions,
 | 
						|
    storage_options: StorageOptions,
 | 
						|
) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
 | 
						|
    """
 | 
						|
    Extract raw XML data.
 | 
						|
 | 
						|
    The method accepts three input types:
 | 
						|
        1. filepath (string-like)
 | 
						|
        2. file-like object (e.g. open file object, StringIO)
 | 
						|
        3. XML string or bytes
 | 
						|
 | 
						|
    This method turns (1) into (2) to simplify the rest of the processing.
 | 
						|
    It returns input types (2) and (3) unchanged.
 | 
						|
    """
 | 
						|
    if not isinstance(filepath_or_buffer, bytes):
 | 
						|
        filepath_or_buffer = stringify_path(filepath_or_buffer)
 | 
						|
 | 
						|
    if (
 | 
						|
        isinstance(filepath_or_buffer, str)
 | 
						|
        and not filepath_or_buffer.startswith(("<?xml", "<"))
 | 
						|
    ) and (
 | 
						|
        not isinstance(filepath_or_buffer, str)
 | 
						|
        or is_url(filepath_or_buffer)
 | 
						|
        or is_fsspec_url(filepath_or_buffer)
 | 
						|
        or file_exists(filepath_or_buffer)
 | 
						|
    ):
 | 
						|
        with get_handle(
 | 
						|
            filepath_or_buffer,
 | 
						|
            "r",
 | 
						|
            encoding=encoding,
 | 
						|
            compression=compression,
 | 
						|
            storage_options=storage_options,
 | 
						|
        ) as handle_obj:
 | 
						|
            filepath_or_buffer = (
 | 
						|
                handle_obj.handle.read()
 | 
						|
                if hasattr(handle_obj.handle, "read")
 | 
						|
                else handle_obj.handle
 | 
						|
            )
 | 
						|
 | 
						|
    return filepath_or_buffer
 | 
						|
 | 
						|
 | 
						|
def preprocess_data(data) -> io.StringIO | io.BytesIO:
 | 
						|
    """
 | 
						|
    Convert extracted raw data.
 | 
						|
 | 
						|
    This method will return underlying data of extracted XML content.
 | 
						|
    The data either has a `read` attribute (e.g. a file object or a
 | 
						|
    StringIO/BytesIO) or is a string or bytes that is an XML document.
 | 
						|
    """
 | 
						|
 | 
						|
    if isinstance(data, str):
 | 
						|
        data = io.StringIO(data)
 | 
						|
 | 
						|
    elif isinstance(data, bytes):
 | 
						|
        data = io.BytesIO(data)
 | 
						|
 | 
						|
    return data
 | 
						|
 | 
						|
 | 
						|
def _data_to_frame(data, **kwargs) -> DataFrame:
 | 
						|
    """
 | 
						|
    Convert parsed data to Data Frame.
 | 
						|
 | 
						|
    This method will bind xml dictionary data of keys and values
 | 
						|
    into named columns of Data Frame using the built-in TextParser
 | 
						|
    class that build Data Frame and infers specific dtypes.
 | 
						|
    """
 | 
						|
 | 
						|
    tags = next(iter(data))
 | 
						|
    nodes = [list(d.values()) for d in data]
 | 
						|
 | 
						|
    try:
 | 
						|
        with TextParser(nodes, names=tags, **kwargs) as tp:
 | 
						|
            return tp.read()
 | 
						|
    except ParserError:
 | 
						|
        raise ParserError(
 | 
						|
            "XML document may be too complex for import. "
 | 
						|
            "Try to flatten document and use distinct "
 | 
						|
            "element and attribute names."
 | 
						|
        )
 | 
						|
 | 
						|
 | 
						|
def _parse(
 | 
						|
    path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
 | 
						|
    xpath: str,
 | 
						|
    namespaces: dict[str, str] | None,
 | 
						|
    elems_only: bool,
 | 
						|
    attrs_only: bool,
 | 
						|
    names: Sequence[str] | None,
 | 
						|
    dtype: DtypeArg | None,
 | 
						|
    converters: ConvertersArg | None,
 | 
						|
    parse_dates: ParseDatesArg | None,
 | 
						|
    encoding: str | None,
 | 
						|
    parser: XMLParsers,
 | 
						|
    stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
 | 
						|
    iterparse: dict[str, list[str]] | None,
 | 
						|
    compression: CompressionOptions,
 | 
						|
    storage_options: StorageOptions,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 | 
						|
    **kwargs,
 | 
						|
) -> DataFrame:
 | 
						|
    """
 | 
						|
    Call internal parsers.
 | 
						|
 | 
						|
    This method will conditionally call internal parsers:
 | 
						|
    LxmlFrameParser and/or EtreeParser.
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    ImportError
 | 
						|
        * If lxml is not installed if selected as parser.
 | 
						|
 | 
						|
    ValueError
 | 
						|
        * If parser is not lxml or etree.
 | 
						|
    """
 | 
						|
 | 
						|
    p: _EtreeFrameParser | _LxmlFrameParser
 | 
						|
 | 
						|
    if isinstance(path_or_buffer, str) and not any(
 | 
						|
        [
 | 
						|
            is_file_like(path_or_buffer),
 | 
						|
            file_exists(path_or_buffer),
 | 
						|
            is_url(path_or_buffer),
 | 
						|
            is_fsspec_url(path_or_buffer),
 | 
						|
        ]
 | 
						|
    ):
 | 
						|
        warnings.warn(
 | 
						|
            "Passing literal xml to 'read_xml' is deprecated and "
 | 
						|
            "will be removed in a future version. To read from a "
 | 
						|
            "literal string, wrap it in a 'StringIO' object.",
 | 
						|
            FutureWarning,
 | 
						|
            stacklevel=find_stack_level(),
 | 
						|
        )
 | 
						|
 | 
						|
    if parser == "lxml":
 | 
						|
        lxml = import_optional_dependency("lxml.etree", errors="ignore")
 | 
						|
 | 
						|
        if lxml is not None:
 | 
						|
            p = _LxmlFrameParser(
 | 
						|
                path_or_buffer,
 | 
						|
                xpath,
 | 
						|
                namespaces,
 | 
						|
                elems_only,
 | 
						|
                attrs_only,
 | 
						|
                names,
 | 
						|
                dtype,
 | 
						|
                converters,
 | 
						|
                parse_dates,
 | 
						|
                encoding,
 | 
						|
                stylesheet,
 | 
						|
                iterparse,
 | 
						|
                compression,
 | 
						|
                storage_options,
 | 
						|
            )
 | 
						|
        else:
 | 
						|
            raise ImportError("lxml not found, please install or use the etree parser.")
 | 
						|
 | 
						|
    elif parser == "etree":
 | 
						|
        p = _EtreeFrameParser(
 | 
						|
            path_or_buffer,
 | 
						|
            xpath,
 | 
						|
            namespaces,
 | 
						|
            elems_only,
 | 
						|
            attrs_only,
 | 
						|
            names,
 | 
						|
            dtype,
 | 
						|
            converters,
 | 
						|
            parse_dates,
 | 
						|
            encoding,
 | 
						|
            stylesheet,
 | 
						|
            iterparse,
 | 
						|
            compression,
 | 
						|
            storage_options,
 | 
						|
        )
 | 
						|
    else:
 | 
						|
        raise ValueError("Values for parser can only be lxml or etree.")
 | 
						|
 | 
						|
    data_dicts = p.parse_data()
 | 
						|
 | 
						|
    return _data_to_frame(
 | 
						|
        data=data_dicts,
 | 
						|
        dtype=dtype,
 | 
						|
        converters=converters,
 | 
						|
        parse_dates=parse_dates,
 | 
						|
        dtype_backend=dtype_backend,
 | 
						|
        **kwargs,
 | 
						|
    )
 | 
						|
 | 
						|
 | 
						|
@doc(
 | 
						|
    storage_options=_shared_docs["storage_options"],
 | 
						|
    decompression_options=_shared_docs["decompression_options"] % "path_or_buffer",
 | 
						|
)
 | 
						|
def read_xml(
 | 
						|
    path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
 | 
						|
    *,
 | 
						|
    xpath: str = "./*",
 | 
						|
    namespaces: dict[str, str] | None = None,
 | 
						|
    elems_only: bool = False,
 | 
						|
    attrs_only: bool = False,
 | 
						|
    names: Sequence[str] | None = None,
 | 
						|
    dtype: DtypeArg | None = None,
 | 
						|
    converters: ConvertersArg | None = None,
 | 
						|
    parse_dates: ParseDatesArg | None = None,
 | 
						|
    # encoding can not be None for lxml and StringIO input
 | 
						|
    encoding: str | None = "utf-8",
 | 
						|
    parser: XMLParsers = "lxml",
 | 
						|
    stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None,
 | 
						|
    iterparse: dict[str, list[str]] | None = None,
 | 
						|
    compression: CompressionOptions = "infer",
 | 
						|
    storage_options: StorageOptions | None = None,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 | 
						|
) -> DataFrame:
 | 
						|
    r"""
 | 
						|
    Read XML document into a :class:`~pandas.DataFrame` object.
 | 
						|
 | 
						|
    .. versionadded:: 1.3.0
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    path_or_buffer : str, path object, or file-like object
 | 
						|
        String, path object (implementing ``os.PathLike[str]``), or file-like
 | 
						|
        object implementing a ``read()`` function. The string can be any valid XML
 | 
						|
        string or a path. The string can further be a URL. Valid URL schemes
 | 
						|
        include http, ftp, s3, and file.
 | 
						|
 | 
						|
        .. deprecated:: 2.1.0
 | 
						|
            Passing xml literal strings is deprecated.
 | 
						|
            Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead.
 | 
						|
 | 
						|
    xpath : str, optional, default './\*'
 | 
						|
        The ``XPath`` to parse required set of nodes for migration to
 | 
						|
        :class:`~pandas.DataFrame`.``XPath`` should return a collection of elements
 | 
						|
        and not a single element. Note: The ``etree`` parser supports limited ``XPath``
 | 
						|
        expressions. For more complex ``XPath``, use ``lxml`` which requires
 | 
						|
        installation.
 | 
						|
 | 
						|
    namespaces : dict, optional
 | 
						|
        The namespaces defined in XML document as dicts with key being
 | 
						|
        namespace prefix and value the URI. There is no need to include all
 | 
						|
        namespaces in XML, only the ones used in ``xpath`` expression.
 | 
						|
        Note: if XML document uses default namespace denoted as
 | 
						|
        `xmlns='<URI>'` without a prefix, you must assign any temporary
 | 
						|
        namespace prefix such as 'doc' to the URI in order to parse
 | 
						|
        underlying nodes and/or attributes. For example, ::
 | 
						|
 | 
						|
            namespaces = {{"doc": "https://example.com"}}
 | 
						|
 | 
						|
    elems_only : bool, optional, default False
 | 
						|
        Parse only the child elements at the specified ``xpath``. By default,
 | 
						|
        all child elements and non-empty text nodes are returned.
 | 
						|
 | 
						|
    attrs_only :  bool, optional, default False
 | 
						|
        Parse only the attributes at the specified ``xpath``.
 | 
						|
        By default, all attributes are returned.
 | 
						|
 | 
						|
    names :  list-like, optional
 | 
						|
        Column names for DataFrame of parsed XML data. Use this parameter to
 | 
						|
        rename original element names and distinguish same named elements and
 | 
						|
        attributes.
 | 
						|
 | 
						|
    dtype : Type name or dict of column -> type, optional
 | 
						|
        Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,
 | 
						|
        'c': 'Int64'}}
 | 
						|
        Use `str` or `object` together with suitable `na_values` settings
 | 
						|
        to preserve and not interpret dtype.
 | 
						|
        If converters are specified, they will be applied INSTEAD
 | 
						|
        of dtype conversion.
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    converters : dict, optional
 | 
						|
        Dict of functions for converting values in certain columns. Keys can either
 | 
						|
        be integers or column labels.
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    parse_dates : bool or list of int or names or list of lists or dict, default False
 | 
						|
        Identifiers to parse index or columns to datetime. The behavior is as follows:
 | 
						|
 | 
						|
        * boolean. If True -> try parsing the index.
 | 
						|
        * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
 | 
						|
          each as a separate date column.
 | 
						|
        * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
 | 
						|
          a single date column.
 | 
						|
        * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
 | 
						|
          result 'foo'
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    encoding : str, optional, default 'utf-8'
 | 
						|
        Encoding of XML document.
 | 
						|
 | 
						|
    parser : {{'lxml','etree'}}, default 'lxml'
 | 
						|
        Parser module to use for retrieval of data. Only 'lxml' and
 | 
						|
        'etree' are supported. With 'lxml' more complex ``XPath`` searches
 | 
						|
        and ability to use XSLT stylesheet are supported.
 | 
						|
 | 
						|
    stylesheet : str, path object or file-like object
 | 
						|
        A URL, file-like object, or a raw string containing an XSLT script.
 | 
						|
        This stylesheet should flatten complex, deeply nested XML documents
 | 
						|
        for easier parsing. To use this feature you must have ``lxml`` module
 | 
						|
        installed and specify 'lxml' as ``parser``. The ``xpath`` must
 | 
						|
        reference nodes of transformed XML document generated after XSLT
 | 
						|
        transformation and not the original XML document. Only XSLT 1.0
 | 
						|
        scripts and not later versions is currently supported.
 | 
						|
 | 
						|
    iterparse : dict, optional
 | 
						|
        The nodes or attributes to retrieve in iterparsing of XML document
 | 
						|
        as a dict with key being the name of repeating element and value being
 | 
						|
        list of elements or attribute names that are descendants of the repeated
 | 
						|
        element. Note: If this option is used, it will replace ``xpath`` parsing
 | 
						|
        and unlike ``xpath``, descendants do not need to relate to each other but can
 | 
						|
        exist any where in document under the repeating element. This memory-
 | 
						|
        efficient method should be used for very large XML files (500MB, 1GB, or 5GB+).
 | 
						|
        For example, ::
 | 
						|
 | 
						|
            iterparse = {{"row_element": ["child_elem", "attr", "grandchild_elem"]}}
 | 
						|
 | 
						|
        .. versionadded:: 1.5.0
 | 
						|
 | 
						|
    {decompression_options}
 | 
						|
 | 
						|
        .. versionchanged:: 1.4.0 Zstandard support.
 | 
						|
 | 
						|
    {storage_options}
 | 
						|
 | 
						|
    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
 | 
						|
        Back-end data type applied to the resultant :class:`DataFrame`
 | 
						|
        (still experimental). Behaviour is as follows:
 | 
						|
 | 
						|
        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
 | 
						|
          (default).
 | 
						|
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
 | 
						|
          DataFrame.
 | 
						|
 | 
						|
        .. versionadded:: 2.0
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    df
 | 
						|
        A DataFrame.
 | 
						|
 | 
						|
    See Also
 | 
						|
    --------
 | 
						|
    read_json : Convert a JSON string to pandas object.
 | 
						|
    read_html : Read HTML tables into a list of DataFrame objects.
 | 
						|
 | 
						|
    Notes
 | 
						|
    -----
 | 
						|
    This method is best designed to import shallow XML documents in
 | 
						|
    following format which is the ideal fit for the two-dimensions of a
 | 
						|
    ``DataFrame`` (row by column). ::
 | 
						|
 | 
						|
            <root>
 | 
						|
                <row>
 | 
						|
                  <column1>data</column1>
 | 
						|
                  <column2>data</column2>
 | 
						|
                  <column3>data</column3>
 | 
						|
                  ...
 | 
						|
               </row>
 | 
						|
               <row>
 | 
						|
                  ...
 | 
						|
               </row>
 | 
						|
               ...
 | 
						|
            </root>
 | 
						|
 | 
						|
    As a file format, XML documents can be designed any way including
 | 
						|
    layout of elements and attributes as long as it conforms to W3C
 | 
						|
    specifications. Therefore, this method is a convenience handler for
 | 
						|
    a specific flatter design and not all possible XML structures.
 | 
						|
 | 
						|
    However, for more complex XML documents, ``stylesheet`` allows you to
 | 
						|
    temporarily redesign original document with XSLT (a special purpose
 | 
						|
    language) for a flatter version for migration to a DataFrame.
 | 
						|
 | 
						|
    This function will *always* return a single :class:`DataFrame` or raise
 | 
						|
    exceptions due to issues with XML document, ``xpath``, or other
 | 
						|
    parameters.
 | 
						|
 | 
						|
    See the :ref:`read_xml documentation in the IO section of the docs
 | 
						|
    <io.read_xml>` for more information in using this method to parse XML
 | 
						|
    files to DataFrames.
 | 
						|
 | 
						|
    Examples
 | 
						|
    --------
 | 
						|
    >>> from io import StringIO
 | 
						|
    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
 | 
						|
    ... <data xmlns="http://example.com">
 | 
						|
    ...  <row>
 | 
						|
    ...    <shape>square</shape>
 | 
						|
    ...    <degrees>360</degrees>
 | 
						|
    ...    <sides>4.0</sides>
 | 
						|
    ...  </row>
 | 
						|
    ...  <row>
 | 
						|
    ...    <shape>circle</shape>
 | 
						|
    ...    <degrees>360</degrees>
 | 
						|
    ...    <sides/>
 | 
						|
    ...  </row>
 | 
						|
    ...  <row>
 | 
						|
    ...    <shape>triangle</shape>
 | 
						|
    ...    <degrees>180</degrees>
 | 
						|
    ...    <sides>3.0</sides>
 | 
						|
    ...  </row>
 | 
						|
    ... </data>'''
 | 
						|
 | 
						|
    >>> df = pd.read_xml(StringIO(xml))
 | 
						|
    >>> df
 | 
						|
          shape  degrees  sides
 | 
						|
    0    square      360    4.0
 | 
						|
    1    circle      360    NaN
 | 
						|
    2  triangle      180    3.0
 | 
						|
 | 
						|
    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
 | 
						|
    ... <data>
 | 
						|
    ...   <row shape="square" degrees="360" sides="4.0"/>
 | 
						|
    ...   <row shape="circle" degrees="360"/>
 | 
						|
    ...   <row shape="triangle" degrees="180" sides="3.0"/>
 | 
						|
    ... </data>'''
 | 
						|
 | 
						|
    >>> df = pd.read_xml(StringIO(xml), xpath=".//row")
 | 
						|
    >>> df
 | 
						|
          shape  degrees  sides
 | 
						|
    0    square      360    4.0
 | 
						|
    1    circle      360    NaN
 | 
						|
    2  triangle      180    3.0
 | 
						|
 | 
						|
    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
 | 
						|
    ... <doc:data xmlns:doc="https://example.com">
 | 
						|
    ...   <doc:row>
 | 
						|
    ...     <doc:shape>square</doc:shape>
 | 
						|
    ...     <doc:degrees>360</doc:degrees>
 | 
						|
    ...     <doc:sides>4.0</doc:sides>
 | 
						|
    ...   </doc:row>
 | 
						|
    ...   <doc:row>
 | 
						|
    ...     <doc:shape>circle</doc:shape>
 | 
						|
    ...     <doc:degrees>360</doc:degrees>
 | 
						|
    ...     <doc:sides/>
 | 
						|
    ...   </doc:row>
 | 
						|
    ...   <doc:row>
 | 
						|
    ...     <doc:shape>triangle</doc:shape>
 | 
						|
    ...     <doc:degrees>180</doc:degrees>
 | 
						|
    ...     <doc:sides>3.0</doc:sides>
 | 
						|
    ...   </doc:row>
 | 
						|
    ... </doc:data>'''
 | 
						|
 | 
						|
    >>> df = pd.read_xml(StringIO(xml),
 | 
						|
    ...                  xpath="//doc:row",
 | 
						|
    ...                  namespaces={{"doc": "https://example.com"}})
 | 
						|
    >>> df
 | 
						|
          shape  degrees  sides
 | 
						|
    0    square      360    4.0
 | 
						|
    1    circle      360    NaN
 | 
						|
    2  triangle      180    3.0
 | 
						|
 | 
						|
    >>> xml_data = '''
 | 
						|
    ...         <data>
 | 
						|
    ...            <row>
 | 
						|
    ...               <index>0</index>
 | 
						|
    ...               <a>1</a>
 | 
						|
    ...               <b>2.5</b>
 | 
						|
    ...               <c>True</c>
 | 
						|
    ...               <d>a</d>
 | 
						|
    ...               <e>2019-12-31 00:00:00</e>
 | 
						|
    ...            </row>
 | 
						|
    ...            <row>
 | 
						|
    ...               <index>1</index>
 | 
						|
    ...               <b>4.5</b>
 | 
						|
    ...               <c>False</c>
 | 
						|
    ...               <d>b</d>
 | 
						|
    ...               <e>2019-12-31 00:00:00</e>
 | 
						|
    ...            </row>
 | 
						|
    ...         </data>
 | 
						|
    ...         '''
 | 
						|
 | 
						|
    >>> df = pd.read_xml(StringIO(xml_data),
 | 
						|
    ...                  dtype_backend="numpy_nullable",
 | 
						|
    ...                  parse_dates=["e"])
 | 
						|
    >>> df
 | 
						|
       index     a    b      c  d          e
 | 
						|
    0      0     1  2.5   True  a 2019-12-31
 | 
						|
    1      1  <NA>  4.5  False  b 2019-12-31
 | 
						|
    """
 | 
						|
    check_dtype_backend(dtype_backend)
 | 
						|
 | 
						|
    return _parse(
 | 
						|
        path_or_buffer=path_or_buffer,
 | 
						|
        xpath=xpath,
 | 
						|
        namespaces=namespaces,
 | 
						|
        elems_only=elems_only,
 | 
						|
        attrs_only=attrs_only,
 | 
						|
        names=names,
 | 
						|
        dtype=dtype,
 | 
						|
        converters=converters,
 | 
						|
        parse_dates=parse_dates,
 | 
						|
        encoding=encoding,
 | 
						|
        parser=parser,
 | 
						|
        stylesheet=stylesheet,
 | 
						|
        iterparse=iterparse,
 | 
						|
        compression=compression,
 | 
						|
        storage_options=storage_options,
 | 
						|
        dtype_backend=dtype_backend,
 | 
						|
    )
 |