o
    eI                  	   @   s  d Z dZz!dZddlZdd ZejddddedddZdddi fdd	ZW n ey0   dZY nw G d
d dZ	e	 Z	dZ
dZdZdZeeZdZg dZdd ZddefddZer]eZdd ZdZdd ZddefddZdd Zd)ddZd)ddZed kredd! ddlZddlZdd"lmZ ddlZe Z dZ!ej"dd D ];Z#ej$%e#se&d# qe'e#Z(e() Z*W d   n1 sw   Y  e&d$e#d%ee*  ee*dd! e!d7 Z!qe!re&d&e e  d'd( dS dS dS )*at  Very simple and fast XML parser, used for intra-paragraph text.

Devised by Aaron Watters in the bad old days before Python had fast
parsers available.  Constructs the lightest possible in-memory
representation; parses most files we have seen in pure python very
quickly.

The output structure is the same as the one produced by pyRXP,
our validating C-based parser, which was written later.  It will
use pyRXP if available.

This is used to parse intra-paragraph markup.

Example parse::

    <this type="xml">text <b>in</b> xml</this>

    ( "this",
      {"type": "xml"},
      [ "text ",
        ("b", None, ["in"], None),
        " xml"
        ]
       None )

    { 0: "this"
      "type": "xml"
      1: ["text ",
          {0: "b", 1:["in"]},
          " xml"]
    }

Ie, xml tag translates to a tuple:
 (name, dictofattributes, contentlist, miscellaneousinfo)

where miscellaneousinfo can be anything, (but defaults to None)
(with the intention of adding, eg, line number information)

special cases: name of "" means "top level, no containing tag".
Top level parse always looks like this::

    ("", list, None, None)

 contained text of None means <simple_tag/>

In order to support stuff like::

    <this></this><one></one>

AT THE MOMENT &amp; ETCETERA ARE IGNORED. THEY MUST BE PROCESSED
IN A POST-PROCESSING STEP.

PROLOGUES ARE NOT UNDERSTOOD.  OTHER STUFF IS PROBABLY MISSING.
    Nc                 C   s   t |  d S )N)print)s r   GD:\Projects\ConvertPro\env\Lib\site-packages\reportlab/lib/rparsexml.pywarnCB>   s   r      zstring input)ZErrorOnValidityErrorsZNoNoDTDWarningZExpandCharacterEntitiesZExpandGeneralEntitiesr   ZsrcNameZ
ReturnUTF8c                 C   s.   |t _t j| fi |}|r|pdd |gd fS )N )pyRXP_parsereoCBparse)ZxmlTextoneOutermostTagr
   entityReplacerZ	parseOptspr   r   r   parsexmlI   s   r   c                   @   s   e Zd Zedd ZdS )smartDecodec                    s0   t d dd l  fdd}t|tj_|| S )Ninitialr   c                    s.   t | tr| S  | }td | |d S )Nfinalencoding)
isinstancestrdetectr   decode)r   Zcddchardetr   r   __call__U   s   
z&smartDecode.__call__.<locals>.__call__)r   r   staticmethodr   	__class__r   )r   r   r   r   r   r   Q   s
   zsmartDecode.__call__N)__name__
__module____qualname__r   r   r   r   r   r   r   P   s    r   r   	<![CDATA[z]]>))z&lt;<)z&gt;>)z&amp;&c                 C   s>   g }| D ]}d|v rt D ]
\}}|||}q|| q|S )Nr#   )replacelistreplaceappend)ZcontentListresulteoldnewr   r   r   unEscapeContentListf   s   r+   c                 C   s0   t rtdt| |d\}}|r|d d S |S )z.official interface: discard unused cursor infoz)pyRXP not found, fallback parser disabled)r      r   )RequirePyRXPImportError	parsexml0)xmltextr   r
   r   r'   cursorr   r   r   parsexmlSimpleo   s   r2   c                 C   s   t | d }t|S )Nr)openreadr   )filenamerawr   r   r   	parseFile|   s   r8   c           	      C   s   d}d}|du rM|  d|}|dk r|S |d }d}|D ]&}t|}| |||  |krBd}|  d|}|dk r>td| |d }q|du rId}|du s|S )zBskip any prologue found after cursor, return index of rest of text)z!DOCTYPEz?xmlz!--Nr!   r   r   r"   zcan't close prologue %r)findlen
ValueError)	textr1   Zprologue_elementsdoneZopenbracketZpastfoundr(   ler   r   r   skip_prologue   s*   r@   c           1      C   s  t | } t}d } }}|dur|  } |}| d|}	| |	d |	d  }
d}|	dk rU|durG| |d g}|r=||}||||ft| fS tdt| ||d   g }|durft }}t| |}n|	dk rvtdt| dd	  |
d
kr| |	|	d  dkr|	d }| t|}|dk rtdt| ||d   t	}| || g}|tt }d}n|
dkr| |	|	d  dkr| d|	d }||	k rtdt| ||d   |d }| | dkrtdt| ||d   d|d fS | d|	}|dk }|d }|	d }| || }d|vr/|d dkr%|dd }d}| }|}|}nGd|v rvd}|sEt|d 
dd rGd}|du rv| d|}|d }|dk }| || }|sot|d 
dd rqd}|du sL|rtdt| |	|	d   |}| |d  dkr|d }|dd }d}| }|
d}|d }|
 }|d }|}|d }|d d |d< i  }}d}t|}||k rv|| }|d }| }|d dkrtdt| d|dd vr||krtdt| || } |d }d|| f }d|dd vs| }|
 }!|!d }"|dt|"  }#|# }#z
|#d }$|#d }%W n   td t|#||!f |$|%  krVdksen |$|%  krcd!krkn n|#dd }#|#||< |"}||k s|dur}|}|durY| d|}&|&|k r|tkrd}| |d }'t| }|'r||' ntd"|t| ||d  f | |&d  dkr4| d|&}(|(|&k rtd#t| |&|&d   | |&d |( })|)
 }*|*d }+||+kr| d| },t|,
d$}-| d| },t|,
d$}.td%|.|-t|t|+t| ||d&  f | ||& }'|'r-||' |(d }d}n | ||& }'|'rB||' t| |&d|d'\}/}|/rT||/ |dus|rc|rc||}||||f}0|0|fS )(zsimple recursive descent xml parser...
       return (dictionary, endcharacter)
       special case: comment returns (None, endcharacter)Nr!   r      r   zno tags at non-toplevel %s   z.non top level entry should be at start tag: %s
   z![	   r    zunclosed CDATA %sz!-   z<!--z--zunterminated comment %sr,   r"   z*invalid comment: contains double dashes %s=/".zunclosed start tag %sz fz-attribute value must start with double quoteszunclosed value %s=%szattvalue,attentry,attlist='z&no close bracket for %s found after %szunclosed close tag %s
z8at lines %s...%s close tag name doesn't match %s...%s %sd   )
startingattoplevelr   )r   NONAMEstripr9   r:   r;   reprr@   CDATAENDMARKERCDATAMARKERsplitr&   r/   )1r0   rO   rP   r   Z
NameStringZContentListZAttDictZ
ExtraStuffr1   ZfirstbracketZafterbracket2charZ
docontentsLnameZ
startcdataZendcdataZendcommentdashesZ
endcommentZclosebracketZnocloseZstartsearchZpastfirstbracketZ
tagcontentstopZtaglistZtaglist0Ztaglist0listZattributenameDZtaglistindexZlasttaglistindexZattentryZnextattentryattlistZnextattnameZattvaluefirstlastZnextopenbracket	remainderZnextclosebracketZclosetagcontentsZclosetaglistZ	closenameprefixZ
endlinenumZlinenumZ	parsetreetr   r   r   r/      s.  	






6





 

"



:r/   c                 C   s   t | ttfr	| S | \}}}}|si }g }| D ]}|| }|d|t|f  qd|}|s8|r8td|dur`tt	t
|}	d|	}
|sL|
S |
d}dd| }
d|||
|f S d	||f S )
z!pretty printer mainly for testingrK    zname missing with attributes???NrM   z   z
   z<%s %s>
%s
</%s>z<%s %s/>)r   r   byteskeysr&   rS   joinr;   listmappprettyprintrV   )Z	parsedxmlrX   ZattdictZtextlistextrar[   kv
attributesZtextlistpprintZ
textpprintZnllistr   r   r   rg     s(   


rg   c                 C   sx   ddl m } ddlm} | }t  t| dd}td| |  |d@ r(|| |d@ r:td t|}t| d S d S )	Nr   time)pprintr   )r   ZDONErE   z============== reformatting)rm   rn   
breakpointr2   r   rg   )r   dumprm   rn   nowrZ   r   r   r   r   	testparse  s   rr   c                 C   s   t d| d d S )Na-  <this type="xml">text &lt;&gt;<b>in</b> <funnytag foo="bar"/> xml</this>
                 <!-- comment -->
                 <![CDATA[
                 <this type="xml">text <b>in</b> xml</this> ]]>
                 <tag with="<brackets in values>">just testing brackets feature</tag>
                 rp   )rr   rs   r   r   r   test  s   
rt   __main__rs   rl   z!!!!! no file at {f!r}zparsing z |t|=z	timed at z.2fz secs.)r   )+__doc__r-   ZsimpleparseZpyRXPUr   Parserr	   r   r.   r   rQ   ZNAMEKEYZCONTENTSKEYrU   r:   ZLENCDATAMARKERrT   r$   r+   r2   r8   verboser@   r/   rg   rr   rt   r   sysosrm   Z	reportlabrq   seenargvfpathisfiler   r4   Z_fr5   r`   r   r   r   r   <module>   s|    7		

 i






