
    h/                        % S r SSKrSSKJr  SSKJrJr  SSKrSSKr	SSK
r
\R                  \R                  -   S-   r\R                  S-   r\\R                  -   r " S S\R                   5      r\\\\	R(                  R*                  S4   r\ " S	 S
5      5       r\ " S S5      5       r\ " S S5      5       rS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jr S\S\4S jr!S\S\4S jr"S\S\4S jr#S\S\4S jr$S\S\4S jr%S\S\4S jr&S\S\4S jr'S\S\4S jr(S\S\4S  jr)S\S\4S! jr*\\\\\\!\#\"\\ \$\%\'\(\)\*\&/r+\,\\/\4      \-S"'    " S# S$5      r.S\4S% jr/g)&z@This module takes URCL source code and performs lexical analysis    N)	dataclass)UnionCallablez.__c                   L    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrSrSrSrSrg)	TokenType   integerlabelzgeneral register	character
identifier
whitespacezrelative jumpmacroportzheader inequality[],string N)__name__
__module____qualname____firstlineno__INTEGERLABELGENERAL_REGISTER	CHARACTER
IDENTIFIER
WHITESPACERELATIVE_JUMPMACROPORTHEADER_INEQUALITYLEFT_BRACKETRIGHT_BRACKETCOMMASTRING__static_attributes__r       '/mnt/public/Repos/urclelf/./urcl/lex.pyr   r      sJ    GE)IJJ#MED+LMEFr)   r   c                   L    \ rS rSr% \\S'   \\S'   \\S'   \\S'   S\4S jr	Sr
g	)
Token"   typeline_numbercolumn_numbervaluereturnc                     SU R                   R                   SU R                   SU R                   3nU R                  b  USU R                   S3-  nU$ US-  nU$ )N< :z - >)r.   namer/   r0   r1   )selfresults     r*   __str__Token.__str__)   si    TYY^^$Ad&6&6%7q9K9K8LM::!DJJ<q))F  cMFr)   r   N)r   r   r   r   r   __annotations__int
TokenValuestrr;   r(   r   r)   r*   r,   r,   "   s%    
O r)   r,   c                   T    \ rS rSr% \\   \S'   S rS\4S jrS r	S r
S\4S	 jrS
rg)TokenStream3   tokensc                     Sn/ n/ nU R                    HV  nUR                  U:X  a  UR                  U5        M&  U(       a  UR                  [        U5      5        UR                  nU/nMX     U(       a  UR                  [        U5      5        U$ N   )rD   r/   appendrB   )r9   current_line_numberlinescurrent_linetokens        r*   split_linesTokenStream.split_lines8   s    #%$&[[E  $77##E*LL\!:;&+&7&7# %w ! LL\23r)   rL   c                 :    U R                   R                  U5        g N)rD   rH   )r9   rL   s     r*   rH   TokenStream.appendJ   s    5!r)   c                 6    U R                   R                  5       $ rP   )rD   __iter__r9   s    r*   rS   TokenStream.__iter__M   s    {{##%%r)   c                 ,    [        U R                  5      $ rP   )lenrD   rT   s    r*   __len__TokenStream.__len__P   s    4;;r)   r2   c                     / nU R                  5        HG  nUR                  SR                  UR                   Vs/ s H  n[	        U5      PM     sn5      5        MI     SR                  U5      $ s  snf )Nr5   
)rM   rH   joinrD   r@   )r9   rJ   linerL   s       r*   r;   TokenStream.__str__S   s]    $$&DLL4;;"G;%3u:;"GHI ' yy #Hs   A/r   N)r   r   r   r   listr,   r=   rM   rH   rS   rX   r@   r;   r(   r   r)   r*   rB   rB   3   s5     K$"E "&    r)   rB   c                   f    \ rS rSr% \\-  S-  \S'   \\S'   \S 5       r	\S\
S\S\4S j5       rS	rg)
TokenParseResult[   Ndatachars_consumedc                     [        S S5      $ )Nr   )ra   )clss    r*   missTokenParseResult.miss`   s    a((r)   r.   r1   c                 2    [        [        USSU5      U5      $ rF   )ra   r,   )rf   r.   r1   rd   s       r*   successTokenParseResult.successd   s    dAq% 8.IIr)   r   )r   r   r   r   r,   r@   r=   r>   classmethodrg   r   r?   rj   r(   r   r)   r*   ra   ra   [   sW    
#+
) ) J9 JZ J J Jr)   ra   sourcer2   c                    U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a#  X   [        ;  a  OUS-  nU[        U 5      :  a  M#  [        R                  [        R                  U SU U5      $ )N.rG   )
startswithra   rg   rW   LEGAL_LABEL_CHARACTERSrj   r   r   rm   indexs     r*   extract_label_tokenrt   j   y    S!!*:*?*?*A#AE
#f+
= 66
 #f+

 ##IOOVAe_eLLr)   c                 6   U (       d  [         R                  5       $ SnU[        U 5      :  a.  X   R                  5       (       d  OUS-  nU[        U 5      :  a  M.  U(       a%  [         R	                  [
        R                  S U5      $ [         R                  5       $ Nr   rG   )ra   rg   rW   isspacerj   r   r   rr   s     r*   extract_space_tokenry   v   s    *//11E
#f+
}$$&&
 #f+
 ''	(<(<dEJJ  ""r)   c                 "   U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a0  U S U R	                  S5      (       a  OUS-  nU[        U 5      :  a  M0  [        R                  [        R                  S U5      $ )Nz/*   z*/rG   )rp   ra   rg   rW   endswithrj   r   r   rr   s     r*   extract_multiline_comment_tokenr}      s    T""+;+@+@+B$BE
#f+
&5>""4((
 #f+

 ##I$8$8$FFr)   c                 
   U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a$  X   S:X  a  US-  nOUS-  nU[        U 5      :  a  M$  [        R	                  [
        R                  S U5      $ )Nz//r{   r[   rG   )rp   ra   rg   rW   rj   r   r   rr   s     r*   extract_line_comment_tokenr      s}    T""+;+@+@+B$BE
#f+
=D QJE
	 #f+
 ##I$8$8$FFr)   c                 ~   U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a.  X   R	                  5       (       a  OUS-  nU[        U 5      :  a  M.   [        U SU SS9n[        R                  [        R                  X!5      $ ! [         a    [        SU SU  S3S5      s $ f = f)N~rG   r   basezMalformed relative jump '')
rp   ra   rg   rW   rx   r>   
ValueErrorrj   r   r    rm   rs   token_values      r*   extract_relative_jump_tokenr      s    S!!*:*?*?*A#AE
#f+
=  ""
 #f+

S&5/2 ##I$;$;[PP  S";F1UO;LA NPQRRSs   +B B<;B<c                    U S   R                  5       S;  a  [        R                  5       $ SnU[        U 5      :  a  X   S;  a  OUS-  nU[        U 5      :  a  M  US:X  a  [        R                  5       $  [        R
                  R                  [        U SU SS95      n[        R                  [        R                  X!5      $ ! [         a    [        SU SU  S3S5      s $ f = f)Nr   )r$rG   
0123456789r   zMalformed register 'r   lowerra   rg   rW   urcltypesGeneralRegisterr>   r   rj   r   r   r   s      r*   extract_register_tokenr      s    ay
*3C3H3H3J,JE
#f+
=,
 #f+

 z$$&&Njj00VAe_11MN ##I$>$>SS  N"6va6Gq I1MMN   7*C C%$C%c                    U S   R                  5       S;  a  [        R                  5       $ SnU[        U 5      :  a  X   S;  a  OUS-  nU[        U 5      :  a  M  US:X  a  [        R                  5       $  [        R
                  R                  [        U SU SS95      n[        R                  [        R                  X!5      $ ! [         a    [        SU SU  S3S5      s $ f = f)Nr   )m#rG   r   r   zMalformed memory address 'r   r   r   s      r*   extract_memory_address_tokenr      s    ay
*3C3H3H3J,JE
#f+
=,
 #f+

 z$$&&Tjj00VAe_11MN ##I$>$>SS  T"<VAe_<MQ OQRSSTr   c                    U (       d  [         R                  5       $ SnSnX   S:X  a  US-  nXUS-    R                  5       S;   a
  US-  nUS-  nU[        U 5      :  a-  X   R                  5       U;  a  OUS-  nU[        U 5      :  a  M-  U(       d  [         R                  5       $  [	        U S U SS9n[         R                  [        R                  X25      $ ! [
         a    [        S	U S U  S
3S5      s $ f = f)Nr   r   -rG   r{   )0x0b0oabcdefr   zMalformed integer 'r   )	ra   rg   r   rW   r>   r   rj   r   r   )rm   permitted_number_charsrs   r   s       r*   extract_integer_tokenr      s   *//11)E}
E!G""$(::(*

#f+
= (>>
 #f+

 $$&&L&%.q1 ##I$5$5{JJ  L"5fVen5EQ GKKLs   (C C98C9c                    U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  aB  X   S:X  a  US-  nO4U[        U 5      S-
  :X  a  [        SS5      $ US-  nU[        U 5      :  a  MB  U SUS-
   nUR	                  SS5      R	                  SS5      R	                  S	S
5      n[        R                  [        R                  X!5      $ )N"rG   zString was never closedr   \nr[   \t	\r)rp   ra   rg   rW   replacerj   r   r'   rm   rs   string_values      r*   extract_string_tokenr      s    S!!*:*?*?*A#AE
#f+
=CQJECK!O##$=qAA
 #f+
 !E!G$L''t4<<UDIQQRWY]^L##I$4$4lJJr)   c                    U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  aB  X   S:X  a  US-  nO4U[        U 5      S-
  :X  a  [        SS5      $ US-  nU[        U 5      :  a  MB  U SUS-
   nUR	                  SS5      R	                  SS5      R	                  S	S
5      n[        U5      S:w  a  [        S[        U5       S3S5      $ [        R                  [        R                  X!5      $ )Nr   rG   z"Character literal was never closedr   r   r[   r   r   r   r   zCharacter literal is of length z (expected 1))rp   ra   rg   rW   r   rj   r   r   r   s      r*   extract_character_tokenr      s   S!!*:*?*?*A#AE
#f+
=CQJECK!O##$H!LL
 #f+
 !E!G$L''t4<<UDIQQRWY]^L
<A"A#lBSATTa bdeff##I$7$7MMr)   c                     U S   [         ;  a  [        R                  5       $ SnU[        U 5      :  a#  X   [        ;  a  OUS-  nU[        U 5      :  a  M#  [        R                  [        R                  U S U U5      $ rw   )!LEGAL_IDENTIFIER_FIRST_CHARACTERSra   rg   rW   (LEGAL_IDENTIFIER_CONTINUATION_CHARACTERSrj   r   r   rr   s     r*   extract_identifier_tokenr     sw    ay99BRBWBWBY;YE
#f+
= HH
 #f+

 ##I$8$8&%.%PPr)   c                    U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a#  X   [        ;  a  OUS-  nU[        U 5      :  a  M#  [        R                  [        R                  U SU U5      $ )N@rG   )rp   ra   rg   rW   rq   rj   r   r!   rr   s     r*   extract_macro_tokenr     ru   r)   c                    U R                  S5      (       d  [        R                  5       $ SnU[        U 5      :  a#  X   [        ;  a  OUS-  nU[        U 5      :  a  M#  [        R                  [        R                  U SU U5      $ )N%rG   )rp   ra   rg   rW   rq   rj   r   r"   rr   s     r*   extract_port_tokenr   !  sy    S!!*:*?*?*A#AE
#f+
= 66
 #f+

 ##INNF1UOUKKr)   c                     S HC  nU R                  U5      (       d  M  [        R                  [        R                  U S S S5      s  $    [        R                  5       $ )N)z==z<=z>=r{   )rp   ra   rj   r   r#   rg   )rm   
inequalitys     r*   extract_header_inequality_tokenr   ,  sR    (
Z((#++I,G,GPRQRUVWW )   ""r)   c                     U R                  S5      (       a%  [        R                  [        R                  S S5      $ [        R                  5       $ )Nr   rG   )rp   ra   rj   r   r$   rg   rm   s    r*   extract_left_bracket_tokenr   4  s=    ''	(>(>aHH  ""r)   c                     U R                  S5      (       a%  [        R                  [        R                  S S5      $ [        R                  5       $ )Nr   rG   )rp   ra   rj   r   r%   rg   r   s    r*   extract_right_bracket_tokenr   ;  s=    ''	(?(?qII  ""r)   c                     U R                  S5      (       a%  [        R                  [        R                  S S5      $ [        R                  5       $ )Nr   rG   )rp   ra   rj   r   r&   rg   r   s    r*   extract_comma_tokenr   B  s;    ''	qAA  ""r)   TOKEN_EXTRACTION_FUNCTIONSc                   f    \ rS rSrS\SS4S jrS rS\\R                  -  4S jr
SS\4S	 jjrS
rg)Lexeri_  rm   r2   Nc                 :    SU l         SU l        SU l        Xl        g rw   )rs   r/   r0   rm   )r9   rm   s     r*   __init__Lexer.__init__a  s    
r)   c                 4    U R                   U R                  S  $ rP   rr   rT   s    r*   remaining_sourceLexer.remaining_sourceh  s    {{4::;''r)   c           	         [        / 5      nU R                  [        U R                  5      :  Ga  [         GH(  nU" U R                  5       5      nUR                  (       d  M-  [        UR                  [        5      (       aA  [        R                  R                  UR                  U R                  U R                  5      s  $ UR                  R                  [        R                   :w  aY  UR#                  [%        UR                  R                  U R                  U R                  UR                  R&                  5      5        U R)                  UR*                  5          OL   [        R                  R                  SU R                  5       S S  S3U R                  U R                  5      $ U R                  [        U R                  5      :  a  GM  U$ )NzUnexpected characters: '   z...')rB   rs   rW   rm   r   r   rc   
isinstancer@   error	Tracebacknewr/   r0   r.   r   r   rH   r,   r1   advancerd   )r9   rD   token_extraction_functionr:   s       r*   lex	Lexer.lexk  s`   Rjj3t{{++-G-G)243H3H3JK{{fkk3// ??..v{{D<L<LdN`N`aa;;##y';';;MM%(8(8$:J:JDL^L^`f`k`k`q`q"rsV223 .H **-EdF[F[F]^`_`FaEbbf+gimiyiy{  |N  |N  O  O jj3t{{++ r)   amountc                 F   US:  a  g [        U5       H  nU R                  [        U R                  5      :X  a    g U =R                  S-  sl        U R                  U R                     S:X  a  U =R
                  S-  sl        SU l        U =R                  S-  sl        M     g )NrG   r[   )rangers   rW   rm   r0   r/   )r9   r   r   s      r*   r   Lexer.advance~  s~    A:vAzzS--!#{{4::&$.  A% %&"JJ!OJ r)   )r0   rs   r/   rm   )rG   )r   r   r   r   r@   r   r   rB   r   r   r   r>   r   r(   r   r)   r*   r   r   _  s@    s t ([5??2 &S  r)   r   c                 4    [        U 5      R                  5       $ rP   )r   r   r   s    r*   tokenizer     s    =r)   )0__doc__enumdataclassesr   typingr   r   r   
urcl.typesr   r   ascii_lettersdigitsrq   r   r   Enumr   r@   r>   r   r   r?   r,   rB   ra   rt   ry   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r_   r=   r   r   r   r)   r*   <module>r      s   B  ! "   --=D $*$8$83$> !+Lv}}+\ (		 $ 3TZZ77=>

    %  %  % N 
J 
J 
J	M 	M(8 	M# #(8 #	GC 	G4D 	G
Gs 
G/? 
GQ Q0@ Q T3 T+; T$T T1A T$K# K*: K0K K)9 K NC N,< N$	QS 	Q-= 	Q	M 	M(8 	M	Ls 	L'7 	L#C #4D ##s #/? ## #0@ ## #(8 # # ##G D3%1A*A!BC ,* *XS r)   