o
    h/                     @   sP  U d Z ddlZddlmZ ddlmZmZ ddlZddlZ	ddl
Z
ejej d Zejd Zeej ZG dd dejZeeee	jjdf ZeG d	d
 d
ZeG dd dZeG dd dZdedefddZdedefddZdedefddZdedefddZdedefddZdedefddZdedefddZ dedefdd Z!dedefd!d"Z"dedefd#d$Z#dedefd%d&Z$dedefd'd(Z%dedefd)d*Z&dedefd+d,Z'dedefd-d.Z(dedefd/d0Z)dedefd1d2Z*eeeeee!e#e"ee e$e%e'e(e)e*e&gZ+e,eegef  e-d3< G d4d5 d5Z.defd6d7Z/dS )8z@This module takes URCL source code and performs lexical analysis    N)	dataclass)UnionCallablez.__c                   @   sD   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdS )	TokenTypeintegerlabelzgeneral register	character
identifier
whitespacezrelative jumpmacroportzheader inequality[],stringN)__name__
__module____qualname__INTEGERLABELGENERAL_REGISTER	CHARACTER
IDENTIFIER
WHITESPACERELATIVE_JUMPMACROPORTHEADER_INEQUALITYLEFT_BRACKETRIGHT_BRACKETCOMMASTRING r#   r#   2/home/seanl/mnt/public/Repos/urclelf/./urcl/lex.pyr      s    r   c                   @   s<   e Zd ZU eed< eed< eed< eed< defddZdS )	Tokentypeline_numbercolumn_numbervaluereturnc                 C   sJ   d| j j d| j d| j }| jd ur|d| j d7 }|S |d7 }|S )N< :z - >)r&   namer'   r(   r)   )selfresultr#   r#   r$   __str__)   s   
zToken.__str__N)	r   r   r   r   __annotations__int
TokenValuestrr2   r#   r#   r#   r$   r%   "   s   
 r%   c                   @   sN   e Zd ZU ee ed< dd ZdefddZdd Zd	d
 Z	de
fddZdS )TokenStreamtokensc                 C   sb   d}g }g }| j D ]}|j|kr|| q	|r|t| |j}|g}q	|r/|t| |S N   )r8   r'   appendr7   )r0   current_line_numberlinescurrent_linetokenr#   r#   r$   split_lines8   s   

zTokenStream.split_linesr?   c                 C   s   | j | d S N)r8   r;   )r0   r?   r#   r#   r$   r;   J      zTokenStream.appendc                 C   s
   | j  S rA   )r8   __iter__r0   r#   r#   r$   rC   M      
zTokenStream.__iter__c                 C   s
   t | jS rA   )lenr8   rD   r#   r#   r$   __len__P   rE   zTokenStream.__len__r*   c                 C   s8   g }|   D ]}|ddd |jD  qd|S )Nr,   c                 S   s   g | ]}t |qS r#   )r6   ).0r?   r#   r#   r$   
<listcomp>W   s    z'TokenStream.__str__.<locals>.<listcomp>
)r@   r;   joinr8   )r0   r=   liner#   r#   r$   r2   S   s   
zTokenStream.__str__N)r   r   r   listr%   r3   r@   r;   rC   rG   r6   r2   r#   r#   r#   r$   r7   3   s   
 r7   c                   @   sL   e Zd ZU eeB dB ed< eed< edd Zede	de
defdd	ZdS )
TokenParseResultNdatachars_consumedc                 C   s
   t d dS )Nr   )rN   )clsr#   r#   r$   miss`   s   
zTokenParseResult.missr&   r)   c                 C   s   t t|dd||S r9   )rN   r%   )rQ   r&   r)   rP   r#   r#   r$   successd   s   zTokenParseResult.success)r   r   r   r%   r6   r3   r4   classmethodrR   r   r5   rS   r#   r#   r#   r$   rN   [   s   
 
rN   sourcer*   c                 C   \   |  ds	t S d}|t| k r"| | tvrn
|d7 }|t| k sttj| d| |S )N.r:   )
startswithrN   rR   rF   LEGAL_LABEL_CHARACTERSrS   r   r   rU   indexr#   r#   r$   extract_label_tokenj      r\   c                 C   sZ   | st  S d}|t| k r| |  sn
|d7 }|t| k s|r)t tjd |S t  S Nr   r:   )rN   rR   rF   isspacerS   r   r   rZ   r#   r#   r$   extract_space_tokenv   s   r`   c                 C   sZ   |  ds	t S d}|t| k r%| d | drn
|d7 }|t| k sttjd |S )Nz/*   z*/r:   )rX   rN   rR   rF   endswithrS   r   r   rZ   r#   r#   r$   extract_multiline_comment_token   s   rc   c                 C   s\   |  ds	t S d}|t| k r&| | dkr|d7 }n
|d7 }|t| k sttjd |S )Nz//ra   rJ   r:   )rX   rN   rR   rF   rS   r   r   rZ   r#   r#   r$   extract_line_comment_token   s   rd   c                 C   s   |  ds	t S d}|t| k r"| |  rn
|d7 }|t| k szt| d| dd}W n tyD   td| d|  dd Y S w ttj	||S )N~r:   r   basezMalformed relative jump '')
rX   rN   rR   rF   r_   r4   
ValueErrorrS   r   r   rU   r[   token_valuer#   r#   r$   extract_relative_jump_token   s   rl   c                 C      | d   dvrt S d}|t| k r%| | dvrn
|d7 }|t| k s|dkr-t S ztjt| d| dd}W n tyS   td| d|  dd Y S w t	t
j||S )Nr   )r$r:   
0123456789rf   zMalformed register 'rh   lowerrN   rR   rF   urcltypesGeneralRegisterr4   ri   rS   r   r   rj   r#   r#   r$   extract_register_token       rv   c                 C   rm   )Nr   )m#r:   rp   rf   zMalformed memory address 'rh   rq   rj   r#   r#   r$   extract_memory_address_token   rw   rz   c                 C   s   | st  S d}d}| | dkr|d7 }| ||d   dv r(|d7 }|d7 }|t| k rA| |  |vr7n
|d7 }|t| k s.|sGt  S zt| d | dd}W n tyi   t d	| d |  d
d Y S w t tj||S )Nrp   r   -r:   ra   )0x0b0oabcdefrf   zMalformed integer 'rh   )	rN   rR   rr   rF   r4   ri   rS   r   r   )rU   permitted_number_charsr[   rk   r#   r#   r$   extract_integer_token   s*   r   c                 C   s   |  ds	t S d}|t| k r3| | dkr|d7 }n|t| d kr)tddS |d7 }|t| k s| d|d  }|ddddd	d
}ttj||S )N"r:   zString was never closedr   \nrJ   \t	\r)rX   rN   rR   rF   replacerS   r   r"   rU   r[   string_valuer#   r#   r$   extract_string_token   s   
r   c                 C   s   |  ds	t S d}|t| k r3| | dkr|d7 }n|t| d kr)tddS |d7 }|t| k s| d|d  }|ddddd	d
}t|dkrZtdt| ddS ttj||S )Nrh   r:   z"Character literal was never closedr   r   rJ   r   r   r   r   zCharacter literal is of length z (expected 1))rX   rN   rR   rF   r   rS   r   r   r   r#   r#   r$   extract_character_token   s   
r   c                 C   s^   | d t vr
t S d}|t| k r#| | tvrn
|d7 }|t| k sttj| d | |S r^   )!LEGAL_IDENTIFIER_FIRST_CHARACTERSrN   rR   rF   (LEGAL_IDENTIFIER_CONTINUATION_CHARACTERSrS   r   r   rZ   r#   r#   r$   extract_identifier_token  s   r   c                 C   rV   )N@r:   )rX   rN   rR   rF   rY   rS   r   r   rZ   r#   r#   r$   extract_macro_token  r]   r   c                 C   rV   )N%r:   )rX   rN   rR   rF   rY   rS   r   r   rZ   r#   r#   r$   extract_port_token!  r]   r   c                 C   s8   dD ]}|  |rttj| d d d  S qt S )N)z==z<=z>=ra   )rX   rN   rS   r   r   rR   )rU   
inequalityr#   r#   r$   extract_header_inequality_token,  s
   
r   c                 C   "   |  drttjd dS t S )Nr   r:   )rX   rN   rS   r   r   rR   rU   r#   r#   r$   extract_left_bracket_token4     
r   c                 C   r   )Nr   r:   )rX   rN   rS   r   r    rR   r   r#   r#   r$   extract_right_bracket_token;  r   r   c                 C   r   )Nr   r:   )rX   rN   rS   r   r!   rR   r   r#   r#   r$   extract_comma_tokenB  r   r   TOKEN_EXTRACTION_FUNCTIONSc                   @   sJ   e Zd ZdeddfddZdd ZdeejB fdd	Z	dde
fddZdS )LexerrU   r*   Nc                 C   s   d| _ d| _d| _|| _d S r^   )r[   r'   r(   rU   )r0   rU   r#   r#   r$   __init__a  s   
zLexer.__init__c                 C   s   | j | jd  S rA   rZ   rD   r#   r#   r$   remaining_sourceh  rB   zLexer.remaining_sourcec                 C   s   t g }| jt| jk rhtD ]=}||  }|jsqt|jtr-t	j
|j| j| j  S |jjtjkrD|t|jj| j| j|jj | |j  nt	j
d|  d d  d| j| jS | jt| jk s|S )NzUnexpected characters: '   z...')r7   r[   rF   rU   r   r   rO   
isinstancer6   error	Tracebacknewr'   r(   r&   r   r   r;   r%   r)   advancerP   )r0   r8   token_extraction_functionr1   r#   r#   r$   lexk  s    (z	Lexer.lexr:   amountc                 C   st   |dk rd S t |D ]-}| jt| jkr d S |  jd7  _| j| j dkr0|  jd7  _d| _|  jd7  _q
d S )Nr:   rJ   )ranger[   rF   rU   r(   r'   )r0   r   r   r#   r#   r$   r   ~  s   zLexer.advance)r:   )r   r   r   r6   r   r   r7   r   r   r   r4   r   r#   r#   r#   r$   r   _  s
    r   c                 C   s   t |  S rA   )r   r   r   r#   r#   r$   tokenize  s   r   )0__doc__enumdataclassesr   typingr   r   r   
urcl.typesrs   r   ascii_lettersdigitsrY   r   r   Enumr   r6   r4   rt   ru   r5   r%   r7   rN   r\   r`   rc   rd   rl   rv   rz   r   r   r   r   r   r   r   r   r   r   r   rM   r3   r   r   r#   r#   r#   r$   <module>   sn    

',