Files
CMLeonOS/interpreter/UniLua/LLex.cs
2026-02-04 17:50:50 +08:00

760 lines
19 KiB
C#

using System;
using System.IO;
using System.Text;
using System.Collections.Generic;
using NumberStyles = System.Globalization.NumberStyles;
namespace UniLua
{
/// <summary>
/// Exception type declared alongside the lexer. It carries only the message
/// text handed to the constructor.
/// </summary>
public class LLexException : Exception
{
    /// <param name="info">Message describing the failure.</param>
    public LLexException( string info )
        : base( info )
    {
    }
}
/// <summary>
/// Token type codes for multi-character tokens. Numbering starts at 257;
/// presumably this keeps the codes clear of single-character token codes,
/// which LiteralToken stores as the raw character value.
/// </summary>
public enum TK
{
    // reserved words
    AND      = 257,
    BREAK    = 258,
    DO       = 259,
    ELSE     = 260,
    ELSEIF   = 261,
    END      = 262,
    FALSE    = 263,
    FOR      = 264,
    FUNCTION = 265,
    GOTO     = 266,
    IF       = 267,
    IN       = 268,
    LOCAL    = 269,
    NIL      = 270,
    NOT      = 271,
    OR       = 272,
    REPEAT   = 273,
    RETURN   = 274,
    THEN     = 275,
    TRUE     = 276,
    UNTIL    = 277,
    WHILE    = 278,

    // other terminal symbols
    CONCAT   = 279, // ..
    DOTS     = 280, // ...
    EQ       = 281, // ==
    GE       = 282, // >=
    LE       = 283, // <=
    NE       = 284, // ~=
    DBCOLON  = 285, // ::
    NUMBER   = 286,
    STRING   = 287,
    NAME     = 288,
    EOS      = 289, // end of stream
}
/// <summary>
/// Base class for every token the lexer produces. A token is identified by
/// an integer type code supplied by the concrete subclass.
/// </summary>
public abstract class Token
{
    /// <summary>Integer type code of this token.</summary>
    public abstract int TokenType { get; }

    /// <summary>True when both tokens report the same type code.</summary>
    public bool EqualsToToken( Token other )
    {
        return EqualsToToken( other.TokenType );
    }

    /// <summary>True when this token's type code equals the given integer.</summary>
    public bool EqualsToToken( int other )
    {
        return other == TokenType;
    }

    /// <summary>True when this token's type code equals the given TK value.</summary>
    public bool EqualsToToken( TK other )
    {
        return EqualsToToken( (int)other );
    }
}
/// <summary>
/// Token whose type code is the integer passed in. The lexer constructs it
/// with the character value of single-character tokens such as '-' or '['.
/// </summary>
public class LiteralToken : Token
{
    // Type code supplied at construction; never modified afterwards.
    private int _Literal;

    public LiteralToken( int literal )
    {
        this._Literal = literal;
    }

    /// <summary>The integer supplied to the constructor.</summary>
    public override int TokenType
    {
        get
        {
            return this._Literal;
        }
    }

    public override string ToString()
    {
        const string format = "LiteralToken: {0}";
        return string.Format( format, this._Literal );
    }
}
/// <summary>
/// Token identified by a TK enum value (reserved words, multi-character
/// operators, end of stream). Base class of the value-carrying tokens.
/// </summary>
public class TypedToken : Token
{
    // Enum value supplied at construction.
    private TK _Type;

    public TypedToken( TK type )
    {
        this._Type = type;
    }

    /// <summary>The TK value converted to its integer code.</summary>
    public override int TokenType
    {
        get
        {
            return (int)this._Type;
        }
    }

    public override string ToString()
    {
        const string format = "TypedToken: {0}";
        return string.Format( format, this._Type );
    }
}
/// <summary>
/// TK.STRING token carrying the text of a string literal.
/// </summary>
public class StringToken : TypedToken
{
    /// <summary>Text of the string literal as produced by the lexer.</summary>
    public string SemInfo;

    public StringToken( string seminfo )
        : base( TK.STRING )
    {
        this.SemInfo = seminfo;
    }

    public override string ToString()
    {
        const string format = "StringToken: {0}";
        return string.Format( format, this.SemInfo );
    }
}
/// <summary>
/// TK.NAME token carrying the text of an identifier.
/// </summary>
public class NameToken : TypedToken
{
    /// <summary>The identifier text.</summary>
    public string SemInfo;

    public NameToken( string seminfo )
        : base( TK.NAME )
    {
        this.SemInfo = seminfo;
    }

    public override string ToString()
    {
        const string format = "NameToken: {0}";
        return string.Format( format, this.SemInfo );
    }
}
/// <summary>
/// TK.NUMBER token carrying the parsed numeric value.
/// </summary>
public class NumberToken : TypedToken
{
    /// <summary>Value of the numeric literal.</summary>
    public double SemInfo;

    public NumberToken( double seminfo )
        : base( TK.NUMBER )
    {
        this.SemInfo = seminfo;
    }

    public override string ToString()
    {
        const string format = "NumberToken: {0}";
        return string.Format( format, this.SemInfo );
    }
}
public class LLex
{
// Sentinel stored in Current once LoadInfo.ReadByte() reports -1 (no more input).
public const char EOZ = Char.MaxValue;
// State object used to push the formatted message and raise syntax errors.
private LuaState Lua;
// Character currently under the cursor, or EOZ at end of input.
private int Current;
// Line the cursor is on; starts at 1 and is bumped by _IncLineNumber().
public int LineNumber;
// Value LineNumber had when Next() was last called.
public int LastLine;
// Input source; one character is pulled per _Next() call.
private ILoadInfo LoadInfo;
// Chunk name passed to the constructor; used as the prefix of error messages.
public string Source;
// Most recent token produced by Next(); null until Next() is first called.
public Token Token;
// Single token buffered by GetLookAhead() and consumed by the following Next().
private Token LookAhead;
// Backing buffer for the text of the token being scanned; null means
// "empty" and is replaced by a fresh builder on first access.
private StringBuilder _Saved;

/// <summary>
/// Buffer collecting the characters of the token being scanned. Created on
/// demand, so _ClearSaved() only has to drop the reference.
/// </summary>
private StringBuilder Saved
{
    get
    {
        return _Saved ?? ( _Saved = new StringBuilder() );
    }
}
// Maps each reserved word to its TK code; filled once by the static constructor.
private static Dictionary<string, TK> ReservedWordDict;

/// <summary>
/// Builds the reserved-word lookup table.
/// </summary>
static LLex()
{
    ReservedWordDict = new Dictionary<string, TK>
    {
        { "and",      TK.AND      },
        { "break",    TK.BREAK    },
        { "do",       TK.DO       },
        { "else",     TK.ELSE     },
        { "elseif",   TK.ELSEIF   },
        { "end",      TK.END      },
        { "false",    TK.FALSE    },
        { "for",      TK.FOR      },
        { "function", TK.FUNCTION },
        { "goto",     TK.GOTO     },
        { "if",       TK.IF       },
        { "in",       TK.IN       },
        { "local",    TK.LOCAL    },
        { "nil",      TK.NIL      },
        { "not",      TK.NOT      },
        { "or",       TK.OR       },
        { "repeat",   TK.REPEAT   },
        { "return",   TK.RETURN   },
        { "then",     TK.THEN     },
        { "true",     TK.TRUE     },
        { "until",    TK.UNTIL    },
        { "while",    TK.WHILE    },
    };
}
/// <summary>
/// Creates a lexer over <paramref name="loadinfo"/> and reads the first
/// character so that Current is valid before any token is requested.
/// </summary>
/// <param name="lua">State used for error reporting; cast to LuaState.</param>
/// <param name="loadinfo">Character source.</param>
/// <param name="name">Chunk name shown in error messages.</param>
public LLex( ILuaState lua, ILoadInfo loadinfo, string name )
{
    Lua = (LuaState)lua;
    LoadInfo = loadinfo;
    Source = name;

    // Both line counters start on the first line.
    LineNumber = 1;
    LastLine = 1;

    // No token, look-ahead or buffered text yet.
    Token = null;
    LookAhead = null;
    _Saved = null;

    // Prime Current with the first input character.
    _Next();
}
/// <summary>
/// Advances to the next token. LastLine records the line number before the
/// advance. A token buffered by GetLookAhead() is consumed first; otherwise
/// a new token is scanned.
/// </summary>
public void Next()
{
    LastLine = LineNumber;

    if( LookAhead == null )
    {
        Token = _Lex();
        return;
    }

    // Hand over the buffered token and empty the look-ahead slot.
    Token = LookAhead;
    LookAhead = null;
}
/// <summary>
/// Scans one token ahead without changing Token, and buffers it for the
/// next call to Next(). Asserts that no look-ahead is already buffered.
/// </summary>
/// <returns>The token that Next() will deliver next.</returns>
public Token GetLookAhead()
{
    Utl.Assert( LookAhead == null );
    var peeked = _Lex();
    LookAhead = peeked;
    return peeked;
}
/// <summary>
/// Reads the next character from LoadInfo into Current. A result of -1 is
/// translated to the EOZ sentinel.
/// </summary>
private void _Next()
{
    var c = LoadInfo.ReadByte();
    if( c == -1 )
    {
        Current = EOZ;
    }
    else
    {
        Current = c;
    }
}
/// <summary>
/// Appends the current character to the token buffer, then advances.
/// </summary>
private void _SaveAndNext()
{
    _Save( (char)Current );
    _Next();
}
/// <summary>
/// Appends <paramref name="c"/> to the token buffer without touching the input.
/// </summary>
private void _Save( char c )
{
    var buffer = Saved;
    buffer.Append( c );
}
/// <summary>
/// Returns the text buffered so far. If nothing has been saved, the Saved
/// property creates an empty buffer and the result is the empty string.
/// </summary>
private string _GetSavedString()
{
    var buffer = Saved;
    return buffer.ToString();
}
// Discards the buffered token text. Only the reference is dropped; the
// Saved property creates a new builder the next time it is read.
private void _ClearSaved()
{
_Saved = null;
}
/// <summary>
/// True when the current character is a line feed or a carriage return.
/// </summary>
private bool _CurrentIsNewLine()
{
    switch( Current )
    {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// True when the current character is an ASCII decimal digit, '0' to '9'.
/// </summary>
/// <remarks>
/// Char.IsDigit is deliberately not used: it also accepts every other
/// Unicode decimal digit (for example Arabic-Indic digits). Callers in this
/// class compute digit values as <c>Current - '0'</c> and pass the collected
/// text to the number parser, both of which are only meaningful for ASCII
/// digits. EOZ is outside the range and therefore yields false.
/// </remarks>
private bool _CurrentIsDigit()
{
    return '0' <= Current && Current <= '9';
}
/// <summary>
/// True when the current character is accepted by _CurrentIsDigit() or is a
/// letter in the ranges A-F / a-f.
/// </summary>
private bool _CurrentIsXDigit()
{
    if( _CurrentIsDigit() )
    {
        return true;
    }
    if( Current >= 'A' && Current <= 'F' )
    {
        return true;
    }
    return Current >= 'a' && Current <= 'f';
}
/// <summary>
/// True when the current character, viewed as a char, is white space
/// according to Char.IsWhiteSpace.
/// </summary>
private bool _CurrentIsSpace()
{
    var ch = (char)Current;
    return Char.IsWhiteSpace( ch );
}
/// <summary>
/// True when the current character, viewed as a char, is a letter according
/// to Char.IsLetter.
/// </summary>
private bool _CurrentIsAlpha()
{
    var ch = (char)Current;
    return Char.IsLetter( ch );
}
/// <summary>
/// Looks <paramref name="identifier"/> up in the reserved-word table.
/// </summary>
/// <param name="identifier">Candidate word.</param>
/// <param name="type">Receives the TK code when the word is reserved.</param>
/// <returns>True when the word is reserved.</returns>
private bool _IsReserved( string identifier, out TK type )
{
    bool found = ReservedWordDict.TryGetValue( identifier, out type );
    return found;
}
/// <summary>
/// Reports whether <paramref name="name"/> is one of the reserved words.
/// </summary>
public bool IsReservedWord( string name )
{
    TK ignored;
    return ReservedWordDict.TryGetValue( name, out ignored );
}
/// <summary>
/// Consumes one line terminator and increments LineNumber. A two-character
/// terminator made of two different newline characters ("\r\n" or "\n\r")
/// counts as a single line break. Callers invoke this while Current is a
/// newline character.
/// </summary>
private void _IncLineNumber()
{
    var first = Current;
    _Next(); // skip the first newline character

    // A different newline character right after the first is the second half
    // of the same terminator; the same character again is a new, empty line.
    bool pairedTerminator = _CurrentIsNewLine() && Current != first;
    if( pairedTerminator )
    {
        _Next();
    }

    LineNumber++;
    if( LineNumber >= Int32.MaxValue )
    {
        _Error( "chunk has too many lines" );
    }
}
/// <summary>
/// Reads the body of a long bracket string or long comment. On entry
/// Current is the second '[' of the opening delimiter; on exit the closing
/// delimiter has been consumed.
/// </summary>
/// <param name="sep">Number of '=' characters in the opening delimiter.</param>
/// <returns>
/// The text between the delimiters when the opening delimiter is still in
/// the token buffer (the string-literal path). Line breaks are stored as
/// '\n', and a line break directly after the opening delimiter is dropped.
/// On the comment path the caller has already cleared the buffer and
/// ignores the result, so the returned text is not meaningful there.
/// </returns>
private string _ReadLongString( int sep )
{
    _SaveAndNext(); // skip the second '['

    // A newline right after the opening delimiter is not part of the content.
    if( _CurrentIsNewLine() )
        _IncLineNumber();

    bool closed = false;
    while( !closed )
    {
        switch( Current )
        {
            case EOZ:
                _LexError( _GetSavedString(),
                    "unfinished long string/comment",
                    (int)TK.EOS );
                break;

            case '[':
                if( _SkipSep() == sep )
                {
                    _SaveAndNext();
                    if( sep == 0 )
                    {
                        _LexError( _GetSavedString(),
                            "nesting of [[...]] is deprecated",
                            (int)TK.EOS );
                    }
                }
                break;

            case ']':
                if( _SkipSep() == sep )
                {
                    _SaveAndNext(); // skip the second ']'
                    closed = true;
                }
                break;

            case '\n':
            case '\r':
                // Any newline form is stored as a single '\n'.
                _Save('\n');
                _IncLineNumber();
                break;

            default:
                _SaveAndNext();
                break;
        }
    }

    var raw = _GetSavedString();
    int delimiterLength = 2 + sep;
    int contentLength = raw.Length - 2*delimiterLength;

    // When scanning a comment the caller clears the buffer after the opening
    // delimiter, so the buffer can be shorter than two delimiters (for
    // example "--[[]]"). Substring would throw on the negative length; the
    // result is unused on that path, so return an empty string instead.
    if( contentLength < 0 )
        return String.Empty;

    return raw.Substring( delimiterLength, contentLength );
}
/// <summary>
/// Reports a bad escape sequence in a string literal. A backslash is put in
/// front of <paramref name="info"/> before it is passed on to _LexError.
/// </summary>
/// <param name="info">Escape text read so far, without the backslash.</param>
/// <param name="msg">Description of the problem.</param>
private void _EscapeError( string info, string msg )
{
    var escapeText = "\\" + info;
    _LexError( escapeText, msg, (int)TK.STRING );
}
/// <summary>
/// Reads the two hexadecimal digits of a \xXX escape. On entry Current is
/// the 'x'; on exit Current is the second digit, which the caller skips.
/// </summary>
/// <returns>The byte value encoded by the two digits.</returns>
private byte _ReadHexEscape()
{
    int r = 0;
    // Escape text collected for the error message: "x", then the digits read.
    var c = new char[3] { 'x', (char)0, (char)0 };

    // read two hex digits
    for( int i=1; i<3; ++i )
    {
        _Next();
        c[i] = (char)Current;

        // Convert the character itself to its hex value. Current is an int,
        // so Current.ToString() would give the decimal character code
        // (e.g. "52" for '4') rather than the digit.
        int digit;
        if( '0' <= Current && Current <= '9' )
        {
            digit = Current - '0';
        }
        else if( 'a' <= Current && Current <= 'f' )
        {
            digit = Current - 'a' + 10;
        }
        else if( 'A' <= Current && Current <= 'F' )
        {
            digit = Current - 'A' + 10;
        }
        else
        {
            _EscapeError( new String(c, 0, i+1),
                "hexadecimal digit expected" );
            // Presumably not reached, since the error path is expected to
            // throw; bail out so no invalid digit is folded into the result.
            return 0;
        }

        r = (r << 4) + digit;
    }
    return (byte)r;
}
/// <summary>
/// Reads a decimal escape of up to three digits. On entry Current is the
/// first digit; on exit Current is the first character after the digits.
/// Values above 255 are reported as an escape error.
/// </summary>
/// <returns>The byte value of the escape.</returns>
private byte _ReadDecEscape()
{
    var digits = new char[3]; // digits seen, kept for the error message
    int value = 0;
    int count = 0;

    // read up to 3 digits
    while( count < 3 && _CurrentIsDigit() )
    {
        digits[count] = (char)Current;
        value = value*10 + Current - '0';
        _Next();
        ++count;
    }

    if( value > Byte.MaxValue )
    {
        _EscapeError( new String(digits, 0, count),
            "decimal escape too large" );
    }
    return (byte)value;
}
/// <summary>
/// Reads a quoted string literal. On entry Current is the opening quote
/// (single or double); on exit the matching closing quote has been
/// consumed. Escape sequences are decoded while reading.
/// </summary>
/// <returns>The decoded text, without the surrounding quotes.</returns>
private string _ReadString()
{
    var del = Current; // the quote that must close the literal
    _Next(); // skip the opening quote; it is not saved
    while( Current != del )
    {
        switch( Current )
        {
            // End of input or a raw line break inside a quoted string.
            case EOZ:
            case '\n':
            case '\r':
                _Error( "unfinished string" );
                continue;

            case '\\':
            {
                byte c;
                _Next(); // skip the backslash
                switch( Current )
                {
                    case 'a': c=(byte)'\a'; break;
                    case 'b': c=(byte)'\b'; break;
                    case 'f': c=(byte)'\f'; break;
                    case 'n': c=(byte)'\n'; break;
                    case 'r': c=(byte)'\r'; break;
                    case 't': c=(byte)'\t'; break;
                    case 'v': c=(byte)'\v'; break;
                    case 'x': c=_ReadHexEscape(); break;

                    // Backslash followed by a line break: keep one '\n'.
                    case '\n':
                    case '\r': _Save('\n'); _IncLineNumber(); continue;

                    case '\\':
                    case '\"':
                    case '\'': c=(byte)Current; break;

                    // Reported as "unfinished string" on the next iteration.
                    case EOZ: continue;

                    // zap following span of spaces
                    case 'z':
                    {
                        _Next(); // skip `z'
                        while( _CurrentIsSpace() )
                        {
                            if( _CurrentIsNewLine() )
                                _IncLineNumber();
                            else
                                _Next();
                        }
                        continue;
                    }

                    default:
                    {
                        // Current is an int; cast it so the message shows
                        // the offending character, not its numeric code.
                        if( !_CurrentIsDigit() )
                            _EscapeError( ((char)Current).ToString(),
                                "invalid escape sequence" );
                        // digital escape \ddd; the helper has already
                        // advanced past the digits, so no _Next() here.
                        c = _ReadDecEscape();
                        _Save( (char)c );
                        continue;
                    }
                }
                // Single-character escapes and \xXX: store the decoded byte
                // and skip the last character of the escape.
                _Save( (char)c );
                _Next();
                continue;
            }

            default:
                _SaveAndNext();
                continue;
        }
    }
    _Next(); // skip the closing quote
    return _GetSavedString();
}
/// <summary>
/// Reads a numeric literal. On entry Current is a digit; the token buffer
/// may already hold a leading '.'. The characters are collected loosely and
/// the text is then handed to LuaState.O_Str2Decimal for validation.
/// </summary>
/// <returns>The parsed value; a parse failure is reported through _Error.</returns>
private double _ReadNumber()
{
    Utl.Assert( _CurrentIsDigit() );

    // Exponent markers: E/e by default, P/p once a "0x" prefix is seen.
    char expoUpper = 'E';
    char expoLower = 'e';

    var leading = Current;
    _SaveAndNext();
    if( leading == '0' && (Current == 'x' || Current == 'X') )
    {
        expoUpper = 'P';
        expoLower = 'p';
        _SaveAndNext();
    }

    while( true )
    {
        // Exponent marker with an optional sign.
        if( Current == expoUpper || Current == expoLower )
        {
            _SaveAndNext();
            if( Current == '+' || Current == '-' )
                _SaveAndNext();
        }

        if( !_CurrentIsXDigit() && Current != '.' )
            break;
        _SaveAndNext();
    }

    var text = _GetSavedString();
    double value;
    if( !LuaState.O_Str2Decimal( text, out value ) )
    {
        _Error( "malformed number: " + text );
        return 0.0;
    }
    return value;
}
// private float _ReadNumber()
// {
// do
// {
// _SaveAndNext();
// } while( _CurrentIsDigit() || Current == '.' );
// if( Current == 'E' || Current == 'e' )
// {
// _SaveAndNext();
// if( Current == '+' || Current == '-' )
// _SaveAndNext();
// }
// while( _CurrentIsAlpha() || _CurrentIsDigit() || Current == '_' )
// _SaveAndNext();
// float ret;
// if( !Single.TryParse( _GetSavedString(), out ret ) )
// _Error( "malformed number" );
// return ret;
// }
/// <summary>
/// Raises a syntax error. The message is prefixed with the chunk name and
/// the current line number, pushed onto the Lua state, and the state is
/// asked to throw with LUA_ERRSYNTAX.
/// </summary>
/// <param name="error">Description of the problem.</param>
private void _Error( string error )
{
    var message = string.Format( "{0}:{1}: {2}", Source, LineNumber, error );
    Lua.O_PushString( message );
    Lua.D_Throw( ThreadStatus.LUA_ERRSYNTAX );
}
/// <summary>
/// Raises a lexical error whose text is "msg:info".
/// </summary>
/// <param name="info">Offending source text.</param>
/// <param name="msg">Description of the problem.</param>
/// <param name="tokenType">Token type being scanned; currently unused.</param>
private void _LexError( string info, string msg, int tokenType )
{
    // TODO: tokenType is not yet used in the message.
    var detail = msg + ":" + info;
    _Error( detail );
}
// Public entry point for reporting a syntax error at the lexer's current
// line. It forwards msg unchanged to _Error, which adds the chunk name and
// line number.
public void SyntaxError( string msg )
{
// TODO
_Error( msg );
}
/// <summary>
/// Scans a long-bracket delimiter: the bracket under the cursor followed by
/// any number of '=' characters. Everything scanned is added to the token
/// buffer. The character after the '=' run is examined but not consumed.
/// </summary>
/// <returns>
/// The number of '=' characters when the run is followed by the same
/// bracket again; otherwise the negative value -(count) - 1.
/// </returns>
private int _SkipSep()
{
    var bracket = Current;
    _SaveAndNext();

    int level = 0;
    for( ; Current == '='; ++level )
    {
        _SaveAndNext();
    }

    if( Current == bracket )
    {
        return level;
    }
    return -level - 1;
}
/// <summary>
/// Scans and returns the next token, skipping white space, line breaks and
/// comments. The token buffer is cleared first. At end of input a TK.EOS
/// token is returned.
/// </summary>
private Token _Lex()
{
    _ClearSaved();
    for(;;)
    {
        switch( Current )
        {
            case '\n':
            case '\r':
                _IncLineNumber();
                continue;

            case '-':
            {
                _Next();
                if( Current != '-' )
                    return new LiteralToken('-');

                // "--" starts a comment
                _Next();
                if( Current == '[' )
                {
                    int commentLevel = _SkipSep();
                    _ClearSaved();
                    if( commentLevel >= 0 )
                    {
                        // long comment: read it and throw the text away
                        _ReadLongString( commentLevel );
                        _ClearSaved();
                        continue;
                    }
                }

                // short comment: skip to the end of the line
                while( Current != EOZ && !_CurrentIsNewLine() )
                    _Next();
                continue;
            }

            case '[':
            {
                int stringLevel = _SkipSep();
                if( stringLevel >= 0 )
                    return new StringToken( _ReadLongString( stringLevel ) );
                if( stringLevel == -1 )
                    return new LiteralToken('[');
                _Error("invalid long string delimiter");
                continue;
            }

            case '=':
                return _LexOneOrTwo( '=', TK.EQ );

            case '<':
                return _LexOneOrTwo( '=', TK.LE );

            case '>':
                return _LexOneOrTwo( '=', TK.GE );

            case '~':
                return _LexOneOrTwo( '=', TK.NE );

            case ':':
                return _LexOneOrTwo( ':', TK.DBCOLON ); // new in 5.2 ?

            case '"':
            case '\'':
                return new StringToken( _ReadString() );

            case '.':
            {
                _SaveAndNext();
                if( Current != '.' )
                {
                    // a lone dot, or the start of a number such as ".5"
                    if( _CurrentIsDigit() )
                        return new NumberToken( _ReadNumber() );
                    return new LiteralToken('.');
                }

                _SaveAndNext();
                if( Current != '.' )
                    return new TypedToken( TK.CONCAT );

                _SaveAndNext();
                return new TypedToken( TK.DOTS );
            }

            case EOZ:
                return new TypedToken( TK.EOS );

            default:
            {
                if( _CurrentIsSpace() )
                {
                    _Next();
                    continue;
                }

                if( _CurrentIsDigit() )
                    return new NumberToken( _ReadNumber() );

                if( !_CurrentIsAlpha() && Current != '_' )
                {
                    // any other character is a token by itself
                    var single = Current;
                    _Next();
                    return new LiteralToken( single );
                }

                // identifier or reserved word
                do
                {
                    _SaveAndNext();
                }
                while( _CurrentIsAlpha() || _CurrentIsDigit() || Current == '_' );

                string word = _GetSavedString();
                TK reserved;
                if( _IsReserved( word, out reserved ) )
                    return new TypedToken( reserved );
                return new NameToken( word );
            }
        }
    }
}

// Scans an operator that is either the single character under the cursor or
// that character followed by `second`. Returns a LiteralToken for the single
// character, or a TypedToken of `pairType` after consuming both characters.
private Token _LexOneOrTwo( char second, TK pairType )
{
    var first = Current;
    _Next();
    if( Current != second )
        return new LiteralToken( first );
    _Next();
    return new TypedToken( pairType );
}
}
}