/* 
   Copyright (C) 1990 C van Reewijk, email: dutentb.uucp!reeuwijk

This file is part of GLASS.

GLASS is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.

GLASS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GLASS; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* File: tmlex.c
   Lexical analyzer for Miranda algebraic datatypes plus additions
   for naming of constructor elements.
 */

#include "tmdefs.h"
#include <ctype.h>
#include <tmc.h>

#include "tmds.h"
#include "tmstring.h"
#include "debug.h"
#include "tmerror.h"
#include "tmgram.h"
#include "tmglobal.h"
#include "tmlex.h"
#include "tmmisc.h"

extern YYSTYPE yylval;
extern double atof();

#ifdef DEBUG
#define lexshow(tok,nm) if(lextr) fprintf(tracestream,"token: %s(%d). yytext=\"%s\".\n",nm,tok,yytext)
#else
#define lexshow(tok,nm)
#endif

static char yytext[256];

/******************************************************
 *                                                    *
 *            SCANNING TREES                          *
 *                                                    *
 ******************************************************/

/* Scanning trees are used to recogize separator tokens.
   They form a linked list of possible acceptable characters.
   Each node may have a sub-list of possible extensions
   and an the token value for the current match.
 */

struct sctnode {
    struct sctnode *next;   /* next possibility in list */
    int sctchar;            /* char to match            */
    struct sctnode *sub;    /* subtree to use on match. */
    bool valid;             /* is acceptable token?     */
    int tokval;             /* token value for yacc     */
    char *toknm;            /* token name for debugging */
};

#define SCTNIL (struct sctnode *)0

static long newsctnodecnt;
static long fresctnodecnt;

struct sctnode *toktree;

/* Create a new scan tree node to match character 'c'. The next character
   to be considered is described by node 'nxt'. The subtree is set empty,
   and token is set invalid.
 */
static struct sctnode *newsctnode( nxt, c )
 struct sctnode *nxt;
 int c;
{
    struct sctnode *new;

    new = (struct sctnode *) ckmalloc( sizeof( struct sctnode ) );
    newsctnodecnt++;
    new->next    = nxt;
    new->sctchar = c;
    new->sub     = SCTNIL;
    new->valid   = FALSE;
    return( new );
}

/* Recursively free scan tree node 'n'. */
static void rfre_sctnode( n )
 struct sctnode *n;
{
    if( n == SCTNIL ) return;
    rfre_sctnode( n->next );
    rfre_sctnode( n->sub );
    free( (char *) n );
    fresctnodecnt++;
}

/* Add to the scan tree 'tree' a new token with string 'str',
   YACC value 'val' and (debugging) name 'nm'.
   Return a pointer to the modified tree.
 */
static struct sctnode *addtok( tree, str, val, nm )
 struct sctnode *tree;
 char *str;
 int val;
 char *nm;
{
    register struct sctnode *tp;

    for( tp=tree; tp!=SCTNIL; tp=tp->next ){
	if( tp->sctchar == str[0] ) break;
    }
    if( tp == SCTNIL ){
	tree = newsctnode( tree, str[0] );
	tp = tree;
    }
    if( str[1] == '\0' ){
	tp->valid = TRUE;
	tp->tokval= val;
	tp->toknm = nm;
    }
    else{
	tp->sub = addtok( tp->sub, &str[1], val, nm );
    }
    return( tree );
}

/******************************************************
 *                                                    *
 *            TOKEN AND RESERVED WORD TABLES          *
 *                                                    *
 ******************************************************/

/* A structure to describe tokens and reserved words */
struct tok {
    char *tokstr;   /* the string to match. */
    int tokval;     /* associated token value for yacc */
    char *toknm;    /* name for debugging */
};

#define TOKNIL (struct tok *) 0;


/* A table of tokens. Is terminated by an entry with empty string. */
struct tok toktab[] =
{
    { "(", LRBRAC, "LRBRAC" },
    { ")", RRBRAC, "RRBRAC" },
    { ",", COMMA, "COMMA" },
    { ":", COLON, "COLON" },
    { "::=", COLCOLEQ, "COLCOLEQ" },
    { ";", SEMI, "SEMI" },
    { "==", EQEQ, "EQEQ" },
    { "[", LSBRAC, "LSBRAC" },
    { "]", RSBRAC, "RSBRAC" },
    { "|", BAR, "BAR" },
    { NULL, 0, "" }
};

/* A table of reserved words: symbols that have a special meaning.
   Is terminated by an entry with NULL string.
 */

struct tok rwtab[] =
{
    { NULL, 0, "" }
};

/******************************************************
 *                                                    *
 *            FILE MANAGEMENT                         *
 *                                                    *
 ******************************************************/

#define UNGETBUFLENSTEP 10

/* file to read from */
FILE *lexfile;

/* variables for a dynamic unget buffer:
   length, pointer to buffer and index of next char to un-get.
 */
static int ungetbuflen;
static int *ungetbuf;
static unsigned int ungetbufix;

/* push back character 'c' in local pushback queue.
   Enlarge queue if necessary.
 */
static void lexungetc( c )
 int c;
{
    if( ungetbufix >= ungetbuflen ){
	ungetbuflen+=UNGETBUFLENSTEP;
	ungetbuf = (int *) ckrealloc( (char *) ungetbuf, (unsigned) ungetbuflen*sizeof(int) );
    }
    ungetbuf[ungetbufix++] = c;
}

/* Get a character from input stream or pushback queue. */
static int lexgetc()
{
    register int c;

    if( ungetbufix != 0 ){
	c = ungetbuf[--ungetbufix];
    }
    else {
	c = getc( lexfile );
    }
    return( c );
}

/* Try to read characters from 'lexgetc()' to match one of the tokens from the
   table 'toktab' in the string 'buf'. The token characters to match are given 
   by the scan tree 'tree'.
   Fill '*tokval' with the token value, and '*toknm' with the name
   of the token. Return TRUE if this is successful, else return FALSE.
 */
static bool scantoken( tree, buf, tokval, toknm )
 struct sctnode *tree;
 register char *buf;
 int *tokval;
 char **toknm;
{
    register int c;
    register struct sctnode *tp;

    c = lexgetc();
    if( c == EOF ) return( FALSE );
    for( tp=tree; tp!=SCTNIL; tp=tp->next ){
	if( tp->sctchar == c ) break;
    }
    if( tp == SCTNIL ){
	lexungetc( c );
	return( FALSE );
    }
    buf[0] = c;
    if( scantoken( tp->sub, &buf[1], tokval, toknm ) ){
	return( TRUE );
    }
    if( tp->valid ){
	*tokval = tp->tokval;
	*toknm = tp->toknm;
	buf[1] = '\0';
	return( TRUE );
    }
    lexungetc( c );
    return( FALSE );
}

/* Try to read a symbol in the string 'buf' using lexgetc(). Return TRUE if
   this is successful, else return FALSE.
   A symbol is of the form [a-zA-Z][a-zA-Z0-9_]*.
 */
static bool scansymbol( buf )
 register char *buf;
{
    register int c;

    c = lexgetc();
    if( !isalpha( c ) ){
	lexungetc( c );
	return( FALSE );
    }
    do{
	*buf++ = c;
	c = lexgetc();
    } while( isalnum( c ) || c == '_' );
    *buf = '\0';
    lexungetc( c );
    return( TRUE );
}

/* "||" encountered, skip characters until end of line. */
static void skipcomment()
{
    register int c;

    c = lexgetc();
    while( c != '\n' ){
	if( c == EOF ){
	    error( UNEXPECTEOF );
	    exit( 1 );
	}
	c = lexgetc();
    }
    dslineno++;
}

/* Return next token from lex input file. Set 'yytext' to the characters
 * of the next token, and 'yylval' to the associated value of the token.
 */
int yylex()
{
    register int c;
    char *toknm;
    int tokval;

again:
    c = lexgetc();
    if( c == '|' ){
	c = lexgetc();
	if( c == '|' ){
	    skipcomment();
	    goto again;
	}
	lexungetc( c );
	c = '|';
    }
    if( c == '\n' ){
	dslineno++;
	goto again;
    }
    if( isspace( c ) ) goto again;
    if( c == EOF ){
	yytext[0] = '\0';
	lexshow(EOF,"EOF");
	return EOF;
    }
    lexungetc( c );
    if( scansymbol( yytext ) ){
	struct tok *rwp;

	for( rwp = rwtab; rwp->tokstr != NULL; rwp++ ){
	    if( strcmp( rwp->tokstr, yytext ) == 0 ){
		lexshow(rwp->tokval,rwp->toknm);
		return( rwp->tokval );
	    }
	}
	yylval.parstring = new_string( yytext );
	lexshow(NAME,"NAME");
	return NAME;
    }
    if( scantoken( toktree, yytext, &tokval, &toknm ) ){
	lexshow(tokval,toknm);
	return tokval;
    }
    c = lexgetc();
    if( c >= ' ' && c<= 0x7e ){
	(void) sprintf( errarg, "'%c'", c );
    }
    else {
	(void) sprintf( errarg, "0x%02x", c );
    }
    (void) sprintf( errpos, "%s(%d)", dsfilename, dslineno );
    error( BADTOK );
    exit( 1 );
    return( c );
}

/* Initialize lexical analysis routines. */
void init_lex()
{
    struct tok *ttp;

    newsctnodecnt=0;
    fresctnodecnt=0;
    ungetbuflen = 2;	/* Don't make this 0, some mallocs don't like it */
    ungetbuf = (int *) ckmalloc( (unsigned) ungetbuflen*sizeof(int) );
    ungetbufix = 0;
    toktree = SCTNIL;
    for( ttp = toktab; ttp->tokstr != NULL; ttp++ ){
	toktree = addtok( toktree, ttp->tokstr, ttp->tokval, ttp->toknm );
    }
}

/* Terminate lexcial analysis routines. Free all allocated memory */
void end_lex()
{
    rfre_sctnode( toktree );
    toktree = SCTNIL;
    free( (char *) ungetbuf );
    ungetbuflen = 0;
    ungetbufix = 0;
}

/* Indicate that lex routines should read from file 'f'. */
void setlexfile( f )
 FILE *f;
{
    lexfile = f;
    dslineno=1;
}

/* Give allocation statistics of lex routines. */
void stat_lex( f )
 FILE *f;
{
#ifdef STAT
    fprintf( f, "ungetbuflen=%d\n", ungetbuflen );
    PRSTAT( f, "sctnode", newsctnodecnt, fresctnodecnt );
#else
    f = f; /* to stop 'f unused' */
#endif
}
