/* $Header: rep_pars.c,v 3.2 88/07/28 16:45:05 jos Exp $ */
/*
 *  This file is part of the Amsterdam SGML Parser.
 *
 *  Copyright: Faculteit Wiskunde en Informatica
 *             Department of Mathematics and Computer Science
 *             Vrije Universiteit Amsterdam
 *             The Netherlands
 *
 *  Authors:   Sylvia van Egmond
 *             Jos Warmer
 */

#include "types.h"
#include "att_par.h"
#include "charclas.h"
#include "group.h"
#include "rep_pars.h"
#include "replace.h"
#include "report.h"
#include "symtable.h"
#include "tags.h"

/*
 *  Forward declaration: assert_token() is called by get_rule_token() and
 *  get_fmt_token() before its definition further down in this file.
 */
void      assert_token(PAR  int  RAP);

/*
 *  Definitions of the tokens used in the parser
 *
 *  Within this file:
 *    - get_rule_token() yields STAGO, ETAGO, COMMENT, PLUS, FMT_OPEN,
 *      LAYOUT, CHARACTER and ENDOFFILE;
 *    - get_fmt_token() yields ATTRIBUTE, CHARACTER, FMT_CLOSE, NEWLINE
 *      and ENDOFFILE;
 *    - TAGC and ATT_CLOSE are only ever passed to assert_token();
 *    - ESCAPE and ATT_OPEN appear only in the `token_names' table.
 *  The value 0 is not a token: it terminates the check_int() argument
 *  lists used further down.
 */
#define ESCAPE        1
#define TAGC          2
#define STAGO         3
#define ETAGO         4
#define FMT_OPEN      5
#define FMT_CLOSE     6
#define ATT_OPEN      7
#define ATT_CLOSE     8
#define PLUS          9
#define NEWLINE       10
#define CHARACTER     11
#define COMMENT       12
#define LAYOUT        13
#define ATTRIBUTE     14
#define ENDOFFILE     15

/*
 *  Maps every token code to its printable name.  Looked up through
 *  symbol_string() when a token has to be mentioned in an error report.
 *  The {ILL_STRING, ILL_SYMBOL} entry closes the table.
 */
SymbolTable token_names = { {"ESCAPE",    ESCAPE},
                            {"TAGC",      TAGC},
                            {"STAGO",     STAGO},
                            {"ETAGO",     ETAGO},
                            {"FMT_OPEN",  FMT_OPEN},
                            {"FMT_CLOSE", FMT_CLOSE},
                            {"ATT_OPEN",  ATT_OPEN},
                            {"ATT_CLOSE", ATT_CLOSE},
                            {"PLUS",      PLUS},
                            {"NEWLINE",   NEWLINE},
                            {"CHARACTER", CHARACTER},
                            {"COMMENT",   COMMENT},
                            {"LAYOUT",    LAYOUT},
			    {"ATTRIBUTE", ATTRIBUTE},
                            {"ENDOFFILE", ENDOFFILE},
			    {ILL_STRING,  ILL_SYMBOL}
};

/*
 *  Definitions of the characters used in the parser
 *
 *  CH_TAGO followed by CH_ETAGO ("</") opens an end tag; CH_TAGO alone
 *  opens a start tag.  A format string is opened and closed by the same
 *  character (CH_FMTOPEN == CH_FMTCLOSE), so a literal quote inside a
 *  format has to be written with CH_ESCAPE in front of it.
 */
#define CH_COMMENT    '%'
#define CH_ESCAPE     '\\'
#define CH_TAGO       '<'
#define CH_ETAGO      '/'
#define CH_TAGC       '>'
#define CH_PLUS       '+'
#define CH_FMTOPEN    '"'
#define CH_FMTCLOSE   '"'
#define CH_ATTOPEN    '['
#define CH_ATTCLOSE   ']'

/*
 *  The three modes for token recognition (values of `environ'):
 *    RULE          outside a format string; tokens come from skip_layout()
 *    FORMAT_STAGO  inside a format string of a start tag; "[name]" is
 *                  recognised as an attribute reference
 *    FORMAT_ETAGO  inside a format string of an end tag; '[' is an
 *                  ordinary character
 */
#define RULE          1
#define FORMAT_STAGO  2
#define FORMAT_ETAGO  3

/*
 *  Some constants.
 *
 *  FORMAT_LENGTH  maximum number of characters kept for one string chunk
 *                 of a format (see read_formats()/read_chars())
 *  NAME_LENGTH    maximum length of an element or attribute name; NAMELEN
 *                 is defined outside this file
 *  STRING_LENGTH  size of the pieces in which deleted text is reported
 *                 by read_delete()
 */
#define FORMAT_LENGTH 500
#define NAME_LENGTH   NAMELEN
#define STRING_LENGTH 40

/*
 *  State of the replacement-file parser.
 *
 *  NOTE(review): `environ' is also the name of the POSIX process
 *  environment variable; confirm that no included header declares it
 *  in this translation unit.
 */
static FILE*  rep_file;			/* the replacement file    */
static String rep_file_name;		/* the name of the replacement file */
/* most recently read (or inserted) token */
static int    current_token;
/* character belonging to current_token, where applicable */
static int    current_value;
/* name read by get_name() for STAGO, ETAGO and ATTRIBUTE tokens */
static char   current_name_value[NAME_LENGTH+1];
/* TRUE: the next token request must hand out current_token again */
static int    again          = FALSE;
/* current tokenizer mode: RULE, FORMAT_STAGO or FORMAT_ETAGO */
static int    environ        = RULE;
/* line being read; maintained by get_char()/unget_char() */
static int    line_number    = 1;
/* buffer filled by rep_location(); allocated in init_rep_parser() */
static String location = 0;


#ifdef DEBUG
/*
 *  Debug switch of this module; only present in DEBUG builds.
 */
Bool debug = FALSE;

/*
 *  Stores `b' in the `debug' flag.
 */
void debug_rep_parser(b)
Bool  b;
{
    debug = b;
}
#endif

/***********************************************************************
 *
 *  get and unget characters
 *
 ***********************************************************************/
/*
 *  Reads one character from the replacement file and returns it
 *  (EOF at end of file).  A newline advances `line_number'.
 */
int get_char()
{
    int c = getc(rep_file);

    if( c == '\n' ){
        ++line_number;
    }
    return c;
}

/*
 *  Pushes `ch' back onto the replacement file.  Pushing back a newline
 *  undoes the line count done by get_char().
 */
void unget_char(ch)
int ch;
{
    if( '\n' == ch ){
        line_number -= 1;
    }
    ungetc(ch, rep_file);
}

/*********************************************************************
 *
 *  read names and octal numbers
 *
 *********************************************************************/
/*
 *  Reads a (possibly empty) run of octal digits and returns its value.
 *  The first character that is not an octal digit is pushed back.
 */
int get_octal()
{
    int value = 0;
    int digit;

    for( digit = get_char(); (digit >= '0') && (digit <= '7'); digit = get_char() ){
        value = (value * 8) + (digit - '0');
    }
    unget_char(digit);
    return value;
}

void get_name(name, max_length)
char* name;
int   max_length;
{
    int i   = 0;
    int ch;

    ch = get_char();
    if( not is_name_start_character(ch) ) {
	report(REP_NAME_START, FATAL, 0, 0, ch);
    }
    while( is_name_character(ch) ){
	if( i != max_length ) {
	    name[i++] = ch;
	} else {
	    name[i] = '\0';
	    report(REP_WAR_NAMELEN, NOTFATAL, 0, 0, name, NAMELEN);
	}
	ch = get_char();
    }
    name[i] = '\0';
    unget_char(ch);
}

/*************************************************************************
 *
 *  get and unget tokens
 *
 ************************************************************************/
/*
 *  Marks the current token as not yet consumed: the next token request
 *  that honours `again' hands out `current_token' once more.
 */
void unget_token()
{
    again = TRUE;
}

/*
 *  Makes `token' (with character value `value') the current token and
 *  marks it as pending, so that it is the next token handed out.
 */
void insert_token(token, value)
int   token;
int   value;
{
    current_value = value;
    current_token = token;
    unget_token();
}

/*
 *  Reads the next token in RULE mode, stores it in `current_token' and
 *  returns it.  `current_value' holds the first character read.
 *
 *  For a tag ("<name>" or "</name>") the whole tag is consumed: the name
 *  is left in `current_name_value' and the closing TAGC is asserted.
 */
int get_rule_token()
{
    int lookahead;
    int tag_kind;

    current_value = get_char();
    switch( current_value ){
        case CH_TAGO :
            /* "</" opens an end tag, a lone '<' a start tag */
            lookahead = get_char();
            if( lookahead != CH_ETAGO ){
                unget_char(lookahead);
                tag_kind = STAGO;
            } else {
                tag_kind = ETAGO;
            }
            get_name(current_name_value, NAME_LENGTH);
            assert_token(TAGC);
            current_token = tag_kind;
            break;
        case CH_COMMENT :
            current_token = COMMENT;
            break;
        case CH_PLUS :
            current_token = PLUS;
            break;
        case CH_FMTOPEN :
            current_token = FMT_OPEN;
            break;
        case '\f' :
        case '\t' :
        case '\r' :
        case ' '  :
        case '\n' :
            current_token = LAYOUT;
            break;
        case EOF :
            current_token = ENDOFFILE;
            break;
        default :
            current_token = CHARACTER;
            break;
    }
    return current_token;
}

/*
 *  Reads the next token inside a format string, stores it in
 *  `current_token' and returns it.  A pending token (see unget_token())
 *  is handed out first.
 *
 *    "[name]"   ATTRIBUTE in FORMAT_STAGO mode (name is left in
 *               `current_name_value'); in any other mode '[' is a
 *               plain CHARACTER
 *    "\n" "\f" "\r" "\t" "\\"
 *               CHARACTER with the corresponding control character
 *    "\" + octal digits
 *               CHARACTER with that octal value
 *    "\" + any other character
 *               CHARACTER with that character itself
 *    '"'        FMT_CLOSE
 *    newline    NEWLINE
 *    EOF        ENDOFFILE (also directly after a backslash)
 *
 *  For CHARACTER tokens the character is left in `current_value'.
 *
 *  The return type is now spelled out; the definition used to rely on
 *  implicit int.
 */
int get_fmt_token()
{
    if( again ){
        again = FALSE;
        return current_token;
    }
    switch( current_value = get_char() ){
        case CH_ATTOPEN  :
            if( environ == FORMAT_STAGO ){
                get_name(current_name_value, NAME_LENGTH);
                assert_token(ATT_CLOSE);
                return (current_token = ATTRIBUTE);
            } else {
                return (current_token = CHARACTER);
            }
        case CH_ESCAPE   :
            switch( current_value = get_char() ){
                case 'n' : current_value = '\n';      break;
                case 'f' : current_value = '\f';      break;
                case 'r' : current_value = '\r';      break;
                case 't' : current_value = '\t';      break;
                case '\\': current_value = '\\';      break;
                case '0' :
                case '1' :
                case '2' :
                case '3' :
                case '4' :
                case '5' :
                case '6' :
                case '7' :
                    /* let get_octal() see the first digit as well */
                    unget_char(current_value);
                    current_value = get_octal();
                    break;
                case EOF : return (current_token = ENDOFFILE);
                default  : ;   /* escaped character stands for itself */
            }
            return (current_token = CHARACTER);
        case CH_FMTCLOSE : return (current_token = FMT_CLOSE);
        case '\n'        : return (current_token = NEWLINE);
        case EOF         : return (current_token = ENDOFFILE);
        default          : return (current_token = CHARACTER);
    }
}

/*
 *  Skips the rest of the current line, including its newline.
 *  Also stops at end of file, so that a comment on a last line without
 *  a terminating newline cannot make the parser loop forever.
 */
void skip_line()
{
    int ch;

    do {
        ch = get_char();
    } while( (ch != '\n') && (ch != EOF) );
}

/*
 *  Returns the first RULE-mode token that is neither layout nor comment.
 *  A pending token (see unget_token()) is examined first; a comment is
 *  skipped up to the end of its line.
 */
int skip_layout()
{
    if( again ){
        again = FALSE;
    } else {
        get_rule_token();
    }

    while( (current_token == COMMENT) || (current_token == LAYOUT) ){
        if( current_token == COMMENT ){
            skip_line();
        }
        get_rule_token();
    }
    return current_token;
}

/*
 *  Returns the next token, read according to the current mode `environ':
 *  RULE uses skip_layout(), both format modes use get_fmt_token().
 *
 *  An unknown mode is an internal error.  It is reported and the token
 *  is then read as in RULE mode, so that the function always returns a
 *  defined value (it used to fall off the end without returning one).
 */
int get_token()
{
    switch( environ ){
        case FORMAT_ETAGO :
        case FORMAT_STAGO : return get_fmt_token();
        case RULE         : return skip_layout();
        default           :
            report(REP_NO_LABEL, FATAL, 0, 0, environ, "get_token");
            return skip_layout();
    }
}

/*********************************************************************
 *
 *  assertions
 *
 ********************************************************************/
/*
 *  Expects `ch' as the next input character.  If another character is
 *  found it is pushed back and REP_INSERTED is reported for `ch'.
 */
void assert_char(ch)
int ch;
{
    int found = get_char();

    if( found == ch ){
        return;
    }
    unget_char(found);
    report(REP_INSERTED, FATAL, 0, 0, ch);
}

/*
 *  Expects `token' as the next token.
 *
 *  When a token is pending (see unget_token()) it is fetched with
 *  get_token() and compared; on a mismatch it is left pending and
 *  REP_ASSERT is reported with the name of the expected token.
 *
 *  Otherwise the token is checked on character level, which is only
 *  possible for ATT_CLOSE, FMT_OPEN and TAGC; any other token is
 *  reported as an internal error (REP_NO_LABEL).
 */
void assert_token(token)
int token;
{
    if( !again ){
        switch( token ){
            case TAGC      : assert_char(CH_TAGC    );    break;
            case FMT_OPEN  : assert_char(CH_FMTOPEN );    break;
            case ATT_CLOSE : assert_char(CH_ATTCLOSE);    break;
            default        :
                report(REP_NO_LABEL, FATAL, 0, 0, token, "assert_token");
        }
        return;
    }

    if( get_token() != token ){
        unget_token();
        report(REP_ASSERT, FATAL, 0, 0, symbol_string(token_names, token));
    }
}

/*********************************************************************
 *
 *  read the different parts of the input recursive descent
 *
 ********************************************************************/
/*
 *  Consumes an optional PLUS token.
 *  Returns TRUE when it was present; otherwise the token that was read
 *  is left pending and FALSE is returned.
 */
Bool read_plus()
{
    if( get_token() != PLUS ){
        unget_token();
        return FALSE;
    }
    return TRUE;
}

/*
 *  Collects the values of consecutive CHARACTER tokens in `s'.
 *
 *  s           buffer of at least max_length+1 characters
 *  max_length  maximum number of characters stored; further characters
 *              are consumed but not stored
 *
 *  The token that ends the run is left pending.
 */
void read_chars(s, max_length)
String  s;
int     max_length;
{
    int      used = 0;

    for(;;){
        if( get_token() != CHARACTER ){
            break;
        }
        if( used != max_length ){
            s[used++] = current_value;
        }
    }
    s[used] = '\0';
    unget_token();
}

/*
 *  Reports input that does not belong here and is therefore dropped.
 *
 *  For any token other than CHARACTER the name of the token is reported
 *  (REP_DEL).  For a CHARACTER token the current character and all
 *  CHARACTER tokens that follow in RULE mode are gathered and reported
 *  (REP_DEL_CHARS) in pieces of at most STRING_LENGTH characters; the
 *  token that ends the run is left pending.
 */
void read_delete(token)
int  token;
{
    char  text[STRING_LENGTH+1];
    int   used = 0;

    if( token != CHARACTER ){
        strcpy(text, symbol_string(token_names,token));
        report(REP_DEL, FATAL, 0, 0, text);
        return;
    }

    text[used++] = current_value;
    while( get_rule_token() == CHARACTER ){
        text[used++] = current_value;
        if( used == STRING_LENGTH ){
            /* buffer is full: report this piece and start a new one */
            text[used] = '\0';
            report(REP_DEL_CHARS, FATAL, 0, 0, text);
            used = 0;
        }
    }
    unget_token();
    if( used != 0 ){
        text[used] = '\0';
        report(REP_DEL_CHARS, FATAL, 0, 0, text);
    }
}

/*
 *  Reads zero or more format strings and returns them as one group of
 *  chunks: runs of characters become CHUNK_STRING chunks, attribute
 *  references become CHUNK_ATTNAME chunks.
 *
 *  `tag' == STAGO means that attribute references are recognized.
 *  `tag' == ETAGO means that attribute references are NOT recognized.
 *  `elem_name' is only used in the error report for an attribute
 *  reference that is not found in `attlist' (which may be 0).
 *
 *  A format string that runs into a newline or the end of the file is
 *  closed by inserting the missing FMT_CLOSE token.  Input following a
 *  format string is reported and dropped until a token is found that
 *  may follow a format.  The first token that does not open a format
 *  string is left pending.
 */
P_Group read_formats(tag, elem_name, attlist)
int          tag;
String       elem_name;
P_Attdeflist attlist;
{
    int      token;
    char     text[FORMAT_LENGTH+1];
    P_Group  formats;

    formats = group_create();
    while( get_token() == FMT_OPEN ){
        /* switch the tokenizer to the format mode matching the tag */
        switch(tag){
            case STAGO :
                environ = FORMAT_STAGO;
                break;
            case ETAGO :
                environ = FORMAT_ETAGO;
                break;
            default    :
                report(REP_NO_LABEL, FATAL, 0, 0, tag, "read_formats");
        }

        for( token = get_token(); token != FMT_CLOSE; token = get_token() ){
            switch( token ){
                case CHARACTER:
                    /* let read_chars() see this character as well */
                    unget_token();
                    read_chars(text, FORMAT_LENGTH);
                    group_add(formats,new_chunk(CHUNK_STRING,text));
                    break;
                case ATTRIBUTE:
                    if( attlist and
                          attdef_name_lookup(attlist, current_name_value) )
                    {
                        group_add(formats, new_chunk(CHUNK_ATTNAME,
                                                      current_name_value));
                    } else {
                        report(REP_ATTR, FATAL, 0, 0, elem_name,
                                                      current_name_value);
                    }
                    break;
                case NEWLINE   :
                case ENDOFFILE :
                    /* unterminated format: supply the closing quote */
                    report(REP_INSERTED, FATAL, 0, 0, CH_FMTCLOSE);
                    insert_token(FMT_CLOSE, 0);
                    break;
                default        :
                    report(REP_NO_LABEL,FATAL,0,0,token,"read_formats");
            }
        }
        environ = RULE;

        /* drop everything that cannot follow a format string */
        while(not check_int(get_token(),ENDOFFILE,FMT_OPEN,STAGO,ETAGO,PLUS,0)){
            read_delete(current_token);
        }
        unget_token();
    }
    unget_token();
    return formats;
}

void read_tago(tag)
int  tag;
{
    char         elem_name[NAME_LENGTH+1];
    Parserinfo   info;
    P_Attdeflist attlist = 0;
    Bool         start_plus, end_plus;
    P_Group      replacement;

#ifdef DEBUG
    if( tag == STAGO ){
	DEB1("read_tago <%s> : \n", current_name_value);
    } else {
	DEB1("read_tago </%s> : \n", current_name_value);
    }
#endif
    assert_token(tag);
    strcpy(elem_name, current_name_value);

    if( info = string_info(elem_name) ){
	if( tag == STAGO ){
	    attlist = info_attlist(info);
	}
    } else {
	report(REP_ELEM, FATAL, 0, 0, elem_name);
    }

    start_plus  = read_plus();
    while( not check_int(get_token(), ENDOFFILE,FMT_OPEN,STAGO,ETAGO,PLUS,0) ){
	read_delete(current_token);
    }
    unget_token();
    replacement = read_formats(tag, elem_name, attlist);
    end_plus    = read_plus();

    /*
     *  Check whether an element with name `elem_name' exists
     */
    if( info and (tag == STAGO) ) {
	info_set_start_rep(info,new_replace(start_plus, end_plus, replacement));
    } else if( info and (tag == ETAGO) ) {
	info_set_end_rep(info, new_replace(start_plus, end_plus, replacement));
    }
}

/*
 *  Reads the whole replacement file: every start or end tag begins a
 *  replacement rule, anything else at this level is reported and
 *  dropped.
 */
void read_file()
{
    int   token;

    while( (token = get_token()) != ENDOFFILE ){
        if( (token == STAGO) || (token == ETAGO) ){
            /* read_tago() expects the tag token to be pending */
            unget_token();
            read_tago(token);
        } else {
            read_delete(token);
        }
    }
}

/*
 *  Formats the current position in the replacement file as
 *      "<file name>": line <number>
 *  into the `location' buffer and returns that buffer.  The buffer is
 *  allocated by init_rep_parser(), so the result is only usable while
 *  that function is running.
 */
String rep_location()
{
    sprintf(location, "\"%s\": line %d", rep_file_name, line_number);

    return location;
}

void init_rep_parser(file_name)
String  file_name;
{
    int    length;
    DEB("enter rep_parser\n");

    rep_file_name = strsave(file_name);
    if( not (rep_file = fopen(file_name, "r")) ){
	report(FILE_OPEN, FATAL, 0, 0, file_name);
    }

    length   = 9 + strlen(rep_file_name) + 12 + 1;
    location = (char*) CALLOC( length, sizeof(char) );

    read_file();

    fatal_report();
    FREE(location, length);
    CFREE(rep_file_name);
    DEB("leave rep_parser\n");
}
