/* -*-c-*-
 *
 *	a lexical analyser for RFC2822 address header fields
 *
 * Copyright (c) 2004	Greg A. Woods <woods@planix.com>
 */

%{

#ident "@(#)librfc2822: :address_scan.l,v 1.3 2004/08/14 01:18:21 woods Exp "

#include <sys/cdefs.h>
#include <string.h>

#include "rfc2822.h"			/* public */
#include "librfc2822.h"			/* internals */
#include "address.h"			/* generated by yacc */

/* the scanner stores semantic values under the library-prefixed name */
#define yylval		rfc2822_lval

/*
 * Replace the scanner's input macro so that every refill of its buffer is
 * obtained by calling rfc2822_lexinput(), whose return value becomes the
 * number of characters read.
 */
#undef YY_INPUT
#define YY_INPUT(dst, nread, room)	((nread) = rfc2822_lexinput((dst), (room)))

void rfc2822_lexerror __P((const char *));
int  rfc2822_lexinput __P((char *, int));

/* current number of open (nested) comments */
unsigned int rfc2822_lex_ncomm = 0;

/* in a debugging scanner anything written to stderr goes to stdout instead */
#if defined(FLEX_DEBUG)
# undef stderr
# define stderr		stdout
#endif

%}

/* declare yytext as a character pointer rather than as an array */
%pointer

/*
 * Exclusive start conditions used by the rules below:
 *
 *	QSTRING		after an opening '"', until the closing '"'
 *	DOMAINLIT	after an opening '[', until the closing ']'
 *	COMMENT		after an opening '(', until the matching ')'
 *	QPAIR		on the character that follows a backslash inside an
 *			atom, when scanning of the atom continues afterwards
 *	QPAIR2		on the character that follows a backslash inside an
 *			atom, when that character completes the TOK_ATEXT
 */
%x QSTRING DOMAINLIT COMMENT QPAIR QPAIR2

%%

\" {
	/* opening quote: hand the following text to the QSTRING rules */
	BEGIN(QSTRING);
	return *yytext;
}
\[ {
	/* opening bracket: hand the following text to the DOMAINLIT rules */
	BEGIN(DOMAINLIT);
	return *yytext;
}
\] {
	/* a ']' met outside of DOMAINLIT: report it and drop buffered input */
	rfc2822_lexerror("address parse error, unexpected ']' (missing domain literal)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}
\( {
	/* outermost '(' of a comment: no token, start counting the nesting */
	BEGIN(COMMENT);
	rfc2822_lex_ncomm = 1;
}
\) {
	/* a ')' met outside of COMMENT: report it and drop buffered input */
	rfc2822_lexerror("address parse error, unexpected ')' (unbalanced comment)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

([^@\.\(\) \t\r\n<>\":,;\[\]\\]*\\)/(.[@\.:;]) {
	/*
	 * Atom text (possibly empty) ending in a backslash, where the
	 * character after the backslash is itself followed by '@', '.',
	 * ':' or ';'.  QPAIR2 will consume the escaped character and
	 * return the token; yymore() keeps the text matched here at the
	 * front of yytext.
	 */
	BEGIN(QPAIR2);
	yymore();
}
([^@\.\(\) \t\r\n<>\":,;\[\]\\]*\\)/(.) {
	/*
	 * Same, with only one character of lookahead.  The scanner
	 * prefers the rule matching the most text, trailing context
	 * included, so this rule is chosen only when no second character
	 * is available after the escaped one.
	 */
	BEGIN(QPAIR2);
	yymore();
}
([^@\.\(\) \t\r\n<>\":,;\[\]\\]*\\)/(.[^@\.:;]) {
	/*
	 * The escaped character is followed by something other than '@',
	 * '.', ':' or ';': QPAIR consumes the escaped character and
	 * scanning carries on in INITIAL with the text accumulated so far.
	 *
	 * NOTE(review): the second lookahead character may also be white
	 * space or another special; the accumulated text is then
	 * prepended to whatever rule matches next -- confirm that this is
	 * intended.
	 */
	BEGIN(QPAIR);
	yymore();
}

([^@\.\(\) \t\r\n<>\":,;\[\]\\]*\\) {
	/*
	 * None of the lookahead rules above applied, i.e. the backslash is
	 * not followed by any character that "." matches.
	 */
	rfc2822_lexerror("address parse error, expected '\\' (unquoted special character)");
	BEGIN INITIAL;
	YY_FLUSH_BUFFER;
	return TOK_ILLEGAL;
}

[^\(\)<>@,;:\\\".\[\] \t\n\r]+ {
	/*
	 * atext =	ALPHA / DIGIT / ; Any character except controls,
	 *		"!" / "#" /     ;  SP, and specials.
	 *		"$" / "%" /     ;  Used for atoms
	 *		"&" / "'" /
	 *		"*" / "+" /
	 *		"-" / "/" /
	 *		"=" / "?" /
	 *		"^" / "_" /
	 *		"`" / "{" /
	 *		"|" / "}" /
	 *		"~"
	 *
	 * atom =	[CFWS] 1*atext [CFWS]
	 *
	 * Horizontal tab is excluded here along with SP, CR and LF so that
	 * it is left to the white space rule instead of being absorbed
	 * into (or returned as) an atom.
	 */
	return TOK_ATEXT;
}

[\t \n\r]+ {
	/* text matched by this rule produces no token at all */
}

[\"@\.<>:,;\[\]] {
	/*
	 * Each of these characters is handed to the parser as a token whose
	 * identifier is the character itself, which keeps the yacc grammar
	 * rules readable.
	 */
	return *yytext;
}

<QSTRING>([^\n\r\"\\]|\\.)* {
	/*
	 * The body of a quoted string: a run of characters other than LF,
	 * CR, '"' and '\', in which a backslash may escape any character
	 * that "." matches.  For reference:
	 *
	 * qtext =       NO-WS-CTL /     ; Non white space controls
	 *               %d33 /          ; The rest of the US-ASCII
	 *               %d35-91 /       ;  characters not including "\"
	 *               %d93-126        ;  or the quote character
	 */
	return TOK_QTEXT;
}
<QSTRING>\" {
	/* closing quote: back to ordinary scanning */
	BEGIN(INITIAL);
	return *yytext;
}
<QSTRING><<EOF>> {
	/* the input ran out before the closing quote was seen */
	rfc2822_lexerror("address parse error, expecting '\"' (unterminated quoted string)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

<DOMAINLIT>([^\[\]\n\r\\]|\\.)* {
	/*
	 * The body of a domain literal: a run of characters other than '[',
	 * ']', LF, CR and '\', in which a backslash may escape any character
	 * that "." matches.  For reference:
	 *
	 * dtext =	NO-WS-CTL /	; Non white space controls
	 *		%d33-90 /	; The rest of the US-ASCII
	 *		%d94-126	;  characters not including "[",
	 *				;  "]", or "\"
	 */
	return TOK_DTEXT;
}
<DOMAINLIT>\] {
	/* closing bracket: back to ordinary scanning */
	BEGIN(INITIAL);
	return *yytext;
}
<DOMAINLIT>\[ {
	/* domain literals do not nest */
	rfc2822_lexerror("address parse error, unexpected '[' (already in a domain literal)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}
<DOMAINLIT><<EOF>> {
	/* the input ran out before the closing bracket was seen */
	rfc2822_lexerror("address parse error, expecting ']' (unterminated domain literal)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

<COMMENT>([^\(\)\n\0\\]|\\.)* {
	/*
	 * Comment text: a run of characters other than '(', ')', LF, NUL
	 * and '\', in which a backslash may escape any character that "."
	 * matches.
	 */
	return TOK_CTEXT;
}
<COMMENT>\( {
	/* one level deeper; the parenthesis itself yields no token */
	++rfc2822_lex_ncomm;
}
<COMMENT>\) {
	/* one level up; ordinary scanning resumes once every level is closed */
	if (--rfc2822_lex_ncomm == 0) {
		BEGIN(INITIAL);
	}
}
<COMMENT><<EOF>> {
	/* the input ran out while a comment was still open */
	rfc2822_lexerror("address parse error, expecting ')' (unterminated comment)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

<QPAIR>. {
	/*
	 * The character escaped by a backslash inside an atom: keep it (and
	 * everything gathered before it) in yytext and carry on scanning in
	 * INITIAL.
	 */
	yymore();
	BEGIN(INITIAL);
}
<QPAIR><<EOF>> {
	/* the input ran out right after the backslash */
	rfc2822_lexerror("address parse error, expecting a special character (dangling '\\')");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

<QPAIR2>. {
	/*
	 * The character escaped by a backslash inside an atom, when it is
	 * also the last character of that atom: return what was gathered.
	 */
	BEGIN INITIAL;
	return TOK_ATEXT;
}
<QPAIR2><<EOF>> {
	/* the input ran out right after the backslash */
	rfc2822_lexerror("address parse error, expecting special character (dangling backslash)");
	YY_FLUSH_BUFFER;
	BEGIN(INITIAL);
	return TOK_ILLEGAL;
}

%%

/*
 * Internal storage.
 */

/*
 * Cursor into the NUL-terminated address text being scanned.  It is set up
 * outside of this file and is advanced by rfc2822_lexinput() past every
 * chunk of text that has been handed to the scanner.
 */
char *rfc2822_lex_bufp;			/* inited by parser */

/*
 * Internal routines.
 */

/*
 * End-of-input hook called by the scanner.  Defining it here avoids needing
 * to link with libl.  It always answers "no further input".
 */
int
yywrap()
{
	int no_more_input = 1;

	return no_more_input;
}

/*
 * Take input from the address string provided by the parser wrapper routine
 * (this is the function that YY_INPUT expands to).
 *
 * Copies at most max_size bytes from the string at the global pointer
 * rfc2822_lex_bufp into buf, then advances rfc2822_lex_bufp so that it
 * points at the next chunk of input to be analyzed.
 *
 * Returns the number of bytes copied, or YY_NULL when there is nothing
 * (more) to scan: the string is exhausted, no string has been set, or
 * max_size is not positive.
 */
int
rfc2822_lexinput(buf, max_size)
	char *buf;			/* buffer to be filled for lexer */
	int max_size;			/* room available in buf, in bytes */
{
	size_t n;

	/*
	 * Without an input string strlen() must not be called, and a
	 * non-positive max_size would turn into a huge size_t below and
	 * defeat the clamp that protects buf.
	 */
	if (rfc2822_lex_bufp == NULL || max_size <= 0) {
		return YY_NULL;
	}

	n = strlen(rfc2822_lex_bufp);
	if (n > (size_t) max_size) {
		n = (size_t) max_size;		/* n = MIN(n, max_size) */
	}
	if (n == 0) {
		return YY_NULL;			/* YY_NULL should be 0 */
	}

	memcpy(buf, rfc2822_lex_bufp, n);
	rfc2822_lex_bufp += n;

	return (int) n;				/* n <= max_size, so it fits */
}

void
rfc2822_lex_restart()
{
	yy_init = 1;
}

/*
 * Report a scanning error.  The message is only printed, and only by a
 * scanner built with FLEX_DEBUG while yy_flex_debug is set; otherwise it is
 * dropped.
 */
void
rfc2822_lexerror(msg)
	const char *msg;
{
#if defined(FLEX_DEBUG)
	if (yy_flex_debug != 0) {
		(void) fprintf(stderr, "rfc2822_lexerror: %s\n", msg);
	}
#else
	(void) msg;
#endif
	/* XXX do something to return the error message to yacc! */
}
