/*	mark saeger,	msaeger@cse.unl.edu				*/
/*	regex.c								*/
/*	Copyright 1995 Mark Saeger.					*/
/*									*/
/*	Permission is granted to any individual or instituition to use,	*/
/*	copy, or redistribute this executable so long as it is not	*/
/*	modified and that it is not sold for profit.			*/
/*									*/
/*	LIKE ANYTHING THAT IS FREE, MORE IS PROVIDED AS IS AND COMES	*/
/*	WITH NO	WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED.	*/
/*	IN NO EVENT WILL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DAMAGES	*/
/*	RESULTING FROM THE USE OF THIS SOFTWARE.			*/
#include "more.h"
#include <ctype.h>

/* regex.c
	.	match any single char except nl
	*	match 0 or more of preceding chars
	^	match beg of line
	$	match end of line
	\<	match beg of word
	\>	match end of word
	[ ]	match one of enclosed chars
	[^ ]	match any char not enclosed
	\	take next symbol literally

	\\	a single backslash
	\*	a single asterisk
	.*	any number of chars
	\$	a single dollar sign

FROM: A Students Guide to UNIX -- Harley Hahn
*/

/* Forward declarations for the matching helpers defined later in this	*/
/* file.  Each handle_* routine tests one pattern node (walk) against	*/
/* the byte bigbuf[x]: on a match it returns walk->link, otherwise it	*/
/* sets *testing to FALSE and returns walk unchanged.			*/
int hitend(rgx *walklink, unsigned char bigbuf[], int x);
rgx *handle_rc(rgx *walk,unsigned char bigbuf[], int x, int *testing);
rgx *handle_rd(rgx *walk,unsigned char bigbuf[], int x, int *testing);  
rgx *handle_rBEol(rgx *walk,unsigned char bigbuf[], int x, int *testing,
			int *current);
rgx *handle_rBEow(rgx *walk,unsigned char bigbuf[], int x, int *testing,
			int *current);
rgx *handle_rr(rgx *walk, unsigned char bigbuf[], int x, int *testing);
rgx *handle_rnr(rgx *walk, unsigned char bigbuf[], int x, int *testing);
rgx *handle_rv(rgx *walk, unsigned char bigbuf[], int x, int *testing);
rgx *handle_rnv(rgx *walk, unsigned char bigbuf[], int x, int *testing); 

/*check for a valid regex*/
/*	TRUE	-	found an error	*/
/*	FALSE	-	no errors found	*/
/*									*/
/*	The pattern is taken from the global string optreg.  It is	*/
/*	reported as an error when:					*/
/*	  - optreg is NULL						*/
/*	  - it starts with '*'						*/
/*	  - '[' and ']' do not pair up, or '.', '*', '^', '$' appear	*/
/*	    unescaped inside brackets (a '^' directly after '[' is	*/
/*	    allowed)							*/
/*	  - it ends in a backslash that escapes nothing			*/
/*	  - outside brackets, '^' or "\<" is not at the very start,	*/
/*	    or '$' or "\>" is not at the very end			*/
int validate_regex(void)
{
	extern char *optreg;
	char *loptreg;		/*local pointer to optreg*/
	char *begreg;		/*address of the beginning of string*/
	char *endreg;		/*address of the last character of string*/
	int in_brackets;	/*TRUE while between '[' and its ']'*/

	if(optreg==NULL)	/*nothing to validate, and nothing to store*/
		return(TRUE);

/****test case 1***/
/*	tests for a '*' as first character	*/
	if(*optreg=='*')
		return(TRUE);
/******************/

/****test case 2***/
/*	tests for opening and closing brackets, and illegal
	characters contained in the brackets	*/
	in_brackets=FALSE;
	loptreg=optreg;
	while(*loptreg!='\0')
	{
		switch(*loptreg++)
		{
			case '\\':	/*handle esc char.*/
				/*a backslash as the last character escapes
				  nothing; stepping over the terminator here
				  would run the scan off the end of the string*/
				if(*loptreg=='\0')
					return(TRUE);
				loptreg++;
				break;
			case '[':
				in_brackets=TRUE;
				if(*loptreg=='^')
					loptreg++;	/*around [^..]*/
				break;
			case ']':
				if(!in_brackets)
					return(TRUE);
				in_brackets=FALSE;
				break;
			case '.':
			case '*':
			case '^':
			case '$':
				if(in_brackets)
					return(TRUE);
				break;
			default:
				break;
		}
	}
	if(in_brackets)	/*should be false, assuming ']' found*/
		return(TRUE);
/******************/

/****test case 3***/
/*	tests to ensure begin/end expressions are actually at
	the begin/end depending on the type	*/
/*Basically we are comparing addresses, since we have a contiguous block of
memory, then we assume if not at beginning/end then not in the correct place*/
	begreg=optreg;
	endreg=optreg;
	for(loptreg=optreg;*loptreg!='\0';loptreg++)
		endreg=loptreg;

/*Bracket contents were already checked by test case 2, so they are skipped
here; otherwise the '^' of every "[^..]" would be mistaken for a misplaced
beginning-of-line anchor.  Test case 2 also guarantees that every backslash is
followed by a character, so stepping over the escaped character is safe.*/
	in_brackets=FALSE;
	for(loptreg=optreg;*loptreg!='\0';loptreg++)
	{
		if(in_brackets)
		{
			if(*loptreg=='\\')
				loptreg++;	/*skip the escaped character*/
			else if(*loptreg==']')
				in_brackets=FALSE;
			continue;
		}
		switch(*loptreg)
		{
			case '[':
				in_brackets=TRUE;
				break;
			case '^':
				if(loptreg>begreg)	/*should be 1st char*/
					return(TRUE);
				break;
			case '$':
				if(loptreg<endreg)	/*should be last char*/
					return(TRUE);
				break;
			case '\\':
				loptreg++;	/*look at the escaped character*/
				switch(*loptreg)
				{
					case '<':	/*"\<" must open the pattern*/
						if(loptreg!=begreg+1)
							return(TRUE);
						break;
					case '>':	/*"\>" must close the pattern*/
						if(loptreg<endreg)
							return(TRUE);
						break;
					default:
						break;
				}
				break;
			default:
				break;
		}
	}
/******************/

	return (FALSE);
}

/*Builds the regex list for the global pattern string optreg.		*/
/*The previous global list is destroyed first.  Every character with	*/
/*a special meaning ('.', '*', '^', '$', '\', '[') is handed to	*/
/*fillrother() together with the address of the scan pointer; any	*/
/*other character is passed to fillr() and the scan pointer is moved	*/
/*on by one.  Returns whatever list the fill routines produced, or	*/
/*NULL for an empty pattern.						*/
rgx *store_regex(void)
{
	extern char *optreg;
	rgx *list;
	char *scan;

	destroy_globrgx();	/*get rid of old list*/

	list=NULL;
	scan=optreg;

	while(*scan != '\0')
	{
		char c=*scan;

		if((c=='.') || (c=='*') || (c=='^') || (c=='$') ||
		   (c=='\\') || (c=='['))
		{
			list=fillrother(list, &scan);
		}
		else
		{
			scan++;
			list=fillr(list, c);
		}
		fflush(stdout);
	}
	return(list);
}

/*
 * Scan the current input for the pattern list in globrgx.
 *
 *	poffset		- set to TRUE when a match is found; left untouched
 *			  otherwise
 *	top_of_form	- matching is only attempted once the running line
 *			  counter (current, starting at 1) is greater than
 *			  this value
 *	pipe		- zero: the data is read from the file *currfile
 *			  non-zero: the data is taken from the buffers held
 *			  in llist and, once those are used up, from stdin
 *	llist		- list of buffered input (pipe mode only); buffers
 *			  newly read from stdin are appended to its tail
 *			  NOTE(review): presumably never NULL in pipe mode,
 *			  the append code dereferences it - confirm in caller
 *
 * On a match optpl receives the line counter.  When the input ends
 * without a match, or the file cannot be opened, optpl is set to 0.
 *
 * NOTE(review): the inner while(testing) loop advances x without checking
 * it against max_to_read, so a partial match near the end of a buffer can
 * look at bytes beyond the data just read, and the RGXSTAR code reads
 * bigbuf[x] right after x++ .  Matching across buffer boundaries needs a
 * closer look.
 */
void search_regex(int *poffset, int top_of_form, int pipe, node *llist)
{
	extern rgx *globrgx;
	extern char **currfile;
	extern int optpl;
	rgx *walk, *prev;
	FILE *ip;
	int qmore=TRUE,testing;
	long bigindex;		/*ftell() reports a long*/
	int x=0,max_to_read=0,current=1;
	unsigned char bigbuf[BUFSIZ];
	node *pwalk=NULL;
	int glox;  /*set if RGX(BE)OW is ever in the l.l.*/

	walk=globrgx;
	glox=FALSE;

	if(!pipe)
	{
		ip=fopen(*currfile,"r");
		if(ip==NULL)
		{
			/*cannot read the file, so nothing can be found in it*/
			optpl=0;
			return;
		}
	}
	else
		ip=stdin;
	while(qmore)
	{
		if(!pipe)
		{	/*read in consecutive chunks of data*/
			if((bigindex=ftell(ip))>=0)
			{
				fseek(ip,bigindex,SEEK_SET);
				max_to_read=read(ip->_file,bigbuf,BUFSIZ);
				x=0;
			}
			else
				max_to_read=0;	/*position unknown: stop searching*/
		}
		else
		{
			int dummywalk;

			/*first replay the buffers already held in the list*/
			if(pwalk==NULL)
				pwalk=llist;
			else
				pwalk=pwalk->link;
			if(pwalk!=NULL)
			{
				max_to_read=pwalk->size;
				x=0;
				for(dummywalk=0;dummywalk<max_to_read;dummywalk++)
					bigbuf[dummywalk]=pwalk->nbuf[dummywalk];
			}
			else
			{
				/*list used up: read more and append it to the list*/
				max_to_read=read(ip->_file,bigbuf,BUFSIZ);
				if(max_to_read>0)
				{
					x=0;
					for(pwalk=llist;pwalk->link!=NULL;pwalk=pwalk->link);
					pwalk->link=newn();
					pwalk=pwalk->link;
					for(dummywalk=0;dummywalk<max_to_read;dummywalk++)
						pwalk->nbuf[dummywalk]=bigbuf[dummywalk];
					pwalk->link=NULL;
					pwalk->size=max_to_read;
					pwalk->dirty=FALSE;
				}
			}
		}

		/*read() gives 0 at end of input and -1 on error; both end the
		  search and count as "not found" below*/
		if(max_to_read<=0)
		{
			max_to_read=0;
			qmore=FALSE;
		}
		testing=TRUE;

		while(x<max_to_read)
		{
			while(testing)
			{
				if(bigbuf[x]=='\n')
					current++;
/*below, (current>top_of...), since start at beginning, we only search forward
from the __current__ point*/
				if((walk!=NULL) && (current > top_of_form))
				{
					/*a node followed by '*' is handled by the RGXSTAR
					  case, with prev remembering the starred node*/
					if(walk->link!=NULL)
						if(walk->link->type==RGXSTAR)
						{
							prev=walk;
							walk=walk->link;
						}
					switch(walk->type)
					{
						case RGXCHAR:
							walk=handle_rc(walk,bigbuf,x,&testing);
							if(!testing && glox)
							{
								testing=TRUE;
								walk=globrgx;
								walk=handle_rBEow(walk,bigbuf,x,&testing,&current);
							}
							break;
						case RGXDOT:	/*any char works*/
							walk=handle_rd(walk,bigbuf,x,&testing);
							break;
						case RGXBOL:
						case RGXEOL:
							walk=handle_rBEol(walk,bigbuf,x,&testing,&current);
							break;
						case RGXBOW:
						case RGXEOW:
							glox=TRUE;
							walk=handle_rBEow(walk,bigbuf,x,&testing,&current);
							break;
						case RGXRNG:
							walk=handle_rr(walk,bigbuf,x,&testing);
							break;
						case RGXNRNG:
							walk=handle_rnr(walk,bigbuf,x,&testing);
							break;
						case RGXVAL:
							walk=handle_rv(walk,bigbuf,x,&testing);
							break;
						case RGXNVAL:
							walk=handle_rnv(walk,bigbuf,x,&testing);
							break;
						case RGXSTAR:
{
	int ntesting=TRUE;
	int onlyonce=TRUE;

/*Repeat the starred node (prev) for as long as it matches at x.  The loop is
skipped when the node after the star already matches at x (hitend), when this
buffer is used up, or when the starred node is the first node of the list.*/
	while(ntesting && (x<max_to_read) && ((!(hitend(walk->link,bigbuf,x))) && (prev!=globrgx)))
	{
		rgx *nprev;

		nprev=prev;
		switch(nprev->type)
		{
			case RGXCHAR:
				nprev=handle_rc(nprev,bigbuf,x,&ntesting);
				break;
			case RGXDOT:
				nprev=handle_rd(nprev,bigbuf,x,&ntesting);
				break;
			case RGXRNG:
				nprev=handle_rr(nprev,bigbuf,x,&ntesting);
				break;
			case RGXNRNG:
				nprev=handle_rnr(nprev,bigbuf,x,&ntesting);
				break;
			case RGXVAL:
				nprev=handle_rv(nprev,bigbuf,x,&ntesting);
				break;
			case RGXNVAL:
				nprev=handle_rnv(nprev,bigbuf,x,&ntesting);
				break;
		}
/*If ntesting, then have found a match so increment, BUT if we are going to
be skipping over a \n, then we need to increment current*/
		if(ntesting)
		{
			x++;
			if(bigbuf[x]=='\n')
				current++;
		}
		else
		{
			onlyonce=FALSE;	/*used so dont decrement below*/
			x--;
/*again, need to ensure not counting a \n twice*/
			if(bigbuf[x+1]=='\n')
				current--;
		}
	}
/*step back one byte so that the x++ after the switch lands on the byte the
following node has to match*/
	if(onlyonce && hitend(walk->link,bigbuf,x))
	{
		x--;
		if(bigbuf[x+1]=='\n')
			current--;
	}
	walk=walk->link;
/*testing is still true upon exit from here*/
}
break;
					}
					x++;
				}
				else
				{/*if current<top_of_form, keep going forward*/
					x++;
					testing=FALSE;
				}
				if(walk==NULL)
				{	/*ran off the end of the list: whole pattern matched*/
					testing=FALSE;	/*break loop*/
					optpl=current;	/*actual line to skip to*/
					*poffset=TRUE;
					x=max_to_read;	/*exit loop*/
					qmore=FALSE;	/*break loop*/
				}
			}
			walk=globrgx;	/*reset to start of l.l.*/
			testing=TRUE;	/*reset*/
		}
	}
	if(!max_to_read)	/*will only be 0, iff no more to read*/
		optpl=0;	/*if regex not found,set to 0*/
	if(!pipe)
		fclose(ip);
}

/*Literal character node: when bigbuf[x] equals the character stored in
rdata[0] the next node is returned; otherwise *testing is set to FALSE and the
same node is returned.*/
rgx *handle_rc(rgx *walk,unsigned char bigbuf[], int x, int *testing)
{
	if(bigbuf[x]!=walk->rdata[0])
	{
		*testing=FALSE;
		return (walk);
	}
	return (walk->link);
}
/*'.' node: every byte except a newline matches.  On a match the next node is
returned; on a newline *testing is set to FALSE and the same node is
returned.*/
rgx *handle_rd(rgx *walk,unsigned char bigbuf[], int x, int *testing)
{
	if(bigbuf[x]=='\n')
	{
		*testing=FALSE;
		return (walk);
	}
	return (walk->link);
}

/*Beginning/end of line node: matches only when bigbuf[x] is a newline.  For an
end-of-line node the line counter *current is taken back by one before moving
to the next node.  Anything but a newline sets *testing to FALSE and returns
the same node.*/
rgx *handle_rBEol(rgx *walk,unsigned char bigbuf[], int x, int *testing, int *current)
{
	if(bigbuf[x]!='\n')
	{
		*testing=FALSE;
		return (walk);
	}
	if(walk->type==RGXEOL)
		(*current)--;
	return (walk->link);
}
/*Beginning/end of word node: matches when bigbuf[x] lies outside the ranges
'0'-'9', 'A'-'Z' and 'a'-'z'.  On a match *testing is set to TRUE and the next
node is returned; for an end-of-word node sitting on a newline the line counter
*current is taken back by one first.  Otherwise *testing is set to FALSE and
the same node is returned.*/
rgx *handle_rBEow(rgx *walk,unsigned char bigbuf[], int x, int *testing, int *current)
{
	unsigned char ch=bigbuf[x];
	int in_word;

	/*exact negation of the "outside every range" test*/
	in_word=(ch>='0') && ((ch<='9') || (ch>='A')) &&
		((ch<='Z') || (ch>='a')) && (ch<='z');
	if(in_word)
	{
		*testing=FALSE;
		return (walk);
	}
	if((bigbuf[x]=='\n') && (walk->type==RGXEOW))
		--*current;
	*testing=TRUE;
	return (walk->link);
}
/*Range node: matches when bigbuf[x] lies between rdata[0] and rdata[1], both
ends included.  On a match the next node is returned; otherwise *testing is set
to FALSE and the same node is returned.*/
rgx *handle_rr(rgx *walk, unsigned char bigbuf[], int x, int *testing)
{
	if((bigbuf[x]<walk->rdata[0]) || (bigbuf[x]>walk->rdata[1]))
	{
		*testing=FALSE;
		return (walk);
	}
	return (walk->link);
}
/*Negated range node: matches when bigbuf[x] lies outside rdata[0]..rdata[1].
On a match the next node is returned; a byte inside the range sets *testing to
FALSE and returns the same node.*/
rgx *handle_rnr(rgx *walk, unsigned char bigbuf[], int x, int *testing)
{
	int inside;

	inside=(bigbuf[x]>=walk->rdata[0]) && (bigbuf[x]<=walk->rdata[1]);
	if(!inside)
		return (walk->link);
	*testing=FALSE;
	return (walk);
}
/*Value-list node: rdata holds a '\0' terminated list of characters.  When
bigbuf[x] equals one of them the next node is returned; when none of them
equals it *testing is set to FALSE and the same node is returned.*/
rgx *handle_rv(rgx *walk, unsigned char bigbuf[], int x, int *testing)
{
	int i;

	for(i=0;walk->rdata[i]!='\0';i++)
	{
		if(walk->rdata[i]==bigbuf[x])
			return (walk->link);	/*first hit settles it*/
	}
	*testing=FALSE;
	return (walk);
}
/*Negated value-list node: rdata holds a '\0' terminated list of characters.
When bigbuf[x] equals any of them *testing is set to FALSE and the same node is
returned; when it equals none of them the next node is returned.*/
rgx *handle_rnv(rgx *walk, unsigned char bigbuf[], int x, int *testing)
{
	int i;

	for(i=0;walk->rdata[i]!='\0';i++)
	{
		if(walk->rdata[i]==bigbuf[x])
		{
			*testing=FALSE;
			return (walk);
		}
	}
	return (walk->link);
}

/*Reports whether the node walklink could match the byte bigbuf[x].
Returns FALSE for a NULL node.  Otherwise the result is the sum of
  - the result for the node after the star, when walklink is followed by a
    RGXSTAR node, and
  - TRUE when walklink itself matches bigbuf[x], FALSE when it does not (node
    types without a case below leave this at TRUE).
Callers treat any non-zero result as "matches".*/
int hitend(rgx *walklink, unsigned char bigbuf[], int x)
{
	int after_star=FALSE;
	int matched=TRUE;

	if(walklink==NULL)
		return(FALSE);

	/*a starred node may match nothing, so the node after the star counts too*/
	if((walklink->link!=NULL) && (walklink->link->type==RGXSTAR))
		after_star=hitend(walklink->link->link,bigbuf,x);

	/*only the effect on matched is wanted, the returned node is ignored*/
	switch(walklink->type)
	{
		case RGXCHAR:
			handle_rc(walklink,bigbuf,x,&matched);
			break;
		case RGXDOT:
			handle_rd(walklink,bigbuf,x,&matched);
			break;
		case RGXRNG:
			handle_rr(walklink,bigbuf,x,&matched);
			break;
		case RGXNRNG:
			handle_rnr(walklink,bigbuf,x,&matched);
			break;
		case RGXVAL:
			handle_rv(walklink,bigbuf,x,&matched);
			break;
		case RGXNVAL:
			handle_rnv(walklink,bigbuf,x,&matched);
			break;
	}
	return(after_star+matched);
}
