/* lqkwik.c -- Copyright 1991, 1992 Liam R. E. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* lqkwik -- produce a keyword-in-context list of matches...
 * Liam R. Quin, February 1991 and later...
 *
 * $Id: lqkwik.c,v 1.4 92/04/02 14:15:24 lee Exp Locker: lee $
 */

/* Built-in geometry of one keyword-in-context line, in character columns. */
#define COLS 65		/* width of the text part of the index */
#define WORDCOL 25	/* column in which the matched word is put */
#define GAPWIDTH 2	/* blanks printed just before the matched word */
#define SCREENWIDTH 79	/* width of the whole line, file name included */

/* Working copies of the values above.  main() may change them from the
 * COLS environment variable and from the -g and -w options, and derives
 * Cols and WordCol again once the options have been read.
 */
int Cols = COLS, WordCol = WORDCOL;
int GapWidth = GAPWIDTH, ScreenWidth = SCREENWIDTH;

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <malloc.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h> /* for fileinfo.h */
#include <sys/stat.h>

#include <stdio.h>

#include "fileinfo.h"
#include "wordinfo.h"
#include "wordrules.h"
#include "pblock.h"
#include "emalloc.h"
#include "readfile.h"

/** Unix system calls that need declaring: **/
extern long lseek();
extern int open(), close();
extern int read();
extern void exit();
extern int stat();

/** Unix/C Library Functions that need declaring: **/
#ifndef tolower
 extern int tolower();
#endif
extern int strlen();
extern int strcmp();
extern unsigned sleep();
extern int atoi();
extern long atol();
extern void perror();

/** lqtext library functions that need declaring: **/
extern int MySystem();
extern int TooCommon();
extern void SetDefaults(); /* main() calls this before getopt() */
extern void DefaultUsage(); /* main() calls this for the -xv help */

/** Functions within this file that are used before being defined: **/
long ReadMatchFile(); /* turns the -f match file into an argv-style array */
int ShowFile(); /* prints the context line for one match */

/** **/

/** some useful macros: **/
/* Both macros evaluate one of their arguments twice, so do not pass
 * expressions with side effects.
 */
#define max(a,b) (((a)>(b))?(a):(b))
#define min(a,b) (((a)<(b))?(a):(b))

/** **/

extern int errno;

/* Verbosity level; main() raises it for -v and tests it before printing
 * diagnostics and the longer help texts.
 */
int AsciiTrace = 0;

char *progname = "lqkwik"; /* set from argv[] in main() */

/* If SelectedNames is greater than zero, main() opens it as a file
 * descriptor for writing and stores the resulting stream in InfoStream.
 */
int SelectedNames = -1;
FILE *InfoStream = (FILE *) 0;

/* Printed by the -V option.
 * NOTE(review): the string names showfile.c although this file is
 * lqkwik.c -- confirm whether that is intended.
 */
static char *Revision = "@(#) showfile.c 2.2";

/* Set by -p: if non-zero, ShowFile() prints file names starting after
 * the n-th '/' counted from the right.
 */
static int TruncateAtPath = 0;

/*
 * main -- parse the command line, collect the matches to display, and
 * print one keyword-in-context line per match with ShowFile().
 *
 * A match is three strings: BlockNumber, WordInBlock and FileName.
 * If -f names a file that yields matches, those are used; otherwise the
 * arguments left after the options are taken as the matches.
 *
 * Exits with status 1 on bad usage, negative widths, or an unusable
 * match list, and with status 0 after printing the help asked for by -x.
 * Otherwise returns 0, even if ShowFile() failed for some of the files.
 */
int
main(argc, argv)
    int argc;
    char *argv[];
{
    extern char *getenv();
    extern int optind, getopt();
    extern char *optarg; /* for getopt */
    int ch; /* for getopt */
    int ErrFlag = 0; /* < 0: help wanted (-x);  > 0: bad usage */
    char *FileWithMatches = (char *) 0;
    char **MatchList = (char **) 0;
    int MatchCount = 0;
    int Right;
    int Left;

    /* A COLS environment variable overrides the built-in text width */
    {
	char *p = getenv("COLS");
	if (p) {
	    Cols = atoi(p);
	}
	if (!Cols) Cols = COLS;
	else Cols -= 15; /* leave room for filename */
    }

    Right = Cols - (WordCol + GapWidth);
    Left = WordCol - GapWidth;

    progname = argv[0];

    SetDefaults(argc, argv);

    /* All lq-text programs must call SetDefaults() before getopt, and
     * must then be prepared to ignore options z with arg and Z without.
     */
    while ((ch = getopt(argc, argv, "a:b:f:l:r:g:w:o:p:z:ZVvx")) != EOF) {
	switch (ch) {
	case 'z':
	case 'Z':
	    break; /* done by SetDefaults(); */
	case 'V':
	    fprintf(stderr, "%s version %s\n", progname, Revision);
	    break;
	case 'v':
	    /* count the v's, so that -xvv gives more help than -xv */
	    AsciiTrace++;
	    break;
	case 'f':
	    FileWithMatches = optarg;
	    break;
	case 'g':
	    GapWidth = atoi(optarg);
	    break;
	case 'l':
	    Left = atoi(optarg);
	    break;
	case 'p':
	    TruncateAtPath = atoi(optarg);
	    break;
	case 'r':
	    Right = atoi(optarg);
	    break;
	case 'w':
	    ScreenWidth = atoi(optarg);
	    break;
	case 'x':
	    ErrFlag = (-1);
	    break;
	case '?':
	default:
	    ErrFlag = 1;
	    break;
	}
    }

    if (ErrFlag < 0) { /* -x or -xv was used */
	fprintf(stderr, "usage: %s [-xv] [options] [matches...]\n", progname);
	fprintf(stderr,
	"use %s -x, -xv or -xvv for more detailed explanations.\n", progname);

	if (AsciiTrace) {
	    DefaultUsage();
	    fprintf(stderr, "\n\
	-f file -- \"file\" contains a list of matches, one per line\n");
	    fprintf(stderr, "\
	-g n    -- set gap between text and matched phrase to n [%d]\n\
	-l n    -- display n characters to the left of each phrase [%d]\n\
	-r n    -- display r chars to the right of each phrase's start [%d]\n\
	-p n    -- truncate pathnames at the n-th rightmost `/' character\n\
	-w n    -- truncate the line after n characters [default: %d]\n",
		    GapWidth,
		    Left,
		    Right,
		    ScreenWidth
	    );
	}
	if (AsciiTrace > 1) {
	    fputs("\
	Matches should be in the form of\n\
		BlockNumber  WordInBlock  FileName\n\
	where BlockNumber and WordInBlock are positive numbers.\n\
	(This is the format produced by the lqword -l command.)\n\
", stderr);
	}
	exit(0);
    } else if (ErrFlag > 0) {
	fprintf(stderr, "use %s -x for an explanation.\n", progname);
	exit(1);
    }

    /* ShowFile() sizes its buffers from these, so a negative value
     * (from -l, -r, -g or a small COLS) must not get any further.
     */
    if (Left < 0 || Right < 0 || GapWidth < 0) {
	fprintf(stderr,
	    "%s: widths must not be negative (left %d, right %d, gap %d)\n",
	    progname, Left, Right, GapWidth);
	exit(1);
    }

    /* The options may have changed the parts; work out the whole again */
    Cols = Left + Right + GapWidth;
    WordCol = Left + GapWidth;

    if (AsciiTrace) {
	fprintf(stderr,"Left:%d  Right:%d  Cols:%d  Gap:%d  WC:%d  SW:%d\n",
			Left, Right,	  Cols, GapWidth, WordCol,ScreenWidth);
    }

    if (ScreenWidth <= Cols + 2) {
	fprintf(stderr,
	    "%s: ScreenWidth %d, %d text cols -- no room for file names!\n",
	    progname, ScreenWidth, Cols);
	exit(1);
    }

    /* open the file for the selected output */
    if (SelectedNames > 0) {
	if ((InfoStream = fdopen(SelectedNames, "w")) == (FILE *) 0) {
	    int e = errno;

	    fprintf(stderr, "%s: -o %d: can't open stream ",
						progname, SelectedNames);
	    errno = e;
	    perror("for writing");
	    exit(1);
	}
    }

    /* check that we can get at the file containing the matches, if one
     * was supplied.
     */
    if (FileWithMatches) {
	struct stat StatBuf;
	char *msg = 0;

	if (stat(FileWithMatches, &StatBuf) < 0) {
	    int e = errno; /* on many systems, fprintf() changes errno! */
	    fprintf(stderr, "%s: can't open match-list file ", progname);
	    errno = e;
	    perror(FileWithMatches);
	    exit(1);
	} else if (AsciiTrace) {
	    /* the file type is only looked at when tracing is on */
	    switch (StatBuf.st_mode & S_IFMT) {
	    case S_IFDIR:
		fprintf(stderr,
		"%s: can't read matches from \"%s\" -- it's a directory!\n",
						progname, FileWithMatches);
		exit(1);
	    case S_IFREG:
		break;
#ifdef S_IFIFO
	    case S_IFIFO:
		msg = "named pipe or fifo";
		/* fall through */
#endif
	    case S_IFCHR:
		if (!msg) msg = "raw special device";
		/* fall through */
	    case S_IFBLK:
		if (!msg) msg = "block special device";
		/* fall through */
#ifdef S_IFNAM
	    case S_IFNAM:
		if (!msg) msg = "named special file"; /* wot dat? */
		/* fall through */
#endif
	    default:
		if (!msg) msg = "special file";

		fprintf(stderr,
		    "%s: warning: file \"%s\" containing matches is a %s\n",
		    progname, FileWithMatches, msg);

		/* but continue anyway... */

	    }
	}
	/* Now read the file, and make an array of matches... */
	if (ReadMatchFile(FileWithMatches, StatBuf.st_size, &MatchCount, &MatchList) < 0) {
	    fprintf(stderr, "%s: couldn't read matches from \"%s\"\n",
						progname, FileWithMatches);
	    exit(1);
	}
    }

    argv += optind;
    argc -= optind;

    /* matches read from a file take the place of command-line ones */
    if (MatchCount) {
	argc = MatchCount;
	argv = MatchList;
    }

    if (argc < 3) {
	fprintf(stderr,
	"%s: matches must have at least 3 parts; use -xv for an explanation\n",
								progname);
	exit(1);
    } else if (argc % 3) {
	/* Note: I could detect lqword output here (i.e., without -l) */
	fprintf(stderr, "%s: can't understand match format;\n", progname);
	fprintf(stderr, "%s: use -xv for more explanation.\n", progname);
	exit(1);
    }

    /* argv now holds argc strings, three per match */
    while (argc > 0) {

	if (ShowFile(argv[2], atol(*argv), (unsigned) atoi(argv[1])) < 0) {
	    int i;

	    /* This avoids repeated messages about the same file:
	     * step over the following matches that name the same file.
	     * i counts the strings that remain after the current match.
	     */
	    for (i = argc - 3; i > 0; i -= 3) {
		if (STREQ(argv[2], argv[2 + 3])) {
		    argv += 3;
		} else {
		    break;
		}
	    }
	    argc = i + 3; /* so we can subtract 3 ... */

	}
	argv += 3;
	argc -= 3;
    }

    return 0;
}

/*
 * ReadMatchFile -- read the match list in FileWithMatches with ReadFile()
 * and build an argv-style array from it, three strings per match:
 * block number, word within block, and file name.
 *
 * Each line has the form "number number pathname".  The first two fields
 * end at white space; the pathname is the rest of the line, so it may
 * contain spaces.  The strings stored in the array point into the lines
 * returned by ReadFile(), which are cut up in place and not freed here.
 *
 * FileSize is not used.
 *
 * On return *MatchList is the array and *MatchCount the number of strings
 * in it (three per match); the same count is the return value.
 * Problems are reported with Error(E_FATAL, ...).
 * NOTE(review): this code relies on Error() not returning when given
 * E_FATAL -- confirm against error.h.
 */
long
ReadMatchFile(FileWithMatches, FileSize, MatchCount, MatchList)
    char *FileWithMatches;
    off_t FileSize;
    int *MatchCount;
    char ** *MatchList;
{
    char **Lines;
    char **Result;
    char **Lppp; /* the next free slot in Result */
    long n_matches;
    long i;

    if (!FileWithMatches || !*FileWithMatches) {
	Error(E_FATAL, "match-list file (from -f) has empty name!");
    }

    n_matches = ReadFile(
	E_FATAL,
	FileWithMatches,
	"match list",
	&Lines, /* yes, a (char ***) */
	UF_IGNBLANKS|UF_IGNSPACES|UF_IGNHASH|UF_ESCAPEOK
    );

    if (n_matches < 1L) {
	Error(E_FATAL,
	    "Match file \"%s\" contains no matches",
	    FileWithMatches
	);
    }

    /* room for three pointers for every line that was read */
    Result = (char **) malloc((unsigned) n_matches * 3 * sizeof(char *));

    if (Result == (char **) 0) {
	Error(E_FATAL|E_MEMORY, "%u bytes for match list \"%s\"",
	    (unsigned) ((unsigned) n_matches * sizeof(char *) * 3),
	    FileWithMatches
	);
    }

    /* Now construct a new argv[] from the file we just read */

    Lppp = &Result[0];
    for (i = 0; i < n_matches; i++) {
	register char *p;

	p = Lines[i];
	/* ASSERT: There are no leading or trailing spaces on the line */
	if (!*p) {
	    continue; /* blank line */
	}

	/* block in file */
	*Lppp++ = p;

	/* find the end */
	while (*p && !isspace((unsigned char) *p)) {
	    p++;
	}

	/* Terminate the field and move to the start of the next one.
	 * If the line has already ended there is nothing to step over,
	 * and p must stay on the terminating NUL.
	 */
	if (*p) {
	    *p++ = '\0';
	    while (*p && isspace((unsigned char) *p)) {
		p++;
	    }
	}

	if (!*p) {
	    Error(E_FATAL,
		"\"%s\": format is: number number pathname, not \"number\"",
		FileWithMatches
	    );
	}

	/* word in block */
	*Lppp++ = p;

	/* find the end */
	while (*p && !isspace((unsigned char) *p)) {
	    p++;
	}

	/* terminate, and move to the start of the file name, as above */
	if (*p) {
	    *p++ = '\0';
	    while (*p && isspace((unsigned char) *p)) {
		p++;
	    }
	}

	if (!*p) {
	    /* the line is in pieces by now, so print the two fields */
	    Error(E_FATAL,
		"\"%s\": contains a line (%s %s) with no filename",
		FileWithMatches,
		Lppp[-2],
		Lppp[-1]
	    );
	}

	/* file name, already null-terminated */
	*Lppp++ = p;
    }

    (*MatchList) = Result;
    return (*MatchCount = (int) (Lppp - Result));
}


/*
 * ShowFile -- print the keyword-in-context line for one match.
 *
 * FileName	the document holding the match; if it cannot be opened,
 *		FindFile() is asked for another path to try.
 * BlockInFile	number of the FileBlockSize-byte block holding the word.
 * WordInBlock	number of the word within that block, counting from 0;
 *		words shorter than MinWordLength are not counted.
 *
 * The line written to standard output consists of: the text before the
 * word, right-justified in WordCol - GapWidth columns; GapWidth blanks;
 * the text from the start of the word, in Cols - WordCol columns; a
 * colon; and the file name, cut at the TruncateAtPath-th '/' from the
 * right if that is set, and shortened from the left behind "..." if the
 * line would otherwise be longer than ScreenWidth.
 *
 * Returns 0 on success.  Returns -1, after a message on stderr, if the
 * file cannot be opened, positioned or read, or if the data read does
 * not contain the requested block and word.
 */
int
ShowFile(FileName, BlockInFile, WordInBlock)
    char *FileName;
    unsigned long BlockInFile;
    unsigned int WordInBlock;
{
    static char *Buffer = 0; /* allocated on the first call and kept */
    static unsigned int BufLen;
    int fd;
    int AmountRead;
    int e; /* errno saved across calls that may change it */
    int Result = -1;
    register char *p;
    register char *q;
    char *BufEnd; /* one past the last byte read into Buffer */
    char *StartOfMyWord;
    char *Start;
    unsigned int ThisWord;
    int Found = 0;
    char *FirstBit = emalloc(WordCol - GapWidth + 1); /* +1 for trailing \0 */
    char *LastBit = emalloc(Cols - WordCol + 1); /* +1 for trailing \0 */
    char *FirstStart;

    if (Buffer == (char *) 0) {
	BufLen = Cols * 10;
	if (BufLen < FileBlockSize * 3) BufLen = FileBlockSize * 3;
	Buffer = emalloc(BufLen);
    }

    errno = 0;

#ifdef COMPRESS_SUPPORT
    fd = UnpackAndOpen(FileName);
#else /* COMPRESS_SUPPORT */
    fd = open(FileName, O_RDONLY, 0);
#endif /* COMPRESS_SUPPORT */

    if (fd < 0) {
	char *doc;

	e = errno;

	if ((doc = FindFile(FileName)) == (char *) 0) {
	    fprintf(stderr, "%s: %s: ", progname, FileName);
	    errno = e;
	    perror(FileName);
	    goto done;
	}

	errno = 0;

#ifdef COMPRESS_SUPPORT
	fd = UnpackAndOpen(doc);
#else /* COMPRESS_SUPPORT */
	fd = open(doc, O_RDONLY, 0);
#endif /* COMPRESS_SUPPORT */

	if (fd < 0) {
	    e = errno; /* report why this attempt failed, not the first */
	    fprintf(stderr, "%s: %s: ", progname, FileName);
	    errno = e;
	    perror(doc);
	    goto done;
	}
	FileName = doc;
    }

    /* Start reading one block early (when there is an earlier block),
     * so that there is some text to show to the left of the word.
     */
    errno = 0;
    if (lseek(fd, BlockInFile? (long) ((BlockInFile - 1) * FileBlockSize) : 0L,
								    0) < 0) {
	e = errno;
	(void) close(fd);
	fprintf(stderr, "%s: %s: ", progname, FileName);
	errno = e;
	perror("lseek");
	goto done;
    }

    errno = 0;
    AmountRead = read(fd, Buffer, BufLen);
    e = errno;
    (void) close(fd); /* everything needed is in Buffer now */

    if (AmountRead < 0 || AmountRead < MinWordLength) {
	fprintf(stderr, "%s: %s: ", progname, FileName);
	errno = e;
	perror("read");
	goto done;
    }

    BufEnd = Buffer + AmountRead;

    /** Find the required word */
    if (BlockInFile) {
	/* start 1 char before the end of the previous block */
	StartOfMyWord = &Buffer[FileBlockSize - 1];

	if (StartOfMyWord >= BufEnd) {
	    fprintf(stderr, "%s: %s: file is too short to contain block %lu\n",
					    progname, FileName, BlockInFile);
	    goto done;
	}

	/* perhaps the last word of the previous block spans the block
	 * boundary?
	 */
	while (StartOfMyWord < BufEnd && WithinWord(*StartOfMyWord)) {
	    StartOfMyWord++;
	}
	if (StartOfMyWord < &Buffer[FileBlockSize]) {
	    StartOfMyWord = &Buffer[FileBlockSize];
	}
    } else {
	StartOfMyWord = Buffer;
    }

    /* Count words until the wanted one is reached or the data runs out */
    ThisWord = 0;
    for (;;) {
	/* skip to the start of a word */
	while (StartOfMyWord < BufEnd && !StartsWord(*StartOfMyWord)) {
	    ++StartOfMyWord;
	}
	if (StartOfMyWord >= BufEnd) {
	    break; /* no more words in what was read */
	}

	Start = StartOfMyWord;

	/* find the end of the word; a quote stays in the word only if
	 * the character after it can end a word
	 */
	while (StartOfMyWord < BufEnd && WithinWord(*StartOfMyWord)) {
	    if (*StartOfMyWord == '\'' &&
		(StartOfMyWord + 1 >= BufEnd || !EndsWord(StartOfMyWord[1]))) {
		break;
	    }
	    StartOfMyWord++;
	}

	/* Assert: StartOfMyWord points 1 character beyond the end of the
	 * word pointed to by Start
	 */
	/* see if it's long enough */
	if (StartOfMyWord - Start < MinWordLength) {
	    if (StartOfMyWord == Start) {
		++StartOfMyWord; /* always make progress */
	    }
	    continue; /* too short to count as a word */
	}

	/** See if it's the right one */
	if (ThisWord == WordInBlock) {
	    StartOfMyWord = Start;
	    Found = 1;
	    break;
	}
	ThisWord++;
    }

    if (!Found) {
	fprintf(stderr, "%s: %s: block %lu has no word number %u\n",
			    progname, FileName, BlockInFile, WordInBlock);
	goto done;
    }

    /* Find context before the keyword: fill FirstBit from its right-hand
     * end, until it is full or the start of the data is reached.
     */
    q = &FirstBit[WordCol - GapWidth];
    *q = '\0';

    p = StartOfMyWord;
    while (q > FirstBit && p > Buffer) {
	--p;
	--q;
	*q = isspace((unsigned char) *p) ? ' ' : *p;
    }

    FirstStart = q; /* the leftmost character that was stored */

    /* now build up the rest of the buffer, starting at the keyword */
    q = LastBit;

    for (p = StartOfMyWord; p < BufEnd; p++) {
	if (q >= &LastBit[Cols - WordCol]) break;

	*q++ = isspace((unsigned char) *p) ? ' ' : *p;
    }
    *q = '\0';

    printf("%*.*s", WordCol - GapWidth, WordCol - GapWidth, FirstStart);

    /* do the gap */
    {
	register int i;

	for (i = GapWidth; i > 0; i--) {
	    putchar(' ');
	}
    }

    printf("%-*.*s", Cols - WordCol, Cols - WordCol, LastBit);

    printf(":");
    {
	char *Name = FileName;
	int NameLen;
	int OverShoot;

	if (TruncateAtPath) {
	    register char *s = Name + strlen(Name);
	    int Slashes = 0;

	    /* look from the right for the TruncateAtPath-th '/';
	     * the first character of the name is never examined
	     */
	    while (s > Name) {
		if (*s == '/' && ++Slashes == TruncateAtPath) {
		    Name = s + 1;
		    break;
		}
		--s;
	    }
	}

	NameLen = (int) strlen(Name);
	OverShoot = Cols + 2 + NameLen - ScreenWidth; /* +2 is ": " */

	if (OverShoot > 0) {
	    /* "..." is 2 wider than the blank it replaces */
	    int Skip = OverShoot + 2;

	    if (Skip > NameLen) {
		Skip = NameLen; /* never step past the end of the name */
	    }
	    Name += Skip;
	    printf("...");
	} else {
	    putchar(' ');
	}
	printf("%s\n", Name);
    }

    Result = 0;

done:
    efree(FirstBit); efree(LastBit);
    return Result;
}


/*
 * $Log:	lqkwik.c,v $
 * Revision 1.4  92/04/02  14:15:24  lee
 * Allow spaces in filenames.
 * 
 * Revision 1.3  92/02/15  05:12:55  lee
 * Added compress support.
 * 
 * Revision 1.2  91/08/08  17:22:28  lee
 * Now copes with leading and/or multiple spaces in the match list.
 * 
 * Revision 1.1  91/03/02  20:37:47  lee
 * Initial revision
 * 
 *
 */
