/* lqphrase.c -- Copyright 1989, 1990 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 *
 * $Id: lqphrase.c,v 1.4 90/10/06 00:50:56 lee Rel1-10 $
 */

/* lqphrase, part of Liam Quin's text retrieval package...
 *
 * lqphrase is intended to be an example of one way to use the programming
 * interface to lq-text.
 *
 * The idea is quite simple:
 * Simply take a phrase p, held in a string (char *p), and call
 *	t_Phrase *Phrase = String2Phrase(p);
 * The result, if not null, contains only one interesting thing at this
 * point:
 *	Phrase->ModifiedString
 * is the canonical version of p -- with common and short words removed.
 * for example,
 *	p = "The boy sat down in His Boat and playd with his toes.";
 * might result in Phrase->ModifiedString containing
 *	"[*the] boy sat down [in] [*his] boat [*and] [?playd] with [*his] toe"
 * Common words are marked with a *, and unknown words with ?.
 * An attempt may have been made to reduce plurals.
 * Since this phrase contains a word not in the database (playd), it will
 * never match anything.  As a result, it is a good idea to print this string
 * (possibly massaging it first) so users can see what is going on.  If you
 * have it, the curses-based "lqtext" does this.
 *
 * If we change "playd" to "played", the above string is equivalent to
 *	"[*the] boy sat down [xx] [*the] boat [*the] played with [*the] toe"
 * In other words, all common words are equivalent.  The package remembers
 * that one or more common words were skipped, and also that one or more
 * lumps of letters too small to make up a word were skipped.
 * The following are equivalent:
 * L.R.E. Quin    L. Quin	L.R.Quin	X.X.Quin
 * in a QUIN	a QuIn
 * and the following are not the same as those:
 * Quin (no preceding garbage)
 * L.R.E. quin (first letter of `Quin' is not upper case; the rest is ignored)
 * [*the] Quin (common words are not the same as skipped letters)
 * L. Quin's (the presence of the possessive ('s) is significant)
 * L. Quins (plural (two Quins) not the same as singular)
 * L. Quinn (spelt incorrectly!)
 *
 * Now, having sorted that out, we have our canonical string (and lots of
 * other things) in Phrase, so we can now call
 *	MakeMatches(Phrase);
 * This will return the number of matches (*NOT* the number of files) for
 * the given ModifiedPhrase in the database.
 * This can take several seconds, so again, it can be worth printing out
 * the modified string as soon as it is available, so the user is looking at
 * that whilst MakeMatches is working!  I have experimented with faster
 * versions of MakeMatches involving binary search, but the extra complexity
 * slowed things down on smaller databases.  I don't have enough disk space
 * here to make a large enough database to do real timings, sorry.
 *
 * Now we have done MakeMatches, we can march along the linked list of
 * pointers to linked lists of arrays of matches.  Clear?  No?  Well,
 * that's why there's an example.  See Match() below.
 *
 * Now, each match currently gives us
 * t_FID FID; Files are numbered from 1 in the database
 * unsigned long BlockInFile; -- the block in the file
 * unsigned char WordInBlock; -- the word in the block
 * unsigned char StuffBefore; -- the amount of leading garbage
 * unsigned char Flags, including (see wordrules.h):
 *
 * WPF_WASPLURAL		The word...  ended in s
 * WPF_UPPERCASE		...Started with a capital letter
 * WPF_POSSESSIVE		...ended in 's
 * WPF_ENDEDINING		...ended in ing
 * WPF_LASTWASCOMMON	the previous word was common
 * WPF_LASTHADLETTERS	we skipped some letters to get here
 * WPF_LASTINBLOCK	I'm the last word in this block
 *
 */

#include "globals.h" /* defines and declarations for database filenames */

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <fcntl.h>
#include <sys/types.h>
#include <malloc.h>
#include "emalloc.h"
#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "pblock.h"
#include "phrase.h"

/* STREQ(s1, s2) -- 1 if the two strings are identical, 0 otherwise.
 * The leading characters are compared first, so strcmp() is only called
 * when they agree.  Each argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#ifndef STREQ
# define STREQ(s1,s2) (*(s1) == *(s2) && strcmp((s1),(s2)) == 0)
#endif

/* Trace level, defined outside this file.  Here, -v sets it to 1, main()
 * reports the phrase matching level when it is greater than 1, and Match()
 * prints extra columns for each match when it is non-zero.
 */
extern int AsciiTrace;
/* Phrase matching level, defined outside this file; main() only reads it,
 * to report which of PCM_HalfCase, PCM_SameCase or PCM_AnyCase is in force.
 */
extern t_PhraseCaseMatch PhraseMatchLevel;

/** System calls and functions... **/
/** Unix system calls used in this file: **/
extern void exit();

/** Unix Library Functions used: **/
/** lqtext library functions: **/
/* SetDefaults() is called with (argc, argv) before option parsing and is
 * said (see main) to deal with -z and -Z; DefaultUsage() is called after
 * this program's own usage text has been printed.
 */
extern void SetDefaults();
extern void DefaultUsage();

/** functions used before they're defined within this file: **/
/* Match() takes one phrase (char *) and prints its matches; see below. */
void Match();
/** **/

/* RCS identification string; printed on stderr by the -V option. */
static char *Revision = "@(#) $Id: lqphrase.c,v 1.4 90/10/06 00:50:56 lee Rel1-10 $";

/* Name used in diagnostics; main() replaces this default with argv[0]. */
char *progname = "tryphrase";

/* Set to 1 by -s.  When set, Match() exits with status 0 at the first
 * match instead of printing it, and main() exits with status 1 if no
 * match was found.
 */
int SilentMode = 0; /* don't print matches if set to one */

/*
 * main -- parse the command line, then treat every remaining argument as
 * a phrase and hand it to Match().
 *
 * Options acted on here:
 *	-V	print the revision string on stderr (and carry on)
 *	-v	set AsciiTrace to 1
 *	-s	silent mode: no output, the exit status says whether
 *		anything matched
 *	-x	print the usage message and exit with status 0
 * An option getopt() rejects prints the usage message and exits with
 * status 1.  -z, -Z and -l are accepted and need no action here (-z and -Z
 * are dealt with by SetDefaults(); list mode is the default).  -a, -h and
 * -p are also accepted without action -- presumably SetDefaults() handles
 * them too, but that is not visible from this file.
 *
 * Returns 0 normally.  In silent mode the program exits with status 1
 * from here if no match was found; a successful match exits with status 0
 * from inside Match().
 */
int
main(argc, argv)
    int argc;
    char *argv[];
{
    extern int optind, getopt();
    /** extern char *optarg; (unused at present) **/
    int Option;
    int UsageWanted = 0; /* 0: carry on; 1: bad option; -1: -x was given */

    progname = argv[0];

    SetDefaults(argc, argv);

    for (;;) {
	Option = getopt(argc, argv, "Zz:ahpslxVv");
	if (Option == EOF) {
	    break;
	}

	/* Every option letter not listed below needs no action here. */
	if (Option == 'V') {
	    fprintf(stderr, "%s version %s\n", progname, Revision);
	} else if (Option == 'v') {
	    AsciiTrace = 1; /* same as -t 1 */
	} else if (Option == 's') {
	    SilentMode = 1;
	} else if (Option == 'x') {
	    UsageWanted = (-1);
	} else if (Option == '?') {
	    UsageWanted = 1;
	}
    }

    /* A call to lrqError would normally go here to give a more helpful
     * message, but the error handling package is not ready to ship yet.
     */
    if (UsageWanted != 0) {
	fprintf(stderr, "Usage: %s [options] \"phrase\" [...]\n", progname);
	fprintf(stderr, "%s: options are:\n", progname);
	fputs("\
	-l	-- list mode, suitable for lqshow (the default)\n\
	-s	-- silent mode; exit status indicates success of matching\n\
\n", stderr);

	DefaultUsage();

	/* asking for the usage with -x is not an error */
	if (UsageWanted > 0) {
	    exit(1);
	}
	exit(0);
    }

    /* When tracing heavily, say how strictly phrases will be matched. */
    if (AsciiTrace > 1) {
	if (PhraseMatchLevel == PCM_HalfCase) {
	    fprintf(stderr, "%s: Matching phrases heuristically.\n", progname);
	} else if (PhraseMatchLevel == PCM_SameCase) {
	    fprintf(stderr, "%s: Matching phrases precisely.\n", progname);
	} else if (PhraseMatchLevel == PCM_AnyCase) {
	    fprintf(stderr, "%s: Matching phrases approximately.\n", progname);
	} else {
	    fprintf(stderr, "%s: internall error, case matching is %d\n",
						progname, PhraseMatchLevel);
	    exit(2);
	}
    }

    /* Everything left on the command line is a phrase to look up. */
    while (optind < argc) {
	char *ThisPhrase = argv[optind];

	optind++;
	Match(ThisPhrase);
    }

    /* In silent mode Match() exits as soon as it finds something, so
     * arriving here means that nothing matched.
     */
    if (SilentMode) {
	exit(1);
    }
    return 0;
}

/*
 * Match -- look up one phrase in the database and report what was found.
 *
 * Phrase is the phrase as the user gave it; a null pointer or an empty
 * string is ignored.  The string is converted with String2Phrase() and
 * searched for with MakeMatches(); if either produces nothing, Match()
 * returns without printing anything.
 *
 * For every entry on the phrase's match list that has a match, one line
 * is written to stdout: the block within the file, the word within the
 * block and the file name.  When AsciiTrace is non-zero the StuffBefore
 * and Flags values are printed as well.  Only the match that each list
 * entry points at directly is examined.  Entries whose file information
 * cannot be obtained from GetFileInfo() are skipped.
 *
 * In SilentMode nothing is printed; the whole program exits with status 0
 * as soon as the first reportable match is seen.
 */
void
Match(Phrase)
    char *Phrase;
{
    extern t_Phrase *String2Phrase();
    extern t_FileInfo *GetFileInfo();
    extern long MakeMatches();

    t_Phrase *P;
    t_MatchList *Matches;
    t_FID LastFID = (t_FID) 0;	/* file that FileInfo currently describes */
    t_FileInfo *FileInfo = (t_FileInfo *) 0;

    if (!Phrase || !*Phrase) return;
    if ((P = String2Phrase(Phrase)) == (t_Phrase *) 0) return;

    if (MakeMatches(P) <= 0L) return;

    for (Matches = P->Matches; Matches != (t_MatchList *) 0;
					    Matches = Matches->Next) {
	t_FID FID;

	if (Matches->Match == (t_Match *) 0) {
	    continue;
	}

	FID = Matches->Match->Where->FID;

	/* Fetch the file information again whenever the file changes.
	 * Also fetch it whenever we hold none: after a failed lookup
	 * LastFID still names the previous file, so comparing FIDs alone
	 * would let a later match in that file use a null FileInfo.
	 */
	if (FileInfo == (t_FileInfo *) 0 || FID != LastFID) {
	    /*TODO: use DestroyFileInfo instead of efree:... */
	    if (FileInfo) efree((char *) FileInfo);
	    if ((FileInfo = GetFileInfo(FID)) == (t_FileInfo *) 0) {
		continue;
	    }
	    LastFID = FID;
	}

	/* Now that we know that we have something to print... */
	if (SilentMode) {
	    exit(0); /* OK, found something */
	}

	/* The casts make each argument agree with its conversion. */
	if (AsciiTrace) {
	    printf("%-7lu %-7u %-3d %-3d %s\n",
		    (unsigned long) Matches->Match->Where->BlockInFile,
		    (unsigned) Matches->Match->Where->WordInBlock,
		    (int) Matches->Match->Where->StuffBefore,
		    (int) Matches->Match->Where->Flags,
		    FileInfo->Name);
	} else {
	    printf("%-7.7lu %-7.7u %s\n",
		    (unsigned long) Matches->Match->Where->BlockInFile,
		    (unsigned) Matches->Match->Where->WordInBlock,
		    FileInfo->Name);
	}
    }

    /* Release the information for the last file reported, which the loop
     * only ever frees when it moves on to a different file.
     */
    /*TODO: use DestroyFileInfo instead of efree:... */
    if (FileInfo) efree((char *) FileInfo);
}

/*
 * $Log:	lqphrase.c,v $
 * Revision 1.4  90/10/06  00:50:56  lee
 * Prepared for first beta release.
 * 
 * Revision 1.3  90/08/29  21:45:29  lee
 * Alpha release
 * 
 * Revision 1.2  90/08/09  19:17:16  lee
 * *** empty log message ***
 * 
 * Revision 1.1  90/03/24  20:22:49  lee
 * Initial revision
 * 
 */

