/* wordlist.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* wordlist -- simple program to print sorted wordlist
 *
 * $Id: lqwordlist.c,v 1.2 92/02/15 05:19:38 lee Exp $
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"
#include "numbers.h"

#include <stdio.h>
#include <sys/types.h>
#include <malloc.h>
#include <ctype.h>

#ifdef BSD
# define USI_MAX ((unsigned int) -1)
#else
# include <limits.h>
  /* for USI_MAX, the largest unsigned integer.
   * 4.3 BSD doesn't seem to have this.  I don't know how to get this
   * on BSD systems.
   */
#endif

#include "fileinfo.h"
#include "wordinfo.h"
#include "smalldb.h"
#include "pblock.h"
#include "wordrules.h"
#include "emalloc.h"

/*** Declarations: ***/
/* This file is written in pre-ANSI (K&R) style: the declarations below give
 * return types only, with no parameter lists.
 */
/** System calls and library routines: **/
extern void exit();

/** System calls: **/
/* (none declared explicitly) */

/** Unix Library Functions: **/
extern char *strncpy();
#ifndef tolower
 /* Only declared when tolower is not already a macro. */
 extern int tolower();
#endif

/** lqtext library functions: **/
extern void cleanupdb();
extern void SetDefaults();
extern void DefaultUsage();
extern long GetMaxWID();

/** functions defined within this file: */
/* NOTE(review): InitCache() and CompareStringsByPointersForQsort() are
 * defined in this file but are not declared here; main() calls InitCache()
 * before its definition, so it is implicitly declared as returning int.
 * PrintWordInfo() is declared but no definition or use of it is visible in
 * this file -- confirm whether the declaration is still needed.
 */
void PrintWordInfo();
void DumpMyCache(), AddSort();
void dbmmarch();

/** Macros and variable definitions **/

/* STRNCMP / STRCMP -- strncmp()/strcmp() with a fast path on the first byte.
 *
 * If the first bytes of the two strings differ (or, for STRNCMP, if n is
 * not positive) the result is simply the difference of those first bytes,
 * taken as unsigned char values, and the library routine is not called.
 *
 * Caveats:
 *  - These are macros: henry and utzoo are evaluated more than once, so do
 *    not pass expressions with side effects.
 *  - The first byte of each string is read even when n is zero, so both
 *    pointers must be dereferenceable.
 *  - Every argument is parenthesised in the expansion, so an expression
 *    such as (a < b) ? a : b can safely be passed as n.
 */
#define STRNCMP(henry, utzoo, n) \
     ( ((n) > 0 && *(henry) == *(utzoo)) ? strncmp((henry), (utzoo), (n)) : \
       ( ( (int)(unsigned char) *(henry)) - ((int)(unsigned char) *(utzoo)) ))
#define STRCMP(henry, utzoo) \
     ( (*(henry) == *(utzoo)) ? strcmp((henry), (utzoo)) : \
       ( ( (int)(unsigned char) *(henry)) - ((int)(unsigned char) *(utzoo)) ))
/* Note: the double casts are in case there's an 8-bit value, and chars
 * are signed on the local machine, both of which do happen in practice.
 * Thanks to msb@sq.com for pointing this out.
 */


char *progname = 0;
    /* Used for error messages; main() sets it to argv[0]. */

int AsciiTrace = 0;
    /* If this is non-zero, we provide debugging information.  The lqtext
     * library also uses this variable.  Setting it to values greater
     * than 1 or 2 will generally provide large amounts of debugging
     * information.  If the library was compiled with -UASCIITRACE,
     * however, there will be much less diagnostic output at higher
     * levels.
     */

char *Prefix = NULL;
    /* Set by the -p option in main(); NULL when -p was not given.
     * AddSort() compares each word against it.
     */
int PrefixLength = 0;
    /* strlen(Prefix), computed once in main() when -p is parsed. */

static char *Revision = "$Revision: 1.2 $";
    /* RCS revision string, printed on stderr by the -V option. */

/** end of declarations... **/


/* main -- parse the command line, then print the sorted word list.
 *
 * Options handled here:
 *   -p prefix   store the prefix in Prefix/PrefixLength (the usage text
 *               describes this as "only words starting with prefix")
 *   -V          print the program name and revision on stderr
 *   -x          print the usage message and exit with status 1
 *   -Z, -z arg  accepted but ignored here; SetDefaults() deals with them
 *
 * Exits with status 1 after a usage error; otherwise calls exit(0) once
 * dbmmarch() and cleanupdb() have returned.
 */
int
main(argc, argv)
    int argc;
    char *argv[];
{
    extern int getopt();          /* For getopt(3) */
    extern char *optarg;          /* For getopt(3) */
    int ch;                       /* For getopt(3) */
    int ErrorFlag = 0;            /* For getopt(3) */

    progname = argv[0];
        /* I see this as a library program, so I am leaving the full
         * path.  lqaddfile(1L) and lqphrase(1L) set progname to be
         * the filename of the command, rather than the full pathname.
         */

    SetDefaults(argc, argv);
        /* Deal with any arguments that are understood by all lqtext
         * programs.
         */

    /* getopt(3) returns -1 once all options have been consumed. */
    while ((ch = getopt(argc, argv, "p:VxZz:")) != -1) {
        switch (ch) {
        case 'p':
            /* The option's argument is delivered in optarg.  argv[optind]
             * is the *next* element to be scanned, which may be an
             * unrelated word or the terminating NULL pointer.
             */
            Prefix = optarg;
            PrefixLength = strlen(Prefix);
            break;
        case 'V':
            fprintf(stderr, "%s version %s\n", progname, Revision);
            break;
        case 'x':
            ErrorFlag++;
            break;
        case '?':
            ErrorFlag++;
            break;
        case 'z':
        case 'Z':
            break; /* done by SetDefaults(); */
        }
    }

    /* Normally put call to lrqError here to give a helpful message,
     * but not yet ready to ship the error handling package, sorry
     */
    if (ErrorFlag) {
        fprintf(stderr, "%s: options are:\n", progname);
        /* End the line so that the text printed by DefaultUsage() does
         * not run on from this one.
         */
        fprintf(stderr, "-p prefix -- only words starting with \"prefix\"\n");
        DefaultUsage();
            /* DefaultUsage() prints the list of the standard options. */
        exit(1);
    }

    InitCache(GetMaxWID());

    dbmmarch();
    cleanupdb();
    exit(0);
#ifdef lint
    /*NOTREACHED*/
    return 1;
        /* this is for versions of lint and gcc that don't understand
         * that exit() doesn't return -- or, if it does, that there is
         * nothing that can be done about it!
         */
#endif
}

/* The word cache: an array of malloc'd strings that AddSort() fills and
 * DumpMyCache() sorts, prints and frees.
 *
 *   DefaultCache -- small built-in array, used when the database is tiny
 *   MaxInCache   -- number of entries MyCache may hold before a flush
 *   MyCache      -- the array in use (DefaultCache or a malloc'd one)
 *   CacheCount   -- number of entries currently stored in MyCache
 */
static char *DefaultCache[10];
static long MaxInCache = 10;
static char **MyCache = DefaultCache;
static long CacheCount = 0;

/* InitCache -- choose the cache array for a database of MaxWords words.
 *
 * If MaxWords is smaller than the number of slots in DefaultCache, the
 * built-in array is used.  Otherwise an array of MaxWords + 2 pointers is
 * allocated and MaxInCache is set to MaxWords + 1.  The chosen sizes are
 * reported on stderr.
 *
 * Always returns 0; an allocation failure is reported through Error() and
 * terminates the program with exit status 1.
 */
int
InitCache(MaxWords)
    long MaxWords;
{
    /* Element count of DefaultCache: the size of the whole array divided
     * by the size of one element (the other way round gives zero).
     */
    MaxInCache = (long) (sizeof(DefaultCache) / sizeof(DefaultCache[0]));

    if (MaxWords < MaxInCache) {
        MyCache = DefaultCache;
        fprintf(stderr, "Init cache %ld Max set to %ld\n", MaxWords, MaxInCache);
        return 0;
    }

    MyCache = (char **) malloc((MaxWords + 2) * sizeof(char *));
    if (!MyCache) {
        /* Assumes Error() takes a printf-style format -- the cast makes
         * the argument agree with %ld.
         */
        Error(E_FATAL|E_MEMORY,
            "Couldn't allocate %ld bytes of memory for wordlist",
            (long) ((MaxWords + 2) * sizeof(char *)));
        exit(1);
    }
    MaxInCache = MaxWords + 1;
    fprintf(stderr, "Init cache %ld Max set to %ld\n", MaxWords, MaxInCache);
    return 0;
}


/* dbmmarch -- visit every key in the word index, pass each word and its
 * WID to AddSort(), and finally flush the cache with DumpMyCache().
 * Some entries might be missed if the database is being updated by
 * another process while we walk it.
 */
void
dbmmarch()
{
    void AddSort();
    DBM *WordDb;
    datum Key;

    WordDb = startdb(WordIndex);
    if (WordDb == (DBM *) 0) {
        /* WordIndex (from "globals.h") names the list of words.  Failure
         * to open it usually means that $LQTEXTDIR is not set and that
         * no -d database-dir option was given; SetDefaults(), called
         * from main(), handles that option.
         */
        fprintf(stderr, "Can't open database file \"%s\"\n", WordIndex);
        exit(1);
    }

    /* Each record maps a word to its WID:
     * (key = "Word", content = WID)
     */
    Key = dbm_firstkey(WordDb);
    while (Key.dptr != (char *) 0 && Key.dsize != 0) {
        unsigned long WID = 0;  /* stays 0 when the record has no content */
        datum Content;

        /* IMPORTANT NOTE:
         * The stored words are not nul-terminated, so Key.dptr must
         * never be handed to printf() or puts() directly; AddSort() is
         * given the length and makes its own terminated copy.
         */
        Content = dbm_fetch(WordDb, Key);

        if (Content.dsize && Content.dptr) {
            char *Cursor = Content.dptr;

            WID = sReadNumber(&Cursor);
        }
        AddSort((int) Key.dsize, Key.dptr, WID);

        Key = dbm_nextkey(WordDb);
    }
    enddb(WordDb);
    DumpMyCache();
}


void
AddSort(Length, Word, WID)
    int Length;
    char *Word;
    unsigned long WID;
{
    if (CacheCount >= MaxInCache) {
	DumpMyCache();
	CacheCount = 1; /* including this word... */
    }

    /* regexp checking goes here */
    if (Prefix) {
	if (STRNCMP(Prefix, Word,
		(PrefixLength < Length) ? PrefixLength : Length) == 0) {
	    return;
	}
    }

    if ((MyCache[CacheCount] = (char *) malloc(Length + 12)) == (char *) 0) {
	Error(E_FATAL|E_MEMORY, "malloc for %d bytes failed", Length + 12);
	exit(1);
    }

    (void) strncpy(MyCache[CacheCount], Word, Length);
    MyCache[CacheCount][Length] = '\0';
    {
	char buf[20];
	(void) sprintf(buf, "\t%ld", WID);
	(void) strcat(MyCache[CacheCount], buf);
    }
    ++CacheCount;
}

/* CompareStringsByPointersForQsort -- comparison routine for qsort().
 * Each argument points at a (char *) element of the array being sorted;
 * the two strings are compared with STRCMP and its result is returned.
 */
int
CompareStringsByPointersForQsort(s1p, s2p)
    void *s1p;
    void *s2p;
{
    char *First = *(char **) s1p;
    char *Second = *(char **) s2p;

    return STRCMP(First, Second);
}

/* DumpMyCache -- sort the cached strings, print them one per line on
 * stdout, free them and leave the cache empty.
 *
 * Does nothing when the cache holds no entries.  On return every used
 * slot has been cleared and CacheCount is zero, so calling the function
 * again is harmless.
 */
void
DumpMyCache()
{
    long i;     /* same type as CacheCount */

    if (!CacheCount) return;

    /* size_t arguments are correct whether or not a prototype for
     * qsort() is in scope.
     */
    (void) qsort(&MyCache[0], (size_t) CacheCount, sizeof(char *),
                 CompareStringsByPointersForQsort);

    for (i = 0; i < CacheCount; i++) {
        /** printf("%d\t%s\n", i, MyCache[i]); **/
        (void) puts(MyCache[i]);
        (void) free(MyCache[i]);
        MyCache[i] = (char *) 0;    /* do not keep a dangling pointer */
    }
    CacheCount = 0;
}
