/* WordInfo.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* WordInfo.c -- handle the database of words for lq-Text.
 * 
 * lq-text keeps a master list of all of the words that have ever been
 * seen.  Currently, this is in dbm format (sdbm or ndbm).
 * For each word, there's an associated WID (a unique number), an offset
 * into the master database (see pblock.c), and possibly thesaurus info.
 *
 * $Id: WordInfo.c,v 2.18 92/02/15 05:11:15 lee Exp $
 *
 * $Log:	WordInfo.c,v $
 * Revision 2.18  92/02/15  05:11:15  lee
 * Error messages improved; added function to open WID index.
 * 
 * Revision 2.17  92/02/09  16:17:48  lee
 * deleted file locking code altogether.
 * 
 * Revision 2.16  92/02/09  16:13:52  lee
 * Use Error() instead of fprintf.
 * 
 * Revision 2.15  92/02/07  22:11:26  lee
 * Debugging changes and gcc warnings; also simplified storage of block
 * numbers.
 * 
 * Revision 2.14  92/02/01  22:19:40  lee
 * deleted unused 6-bit code, and changed if 0 to if USE_LOCKING_ON_READ
 * with a view to reinstating the locking code in the future.
 * 
 * Revision 2.13  91/08/12  21:23:43  lee
 * Uses smalldb::GetFileModes() to get the file modes, instead of "0766".
 * 
 * Revision 2.12  91/03/21  23:04:52  lee
 * Improve dbm compatibility by checking dptr != 0 as well as dsize != 0
 * 
 * Revision 2.11  90/10/13  03:10:07  lee
 * Type error -- efree() needs a char *.
 * 
 * Revision 2.10  90/10/06  00:12:01  lee
 * Prepared for first beta release.
 * 
 * Revision 2.9  90/09/29  23:47:30  lee
 * Reduced the size of a buffer, and plugged yet another memory leak!
 * 
 * Revision 2.8  90/09/10  13:38:50  lee
 * deleted declaration of sleep()
 * 
 * Revision 2.7  90/08/29  21:46:48  lee
 * Alpha release.
 * 
 * Revision 2.6  90/08/12  17:33:38  lee
 * malloc changes; added SlayWordInfo() and MakeWordInfo().
 * 
 * Revision 2.5  90/08/09  19:16:35  lee
 * BSD lint and fixes...
 * 
 * Revision 2.4  90/03/22  14:23:19  lee
 * new calls to efree();
 * Offset now stored as a block number, not a byte offset
 * 
 * Revision 2.3  90/03/21  14:59:13  lee
 * Numerous changes.  WID2WordInfo() no longer calls GetWordPlaces().
 * 
 * Revision 2.2  89/10/08  20:45:05  lee
 * Working version of nx-text engine.  Addfile and wordinfo work OK.
 * 
 * Revision 2.1  89/10/02  01:13:56  lee
 * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
 * 
 * Revision 1.4  89/09/17  23:01:53  lee
 * Various fixes; NumberInBlock now a short...
 * 
 * Revision 1.3  89/09/16  21:16:07  lee
 * First demonstratable version.
 * 
 * Revision 1.2  89/09/11  00:35:03  lee
 * Some speedups, but WID not working properly...
 * 
 * Revision 1.1  89/09/07  21:05:51  lee
 * Initial revision
 * 
 *
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <unistd.h>
#include <memory.h> /* for the definition of memcpy() */

#include "fileinfo.h"
#include "smalldb.h"
#include "wordindex.h"
#include "wordinfo.h"
#include "numbers.h"

#include "emalloc.h"

#include "wordrules.h" /* max word length */

#include "pblock.h"

/** declarations: **/
/** Unix system calls: **/
/* open() is declared by <fcntl.h>, and lseek(), read() and write() by
 * <unistd.h>; both headers are included above.  They are deliberately not
 * redeclared here: old-style declarations such as "extern int open();",
 * "extern long lseek();" or "extern int read();" clash with the header
 * prototypes (open() is variadic, and lseek()/read() return off_t/ssize_t,
 * which need not be long/int).
 */

/** Unix Library Calls that need to be declared: **/

/** lqtext Library calls that need to be declared: **/
extern void Deletepblock();
extern void lqGetFileModes();

/** Functions within this file that need to be declared: **/
t_WordInfo *MakeWordInfo();
void SlayWordInfo();

/** **/

#ifdef ASCIITRACE
extern int AsciiTrace;
#endif

#define WIDINDEXCACHELEN (WIDBLOCKSIZE*32)

/* File descriptor for WidIndexFile; negative until OpenWordIndexFile()
 * has been called.
 */
static int Widfd = (-1);

/* Where we believe Widfd is currently positioned, so that sequential
 * accesses can skip the lseek().  Set to -1 after a failed or short read
 * so that the next access always seeks.
 */
static long WidPos = 0L;

/* NOTE(review): LastWID and LastBlock are not referenced by any of the
 * functions below -- confirm whether they are still needed.
 */
static t_WID LastWID = 0;
static char LastBlock[WIDBLOCKSIZE + 8]; /* + 8 for sWriteNumber */

/* Open the WID index file named by WidIndexFile, using the open flags and
 * creation modes reported by lqGetFileModes().  The descriptor is left in
 * Widfd and the remembered file position (WidPos) is reset to zero.
 * A failed open is reported through Error() with E_FATAL|E_SYS.
 */
static void
OpenWordIndexFile()
{
    int OpenFlags;
    int CreateModes;

    lqGetFileModes(&OpenFlags, &CreateModes);

    Widfd = open(WidIndexFile, OpenFlags, CreateModes);
    if (Widfd < 0) {
        Error(E_FATAL|E_SYS,
            "Can't open WID file \"%s\" flags %d mode %d",
            WidIndexFile, OpenFlags, CreateModes
        );
    }

    /* restart the position cache for the new descriptor */
    WidPos = 0L;
}

/* WID2WordInfo -- read the index record for a given WID.
 *
 * The record for WID lives at byte offset WID * WIDBLOCKSIZE in the WID
 * index file and is WIDBLOCKSIZE bytes long.  As decoded here it holds:
 *   - the word length (sReadNumber format),
 *   - that many bytes of word text (not nul-terminated),
 *   - a block number, multiplied by BLOCKSIZE to give WP->Offset,
 *   - the total number of word places,
 *   - word-place data, decoded by GetWordPlaces().
 *
 * Returns a t_WordInfo made by MakeWordInfo(), with DataBlock holding an
 * emalloc'd copy of the raw record.  Returns a null pointer if WID is
 * beyond the largest allocated WID (when that is known), if the seek or
 * read fails, if the file is too short to contain the record, or if the
 * stored word length cannot fit inside the record.
 */
t_WordInfo *
WID2WordInfo(WID)
    t_WID WID;
{
    extern t_WordPlace *GetWordPlaces(); /* pblock.c */
    extern t_WID LastNextWIDVal;

    char Buffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */
    char *q = Buffer;
    t_WordInfo *WP;
    int i;
    unsigned short L;

    if (Widfd < 0) {
	OpenWordIndexFile();
    }

    /* Optimisation: if the WID is greater than the largest allocated WID,
     * there's no point in looking at the file!
     */
    if (LastNextWIDVal && WID > LastNextWIDVal) {
	return (t_WordInfo *) 0;
    }

    /* Only seek if the descriptor is not already where we want it. */
    if (WidPos != (long) (WID * WIDBLOCKSIZE)) {
	WidPos = (long) (WID * WIDBLOCKSIZE);
	if (lseek(Widfd, WidPos, 0) < 0) {
	    Error(E_WARN|E_SYS,
		"WID2WordInfo: WID %ld: lseek(%d=\"%s\", %ld, 0) failed",
		WID, Widfd, WidIndexFile, WidPos
	    );
	    return (t_WordInfo *) 0;
	}
    }

    i = read(Widfd, Buffer, WIDBLOCKSIZE);
    WidPos += WIDBLOCKSIZE;

    if (i < 0) {
	WidPos = -1L; /* position unknown: force a seek next time */
	Error(E_SYS|E_WARN,
	    "Tried to read %d bytes from %d=\"%s\", but failed",
	    WIDBLOCKSIZE,
	    Widfd,
	    WidIndexFile
	);
	return (t_WordInfo *) 0;
    }

    if (i != WIDBLOCKSIZE) {
	/* short read: the record is not (entirely) in the file */
	WidPos = -1L;
	return (t_WordInfo *) 0;
    }

    if ((L = sReadNumber(&q)) == 0) {
	Error(E_BUG,
	    "WID2WordInfo: Database corrupt, WID %lu has wordlength zero",
	WID);
    }

    /* The word must lie wholly inside the block we just read; otherwise
     * MakeWordInfo() and the pointer arithmetic below would run off the
     * end of Buffer.
     */
    if ((long) L > (long) WIDBLOCKSIZE - (long) (q - Buffer)) {
	Error(E_BUG,
	    "WID2WordInfo: Database corrupt, WID %lu has wordlength %u, too long for index block",
	    (unsigned long) WID, (unsigned int) L);
	return (t_WordInfo *) 0;
    }

    WP = MakeWordInfo(WID, (int) L, q);
    q += L;

    WP->Offset = sReadNumber(&q) * BLOCKSIZE;
    WP->NumberOfWordPlaces = sReadNumber(&q);

    /* Now, maybe read some WordPlace tuplets: */
#if 1
    if (q - Buffer < WIDBLOCKSIZE) {
	/* keep a private copy of the raw record */
	WP->DataBlock = emalloc(WIDBLOCKSIZE + 5);
	(void) memcpy(WP->DataBlock, Buffer, WIDBLOCKSIZE);
	WP->WordPlaceStart = &(WP->DataBlock[q - Buffer]);
	WP->WordPlaces = GetWordPlaces(
	    WP->WID,
	    q,
	    WIDBLOCKSIZE - (q - Buffer),
	    WP->Offset,
	    WP->NumberOfWordPlaces
	);
	WP->WordPlacesInHere = WP->NumberOfWordPlaces;
    } else {
	Error(E_BUG, "block too small for %ld (%s)", WP->WID, WP->Word);
    }

#else
    WP->WordPlaces = (t_WordPlace *) 0;
    if (q - Buffer < WIDBLOCKSIZE) {
	WP->DataBlock = emalloc(WIDBLOCKSIZE + 5);
	(void) memcpy(WP->DataBlock, Buffer, WIDBLOCKSIZE);
	WP->WordPlaceStart = &(WP->DataBlock[q - Buffer]);
    }
#endif

    /* done! */
    return WP;
}

/* Scratch block in which MkWIBH() and MkWIB() assemble an index record. */
static char PairBuffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */

/* Make WordInfo Block Header...
 *
 * Writes the header of an index record into PairBuffer: the word length,
 * the word itself, the start of the pblock chain as a block number
 * (ChainStart / BLOCKSIZE, or 0 if pblock is null) and the number of word
 * places.  On return WordInfo->DataBlock points at PairBuffer and
 * WordInfo->WordPlaceStart at the first byte after the header.
 *
 * Because PairBuffer is static, the result is overwritten by the next call.
 * pblock may be a null pointer.
 */
void
MkWIBH(WordInfo, pblock)
    t_WordInfo *WordInfo;
    t_pblock *pblock;
{
    char *q = PairBuffer;

#ifdef ASCIITRACE
    if (AsciiTrace > 15) {
	/* pblock is allowed to be null (MkWIB passes one through), so it
	 * must not be dereferenced unconditionally here.
	 */
	fprintf(stderr, "\tMake info block header for %s, Offset %lu==%lu\n",
	    WordInfo->Word,
	    pblock ? (unsigned long) pblock->ChainStart : (unsigned long) 0,
	    (unsigned long) WordInfo->Offset);
    }
#endif

    sWriteNumber(&q, WordInfo->Length);
    (void) strncpy(q, WordInfo->Word, WordInfo->Length);
    q += WordInfo->Length;
    if (pblock) sWriteNumber(&q, (pblock->ChainStart / BLOCKSIZE) );
    else sWriteNumber(&q, 0L);
    sWriteNumber(&q, WordInfo->NumberOfWordPlaces);

    WordInfo->WordPlaceStart = q;
    WordInfo->DataBlock = PairBuffer;
}

/* Make WordInfo Block ...
 *
 * Builds the complete index record for WordInfo in PairBuffer: the header
 * (via MkWIBH) followed by as many word places from pblock as
 * PutWordPlaces() packs into the remainder of the WIDBLOCKSIZE block.
 *
 * pblock may be a null pointer, in which case only the header is written.
 *
 * Returns the number of word places stored in the block, which is also
 * recorded in WordInfo->WordPlacesInHere.
 */
int
MkWIB(WordInfo, pblock)
    t_WordInfo *WordInfo;
    t_pblock *pblock;
{
    extern unsigned short PutWordPlaces();

    /* See how many pairs from the given pblock fit into WordInfo,
     * and leave them in PairBuffer...
     */

#ifdef ASCIITRACE
    if (AsciiTrace > 3) {
	/* cast explicitly so the arguments always match the conversions */
	fprintf(stderr, "MkWIB Make info block for %s/%ld at %ld\n",
	    WordInfo->Word, (long) WordInfo->WID, (long) WordInfo->Offset);
    }
#endif

    MkWIBH(WordInfo, pblock);

    if (pblock == (t_pblock *) 0) {
	/* No WordPlaces to put in! */
	WordInfo->WordPlacesInHere = 0;
	return 0;
    }

    return WordInfo->WordPlacesInHere = PutWordPlaces(
		pblock->WordPlaces,
		WordInfo->WID,
		(unsigned char *) WordInfo->WordPlaceStart,
		WIDBLOCKSIZE - (WordInfo->WordPlaceStart - PairBuffer),
		pblock->ChainStart,
		pblock->NumberOfWordPlaces);
}

t_WID
Word2WID(Word, Length)
    char *Word;
    unsigned int Length;
{
    DBM *db;
    datum key, data;
    char *q;
    t_WID WID;
    char Buffer[8];
	/* enough for the binary representation of a number -- see numbers.c;
	 * this is _not_ sizeof(long).  It's probably 5, in fact, although
	 * for small numbers it's less.
	 */

    if (Length > MaxWordLength) {
	Length = MaxWordLength; /* NOTE: no trailing \0 required. */
    }

    /* contact database server */
    if ((db = startdb(WordIndex)) == (DBM *) 0) {
	Error(E_FATAL|E_SYS,
	    "Word2WID: Couldn't open Word Index (dbm) database \"%s\"",
	    WordIndex
	);
    }

    key.dptr = Word;
    key.dsize = Length;

    data = dbm_fetch(db, key);

    enddb(db);

    if (data.dptr == (char *) 0 || data.dsize == 0) {
	return (t_WID) 0;
    }

    /* do this because ReadNumber will leave q pointing beyond Buffer: */
    (void) memcpy(Buffer, data.dptr, data.dsize);
    q = Buffer;
    WID = sReadNumber(&q);
    if (q - Buffer != data.dsize) {
	Error(E_BUG, "Word2Wid \"%*s\" failed... got %lu", Length, Word, WID);
    }
    return WID;
}
    
/* WID2Word -- return the text of the word with the given WID.
 *
 * Returns a null pointer if WID is zero or WID2WordInfo() finds no record.
 * Otherwise the string allocated inside the t_WordInfo is detached and
 * handed back, so SlayWordInfo() does not free it; the caller is
 * presumably expected to efree() it.
 */
char *
WID2Word(WID)
    t_WID WID;
{
    t_WordInfo *Info;
    char *Text;

    if (WID == (t_WID) 0 ||
	(Info = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
	return (char *) 0;
    }

    /* steal the string before the rest of the structure is destroyed */
    Text = Info->Word;
    Info->Word = (char *) 0;
    SlayWordInfo(Info);

    return Text;
}

int
PutWordInfoIntoIndex(WordInfo, Offset)
    t_WordInfo *WordInfo;
    unsigned long Offset;
{
    DBM *db;
    char NumBuf[sizeof(t_WID) + 1];
    char *q = NumBuf;
    datum key, data;
    int RetVal;

    /** First, write the WID itself, so we can go from Word to WID */

    key.dptr = WordInfo->Word;
    key.dsize = WordInfo->Length;

    sWriteNumber(&q, WordInfo->WID);

    data.dptr = NumBuf;
    data.dsize = q - NumBuf;

    /* contact database server */
    if ((db = startdb(WordIndex)) == (DBM *) 0) {
	Error(E_FATAL|E_SYS,
	    "PutWordInfoIntoIndex: Couldn't open dbm Word Index \"%s\"",
	    WordIndex
	);
    }

    RetVal = dbm_store(db, key, data, DBM_REPLACE);

    enddb(db);

    /** Now, ensure that we have a physical block for WordInfo.  If
     ** we don't, there is something very wrong in pblock.c, our only
     ** possible caller.
     **/

    if (WordInfo->DataBlock == (char *) 0) {
	if (Offset) {
	    fprintf(stderr, "WARNING: WordInfo corrupt for \"%s\"\n",
			    WordInfo->Word);
	}
	(void) MkWIB(WordInfo, (t_pblock *) 0);
    }

    /** Now write the physical entry... */

    if (Widfd < 0) {
	OpenWordIndexFile();
    }

    if (WordInfo->WID > LastNextWIDVal) {
	LastNextWIDVal = WordInfo->WID;
    }

    if (WidPos != (long) (WordInfo->WID * WIDBLOCKSIZE)) {
	WidPos = (long) (WordInfo->WID * WIDBLOCKSIZE);
	if (lseek(Widfd, WidPos, 0) < 0) {
	    Error(E_SYS|E_FATAL,
	        "Index \"%s\": PutWordInfoIntoIndex(%s) lseek to %ld failed",
		WidIndexFile,
		WordInfo->Word,
		WidPos
	    );
	}
    }

    if (write(Widfd, WordInfo->DataBlock, WIDBLOCKSIZE) != WIDBLOCKSIZE) {
	Error(E_SYS|E_FATAL,
	    "Index \"%s\": PutWordInfoIntoIndex(%s) write failed",
	    WidIndexFile,
	    WordInfo->Word
	);
    }
    WidPos += WIDBLOCKSIZE;

    return RetVal;
}

int
DeleteWord(Word)
    char *Word;
{
    extern t_pblock *Getpblock();

    t_WID WID;
    t_WordInfo *WordInfo;
    t_pblock *tmp;

    if ((WID = Word2WID(Word, strlen(Word))) == (t_WID) 0) {
	return -1; /* not there */
    }

    /* get info from the list */
    if ((WordInfo = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
	return -1;
    }

    if ((tmp = Getpblock(WordInfo)) != (t_pblock *) NULL) {
	Deletepblock(tmp);
	(void) efree((char *)tmp);
    }

    /* delete the offset from the database, but retain the WID: */
    WordInfo->Offset = 0L;
    WordInfo->NumberOfWordPlaces = 0L;
    WordInfo->WordPlacesInHere = 0;
    PutWordInfoIntoIndex(WordInfo, 0L);
    SlayWordInfo(WordInfo);

    return 0;
}

/* Zero-initialised template, copied into every new t_WordInfo. */
static t_WordInfo ZeroWordinfo = {
    0,
};

/* Routines to create and destroy WordInfo structures */

/* MakeWordInfo -- allocate and initialise a t_WordInfo.
 *
 * WID:    stored in the new structure.
 * Length: number of bytes of Word to copy.
 * Word:   the word; it need not be nul-terminated.
 *
 * Returns a structure obtained from emalloc() in which every field is
 * zero except WID, Length and Word.  Word is a private, nul-terminated
 * copy.  Destroy the result with SlayWordInfo().
 */
INLINE t_WordInfo *
MakeWordInfo(WID, Length, Word)
    t_WID WID;
    int Length;
    char *Word; /* the word, which might not be nul-terminated */
{
    register t_WordInfo *New;

    New = (t_WordInfo *) emalloc(sizeof(t_WordInfo));
    *New = ZeroWordinfo; /* structure copy */
    New->WID = WID;

    /* one extra byte for the terminator, which strncpy will not supply */
    New->Word = emalloc(Length + 1);
    (void) strncpy(New->Word, Word, Length);
    New->Length = Length;
    New->Word[New->Length] = '\0';

    return New;
}

/* SlayWordInfo -- free a t_WordInfo along with its Word and WordPlaces.
 *
 * A null WP is ignored.  DataBlock is not released here.  Only this one
 * structure is destroyed, not any list hanging off WP->Next.
 */
void
SlayWordInfo(WP)
    t_WordInfo *WP;
{
    if (WP == (t_WordInfo *) 0) {
	return;
    }

    if (WP->Word != (char *) 0) {
	efree(WP->Word);
    }

    if (WP->WordPlaces != 0) {
	efree((char *) WP->WordPlaces);
    }

    /* Clearing Next is deliberate: it forces a run-time error in the
     * common (but wrong) idiom
     *     for (w = WordList; w; w = w->Next) SlayWordInfo(w);
     * which reads w->Next after w has been freed.
     */
    WP->Next = (t_WordInfo *) 0;

    efree((char *) WP);
}

#ifdef ASCIITRACE
/* fprintWordInfo -- debugging aid: print a t_WordInfo in readable form.
 *
 * stream: where to write.
 * W:      the structure to print; may be null, in which case only the
 *         opening line is written.
 * Caller: a label, printed on the first and last lines.
 *
 * Pointers are printed as 0x%lx after a cast to unsigned long, and integer
 * fields are cast to the type their conversion expects, so the arguments
 * always match the format string.
 */
void
fprintWordInfo(stream, W, Caller)
    FILE *stream;
    t_WordInfo *W;
    char *Caller;
{
    fprintf(stream, "%s: WordInfo 0x%lx: {\n", Caller, (unsigned long) W);
    /* flush the stream we just wrote to, so the opening line is not lost
     * if W turns out to be a bad pointer
     */
    (void) fflush(stream);
    if (W) {
	fprintf(stream, "\tWID: %ld (%s, len %u)\n",
			(long) W->WID, W->Word, (unsigned int) W->Length);
	fprintf(stream, "\tNumberOfWordPlaces: %lu In here: %d\n",
			(unsigned long) W->NumberOfWordPlaces,
			(int) W->WordPlacesInHere);
	fprintf(stream, "\tFID: %ld; Offset: %lu\n",
			(long) W->FID, (unsigned long) W->Offset);
	if (W->Next) {
	    fprintf(stream, "\tNext: 0x%lx\n", (unsigned long) W->Next);
	}
	if (W->DataBlock) {
	    fprintf(stream, "\tDataBlock: 0x%lx\n",
			(unsigned long) W->DataBlock);
	}
	if (W->WordPlaceStart) {
	    fprintf(stream, "\tWordPlaceStart: 0x%lx\n",
			(unsigned long) W->WordPlaceStart);
	}
	if (W->WordPlaces) {
	    fprintf(stream, "\tWordPlaces: 0x%lx\n",
			(unsigned long) W->WordPlaces);
	}
	if (W->WordPlace.FID && W->WordPlace.FID != W->FID) {
	    fprintf(stream, "\tWordPlace->FID: %ld != FID\n",
			(long) W->WordPlace.FID);
	}
	fprintf(stream, "\tWordPlace: (Block: %lu; Word %u",
			(unsigned long) W->WordPlace.BlockInFile,
			(unsigned int) W->WordPlace.WordInBlock);
	if (W->WordPlace.Flags || W->WordPlace.StuffBefore) {
	    fprintf(stream, "; Flags %u", (unsigned int) W->WordPlace.Flags);
	    fprintf(stream, "; StuffBefore: %u", (unsigned int)
					    W->WordPlace.StuffBefore);
	}
	fprintf(stream, ")\n");
	fprintf(stream, "} %s: WordInfo 0x%lx\n", Caller, (unsigned long) W);
    }
    fflush(stream);
}
#endif /*ASCIITRACE*/
