/*
** hostinfo.c for managing hosts and URLs
**
** Copyright (C) 1997, Howard Chu
**
** This file can be redistributed under the terms of the GNU General
** Public Licence.
*/

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hostinfo.h"

/*
 * Table of recognised URL schemes and their default ports.
 *
 * parseUrl() indexes this table with the request-type value it stores in
 * pUrl->request (RQ_FIRST..RQ_LAST, HTTP, ...), so the order of the entries
 * must not change.
 * NOTE(review): the empty-name entry looks like a placeholder for a request
 * type that has no URL scheme of its own - confirm against hostinfo.h.
 */
const service   urltypes[] = {
			      {"ftp", 21}, {"http", 80}, {"wais", 210},
			      {"gopher", 70}, {"nntp", 119}, {"", 0},
			      {"mailto", 25}, {"news", 119},
			      {"https", 443}, {"snews", 563}
};

/* List heads handed to GetListB() by parseUrl(): known hosts and proxies. */
blkL            hosts, prxys;
/*
 * Per-list parameters consumed by GetListB(). Judging by how GetListB()
 * reads them (max, icase, size, free), the initialisers are presumably
 * {maximum entries, case-insensitive names, block size, free function} -
 * TODO confirm the field order against hostinfo.h.
 */
lstX            hostX = {MAX_HOSTS, 1, sizeof(hostB), (freefunc *) FreeHostB};
lstX            prxyX = {RQ_PROXIES - 1, 1, sizeof(hostB), (freefunc *) FreeHostB};
/* Used by parseUrl() for the per-host list of nodes (URL paths). */
lstX            nodeX = {MAX_NODES, 0, sizeof(nodeB), (freefunc *) FreeNodeB};

/*
 * Allocate a new cookieB holding copies of the given name and value.
 *
 * The structure and both NUL-terminated strings share one malloc() block
 * (the strings sit directly behind the structure), so a single free()
 * releases everything. Returns NULL if the allocation fails.
 */
cookieB        *
NewCookie(lstr *name, lstr *value)
{
    cookieB        *ck;
    char           *text;

    ck = malloc(name->len + value->len + 2 + sizeof(cookieB));
    if (!ck)
	return NULL;

    /* String storage starts right after the structure itself. */
    text = (char *) (ck + 1);

    ck->next = NULL;

    ck->name = text;
    strncpy(ck->name, name->txt, name->len);
    ck->name[name->len] = '\0';

    /* The value follows the name and its terminator. */
    ck->value = text + name->len + 1;
    strncpy(ck->value, value->txt, value->len);
    ck->value[value->len] = '\0';

    return ck;
}

/*
 * Allocate a block of blkSize bytes that begins with a genericB header and
 * is followed by a NUL-terminated copy of name.
 *
 * Only the header's name field is filled in; everything else in the block
 * (including the header's next pointer) is left uninitialised for the
 * caller to set. Returns NULL if the allocation fails.
 */
void           *
NewBlock(lstr * name, size_t blkSize)
{
    genericB       *blk;
    char           *text;

    blk = malloc(name->len + blkSize + 1);
    if (!blk)
	return NULL;

    /* The name text lives just past the caller's structure. */
    text = ((char *) blk) + blkSize;
    strncpy(text, name->txt, name->len);
    text[name->len] = '\0';

    blk->name.txt = text;
    blk->name.len = name->len;
    return blk;
}

/*
 * Release one cookieB. NewCookie() keeps the name and value text in the
 * same allocation as the structure, so a single free() is sufficient.
 * The cookie's next pointer is not followed.
 */
void 
FreeCookieB(cookieB * ptr)
{
    free((void *) ptr);
}

/*
 * Release one authB together with the separately allocated strings it
 * owns. The block's next pointer is not followed.
 */
void
FreeAuthB(authB *ptr)
{
    /* free(NULL) is a no-op, so unset fields need no special handling. */
    free(ptr->user);
    free(ptr->pass);
    free(ptr->nonce);
    free(ptr->opaque);

    free(ptr);
}

/*
 * Release a pathB along with every cookie and every auth realm hanging
 * off it.
 *
 * GetAuthB() keeps path->auth as a linked list of realms, so the whole
 * list has to be walked here; freeing only the head would leak the rest.
 */
void 
FreePathB(pathB * ptr)
{
    cookieB        *lst, *nxt;
    authB          *lstA, *nxtA;

    for (lst = ptr->cookies; lst; lst = nxt)
    {
	nxt = lst->next;	/* fetch the link before the node is freed */
	FreeCookieB(lst);
    }
    for (lstA = ptr->auth; lstA; lstA = nxtA)
    {
	nxtA = (authB *) lstA->next;
	FreeAuthB(lstA);
    }
    free(ptr);
}

/*
 * Drop one reference to a domainB. The block, and every path attached to
 * it, is only released once the reference count is no longer positive.
 */
void 
FreeDomainB(domainB * ptr)
{
    pathB          *path, *following;

    ptr->refs -= 1;
    if (ptr->refs > 0)
	return;			/* still referenced from elsewhere */

    path = ptr->paths;
    while (path)
    {
	following = path->next;
	FreePathB(path);
	path = following;
    }
    free(ptr);
}

/*
 * Release one nodeB together with its separately allocated type and etag
 * strings. The node's next pointer is not followed.
 */
void 
FreeNodeB(nodeB * ptr)
{
    /* free(NULL) is a no-op, so unset fields need no special handling. */
    free(ptr->type);
    free(ptr->etag);

    free(ptr);
}

/*
 * Release a hostB: drop its reference on every domain it is linked to,
 * free the link records themselves, free all of its nodes, and finally
 * free the host block.
 */
void 
FreeHostB(hostB * ptr)
{
    domainH        *link, *nextLink;
    nodeB          *node, *nextNode;

    link = ptr->domains;
    while (link)
    {
	nextLink = link->next;
	/* Domains can be shared between hosts; this only drops our ref. */
	FreeDomainB(link->domain);
	free(link);
	link = nextLink;
    }

    node = (nodeB *) ptr->nodes.lst;
    while (node)
    {
	nextNode = node->next;
	FreeNodeB(node);
	node = nextNode;
    }

    free(ptr);
}

/* Look up the auth realm called name on the given path, creating it if it
 * does not exist yet. The realm that is returned is always moved to the
 * head of path->auth, which keeps the list in most-recently-used order.
 *
 * If memory for a new realm cannot be obtained, the realm at the tail of
 * the list is freed and the allocation is tried once more. Returns NULL if
 * that fails as well.
 */

authB	       *
GetAuthB(lstr * name, pathB *path)
{
    authB          *node = path->auth;
    /*
     * Treat the list head as if it were a node, so that unlinking the first
     * element needs no special case. This only works if `next` is the first
     * member of authB.
     */
    authB          *before = (authB *) & path->auth;
    int             hit = 0;

    while (node)
    {
	if (node->name.len == name->len &&
	    strncmp(node->name.txt, name->txt, name->len) == 0)
	{
	    hit = 1;
	    break;
	}
	/* Stop on the tail so node/before still describe the last entry */
	if (!node->next)
	    break;
	before = node;
	node = node->next;
    }

    if (hit)
    {
	/* Unlink the existing realm; it is re-inserted at the head below */
	before->next = node->next;
    } else
    {
	genericB       *blk = NewBlock(name, sizeof(authB));

	if (!blk && node)
	{
	    /* Out of memory: sacrifice the tail entry and try again */
	    before->next = NULL;
	    FreeAuthB(node);
	    blk = NewBlock(name, sizeof(authB));
	}
	if (!blk)
	    return NULL;
	/* Clear everything that follows the genericB header */
	memset((blk + 1), 0, sizeof(authB) - sizeof(genericB));
	node = (authB *) blk;
    }

    /* Promote to most recently used */
    node->next = path->auth;
    path->auth = node;
    return node;
}

/* Look up a path on a domain's path list, adding it when no entry matches.
 * The list is kept ordered by name length, longest first, and a new entry
 * is inserted in front of the first entry that is shorter than it.
 *
 * NOTE(review): an existing entry is considered a match when its name
 * merely starts with the requested name (it may be longer) - confirm that
 * this prefix rule, rather than an exact match, is what callers expect.
 *
 * Returns the matching or newly created pathB, or NULL if allocation fails.
 */
pathB	       *
GetPathB(lstr * name, domainB *dom)
{
    pathB          *node;
    /*
     * The list head doubles as a pseudo-node so inserting at the front needs
     * no special case; this only works if `next` is pathB's first member.
     */
    pathB          *before = (pathB *) & dom->paths;

    for (node = dom->paths; node; before = node, node = node->next)
    {
	/* First shorter entry: this is where a new path belongs */
	if (node->name.len < name->len)
	    break;
	if (strncmp(node->name.txt, name->txt, name->len) == 0)
	    return node;
    }

    node = NewBlock(name, sizeof(pathB));
    if (!node)
	return NULL;

    node->cookies = NULL;
    node->auth = NULL;
    node->expire = 0;
    node->version = 0;
    node->flags = 0;

    /* Splice in behind `before` */
    node->next = before->next;
    before->next = node;
    return node;
}

/*
 * Find the domain called name among the domains linked to host, creating
 * and linking a new one (with a reference count of 1) if there is none.
 *
 *   name       domain name; compared case-insensitively over name->len
 *              characters, so name->txt need not be NUL-terminated there.
 *   host       host whose domain list is searched and extended.
 *   newdomain  set to 1 if a domain was created, 0 otherwise.
 *
 * Returns the domain, or NULL if memory could not be allocated; in that
 * case the host's domain list is left untouched.
 */
domainB        *
GetDomainB(lstr * name, hostB * host, int * newdomain)
{
    domainH *curr;
    domainH *newH;
    domainB *newB;

    *newdomain = 0;
    for (curr = host->domains; curr; curr = curr->next)
    {
	/* Bounded compare: name is a length-delimited string */
	if (curr->domain->name.len == name->len &&
	    !strnicmp(curr->domain->name.txt, name->txt, name->len))
	    return curr->domain;
    }

    newB = NewBlock(name, sizeof(domainB));
    if (!newB)
	return NULL;

    newH = malloc(sizeof(domainH));
    if (!newH)
    {
	/*
	 * newB's refs and paths have not been initialised yet, so it must
	 * not go through FreeDomainB(); it is a single allocation.
	 */
	free(newB);
	return NULL;
    }

    newB->refs = 1;
    newB->paths = NULL;

    newH->domain = newB;
    newH->next = host->domains;
    host->domains = newH;

    *newdomain = 1;
    return newB;
}

/* Link a newly created domain to every other known host it applies to.
 *
 * The first entry of the global host list is skipped. Each remaining host
 * whose name, from its first '.' onwards, equals the domain name (ignoring
 * case) gets a new link record pointing at the domain, and the domain's
 * reference count is bumped. The walk stops early if memory runs out.
 */

void
AddNewDomain(domainB * new)
{
    hostB *host;
    char *suffix;
    domainH *link;

    if (!hosts.lst)	/* Bogus! */
	return;

    host = ((hostB *) hosts.lst)->next;
    while (host)
    {
	suffix = strchr(host->name.txt, '.');
	if (suffix && !stricmp(suffix, new->name.txt))
	{
	    link = malloc(sizeof(domainH));
	    if (!link)
		return;
	    link->domain = new;
	    link->next = host->domains;
	    host->domains = link;
	    new->refs++;
	}
	host = host->next;
    }
}

/* Let the host at the head of the global host list share domain data with
 * an existing host from the same domain.
 *
 * Two hosts are in the same domain when their names agree, ignoring case,
 * from the first '.' onwards. Only the first such host is examined, and
 * from it only the first domain whose name begins with '.' (and for which
 * a link record can be allocated) is linked to the new host, with its
 * reference count incremented.
 */

void AddNewHost()
{
    hostB *fresh, *other;
    char *freshDom, *otherDom;
    domainH *link, *copy;

    fresh = (hostB *)hosts.lst;
    if (!fresh)	/* Bogus! */
	return;
    freshDom = strchr(fresh->name.txt, '.');
    if (!freshDom)
	return;

    for (other = fresh->next; other; other = other->next)
    {
	otherDom = strchr(other->name.txt, '.');
	if (!otherDom || stricmp(freshDom, otherDom))
	    continue;

	for (link = other->domains; link; link = link->next)
	{
	    if (link->domain->name.txt[0] != '.')
		continue;
	    copy = malloc(sizeof(domainH));
	    if (!copy)
		continue;	/* keep trying with the next domain */
	    copy->domain = link->domain;
	    copy->next = fresh->domains;
	    fresh->domains = copy;
	    link->domain->refs++;
	    break;
	}
	/* Only the first host in the same domain is consulted */
	break;
    }
}

/* Move every cookie on the list `new` onto the cookie list of path.
 *
 * A new cookie whose name matches an existing cookie (ignoring case) takes
 * that cookie's place in the list and the old cookie is freed. Any other
 * new cookie is pushed onto the head of the list. The caller must not use
 * the `new` list afterwards; its cookies now belong to path.
 */

void
AddCookies(cookieB *new, pathB *path)
{
    cookieB *old, *prev, *next;
    int replaced;

    for (;new;new=next)
    {
	/* Remember the rest of the input; new->next is rewritten below */
	next = new->next;
	/*
	 * Track the outcome in a flag instead of testing `old` after the
	 * loop: once the old cookie has been freed its pointer value must
	 * not be looked at any more.
	 */
	replaced = 0;
	for (old = path->cookies, prev = (cookieB *) & path->cookies; old;
	     prev = old, old=old->next)
	{
	    if (stricmp(old->name, new->name))
		continue;
	    prev->next = new;
	    new->next = old->next;
	    FreeCookieB(old);
	    replaced = 1;
	    break;
	}
	if (!replaced)
	{
	    new->next = path->cookies;
	    path->cookies = new;
	}
    }
}

/* Signature shared by strncmp() and strnicmp() as used by GetListB(). */
typedef int     (cmpfunc) (char *, char *, size_t);

/* Search a given list for a genericB with the given name. Create it if
 * it's not found. The genericB is automatically promoted to most recent
 * status.
 *
 *   name    name to look for; compared over name->len characters.
 *   list    list to search; its head and entry count are updated.
 *   listX   list parameters: maximum length (0 means no limit), whether
 *           names are compared case-insensitively, the size of the blocks
 *           to allocate, and the function used to free an evicted block.
 *   didnew  set to 1 if a new block was created, 0 otherwise.
 *
 * When the list is full, the least recently used entry (the tail) is
 * evicted to make room. When allocation fails and nothing has been evicted
 * yet, the tail is evicted and the allocation is retried once. A new block
 * is zero-filled apart from its genericB header. Returns NULL if no block
 * could be allocated.
 */
genericB       *
GetListB(lstr * name, blkL * list, lstX * listX, int * didnew)
{
    genericB       *curr, *prev;
    int             found = 0;
    cmpfunc        *cmp;

    *didnew = 0;
    if (listX->icase)
	cmp = (cmpfunc *) strnicmp;
    else
	cmp = (cmpfunc *) strncmp;

    /*
     * prev starts out aliasing the list head so that unlinking the first
     * entry needs no special case; this only works if `next` is the first
     * member of genericB.
     */
    for (curr = list->lst, prev = (genericB *) & list->lst; curr;
	 prev = curr, curr = curr->next)
    {
	if (curr->name.len == name->len &&
	    !cmp(curr->name.txt, name->txt, name->len))
	{
	    found = 1;
	    break;
	}
	/* End of list, and still not found */
	if (!curr->next)
	    break;
    }
    /* Make new genericB */
    if (!found)
    {
	genericB       *new;
	if (curr && listX->max && list->num == listX->max)	/* Kill last one */
	{
	    prev->next = NULL;
	    listX->free(curr);
	    --list->num;
	    /*
	     * The tail is gone. Forget the pointer so the out-of-memory
	     * retry below cannot free it, and count it, a second time.
	     */
	    curr = NULL;
	}
	new = NewBlock(name, listX->size);
	if (!new && curr)	/* Try freeing one anyway */
	{
	    prev->next = NULL;
	    listX->free(curr);
	    --list->num;
	    new = NewBlock(name, listX->size);
	}
	if (!new)
	    return new;
	/* Clear everything that follows the genericB header */
	memset((new + 1), 0, listX->size - sizeof(genericB));
	curr = new;
	++list->num;
	*didnew = 1;
    } else
    {				/* Pull this node off the list */
	prev->next = curr->next;
    }
    /* Put this node onto head of list */
    curr->next = list->lst;
    list->lst = curr;
    return curr;
}

/* Parse the given text URL into the given urlB structure. The urlB is
 * not initialized here (apart from the user/pass fields). The intent is to
 * allow partially specified URLs to re-use the parameters of the previously
 * processed URL: fields this call does not overwrite keep their values.
 * Also, the routine will fill in certain parameters by default if they are
 * completely absent: a URL that names a host but no path gets the path "/",
 * and a URL with no port ends up as http on the standard port.
 *
 *   url       NUL-terminated URL text. pUrl->user and pUrl->pass are set to
 *             point into this buffer, so it must outlive their use.
 *   pUrl      structure to fill in.
 *   is_proxy  non-zero to look the host up in the proxy list rather than
 *             the host list; no path processing is done for proxies.
 *
 * Returns 0 on success, or -1 if the URL cannot be parsed (unknown scheme,
 * empty host name, bad port number, ...) or if no host or path block could
 * be obtained.
 */
int
parseUrl(char *url, urlB * pUrl, int is_proxy)
{
    char           *ptr, *prev, c;
    lstr            uhost = {0, 0}, upath = {0, 0};
    int             len;

    /* OK, I lied. user:pass@ info is always initialized. */
    pUrl->user.txt = NULL;
    pUrl->user.len = 0;
    pUrl->pass.txt = NULL;
    pUrl->pass.len = 0;

    /* Look for first significant delimiter... */
    for (ptr = url; (c = *ptr) != '\0'; ++ptr)
    {
	if (c == ':' || c == '/')
	    break;
    }

    prev = url;
    /* OK, we got xxxx: - check it for a valid URL type. */
    if (c == ':')
    {
	int             i;
	len = ptr - url;
	/*
	 * The scheme has to match a table entry exactly. Comparing only the
	 * first len characters would accept any prefix ("h:" for "http:"),
	 * and an empty scheme would match the first entry.
	 */
	for (i = RQ_FIRST; i <= RQ_LAST; i++)
	    if (len > 0 && strlen(urltypes[i].name) == (size_t) len &&
		!strnicmp(url, urltypes[i].name, len))
	    {
		pUrl->request = i;
		pUrl->port = urltypes[i].port;
		if (i == HTTPS || i == SNEWS)
		    pUrl->flags |= U_USE_SSL;
		else
		    pUrl->flags &= ~U_USE_SSL;
		break;
	    }
	if (i > RQ_LAST)
	    return -1;
	/* Special case URLs: hostname is implicit, no path needed */
	if (i == MAIL || i == NEWS || i == SNEWS)
	{
	    /*
	     * The text after the colon is handed back through user.txt.
	     * NOTE(review): user.len is left at 0 here - presumably callers
	     * treat user.txt as a NUL-terminated string in this case.
	     */
	    pUrl->user.txt = ptr+1;
	    pUrl->user.len = 0;
	    return 0;
	}
	c = * ++ptr;
	prev = ptr;
    }
    /*
     * This could either be the beginning of a "//" sequence, or it could be
     * a naked path. If just a path, the pUrl structure better already have
     * valid hostname info... 
     */
    if (c == '/')
    {
	if (ptr - prev)		/* Got "xxxx/" - what is this? */
	    return -1;
	if (ptr[1] == '/')	/* Got "//" - look for user:pw@host */
	{
	    /*
	     * From here on len is a small state flag: 0 = nothing special
	     * seen, 1 = the text at prev follows a ':' (a port, unless an
	     * '@' turns up), 2 = an '@' has been seen.
	     */
	    len = 0;		/* Seen no user/pw info yet */
	    ptr+=2;
	    for (prev = ptr; (c = *ptr) != '\0'; ++ptr)
	    {
		if (c == ':' || c == '@' || c == '/')
		    break;
	    }
	    /*
	     * This URL might be host:port or user:pw@host. Don't know for
	     * sure yet. Assume hostname. 
	     */
	    if (c == ':')
	    {
		len = 1;
		uhost.txt = prev;
		uhost.len = ptr - prev;
		for (prev = ++ptr; (c = *ptr) != '\0'; ++ptr)
		{
		    if (c == '@' || c == '/')
			break;
		}
	    }
	    /*
	     * This URL definitely includes user/pw info. What's what depends
	     * on whether a ':' was also present. 
	     */
	    if (c == '@')
	    {
		if (len)	/* OK, this user:pw@ */
		{
		    pUrl->pass.txt = prev;
		    pUrl->pass.len = ptr - prev;
		    pUrl->user.txt = uhost.txt;
		    pUrl->user.len = uhost.len;
		    uhost.txt = NULL;
		    uhost.len = 0;
		} else
		{
		    pUrl->user.txt = prev;
		    pUrl->user.len = ptr - prev;
		}
		len = 2;
		for (prev = ++ptr; (c = *ptr) != '\0'; ++ptr)
		{
		    if (c == ':' || c == '/')
			break;
		}
	    }
	    /*
	     * This is a second colon, so we know we have user:pw@host:port 
	     */
	    if (c == ':')
	    {
		len = 1;
		uhost.txt = prev;
		uhost.len = ptr - prev;
		for (prev = ++ptr; (c = *ptr) != '\0'; ++ptr)
		{
		    if (c == '/')
			break;
		}
	    }
	    /*
	     * Ok, we're at the end now. This is either hostname or port
	     * number. 
	     */
	    if (c == '/' || !*ptr)
	    {
		if (len == 1)
		{
		    char           *end;
		    long            port = strtol(prev, &end, 10);

		    /*
		     * Unrecognizable port number: the digits must run right
		     * up to the '/' or the end of the URL, and the value
		     * must be a valid TCP port.
		     */
		    if (end != ptr || port <= 0 || port > 65535)
			return -1;
		    pUrl->port = (int) port;
		} else
		{
		    uhost.txt = prev;
		    uhost.len = ptr - prev;
		}
	    }
	}
    }
    if (c == '/')
    {
	upath.txt = ptr;	/* We include the leading slash */
	upath.len = strlen(upath.txt);
    } else
    {
	if (!uhost.txt)
	{
	    /*
	     * A naked string with no special characters is treated as a
	     * hostname.
	     */
	    uhost.txt = prev;
	    uhost.len = ptr - prev;
	}
	/*
	 * A host with nothing after it ("host", "//host", "//host:port"):
	 * the path defaults to "/".
	 */
	upath.txt = "/";
	upath.len = 1;
    }
    if (uhost.txt)
    {
	blkL           *l;
	lstX           *x;
	int		added;

	if (!uhost.len)
	    return -1;
	/* Is this the proxy list or the regular host list? */
	if (is_proxy)
	{
	    l = &prxys;
	    x = &prxyX;
	} else
	{
	    l = &hosts;
	    x = &hostX;
	}
	pUrl->host = (hostB *) GetListB(&uhost, l, x, &added);
	/* Any path left over from an earlier URL belonged to another host */
	pUrl->path = NULL;
	if (!is_proxy && added)
	    AddNewHost();
    }
    /* OK, we ought to have a valid host by now. */
    if (!pUrl->host)
	return -1;

    /* Don't bother to do path processing for proxy sites. */
    if (is_proxy)
	return 0;

    if (upath.txt)
    {
	if (upath.len == 0)
	{
	    upath.txt = "/";
	    upath.len = 1;
	}
	/* len is only reused here to receive the "created" flag */
	pUrl->path = (nodeB *) GetListB(&upath, &pUrl->host->nodes,
					&nodeX, &len);
    }
    /* OK, we should have a valid path now. */
    if (!pUrl->path)
	return -1;

    /*
     * Got all the way here without specifying anything? Default to http,
     * standard port. 
     */
    if (pUrl->port == 0)
    {
	pUrl->port = urltypes[HTTP].port;
	pUrl->request = HTTP;
    }
    return 0;
}
