/*
 * cleanup - examine a cached file, and if it meets the criteria for
 * being old, remove it
 *
 * Reinier Post
 *
 * $Log: cleanup.c,v $
 * Revision 0.14  1994/05/17  13:37:09  reinpost
 * support for t directive in addition to existing d directive
 *
 * Revision 0.14  1994/05/17  13:37:09  reinpost
 * support for t directive in addition to existing d directive
 *
 * Revision 0.13  1994/05/03  19:30:05  reinpost
 * some reorganizing, no real changes
 *
 * Revision 0.11  1994/03/25  20:43:30  reinpost
 * new configuration directive: 'd'
 *
 * Revision 0.11  1994/03/25  20:43:30  reinpost
 * new configuration directive: 'd'
 *
 *
 * Revision 0.9  1994/03/02  21:22:20  reinpost
 * now supports a separate configuration file to make expiration rate
 * dependent on URL
 *
 * Revision 0.8  1994/02/25  20:19:30  reinpost
 * the access and modify expiration times are now configurable
 *
 * Revision 0.6  1994/02/17  21:26:41  reinpost
 * it now appears to work at least to the extent that some files
 * are outdated and others aren't
 *
 * Revision 0.5  1994/02/17  10:29:05  reinpost
 * unfinished code - please ignore
 *
 */

#include <sys/stat.h>
#include <errno.h>
#include <sys/time.h>
#include <unistd.h>

#include "system.h"

#include "constants.h"
#include "log.h"
#include "config.h"
#include "util.h"
#include "database.h"

#include "cleanup.h"

/* RCS "$Id$" keyword string recording the revision of this source file; */
/* nothing in this file reads it */
static char rcsid[] =
  "$Id: cleanup.c,v 0.14 1994/05/17 13:37:09 reinpost Exp $";

/* static stringint configured_value(char *url, int directive);
 *
 * scans the configuration file for the first line that carries the
 * requested directive and whose pattern matches the given URL
 * (without query)
 *
 * example: URL = "http://www.win.tue.nl/internet/", directive = XP_REFRESH,
 * first matching configuration line = "refresh www*internet 2m",
 * then 120 (2 minutes) will be returned
 *
 * return value: a time in seconds (refresh and cleanup directives), or a
 * dynamically allocated string (direct and translate directives)
 */

#define XP_NONE    -1  /* URL could not be matched in file  */
#define XP_NO_CONF -2  /* could not read configuration file */
#define XP_PRS_ERR -3  /* parse error in configuration line */
#define XP_OK	    0  /* correct value found in configuration file */
			 /* (as far as the checking goes) */

/* the three keys used in expire.conf */
#define XP_REFRESH CF_REFRESH  /* 1 */
#define XP_CLEANUP CF_CLEANUP  /* 2 */
#define XP_DIRECT  3
#define XP_TRANSL  4

/*
 * expire_directive - translate a configuration keyword into its XP_ code
 *
 * The comparison ignores case; both the one-letter and the spelled-out
 * form of each keyword are accepted.  Any other keyword yields XP_PRS_ERR.
 */
static int expire_directive(char *key)
{
  int code = XP_PRS_ERR;  /* doubles as the "unknown keyword" result */

  if (strcasecmp(key,"r") == 0 || strcasecmp(key,"refresh") == 0)
  {
    code = XP_REFRESH;
  }
  else if (strcasecmp(key,"c") == 0 || strcasecmp(key,"cleanup") == 0)
  {
    code = XP_CLEANUP;
  }
  else if (strcasecmp(key,"d") == 0 || strcasecmp(key,"direct") == 0)
  {
    code = XP_DIRECT;
  }
  else if (strcasecmp(key,"t") == 0 || strcasecmp(key,"translate") == 0)
  {
    code = XP_TRANSL;
  }

  return(code);
}

#if ST_JUTTEMIS

/* not used; we may need to rescan expire.conf for different URLs */

/* one flag per directive; presumably "the value for this directive has */
/* already been read" - nothing visible in this file sets or tests them */
static int r_read = 0;
static int c_read = 0;
static int d_read = 0;
static int t_read = 0;

/* return the flag belonging to the directive; anything that is not */
/* XP_REFRESH, XP_CLEANUP or XP_DIRECT is treated as XP_TRANSL */
static int conf_value_read(int directive)
{
  return(
    directive == XP_REFRESH ? r_read :
    directive == XP_CLEANUP ? c_read :
    directive == XP_DIRECT ? d_read :
    t_read);
}

/* raise the flag belonging to the directive, using the same mapping */
/* from directive to flag as conf_value_read() */
static void set_conf_value_read(int directive)
{
  if (directive == XP_REFRESH)
    r_read = 1;
  else if (directive == XP_CLEANUP)
    c_read = 1;
  else if (directive == XP_DIRECT)
    d_read = 1;
  else
    t_read = 1;
}
/* desu ton ("not used" spelled backwards: end of the unused section) */

#endif /* ST_JUTTEMIS */

/* result of configured_value(): .s is filled in for the string-valued */
/* directives (XP_DIRECT, XP_TRANSL), .i for the time-valued ones */
typedef union {char *s; int i;} stringint;

static stringint configured_value(char *url, int directive)
{
  char l[MAX_STRING_LEN+1];
  char key[MAX_STRING_LEN+1];
  char pattern[MAX_STRING_LEN+1];
  char value[MAX_STRING_LEN+1];
  FILE *f;
  struct stat finfo;
  int rc = XP_NONE;
  int time_configured;

  /* open the configuration file; on failure, forget about the rest */

  if ((stat(expire_confname,&finfo) == -1)
	|| !(f = fopen(expire_confname,"r")))
  {
    log_if_debug("expire configuration file could not be read:",
      expire_confname);
    rc = XP_NO_CONF;
  }
  else
  {
    /* actually read it */
    /* try to match the given URL (without query) to a pattern in the file */
    /* with the requested directive indicated in the key */
    /* the first matching line is used */

    /* parse every line until a match is found */
    while (!(cfg_getline(l,256,f)) && (rc != XP_OK))
    {
      skipspace(l);
      cfg_getword(key,l);
      skipspace(l);
      cfg_getword(pattern,l);
      skipspace(l);
      cfg_getword(value,l);

      if (match(pattern,url))
      {
	if (expire_directive(key) == directive)
	{
	  /* if value is supposed to be a time spec, convert */
	  if ((directive != XP_DIRECT) && (directive != XP_TRANSL)
          /* they don't get a time spec */
		&& (time_configured = stringtotime(value)) == -1)
	  {
            log_if_debug("malformed time specification in configuration file:",
		value);
	  }
	  else
	  {
	    rc = XP_OK;
	  }
	}
	/* else the directive is incorrect - ignore the line and continue */
      }
    }
    fclose(f);
    /* the configuration file has been read */
    /* rc is now XP_NO_CONF (no config file), XP_NONE (no match), */
    /* XP_PRS_ERR (only a faulty match), or XP_OK (some correct match) */

    /* if no value was found, set the defaults */
    if (rc != XP_OK)
    {
      log_if_debug("no match in configuration file for directive",
	directive == XP_REFRESH ? "r" : directive == XP_CLEANUP ? "c"
	  : directive == XP_DIRECT ? "d" : "t");
      /* supply a default value */
      if (directive == XP_CLEANUP)
      {
	time_configured = a_expires;
      }
      else if (directive == XP_REFRESH)
      { 
        time_configured = m_expires;
      }
      else /* (directive == XP_DIRECT) || (directive == XP_TRANSL)) */
      {
	strcpy(value,"");
      }
    }
    else
    {
#ifdef DEBUG
      char buf[HUGE_STRING_LEN+1];
      sprintf(buf,"in %s, the %s value is configured as %s for URL",
	expire_confname,
	  directive == XP_REFRESH ? "refresh" :
            directive == XP_CLEANUP ? "cleanup" :
	      directive == XP_DIRECT ? "direct" : "translate", value);
      log_if_debug(buf,url);
#endif /* DEBUG */
    }
  }
  /* we now have a value for the requested directive */

  /* return */
  {
    stringint si;
    if ((directive == XP_DIRECT) || (directive == XP_TRANSL))
    {
      si.s = strdup(value);
    }
    else
    {
      si.i = time_configured;
    }
    return(si);
  }
}

int url_is_cached(char *url, char *query_string)
/* decide whether cached copies of the document behind this URL are kept: */
/* returns 0 for a URL with a non-empty query string unless cache_queries */
/* is set, and otherwise tells whether the refresh time configured for */
/* the URL exceeds cache_threshold */
{
  int refresh_time;

  if (*query_string && !cache_queries)
  {
    return(0);
  }

  refresh_time = configured_value(url,XP_REFRESH).i;
  return(refresh_time > cache_threshold);
}

/*
 * translation_cache_prefix - pick the cache prefix to use for a URL
 *
 * cache_prefix: prefix to fall back on when nothing is configured
 * urlq:         URL, possibly still carrying a query string; everything
 *               from the '?' located by rind() onwards is ignored
 *
 * Returns a newly allocated string which the caller must free: the value
 * of the "translate" directive configured for the URL or, when that is
 * empty, a copy of cache_prefix.  Returns NULL when memory is exhausted.
 */
static char *translation_cache_prefix
  (char *cache_prefix, char *urlq)
{
  char *url;
  char *configured;
  int qpos;

  /* URL still contains query string; ignore it */
  qpos = rind(urlq,'?');
  if (qpos != -1)
  {
    url = (char *)malloc(qpos+1);
    if (url)
    {
      strncpy(url,urlq,qpos);
      url[qpos] = '\0';
    }
  }
  else
  {
    url = strdup(urlq);
  }
  if (!url)
  {
    return(NULL);
  }

  configured = configured_value(url,XP_TRANSL).s;
  free(url);

  if (!configured)
  {
    /* configured_value could not allocate its result */
    return(NULL);
  }
  if (*configured)
  {
    /* already a private heap copy: hand it to the caller as it is */
    /* instead of duplicating it once more and losing the original */
    return(configured);
  }

  /* not configured; use the default */
  free(configured);
  /* don't die() on failure here; not every user has error.[ch] */
  return(strdup(cache_prefix));
}

char *conf_cache_prefix(char *url)
/* cache prefix for the URL, with the global cache_prefix as the default; */
/* the result is whatever translation_cache_prefix() hands back */
{
  char *prefix;

  prefix = translation_cache_prefix(cache_prefix,url);
  return(prefix);
}

/*
 * cached_file_status - classify a cached file as old or new
 *
 * file:      path of the cached file
 * directive: XP_CLEANUP compares the file's access time with the cleanup
 *            time configured for its URL; XP_REFRESH compares its
 *            modification time with the configured refresh time
 *
 * Returns CF_NOT when the file does not exist, CF_ERR when stat() fails
 * for any other reason or the file is not a regular file, CF_OLD when
 * the relevant timestamp plus the configured time lies in the past, and
 * CF_NEW otherwise (which includes any other directive).
 */
int cached_file_status(char *file, int directive)
{
  struct stat finfo;
  time_t now = time(NULL);

  if (stat(file,&finfo) == -1)
  {
    if (errno != ENOENT)
    {
      return(CF_ERR);
    }
    return(CF_NOT);
  }
  if (!S_ISREG(finfo.st_mode))  /* no test whether it's readable */
  {
    return(CF_ERR);
  }

  /* compare file date to configured expiration date */
  {
    /* NOTE(review): assumes file_to_url() never produces a URL longer */
    /* than the file name - confirm against its definition */
    char url[strlen(file)+1];
    int on_q;
    int is_old = 0;

    file_to_url(url,file);
    /* cut off the query string, if any */
    on_q = ind(url,'?');
    if (on_q != -1) url[on_q] = '\0';

#if DEBUG
    {
      char msg[MAX_STRING_LEN+1];
      stringint si = configured_value(url,XP_CLEANUP);
      int intje = configured_value(url,XP_REFRESH).i;
      sprintf(msg,"the cleanup rate is %d; the refresh rate is %d, for URL",
	si.i, intje);
      log_if_debug(msg,url);
    }
#endif /* DEBUG */

    /* only the value for the requested directive is looked up */
    if (directive == XP_CLEANUP)
    {
      is_old = finfo.st_atime+configured_value(url,XP_CLEANUP).i < now;
    }
    else if (directive == XP_REFRESH)
    {
      is_old = finfo.st_mtime+configured_value(url,XP_REFRESH).i < now;
    }

    /* old: the document hasn't been accessed resp. refreshed lately */
    return(is_old ? CF_OLD : CF_NEW);
  }
}

/* remove_cached_file - unlink() the given file and pass on unlink's result */
int remove_cached_file(char *file)
{
  int status;

  status = unlink(file);
  return(status);
}

/*
 * url_to_remote_source - prepend the configured "direct" value to a URL
 *
 * Looks up the XP_DIRECT value configured for url and returns a newly
 * allocated string holding that value immediately followed by url; the
 * caller must free it.  With nothing configured the value is "", so the
 * result is a plain copy of url.  Returns NULL when memory is exhausted.
 */
char *url_to_remote_source(char *url)
{
  char *remote_cache_prefix;
  char *url_trs;

  remote_cache_prefix = configured_value(url,XP_DIRECT).s;
  /* using the default, "", is OK */
  if (!remote_cache_prefix)
  {
    /* configured_value could not allocate its result */
    return(NULL);
  }

  url_trs = (char *)malloc(strlen(remote_cache_prefix)+strlen(url)+1);
  if (url_trs)
  {
    /* the buffer has been sized for exactly these two strings */
    sprintf(url_trs,"%s%s",remote_cache_prefix,url);
    /* no check on presence of slash or anything */
  }
  free(remote_cache_prefix);
  return(url_trs);
}
