Logo Search packages:      
Sourcecode: fhist version File versions

str.c

/*
 *    fhist - file history and comparison tools
 *    Copyright (C) 1991-1994, 1998-2002 Peter Miller;
 *    All rights reserved.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 *
 * MANIFEST: string manipulation functions
 *
 * A literal pool is maintained.  Each string has a reference count.  The
 * string stays in the literal pool for as long as it has a positive
 * reference count.  To determine if a string is already in the literal pool,
 * linear dynamic hashing is used to guarantee an O(1) search.  Making all equal
 * strings the same item in the literal pool means that string equality is
 * a pointer test, and thus very fast.
 */

#include <ac/stddef.h>
#include <ac/stdio.h>
#include <ac/stdlib.h>
#include <ac/string.h>
#include <ac/ctype.h>
#include <ac/stdarg.h>

#include <error.h>
#include <mem.h>
#include <mprintf.h>
#include <str.h>


/*
 * maximum conversion width for numbers
 */
#define MAX_WIDTH 509

static      string_ty   **hash_table;
static      str_hash_ty hash_modulus;
static      str_hash_ty hash_cutover;
static      str_hash_ty hash_cutover_mask;
static      str_hash_ty hash_cutover_split_mask;
static      str_hash_ty hash_split;
static      str_hash_ty hash_load;
static      int         changed;

#define MAX_HASH_LEN 20


/*
 * NAME
 *    hash_generate - hash string to number
 *
 * SYNOPSIS
 *    str_hash_ty hash_generate(char *s, size_t n);
 *
 * DESCRIPTION
 *    The hash_generate function is used to make a number from a string.
 *
 * RETURNS
 *    str_hash_ty - the magic number
 *
 * CAVEAT
 *    Only the last MAX_HASH_LEN characters are used.
 *    It is important that str_hash_ty be unsigned (int or long).
 */

static str_hash_ty
hash_generate(const char *s, size_t n)
{
      str_hash_ty retval;

      if (n > MAX_HASH_LEN)
      {
            s += n - MAX_HASH_LEN;
            n = MAX_HASH_LEN;
      }

      retval = 0;
      while (n > 0)
      {
            retval = (retval + (retval << 1)) ^ *s++;
            --n;
      }
      return retval;
}


/*
 * NAME
 *    str_initialize - start up string table
 *
 * SYNOPSIS
 *    void str_initialize(void);
 *
 * DESCRIPTION
 *    The str_initialize function is used to create the hash table and
 *    initialize it to empty.
 *
 * RETURNS
 *    void
 *
 * CAVEAT
 *    This function must be called before any other defined in this file.
 */

void
str_initialize(void)
{
      str_hash_ty j;

      if (hash_table)
            return;
      hash_modulus = 1<<8; /* MUST be a power of 2 */
      hash_cutover = hash_modulus;
      hash_split = hash_modulus - hash_cutover;
      hash_cutover_mask = hash_cutover - 1;
      hash_cutover_split_mask = (hash_cutover * 2) - 1;
      hash_load = 0;
      hash_table = (string_ty **)mem_alloc(hash_modulus * sizeof(string_ty *));
      for (j = 0; j < hash_modulus; ++j)
            hash_table[j] = 0;
}


/*
 * NAME
 *    split - reduce table loading
 *
 * SYNOPSIS
 *    void split(void);
 *
 * DESCRIPTION
 *    The split function is used to reduce the load factor on the hash table.
 *
 * RETURNS
 *    void
 *
 * CAVEAT
 *    A load factor of about 80% is suggested.
 */

static void
split(void)
{
      string_ty   *p;
      string_ty   *p2;
      str_hash_ty idx;

      /*
       * get the list to be split across buckets
       */
      p = hash_table[hash_split];
      hash_table[hash_split] = 0;

      /*
       * increase the modulus by one
       */
      hash_modulus++;
      hash_table =
            mem_change_size
            (
                  hash_table,
                  hash_modulus * sizeof(string_ty *)
            );
      hash_table[hash_modulus - 1] = 0;
      hash_split = hash_modulus - hash_cutover;
      if (hash_split >= hash_cutover)
      {
            hash_cutover = hash_modulus;
            hash_split = 0;
            hash_cutover_mask = hash_cutover - 1;
            hash_cutover_split_mask = (hash_cutover * 2) - 1;
      }

      /*
       * now redistribute the list elements
       */
      while (p)
      {
            p2 = p;
            p = p->str_next;

            idx = p2->str_hash & hash_cutover_mask;
            if (idx < hash_split)
                  idx = p2->str_hash & hash_cutover_split_mask;
            p2->str_next = hash_table[idx];
            hash_table[idx] = p2;
      }
}


/*
 * NAME
 *    str_from_c - make string from C string
 *
 * SYNOPSIS
 *    string_ty *str_from_c(char*);
 *
 * DESCRIPTION
 *    The str_from_c function is used to make a string from a null terminated
 *    C string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_from_c(const char *s)
{
      return str_n_from_c(s, strlen(s));
}


/*
 * NAME
 *    str_n_from_c - make string
 *
 * SYNOPSIS
 *    string_ty *str_n_from_c(char *s, size_t n);
 *
 * DESCRIPTION
 *    The str_n_from_c function is used to make a string from an array of
 *    characters.  No null terminator is assumed.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_n_from_c(const char *s, size_t length)
{
      str_hash_ty hash;
      str_hash_ty idx;
      string_ty   *p;

      if (!hash_table)
            str_initialize();
      hash = hash_generate(s, length);

      idx = hash & hash_cutover_mask;
      if (idx < hash_split)
            idx = hash & hash_cutover_split_mask;

      for (p = hash_table[idx]; p; p = p->str_next)
      {
            if
            (
                  p->str_hash == hash
            &&
                  p->str_length == length
            &&
                  !memcmp(p->str_text, s, length)
            )
            {
                  p->str_references++;
                  return p;
            }
      }

      p = (string_ty *)mem_alloc(sizeof(string_ty) + length);
      p->str_hash = hash;
      p->str_length = length;
      p->str_references = 1;
      p->str_next = hash_table[idx];
      hash_table[idx] = p;
      memcpy(p->str_text, s, length);
      p->str_text[length] = 0;

      hash_load++;
      while (hash_load * 10 > hash_modulus * 8)
            split();
      ++changed;
      return p;
}


/*
 * NAME
 *    str_copy - make a copy of a string
 *
 * SYNOPSIS
 *    string_ty *str_copy(string_ty *s);
 *
 * DESCRIPTION
 *    The str_copy function is used to make a copy of a string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_copy(string_ty *s)
{
      s->str_references++;
      return s;
}


/*
 * NAME
 *    str_free - release a string
 *
 * SYNOPSIS
 *    void str_free(string_ty *s);
 *
 * DESCRIPTION
 *    The str_free function is used to indicate that a string hash been
 *    finished with.
 *
 * RETURNS
 *    void
 *
 * CAVEAT
 *    This is the only way to release strings DO NOT use the free function.
 */

void
str_free(string_ty *s)
{
      str_hash_ty idx;
      string_ty   **spp;

      if (!s)
            return;
      if (s->str_references > 1)
      {
            s->str_references--;
            return;
      }
      ++changed;

      /*
       * find the hash bucket it was in,
       * and remove it
       */
      idx = s->str_hash & hash_cutover_mask;
      if (idx < hash_split)
            idx = s->str_hash & hash_cutover_split_mask;
      for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
      {
            if (*spp == s)
            {
                  *spp = s->str_next;
                  mem_free(s);
                  --hash_load;
                  return;
            }
      }

      /*
       * should never reach here!
       */
      fatal_raw("attempted to free non-existent string (bug)");
}


/*
 * NAME
 *    str_catenate - join two strings
 *
 * SYNOPSIS
 *    string_ty *str_catenate(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *    The str_catenate function is used to concatenate two strings to form a
 *    new string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_catenate(string_ty *s1, string_ty *s2)
{
      static char *tmp;
      static size_t     tmplen;
      string_ty   *s;
      size_t            length;

      length = s1->str_length + s2->str_length;
      if (tmplen < length)
      {
            tmplen = length;
            tmp = mem_change_size(tmp, tmplen);
      }
      memcpy(tmp, s1->str_text, s1->str_length);
      memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
      s = str_n_from_c(tmp, length);
      return s;
}


/*
 * NAME
 *    str_cat_three - join three strings
 *
 * SYNOPSIS
 *    string_ty *str_cat_three(string_ty *, string_ty *, string_ty *);
 *
 * DESCRIPTION
 *    The str_cat_three function is used to concatenate three strings to form
 *    a new string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_cat_three(string_ty *s1, string_ty *s2, string_ty *s3)
{
      static char *tmp;
      static size_t     tmplen;
      string_ty   *s;
      size_t            length;

      length = s1->str_length + s2->str_length + s3->str_length;
      if (tmplen < length)
      {
            tmplen = length;
            tmp = mem_change_size(tmp, tmplen);
      }
      memcpy(tmp, s1->str_text, s1->str_length);
      memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
      memcpy
      (
            tmp + s1->str_length + s2->str_length,
            s3->str_text,
            s3->str_length
      );
      s = str_n_from_c(tmp, length);
      return s;
}


/*
 * NAME
 *    str_equal - test equality of strings
 *
 * SYNOPSIS
 *    int str_equal(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *    The str_equal function is used to test if two strings are equal.
 *
 * RETURNS
 *    int; zero if the strings are not equal, nonzero if the strings are
 *    equal.
 *
 * CAVEAT
 *    This function is implemented as a macro in strings.h
 */

#ifndef str_equal

int
str_equal(string_ty *s1, string_ty *s2)
{
      return (s1 == s2);
}

#endif


/*
 * NAME
 *    str_upcase - upcase a string
 *
 * SYNOPSIS
 *    string_ty *str_upcase(string_ty *);
 *
 * DESCRIPTION
 *    The str_upcase function is used to form a string which is an upper case
 *    form of the supplied string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_upcase(string_ty *s)
{
      static char *tmp;
      static size_t     tmplen;
      string_ty   *retval;
      char        *cp1;
      char        *cp2;

      if (tmplen < s->str_length)
      {
            tmplen = s->str_length;
            tmp = mem_change_size(tmp, tmplen);
      }
      for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
      {
            int c;

            c = *cp1;
            if (c >= 'a' && c <= 'z')
                  c += 'A' - 'a';
            *cp2 = c;
      }
      retval = str_n_from_c(tmp, s->str_length);
      return retval;
}


/*
 * NAME
 *    str_downcase - lowercase string
 *
 * SYNOPSIS
 *    string_ty *str_downcase(string_ty *);
 *
 * DESCRIPTION
 *    The str_downcase function is used to form a string which is a lowercase
 *    form of the supplied string.
 *
 * RETURNS
 *    string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *    finished with.
 *
 * CAVEAT
 *    The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_downcase(string_ty *s)
{
      static char *tmp;
      static size_t     tmplen;
      string_ty   *retval;
      char        *cp1;
      char        *cp2;

      if (tmplen < s->str_length)
      {
            tmplen = s->str_length;
            tmp = mem_change_size(tmp, tmplen);
      }
      for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
      {
            int c;

            c = *cp1;
            if (c >= 'A' && c <= 'Z')
                  c += 'a' - 'A';
            *cp2 = c;
      }
      retval = str_n_from_c(tmp, s->str_length);
      return retval;
}


/*
 * NAME
 *    str_bool - get boolean value
 *
 * SYNOPSIS
 *    int str_bool(string_ty *s);
 *
 * DESCRIPTION
 *    The str_bool function is used to determine the boolean value of the
 *    given string.  A "false" result is if the string is empty or
 *    0 or blank, and "true" otherwise.
 *
 * RETURNS
 *    int: zero to indicate a "false" result, nonzero to indicate a "true"
 *    result.
 */

int
str_bool(string_ty *s)
{
      char        *cp;

      cp = s->str_text;
      while (*cp)
      {
            if (!isspace((unsigned char)*cp) && *cp != '0')
                  return 1;
            ++cp;
      }
      return 0;
}


/*
 * NAME
 *    str_field - extract a field from a string
 *
 * SYNOPSIS
 *    string_ty *str_field(string_ty *, char separator, int field_number);
 *
 * DESCRIPTION
 *    The str_field functipon is used to erxtract a field from a string.
 *    Fields of the string are separated by ``separator'' characters.
 *    Fields are numbered from 0.
 *
 * RETURNS
 *    Asking for a field off the end of the string will result in a null
 *    pointer return.  The null string is considered to have one empty field.
 */

string_ty *
str_field(string_ty *s, int sep, int fldnum)
{
      char        *cp;
      char        *ep;

      cp = s->str_text;
      while (fldnum > 0)
      {
            ep = strchr(cp, sep);
            if (!ep)
                  return 0;
            cp = ep + 1;
            --fldnum;
      }
      ep = strchr(cp, sep);
      if (ep)
            return str_n_from_c(cp, ep - cp);
      return str_from_c(cp);
}


void
slow_to_fast(char **in, string_ty **out, size_t length)
{
      size_t            j;

      if (out[0])
            return;
      for (j = 0; j < length; ++j)
            out[j] = str_from_c(in[j]);
}


string_ty *
str_format(const char *fmt, ...)
{
      va_list           ap;
      string_ty   *result;

      va_start(ap, fmt);
      result = vmprintf_str(fmt, ap);
      va_end(ap);
      return result;
}


string_ty *
str_vformat(const char *fmt, va_list ap)
{
      return vmprintf_str(fmt, ap);
}

Generated by  Doxygen 1.6.0   Back to index