Logo Search packages:      
Sourcecode: fhist version File versions

compare.c

/*
 *    fhist - file history and comparison tools
 *    Copyright (C) 1991-1995, 1998-2002 Peter Miller;
 *    All rights reserved.
 *
 *    Derived from a work
 *    Copyright (C) 1990 David I. Bell.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 * MANIFEST: functions to compare text files
 *
 * This program is based on the algorithm in:
 *    An O(ND) Difference Algorithm and Its Variations
 *    Eugene W. Myers
 *    (TR 85-6, April 10, 1985)
 *    Department of Computer Science
 *    The University of Arizona
 *    Tuscon, Arizona 85721
 *
 * Also see:
 *    A File Comparison Program
 *    Webb Miller and Eugene W. Myers
 *    Software Practice and Experience
 *    (Volume 15, No. 11, November 1985)
 *
 * Actual implementation and features by David I. Bell.
 * Enhancements and bug fixes by Peter Miller.
 */

#include <ac/ctype.h>
#include <ac/stddef.h>
#include <ac/stdio.h>
#include <ac/string.h>
#include <ac/libintl.h>

#include <compare.h>
#include <cmalloc.h>
#include <error.h>
#include <error_intl.h>
#include <fcheck.h>
#include <input.h>
#include <input/file.h>
#include <input/file_text.h>
#include <input/quotprinenco.h>
#include <input/hexify.h>
#include <str.h>
#include <trace.h>


#define SNAKEALLOCSIZE (size_t)500 /* chunk size for snake allocation */
#define CHARALLOCSIZE (size_t)10240 /* allocation size for file data */
#define UNIQUEALLOCSIZE (size_t)512 /* unique handling size for allocation */
#define LINEALLOCSIZE (size_t)2000 /* number of lines to allocate */
#define HASHSIZE 2003         /* size of hash table */
#define CHANGEBLABINTERVAL 100      /* how often to talk about changes */
#define READBLABINTERVAL 1000 /* how often to talk about reading */
#define SNAKEBLABINTERVAL 1000      /* how often to talk about snakes */
#define TABSIZE 8       /* tab size */
#define DETABALLOCSIZE (size_t)128 /* buffer allocation size for detab */

static size_t     tablesize;  /* needed table size */
static size_t     snakecount; /* number of snake structures used */
static int  availchars; /* number of available characters */
static char *nextchar;  /* next allocable character storage */
static long *V1;        /* the row containing the last d */
static long *V2;        /* another row */
static SNAKE      *nextsnake; /* next allocable snake structure */
static SNAKE      *lastsnake; /* end of allocable snake structures */
static SNAKE      *endsnake;  /* last snake structure in list */
static LINE **hashtable;      /* hash table for lines */
static modifyline_type modifyline;

FCOMP_DATA  fc;         /* externally referencable data */


/*
 * NAME
 *    canonical
 *
 * SYNOPSIS
 *    char *canonical(char *s);
 *
 * DESCRIPTION
 *    The canonical function is used to apply the -Upcase and -Spaces
 *    options to produce a canonical line for comparison purposes.
 *
 * ARGUMENTS
 *    s     - string to canonicalize
 *
 * RETURNS
 *    A pointer to the canonical line is returned.  This will be valid
 *    until the next call to this function.  If the canonical string
 *    would be the same as the input string, the input string is returned.
 */

static char *
canonical(char *s)
{
    static size_t   bufsiz;
    static char       *buf;
    size_t      len;
    int               changed;
    int               space;
    char        *cp1;
    char        *cp2;

    /*
     * if no modifiers are in effect,
     * no alterations will be required
     */
    if (!fc.upcaseflag && !fc.spaceflag)
      return s;

    /*
     * make sure the buffer is large enough
     */
    len = strlen(s) + 1;
    if (len > bufsiz)
    {
      if (buf)
      {
          bufsiz = len;
          buf = r_realloc_and_check(buf, bufsiz);
      }
      else
      {
          bufsiz = len;
          buf = r_alloc_and_check(bufsiz);
      }
    }

    /*
     * simple upcase is easier to handle
     */
    if (fc.upcaseflag)
    {
      changed = 0;
      cp1 = s;
      cp2 = buf;
      while (*cp1)
      {
          if (islower((unsigned char)*cp1))
          {
            *cp2++ = toupper((unsigned char)*cp1);
            changed = 1;
          }
          else
            *cp2++ = *cp1;
          ++cp1;
      }
      if (!changed)
          return s;
      *cp2 = 0;
      return buf;
    }

    /*
     * spaces are rather harder to handle
     *
     * keep track of whether a run of spaces has been seen.
     * replace this run of spaces with a single space
     * provided it is not at either end of a line.
     */
    cp1 = s;
    cp2 = buf;
    space = 0;
    changed = 0;
    while (*cp1)
    {
      int         c;

      c = (unsigned char)*cp1++;
      if (fc.upcaseflag && islower(c))
      {
          c = toupper(c);
          changed = 1;
      }
      if (c == '\n')
      {
          if (space)
            changed = 1;
          *cp2++ = c;
          space = 0;
      }
      else if (isspace(c))
      {
          if (space || c != ' ')
            changed = 1;
          space = 1;
      }
      else
      {
          if (space)
          {
            if (cp2 != buf)
                *cp2++ = ' ';
            else
                changed = 1;
          }
          *cp2++ = c;
          space = 0;
      }
    }
    if (!changed)
      return s;
    *cp2 = 0;
    return buf;
}


/*
 * Allocate a string of a certain size.    This routine is used instead of
 * simply calling malloc in order to reduce the overhead.  The allocated
 * strings cannot be freed or realloced.
 */

void *
allocstr(long len)
{
    char        *cp;
    static unsigned alignment_mask;

    if (!alignment_mask)
    {
      struct test
      {
          char        a;
          double      b;
      };
      unsigned    alignment;

      alignment = offsetof(struct test, b);
      /* make sure is a power of 2 */
      while ((alignment & (~alignment + 1)) != alignment)
          ++alignment;
      alignment_mask = alignment - 1;
    }

    /* round length to a multiple of alignment */
    len = (len + alignment_mask) & ~alignment_mask;

    if (len >= UNIQUEALLOCSIZE)
      return cm_alloc_and_check(len);
    cp = nextchar;
    if (len > availchars)
    {
      cp = cm_alloc_and_check(CHARALLOCSIZE);
      nextchar = cp;
      availchars = CHARALLOCSIZE;
    }
    nextchar += len;
    availchars -= len;
    return cp;
}


/*
 * Subroutine to allocate a line structure of the appropriate size and
 * add it to the hash table.  If it matches an already existing line, then
 * the old structure will be returned.
 */

static LINE *
addline(char *cp)
{
    LINE        *lp;    /* current line element */
    char        *curcp; /* current character of string */
    LINE        **hashentry; /* hash entry to use */
    long        hash;   /* hash value */
    long        len;    /* line length */
    int               ch;           /* current char */
    char        *canon;

    /*
     * Compute line length and hash value.
     * This has to take into account the space and uppercase flags.
     */
    canon = canonical(cp);
    hash = 0;
    for (curcp = canon; *curcp; curcp++)
    {
      ch = *curcp;
      hash += ((hash * 101) + ch);
      if (hash < 0)
          hash = (hash + 1) & INFINITY;
    }
    len = curcp - canon;

    /*
     * Search proper hash chain for already existing line.
     * If the hash accidentally matches, modify it and try again.
     *
     * Search on the real data, not the canonical form,
     * that way fcomp -w -s can report more natural looking results.
     */
    again:
    hashentry = &hashtable[hash % HASHSIZE];
    for (lp = *hashentry; lp; lp = lp->l_next)
    {
      if (lp->l_hash != hash)
          continue;
      if (strcmp(cp, lp->l_data) == 0)
          return lp;
      if
      (
          (cp != canon || lp->l_canon != lp->l_data)
      &&
          strcmp(canon, lp->l_canon) == 0
      )
          continue;
      hash = ((hash * 1234321) + 1) & INFINITY;
      goto again;
    }

    /*
     * Line not found, allocate a new one
     */
    if (canon != cp)
    {
      lp = (LINE *)allocstr(LINE_SIZE(strlen(cp)));
      strcpy(lp->l_data, cp);
      lp->l_canon = allocstr(len + 1);
      strcpy(lp->l_canon, canon);
    }
    else
    {
      lp = (LINE *)allocstr(LINE_SIZE(len));
      strcpy(lp->l_data, cp);
      lp->l_canon = lp->l_data;
    }
    lp->l_hash = hash;
    lp->l_next = *hashentry;
    *hashentry = lp;
    return lp;
}


static int
is_blank_string(const char *s)
{
    while (*s)
      if (!isspace((unsigned char)*s))
          return 0;
    return 1;
}


/*
 * Readfile - read in a file and remember the maximum number of lines it needs.
 * This routine squeezes out blank lines if specified.
 * The first specified number of lines is skipped first.
 * Then the specified number of lines is modified.
 */

static void
readfile(FILEINFO *fi, long skip, long modify)
{
    input_ty          *fp;    /* file pointer for file to read in */
    LINE        **lines;      /* pointer to lines array */
    long        *lnums; /* line numbers within file */
    char        *cp;    /* current line */
    long        linesavail;   /* available number of lines */
    long        linecount;    /* current number of lines */
    long        linenumber; /* current line number */
    long        linelen;      /* length of line read */
    
    fi->is_binary = 0;
    if (fc.verbosity > VERBOSE_DEFAULT)
      error_raw("[Reading \"%s\"]", fi->f_name);
    fp = fi->f_file;
    if (skip > 0)
      input_skip_lines(fp, skip);
    linecount = 0;
    linenumber = 0;
    linesavail = LINEALLOCSIZE;
    lines = (LINE **)cm_alloc_and_check(sizeof(LINE *) * LINEALLOCSIZE);
    lnums = (long *)cm_alloc_and_check(sizeof(long) * LINEALLOCSIZE);
    for (;;)
    {
      cp = input_readline(fp, &linelen, 0, &fi->is_binary);
      if (!cp)
          break;
      if (++linenumber <= modify && modifyline)
          cp = modifyline(cp, &linelen, (struct INFO *)0);
      if
      (
          (fc.verbosity > VERBOSE_DEFAULT)
      &&
          ((linenumber % READBLABINTERVAL) == 0)
      )
          error_raw("[%ld lines]", linenumber);
      if (fc.blankflag && is_blank_string(cp))
          continue;
      if (linecount >= linesavail)
      {
          linesavail += LINEALLOCSIZE;
          lines =
            (LINE **)
            cm_realloc_and_check(lines, sizeof(LINE *) * linesavail);
          lnums =
            (long *)cm_realloc_and_check(lnums, sizeof(long) * linesavail);
      }
      lnums[linecount] = linenumber;
      lines[linecount] = addline(cp);
      linecount++;
    }
    fi->f_lnums = lnums;
    fi->f_lines = lines;
    fi->f_linecount = linecount;
    fi->f_linestotal = linenumber;
    if (linecount > fc.maxlines)
      fc.maxlines = linecount;
    input_delete(fp);
    if (fc.verbosity > VERBOSE_DEFAULT)
    {
      if (linecount == linenumber)
          error_raw("[Done, %ld lines]", linenumber);
      else
      {
          error_raw
          (
            "[Done, %ld lines, %ld non-blanks]",
            linenumber,
            linecount
          );
      }
    }
}


/*
 * Allocate a new snake structure and append it to the snake list.
 */

static SNAKE *
allocsnake(void)
{
    SNAKE       *sp;    /* snake being allocated */

    sp = nextsnake;
    if (sp >= lastsnake)
    {
      sp = (SNAKE *)cm_alloc_and_check(sizeof(SNAKE) * SNAKEALLOCSIZE);
      nextsnake = sp;
      lastsnake = sp + SNAKEALLOCSIZE;
    }
    nextsnake++;
    sp->next = NULL;
    if (!fc.snakelist)
      fc.snakelist = sp;
    else
      endsnake->next = sp;
    endsnake = sp;
    if (fc.verbosity > VERBOSE_DEFAULT && !(++snakecount % SNAKEBLABINTERVAL))
      error_raw("[%ld snake elements]", snakecount);
    return sp;
}


/*
 * Routine to find the middle snake of an optimial D-path spanning
 * lines A to A+N in file A to lines B to B+N in file B.  Returns the
 * length D of the D-path as a return value, and the upper left and
 * lower right relative coordinates of a snake midway through the D-path.
 */

static long
midsnake(int depth, long A, long N, long B, long M, long *ulx, long *uly,
    long *lrx, long *lry)
{
    long        D;
    long        MAXD;
    long        DELTA;
    long        odd;
    long        x;
    long        y;
    long        k;
    long        changes;
    long        oldx;
    LINE        **lp1;
    LINE        **lp2;

    if (fc.debugflag)
    {
      error_raw
      (
          "%*ssearching: %ld,%ld to %ld,%ld   midsnake: ",
          depth * 2,
          "",
          A,
          B,
          A + N,
          B + M
      );
    }

    DELTA = N - M;
    odd = DELTA & 1;
    MAXD = (M + N + 1) / 2;
    V1[1] = 0;
    V2[-1] = 0;
    changes = -odd - 2;

    /*
     * This is the main loop for searching for the snake.
     * D is the distance off the diagonals, and is the number
     * of changes needed to get from the upper left to the
     * lower right corner of the region.
     */
    for (D = 0; D <= MAXD; D++)
    {
      changes += 2;
      if (changes > fc.maxchanges)
      {
          sub_context_ty  *scp;

          scp = sub_context_new();
          sub_var_set_long(scp, "Number", fc.maxchanges);
          fatal_intl(scp, i18n("More than $number changes required"));
          /* NOTREACHED */
          sub_context_delete(scp);
      }
      if
      (
          (fc.verbosity > VERBOSE_DEFAULT)
      &&
          (depth == 0)
      &&
          (changes > 1)
      &&
          ((changes % CHANGEBLABINTERVAL) <= 1)
      )
          error_raw("[%ld changes]", changes - odd);

      /*
       * Examine all diagonals within current distance.
       * First search from upper left to lower right,
       * and then search from lower right to upper left.
       */
      for (k = -D; k <= D; k += 2)
      {
          /*
           * Find the end of the furthest forward D-path
           * in diagonal k.
           */
          if (k == -D || (k != D && (V1[k - 1] < V1[k + 1])))
            x = V1[k + 1];
          else
            x = V1[k - 1] + 1;
          y = x - k;
          lp1 = &fc.fileA.f_lines[A + x];
          lp2 = &fc.fileB.f_lines[B + y];
          oldx = x;
          while ((x < N) && (y < M) && ((*lp1)->l_hash == (*lp2)->l_hash))
          {
            x++;
            y++;
            lp1++;
            lp2++;
          }
          V1[k] = x;

          /*
           * See if path overlaps furthest reverse D-path.
           * If so, then we have found the snake.
           */
          if (odd && (k >= (DELTA - (D - 1))) && (k <= (DELTA + (D - 1))))
          {
            if ((x + V2[k - DELTA]) >= N)
            {
                *ulx = oldx;
                *uly = oldx - k;
                *lrx = x;
                *lry = y;
                if (fc.debugflag)
                {
                  error_raw
                  (
                      "%ld,%ld to %ld,%ld (odd)",
                      *ulx,
                      *uly,
                      *lrx,
                      *lry
                  );
                }
                return changes;
            }
          }
      }

      for (k = -D; k <= D; k += 2)
      {
          /*
           * Find the end of the furthest reaching reverse
           * path in diagonal k+DELTA.
           */
          if (k == D || (k != -D && (V2[k + 1] < V2[k - 1])))
            x = V2[k - 1];
          else
            x = V2[k + 1] + 1;
          y = x + k;
          lp1 = &fc.fileA.f_lines[A + N - x - 1];
          lp2 = &fc.fileB.f_lines[B + M - y - 1];
          oldx = x;
          while ((x < N) && (y < M) && ((*lp1)->l_hash == (*lp2)->l_hash))
          {
            x++;
            y++;
            lp1--;
            lp2--;
          }
          V2[k] = x;

          /*
           * See if path overlaps furthest forward D-path.
           * If so, then we have found the snake.
           */
          if (!odd && (k <= D - DELTA) && (k >= -D - DELTA))
          {
            if ((x + V1[k + DELTA]) >= N)
            {
                *ulx = N - x;
                *uly = M - y;
                *lrx = N - oldx;
                *lry = *lrx + *uly - *ulx;
                if (fc.debugflag)
                {
                  error_raw
                  (
                      "%ld,%ld to %ld,%ld (even)",
                      *ulx,
                      *uly,
                      *lrx,
                      *lry
                  );
                }
                return changes;
            }
          }
      }
    }
    fatal_raw("Middle snake procedure failed (bug)");
    return 0;
}


/*
 * Recursive routine to find a minimal D-path through the edit graph
 * of the two input files.  Arguments are the beginning line numbers in
 * the files, and the number of lines to examine.  This is basically a
 * divide-and-conquer routine which finds the middle snake of an optimal
 * D-path, then calls itself to find the remainder of the path before the
 * snake and after the snake.
 */

static void
findsnake(int depth, long A, long N, long B, long M)
{
    SNAKE       *sp;
    long        ulx;
    long        uly;
    long        lrx;
    long        lry;
    long        D;
    long        count;

    /*
     * If more than one change needed, then call ourself for each part.
     */
    D = midsnake(depth, A, N, B, M, &ulx, &uly, &lrx, &lry);
    if ((fc.verbosity > VERBOSE_DEFAULT) && (depth == 0))
    {
      error_raw("[%ld change%s total]", D, ((D == 1) ? "" : "s"));
      error_raw("[Finding edit path]");
    }

    if (D > 1)
    {
      if ((ulx > 0) && (uly > 0))
          findsnake(depth + 1, A, ulx, B, uly);
      count = lrx - ulx;
      if (count > fc.maxjoin)
      {
          sp = allocsnake();
          sp->line1 = A + ulx;
          sp->line2 = B + uly;
          sp->count = count;
      }
      N -= lrx;
      M -= lry;
      if ((N > 0) && (M > 0))
          findsnake(depth + 1, A + lrx, N, B + lry, M);
      return;
    }

    /*
     * Only 0 or 1 change needed, so we can compute the result directly.
     * First compute the snake coming from the upper left corner if any.
     */
    if (N > M)
      count = uly;
    else
      count = ulx;
    if (count > fc.maxjoin)
    {
      sp = allocsnake();
      sp->line1 = A;
      sp->line2 = B;
      sp->count = count;
    }

    /*
     * Finally compute the snake coming from the lower right corner if any.
     */
    count = lrx - ulx;
    if (count > fc.maxjoin)
    {
      sp = allocsnake();
      sp->line1 = A + ulx;
      sp->line2 = B + uly;
      sp->count = count;
    }
}


/*
 * Routine to generate the D-path list and compute the number
 * of insertions, deletions and matching lines found.
 */

static void
makesnakes(void)
{
    SNAKE       *sp;    /* current snake element */
    long        line1;  /* current line in file A */
    long        line2;  /* current line in file B */
    
    if (fc.verbosity > VERBOSE_DEFAULT)
      error_raw("[Beginning comparisons]");
    tablesize = fc.maxlines * 2 + 1;
    V1 = (long *)cm_alloc_and_check(sizeof(long) * tablesize);
    V2 = (long *)cm_alloc_and_check(sizeof(long) * tablesize);
    V1 += fc.maxlines;
    V2 += fc.maxlines;
    if ((fc.fileA.f_linecount > 0) && (fc.fileB.f_linecount > 0))
      findsnake(0, 0L, fc.fileA.f_linecount, 0L, fc.fileB.f_linecount);

    /*
     * End the list with the lower right endpoint
     */
    sp = allocsnake();
    sp->line1 = fc.fileA.f_linecount;
    sp->line2 = fc.fileB.f_linecount;
    sp->count = 0;
    if (fc.verbosity > VERBOSE_DEFAULT)
      error_raw("[Comparisons complete]");
    if (fc.debugflag)
    {
      error_raw("%8s%8s%8s", "line1", "line2", "count");
      for (sp = fc.snakelist; sp; sp = sp->next)
          error_raw("%8ld%8ld%8ld", sp->line1, sp->line2, sp->count);
    }

    /*
     * Scan the snake list and calculate the number of inserted,
     * deleted, and matching lines.
     */
    line1 = 0;
    line2 = 0;
    for (sp = fc.snakelist; sp; sp = sp->next)
    {
      fc.deletes += (sp->line1 - line1);
      fc.inserts += (sp->line2 - line2);
      fc.matches += sp->count;
      line1 = sp->line1 + sp->count;
      line2 = sp->line2 + sp->count;
    }
}


/*
 * Print the specified range of line numbers for both files.
 * The given string is also typed as part of the message.
 * This routine handles both full and line number output.
 * The real line numbers in each file are typed.
 */

static void
printrange(FILE *fp, const char *msg, long line1, long count1, long line2,
    long count2)
{
    long        beg1;
    long        end1;
    long        beg2;
    long        end2;   /* real beginning and ending line numbers */
    string_ty         *buf1;
    string_ty         *buf2;

    end1 = line1 + count1 - 1;
    end2 = line2 + count2 - 1;
    if (line1 >= fc.fileA.f_linecount)
    {
      beg1 = fc.fileA.f_linestotal + 1;
      end1 = beg1;
    }
    else
    {
      beg1 = fc.fileA.f_lnums[line1];
      end1 = fc.fileA.f_lnums[end1];
    }
    if (line2 >= fc.fileB.f_linecount)
    {
      beg2 = fc.fileB.f_linestotal + 1;
      end2 = beg2;
    }
    else
    {
      beg2 = fc.fileB.f_lnums[line2];
      end2 = fc.fileB.f_lnums[end2];
    }
    if (beg1 == end1)
    {
      if (fc.hexify_flag)
          buf1 = str_format("0x%lX", beg1 - 1);
      else
          buf1 = str_format("%ld", beg1);
    }
    else
    {
      if (fc.hexify_flag)
          buf1 = str_format("0x%lX-0x%lX", beg1 - 1, end1 - 1);
      else
          buf1 = str_format("%ld-%ld", beg1, end1);
    }
    if (beg2 == end2)
    {
      if (fc.hexify_flag)
          buf2 = str_format("0x%lX", beg2 - 1);
      else
          buf2 = str_format("%ld", beg2);
    }
    else
    {
      if (fc.hexify_flag)
          buf2 = str_format("0x%lX-0x%lX", beg2 - 1, end2 - 1);
      else
          buf2 = str_format("%ld-%ld", beg2, end2);
    }
    if (fc.editscriptflag)
    {
      fprintf(fp, "%c %s %s\n", *msg, buf1->str_text, buf2->str_text);
    }
    else
    {
      fprintf
      (
          fp,
          "\n******************** %s [A %s, B %s]:\n",
          msg,
          buf1->str_text,
          buf2->str_text
      );
    }
    str_free(buf1);
    str_free(buf2);
}


/*
 * Translate all tabs to the appropriate number of spaces in a line.
 * This is used when the line is not output at a nice tab boundary,
 * so that the output still looks correct.  Returns the new converted
 * string, which is only valid until the next call.
 */

static char *
detab(const char *str)
{
    int               pos;    /* current character position */
    int               ch;           /* current character */
    char        *destcp;      /* destination pointer */
    static char       *strbuf;      /* string buffer */
    static int        maxlen; /* maximum size of string buffer */
    
    if (!maxlen)
    {
      maxlen = DETABALLOCSIZE;
      strbuf = r_alloc_and_check(maxlen + TABSIZE + 1);
    }

    destcp = strbuf;
    pos = 0;
    for (;;)
    {
      ch = *str++;
      if (!ch)
          break;
      if (pos >= maxlen)
      {
          maxlen += DETABALLOCSIZE;
          strbuf = r_realloc_and_check(strbuf, maxlen + TABSIZE + 1);
          destcp = strbuf + pos;
      }
      if (ch != '\t')
      {
          *destcp++ = ch;
          pos++;
      }
      else
      {
          for (;;)
          {
            *destcp++ = ' ';
            pos++;
            if ((pos % TABSIZE) == 0)
                break;
          }
      }
    }
    *destcp = '\0';
    return strbuf;
}


/*
 * Print the specified number of lines of a file beginning at the
 * specified line number.  This prints imbedded blank lines too.
 * The lines can be preceeded by line number if desired.  Context
 * lines can be added also.  Tabs are converted to the appropriate
 * number of spaces if line numbering or tags are supplied.
 */

static void
printlines(FILE *fp, FILEINFO *fi, long line, long count, const char *tagstr)
{
    LINE        **lines;      /* data of file */
    long        *lnum;  /* line numbers */
    char        *data;  /* current data line */
    long        curlnum;      /* current line number */
    
    line = line - fc.context - 1;
    count += (2 * fc.context);
    if (line < 0)
    {
      count += line;
      line = 0;
    }
    if ((line + count) > fi->f_linecount)
      count = fi->f_linecount - line;
    lines = fi->f_lines + line;
    lnum = fi->f_lnums + line;
    curlnum = *lnum;
    while (count-- > 0)
    {
      while (curlnum < *lnum)
      {
          if (fc.whatflag)
            fprintf(fp, "%s\n", tagstr);
          else if (fc.numflag)
            fprintf(fp, "%c%ld\n", fi->f_tag, curlnum);
          else
            putc('\n', fp);
          curlnum++;
      }
      if (fc.whatflag)
          fprintf(fp, "%s  ", tagstr);
      else if (fc.numflag)
          fprintf(fp, "%c%-7ld ", fi->f_tag, curlnum);
      data = (*lines)->l_data;
      if (fc.whatflag)
          data = detab(data);
      fputs(data, fp);
      lines++;
      lnum++;
      curlnum++;
    }
}


/*
 * Dump lines which match, either just the line numbers or with the text.
 */

void
dumpmatch(char *outputname)
{
    FILE        *fp;
    SNAKE       *sp;

    if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
      error_raw("[Writing matching lines to \"%s\"]", outputname);
    if (outputname)
    {
      fp = fopen_and_check(outputname, "w");
    }
    else
    {
      fp = stdout;
      outputname = gettext("standard output");
    }
    if (fc.editscriptflag)
      fprintf(fp, "A %s\nB %s\n", fc.fileA.f_name, fc.fileB.f_name);
    else
    {
      fprintf
      (
          fp,
          "FILE A: %s\nFILE B: %s\n",
          fc.fileA.f_name,
          fc.fileB.f_name
      );
      fprintf
      (
          fp,
          "TOTALS: %ld inserted  %ld deleted  %ld matched\n",
          fc.inserts,
          fc.deletes,
          fc.matches
      );
    }
    for (sp = fc.snakelist; sp; sp = sp->next)
    {
      if (sp->count <= 0)
          continue;
      printrange(fp, "MATCH", sp->line1, sp->count, sp->line2, sp->count);
      if (!fc.editscriptflag)
          printlines(fp, &fc.fileB, sp->line2 + 1, sp->count, "");
    }
    if (fc.editscriptflag)
      fprintf(fp, "T %ld %ld %ld\n", fc.inserts, fc.deletes, fc.matches);
    fflush_and_check(fp, outputname);
    fclose_and_check(fp, outputname);
}


/*
 * Dump the edit script in the normal manner, showing changed line
 * numbers along with the relevent text.
 */

void
dumpnormal(char *outputname)
{
    FILE        *fp;    /* output file */
    SNAKE       *sp;    /* current snake */
    long        line1;
    long        line2;  /* current line numbers */
    long        count1;
    long        count2; /* line counts */
    
    if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
      error_raw("[Writing differences to \"%s\"]", outputname);
    if (outputname)
    {
      fp = fopen_and_check(outputname, "w");
    }
    else
    {
      fp = stdout;
      outputname = gettext("standard output");
    }
    if (fc.quickflag)
    {
      fprintf
      (
          fp,
          "Files \"%s\" and \"%s\":\n",
          fc.fileA.f_name,
          fc.fileB.f_name
      );
      fprintf
      (
          fp,
          "%ld inserted  %ld deleted  %ld matched\n",
          fc.inserts,
          fc.deletes,
          fc.matches
      );
      goto done;
    }
    if ((fc.inserts == 0) && (fc.deletes == 0))
    {
      if (fc.verbosity)
      {
          fprintf
          (
            fp,
            "Files \"%s\" and \"%s\" are identical\n",
            fc.fileA.f_name,
            fc.fileB.f_name
          );
      }
      goto done;
    }
    fprintf
    (
      fp,
     "FILE A: %s\nFILE B: %s\nTOTALS: %ld inserted  %ld deleted  %ld matched\n",
      fc.fileA.f_name,
      fc.fileB.f_name,
      fc.inserts,
      fc.deletes,
      fc.matches
    );
    line1 = 0;
    line2 = 0;
    for (sp = fc.snakelist; sp; sp = sp->next)
    {
      count1 = sp->line1 - line1;
      count2 = sp->line2 - line2;
      if (count1 && count2)
      {
          printrange(fp, "REPLACE", line1, count1, line2, count2);
          printlines(fp, &fc.fileA, line1 + 1, count1, "");
          fputs("******************** WITH:\n", fp);
          printlines(fp, &fc.fileB, line2 + 1, count2, "");
      }
      else if (count1)
      {
          printrange(fp, "DELETE", line1, count1, line2, 1L);
          printlines(fp, &fc.fileA, line1 + 1, count1, "");
      }
      else if (count2)
      {
          printrange(fp, "INSERT", line1, 1L, line2, count2);
          printlines(fp, &fc.fileB, line2 + 1, count2, "");
      }
      line1 = sp->line1 + sp->count;
      line2 = sp->line2 + sp->count;
    }

    done:
    fflush_and_check(fp, outputname);
    fclose_and_check(fp, outputname);
}


/*
 * Dump all of both files showing what happened to each line.
 */

void
dumpwhat(char *outputname)
{
    FILE        *fp;
    SNAKE       *sp;
    long        line1;
    long        line2;  /* current line numbers */
    long        count1;
    long        count2; /* line counts */
    
    if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
      error_raw("[Writing what changes were made to \"%s\"]", outputname);
    if (outputname)
    {
      fp = fopen_and_check(outputname, "w");
    }
    else
    {
      fp = stdout;
      outputname = "(stdandard output)";
    }
    fprintf(fp, "FILE A: %s\n", fc.fileA.f_name);
    fprintf(fp, "FILE B: %s\n", fc.fileB.f_name);
    fprintf
    (
      fp,
      "TOTALS: %ld inserted  %ld deleted  %ld matched\n",
      fc.inserts,
      fc.deletes,
      fc.matches
    );
    line1 = 0;
    line2 = 0;
    for (sp = fc.snakelist; sp; sp = sp->next)
    {
      count1 = sp->line1 - line1;
      count2 = sp->line2 - line2;
      if (count1 && count2)
      {
          printlines(fp, &fc.fileA, line1 + 1, count1, "|-");
          printlines(fp, &fc.fileB, line2 + 1, count2, "|+");
      }
      else if (count1)
          printlines(fp, &fc.fileA, line1 + 1, count1, "|-");
      else if (count2)
          printlines(fp, &fc.fileB, line2 + 1, count2, "|+");
      printlines(fp, &fc.fileB, sp->line2 + 1, sp->count, "  ");
      line1 = sp->line1 + sp->count;
      line2 = sp->line2 + sp->count;
    }
    fflush_and_check(fp, outputname);
    fclose_and_check(fp, outputname);
}


/*
 * Dump only the line numbers which have changed.
 * This is useful for machine processing of the differences.
 */

void
dumplines(char *outputname)
{
    FILE        *fp;    /* output file */
    SNAKE       *sp;    /* current snake */
    long        line1;
    long        line2;  /* previous line numbers */
    long        count1;
    long        count2; /* count of line numbers */
    
    if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
      error_raw("[Writing edit script to \"%s\"]", outputname);
    if (outputname)
    {
      fp = fopen_and_check(outputname, "w");
    }
    else
    {
      fp = stdout;
      outputname = gettext("standard output");
    }
    line1 = 0;
    line2 = 0;
    fprintf(fp, "A %s\nB %s\n", fc.fileA.f_name, fc.fileB.f_name);
    for (sp = fc.snakelist; sp; sp = sp->next)
    {
      count1 = sp->line1 - line1;
      count2 = sp->line2 - line2;
      if (count1 && count2)
          printrange(fp, "R", line1, count1, line2, count2);
      else if (count1)
          printrange(fp, "D", line1, count1, line2, 1L);
      else if (count2)
          printrange(fp, "I", line1, 1L, line2, count2);
      line1 = sp->line1 + sp->count;
      line2 = sp->line2 + sp->count;
    }
    fprintf(fp, "T %ld %ld %ld\n", fc.inserts, fc.deletes, fc.matches);
    fflush_and_check(fp, outputname);
    fclose_and_check(fp, outputname);
}


/*
 * Reinitialize for a new file comparison.
 * This zeroes out our static variables so that another
 * file comparison will work correctly.
 */

void
fcompreset(void)
{
    tablesize = 0;
    snakecount = 0;
    availchars = 0;
    fc.inserts = 0;
    fc.deletes = 0;
    fc.matches = 0;
    fc.snakelist = NULL;
    nextchar = NULL;
    V1 = NULL;
    V2 = NULL;
    nextsnake = NULL;
    lastsnake = NULL;
    endsnake = NULL;
    hashtable = NULL;
}


void
binary_warning(const char *filename)
{
    sub_context_ty  *scp;

    scp = sub_context_new();
    sub_var_set_charstar(scp, "File_Name", filename);
    error_intl(scp, i18n("warning: $filename is binary"));
    sub_context_delete(scp);
}


void
binary_fatal(const char *filename)
{
    sub_context_ty  *scp;

    scp = sub_context_new();
    sub_var_set_charstar(scp, "File_Name", filename);
    fatal_intl(scp, i18n("$filename is binary"));
    /* NOTREACHED */
    sub_context_delete(scp);
}


/*
 * Routine to do the comparison of two files.
 * If either of the input filenames is NULL, then this indicates
 * that the file has already been read by an earlier call.
 */

void
fcomp(char *nameA, char *nameB)
{
    long        changes;      /* total number of changes */
    
    trace(("fcomp(nameA = \"%s\", nameB = \"%s\")\n{\n", nameA, nameB));
    fc.fileA.f_tag = 'A';
    fc.fileB.f_tag = 'B';
    if (nameA)
    {
      fc.fileA.f_name = nameA;
      if (fc.hexify_flag)
      {
          fc.fileA.f_file = input_hexify(input_file_open(nameA), 1);
      }
      else if (fc.binary)
      {
          fc.fileA.f_file =
            input_quoted_printable_encode(input_file_open(nameA), 1);
      }
      else
          fc.fileA.f_file = input_file_text_open(nameA);
    }
    if (nameB)
    {
      fc.fileB.f_name = nameB;
      if (fc.hexify_flag)
      {
          fc.fileB.f_file = input_hexify(input_file_open(nameB), 1);
      }
      else
          fc.fileB.f_file = input_file_text_open(nameB);
    }
    if (!nameA && !nameB)
    {
      fatal_intl(0, i18n("input file names not supplied"));
    }
    if (!hashtable)
    {
      int         j;

      hashtable = (LINE **)cm_alloc_and_check(HASHSIZE * sizeof(LINE *));
      for (j = 0; j < HASHSIZE; ++j)
          hashtable[j] = 0;
    }
    if (nameA)
    {
      readfile(&fc.fileA, fc.fileAskip, fc.fileAmodify);
      if (fc.fileA.is_binary)
          binary_warning(nameA);
    }
    if (nameB)
    {
      readfile(&fc.fileB, fc.fileBskip, fc.fileBmodify);
      if (fc.fileB.is_binary)
          binary_warning(nameB);
    }
    changes = fc.fileA.f_linecount - fc.fileB.f_linecount;
    if (changes < 0)
      changes = -changes;
    if (changes > fc.maxchanges)
    {
      sub_context_ty    *scp;

      scp = sub_context_new();
      sub_var_set_long(scp, "Number", fc.maxchanges);
      fatal_intl(scp, i18n("More than $number changes required"));
      /* NOTREACHED */
      sub_context_delete(scp);
    }
    makesnakes();
    trace(("}\n"));
}


void
modifyline_register(modifyline_type func)
{
    modifyline = func;
}

Generated by  Doxygen 1.6.0   Back to index