/*DDK*************************************************************************/
/*                                                                           */
/* COPYRIGHT    Copyright (C) 1992 IBM Corporation                           */
/*                                                                           */
/*    The following IBM OS/2 source code is provided to you solely for       */
/*    the purpose of assisting you in your development of OS/2 device        */
/*    drivers. You may use this code in accordance with the IBM License      */
/*    Agreement provided in the IBM Developer Connection Device Driver       */
/*    Source Kit for OS/2. This Copyright statement may not be removed.      */
/*                                                                           */
/*****************************************************************************/
/**************************************************************************
 *
 * SOURCE FILE NAME = PPDSHORT.C
 *
 * DESCRIPTIVE NAME = PPD Compressor
 *
 *
 * VERSION = V2.0
 *
 * DATE
 *
 * DESCRIPTION  Sorts words from PPD files and builds a .h file containing
 *              a table of words and offsets.
 *
 *
 * FUNCTIONS    main
 *              compare
 *              compare2
 *
 * NOTES
 *
 * STRUCTURES
 *
 * EXTERNAL REFERENCES
 *
 * EXTERNAL FUNCTIONS
 *
*/

#include <os2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/************************************************************************
* Globals . . .
************************************************************************/
FILE *ppdIn;                           /* input stream: the word list  */
FILE *ppdOut;                          /* output stream: generated .h  */


/************************************************************************
* Structures . . .
************************************************************************/
/* One distinct keyword read from the input file together with the     */
/* number of times it occurred.                                         */
typedef struct _WORDLIST
{
  SHORT  sCount;                       /* frequency count for this word */
  PCHAR  pchKeyWord;                   /* pointer to keyword; main stores
                                          a malloc'ed copy of the line
                                          exactly as fgets returned it  */
} WORDS, *PWORDS;


/************************************************************************
* Defines . . .
************************************************************************/
#define  IMAXTABLE1    127             /* max size of table 1          */
#define  IMAXTABLE_N   254             /* max size tables 2 through n  */
#define  I_NTABLES       3             /* number of tables 2 through n */
/* Total keyword capacity over all tables.  The expansion is fully      */
/* parenthesized so it keeps its value inside a larger expression       */
/* (for example 2 * IMAXWORDS or IMAXWORDS % n).                        */
#define  IMAXWORDS     (IMAXTABLE1+(I_NTABLES * IMAXTABLE_N))
#define  IBUFFSIZE     256             /* size of the line buffer      */
#define  IWORDSIZE     sizeof(WORDS)   /* element size passed to qsort */
#define  IMAXPERLINE   5               /*# of keywords/offsets per line*/
#define  IMINARGNUM    2               /* min number of args passed in */
#define  IMAXARGNUM    3               /* max number of args passed in */


/************************************************************************
* Function prototypes . . .
************************************************************************/
/* qsort callbacks; each argument points at one WORDS element.          */
int compare(const void *arg1, const void *arg2);   /* orders by sCount     */
int compare2(const void *arg1, const void *arg2);  /* orders by pchKeyWord */



/************************************************************************
 *
 * FUNCTION NAME = main
 *
 * DESCRIPTION   = Function Mainline.  Reads a word list one line at a
 *                 time, counts how often each word occurs, drops the
 *                 words that are not worth tokenizing, splits the rest
 *                 into tables, sorts each table alphabetically and
 *                 writes the tables to a C header file.
 *
 *                 Only adjacent identical lines are merged, so the
 *                 input is expected to be sorted already (see step 3
 *                 of the usage text).
 *
 * INPUT         = argc - count of commandline arguments
 *                 argv - array of pointers to commandline arguments;
 *                        argv[1] is the input file, the optional
 *                        argv[2] replaces the default output file
 *                        name ppdtable.h
 *
 * OUTPUT        = The generated header file, plus a listing of the
 *                 words, counts and offsets on stdout.
 *
 * RETURN-NORMAL = exit(0)
 * RETURN-ERROR  = exit(1) after printing a message (bad usage, file
 *                 cannot be opened, empty input, allocation failure,
 *                 or too many distinct words)
 *
 ***********************************************************************/
int main( int argc, char **argv )
{
  BOOL   FoundOne;                     /* flag for stripping out words */
  int    i;                            /* loop counter                 */
  int    j;                            /* loop counter                 */
  int    iBufferLen;                   /* length of buffer             */
  int    iOffsetCount;                 /* offset into keywords array   */
  int    iBase;                        /* base to send the sort routine*/
  int    iBytesReq;                    /* size required for each table */
  int    iTempCount;                   /* temporary count of keywords  */
  int    iWordCount;                   /* index of the last keyword    */
  int    iStart;                       /* start of next loop           */
  PSZ    szBuffer;                     /* file buffer                  */
  PSZ    szInFile;                     /* input file (arg)             */
  PSZ    szOutFile = "ppdtable.h";     /* output file default          */
  USHORT usTableCount;                 /* number of tables produced    */
  WORDS  Words[IMAXWORDS+1];       /* add an extra one so we don't trap*/

  if ((argc < IMINARGNUM) || (argc > IMAXARGNUM))
  {
    printf("The correct usage of this program is:\n");
    printf("    PPDSHORT filename.ext\n\n");
    printf("This program is part of a procedure that compresses\n");
    printf("the PPD files for the PostScript driver.\n");
    printf("The procedure includes the following steps:\n\n");
    printf("  1)  Get the latest PPD2BIN.C and build it\n");
    printf("  2)  Run PPD2BIN.EXE with -w option from the ppds dir\n");
    printf("  3)  Sort the resultant file, PSWORDS.DAT\n");
    printf("  4)  Copy PSWORDS.DAT from the ppds dir to post32\\ppd dir\n");
    printf("  5)  Run PPDSHORT in post32\\ppd dir to create PPDTABLE.H\n");
    printf("  6)  Copy PPDTABLE.H to post32\\inc\n");
    printf("  7)  Build new PPD2BIN.EXE using new PPDTABLE.H either manually\n"
           "      or let the makefile build it\n");
    exit(1);
  }
  szInFile = *++argv;
  if (argc == IMAXARGNUM)
  {
    szOutFile = *++argv;
  }
  printf("szInFile = %s\n", szInFile);
  printf("szOutFile = %s\n\n", szOutFile);

  ppdIn = fopen (szInFile, "r");       /* open the input file          */
  if (ppdIn == NULL)
  {
    printf ("can't open %s\n", szInFile);
    exit(1);
  }

  szBuffer = malloc(IBUFFSIZE);
  if (szBuffer == NULL)
  {
    printf ("error1 from malloc\n");
    exit (1);
  }

  /**********************************************************************
  * Read in the first word and put it into the array.                   *
  * fgets keeps the trailing newline, so every stored keyword normally  *
  * ends in '\n'; the output code further down relies on that.          *
  * NOTE(review): a final input line without a newline would lose its   *
  * last character when written -- confirm the input always ends with   *
  * a newline.                                                          *
  **********************************************************************/
  if (fgets(szBuffer,IBUFFSIZE,ppdIn) == NULL)
  {
    printf ("error from fgets\n");
    exit(1);
  }
  iBufferLen = strlen(szBuffer);

  iWordCount = 0;
  Words[iWordCount].pchKeyWord = malloc(iBufferLen +1);
  if (Words[iWordCount].pchKeyWord == NULL)
  {
    printf ("error2 from malloc\n");
    exit (1);
  }
  strcpy(Words[iWordCount].pchKeyWord,szBuffer);
  Words[iWordCount].sCount = 1;

  /**********************************************************************
  * Loop and read the words one at a time . . .                         *
  **********************************************************************/
  while (fgets(szBuffer,IBUFFSIZE,ppdIn) != NULL)
  {
    iBufferLen = strlen(szBuffer);
    if (strcmp(szBuffer,Words[iWordCount].pchKeyWord))
    {
      /******************************************************************
      * This is a new word -- increment the Word Count, allocate        *
      * space for the new word, set its count to 1, and copy the        *
      * buffer into the newly allocated space                           *
      ******************************************************************/
      iWordCount++;
      if (iWordCount > IMAXWORDS-1)
      {
        printf ("error -- too many words\n");
        exit (1);
      }
      Words[iWordCount].pchKeyWord = malloc(iBufferLen + 1);
      if (Words[iWordCount].pchKeyWord == NULL)
      {
        printf ("error3 from malloc\n");
        exit (1);
      }
      Words[iWordCount].sCount = 1;
      strcpy(Words[iWordCount].pchKeyWord,szBuffer);
    }
    else
      /******************************************************************
      * This is not a new word -- just increment the sCount             *
      * NOTE(review): sCount is a SHORT; a very frequent word could     *
      * exceed its range -- confirm against real input sizes.           *
      ******************************************************************/
      Words[iWordCount].sCount = Words[iWordCount].sCount+1;
  }
  fclose (ppdIn);                      /* close the input file         */

  /**********************************************************************
  * We have a list of unique words -- now sort them by their sCount,    *
  * most frequent first.                                                *
  **********************************************************************/
  qsort((PVOID)&Words[0],iWordCount+1,IWORDSIZE,compare);

  /**********************************************************************
  * Print the words and their counts to stdout, separating the ones     *
  * that occur only once with a -------- line.                          *
  **********************************************************************/
  for (i = 0; i <= iWordCount; i++)
  {
    /* i > 0 keeps the look-back at Words[i-1] inside the array        */
    if ((i > 0) && (Words[i].sCount == 1) && (Words[i-1].sCount == 2))
      printf("--------------------------------------------------------\n");
    printf("%4.0d %s",Words[i].sCount,Words[i].pchKeyWord);
  }
  /**********************************************************************
  * Throw out any whose sCount is 1 . . . they'll be at the bottom      *
  * because of the descending sort above.                               *
  **********************************************************************/
  iTempCount = iWordCount;
  for (i=0;i<=iWordCount;i++)
  {
    if (Words[i].sCount == 1)
    {
      iTempCount--;                    /* decrement the temp word count*/
      free(Words[i].pchKeyWord);       /* free storage for this word   */
    }
  }
  iWordCount = iTempCount;             /* the new word count           */

  /**********************************************************************
  *  It takes 1 byte to store a token in table 1, 2 for table 2,        *
  *  3 for table 3, etc.  Therefore, the string must be longer          *
  *  than the number of bytes it would take to store its token in       *
  *  order to make it worth tokenizing.  The first table was already    *
  *  taken care of in ppd2bin.c.  Now take care of the other tables.    *
  *  The strategy is to loop until we find one that needs to be         *
  *  removed.  Then we reset and start from that word so that the       *
  *  index is updated to reflect the current table that each word is    *
  *  in.                                                                *
  *                                                                     *
  *  iBytesReq is the minimum keyword length that is kept: 3 for the    *
  *  first IMAXTABLE_N slots after table 1, then one more for each      *
  *  further table.                                                     *
  *  NOTE(review): the scan starts at IMAXTABLE1-1, which is the last   *
  *  slot of table 1, and tests it with the table 2 threshold --        *
  *  confirm whether IMAXTABLE1 was intended.                           *
  **********************************************************************/
  printf("\niWordCount before = %d",iWordCount);
  FoundOne = TRUE;
  iTempCount = iWordCount;
  iStart = IMAXTABLE1-1;
  while (FoundOne == TRUE)
  {
    FoundOne = FALSE;
    for (i = iStart; (i <= iTempCount) && (FoundOne == FALSE); i++)
    {
      if ((i - IMAXTABLE1) < IMAXTABLE_N)
        iBytesReq = 3;
      else
        iBytesReq = ((i - IMAXTABLE1) / IMAXTABLE_N) + 3;

      /* strlen()-1 discounts the newline stored with the keyword      */
      if (strlen(Words[i].pchKeyWord)-1 < iBytesReq)
      {
        free(Words[i].pchKeyWord);     /* free storage for this word   */

        /***************************************************************
        * Adjust all the words up one . . .                            *
        ***************************************************************/
        for (j = i; j < iTempCount; j++)
        {
          Words[j].sCount = Words[j+1].sCount;
          Words[j].pchKeyWord = Words[j+1].pchKeyWord;
        }
        FoundOne = TRUE;
        iTempCount--;                  /* decrement the temp word count*/
        iStart = i;                    /* set the start to current i   */
      }
    }
  }
  iWordCount = iTempCount;             /* the new word count           */
  printf("\niWordCount after = %d\n",iWordCount);

  /**********************************************************************
  * Take the first 127 and sort them alphabetically.                    *
  * Then take the second group (anything over 127) and sort them        *
  * alphabetically.                                                     *
  * Right now, we only expect to have 2 tables.  However, this is       *
  * hopefully written to handle more than 2 tables, just in case it's   *
  * ever required.  The assumption is that additional tables will be    *
  * the same size as table 2.                                           *
  **********************************************************************/
  usTableCount = 0;
  iTempCount = iWordCount+1;           /* actual number of keywords    */
  iBase = 0;
  while (iTempCount > 0)
  {
    if ((iTempCount > IMAXTABLE1) && (usTableCount == 0))
    {
      /******************************************************************
      * Sort the first table and adjust the base, tempcount, and        *
      * tablecount.                                                     *
      ******************************************************************/
      qsort((PVOID)&Words[iBase],IMAXTABLE1,IWORDSIZE,compare2);
      usTableCount++;
      iTempCount -= IMAXTABLE1;
      iBase += IMAXTABLE1;
    }
    else if ((iTempCount <= IMAXTABLE1) && (usTableCount == 1))
    {
      /******************************************************************
      * A second table with no more than IMAXTABLE1 words left: sort    *
      * them all and finish.  (A first table that is also the only      *
      * table falls into the last branch, which sorts it the same way.) *
      ******************************************************************/
      qsort((PVOID)&Words[iBase],min(iTempCount,IMAXTABLE1),IWORDSIZE,
                   compare2);
      usTableCount++;
      iTempCount -= iTempCount;
    }
    else
    {
      /******************************************************************
      * Sort the second, third, etc. tables and increment usTableCount, *
      * adjust the base, and tempcount.                                 *
      ******************************************************************/
      qsort((PVOID)&Words[iBase],min(iTempCount,IMAXTABLE_N),IWORDSIZE,
                    compare2);
      usTableCount++;
      if (iTempCount >= IMAXTABLE_N)
      {
        iTempCount -= IMAXTABLE_N;
        iBase += IMAXTABLE_N;
      }
      else
        iTempCount -= iTempCount;
    }
  }
  /**********************************************************************
  * The words are all sorted and ready to write to the file.            *
  **********************************************************************/
  ppdOut = fopen (szOutFile, "wb");    /* open the output file         */
  if (!ppdOut)
  {
    /* pass the string itself; %s needs a pointer, not a character     */
    printf ("can't open %s\n", szOutFile);
    exit (1);
  }

  /**********************************************************************
  * Start with the defines and other housekeeping . . .                 *
  **********************************************************************/
  fprintf (ppdOut, "//Number of lists 1 or 2\n");
  fprintf (ppdOut,"#define PSLISTCOUNT ");
  fprintf (ppdOut,"%d\n\n",usTableCount);
  fprintf (ppdOut,"//For each list the size\n");
  fprintf (ppdOut,"SHORT sListSize [PSLISTCOUNT] = {");
  for (i=1;i<=usTableCount;i++)
  {
    if (i==1)
    {
      /******************************************************************
      * Print how many words are in the first table                     *
      ******************************************************************/
      fprintf (ppdOut, "%d",min(IMAXTABLE1,iWordCount+1));
    }
    else if (i == 2)
    {
      /******************************************************************
      * Print how many words are in the second table . . .              *
      ******************************************************************/
      fprintf (ppdOut, "%d",min(IMAXTABLE_N,(iWordCount+1)-IMAXTABLE1));
    }
    else
    {
     /*******************************************************************
     * Print how many words are in the third and later tables . . .     *
     *******************************************************************/
     fprintf (ppdOut, "%d",min(IMAXTABLE_N,
                              (iWordCount+1)-IMAXTABLE1-((i-2)*IMAXTABLE_N)));
    }
    if (i<usTableCount)
      fprintf (ppdOut, "%c",',');
  }
  fprintf (ppdOut, "%s\n\n","};");
  fprintf (ppdOut,"//The Keywords . . . \n\n");
  fprintf (ppdOut,"CHAR achPSKeyWords[] = \n");
  fprintf (ppdOut,"%c",'"');

  /**********************************************************************
  * Now go through and write out the tables, 5 keywords per line,       *
  **********************************************************************/
  iTempCount = 1;
  for (i=0;i <= iWordCount;i++)
  {

    /********************************************************************
    * The keywords still carry the newline that fgets kept, so copy     *
    * everything but the last character and terminate the copy.         *
    * We also have to put a " at the beginning and end of each line.    *
    ********************************************************************/
    iBufferLen = strlen(Words[i].pchKeyWord)-1;
    memcpy(szBuffer,Words[i].pchKeyWord,iBufferLen);
    szBuffer[iBufferLen] = '\0';
    fprintf (ppdOut, "%s\\000",szBuffer);
    if ((i < iWordCount) && (iTempCount == IMAXPERLINE))
    {
      iTempCount = 1;
      fprintf (ppdOut,"%c\n",'"');
      fprintf (ppdOut,"%c",'"');
    }
    else
      iTempCount++;
  }
  fprintf (ppdOut, "%c",'"');
  fprintf (ppdOut, "%c\n\n",';');      /* end with ;                   */

  fprintf (ppdOut,"//The Offsets . . . \n\n");
  fprintf (ppdOut,"SHORT sPSKeyWordOffset[] = {\n");

  /**********************************************************************
  * Write the array of offsets . . .                                    *
  **********************************************************************/
  printf("\n\nSorted keywords with offsets:  Table 1\n\n");
  iOffsetCount = 0;
  iTempCount = 1;                      /* keep track of words per line */
  for (i=0;i <= iWordCount;i++)
  {
    /********************************************************************
    * Do 5 offsets per line, recalculating the next offset after each   *
    * one.                                                              *
    *                                                                   *
    * NOTE:  The \000 gets read as 1 character, so we would normally    *
    *        have to add the strlen of the keyword + 1 for our next     *
    *        offset.  However, the stored keyword still ends in the     *
    *        newline; therefore, we ONLY need to add the strlen of the  *
    *        stored keyword in order for the calculation to be correct. *
    *                                                                   *
    *        %4.0d prints nothing for a zero value, hence the separate  *
    *        printf for the first entry.                                *
    ********************************************************************/
    if (!iOffsetCount)
      printf("   0    0  %s",Words[i].pchKeyWord);
    else
      printf("%4.0d %4.0d  %s",i,iOffsetCount,Words[i].pchKeyWord);
    if ((i == IMAXTABLE1-1) && (i < iWordCount))
      printf("\n\nSorted keywords with offsets:  Table 2\n\n");
    else if ((i == (IMAXTABLE1-1) + IMAXTABLE_N) && (i < iWordCount))
      printf("\n\nSorted keywords with offsets:  Table 3\n");

    fprintf (ppdOut, "%d",iOffsetCount);
    iOffsetCount += (strlen(Words[i].pchKeyWord));
    if (i < iWordCount)
      fprintf (ppdOut,"%c",',');       /* comma before next word       */
    if (iTempCount == IMAXPERLINE)
    {
      iTempCount = 1;                  /* reset for next line          */
      fprintf (ppdOut,"\n");           /* end of line                  */
    }
    else
      iTempCount++;
  }
  fprintf (ppdOut, "};\n");
  fclose (ppdOut);                     /* close the output file        */

  /**********************************************************************
  * Free the memory of the keywords that were kept; the discarded ones  *
  * were already freed when they were removed.                          *
  **********************************************************************/
  for (i=0;i <= iWordCount;i++)
  {
    free (Words[i].pchKeyWord);
  }
  free (szBuffer);
  exit(0);
}


/************************************************************************
 *                                                                      *
 * FUNCTION NAME = compare                                              *
 *                                                                      *
 * DESCRIPTION   = Compare counts from 2 different structures;          *
 *                 called by qsort.                                     *
 *            if value returned <0, element1 is less than element2      *
 *            if value returned >0, element1 is greater than element2   *
 *            if value returned =0, element1 is equal to element2       *
 *                                                                      *
 * INPUT         = arg1 - first structure                               *
 *                 arg2 - second structure                              *
 *                                                                      *
 * OUTPUT        = results of the comparison                            *
 *                                                                      *
 * RETURN-NORMAL = NONE.                                                *
 * RETURN-ERROR  = NONE.                                                *
 *                                                                      *
 ***********************************************************************/
int compare (const void *arg1, const void *arg2)
{
  return(((PWORDS) arg2)->sCount - ((PWORDS) arg1)->sCount);
}

/************************************************************************
 *                                                                      *
 * FUNCTION NAME = compare2                                             *
 *                                                                      *
 * DESCRIPTION   = Compare strings from 2 different structures;         *
 *                 called by qsort.                                     *
 *            if value returned <0, element1 is less than element2      *
 *            if value returned >0, element1 is greater than element2   *
 *            if value returned =0, element1 is equal to element2       *
 *                                                                      *
 * INPUT         = arg1 - first structure                               *
 *                 arg2 - second structure                              *
 *                                                                      *
 * OUTPUT        = results of the comparison                            *
 *                                                                      *
 * RETURN-NORMAL = NONE.                                                *
 * RETURN-ERROR  = NONE.                                                *
 *                                                                      *
 ***********************************************************************/
int compare2 (const void *arg1, const void *arg2)
{
  return(strcmp(((PWORDS)arg1)->pchKeyWord,((PWORDS)arg2)->pchKeyWord));
}
