/* #module IdxBuild "3-001" *********************************************************************** * * * The software was developed at the Monsanto Company and is provided * * "as-is". Monsanto Company and the auther disclaim all warranties * * on the software, including without limitation, all implied warran- * * ties of merchantabilitiy and fitness. * * * * This software does not contain any technical data or information * * that is proprietary in nature. It may be copied, modified, and * * distributed on a non-profit basis and with the inclusion of this * * notice. * * * *********************************************************************** */ /* * Module Name: IdxBuild * * Author: R L Aurbach CR&DS MIS Group 27-Apr-1986 * * Function: * Build the Index Tree data structure * * Modification History: * * Version Initials Date Description * ------------------------------------------------------------------------ * 1-001 RLA 27-Apr-1986 Original Code * 1-002 RLA 03-May-1986 Add support for page-no highlighting * and move spell-string processing to a * separate module. * 2-003 RLA 09-Apr-1987 Add support for volumes (used in master * index processing). * 2-004 RLA 14-Apr-1987 Add new page highlight flag * 2-005 RLA 15-Apr-1987 Add support for cross-referencing * 3-001 F.H. 17-May-1991 converted to portable C */ /* * Module IdxBuild - Module-Wide Data Description Section * * Include Files: */ #ifdef MSDOS #include #include #define F_OK 0 /* access(): File exists */ #else # include extern char *sprintf(); #endif #include #include #include #include #include "IdxDef.h" /* * Module Definitions: */ #define TRUE 1 #define FALSE 0 #define linebfsize 133 /* * Global Declarations: */ /* * Static Declarations: */ static char *page_ref; static char *tok_spell; static char see_spell[linebfsize]; #ifdef MSDOS void idx_build_tree(char *token_1, char *token_2, char *token_3, char *page_no, char token_ct, char *vol, int flag); TREE_PTR idx_locate_node(TREE_PTR *head, char *token, char *desc); void idx_add_page_ref(PGNODE_PTR *pghead, char *token, char highlight, char *vol); void idx_add_cross_ref(PGNODE_PTR *seehead, char *token, char highlight, char *vol); TREE_PTR idx_allocate_node(char *token, char *desc); PGNODE_PTR idx_allocate_pgnode(char *vol, char *dsc, char hlite); #else void idx_build_tree(); TREE_PTR idx_locate_node(); void idx_add_page_ref(); void idx_add_cross_ref(); TREE_PTR idx_allocate_node(); PGNODE_PTR idx_allocate_pgnode(); #endif /* * External References: */ extern TREE_PTR root; /* root of the Index Tree */ #ifdef MSDOS extern void idx_build_spell_string(char *desc); extern void idx_parse(char *linebf, char *token_1, char *token_2, char *token_3, char *page_no, int *token_ct, int *flag); #else extern void idx_build_spell_string(); extern void idx_parse(); #endif /* * Functions Called: */ /* * Function Idx_Build_Tree - Documentation Section * * Discussion: * This routine uses the information parsed by Idx_Parse to build the Index * Tree. * * Calling Synopsis: * Call Idx_Build_Tree (token_1, token_2, token_3, page_no, token_ct, * vol, flag) * * Inputs: * token_1 -> is the top level item token. ASCIZ string passed by * reference. * * token_2 -> is the second level item token. ASCIZ string passed by * reference. * * token_3 -> is the third level item token. ASCIZ string passed by * reference. * * page_no -> is the page number token. ASCIZ string passed by * reference. * * token_ct -> is the number of tokens seen for this index entry. * integer passed by value. * * vol -> is a pointer to the string descriptor for a volume * string. The pointer is passed by value. * * flag -> is a boolean flag. If TRUE, the page_no variable is * really a cross-reference string. * * Outputs: * none * * Return Value: * none * * Global Data: * The Index Tree is updated. * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * Tree is updated. * * Error Conditions: * The only expected error is a memory allocation failure. In this case, * a message is printed and the entry is ignored. * * Algorithm: * A. Check for a page-no highlighting flag. * B. Check for a valid set of input data. Ignore if not. * C. Build a Spell String for the top-level item. * D. Locate the top-level item in the tree. * 1. If it does not exist, * A. Allocate a new node and link it in. * E. If token_ct == 1, * 1. Add page number to list. * F. Else, * 1. Build a Spell String for the second-level item. * 2. Locate the second-level item in the tree. * A. If it does not exist, * 1. Allocate a new node and link it in. * 3. If token_ct == 2, * A. Add page number to list. * 4. Else, * A. Build a Spell String for the third-level item. * B. Locate the third-level item in the tree. * 1. If it does not exist, * a. Allocate a new node and link it in. * C. Add page number to list. * * Special Notes: * The first character of token_1 may be a special character. Special * characters are used to indicate special highlighting to be performed * on the page-no for this index entry. The special characters are: * ^ use boldface * ~ use italic * _ use underlining * Only one type of highlighting is supported for each page entry. */ /* * Function Idx_Build_Tree - Code Section */ void idx_build_tree (token_1, token_2, token_3, page_no, token_ct, vol, flag) char *token_1; char *token_2; char *token_3; char *page_no; char token_ct; char *vol; int flag; { /* * Local Declarations */ char spell_str[linebfsize]; TREE_PTR node; int test_char; char *real_token_1; char highlight; /* * Module Body */ /* Check for page-no highlighting flag */ if (token_ct == 0) return; if (token_1[0] == '\0') return; real_token_1 = &token_1[1]; test_char = token_1[0]; switch (test_char) { case '_' : highlight = IDX_K_UNDERLINE; break; case '~' : highlight = IDX_K_ITALIC; break; case '^' : highlight = IDX_K_BOLD; break; case '#' : highlight = IDX_K_FOLLOW; break; default : highlight = IDX_K_NONE; real_token_1 = &token_1[0]; break; } /* Check out the item for validity */ if (real_token_1[0] == '\0') return; if ((token_ct > 1) && (token_2[0] == '\0')) return; if ((token_ct > 2) && (token_3[0] == '\0')) return; if (token_ct > 3) return; if (page_no[0] == '\0') return; /* First level token processing */ (void)strcpy(spell_str, real_token_1); idx_build_spell_string (spell_str); node = idx_locate_node (&root, real_token_1, spell_str); if (node == 0) { (void)printf("Error processing index item %s\n", real_token_1); return; } if (token_ct == 1) { if (!flag) idx_add_page_ref (&node->pghead, page_no, highlight, vol); else idx_add_cross_ref (&node->seehead, page_no, highlight, vol); } /* Second level token processing */ else { (void)strcpy(spell_str,token_2); idx_build_spell_string(spell_str); node = idx_locate_node (&node->subhead, token_2, spell_str); if (node == 0) { (void)printf("Error processing index subitem %s\n", token_2); return; } if (token_ct == 2) { if (!flag) idx_add_page_ref (&node->pghead, page_no, highlight, vol); else idx_add_cross_ref (&node->seehead, page_no, highlight, vol); } /* Third level token processing */ else { (void)strcpy(spell_str,token_3); idx_build_spell_string(spell_str); node = idx_locate_node (&node->subhead, token_3, spell_str); if (node == 0) { (void)printf("Error processing index subsubitem %s\n", token_3); return; } if (!flag) idx_add_page_ref (&node->pghead, page_no, highlight, vol); else idx_add_cross_ref (&node->seehead, page_no, highlight, vol); } } } /* * Function Idx_Locate_Node - Documentation Section * * Discussion: * This routine scans the list of nodes (starting from the listhead), * looking for one which has a spell-string identical to the given * spell string. If it doesn't find one, it adds a node in the right place * in the chain to maintain alphabetical order. * * Calling Synopsis: * node = Idx_Locate_Node (head, token, desc) * * Inputs: * head -> is the address of the listhead of the list to be * scanned. As such, it is a TREE_PTR, passed by * reference. * * token -> is the token to be located or entered in the list. * ASCIZ string passed by reference. * * desc -> is the spell string, passed by descriptor. * * Outputs: * none * * Return Value: * node -> is the address of the tree leaf for the current token. * It is a TREE_PTR, passed by value. If the leaf did not * already exist and if the system was unable to allocate * one, the node is returned as 0. * * Global Data: * The list specified by the listhead will be modified if it is not * possible to add a new leaf when necessary. * * Files Used: * none * * Assumed Entry State: * An empty listhead contains 0. * * Normal Exit State: * The node is returned. * * Error Conditions: * Allocation failure -- node = 0. * * Algorithm: * A. If the list is empty, * 1. Allocate a node and link it in. * B. Else, * 1. Beginning with the first node, * a. If spell-string < node-spell-string, * 1. Get next node. * b. If spell-string = node-spell-string, * 1. Return node pointer. * c. If spell-string > node-spell-string, * 1. Allocate a node and link it in. * * Special Notes: * Although this algorithm isn't a very efficient way to alphatize things, * it has the tremendous virtue of being easy to implement. Since the * program is not run often (typically a couple of times per document), * the inefficiency is not important. */ /* * Function Idx_Locate_Node - Code Section */ TREE_PTR idx_locate_node (head, token, desc) TREE_PTR *head; char *token; char *desc; { /* * Local Declarations */ TREE_PTR node; TREE_PTR old_node; TREE_PTR new_node; int status; /* * Module Body */ /* Chain through the list, looking for the right place */ old_node = 0; new_node = *head; while (new_node != 0) { status = strcmp(new_node->spell, desc); if (status < 0) { old_node = new_node; new_node = new_node->link; continue; } if (status == 0) return (new_node); if (status > 0) { node = idx_allocate_node (token, desc); if (old_node == 0) *head = node; else old_node->link = node; node->link = new_node; return (node); } } /* If the list is exhausted, allocate a node immediately */ node = idx_allocate_node (token, desc); if (old_node == 0) *head = node; else old_node->link = node; return (node); } /* * Function Idx_Allocate_Node - Documentation Section * * Discussion: * Allocate and initialize a leaf of the Index Tree. * * Calling Synopsis: * node = Idx_Allocate_Node (token, desc) * * Inputs: * token -> token string. ASCIZ string passed by reference. * * desc -> spell string. Character string passed by descriptor. * * Outputs: * none * * Return Value: * node -> Address of the allocated node. TREE_PTR passed by * value. * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * node is allocated. * * Error Conditions: * allocation failure -- node is returned == 0. * * Algorithm: * A. Allocate virtual memory for the node. * B. Initialize the node structure. * * Special Notes: * none */ /* * Function Idx_Allocate_Node - Code Section */ TREE_PTR idx_allocate_node (token, desc) char *token; char *desc; { /* * Local Declarations */ TREE_PTR node; /* * Module Body */ node = (TREE_PTR) malloc (sizeof(TREE)); if (node != 0) { node->link = 0; node->subhead = 0; node->pghead = 0; node->seehead = 0; node->spell = strdup(desc); node->item = strdup(token); } return (node); } /* * Function Idx_Add_Page_Ref - Documentation Section * * Discussion: * If the specified page reference is not already in the current list, * add it to the end of the list. * * Calling Synopsis: * Call Idx_Add_Page_Ref (pghead, token, highlight, vol) * * Inputs: * pghead -> the address of the listhead for the page reference list. * a PGNODE_PTR, passed by reference. * * token -> the page reference. ASCIZ string passed by reference. * * highlight -> a flag indicating what sort of highlighting to give * this particular page reference. char passed by value. * * vol -> is a pointer to a character string descriptor which * describes the volume string. * * Outputs: * none * * Return Value: * none * * Global Data: * If a new page reference needs to be added, the list is updated. * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * If the page reference already exists, nothing happens. If a new * page reference needs to be added, it is allocated and appended to the * end of the list. * * Error Conditions: * If there is an allocation failure, the page reference is not added. * * Algorithm: * A. Put the page_no token in a dynamic string. * B. For all nodes in the list, * 1. If the page-no in the list does not match the page-no, * a. Get next node. * 2. Else, * a. Return. * C. If the list is exhausted, * 1. Allocate a new node. * 2. Fill it in. * 3. Chain it to the end of the list. * * Special Notes: * This routine assumes that page-no references are always in numerical * order. This assumption is necessary, because the variety of page * numbering (perhaps within the document) makes it impossible to check * for order. */ /* * Function Idx_Add_Page_Ref - Code Section */ void idx_add_page_ref (pghead, token, highlight, vol) PGNODE_PTR *pghead; char *token; char highlight; char *vol; { /* * Local Declarations */ PGNODE_PTR node; PGNODE_PTR last_node = *pghead; /* * Module Body */ page_ref = strdup(token); while (last_node != 0) { if ((strcmp(last_node->page_dsc, page_ref) == 0) && ((strcmp(last_node->vol, vol)) == 0)) { if (highlight == IDX_K_NONE) return; last_node->highlight = highlight; return; } if (last_node->link == 0) break; last_node = last_node->link; } node = idx_allocate_pgnode (vol, page_ref, highlight); if (node != 0) { if (last_node == 0) *pghead = node; else last_node->link = node; } } /* * Function Idx_Add_Cross_Ref - Documentation Section * * Discussion: * Parse and build a cross reference string. If the string is not already * on the current list, add it to the list in alphabetical order. * * Calling Synopsis: * Call Idx_Add_Cross_Ref (seehead, token, highlight, vol) * * Inputs: * seehead -> the address of the listhead for the cross-reference * list. * * token -> the cross-reference string. ASCIZ string passed by * reference. * * hightlight -> a flag indicating what sort of highlighting to give" * this particular cross-reference. char passed by value. * * vol -> is a pointer to a character string descriptor which * describes the volume string.f * * Outputs:e * noneM * * Return Value: * none * * Global Data:a * If a new cross-reference needs to be added, the list is updated.t * * Files Used: * nonea * * Assumed Entry State: * nonef * * Normal Exit State:s * If the cross-reference already exists, nothing happens. If a new * page reference needs to be added, it is allocated and added to thei * list. * * Error Conditions: * If there is an allocation failure, the page reference is not added. * * Algorithm: * A. Parse the page reference into tokens. * B. Build the cross-reference text string and put it in a dynamic string. * C. Build a spell string for it. * D. Chain the string into the list. * * Special Notes:* * none* */ /* * Function Idx_Add_Cross_Ref - Code Section */ void idx_add_cross_ref (seehead, token, highlight, vol) PGNODE_PTR *seehead; char *token; char highlight; char *vol; { /* * Local Declarationsp */ int length; PGNODE_PTR node; PGNODE_PTR old_node = 0; PGNODE_PTR new_node = *seehead; char tok_1[linebfsize]; char tok_2[linebfsize]; char tok_3[linebfsize]; char dummy[linebfsize]; int tok_ct; int flag; /* * Module Body */ /* * Parse the cross-reference string into its tokens using Idx_Parse. * Then combine the results to build a token string and write it to thei * page_ref dynamic string. Compute its spell string. */ idx_parse(token, tok_1, tok_2, tok_3, dummy, &tok_ct, &flag); if (tok_ct == 0) return; if (strlen(tok_1) == 0) return; if (tok_ct > 1 && strlen(tok_2) == 0) return; if (tok_ct > 2 && strlen(tok_3) == 0) return; length = strlen(tok_1); (void)sprintf (dummy, "%s", tok_1); if (tok_ct > 1) { (void)sprintf (&dummy[length], ", %s", tok_2); length += strlen(tok_2); } if (tok_ct > 2) { (void)sprintf(&dummy[length], ", %s", tok_3); length += strlen(tok_3); } page_ref = strdup(dummy); tok_spell = dummy; idx_build_spell_string(tok_spell); /* * Now scan the list of cross-references to find where to put this one.C */ while (new_node != 0) { (void)strncpy(dummy, new_node->page_dsc, strlen(new_node->page_dsc)); dummy[strlen(new_node->page_dsc)] = '\0'; (void)strcpy(see_spell, dummy); idx_build_spell_string(see_spell); flag = strcmp(see_spell, tok_spell); if (flag < 0) { old_node = new_node; new_node = new_node->link; continue; } if (flag == 0) return; if (flag > 0) { node = idx_allocate_pgnode(vol, page_ref, highlight); if (node != 0) { if (old_node == 0) *seehead = node; else old_node->link = node; node->link = new_node; } return; } } /* If the list is exhausted, allocate a node immediately */ node = idx_allocate_pgnode(vol, page_ref, highlight); if (node != 0) { if (old_node == 0) *seehead = node; else old_node->link = node; } } /* * Function Idx_Allocate_PgNode - Documentation Sectioni * * Discussion: * Allocate and initialize a new page node.o * * Calling Synopsis: * node = Idx_Allocate_PgNode (vol, dsc, hlite) * * Inputs: * vol -> is the address of the volume string descriptor. * * dsc -> is the address of the page ref string descriptor. * * hlite -> is the highlight flag * * Outputs: * nonem * * Return Value: * node -> is the address of the newly allocated node. * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * none * * Error Conditions: * if the allocation failed, node is returned = 0. * * Algorithm: * A. Allocate the node. * B. If successful, * 1. Initialize it. * * Special Notes: * none */ /* * Function Idx_Allocate_PgNode - Code Section */ PGNODE_PTR idx_allocate_pgnode (vol, dsc, hlite) char *vol; char *dsc; char hlite; { /* * Local Declarations */ PGNODE_PTR node; /* * Module Body */ node = (PGNODE_PTR) malloc (sizeof(PGNODE)); if (node != 0) { node->link = 0; node->vol = strdup(vol); node->highlight = hlite; node->page_dsc = strdup(dsc); } return (node); }