/*********************************************************************************/ /* */ /* AVL BINARY TREES */ /* FOLLOWING N. WIRTH, Algorithms + data structures = programs */ /* Author: Michel Talon Licence: BSD v2 */ /* */ /*********************************************************************************/ #include #include /* For benchmarks */ #include clock_t t0, t1; /* For graphs */ char buff[50]; struct Node { int key; struct Node * lp; struct Node * rp; int bal; /* Balance flag, may be -1,0,1 */ }; static int h; /* Flag, height of subtree reduced, equal or increased, may be -1, 0, or 1 respectively. */ /* Rebalancing. Adding or removing a node may lead to branches with height * differing by 2. Then rebalancing is needed. This is achieved by using * rotations around a node, which have the property of preserving order * relations in the tree, that is nodes with lower key are at the left, and * nodes with higher keys are at the right of any given node. In particular * this may require reattaching subtrees in a different way. * There are two cases for rebalancing, one involves a single rotation, the * other a double rotation, for example a left rotation of a subtree followed * by a right rotation of the given tree. One then needs to fix the values of * the flags bal and h, which requires a careful case by case consideration. A * notable fact is that rebalancing after a node addition sets h = 0, so there * is at most one such operation. This is not the case for node deletion. */ struct Node * balanceL(struct Node * p) { struct Node *p1, *p2; /* h = -1, left branch has shrunk, or h = 1 right branch has grown. */ switch (p->bal) { case -1 : p->bal = 0; if (h > 0) h = 0; /* Right node addition restores balance and keeps height of subtree */ break; case 0 : p->bal = 1; if (h < 0) h = 0; /* In this case left mode removal doesn't change height of subtree */ break; case 1 : /* bal = 1, rebalance */ p1 = p->rp; if (p1->bal > 0) { /* single left rotation */ p->rp = p1->lp; p1->lp = p; p->bal = 0; p = p1; p->bal = 0; /* h not reset, needs to see higher */ } else if (p1->bal == 0) { /* in fact impossible for node insertion */ p->rp = p1->lp; p1->lp = p; p->bal = 1; p = p1; p->bal = -1; if (h < 0) h = 0; /* Global height doesn't change */ } else { /* double RL rotation */ p2 = p1->lp; p1->lp = p2->rp; p2->rp = p1; /* right rotation at p1 */ p->rp = p2->lp; p2->lp = p; /* left rotation at p */ if (p2->bal == +1) p->bal = -1; else p->bal = 0 ; if (p2->bal == -1) p1->bal = 1; else p1->bal = 0; p = p2; p->bal = 0; } if (h > 0) h = 0; /* No need to go higher for node insertion after rebalancing */ } return p; } struct Node * balanceR(struct Node * p) { struct Node *p1, *p2; /* h = -1, right branch has shrunk, or h = +1 left branch has grown. */ switch (p->bal) { case 1 : p->bal = 0; if (h > 0) h = 0; break; case 0 : p->bal = -1; if (h < 0) h = 0 ; break; case -1 : p1 = p->lp; if (p1->bal < 0) { /* single right rotation */ p->lp = p1->rp; p1->rp = p; p->bal = 0; p = p1; p->bal = 0; } else if (p1->bal == 0) { /* impossible for node insertion */ p->lp = p1->rp; p1->rp = p; p->bal = -1; p = p1; p->bal = 1; if (h < 0) h = 0; } else { /* double LR rotation */ p2 = p1->rp; p1->rp = p2->lp; p2->lp = p1; p->lp = p2->rp; p2->rp = p; if (p2->bal == -1) p->bal = 1; else p->bal = 0 ; if (p2->bal == +1) p1->bal = -1; else p1->bal = 0; p = p2; p->bal = 0; } if (h > 0) h = 0; } return p; } /* A procedure to search and insert into an Adelson-Velskii, Landis binary tree. At each node the difference between the heights of the right and left branches, denoted bal must be at most 1. A new node is located at the end of an appropriate branch, so that everything at the right of a node is larger than it and at the left is smaller. This is done by a recursive invocation of search untils one hits a leaf. On return from previous invocation of search one rebalances the tree and resets flags bal and h. Finally search takes as input a pointer p to the root of the tree and returns a pointer to the new root of the restructured tree. */ struct Node * search(int x, struct Node *p) { struct Node * p1, * p2; if ( p == 0 ) { /* We have recursed in the tree until a leaf without ever finding the * key, so create a new leaf here, with the key, which will be connected * to the tree by the assignement p->(l|r)p = search(x,0), where search * always returns a pointer to the constructed tree. */ p = (struct Node *) malloc(sizeof(struct Node)); if ( p == 0 ) { printf("No more memory\n"); exit(1); } h = 1; /* Tree has grown */ p->key = x; /* New leaf */ p->lp = 0; p->rp = 0; p->bal = 0; } else if ( x < p->key ) { /* Key must go to left */ p->lp = search(x,p->lp); if (h) p = balanceR(p); /* h = +1 on the left branch */ } else if ( x > p->key ) { /* Key must go to right */ p->rp = search(x,p->rp); if (h) p = balanceL(p); /* h = +1 on the right branch */ } else { /* So x == p->key */ #ifdef DEBUG printf ( "key %d present in tree \n",x ); #endif h = 0; } return p; } /* Deleting a node in the edge (no right or left child) is easy, one needs just to reconnect its parent to its only child, and free memory. To delete an interior node, recurse to the right until one hits a node without right child. Copy its contents into interior node, then delete it, as above. This case is delegated to auxiliary_delete() and works because it is called after a first left move, so that one puts a key of value immediately inferior to the deleted one in the interior node, hence preserving the order relations in the tree. */ struct Node * auxiliary_delete(struct Node * r, struct Node * q) /* q points to a node containing the key to be deleted, it remains fixed in * the procedure, except that we use the pointer itself as auxiliary * variable at the end. */ { /* Initially r = q->lp, then it chases right */ if (r->rp) { /* r has a right child */ r->rp = auxiliary_delete(r->rp, q); /* right recurse */ if (h < 0) r = balanceR(r); /* rebalance on return */ } else { /* r has no right child, delete it */ q->key = r->key; /* keep copy in q, don't change q->bal */ q = r; /* this does not alter original q! */ r = r->lp; /* replace r by its left child */ free(q); /* frees storage for old r on boundary */ h = -1; /* branch has shrunk */ } return r; /* the call r->rp = auxiliary_delete(r->rp, q) when r->rp has no right * child, so the function returns r->rp->lp, connects the "parent" to the * left child, hence removing r->rp from the tree. Upwards it simply * connects the parent on the right to a rebalanced tree. */ } struct Node * delete(int x, struct Node * p) /* x is a key corresponding to a node to be deleted in the tree of root pointed at by p. On return we have a pointer to a tree with node deleted and rebalanced to satisfy AVL condition. When called recursively we still need to rebalance the tree after a call, potentially up to the root. */ { struct Node * q; /* if p = 0, we have recursed in the tree without ever finding the key, * until a leaf node and its null left or right pointer. This means the key * is not present in the tree. We return 0 to signal the absence of the key. */ if (!p) return 0; /* Here p is a valid node pointer */ if (p->key > x) { p->lp = delete(x, p->lp); /* recurse to the left */ if (h < 0) p = balanceL(p); } else if (p->key < x) { /* recurse to the right */ p->rp = delete(x, p->rp); if (h < 0) p = balanceR(p); } else if (p->key == x) { /* found the correct node */ q = p; /* keep a copy */ if (!q->rp) { /* q has no right child */ p = q->lp; /* remove p, replace by left child */ free(q); h = -1; /* branch shrunk */ } else if (!q->lp) { /* q has no left child */ p = q->rp; /* remove p, replace by right child*/ free(q); h = -1; /* branch shrunk */ } else { /* recurse to the left with auxiliary_delete */ q->lp = auxiliary_delete(q->lp, q); if (h < 0) p = balanceL(p); } } return p; } /* For testing purposes */ int show(struct Node *p) { if ( p!=0 ) { if (p->lp && p->rp) { printf(" %d, %d -> (%d , %d )\n", p->key, p->bal, p->lp->key, p->rp->key); show(p->lp); show(p->rp); } else if (p->lp && !p->rp) { printf(" %d, %d -> (%d , )\n", p->key, p->bal, p->lp->key); show(p->lp); } else if (!p->lp && p->rp) { printf(" %d, %d -> ( , %d )\n", p->key, p->bal, p->rp->key); show(p->rp); } else { printf(" %d, %d -> ()\n", p->key, p->bal); } return 1; } else return 0; } int height(struct Node *p) { int hl,hr; if ( p == 0 ) return 0; else { hl = height(p->lp); hr = height(p->rp); return ( hl <= hr ? 1 + hr : 1 + hl ); } } int number(struct Node *p) { if ( p == 0 ) return 0; else return ( 1 + number(p->lp) + number(p->rp) ); } int find(int x, struct Node * p) { if (!p) return 0; /* initially p is the root of the tree, then it descends to find x. Note this remains private to the function. */ while(p) { if (p->key == x) return 1; else p = (p->key < x)? p->rp : p->lp; } return 0; /* this runs until the key is found or one hits p = 0, and provides a non recursive implementation. */ } int aux_pr_gr(FILE * st, struct Node * p) { if ( p!=0 ) { fprintf(st, "%d [label = \"%d\\n%d\", fontsize = 10, color = red];\n", p->key, p->key, p->bal); if (p->lp && p->rp) { fprintf(st, " %d -> %d [color = green]; \n", p->key, p->lp->key); fprintf(st, " %d -> %d [color = blue]; \n", p->key, p->rp->key); aux_pr_gr(st, p->lp); aux_pr_gr(st, p->rp); } else if (p->lp && !p->rp) { fprintf(st, " %d -> %d [color = green]; \n", p->key, p->lp->key); aux_pr_gr(st, p->lp); } else if (!p->lp && p->rp) { fprintf(st, " %d -> %d [color = blue]; \n", p->key, p->rp->key); aux_pr_gr(st, p->rp); } return 1; } else return 0; } int print_graphviz(char * buff, struct Node * p) { /* file is a string, the name of a file */ FILE * st; st = fopen(buff, "w"); fprintf(st,"digraph G {\n rotate = 90 \n"); if ( p!=0 ) aux_pr_gr(st, p); fprintf(st,"}\n"); fclose(st); return 0; } int main() { int x,i,k; long tot; char y[2]; int tab[100]; for (i = 0; i < 100; i++) tab[i] = 0 ; struct Node *p; p = 0; while (1) { printf("Type a non zero integer:\n"); scanf("%d", &x); if ( x > 0) { /* add a node to the tree */ p = search(x,p); show(p); printf("Tree height: %d for %d nodes.\n", height(p), number(p)); } else if (x < 0) { /* remove node |x| */ p = delete(-x,p); show(p); printf("Tree height: %d\n for %d nodes.\n", height(p), number(p)); } else { /* put x = 0 to enter this set of tests */ printf("Random test\n"); printf("Give a big number:\n"); scanf("%d",&k); p=0; for ( i=1 ; i <= k ; i++) { x = 1 + (int)( (double)(k)*rand()/(RAND_MAX+1.0) ); if (i <= 100) tab[i-1] = x; p = search(x,p); } #ifdef DEBUG show(p); #endif #ifdef GRAPH sprintf(buff, "avl0.dot"); print_graphviz(buff, p); #endif printf("Tree height: %d\n", height(p)); printf("Number of nodes: %d\n", number(p)); for ( i=0 ; i < 100 ; i++) { x = tab[i]; if (find(x,p)) { p = delete(x,p); #ifdef DEBUG printf("Removing key : %d\n", x); printf("Number of remaining nodes: %d\n", number(p)); show(p); #endif #ifdef GRAPH sprintf(buff,"avl%d-%d.dot", i+1,x); print_graphviz(buff, p); #endif } } printf("Test finding a lot of nodes in a large tree: (y|n)? \n"); scanf("%1s", &y); if ( y[0] == 'y') { /* Long test */ p = 0; tot = 0; t0 = clock(); for ( i=1 ; i <= 10000000 ; i++) { x = 1 + (int)( (double)(i)*rand()/(RAND_MAX+1.0) ); p = search(x,p); } t1 = clock(); printf("Time growing tree, seconds: %.4f\n", (t1-t0)*(1.0/CLOCKS_PER_SEC)); printf("Tree height: %d\n", height(p)); printf("Number of nodes: %d\n", number(p)); t0 = clock(); for ( k=1; k<=5 ; k++) { for ( i=1 ; i <= 10000000 ; i++) { tot += find(i,p); } } t1 = clock(); printf("On 50000000 searches, %Ld found.\n", tot); printf("Total searching time, seconds: %.4f\n", (t1-t0)*(1.0/CLOCKS_PER_SEC)); t0 = clock(); tot = 0; for ( i=5000000 ; i <= 5100000 ; i++) { if (find(i,p)) { tot++; p = delete(i,p); } } t1 = clock(); printf("Deleting time, seconds: %.4f for %d nodes.\n", (t1-t0)*(1.0/CLOCKS_PER_SEC), tot); } /* End long test */ exit(0); } /* end set of tests */ } /* end while(1) */ } /* Benchmarking conclusions. */ /* A typical result: the tree eats 80 Megs memory (the double on a 64 bits machine !) and yields: Time growing tree, seconds: 24.0859 Tree height: 27 Number of nodes: 5000232 On 50000000 searches, 25001160 found. Total searching time, seconds: 7.2109 Deleting time, seconds: 4.2109 for 49328 nodes. Compare this to the red-black tree implementation directly derived from Cormen, Leiserson, Rivest, Stein, Introduction to algorithms, Chapter 13 and written in http://newcenturycomputers.net/projects/download.cgi/RBTree-1.5.zip in the file s_rbt.txt, modified with the above benchmark. Now the tree uses 155 Megs memory, and yields: Time growing tree, seconds: 21.4219 Tree height: 28 Number of nodes: 5000232 On 50000000 searches, 25001160 found. Total searching time, seconds: 7.6875 Deleting time, seconds: 12.9844 for 49328 nodes. So the red-black tree uses much more memory (probably the parent pointers and the numerous useless pointers to NIL) grows slightly faster since the balancing algorithm is less strict, and produces slightly more inefficient tree, but the search time is not much bigger. On the other hand, the deleting time seems much faster with the AVL tree, which is consistent with the assertion in Wirth that deletion, while in principle may produce a lot of rebalancing, needs very little in practice for random data. Anyways there doesn't seem to be any reason to prefer red-black tree over AVL. */