<?php /** * Tree library for PHP 4 / PHP 5 * * This package arose from the need for a simple, memory efficient and upwards compatible * tree module. * It provides * - n-ary trees * - efficient traversal, using pseudo-pointers "up", "down" and "right" * - storage facility for arbitrary data within the nodes * * Two shortcomings of PHP are addressed by this package: * - memory hogging: both arrays and objects reserve huge chunks of memory for each element * or instance. For example, a simple array holding 64K characters takes 4 MB of RAM. * Nested arrays reserve multiples of this * - The OOP model and implementation of references changed significantly between PHP4 and PHP5, making * it difficult or impossible to write portable code using object references * * The present package uses fixed-length tables implemented with the string datatype. Instead of references, * it implements pseudo-pointers, using integer indexes into these tables. Think C. * * The storage capacity is limited by the constant TREE_POINTLEN (default: 2 bytes). The highest number * that can be stored in the pointer (actually, -1) is the maximum total number of nodes the tree can hold, * including deleted nodes. The depth of the tree and the number of branches is not limited, although * depth values will not be reported correctly for values taking more than 8*TREE_POINTLEN-4 bits (the * missing four bits are used for a checksum). Thus, with the default pointer size, the tree depth is * effectively limited to 2^12 == 4096. * The upper limit to TREE_POINTLEN is constituted by PHP's maxint (4 bytes). * * A disadvantage: No garbage collection implemented; our pseudo-memory (stored in the strings) grows with * each "add" operation, but does not free unused space after "delete" operations. * (can be retrofitted by changing the "newnode" logic to use previously freed space instead of claiming new) * Ignore this problem if you use the tree mainly to store and retrieve data, and if you don't reorganize the * tree extensively (many prune and graft operations) * * A comparison with Richard Heyes' object-oriented Tree-Class 1.0.3. shows, how and where this approach * is more efficient, while not sacrificing speed: * * tree.lib.php Tree-1.0.3.php * ------------ -------------- * insert 4096 nodes in bintree CPU: 0.577s CPU: 0.186s * MEM: 0.3M MEM: 3.5M * * get flat list from this tree CPU: 0.099s CPU: 0.064s * MEM: 243K MEM: 206K * * remove this tree's root node CPU: 0.125s CPU: 0.001s * MEM: 3456B MEM: 16B * * @package tree.lib * @author johannes(@)jeudi.de * @version 1.0 */ /** * configure and conquer */ define ('TREE_DATALEN',64); // fixed dataset size for data attached to nodes (recommended: multiples of 8) define ('TREE_TAGPOS',0); // offset of tag (uniq node name) in the data record of each node define ('TREE_TAGLEN',NULL); // enter a fixed length for the tag, or NULL for a variable length, separated field define ('TREE_TAGSEP',"\n"); // for variable length tags, specify a separator character. May not occur in the data define ('TREE_POINTLEN',2); // reserve this many bytes for each pointer. 2 bytes => 2^16 nodes possible /** * more constants */ $tree_nilp = (1<<(TREE_POINTLEN*8))-1; // last entry in pointerspace == NIL (ampty pointer) define ('TREE_META',1); // some constants to sweeten the code that accesses the tree table (pointer offsets) define ('TREE_NEXT',2); define ('TREE_PARENT',3); define ('TREE_CHILD',4); /** * tree master function * * The following primitive operations are selected by setting the "method" argument accordingly: * * CALL WITH ARGUMENTS WILL RETURN * ------------------- ----------- * tree('new') treehandle * tree('+sister',treehandle,pointer-to-node,data) pointer-to-newnode * tree('+child', treehandle,pointer-to-node,data) pointer-to-newnode * tree('remove', treehandle,pointer-to-node) pointer-to-right-sister * tree('getroot',treehandle) root-pointer * tree('getnext',treehandle,pointer-to-node) pointer-to-right-sister or NULL * tree('getprev',treehandle,pointer-to-node) pointer-to-left-sister or NULL * tree('getpare',treehandle,pointer-to-node) pointer-to-parent or NULL * tree('getkid1',treehandle,pointer-to-node) pointer-to-first-child or NULL * tree('getkids',treehandle,pointer-to-node) array of pointers, or empty array * tree('gettag', treehandle,pointer-to-node) first field of node dataset * tree('getdata',treehandle,pointer-to-node) node dataset (fixed length binary string) * tree('getdpth',treehandle,pointer-to-node) depth (root=0, max: 2^(8*POINTLEN)/16) * tree('findtag',treehandle,anchor,tag) pointer-to-node or NULL * tree('pretty', treehandle,anchor) 1 (and print tree structure below anchor) * tree('dump', treehandle,anchor) 1 (and print tree structure below anchor) * * Single vs. multiple roots: * Root nodes are simple nodes whose "parent" pointer is NIL. Like any other node, they can have "sisters", * thereby creating a "multiply rooted" tree (or a forest?). * Not to be confounded: The th_nnn_root variable referenced in the code is just a pointer, pointing to * NIL (when the tree is empty) or to the root node or the first of a group of root nodes. * * After creating a tree with "new", it will be empty. The first node (=root) should be created using the * +child method, though +sister will work, too. Conceptually, the "anchor" should be given as NIL * in that case; practically, the anchor argument is ignored when a first node is added to the empty tree * * Each node is represented by a set of four pseudo pointers: * 1 (TREE_META): The low four bits hold the four lowest bits of the node address (can be used to * assess the integrity of the tree). * The higher bits hold the depth of the node, with root having a depth of 0. * 2 (TREE_NEXT): The address of the right sister of this node * 3 (TREE_PARENT): The address of the parent node * 4 (TREE_CHILD): Address of this node's first child, if any * * Pointers are set to 2^(8*TREE-POINTLEN)-1 (or "-1") to mean NIL * The TREE_META value is set to NIL to indicate a deleted node */ function tree () { global $tree_nilp; static $treecounter = 0; $method = func_get_arg(0); if (func_num_args()>1) $handle = func_get_arg(1); if ($method=='new') $handle = 'th_'.($treecounter++); // static would be more appropriate here, since we want the new tree to // persist this function call, but not clutter the global namespace. But PHP // reserves statics at compile time, so this is impossible (cf. PHP Bug 11449). global ${$handle.'_root'}; global ${$handle.'_tree'}; global ${$handle.'_treeLen'}; global ${$handle.'_treeData'}; if ($method!='new' && !isset(${$handle.'_root'})) _tree_err(2,$method); switch ($method) { case 'new': // static would be more appropriate here, since we want the new tree to // persist this function call, but not clutter the global namespace. But PHP // reserves statics at compile time, so this is impossible (cf. PHP Bug 11449). ${$handle.'_root'}=$tree_nilp; ${$handle.'_tree'}=''; ${$handle.'_treeLen'}=0; ${$handle.'_treeData'}=''; return $handle; case '+sister': case '+child': if (func_num_args()!=4) _tree_err(1,$method); $anchor = func_get_arg(2); $data = func_get_arg(3); if (${$handle.'_root'} == $tree_nilp) { if (-1===($newnode = _tree_newnode($tree_nilp, ${$handle.'_treeLen'},${$handle.'_tree'}))) _tree_err(4,$method); ${$handle.'_root'} = $newnode; } else { if (NULL===($noderec = _tree_getrec($anchor,${$handle.'_tree'}))) _tree_err(3,$method); if ($method == '+sister') { if (-1===($newnode = _tree_newnode($noderec[TREE_PARENT], ${$handle.'_treeLen'},${$handle.'_tree'}))) _tree_err(4,$method); $noderec[TREE_NEXT] = $newnode; _tree_setrec($anchor,$noderec,${$handle.'_tree'}); } else { if (-1===($newnode = _tree_newnode($anchor, ${$handle.'_treeLen'},${$handle.'_tree'}))) _tree_err(4,$method); if ($tree_nilp==$noderec[TREE_CHILD]) { // we are the first child $noderec[TREE_CHILD] = $newnode; _tree_setrec($anchor,$noderec,${$handle.'_tree'}); } else { $kidx = $noderec[TREE_CHILD]; do { if (NULL===($kidrec = _tree_getrec($kidx,${$handle.'_tree'}))) _tree_err(3,$method); } while ($kidrec[TREE_NEXT]<$tree_nilp); $kidrec[TREE_NEXT] = $newnode; _tree_setrec($kidx,$kidrec,${$handle.'_tree'}); } } } if (strlen($data)>TREE_DATALEN) _tree_err(5,$method); ${$handle.'_treeData'} .= substr(str_pad($data,TREE_DATALEN,"\0"),0,TREE_DATALEN); return $newnode; case 'getroot': // or "first root", if more than one if (func_num_args()!=2) _tree_err(1,$method); return (${$handle.'_root'}); case 'getnext': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method); return ($nodrec[TREE_NEXT]<$tree_nilp?$nodrec[TREE_NEXT]:NULL); case 'getprev': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); $res = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'}); return ($res<$tree_nilp?$res:NULL); case 'getpare': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method); return ($nodrec[TREE_PARENT]<$tree_nilp?$nodrec[TREE_PARENT]:NULL); case 'getkid1': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method); return ($nodrec[TREE_CHILD]<$tree_nilp?$nodrec[TREE_CHILD]:NULL); case 'getkids': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); // accept NULL which as root if (is_null($which)) $which = ${$handle.'_root'}; if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method); $res = array(); $kidx = $nodrec[TREE_CHILD]; while ($kidx<$tree_nilp) { $res[] = $kidx; if (NULL===($nodrec = _tree_getrec($kidx,${$handle.'_tree'}))) _tree_err(3,$method); $kidx = $nodrec[TREE_NEXT]; } return $res; case 'remove': if (func_num_args()!=3) _tree_err(1,$method); $which = func_get_arg(2); if (NULL===($delrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method); if (NULL===($prev = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'}))) { if ($delrec[TREE_PARENT]<$tree_nilp) { if (NULL===($parrec = _tree_getrec($delrec[TREE_PARENT],${$handle.'_tree'}))) _tree_err(3,$method); $parrec[TREE_CHILD] = $delrec[TREE_NEXT]; _tree_setrec($delrec[TREE_PARENT],$parrec,${$handle.'_tree'}); } else { ${$handle.'_root'} = $delrec[TREE_NEXT]; } } else { if (NULL===($prvrec = _tree_getrec($prev,${$handle.'_tree'}))) _tree_err(3,$method); $prvrec[TREE_NEXT] = $delrec[TREE_NEXT]; _tree_setrec($prev,$prvrec,${$handle.'_tree'}); } // set self pointer to NIL => this marks the node as deleted $delrec[TREE_META] = $tree_nilp; _tree_setrec($which,$delrec,${$handle.'_tree'}); // trickle deletion marks down through subtree // note that for safe tree traversal, this is unneccassary, as the subtree // now is not linked in anymore. The deletion marks are used only by the "flatlst" // method that blindly dumps the nodes without traversing them. // If you can do without flatlst, you might as well uncomment the next line if ($delrec[TREE_CHILD]<$tree_nilp) _tree_deepdel($delrec[TREE_CHILD],${$handle.'_tree'}); return (($delrec[TREE_NEXT]<$tree_nilp)?$delrec[TREE_NEXT]:NULL); case 'gettag': if (func_num_args()!=3) _tree_err(1,$method); $anchor = func_get_arg(2); return (_tree_gettag($anchor,${$handle.'_tree'},${$handle.'_treeData'})); case 'getdata': if (func_num_args()!=3) _tree_err(1,$method); $anchor = func_get_arg(2); return (substr(${$handle.'_treeData'},$anchor*TREE_DATALEN,TREE_DATALEN)); case 'findtag': if (func_num_args()!=4) _tree_err(1,$method); $anchor = func_get_arg(2); $tag = func_get_arg(3); // accept NULL anchor as root if (is_null($anchor)) $anchor = ${$handle.'_root'}; if ($tree_nilp == ${$handle.'_root'}) $res = NULL; else $res = _tree_findtag($anchor,$tag,${$handle.'_tree'},${$handle.'_treeData'}); return $res; case 'pretty': if (func_num_args()!=3) _tree_err(1,$method); $anchor = func_get_arg(2); // accept NULL anchor as root if (is_null($anchor)) $anchor = ${$handle.'_root'}; if ($tree_nilp == ${$handle.'_root'}) print "[]"; else _tree_prettyprint($anchor,${$handle.'_tree'},${$handle.'_treeData'}); print "\n"; return 1; case 'dump': if (func_num_args()!=3) _tree_err(1,$method); $anchor = func_get_arg(2); // accept NULL anchor as root if (is_null($anchor)) $anchor = ${$handle.'_root'}; if ($tree_nilp == ${$handle.'_root'}) print "NIL"; else { for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN) { for ($j=0;$j<TREE_POINTLEN;$j++) printf("%02x",ord(${$handle.'_tree'}[$i+$j])); print ' '; if ($i % (TREE_POINTLEN*4) == TREE_POINTLEN*3) print ("\t"._tree_gettag($i/(TREE_POINTLEN*4)-0.75, ${$handle.'_tree'},${$handle.'_treeData'})."\n"); } } case 'flatlst': if (func_num_args()!=3) _tree_err(1,$method); $anchor = func_get_arg(2); // accept NULL anchor as root if (is_null($anchor)) $anchor = ${$handle.'_root'}; if ($tree_nilp == ${$handle.'_root'}) return (array()); else { $res = array(); // recreate "deletion signature" by generating empty node, masking out "self" pointer $delsig = str_pad('',TREE_POINTLEN*4,"\0"); _tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig); $delsig = substr($delsig,0,TREE_POINTLEN); for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN*4) if ($delsig != substr(${$handle.'_tree'},$i,TREE_POINTLEN)) $res[] = $i/(TREE_POINTLEN*4); return ($res); } default: _tree_err(0,$method); } } function _tree_err($t,$method) { switch($t) { case 0: die ("tree, m=$method: unknown."); case 1: die ("tree, m=$method: bad number of arguments."); case 2: die ("tree, m=$method: unknown tree handle."); case 3: die ("tree, m=$method: node does not exist."); case 4: die ("tree, m=$method: could not create new node."); case 5: die ("tree, m=$method: passed node data exceeds fixed length."); case 6: die ("tree, m=$method: tag separator not found in dataset."); case 7: die ("tree, m=$method: too many recursions."); case 8: die ("tree, m=$method: parent node not found."); case 9: die ("tree, m=$method: sister node not found."); default: die ("tree: unspecified error"); } } function _tree_getrec($a,&$treetable) { global $tree_nilp; // like unpack's result lists, we return an array counting from 1 ! $s = substr($treetable,$a*4*TREE_POINTLEN,4*TREE_POINTLEN); if (!$s) return NULL; // use built-in unpack, little endian unsigned, for 16 bit pointers if (TREE_POINTLEN==2) return (unpack("v*",$s)); // mimick unpack for 24 or 32 bit pointers $res = array(); for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) { $res[$i+1] |= ord($s[$i*TREE_POINTLEN+$j]) << $j; } }; return $res; } function _tree_setrec($a,$rec,&$treetable) { global $tree_nilp; // like pack, expects $rec array to count from 1 // use built-in unpack, little endian unsigned, for 16 bit pointers if (TREE_POINTLEN==2) $s = pack("v*",$rec[1],$rec[2],$rec[3],$rec[4]); // did I mention php sucks? // mimick pack for 24 or 32 bit pointers else { $s = ''; for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) { $s .= chr(($rec[$i+1] >> $j) & 255); } } } for($i=0;$i<TREE_POINTLEN*4;$i++) $treetable[$a*TREE_POINTLEN*4+$i] = $s[$i]; } function _tree_newnode($parent,&$treelength,&$treetable) { global $tree_nilp; $res = $treelength/(4*TREE_POINTLEN); $treetable.=str_pad('',4*TREE_POINTLEN,"\0"); $treelength += (4*TREE_POINTLEN); if ($parent == $tree_nilp) { $depth = 0; } else { if (NULL===($parrec = _tree_getrec($parent,$treetable))) _tree_err(8,'tree_newnode'); $depth = 1 + ($parrec[1] >> 4); } _tree_setrec($res,array(1=>(($res & 15) | ($depth << 4)) & ((1 << 8*TREE_POINTLEN)-1), 2=>$tree_nilp,3=>$parent,4=>$tree_nilp),$treetable); return $res; } function _tree_previous($a,&$treetable,&$treeroot) { global $tree_nilp; if (NULL===($nodrec = _tree_getrec($a,$treetable))) _tree_err(3,'tree_previous'); if ($nodrec[TREE_PARENT]<$tree_nilp) { if (NULL===($parrec = _tree_getrec($nodrec[TREE_PARENT],$treetable))) _tree_err(8,'tree_previous'); $sibidx = $parrec[TREE_CHILD]; } else { $sibidx = $treeroot; } while ($sibidx != $a && $sibidx < $tree_nilp) { if (NULL===($sibrec = _tree_getrec($sibidx,$treetable))) _tree_err(9,'tree_previous'); if ($sibrec[TREE_NEXT] == $a) return ($sibidx); $sibidx = $sibrec[TREE_NEXT]; } return NULL; // no previous node => $a is firstChild } function _tree_gettag($a,&$treetable,&$treedata) { if (TREE_TAGLEN>0) { return (substr($treedata,$a*TREE_DATALEN+TREE_TAGPOS,TREE_TAGLEN)); } else { $o = $a*TREE_DATALEN+TREE_TAGPOS; $i = strpos($treedata,TREE_TAGSEP,$o); if ($i<0) _tree_err(6,'gettag'); return (substr($treedata,$o,$i-$o)); } } function _tree_findtag($a,$t,&$treetable,&$treedata) { global $tree_nilp; static $depth=0; $depth++; // catch22 if ($depth>1024) _tree_err (7,'findtag'); while ($a<$tree_nilp) { if (_tree_gettag($a,$treetable,$treedata) == $t) return $a; $rec = _tree_getrec($a,$treetable); if ($rec[TREE_CHILD]<$tree_nilp) { $r = _tree_findtag($rec[TREE_CHILD],$t,$treetable,$treedata); if (!is_null($r)) return $r; } $a = $rec[TREE_NEXT]; } $depth--; return NULL; } function _tree_deepdel($a,&$treetable) { global $tree_nilp; static $delsig = ''; if (!$delsig) { // recreate "deletion signature" by generating empty node, masking out "self" pointer $delsig = str_pad('',TREE_POINTLEN*4,"\0"); _tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig); $delsig = substr($delsig,0,TREE_POINTLEN); } static $depth=0; $depth++; // catch22 if ($depth>1024) _tree_err (7,'remove'); while ($a<$tree_nilp) { $rec = _tree_getrec($a,$treetable); if ($rec[TREE_CHILD]<$tree_nilp) { _tree_deepdel($rec[TREE_CHILD],$treetable); } for ($j=0;$j<TREE_POINTLEN;$j++) $treetable[$a*TREE_POINTLEN*4+$j] = $delsig[$j]; $a = $rec[TREE_NEXT]; } $depth--; return NULL; } function _tree_prettyprint($a,&$treetable,&$treedata) { global $tree_nilp; static $depth=0; $depth++; // catch22 if ($depth>1024) _tree_err (7,'pretty'); while ($a<$tree_nilp) { printf ("%x:",$a); print (_tree_gettag($a,$treetable,$treedata))." "; $rec = _tree_getrec($a,$treetable); if ($rec[TREE_CHILD]<$tree_nilp) { print "[ "; _tree_prettyprint($rec[TREE_CHILD],$treetable,$treedata); print "] "; } $a = $rec[TREE_NEXT]; } $depth--; } ?>