<?php
/**
* Tree library for PHP 4 / PHP 5
*
* This package arose from the need for a simple, memory efficient and upwards compatible
* tree module.
* It provides
* - n-ary trees
* - efficient traversal, using pseudo-pointers "up", "down" and "right"
* - storage facility for arbitrary data within the nodes
*
* Two shortcomings of PHP are addressed by this package:
* - memory hogging: both arrays and objects reserve huge chunks of memory for each element
* or instance. For example, a simple array holding 64K characters takes 4 MB of RAM.
* Nested arrays reserve multiples of this
* - The OOP model and implementation of references changed significantly between PHP4 and PHP5, making
* it difficult or impossible to write portable code using object references
*
* The present package uses fixed-length tables implemented with the string datatype. Instead of references,
* it implements pseudo-pointers, using integer indexes into these tables. Think C.
*
* The storage capacity is limited by the constant TREE_POINTLEN (default: 2 bytes). The highest number
* that can be stored in the pointer (actually, -1) is the maximum total number of nodes the tree can hold,
* including deleted nodes. The depth of the tree and the number of branches is not limited, although
* depth values will not be reported correctly for values taking more than 8*TREE_POINTLEN-4 bits (the
* missing four bits are used for a checksum). Thus, with the default pointer size, the tree depth is
* effectively limited to 2^12 == 4096.
* The upper limit to TREE_POINTLEN is constituted by PHP's maxint (4 bytes).
*
* A disadvantage: No garbage collection implemented; our pseudo-memory (stored in the strings) grows with
* each "add" operation, but does not free unused space after "delete" operations.
* (can be retrofitted by changing the "newnode" logic to use previously freed space instead of claiming new)
* Ignore this problem if you use the tree mainly to store and retrieve data, and if you don't reorganize the
* tree extensively (many prune and graft operations)
*
* A comparison with Richard Heyes' object-oriented Tree-Class 1.0.3. shows, how and where this approach
* is more efficient, while not sacrificing speed:
*
* tree.lib.php Tree-1.0.3.php
* ------------ --------------
* insert 4096 nodes in bintree CPU: 0.577s CPU: 0.186s
* MEM: 0.3M MEM: 3.5M
*
* get flat list from this tree CPU: 0.099s CPU: 0.064s
* MEM: 243K MEM: 206K
*
* remove this tree's root node CPU: 0.125s CPU: 0.001s
* MEM: 3456B MEM: 16B
*
* @package tree.lib
* @author johannes(@)jeudi.de
* @version 1.0
*/
/**
* configure and conquer
*/
define ('TREE_DATALEN',64); // fixed dataset size for data attached to nodes (recommended: multiples of 8)
define ('TREE_TAGPOS',0); // offset of tag (uniq node name) in the data record of each node
define ('TREE_TAGLEN',NULL); // enter a fixed length for the tag, or NULL for a variable length, separated field
define ('TREE_TAGSEP',"\n"); // for variable length tags, specify a separator character. May not occur in the data
define ('TREE_POINTLEN',2); // reserve this many bytes for each pointer. 2 bytes => 2^16 nodes possible
/**
* more constants
*/
$tree_nilp = (1<<(TREE_POINTLEN*8))-1; // last entry in pointerspace == NIL (ampty pointer)
define ('TREE_META',1); // some constants to sweeten the code that accesses the tree table (pointer offsets)
define ('TREE_NEXT',2);
define ('TREE_PARENT',3);
define ('TREE_CHILD',4);
/**
* tree master function
*
* The following primitive operations are selected by setting the "method" argument accordingly:
*
* CALL WITH ARGUMENTS WILL RETURN
* ------------------- -----------
* tree('new') treehandle
* tree('+sister',treehandle,pointer-to-node,data) pointer-to-newnode
* tree('+child', treehandle,pointer-to-node,data) pointer-to-newnode
* tree('remove', treehandle,pointer-to-node) pointer-to-right-sister
* tree('getroot',treehandle) root-pointer
* tree('getnext',treehandle,pointer-to-node) pointer-to-right-sister or NULL
* tree('getprev',treehandle,pointer-to-node) pointer-to-left-sister or NULL
* tree('getpare',treehandle,pointer-to-node) pointer-to-parent or NULL
* tree('getkid1',treehandle,pointer-to-node) pointer-to-first-child or NULL
* tree('getkids',treehandle,pointer-to-node) array of pointers, or empty array
* tree('gettag', treehandle,pointer-to-node) first field of node dataset
* tree('getdata',treehandle,pointer-to-node) node dataset (fixed length binary string)
* tree('getdpth',treehandle,pointer-to-node) depth (root=0, max: 2^(8*POINTLEN)/16)
* tree('findtag',treehandle,anchor,tag) pointer-to-node or NULL
* tree('pretty', treehandle,anchor) 1 (and print tree structure below anchor)
* tree('dump', treehandle,anchor) 1 (and print tree structure below anchor)
*
* Single vs. multiple roots:
* Root nodes are simple nodes whose "parent" pointer is NIL. Like any other node, they can have "sisters",
* thereby creating a "multiply rooted" tree (or a forest?).
* Not to be confounded: The th_nnn_root variable referenced in the code is just a pointer, pointing to
* NIL (when the tree is empty) or to the root node or the first of a group of root nodes.
*
* After creating a tree with "new", it will be empty. The first node (=root) should be created using the
* +child method, though +sister will work, too. Conceptually, the "anchor" should be given as NIL
* in that case; practically, the anchor argument is ignored when a first node is added to the empty tree
*
* Each node is represented by a set of four pseudo pointers:
* 1 (TREE_META): The low four bits hold the four lowest bits of the node address (can be used to
* assess the integrity of the tree).
* The higher bits hold the depth of the node, with root having a depth of 0.
* 2 (TREE_NEXT): The address of the right sister of this node
* 3 (TREE_PARENT): The address of the parent node
* 4 (TREE_CHILD): Address of this node's first child, if any
*
* Pointers are set to 2^(8*TREE-POINTLEN)-1 (or "-1") to mean NIL
* The TREE_META value is set to NIL to indicate a deleted node
*/
function tree () {
global $tree_nilp;
static $treecounter = 0;
$method = func_get_arg(0);
if (func_num_args()>1) $handle = func_get_arg(1);
if ($method=='new') $handle = 'th_'.($treecounter++);
// static would be more appropriate here, since we want the new tree to
// persist this function call, but not clutter the global namespace. But PHP
// reserves statics at compile time, so this is impossible (cf. PHP Bug 11449).
global ${$handle.'_root'};
global ${$handle.'_tree'};
global ${$handle.'_treeLen'};
global ${$handle.'_treeData'};
if ($method!='new' && !isset(${$handle.'_root'})) _tree_err(2,$method);
switch ($method) {
case 'new':
// static would be more appropriate here, since we want the new tree to
// persist this function call, but not clutter the global namespace. But PHP
// reserves statics at compile time, so this is impossible (cf. PHP Bug 11449).
${$handle.'_root'}=$tree_nilp;
${$handle.'_tree'}='';
${$handle.'_treeLen'}=0;
${$handle.'_treeData'}='';
return $handle;
case '+sister':
case '+child':
if (func_num_args()!=4) _tree_err(1,$method);
$anchor = func_get_arg(2);
$data = func_get_arg(3);
if (${$handle.'_root'} == $tree_nilp) {
if (-1===($newnode = _tree_newnode($tree_nilp,
${$handle.'_treeLen'},${$handle.'_tree'})))
_tree_err(4,$method);
${$handle.'_root'} = $newnode;
}
else {
if (NULL===($noderec = _tree_getrec($anchor,${$handle.'_tree'})))
_tree_err(3,$method);
if ($method == '+sister') {
if (-1===($newnode = _tree_newnode($noderec[TREE_PARENT],
${$handle.'_treeLen'},${$handle.'_tree'})))
_tree_err(4,$method);
$noderec[TREE_NEXT] = $newnode;
_tree_setrec($anchor,$noderec,${$handle.'_tree'});
}
else {
if (-1===($newnode = _tree_newnode($anchor,
${$handle.'_treeLen'},${$handle.'_tree'})))
_tree_err(4,$method);
if ($tree_nilp==$noderec[TREE_CHILD]) { // we are the first child
$noderec[TREE_CHILD] = $newnode;
_tree_setrec($anchor,$noderec,${$handle.'_tree'});
}
else {
$kidx = $noderec[TREE_CHILD];
do {
if (NULL===($kidrec = _tree_getrec($kidx,${$handle.'_tree'})))
_tree_err(3,$method);
} while ($kidrec[TREE_NEXT]<$tree_nilp);
$kidrec[TREE_NEXT] = $newnode;
_tree_setrec($kidx,$kidrec,${$handle.'_tree'});
}
}
}
if (strlen($data)>TREE_DATALEN) _tree_err(5,$method);
${$handle.'_treeData'} .= substr(str_pad($data,TREE_DATALEN,"\0"),0,TREE_DATALEN);
return $newnode;
case 'getroot': // or "first root", if more than one
if (func_num_args()!=2) _tree_err(1,$method);
return (${$handle.'_root'});
case 'getnext':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
return ($nodrec[TREE_NEXT]<$tree_nilp?$nodrec[TREE_NEXT]:NULL);
case 'getprev':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
$res = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'});
return ($res<$tree_nilp?$res:NULL);
case 'getpare':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
return ($nodrec[TREE_PARENT]<$tree_nilp?$nodrec[TREE_PARENT]:NULL);
case 'getkid1':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
return ($nodrec[TREE_CHILD]<$tree_nilp?$nodrec[TREE_CHILD]:NULL);
case 'getkids':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
// accept NULL which as root
if (is_null($which)) $which = ${$handle.'_root'};
if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
$res = array();
$kidx = $nodrec[TREE_CHILD];
while ($kidx<$tree_nilp) {
$res[] = $kidx;
if (NULL===($nodrec = _tree_getrec($kidx,${$handle.'_tree'}))) _tree_err(3,$method);
$kidx = $nodrec[TREE_NEXT];
}
return $res;
case 'remove':
if (func_num_args()!=3) _tree_err(1,$method);
$which = func_get_arg(2);
if (NULL===($delrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
if (NULL===($prev = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'}))) {
if ($delrec[TREE_PARENT]<$tree_nilp) {
if (NULL===($parrec = _tree_getrec($delrec[TREE_PARENT],${$handle.'_tree'})))
_tree_err(3,$method);
$parrec[TREE_CHILD] = $delrec[TREE_NEXT];
_tree_setrec($delrec[TREE_PARENT],$parrec,${$handle.'_tree'});
}
else {
${$handle.'_root'} = $delrec[TREE_NEXT];
}
}
else {
if (NULL===($prvrec = _tree_getrec($prev,${$handle.'_tree'})))
_tree_err(3,$method);
$prvrec[TREE_NEXT] = $delrec[TREE_NEXT];
_tree_setrec($prev,$prvrec,${$handle.'_tree'});
}
// set self pointer to NIL => this marks the node as deleted
$delrec[TREE_META] = $tree_nilp;
_tree_setrec($which,$delrec,${$handle.'_tree'});
// trickle deletion marks down through subtree
// note that for safe tree traversal, this is unneccassary, as the subtree
// now is not linked in anymore. The deletion marks are used only by the "flatlst"
// method that blindly dumps the nodes without traversing them.
// If you can do without flatlst, you might as well uncomment the next line
if ($delrec[TREE_CHILD]<$tree_nilp) _tree_deepdel($delrec[TREE_CHILD],${$handle.'_tree'});
return (($delrec[TREE_NEXT]<$tree_nilp)?$delrec[TREE_NEXT]:NULL);
case 'gettag':
if (func_num_args()!=3) _tree_err(1,$method);
$anchor = func_get_arg(2);
return (_tree_gettag($anchor,${$handle.'_tree'},${$handle.'_treeData'}));
case 'getdata':
if (func_num_args()!=3) _tree_err(1,$method);
$anchor = func_get_arg(2);
return (substr(${$handle.'_treeData'},$anchor*TREE_DATALEN,TREE_DATALEN));
case 'findtag':
if (func_num_args()!=4) _tree_err(1,$method);
$anchor = func_get_arg(2);
$tag = func_get_arg(3);
// accept NULL anchor as root
if (is_null($anchor)) $anchor = ${$handle.'_root'};
if ($tree_nilp == ${$handle.'_root'}) $res = NULL;
else $res = _tree_findtag($anchor,$tag,${$handle.'_tree'},${$handle.'_treeData'});
return $res;
case 'pretty':
if (func_num_args()!=3) _tree_err(1,$method);
$anchor = func_get_arg(2);
// accept NULL anchor as root
if (is_null($anchor)) $anchor = ${$handle.'_root'};
if ($tree_nilp == ${$handle.'_root'}) print "[]";
else _tree_prettyprint($anchor,${$handle.'_tree'},${$handle.'_treeData'});
print "\n";
return 1;
case 'dump':
if (func_num_args()!=3) _tree_err(1,$method);
$anchor = func_get_arg(2);
// accept NULL anchor as root
if (is_null($anchor)) $anchor = ${$handle.'_root'};
if ($tree_nilp == ${$handle.'_root'}) print "NIL";
else {
for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN) {
for ($j=0;$j<TREE_POINTLEN;$j++)
printf("%02x",ord(${$handle.'_tree'}[$i+$j]));
print ' ';
if ($i % (TREE_POINTLEN*4) == TREE_POINTLEN*3)
print ("\t"._tree_gettag($i/(TREE_POINTLEN*4)-0.75,
${$handle.'_tree'},${$handle.'_treeData'})."\n");
}
}
case 'flatlst':
if (func_num_args()!=3) _tree_err(1,$method);
$anchor = func_get_arg(2);
// accept NULL anchor as root
if (is_null($anchor)) $anchor = ${$handle.'_root'};
if ($tree_nilp == ${$handle.'_root'}) return (array());
else {
$res = array();
// recreate "deletion signature" by generating empty node, masking out "self" pointer
$delsig = str_pad('',TREE_POINTLEN*4,"\0");
_tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig);
$delsig = substr($delsig,0,TREE_POINTLEN);
for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN*4)
if ($delsig != substr(${$handle.'_tree'},$i,TREE_POINTLEN))
$res[] = $i/(TREE_POINTLEN*4);
return ($res);
}
default: _tree_err(0,$method);
}
}
function _tree_err($t,$method) {
switch($t) {
case 0: die ("tree, m=$method: unknown.");
case 1: die ("tree, m=$method: bad number of arguments.");
case 2: die ("tree, m=$method: unknown tree handle.");
case 3: die ("tree, m=$method: node does not exist.");
case 4: die ("tree, m=$method: could not create new node.");
case 5: die ("tree, m=$method: passed node data exceeds fixed length.");
case 6: die ("tree, m=$method: tag separator not found in dataset.");
case 7: die ("tree, m=$method: too many recursions.");
case 8: die ("tree, m=$method: parent node not found.");
case 9: die ("tree, m=$method: sister node not found.");
default: die ("tree: unspecified error");
}
}
function _tree_getrec($a,&$treetable) {
global $tree_nilp;
// like unpack's result lists, we return an array counting from 1 !
$s = substr($treetable,$a*4*TREE_POINTLEN,4*TREE_POINTLEN);
if (!$s) return NULL;
// use built-in unpack, little endian unsigned, for 16 bit pointers
if (TREE_POINTLEN==2) return (unpack("v*",$s));
// mimick unpack for 24 or 32 bit pointers
$res = array(); for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) {
$res[$i+1] |= ord($s[$i*TREE_POINTLEN+$j]) << $j; } }; return $res;
}
function _tree_setrec($a,$rec,&$treetable) {
global $tree_nilp;
// like pack, expects $rec array to count from 1
// use built-in unpack, little endian unsigned, for 16 bit pointers
if (TREE_POINTLEN==2) $s = pack("v*",$rec[1],$rec[2],$rec[3],$rec[4]); // did I mention php sucks?
// mimick pack for 24 or 32 bit pointers
else {
$s = ''; for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) {
$s .= chr(($rec[$i+1] >> $j) & 255); } }
}
for($i=0;$i<TREE_POINTLEN*4;$i++) $treetable[$a*TREE_POINTLEN*4+$i] = $s[$i];
}
function _tree_newnode($parent,&$treelength,&$treetable) {
global $tree_nilp;
$res = $treelength/(4*TREE_POINTLEN);
$treetable.=str_pad('',4*TREE_POINTLEN,"\0");
$treelength += (4*TREE_POINTLEN);
if ($parent == $tree_nilp) {
$depth = 0;
}
else {
if (NULL===($parrec = _tree_getrec($parent,$treetable))) _tree_err(8,'tree_newnode');
$depth = 1 + ($parrec[1] >> 4);
}
_tree_setrec($res,array(1=>(($res & 15) | ($depth << 4)) & ((1 << 8*TREE_POINTLEN)-1),
2=>$tree_nilp,3=>$parent,4=>$tree_nilp),$treetable);
return $res;
}
function _tree_previous($a,&$treetable,&$treeroot) {
global $tree_nilp;
if (NULL===($nodrec = _tree_getrec($a,$treetable))) _tree_err(3,'tree_previous');
if ($nodrec[TREE_PARENT]<$tree_nilp) {
if (NULL===($parrec = _tree_getrec($nodrec[TREE_PARENT],$treetable))) _tree_err(8,'tree_previous');
$sibidx = $parrec[TREE_CHILD];
}
else {
$sibidx = $treeroot;
}
while ($sibidx != $a && $sibidx < $tree_nilp) {
if (NULL===($sibrec = _tree_getrec($sibidx,$treetable))) _tree_err(9,'tree_previous');
if ($sibrec[TREE_NEXT] == $a) return ($sibidx);
$sibidx = $sibrec[TREE_NEXT];
}
return NULL; // no previous node => $a is firstChild
}
function _tree_gettag($a,&$treetable,&$treedata) {
if (TREE_TAGLEN>0) {
return (substr($treedata,$a*TREE_DATALEN+TREE_TAGPOS,TREE_TAGLEN));
}
else {
$o = $a*TREE_DATALEN+TREE_TAGPOS;
$i = strpos($treedata,TREE_TAGSEP,$o);
if ($i<0) _tree_err(6,'gettag');
return (substr($treedata,$o,$i-$o));
}
}
function _tree_findtag($a,$t,&$treetable,&$treedata) {
global $tree_nilp;
static $depth=0;
$depth++;
// catch22
if ($depth>1024) _tree_err (7,'findtag');
while ($a<$tree_nilp) {
if (_tree_gettag($a,$treetable,$treedata) == $t) return $a;
$rec = _tree_getrec($a,$treetable);
if ($rec[TREE_CHILD]<$tree_nilp) {
$r = _tree_findtag($rec[TREE_CHILD],$t,$treetable,$treedata);
if (!is_null($r)) return $r;
}
$a = $rec[TREE_NEXT];
}
$depth--;
return NULL;
}
function _tree_deepdel($a,&$treetable) {
global $tree_nilp;
static $delsig = '';
if (!$delsig) {
// recreate "deletion signature" by generating empty node, masking out "self" pointer
$delsig = str_pad('',TREE_POINTLEN*4,"\0");
_tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig);
$delsig = substr($delsig,0,TREE_POINTLEN);
}
static $depth=0;
$depth++;
// catch22
if ($depth>1024) _tree_err (7,'remove');
while ($a<$tree_nilp) {
$rec = _tree_getrec($a,$treetable);
if ($rec[TREE_CHILD]<$tree_nilp) {
_tree_deepdel($rec[TREE_CHILD],$treetable);
}
for ($j=0;$j<TREE_POINTLEN;$j++) $treetable[$a*TREE_POINTLEN*4+$j] = $delsig[$j];
$a = $rec[TREE_NEXT];
}
$depth--;
return NULL;
}
function _tree_prettyprint($a,&$treetable,&$treedata) {
global $tree_nilp;
static $depth=0;
$depth++;
// catch22
if ($depth>1024) _tree_err (7,'pretty');
while ($a<$tree_nilp) {
printf ("%x:",$a);
print (_tree_gettag($a,$treetable,$treedata))." ";
$rec = _tree_getrec($a,$treetable);
if ($rec[TREE_CHILD]<$tree_nilp) {
print "[ ";
_tree_prettyprint($rec[TREE_CHILD],$treetable,$treedata);
print "] ";
}
$a = $rec[TREE_NEXT];
}
$depth--;
}
?>