<?php
/**
 * Tree library for PHP 4 / PHP 5
 *
 * This package arose from the need for a simple, memory efficient and upwards compatible
 * tree module.
 * It provides
 * - n-ary trees
 * - efficient traversal, using pseudo-pointers "up", "down" and "right"
 * - storage facility for arbitrary data within the nodes
 *
 * Two shortcomings of PHP are addressed by this package:
 * - memory hogging: both arrays and objects reserve huge chunks of memory for each element 
 *   or instance. For example, a simple array holding 64K characters takes 4 MB of RAM.
 *   Nested arrays reserve multiples of this
 * - The OOP model and implementation of references changed significantly between PHP4 and PHP5, making 
 *   it difficult or impossible to write portable code using object references
 *
 * The present package uses fixed-length tables implemented with the string datatype. Instead of references,
 * it implements pseudo-pointers, using integer indexes into these tables. Think C.
 *
 * The storage capacity is limited by the constant TREE_POINTLEN (default: 2 bytes). The highest number
 * that can be stored in the pointer (actually, -1) is the maximum total number of nodes the tree can hold,
 * including deleted nodes. The depth of the tree and the number of branches is not limited, although
 * depth values will not be reported correctly for values taking more than 8*TREE_POINTLEN-4 bits (the
 * missing four bits are used for a checksum). Thus, with the default pointer size, the tree depth is
 * effectively limited to 2^12 == 4096.
 * The upper limit to TREE_POINTLEN is constituted by PHP's maxint (4 bytes).
 *
 * A disadvantage: No garbage collection implemented; our pseudo-memory (stored in the strings) grows with
 * each "add" operation, but does not free unused space after "delete" operations.
 * (can be retrofitted by changing the "newnode" logic to use previously freed space instead of claiming new)
 * Ignore this problem if you use the tree mainly to store and retrieve data, and if you don't reorganize the
 * tree extensively (many prune and graft operations)
 *
 * A comparison with Richard Heyes' object-oriented Tree-Class 1.0.3. shows, how and where this approach
 * is more efficient, while not sacrificing speed:
 *
 * 					tree.lib.php			Tree-1.0.3.php
 *					------------			--------------
 * insert 4096 nodes in bintree		CPU: 0.577s			CPU: 0.186s
 *					MEM: 0.3M			MEM: 3.5M
 *
 * get flat list from this tree		CPU: 0.099s			CPU: 0.064s
 *					MEM: 243K			MEM: 206K
 *
 * remove this tree's root node		CPU: 0.125s			CPU: 0.001s
 *					MEM: 3456B			MEM: 16B
 *
 * @package tree.lib
 * @author johannes(@)jeudi.de
 * @version 1.0
 */

/**
 * configure and conquer
 */
define ('TREE_DATALEN',64);  // fixed dataset size for data attached to nodes (recommended: multiples of 8)
define ('TREE_TAGPOS',0);    // offset of tag (uniq node name) in the data record of each node
define ('TREE_TAGLEN',NULL); // enter a fixed length for the tag, or NULL for a variable length, separated field
define ('TREE_TAGSEP',"\n"); // for variable length tags, specify a separator character. May not occur in the data
define ('TREE_POINTLEN',2);  // reserve this many bytes for each pointer. 2 bytes => 2^16 nodes possible

/**
 * more constants
 */
$tree_nilp = (1<<(TREE_POINTLEN*8))-1; // last entry in pointerspace == NIL (ampty pointer)
define ('TREE_META',1);	     // some constants to sweeten the code that accesses the tree table (pointer offsets)
define ('TREE_NEXT',2);
define ('TREE_PARENT',3);
define ('TREE_CHILD',4);

/**
 * tree master function
 *
 * The following primitive operations are selected by setting the "method" argument accordingly:
 *
 *      CALL WITH ARGUMENTS 					WILL RETURN
 *      -------------------					-----------
 * 	tree('new')						treehandle
 *	tree('+sister',treehandle,pointer-to-node,data)		pointer-to-newnode
 *	tree('+child', treehandle,pointer-to-node,data)		pointer-to-newnode
 *	tree('remove', treehandle,pointer-to-node)		pointer-to-right-sister
 *      tree('getroot',treehandle)				root-pointer
 *      tree('getnext',treehandle,pointer-to-node)		pointer-to-right-sister or NULL
 *      tree('getprev',treehandle,pointer-to-node)		pointer-to-left-sister or NULL
 *      tree('getpare',treehandle,pointer-to-node)		pointer-to-parent or NULL
 *      tree('getkid1',treehandle,pointer-to-node)		pointer-to-first-child or NULL
 *      tree('getkids',treehandle,pointer-to-node)		array of pointers, or empty array
 *	tree('gettag', treehandle,pointer-to-node)		first field of node dataset
 *	tree('getdata',treehandle,pointer-to-node)		node dataset (fixed length binary string)
 *	tree('getdpth',treehandle,pointer-to-node)		depth (root=0, max: 2^(8*POINTLEN)/16)
 *	tree('findtag',treehandle,anchor,tag)			pointer-to-node or NULL
 *	tree('pretty', treehandle,anchor)			1 (and print tree structure below anchor)
 *	tree('dump',   treehandle,anchor)			1 (and print tree structure below anchor)
 *
 * Single vs. multiple roots:
 * Root nodes are simple nodes whose "parent" pointer is NIL. Like any other node, they can have "sisters",
 * thereby creating a "multiply rooted" tree (or a forest?).
 * Not to be confounded: The th_nnn_root variable referenced in the code is just a pointer, pointing to
 * NIL (when the tree is empty) or to the root node or the first of a group of root nodes.
 *
 * After creating a tree with "new", it will be empty. The first node (=root) should be created using the
 * +child method, though +sister will work, too. Conceptually, the "anchor" should be given as NIL
 * in that case; practically, the anchor argument is ignored when a first node is added to the empty tree
 *
 * Each node is represented by a set of four pseudo pointers:
 * 1 (TREE_META):   The low four bits hold the four lowest bits of the node address (can be used to
 *                  assess the integrity of the tree).
 *                  The higher bits hold the depth of the node, with root having a depth of 0.
 * 2 (TREE_NEXT):   The address of the right sister of this node
 * 3 (TREE_PARENT): The address of the parent node
 * 4 (TREE_CHILD):  Address of this node's first child, if any
 *
 * Pointers are set to 2^(8*TREE-POINTLEN)-1 (or "-1") to mean NIL
 * The TREE_META value is set to NIL to indicate a deleted node
 */
function tree () {
	global $tree_nilp;
	static $treecounter = 0;
	$method = func_get_arg(0);
	if (func_num_args()>1) $handle = func_get_arg(1);
	if ($method=='new') $handle = 'th_'.($treecounter++);

	// static would be more appropriate here, since we want the new tree to
	// persist this function call, but not clutter the global namespace. But PHP
	// reserves statics at compile time, so this is impossible (cf. PHP Bug 11449).
	global ${$handle.'_root'};
	global ${$handle.'_tree'};
	global ${$handle.'_treeLen'};
	global ${$handle.'_treeData'};

	if ($method!='new' && !isset(${$handle.'_root'})) _tree_err(2,$method);
	
	switch ($method) {
		case 'new': 
			// static would be more appropriate here, since we want the new tree to
			// persist this function call, but not clutter the global namespace. But PHP
			// reserves statics at compile time, so this is impossible (cf. PHP Bug 11449).
			${$handle.'_root'}=$tree_nilp;
			${$handle.'_tree'}='';
			${$handle.'_treeLen'}=0;
			${$handle.'_treeData'}='';
			return $handle;
		case '+sister':
		case '+child':
			if (func_num_args()!=4) _tree_err(1,$method);
			$anchor = func_get_arg(2);
			$data = func_get_arg(3);
			if (${$handle.'_root'} == $tree_nilp) {
				if (-1===($newnode = _tree_newnode($tree_nilp,
						${$handle.'_treeLen'},${$handle.'_tree'}))) 
					_tree_err(4,$method);
				${$handle.'_root'} = $newnode;
			}
			else {
				if (NULL===($noderec = _tree_getrec($anchor,${$handle.'_tree'}))) 
					_tree_err(3,$method);
				if ($method == '+sister') {
					if (-1===($newnode = _tree_newnode($noderec[TREE_PARENT],
							${$handle.'_treeLen'},${$handle.'_tree'}))) 
						_tree_err(4,$method);
					$noderec[TREE_NEXT] = $newnode;
					_tree_setrec($anchor,$noderec,${$handle.'_tree'});
				}
				else  {
					if (-1===($newnode = _tree_newnode($anchor,
							${$handle.'_treeLen'},${$handle.'_tree'}))) 
						_tree_err(4,$method);
					if ($tree_nilp==$noderec[TREE_CHILD]) { // we are the first child
						$noderec[TREE_CHILD] = $newnode;
						_tree_setrec($anchor,$noderec,${$handle.'_tree'});
					}
					else {
						$kidx = $noderec[TREE_CHILD];
						do {
							if (NULL===($kidrec = _tree_getrec($kidx,${$handle.'_tree'}))) 
								_tree_err(3,$method);
						} while ($kidrec[TREE_NEXT]<$tree_nilp);
						$kidrec[TREE_NEXT] = $newnode;
						_tree_setrec($kidx,$kidrec,${$handle.'_tree'});
					}
				}
			}
			if (strlen($data)>TREE_DATALEN) _tree_err(5,$method);
			${$handle.'_treeData'} .= substr(str_pad($data,TREE_DATALEN,"\0"),0,TREE_DATALEN);
			return $newnode;
		case 'getroot': // or "first root", if more than one
			if (func_num_args()!=2) _tree_err(1,$method);
			return (${$handle.'_root'});
		case 'getnext': 
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
			return ($nodrec[TREE_NEXT]<$tree_nilp?$nodrec[TREE_NEXT]:NULL);
		case 'getprev': 
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			$res = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'});
			return ($res<$tree_nilp?$res:NULL);
		case 'getpare': 
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
			return ($nodrec[TREE_PARENT]<$tree_nilp?$nodrec[TREE_PARENT]:NULL);
		case 'getkid1': 
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
			return ($nodrec[TREE_CHILD]<$tree_nilp?$nodrec[TREE_CHILD]:NULL);
		case 'getkids': 
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			// accept NULL which as root
			if (is_null($which)) $which = ${$handle.'_root'};
			if (NULL===($nodrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
			$res = array();
			$kidx = $nodrec[TREE_CHILD];
			while ($kidx<$tree_nilp) {
				$res[] = $kidx;
				if (NULL===($nodrec = _tree_getrec($kidx,${$handle.'_tree'}))) _tree_err(3,$method);
				$kidx = $nodrec[TREE_NEXT];
			}
			return $res;
		case 'remove':
			if (func_num_args()!=3) _tree_err(1,$method);
			$which = func_get_arg(2);
			if (NULL===($delrec = _tree_getrec($which,${$handle.'_tree'}))) _tree_err(3,$method);
			if (NULL===($prev = _tree_previous($which,${$handle.'_tree'},${$handle.'_root'}))) {
				if ($delrec[TREE_PARENT]<$tree_nilp) {
					if (NULL===($parrec = _tree_getrec($delrec[TREE_PARENT],${$handle.'_tree'}))) 
						_tree_err(3,$method);
					$parrec[TREE_CHILD] = $delrec[TREE_NEXT];
					_tree_setrec($delrec[TREE_PARENT],$parrec,${$handle.'_tree'});
				}
				else {
					${$handle.'_root'} = $delrec[TREE_NEXT];
				}
			}
			else {
				if (NULL===($prvrec = _tree_getrec($prev,${$handle.'_tree'}))) 
					_tree_err(3,$method);
				$prvrec[TREE_NEXT] = $delrec[TREE_NEXT];
				_tree_setrec($prev,$prvrec,${$handle.'_tree'});
			}
			// set self pointer to NIL => this marks the node as deleted
			$delrec[TREE_META] = $tree_nilp;
			_tree_setrec($which,$delrec,${$handle.'_tree'});
			// trickle deletion marks down through subtree
			// note that for safe tree traversal, this is unneccassary, as the subtree
			// now is not linked in anymore. The deletion marks are used only by the "flatlst"
			// method that blindly dumps the nodes without traversing them.
			// If you can do without flatlst, you might as well uncomment the next line
			if ($delrec[TREE_CHILD]<$tree_nilp) _tree_deepdel($delrec[TREE_CHILD],${$handle.'_tree'});
			return (($delrec[TREE_NEXT]<$tree_nilp)?$delrec[TREE_NEXT]:NULL);
		case 'gettag':
			if (func_num_args()!=3) _tree_err(1,$method);
			$anchor = func_get_arg(2);
			return (_tree_gettag($anchor,${$handle.'_tree'},${$handle.'_treeData'}));
		case 'getdata':
			if (func_num_args()!=3) _tree_err(1,$method);
			$anchor = func_get_arg(2);
			return (substr(${$handle.'_treeData'},$anchor*TREE_DATALEN,TREE_DATALEN));
		case 'findtag':
			if (func_num_args()!=4) _tree_err(1,$method);
			$anchor = func_get_arg(2);
			$tag = func_get_arg(3);

			// accept NULL anchor as root
			if (is_null($anchor)) $anchor = ${$handle.'_root'};

			if ($tree_nilp == ${$handle.'_root'}) $res = NULL;
			else $res = _tree_findtag($anchor,$tag,${$handle.'_tree'},${$handle.'_treeData'});
			return $res;
		case 'pretty':
			if (func_num_args()!=3) _tree_err(1,$method);
			$anchor = func_get_arg(2);

			// accept NULL anchor as root
			if (is_null($anchor)) $anchor = ${$handle.'_root'};

			if ($tree_nilp == ${$handle.'_root'}) print "[]";
			else _tree_prettyprint($anchor,${$handle.'_tree'},${$handle.'_treeData'});
			print "\n";
			return 1;
		case 'dump':
			if (func_num_args()!=3) _tree_err(1,$method);
			$anchor = func_get_arg(2);

			// accept NULL anchor as root
			if (is_null($anchor)) $anchor = ${$handle.'_root'};

			if ($tree_nilp == ${$handle.'_root'}) print "NIL";
			else {
				for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN)  {
					for ($j=0;$j<TREE_POINTLEN;$j++) 
						printf("%02x",ord(${$handle.'_tree'}[$i+$j]));
					print ' ';
					if ($i % (TREE_POINTLEN*4) == TREE_POINTLEN*3) 
						print ("\t"._tree_gettag($i/(TREE_POINTLEN*4)-0.75,
							${$handle.'_tree'},${$handle.'_treeData'})."\n");
				}
			}
		case 'flatlst':
			if (func_num_args()!=3) _tree_err(1,$method);
			$anchor = func_get_arg(2);

			// accept NULL anchor as root
			if (is_null($anchor)) $anchor = ${$handle.'_root'};

			if ($tree_nilp == ${$handle.'_root'}) return (array());
			else {
				$res = array();
				// recreate "deletion signature" by generating empty node, masking out "self" pointer
				$delsig = str_pad('',TREE_POINTLEN*4,"\0");
				_tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig);
				$delsig = substr($delsig,0,TREE_POINTLEN);
				for ($i=0;$i<strlen(${$handle.'_tree'});$i+=TREE_POINTLEN*4)
					if ($delsig != substr(${$handle.'_tree'},$i,TREE_POINTLEN)) 
						$res[] = $i/(TREE_POINTLEN*4);
				return ($res);
			}
		default: _tree_err(0,$method);
	}
}

function _tree_err($t,$method) {
	switch($t) {
		case 0: die ("tree, m=$method: unknown.");
		case 1: die ("tree, m=$method: bad number of arguments.");
		case 2: die ("tree, m=$method: unknown tree handle.");
		case 3: die ("tree, m=$method: node does not exist.");
		case 4: die ("tree, m=$method: could not create new node.");
		case 5: die ("tree, m=$method: passed node data exceeds fixed length.");
		case 6: die ("tree, m=$method: tag separator not found in dataset.");
		case 7: die ("tree, m=$method: too many recursions.");
		case 8: die ("tree, m=$method: parent node not found.");
		case 9: die ("tree, m=$method: sister node not found.");
		default: die ("tree: unspecified error");
	}
}
function _tree_getrec($a,&$treetable)  {
	global $tree_nilp;
	// like unpack's result lists, we return an array counting from 1 !
	$s = substr($treetable,$a*4*TREE_POINTLEN,4*TREE_POINTLEN);
	if (!$s) return NULL;
	// use built-in unpack, little endian unsigned, for 16 bit pointers
	if (TREE_POINTLEN==2) return (unpack("v*",$s));
	// mimick unpack for 24 or 32 bit pointers
	$res = array(); for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) { 
		$res[$i+1] |= ord($s[$i*TREE_POINTLEN+$j]) << $j; } }; return $res;
}
function _tree_setrec($a,$rec,&$treetable) {
	global $tree_nilp;
	// like pack, expects $rec array to count from 1
	// use built-in unpack, little endian unsigned, for 16 bit pointers
	if (TREE_POINTLEN==2) $s = pack("v*",$rec[1],$rec[2],$rec[3],$rec[4]); // did I mention php sucks?
	// mimick pack for 24 or 32 bit pointers
	else {
		$s = ''; for ($i=0;$i<4;$i++) { for ($j=0;$j<TREE_POINTLEN;$j++) { 
			$s .= chr(($rec[$i+1] >> $j) & 255); } }
	}
	for($i=0;$i<TREE_POINTLEN*4;$i++) $treetable[$a*TREE_POINTLEN*4+$i] = $s[$i];
}
function _tree_newnode($parent,&$treelength,&$treetable) {
	global $tree_nilp;
	$res = $treelength/(4*TREE_POINTLEN);
	$treetable.=str_pad('',4*TREE_POINTLEN,"\0");
	$treelength += (4*TREE_POINTLEN);
	if ($parent == $tree_nilp) {
		$depth = 0;
	}
	else {
		if (NULL===($parrec = _tree_getrec($parent,$treetable))) _tree_err(8,'tree_newnode');
		$depth = 1 + ($parrec[1] >> 4);
	}
	_tree_setrec($res,array(1=>(($res & 15) | ($depth << 4)) & ((1 << 8*TREE_POINTLEN)-1),
				2=>$tree_nilp,3=>$parent,4=>$tree_nilp),$treetable);
	return $res;
}
function _tree_previous($a,&$treetable,&$treeroot) {
	global $tree_nilp;
	if (NULL===($nodrec = _tree_getrec($a,$treetable))) _tree_err(3,'tree_previous');
	if ($nodrec[TREE_PARENT]<$tree_nilp) {
		if (NULL===($parrec = _tree_getrec($nodrec[TREE_PARENT],$treetable))) _tree_err(8,'tree_previous');
		$sibidx = $parrec[TREE_CHILD];
	}
	else {
		$sibidx = $treeroot;
	}
	while ($sibidx != $a && $sibidx < $tree_nilp) {
		if (NULL===($sibrec = _tree_getrec($sibidx,$treetable))) _tree_err(9,'tree_previous');
		if ($sibrec[TREE_NEXT] == $a) return ($sibidx);
		$sibidx = $sibrec[TREE_NEXT];
	}
	return NULL; // no previous node => $a is firstChild
}
function _tree_gettag($a,&$treetable,&$treedata) {
	if (TREE_TAGLEN>0) {
		return (substr($treedata,$a*TREE_DATALEN+TREE_TAGPOS,TREE_TAGLEN));
	}
	else {
		$o = $a*TREE_DATALEN+TREE_TAGPOS;
		$i = strpos($treedata,TREE_TAGSEP,$o);
		if ($i<0) _tree_err(6,'gettag');
		return (substr($treedata,$o,$i-$o));
	}
}
function _tree_findtag($a,$t,&$treetable,&$treedata) {
	global $tree_nilp;
	static $depth=0;
	$depth++;
	// catch22
	if ($depth>1024) _tree_err (7,'findtag');
	while ($a<$tree_nilp) {
		if (_tree_gettag($a,$treetable,$treedata) == $t) return $a;
		$rec = _tree_getrec($a,$treetable);
		if ($rec[TREE_CHILD]<$tree_nilp) {
			$r = _tree_findtag($rec[TREE_CHILD],$t,$treetable,$treedata);
			if (!is_null($r)) return $r;
		}
		$a = $rec[TREE_NEXT];
	}
	$depth--;
	return NULL;
}
function _tree_deepdel($a,&$treetable) {
	global $tree_nilp;
	static $delsig = '';
	if (!$delsig) {
		// recreate "deletion signature" by generating empty node, masking out "self" pointer
		$delsig = str_pad('',TREE_POINTLEN*4,"\0");
		_tree_setrec(0,array(1=>$tree_nilp,2=>$tree_nilp,3=>$tree_nilp,4=>$tree_nilp),$delsig);
		$delsig = substr($delsig,0,TREE_POINTLEN);
	}
	static $depth=0;
	$depth++;
	// catch22
	if ($depth>1024) _tree_err (7,'remove');
	while ($a<$tree_nilp) {
		$rec = _tree_getrec($a,$treetable);
		if ($rec[TREE_CHILD]<$tree_nilp) {
			_tree_deepdel($rec[TREE_CHILD],$treetable);
		}
		for ($j=0;$j<TREE_POINTLEN;$j++) $treetable[$a*TREE_POINTLEN*4+$j] = $delsig[$j];
		$a = $rec[TREE_NEXT];
	}
	$depth--;
	return NULL;
}
function _tree_prettyprint($a,&$treetable,&$treedata) {
	global $tree_nilp;
	static $depth=0;
	$depth++;
	// catch22
	if ($depth>1024) _tree_err (7,'pretty');
	while ($a<$tree_nilp) {
		printf ("%x:",$a);
		print (_tree_gettag($a,$treetable,$treedata))." ";
		$rec = _tree_getrec($a,$treetable);
		if ($rec[TREE_CHILD]<$tree_nilp) {
			print "[ ";
			_tree_prettyprint($rec[TREE_CHILD],$treetable,$treedata);
			print "] ";
		}
		$a = $rec[TREE_NEXT];
	}
	$depth--;
}
?>