<?php
/*
	This takes in two files as input and produces a single new file (string)
	that represents the change from original_file -> new_file.
*/
class DeltaEncoder{
	private $lines; //Of the new file (output)
	private $new_lines; //The uniquely new content in the new file
	public function __construct(string $original_file,string $new_file){
		$map = array();
		$lines = explode("\n",$original_file);
		unset($original_file); //Not needed
		foreach($lines as $i=>$line){
			$hash = hash('sha1',$line);
			if(!isset($map[$hash]))
				$map[$hash] = $i;
		}

		$this->new_lines = array();
		$lines = explode("\n",$new_file);
		unset($new_file); //Not needed
		foreach($lines as $i=>&$line){
			$hash = hash('sha1',$line);
			if(isset($map[$hash])){
				$line = $map[$hash];
			}else{
				$this->new_lines[] = $line;
				$line = 'n'.(count($this->new_lines)-1);
			}
		}
		unset($map); //Not needed
		//Walk through the array backwards to calculate diffs
		for($i=count($lines)-1;$i>=0;$i--){
			if($i>0){
				if(substr($lines[$i],0,1)=='n')
					continue;

				$prev_line = $lines[$i-1];
				if(substr($prev_line,0,1)!='n'){
					$value = $lines[$i] - $prev_line;
					$lines[$i] = $value;
				}
			}
		}
		$this->lines = $lines;
	}
	public function encode(){
		$output = array();
		$ones = 0;
		foreach($this->lines as $i=>$line){
			if($line == '1'){
				$ones++;
			}
			else{
				if($ones>0){
					if($ones > 1)
						$output[] = '!'.$ones;
					else
						$output[] = 1;
					$ones = 0;
				}
				$output[] = $line;
			}
		}
		if($ones>0){
			$output[] = '!'.$ones;
		}
		$output = implode(",",$output);
		return $output."\n".implode("\n",$this->new_lines);
	}
}

class DeltaDecoder{
	private $map; //The map that combines old and new
	private $new_lines; //New file, but specifically the unique lines that are new (not all lines)
	private $original_lines; //Original file
	public function __construct(string $original_file, string $encoded){
		//Sanity check the input
		if(empty($original_file))
			throw new Exception('The original file cannot be empty.');
		if(strpos($encoded,"\n")===FALSE)
			throw new Exception('The supplied data is not properly encoded. It does not have a line break');

		$this->original_lines = explode("\n",$original_file);
		unset($original_file); //Not needed

		$lines = explode("\n",$encoded);
		unset($encoded); //Not needed
		$map = explode(",",array_shift($lines));
		$this->map = array();
		foreach($map as $i=>$line){
			if(substr($line,0,1) == '!'){
				$n = (int)substr($line,1);
				for($i=0;$i<$n;$i++)
					$this->map[] = 1;
			}else
				$this->map[] = $line;
		}
		if(count($lines)>=1)
			$this->new_lines = $lines;
		else
			$this->new_lines = array();
		unset($lines); //Save memory
	}
	public function reconstruct(){
		foreach($this->map as $i=>$line){
			if($i==0)
				continue;
			$prev_line = $this->map[$i-1];
			if(substr($prev_line,0,1) != 'n' && substr($line,0,1) != 'n'){
				$this->map[$i] = (int)$prev_line + (int)$line;
			}
		}
		$output = array();
		foreach($this->map as $line){
			if(substr($line,0,1) != 'n'){
				$output[]= $this->original_lines[(int)$line];
			}else{
				$i = (int)substr($line,1);
				$output[]= $this->new_lines[$i];
			}
		}
		return implode("\n",$output);
	}
}
/*
$f1 = file_get_contents('samples/cbc1.html');
$f2 = file_get_contents('samples/cbc2.html');
$f2_md5 = md5($f2);

echo "Minimum memory use possible is (mb): ";
echo round((min(strlen($f1),strlen($f2))/1024)/1024,3);
echo "\n";

$DE = new DeltaEncoder($f1,$f2);
$e = $DE->encode();

$DD = new DeltaDecoder($f1,$e);

$r = $DD->reconstruct();

echo "\nSavings in multibyte characters:\n";
echo (round((mb_strlen($e) - mb_strlen($r))/mb_strlen($r),3)*100).'%';

echo "\nSavings in bytes:\n";
echo (round((strlen($e) - strlen($r))/strlen($r),3)*100).'%';

echo "\nSavings in bytes, compressed versions @gz=9:\n";
$e_gz = gzcompress($e,9);
$r_gz = gzcompress($r,9);

echo (round((strlen($e_gz) - strlen($r_gz))/strlen($r_gz),3)*100).'%';

//echo $r;
$r_md5 = md5($r);

echo "\nMD5:\n";
echo $f2_md5."\n";
echo $r_md5."\n";
echo "Match?\n";
echo var_dump($r_md5 == $f2_md5);
*/

?>
