#!/usr/local/bin/perl -w
##############################################################################
# $Id: ldiff,v 1.3 2002/02/23 01:51:45 jheiss Exp $
##############################################################################
# Compare two LDIF (LDAP Data Interchange Format) files and generate a
# diff in LDIF format, suitable for feeding to ldapmodify.
#
# TODO:
# - Deal with Base64 encoded fields
##############################################################################
# $Log: ldiff,v $
# Revision 1.3  2002/02/23 01:51:45  jheiss
# Fixed a major error in the LDIF output in certain conditions.
#
# Revision 1.2  2002/02/22 01:01:27  jheiss
# Use /usr/local/bin/perl -w
#
# Revision 1.1  2002/02/09 03:25:58  jheiss
# Initial revision
#
##############################################################################

# Includes and such
#use MIME::Base64;

# Constants

# Globals
# Parsed contents of the old and the new LDIF file.  Both are filled from
# the return value of parse_ldif(), which maps
#   DN => { attribute name => [ value, ... ] }
my %old;
my %new;

# Abort the program with a one-line synopsis of the expected command line.
# Never returns: the synopsis is raised via die, so it goes to STDERR.
sub usage
{
	my $synopsis = "Usage: $0 <old ldif> <new ldif>\n";
	die $synopsis;
}

# Exactly two arguments are required: the old and the new LDIF file.
if (scalar @ARGV != 2)
{
	usage();
}

my ($old_file, $new_file) = @ARGV;

# Use three-argument open with lexical handles so the file names are taken
# literally (two-argument open strips surrounding whitespace and gives
# special meaning to some leading characters), and report the reason for a
# failure via $!.
#print "Parsing old file\n";
open(my $old_fh, '<', $old_file) || die "Failed to open $old_file: $!\n";
%old = parse_ldif(<$old_fh>);
close($old_fh);

#print "Parsing new file\n";
open(my $new_fh, '<', $new_file) || die "Failed to open $new_file: $!\n";
%new = parse_ldif(<$new_fh>);
close($new_fh);

# Debugging aid: uncomment the lines below to dump what was parsed from
# the new file.
#print "Got a hash with " . scalar(keys %new) . " entry(s)\n";
#print "First DN has " . scalar(keys %{$new{((keys %new)[0])}}) . " attrib(s)\n";
#foreach my $dn (sort keys %new)
#{
#	print "dn: $dn\n";
#	foreach my $attrib (sort keys %{$new{$dn}})
#	{
#		print "    $attrib: " . join(', ', @{$new{$dn}->{$attrib}}) . "\n";
#	}
#	print "\n";
#}

# Before:  One value
# After:   Different value
# Result:  Replace
#
# Before:  One value
# After:   Multiple values
# Result:  Replace
#
# Before:  Existed
# After:   Gone
# Result:  Delete
#
# Before:  Didn't exist
# After:   Exists
# Result:  Add

# Pass 1: walk every attribute present in the new file and emit an "add"
# record when the attribute is missing from the old file, or a "replace"
# record when its set of values differs.  Each change is written as its
# own LDIF record (dn line, operation line, values, blank line).
#
# NOTE(review): a DN that is absent from the old file altogether is still
# emitted as per-attribute "add" modifications rather than as an entry
# addition -- confirm that this is what the consumer expects.
foreach my $dn (sort keys %new)
{
	my $new_attribs = $new{$dn};

	# Look the old entry up explicitly; "exists $old{$dn}->{...}" would
	# autovivify an empty entry in %old for every DN that is new.
	my $old_attribs = exists $old{$dn} ? $old{$dn} : {};

	foreach my $attrib (sort keys %{$new_attribs})
	{
		my $values = $new_attribs->{$attrib};

		my $operation;
		if (! exists $old_attribs->{$attrib})
		{
			# Old value does not exist
			$operation = 'add';
		}
		elsif (! array_compare($old_attribs->{$attrib}, $values))
		{
			# Old value exists and the set of values has changed
			$operation = 'replace';
		}
		else
		{
			# Unchanged, nothing to emit
			next;
		}

		print "dn: $dn\n";
		print "$operation: $attrib\n";
		foreach my $value (@{$values})
		{
			print "$attrib: $value\n";
		}
		print "\n";
	}
}

# Pass 2: walk every attribute present in the old file and emit a
# "delete" record for each one that no longer appears in the new file.
#
# NOTE(review): a DN that is absent from the new file altogether is
# emitted as one attribute deletion per attribute, not as an entry
# deletion -- confirm that this is what the consumer expects.
foreach my $dn (sort keys %old)
{
	# Same autovivification concern as above, this time for %new.
	my $new_attribs = exists $new{$dn} ? $new{$dn} : {};

	foreach my $attrib (sort keys %{$old{$dn}})
	{
		next if (exists $new_attribs->{$attrib});

		# New value doesn't exist, add a delete statement to the LDIF
		print "dn: $dn\n";
		print "delete: $attrib\n";
		print "\n";
	}
}

# Parse the lines of an LDIF file.
#
# Arguments: the lines of the file, one per list element, with or without
#            their line terminators.  The caller's data is not modified.
# Returns:   a hash (as a flat list) of the form
#              DN => { attribute name => [ value, ... ] }
#            Values of a repeated attribute are kept in input order.
#
# Handling of the input:
# - Folded lines (a physical line starting with a single space continues
#   the previous line) are joined before anything else is looked at, so
#   wrapped DNs and values are not truncated.
# - Comment lines are skipped; a blank line ends the current entry.
# - Base64 encoded fields ("attr:: value") are skipped, there is currently
#   no general way to deal with them.
# - Attributes seen while no DN is in effect are skipped.  You get a few
#   of these unless you specify -LLL to ldapsearch.
# - Anything else that does not look like "attr: value" is reported on
#   STDERR as a bogus line and skipped.
#
# We make no attempt to check whether the schema allows multiple values
# for any given attribute, we assume ldapmodify will croak up an error if
# the user tries something illegal.
sub parse_ldif
{
	# Characters accepted in an attribute description.  Besides word
	# characters this allows '-' (legal in attribute names), ';' (options
	# such as ";binary") and '.' (numeric OIDs).
	my $ATTRIB = qr/[\w.;-]+/;

	# Pass 1: strip line terminators and unfold continuation lines into
	# logical lines.  Blank lines are kept as empty strings because they
	# delimit entries.
	my @logical;
	my $can_continue = 0;
	foreach my $raw (@_)
	{
		# Work on a copy: the elements of @_ alias the caller's data and
		# may even be read-only.
		my $line = $raw;
		$line =~ s/\r?\n?\z//;  # Handles both LF and CRLF files

		if ($line =~ /^\s*$/)
		{
			push @logical, '';
			$can_continue = 0;
		}
		elsif ($can_continue && $line =~ /^ (.*)/)
		{
			# Continuation: drop the single leading space and append
			$logical[-1] .= $1;
		}
		else
		{
			push @logical, $line;
			$can_continue = 1;
		}
	}

	# Pass 2: interpret the logical lines.
	my %hash;
	my $dn;  # Undefined while we are not inside an entry

	LINE: foreach my $line (@logical)
	{
		#print "line:  '$line'\n";

		if ($line eq '')
		{
			# Blank lines signify the end of a DN entry
			$dn = undef;
			next LINE;
		}

		next LINE if ($line =~ /^\s*#/);  # Skip comments

		# Field is Base64 encoded, skip it (see above)
		next LINE if ($line =~ /^${ATTRIB}::/);

		if ($line !~ /^(${ATTRIB}):\s*(.*)/)
		{
			print STDERR "  Bogus line\n";
			next LINE;
		}
		my ($attrib, $value) = ($1, $2);
		#print "  attrib:  '$attrib'\n";
		#print "  value:  '$value'\n";

		if ($attrib eq 'dn')
		{
			$dn = $value;
			next LINE;
		}

		# Skip the line due to lack of a DN
		next LINE if (! defined $dn);

		# Creates the entry and the value list on first use
		push @{$hash{$dn}->{$attrib}}, $value;
	}

	# Debugging aid: uncomment the lines below to dump the result.
	#print "Returning hash with " . scalar(keys %hash) . " entry(s)\n";
	#foreach my $dn (sort keys %hash)
	#{
	#	print "dn: $dn\n";
	#	foreach my $attrib (sort keys %{$hash{$dn}})
	#	{
	#		print "    $attrib: " . join(', ', @{$hash{$dn}->{$attrib}}) . "\n";
	#	}
	#	print "\n";
	#}
	return %hash;
}

# Order-insensitive comparison of two lists of strings.
# Takes two array references; returns 1 when both arrays hold the same
# values once sorted, and 0 otherwise.  The arrays passed in are left
# untouched, sorted copies are compared.
sub array_compare
{
	my ($left_ref, $right_ref) = @_;

	my @left = sort @{$left_ref};
	my @right = sort @{$right_ref};

	# Different lengths can never match
	return 0 unless (@left == @right);

	foreach my $idx (0 .. $#left)
	{
		return 0 unless ($left[$idx] eq $right[$idx]);
	}

	return 1;
}

