Implementing RPM Compare in C#

For some reason you may want or need to implement an RPM Compare method, i.e. rpmvercmp(), in C#. This is a working model I adapted from a C version of RPM Compare; see http://rpm.org/index.html. I wrote my version sometime in 2017, and some of the links from my source are now dead. I have added links here so you can walk through part of the path I took.

RPM Compare works by comparing each part of the package name in a well-formed RPM. I won’t try to explain the RPM version comparison logic. I found Jason Antman’s Blog post “How Yum and RPM Compare Versions” to be very helpful. He explains the madness well.

Like the official rpmvercmp() method, the C# version here compares string a to string b:
if string a is equal to string b, return 0
if string a is newer than string b, return 1
if string a is older than string b, return -1

The RPM comparison is run separately on each of the five parts of the full RPM package name (epoch, name, version, release and architecture), and the first part that differs decides the result. Jason’s blog post is great at helping wrap your head around the comparison model.

There are test values for rpmvercmp() to validate the method. You can adapt this list for your own implementation.

Below the class sample is a pseudo-code example.

using System;
using System.Text;

public partial class RpmMethods
{
	/// <summary>
	/// Compares the alpha and numeric segments of two RPM version strings,
	/// following the rules of rpm's rpmvercmp().
	/// </summary>
	/// <param name="oneToTest">First version string; null is treated as an empty string.</param>
	/// <param name="twoToTest">Second version string; null is treated as an empty string.</param>
	/// <returns>
	/// 1: one is newer than two
	/// 0: one and two are the same version
	/// -1: two is newer than one
	/// </returns>
	/// <remarks>
	/// This is a translation of the C pointer-walking function into C# index
	/// arithmetic; the multiple return points come from the original C version.
	/// Non-ASCII characters are removed from both inputs before comparing.
	/// Rules, applied left to right:
	/// separators (anything other than ASCII letters, digits, '~' and '^') are skipped;
	/// '~' sorts before everything, including the end of the string;
	/// '^' sorts after the end of the string but before any other segment;
	/// a numeric segment is newer than an alphabetic one;
	/// numeric segments are compared as numbers (leading zeros ignored, any length);
	/// alphabetic segments are compared ordinally.
	/// References:
	/// <see href="https://github.com/sassoftware/python-rpm-vercmp/blob/master/rpm_vercmp/vercmp.py"/>,
	/// <see href="https://blog.jasonantman.com/2014/07/how-yum-and-rpm-compare-versions/"/>,
	/// <see href="http://rpm.org/"/>,
	/// <see href="https://raw.githubusercontent.com/rpm-software-management/rpm/master/tests/rpmvercmp.at"/>.
	/// </remarks>
	private static int RpmVerCmp(string oneToTest, string twoToTest)
	{
		// Null/empty inputs become "", and non-ASCII characters are dropped.
		string one = RpmVerCmpAsciiOnly(oneToTest);
		string two = RpmVerCmpAsciiOnly(twoToTest);

		// Easy case: byte-for-byte identical strings are the same version.
		if (string.Equals(one, two, StringComparison.Ordinal))
		{
			return 0;
		}

		// Cursors play the role of the C pointers; nothing is copied while scanning.
		int onePos = 0;
		int twoPos = 0;

		// Loop through each version segment of one and two, left to right.
		while (onePos < one.Length || twoPos < two.Length)
		{
			// Skip separators: anything that is not a letter, a digit, '~' or '^'.
			while (onePos < one.Length
				&& !char.IsLetterOrDigit(one[onePos])
				&& one[onePos] != '~'
				&& one[onePos] != '^')
			{
				onePos++;
			}
			while (twoPos < two.Length
				&& !char.IsLetterOrDigit(two[twoPos])
				&& two[twoPos] != '~'
				&& two[twoPos] != '^')
			{
				twoPos++;
			}

			bool oneAtEnd = onePos >= one.Length;
			bool twoAtEnd = twoPos >= two.Length;

			// Tilde sorts before everything else, even before the end of the string.
			bool oneTilde = !oneAtEnd && one[onePos] == '~';
			bool twoTilde = !twoAtEnd && two[twoPos] == '~';
			if (oneTilde || twoTilde)
			{
				if (!oneTilde)
				{
					return 1;
				}
				if (!twoTilde)
				{
					return -1;
				}
				// Both have a tilde here: discard it and look at what follows.
				onePos++;
				twoPos++;
				continue;
			}

			// Caret is the mirror image of tilde: it makes a version newer than
			// the same version without it, but older than any further segment.
			bool oneCaret = !oneAtEnd && one[onePos] == '^';
			bool twoCaret = !twoAtEnd && two[twoPos] == '^';
			if (oneCaret || twoCaret)
			{
				if (oneAtEnd)
				{
					return -1;
				}
				if (twoAtEnd)
				{
					return 1;
				}
				if (!oneCaret)
				{
					return 1;
				}
				if (!twoCaret)
				{
					return -1;
				}
				onePos++;
				twoPos++;
				continue;
			}

			// If we ran to the end of either string, we are finished with the loop.
			if (oneAtEnd || twoAtEnd)
			{
				break;
			}

			// Grab the first completely numeric or completely alphabetic segment.
			// The type of the segment is dictated by the first character of one;
			// the segment taken from two may therefore be empty.
			int oneStart = onePos;
			int twoStart = twoPos;
			bool isNum = char.IsDigit(one[onePos]);
			if (isNum)
			{
				while (onePos < one.Length && char.IsDigit(one[onePos]))
				{
					onePos++;
				}
				while (twoPos < two.Length && char.IsDigit(two[twoPos]))
				{
					twoPos++;
				}
			}
			else
			{
				while (onePos < one.Length && char.IsLetter(one[onePos]))
				{
					onePos++;
				}
				while (twoPos < two.Length && char.IsLetter(two[twoPos]))
				{
					twoPos++;
				}
			}

			// The segments are of different types (one numeric, the other alpha):
			// numeric segments are always newer than alpha segments.
			if (twoPos == twoStart)
			{
				return isNum ? 1 : -1;
			}

			if (isNum)
			{
				// Compare as numbers without parsing, so arbitrarily long digit
				// runs cannot overflow: drop leading zeros, then more digits wins.
				while (oneStart < onePos && one[oneStart] == '0')
				{
					oneStart++;
				}
				while (twoStart < twoPos && two[twoStart] == '0')
				{
					twoStart++;
				}

				int oneLen = onePos - oneStart;
				int twoLen = twoPos - twoStart;
				if (oneLen > twoLen)
				{
					return 1;
				}
				if (twoLen > oneLen)
				{
					return -1;
				}
			}

			// Ordinal comparison decides both alpha segments and equal-length
			// numeric segments. Do not return when they are equal because there
			// might be more segments to compare.
			int rc = string.CompareOrdinal(
				one.Substring(oneStart, onePos - oneStart),
				two.Substring(twoStart, twoPos - twoStart));
			if (rc != 0)
			{
				return rc < 0 ? -1 : 1;
			}
		}

		// All compared segments were identical. If both strings are exhausted
		// only the separators differed; otherwise whichever version still has
		// characters left over wins (e.g. "1.2.3" is older than "1.2.3b").
		bool oneDone = onePos >= one.Length;
		bool twoDone = twoPos >= two.Length;
		if (oneDone && twoDone)
		{
			return 0;
		}
		return oneDone ? -1 : 1;
	}

	/// <summary>
	/// Returns <paramref name="value"/> with every character outside the
	/// 7-bit ASCII range removed; null or empty input yields an empty string.
	/// </summary>
	/// <param name="value">Raw version string, possibly null.</param>
	/// <returns>A non-null string containing only ASCII characters.</returns>
	private static string RpmVerCmpAsciiOnly(string value)
	{
		if (string.IsNullOrEmpty(value))
		{
			return string.Empty;
		}

		StringBuilder ascii = new StringBuilder(value.Length);
		foreach (char c in value)
		{
			if (c <= '\u007F')
			{
				ascii.Append(c);
			}
		}
		return ascii.ToString();
	}
}
// pseudo-code example of use, based on parts of the RPM package full name

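// The format for the whole string is n-e:v-r.a
// (name-epoch:version-release.arch).
// Note that the example strings below are written with the epoch first
// (e:n-v-r.a); either way the same five parts are extracted.
// for example
// assume 'a' RPM is: 0:kernel-3.10.0-1127.13.1.el7.x86_64.rpm
// assume 'b' RPM is: 0:kernel-3.10.0-1160.36.2.el7.x86_64.rpm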

// parse the RPM into the relevant  parts
// 'a' strings
// and
// 'b' strings

// The five parts of each package, already split out of the full RPM name.
string aEpoch = "0";
string bEpoch = "0";

string aName = "kernel";
string bName = "kernel";

string aVersion = "3.10.0";
string bVersion = "3.10.0";

string aRelease = "1127.13.1.el7";
string bRelease = "1160.36.2.el7";

string aArchitecture = "x86_64";
string bArchitecture = "x86_64";

// Compare each part on its own. For every call:
// 1: one (a) is newer than two (b)
// 0: one and two are the same version
// -1: two is newer than one
int iEpc = RpmVerCmp(aEpoch, bEpoch);
int iNam = RpmVerCmp(aName, bName);
int iVer = RpmVerCmp(aVersion, bVersion);
int iRel = RpmVerCmp(aRelease, bRelease);
int iArc = RpmVerCmp(aArchitecture, bArchitecture);

// Check each RPM part in order; the first non-zero comparison is the final result.
int result;

if (iEpc != 0) // epoch
{
	result = iEpc;
}
else if (iNam != 0) // name
{
	result = iNam;
}
else if (iVer != 0) // version
{
	result = iVer;
}
else if (iRel != 0) // release
{
	result = iRel;
}
else if (iArc != 0) // architecture
{
	result = iArc;
}
else
{
	result = 0; // must be same if you get here
}

return result;