3-way Cmp

Files CCore/inc/Cmp.h CCore/src/Cmp.cpp

Ordering is the one of the most important concept in the software world. CCore supports and utilizes the 3-way comparison. If you do compare two elements of the linear ordered set, then there are 3 possible outcomes: less, equal and greater. Pitiful (and shameful), but there is no a natural support for the 3-way comparison in the language. So the following helper entities are used for it.

enum CmpResult is used to represent an outcome of a 3-way comparison.


enum CmpResult
 {
  CmpLess    = -1,
  CmpEqual   =  0,
  CmpGreater =  1
 };
 
inline CmpResult operator - (CmpResult cmp) { return CmpResult(-int(cmp)); }
 
const char * GetTextDesc(CmpResult cmp);

The function LessCmp() can be used for the 3-way comparison of objects of types with defined operator <. It covers all basic integral types.


template <class T>
CmpResult LessCmp(const T &a,const T &b)
 {
  return (a<b)?CmpLess:( (b<a)?CmpGreater:CmpEqual );
 }

To compare objects use the function Cmp(). If the destined type is a class type with the defined method objCmp(), then Cmp() calls this method, otherwise it calls LessCmp().


template <class T>
CmpResult Cmp(const T &a,const T &b);

Use the function AlphaCmp() to perform the alphabetical comparison:


struct S
 {
  A a;
  B b;
  C c;

  // cmp objects

  CmpResult objCmp(const S &obj) const
   {
    return AlphaCmp(a,obj.a,b,obj.b,c,obj.c);
   }
 };

The function AlphaCmp() expects any number of pairs of arguments:


CmpResult AlphaCmp(const T1 &a1,const T1 &b1,const T2 &a2,const T2 &b2,...,const Tn &an,const Tn &bn);

You may also use instead of pair a single CmpResult value.


CmpResult cmp=....;

CmpResult AlphaCmp(const T1 &a1,const T1 &b1,const T2 &a2,const T2 &b2,...,cmp,...,const Tn &an,const Tn &bn);

Use the function RangeCmp() or RangeLess() to perform the alphabetical comparison of two ranges of objects:


template <class T>
CmpResult RangeCmp(const T *a,const T *b,ulen count);

template <class T>
CmpResult RangeCmp(PtrLen<T> a,PtrLen<T> b);

template <class T>
bool RangeLess(PtrLen<T> a,PtrLen<T> b);

There are also "by" variants of these functions. These variants use the given functor by to compare objects: to compare objects a and b they do compare by(a) and by(b).


template <class T,class Func>
CmpResult RangeCmpBy(const T *a,const T *b,ulen count,Func by);
 
template <class T,class Func>
CmpResult RangeCmpBy(PtrLen<T> a,PtrLen<T> b,Func by);

template <class T,class Func>
bool RangeLessBy(PtrLen<T> a,PtrLen<T> b,Func by); 

There are ...Of variants with combined range extraction:


template <class T>
CmpResult RangeCmpOf(const T &a,const T &b) { return RangeCmp(Range(a),Range(b)); }

template <class T>
bool RangeLessOf(const T &a,const T &b) { return RangeLess(Range(a),Range(b)); }

template <class T,class Func>
CmpResult RangeCmpOfBy(const T &a,const T &b,Func by) { return RangeCmpBy(Range(a),Range(b),by); }

template <class T,class Func>
bool RangeLessOfBy(const T &a,const T &b,Func by) { return RangeLessBy(Range(a),Range(b),by); }

Use the functions StrCmp() or StrLess() to compare strings (and ...Of variants):


CmpResult StrCmp(StrLen a,StrLen b);
 
bool StrLess(StrLen a,StrLen b);

template <class T>
CmpResult StrCmpOf(const T &a,const T &b) { return StrCmp(Range(a),Range(b)); }

template <class T>
bool StrLessOf(const T &a,const T &b) { return StrLess(Range(a),Range(b)); }

Starting from the version 3.50 comparision is performed using the unsigned character code. It makes these operations UTF8-compatible. To compare single characters the following two functions has been added:


inline CmpResult CharCmp(char a,char b) { return LessCmp((unsigned char)a,(unsigned char)b); }

inline bool CharLess(char a,char b) { return (unsigned char)a<(unsigned char)b; }

The class CmpAsStr is an adapter class to help combine the string comparison with the string extraction:


class CmpAsStr
 {
   StrLen str;

  public:

   explicit CmpAsStr(StrLen str_) : str(str_) {}

   template <class T>
   explicit CmpAsStr(const T &obj) : str(Range(obj)) {}

   // cmp objects

   CmpResult objCmp(CmpAsStr obj) const { return StrCmp(str,obj.str); }

   friend bool operator < (const CmpAsStr &a,const CmpAsStr &b) { return StrLess(a.str,b.str); }

   friend bool operator > (const CmpAsStr &a,const CmpAsStr &b) { return StrLess(b.str,a.str); }

   friend bool operator <= (const CmpAsStr &a,const CmpAsStr &b) { return !StrLess(b.str,a.str); }

   friend bool operator >= (const CmpAsStr &a,const CmpAsStr &b) { return !StrLess(a.str,b.str); }

   friend bool operator == (const CmpAsStr &a,const CmpAsStr &b) { return StrCmp(a.str,b.str)==0; }

   friend bool operator != (const CmpAsStr &a,const CmpAsStr &b) { return StrCmp(a.str,b.str)!=0; }
 };

There are two Property Type classes LessComparable and CmpComparable:


/* struct LessComparable<T> */ 

template <class T>
struct LessComparable
 {
  friend bool operator > (const T &a,const T &b) requires ( OpLessType<T> ) { return b<a; }
  
  friend bool operator <= (const T &a,const T &b) requires ( OpLessType<T> ) { return !(b<a); }
  
  friend bool operator >= (const T &a,const T &b) requires ( OpLessType<T> ) { return !(a<b); }
  
  friend bool operator == (const T &a,const T &b) requires ( OpLessType<T> && !OpEqualType<T> ) { return !( a<b || b<a ); }
  
  friend bool operator != (const T &a,const T &b) requires ( OpLessType<T> && !OpEqualType<T> ) { return ( a<b || b<a ); }
 };
  
/* struct CmpComparable<T> */ 

template <class T>
struct CmpComparable
 {
  friend bool operator < (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)<0; }
  
  friend bool operator > (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)>0; }
  
  friend bool operator <= (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)<=0; }
  
  friend bool operator >= (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)>=0; }
  
  friend bool operator == (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)==0; }
  
  friend bool operator != (const T &a,const T &b) requires ( Has_objCmp<T> ) { return a.objCmp(b)!=0; }
 };

They can be used to simplify the implementation of the comparable classes:


class C : public CmpComparable<C> // provides all comparison operators for class C
 {
  public:

   // cmp objects

   CmpResult objCmp(const C &obj) const { .... }
 };


class C : public LessComparable<C> // provides all comparison operators for class C
 {
  public:

   bool operator < (const C &obj) const { .... }
 }; 

// Cmp() uses LessCmp() for this class


class C : public LessComparable<C> // provides all comparison operators for class C
 {
  public:

   bool operator < (const C &obj) const { .... }

   bool operator == (const C &obj) const { .... } // for efficiency

   bool operator != (const C &obj) const { .... } // for efficiency
 };

Why 3-Way is better than 2-Way?

The answer is simple: it is more efficient and more usable in applications. 3-Way comparison has near the same cost for the simple types as 2-Way. In fact, the comparison commands on the modern CPUs gives the complete answers how two integral values are ordered. So in practice only the absence of the language support for 3-Way or the lack of optimization capabilities of compilers makes 3-Way comparison of basic types is less efficient.

But once we compare combined types (usually using alphabetical ordering) we start winning:


bool Less(T1 a1,T1 b1,T2 a2,T2 b2)
 {
  if( a1<b1 )
    {
     return true;
    }
  else
    {
     if( b1<a1 ) return false; // excessive double comparison of the same values

     return a2<b2;
    } 
 }

CmpResult Cmp(T1 a1,T1 b1,T2 a2,T2 b2) // more efficient, simple and clear code
 {
  if( CmpResult ret=Cmp(a1,b1) ) return ret;

  return Cmp(a2,b2);
 }

All usual comparison operations are simply derived from Cmp(). And, finally, many algorithms have advantage using 3-Way than 2-Way both in efficiency and simplicity.