[C#]实现IEqualityComparer接口求集合交集
前言
在系统开发中,常会涉及求集合的交集,利用Enumerable.Intersect扩展方法,很容易获得两个集合的交集。对于一般的集合,如List<string>,使用Enumerable.Intersect默认的相等比较器就可以求得交集,但是涉及到复杂的集合,如List<Member>、List<List<string>>等元素涉及自定义类,或集合嵌套的情况,就需要实现IEqualityComparer接口,自定义相等比较器来实现求交集操作。接下去将用几个实例,来展示求交集几种需求:
1. 普通集合,如List<int>、List<string>
2. 自定义类集合,如List<Member>
3. 嵌套集合,如List<List<string>>
普通集合求交集
实例代码
view sourceprint?01 static void Main(string[] args)
02 {
03 List<string> list1 = new List<string>() { "1", "2", "3" };
04 List<string> list2 = new List<string>() { "2", "4", "3", "5" };
05
06 IEnumerable<string> result = list1.Intersect(list2);
07
08 foreach (string item in result)
09 {
10 Console.WriteLine(item);
11 }
12 Console.Read();
13 }
测试结果
2
3
自定义类集合求交集
在这里,我们假设在两个Member对象实例的ID和Name属性相等的情况下,就表示两个对象实例相等,两者即为交集。因此在实现自定义相等比较器的时候,Equal方法里面只要比较两个Member实例的ID和Name属性是否同时相等即可,而GetHashCode方法里面,应该根据Member实例的ID和Name属性来生成唯一哈希码值,以此来证明:ID和Name不全相等的情况下,两个Member实例就不相等;ID和Name同时相等的情况下,两个Member实例就相等。
(拥有相同值的值类型变量和string类型变量,其哈希码是相等的)
实例代码
view sourceprint?01 /// <summary>
02 /// 自定义类
03 /// </summary>
04 public class Member
05 {
06 public int ID { get; set; }
07 public string Name { get; set; }
08 }
09
10 /// <summary>
11 /// 自定义相等比较器
12 /// </summary>
13 public class EqualComparer : IEqualityComparer<Member>
14 {
15 public bool Equals(Member x, Member y)
16 {
17 if (x == null || y == null) return false;
18 return x.ID == y.ID && x.Name == y.Name;
19 }
20
21 public int GetHashCode(Member obj)
22 {
23 if (obj == null) return 0;
24 int nameCode = obj.Name == null ? 0 : obj.Name.GetHashCode();
25 return obj.ID * 32 + nameCode;
26 }
27 }
28
29 class Program
30 {
31 static void Main(string[] args)
32 {
33 List<Member> list1 = new List<Member>() {
34 new Member() { ID = 1, Name = "a" },
35 new Member() { ID = 2, Name = "b" },
36 new Member() { ID = 3, Name = "c" }
37 };
38 List<Member> list2 = new List<Member>() {
39 new Member() { ID = 2, Name = "b" },
40 new Member() { ID = 3, Name = "c" },
41 new Member() { ID = 4, Name = "d" }
42 };
43
44 IEnumerable<Member> result = list1.Intersect(list2, new EqualComparer());
45
46 foreach (Member item in result)
47 {
48 Console.WriteLine("ID:{0} Name:{1}", item.ID, item.Name);
49 }
50 Console.Read();
51 }
52 }
测试结果
ID:2 Name:b
ID:3 Name:c
嵌套集合求交集
在这里,集合的元素也是一个集合,所以判断两个List集合相等的概念应该是:其元素个数相等,相同位置的元素都相等,那么这两个List集合表示相等,Equal方法实现了两个List集合的元素和长度比较,GetHashCode针对List的元素位置和值生成了唯一哈希码。
实例代码
view sourceprint?01 /// <summary>
02 /// 自定义相等比较器
03 /// </summary>
04 public class EqualComparer : IEqualityComparer<List<string>>
05 {
06 public bool Equals(List<string> x, List<string> y)
07 {
08 if (x == null || y == null) return false;
09 if (x.Count != y.Count) return false;
10
11 for (int i = 0, c = x.Count; i < c; i++)
12 {
13 if (!x[i].Equals(y[i]))
14 return false;
15 }
16 return true;
17 }
18
19 public int GetHashCode(List<string> obj)
20 {
21 if (obj == null) return 0;
22 int hashCode = obj.Aggregate(0, (current, next) => 32 * current + next.GetHashCode());
23 return hashCode;
24 }
25 }
26
27 class Program
28 {
29 static void Main(string[] args)
30 {
31 List<List<string>> list1 = new List<List<string>>() {
32 new List<string>() { "1", "2" },
33 new List<string>() { "1", "2", "3" },
34 new List<string>() { "1", "2", "4" }
35 };
36
37 List<List<string>> list2 = new List<List<string>>() {
38 new List<string>() { "1", "2", "4" },
39 new List<string>() { "3", "2", "1" },
40 new List<string>() { "1", "2" },
41 new List<string>() { "1" }
42 };
43
44 IEnumerable<List<string>> result = list1.Intersect(list2, new EqualComparer());
45
46 foreach (List<string> item in result)
47 {
48 Console.Write("{ ");
49 foreach (string sub in item)
50 {
51 Console.Write("{0} ", sub);
52 }
53 Console.Write("}");
54 Console.WriteLine();
55 }
56 Console.Read();
57 }
58 }
测试结果
{ 1 2 }
{ 1 2 4 }
GetHashCode方法的作用
其实GetHashCode方法即使返回相同的一个值,如return 0; 上述代码也都可以正常测试,反编译查看Intersect方法和相关集合的底层代码,就可以发现这个问题。虽然如此,但是只要观察Set类里面的Find方法和Remove方法的那个IF判断,就知道这个不是好的处理办法。每个元素的HashCode都是相同值的话,那么,(this.slots[i].hashCode == hashCode)接下去返回的值就都会是True,这样Equal被无聊地调用的次数就大大增加了,降低了程序执行的效率。如果每个元素的HashCode能标识自己本身,那么,当(this.slots[i].hashCode == hashCode)返回false的时候,Equal就没必要再去执行,因为这个布尔变量以&&相连。
Intersect方法
view sourceprint?01 public static IEnumerable<TSource> Intersect<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource> comparer)
02 {
03 Set<TSource> set = new Set<TSource>(comparer);
04
05 foreach (var item in second)
06 {
07 set.Add(item);
08 }
09
10 foreach (var item in first)
11 {
12 if (set.Remove(item)) yield return item;
13 }
14 }
Set<T>泛型类
view sourceprint?001 class Set<TElement>
002 {
003 private int[] buckets;
004 private IEqualityComparer<TElement> comparer;
005 private int count;
006 private int freeList;
007 private Slot[] slots;
008
009 public Set()
010 : this(null)
011 {
012 }
013 public Set(IEqualityComparer<TElement> comparer)
014 {
015 if (comparer == null)
016 {
017 comparer = EqualityComparer<TElement>.Default;
018 }
019 this.comparer = comparer;
020 this.buckets = new int[0x7];
021 this.slots = new Slot[0x7];
022 this.freeList = -1;
023 }
024
025 public bool Add(TElement value)
026 {
027 return !this.Find(value, true);
028 }
029 public bool Contains(TElement value)
030 {
031 return this.Find(value, false);
032 }
033 private bool Find(TElement value, bool add)
034 {
035 int hashCode = this.InternalGetHashCode(value);
036 for (int i = this.buckets[hashCode % this.buckets.Length] - 0x1; i >= 0x0; i = this.slots[i].next)
037 {
038 if ((this.slots[i].hashCode == hashCode) && this.comparer.Equals(this.slots[i].value, value))
039 {
040 return true;
041 }
042 }
043 if (add)
044 {
045 int freeList;
046 if (this.freeList >= 0x0)
047 {
048 freeList = this.freeList;
049 this.freeList = this.slots[freeList].next;
050 }
051 else
052 {
053 if (this.count == this.slots.Length)
054 {
055 this.Resize();
056 }
057 freeList = this.count;
058 this.count++;
059 }
060 int index = hashCode % this.buckets.Length;
061 this.slots[freeList].hashCode = hashCode;
062 this.slots[freeList].value = value;
063 this.slots[freeList].next = this.buckets[index] - 0x1;
064 this.buckets[index] = freeList + 0x1;
065 }
066 return false;
067 }
068
069 internal int InternalGetHashCode(TElement value)
070 {
071 if (value != null)
072 {
073 return (this.comparer.GetHashCode(value) & 0x7fffffff);
074 }
075 return 0x0;
076 }
077 public bool Remove(TElement value)
078 {
079 int hashCode = this.InternalGetHashCode(value);
080 int index = hashCode % this.buckets.Length;
081 int num3 = -1;
082 for (int i = this.buckets[index] - 0x1; i >= 0x0; i = this.slots[i].next)
083 {
084 if ((this.slots[i].hashCode == hashCode) && this.comparer.Equals(this.slots[i].value, value))
085 {
086 if (num3 < 0x0)
087 {
088 this.buckets[index] = this.slots[i].next + 0x1;
089 }
090 else
091 {
092 this.slots[num3].next = this.slots[i].next;
093 }
094 this.slots[i].hashCode = -1;
095 this.slots[i].value = default(TElement);
096 this.slots[i].next = this.freeList;
097 this.freeList = i;
098 return true;
099 }
100 num3 = i;
101 }
102 return false;
103 }
104
105 private void Resize()
106 {
107 int num = (this.count * 0x2) + 0x1;
108 int[] numArray = new int[num];
109 Slot[] destinationArray = new Slot[num];
110 Array.Copy(this.slots, 0x0, destinationArray, 0x0, this.count);
111 for (int i = 0x0; i < this.count; i++)
112 {
113 int index = destinationArray[i].hashCode % num;
114 destinationArray[i].next = numArray[index] - 0x1;
115 numArray[index] = i + 0x1;
116 }
117 this.buckets = numArray;
118 this.slots = destinationArray;
119 }
120
121 struct Slot
122 {
123 internal int hashCode;
124 internal TElement value;
125 internal int next;
126 }
127 }