Wednesday, October 15, 2014

The BucketList, an generic IList implementation that doesn't use a contiguous array of items

The wonder of .Net is that most of the time we don't really have to care about stuff like memory allocation, the framework does everything for you. One especially annoying thing, though, is when you are using a basic data structure that is supposed to be efficient and you get stuff like OutOfMemoryException. One of the cases is List<T> which in the background uses one big array. This means two things: one is that certain modifying operations are slow and the other is that it requires contiguous memory space for the items. If your memory space gets too fragmented, then there is not enough to allocate for hundred of thousands of items, even if in that memory space you only need a pointer for each item. That is why you end up with out of memory exceptions. It's not like you don't have enough memory, it's that you don't have a big enough contiguous block of it.

As a solution I give you... the BucketList<T> class. It has a bucket size that defaults to 10000 and it implements a list of lists that each will always have at most that amount of items as specified in the bucket size. This way operations that remove and add items will only operate on 10000 item big arrays and there is no need for only one big memory block. I implemented the IList interface explicitly, so that you will never find it comfortable to use an instance as a BucketList, but as an IList. This way you can replace the implementation of the interface with a normal List or whatever other form you like. Enjoy!

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Siderite.DataStructures
{
    /// <summary>
    /// IList implementation that doesn't require a contiguous memory space (an array)
    /// </summary>
    /// <typeparam name="T"></typeparam>
    public class BucketList<T>:IList<T>
    {
        private int _bucketSize;
        private List<List<T>> _list;
        private int _count;

        /// <summary>
        /// Default constructor
        /// </summary>
        public BucketList():this(10000)
        {
            _list = new List<List<T>>();
        }

        /// <summary>
        /// Specify the bucket size (default 10000)
        /// </summary>
        /// <param name="bucketSize"></param>
        public BucketList(int bucketSize)
        {
            _bucketSize = bucketSize;
        }

        /// <summary>
        /// Create a bucket list from an IEnumerable
        /// </summary>
        /// <param name="enm"></param>
        public BucketList(IEnumerable<T> enm):this()
        {
            var list = (IList<T>)this;
            foreach (var itm in enm)
            {
                list.Add(itm);
            }
        }

        /// <summary>
        /// The item count
        /// </summary>
        public int Count
        {
            get
            {
                return ((IList<T>)this).Count;
            }
        }

        #region IList<T> implementation

        int IList<T>.IndexOf(T item)
        {
            var index = 0;
            for (var i = 0; i < _list.Count; i++)
            {
                var idx = _list[i].IndexOf(item);
                if (idx < 0)
                {
                    index += _list[i].Count;
                }
                else
                {
                    index += idx;
                    return index;
                }
            }
            return -1;
        }

        void IList<T>.Insert(int index, T item)
        {
            var idx = 0;
            for (var i = 0; i < _list.Count; i++)
            {
                var lst = _list[i];
                if (index < idx + lst.Count)
                {
                    lst.Insert(index - idx, item);
                    splitListIfTooLarge(i);
                    _count++;
                    return;
                }
                else
                {
                    idx += lst.Count;
                }
            }
            throw new IndexOutOfRangeException("index");
        }

        void IList<T>.RemoveAt(int index)
        {
            var idx = 0;
            for (var i = 0; i < _list.Count; i++)
            {
                var lst = _list[i];
                if (index < idx + lst.Count)
                {
                    lst.RemoveAt(index - idx);
                    removeListIfEmpty(i);
                    _count--;
                    return;
                }
                else
                {
                    idx += lst.Count;
                }
            }
            throw new IndexOutOfRangeException("index");
        }

        T IList<T>.this[int index]
        {
            get
            {
                var idx = 0;
                for (var i = 0; i < _list.Count; i++)
                {
                    var lst = _list[i];
                    if (index < idx + lst.Count)
                    {
                        return lst[index - idx];
                    }
                    else
                    {
                        idx += lst.Count;
                    }
                }
                throw new IndexOutOfRangeException("index");
            }
            set
            {
                var idx = 0;
                for (var i = 0; i < _list.Count; i++)
                {
                    var lst = _list[i];
                    if (index < idx + lst.Count)
                    {
                        lst[index - idx]=value;
                    }
                    else
                    {
                        idx += lst.Count;
                    }
                }
                throw new IndexOutOfRangeException("index");
            }
        }

        void ICollection<T>.Add(T item)
        {
            List<T> lst;
            int index;
            if (_list.Count == 0)
            {
                lst = new List<T>();
                _list.Add(lst);
                index=0;
            }
            else
            {
                index=_list.Count - 1;
                lst = _list[index];
            }
            lst.Add(item);
            splitListIfTooLarge(index);
            _count++;
        }

        void ICollection<T>.Clear()
        {
            _list.Clear();
            _count = 0;
        }

        bool ICollection<T>.Contains(T item)
        {
            return ((IList<T>)this).IndexOf(item) > -1;
        }

        void ICollection<T>.CopyTo(T[] array, int arrayIndex)
        {
            var index = arrayIndex;
            foreach (var lst in _list)
            {
                lst.CopyTo(array, index);
                index += lst.Count;
            }
        }

        int ICollection<T>.Count
        {
            get
            {
                return _count;
            }
        }

        bool ICollection<T>.IsReadOnly
        {
            get { return false; }
        }

        bool ICollection<T>.Remove(T item)
        {
            for (var i = 0; i < _list.Count; i++)
            {
                var lst = _list[i];
                if (lst.Remove(item))
                {
                    _count--;
                    removeListIfEmpty(i);
                    return true;
                }
            }
            return false;
        }

        IEnumerator<T> IEnumerable<T>.GetEnumerator()
        {
            foreach (var lst in _list)
            {
                foreach (var item in lst)
                {
                    yield return item;
                }
            }
        }

        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return ((IList<T>)this).GetEnumerator();
        }

        #endregion

        private void splitListIfTooLarge(int listIndex)
        {
            var lst = _list[listIndex];
            if (lst.Count > _bucketSize)
            {
                var newList = lst.GetRange(0, _bucketSize);
                lst.RemoveRange(0, _bucketSize);
                _list.Insert(listIndex, newList);
            }
        }

        private void removeListIfEmpty(int listIndex)
        {
            var lst = _list[listIndex];
            if (lst.Count ==0)
            {
                _list.RemoveAt(listIndex);
            }
        }
    }
}

0 comments: