Monday, January 13, 2014

A compressed string class

I admit this is not a very efficient class for my purposes, but it was a quick and dirty fix for a personal project, so it didn't matter. The class presented here stores a string in a compressed byte array if the length of the string exceeds a given value. I used it to solve an annoying XmlSerializer OutOfMemoryException when deserializing a very large XML file (400MB) into a list of objects. My objects had a Content property that stored the content of HTML pages, and memory usage went completely overboard when everything was loaded into memory. The class uses the System.IO.Compression.GZipStream class that was introduced in .NET 2.0 (you have to add a reference to System.IO.Compression.dll). Enjoy!

    /// <summary>
    /// Holds a string as UTF-8 bytes, GZip-compressing those bytes whenever the
    /// string has more characters than a configurable threshold.
    /// </summary>
    public class CompressedString
    {
        // Strings with more characters than this are stored compressed.
        private int _compressionThreshold;

        // UTF-8 bytes of the current value (GZip-compressed when _isCompressed is true);
        // null when the value is null.
        private byte[] _bytes;

        // True when _bytes holds GZip data rather than raw UTF-8.
        private bool _isCompressed;

        // Character count (string.Length) of the current value; 0 when the value is null.
        private int _charCount;

        /// <summary>
        /// Creates an instance with a threshold of 0, so every non-empty string is compressed.
        /// </summary>
        public CompressedString()
            : this(0)
        {
        }

        /// <summary>
        /// Creates an instance that compresses only strings longer than the given length.
        /// </summary>
        /// <param name="maximumStringLengthBeforeCompress">
        /// Largest string length (in characters) that is still stored uncompressed.
        /// </param>
        public CompressedString(int maximumStringLengthBeforeCompress)
        {
            _compressionThreshold = maximumStringLengthBeforeCompress;
        }

        /// <summary>
        /// Gets or sets the wrapped string. Reading decodes (and, if needed, decompresses)
        /// the stored bytes on every call; writing encodes the string to UTF-8 and
        /// compresses it when it is longer than the threshold. Null is stored as null.
        /// </summary>
        public string Value
        {
            get
            {
                if (_bytes == null)
                {
                    return null;
                }

                byte[] utf8 = _isCompressed ? Unpack(_bytes) : _bytes;
                return Encoding.UTF8.GetString(utf8);
            }
            set
            {
                if (value == null)
                {
                    _bytes = null;
                    _isCompressed = false;
                    _charCount = 0;
                    return;
                }

                _charCount = value.Length;
                byte[] utf8 = Encoding.UTF8.GetBytes(value);

                if (_charCount > _compressionThreshold)
                {
                    byte[] packed = Pack(utf8);
                    _isCompressed = true;
                    _bytes = packed;
                }
                else
                {
                    _isCompressed = false;
                    _bytes = utf8;
                }
            }
        }

        /// <summary>
        /// Gets the number of characters in the current value (0 when the value is null).
        /// </summary>
        public int Length
        {
            get { return _charCount; }
        }

        // GZip-compresses the given bytes and returns the compressed buffer.
        private static byte[] Pack(byte[] raw)
        {
            using (var sink = new MemoryStream())
            using (var gzip = new GZipStream(sink, CompressionMode.Compress))
            {
                gzip.Write(raw, 0, raw.Length);
                // Close the compressor before reading the buffer so the remaining
                // compressed data is written to the MemoryStream.
                gzip.Close();
                return sink.ToArray();
            }
        }

        // Decompresses a GZip buffer and returns the decompressed bytes.
        private static byte[] Unpack(byte[] packed)
        {
            using (var source = new MemoryStream(packed))
            using (var gzip = new GZipStream(source, CompressionMode.Decompress))
            using (var sink = new MemoryStream())
            {
                gzip.CopyTo(sink);
                return sink.ToArray();
            }
        }
    }

0 comments: