santisq · November 14, 2025 19:27
diff --git a/ImportCsv.cs b/ImportCsv.cs
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq.Expressions;
 using System.Management.Automation;
 using System.Reflection;
 using System.Text;

 /// <summary>
 /// Helper class to import single CSV file.
 /// </summary>
 public static class ImportCsvHelper
 {
    // Initial sizes of the value list and the line stringbuilder.
    // Set to reasonable initial sizes. They may grow beyond these,
    // but this will prevent a few reallocations.
    private const int ValueCountGuestimate = 16;
    private const int LineLengthGuestimate = 256;

    /// <summary>
    /// Reads a comma-delimited file with the given encoding and maps each record
    /// onto a new <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Type whose public writable properties match the header names.</typeparam>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="encoding">Encoding used to decode the file.</param>
    /// <returns>A lazily evaluated sequence with one object per record.</returns>
    /// <remarks>
    /// The delimiter is passed positionally on purpose. Calling with only <c>path</c> and a
    /// named <c>encoding</c> argument resolves back to this two-parameter overload (it needs no
    /// default value substituted), which made the previous forwarding call recurse forever.
    /// The constraint mirrors the three-parameter overload so that the forwarded call compiles.
    /// </remarks>
    public static IEnumerable<T> Read<T>(string path, Encoding encoding)
        where T : class, new()
        => Read<T>(path, ',', encoding);

    /// <summary>
    /// Reads a delimited text file and maps each record onto a new <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">
    /// Type to instantiate per record. Each header name must match a public, writable
    /// instance property (case-insensitively).
    /// </typeparam>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="delimiter">Field separator character.</param>
    /// <param name="encoding">File encoding; UTF-8 is used when <see langword="null"/>.</param>
    /// <returns>
    /// A lazily evaluated sequence. Because this is an iterator, the file is opened when
    /// enumeration starts and closed when enumeration finishes or the enumerator is disposed.
    /// </returns>
    public static IEnumerable<T> Read<T>(string path, char delimiter = ',', Encoding? encoding = null)
        where T : class, new()
    {
        using StreamReader sr = new(path, encoding ?? Encoding.UTF8);
        string[] header = ReadHeader(sr, delimiter);
        List<string> values = new(ValueCountGuestimate);
        StringBuilder builder = new(LineLengthGuestimate);
        Setter<T>[] setters = [.. GetSetters<T>(header)];

        while (true)
        {
            ParseNextRecord(sr, values, builder, delimiter);
            if (values.Count == 0)
            {
                // The parser produced no fields at all: the stream is exhausted.
                break;
            }

            if (values.Count == 1 && string.IsNullOrEmpty(values[0]))
            {
                // skip the blank lines
                continue;
            }

            T outobj = new();

            // A record may carry fewer fields than the header declares. Only assign the
            // fields that are present and leave the remaining properties at their defaults
            // instead of indexing past the end of the list. Surplus fields are ignored.
            int fieldCount = Math.Min(setters.Length, values.Count);
            for (int i = 0; i < fieldCount; i++)
            {
                setters[i].SetValue(outobj, values[i]);
            }

            yield return outobj;
        }
    }

    /// <summary>
    /// Pairs a compiled property-setter delegate with the property's declared type so a raw
    /// CSV field can be converted before it is assigned.
    /// </summary>
    /// <param name="Action">Delegate that assigns a value to the property on a target object.</param>
    /// <param name="TargetType">Declared type of the property being assigned.</param>
    record struct Setter<T>(Action<T, object> Action, Type TargetType)
    {
        /// <summary>
        /// Assigns <paramref name="value"/> to the property on <paramref name="target"/>.
        /// Properties typed <see cref="string"/> or <see cref="object"/> receive the text
        /// unchanged; any other type goes through <c>LanguagePrimitives.ConvertTo</c> first.
        /// </summary>
        internal readonly void SetValue(T target, string value)
        {
            bool takesRawText = TargetType == typeof(string) || TargetType == typeof(object);
            object converted = takesRawText
                ? value
                : LanguagePrimitives.ConvertTo(value, TargetType);

            Action(target, converted);
        }
    }

    /// <summary>
    /// Builds one compiled setter per header name, in header order.
    /// </summary>
    /// <typeparam name="T">Type whose properties are being bound.</typeparam>
    /// <param name="properties">Header names; each is looked up case-insensitively on <typeparamref name="T"/>.</param>
    /// <returns>A lazily produced sequence of setters aligned with <paramref name="properties"/>.</returns>
    /// <exception cref="ArgumentNullException">A header name has no matching public instance property.</exception>
    /// <exception cref="ArgumentException">The matching property is read-only or has no public setter.</exception>
    private static IEnumerable<Setter<T>> GetSetters<T>(string[] properties)
    {
        Type type = typeof(T);
        foreach (string property in properties)
        {
            // The exception type is kept for callers that already catch it, but the
            // (paramName, message) overload is used: the single-string constructor treats
            // its argument as the parameter name and discards the intended message.
            PropertyInfo info = type.GetProperty(
                property,
                BindingFlags.IgnoreCase | BindingFlags.Instance | BindingFlags.Public)
                ?? throw new ArgumentNullException(
                    nameof(properties),
                    $"Property '{property}' not found on type '{type}'.");

            if (!info.CanWrite)
            {
                throw new ArgumentException($"Property '{property}' on '{type}' is read-only.");
            }

            // CanWrite is also true for non-public setters; GetSetMethod() only returns public ones.
            MethodInfo setter = info.GetSetMethod()
                ?? throw new ArgumentException($"Setter for '{property}' is inaccessible.");

            // Compiles: (T target, object value) => target.Property = (PropertyType)value
            ParameterExpression target = Expression.Parameter(type, "target");
            ParameterExpression value = Expression.Parameter(typeof(object), "value");
            MethodCallExpression body = Expression.Call(target, setter, Expression.Convert(value, info.PropertyType));
            yield return new(
                Expression.Lambda<Action<T, object>>(body, target, value).Compile(),
                info.PropertyType);
        }
    }

    /// <summary>
    /// Consumes and returns the next character from <paramref name="sr"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The reader is already at the end of the stream.</exception>
    private static char ReadChar(StreamReader sr) =>
        sr.EndOfStream
            ? throw new InvalidOperationException("EOF is reached.")
            : (char)sr.Read();

    /// <summary>
    /// Checks, without consuming anything, whether the next character in the stream is <paramref name="c"/>.
    /// </summary>
    /// <param name="sr">Reader to peek into.</param>
    /// <param name="c">Character to compare against.</param>
    /// <returns>
    /// <see langword="true"/> when a next character exists and equals <paramref name="c"/>;
    /// <see langword="false"/> otherwise, including when nothing is left to read.
    /// </returns>
    private static bool PeekNextChar(StreamReader sr, char c)
    {
        int upcoming = sr.Peek();
        return upcoming != -1 && (char)upcoming == c;
    }

    /// <summary>
    /// Reads records until the header row is found and returns its column names.
    /// </summary>
    /// <param name="sr">Reader positioned at the start of the data.</param>
    /// <param name="delimiter">Field separator character.</param>
    /// <returns>
    /// The header names with trailing empty names removed. The array is empty when the
    /// stream holds no header, for example when it is empty or contains only '#' comment lines.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">Two header names are equal ignoring case.</exception>
    internal static string[] ReadHeader(StreamReader sr, char delimiter)
    {
        const string FieldsPrefix = "#Fields: ";

        List<string> values = new(ValueCountGuestimate);
        StringBuilder builder = new(LineLengthGuestimate);
        bool headerFound = false;

        while (!headerFound && !sr.EndOfStream)
        {
            ParseNextRecord(sr, values, builder, delimiter);

            // Trim all trailing empty fields (they come from trailing delimiters or blanks).
            // A row consisting of a single empty field is kept as is.
            while (values.Count > 1 && values[^1].Length == 0)
            {
                values.RemoveAt(values.Count - 1);
            }

            // A line starting with '#Fields: ' is the header of a W3C Extended Log File.
            // Ordinal comparison: this is a format marker, not linguistic text.
            if (values.Count != 0 && values[0].StartsWith(FieldsPrefix, StringComparison.Ordinal))
            {
                values[0] = values[0][FieldsPrefix.Length..];
                headerFound = true;
            }
            else if (values.Count == 0 || !values[0].StartsWith('#'))
            {
                // This is not W3C Extended Log File Format.
                // By default the first non-comment line is the header.
                headerFound = true;
            }

            // Otherwise the line starts with '#': skip it and keep scanning.
        }

        if (!headerFound)
        {
            // The stream ended while comment lines were being skipped. Drop the last comment
            // line so that it is not mistaken for a header.
            values.Clear();
        }

        ValidatePropertyNames(values);
        return [.. values];
    }

    /// <summary>
    /// Verifies that no non-empty header name occurs more than once, ignoring case.
    /// </summary>
    /// <param name="names">
    /// Header names to check. A <see langword="null"/> or empty list is accepted silently,
    /// and empty names are never treated as duplicates of each other.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">A non-empty name is repeated.</exception>
    private static void ValidatePropertyNames(List<string> names)
    {
        if (names is null || names.Count == 0)
        {
            // The original code noted "no names is an error" but never raised one;
            // that lenient behaviour is preserved here.
            return;
        }

        HashSet<string> seen = new(StringComparer.OrdinalIgnoreCase);
        foreach (string name in names)
        {
            if (!string.IsNullOrEmpty(name) && !seen.Add(name))
            {
                // Throw a terminating error: duplicate headers cannot be mapped to properties.
                // The (paramName, message) overload is required; the single-string constructor
                // would interpret the text as the parameter name.
                throw new ArgumentOutOfRangeException(
                    nameof(names),
                    $"'{name}' is duplicated in header.");
            }
        }
    }

    /// <summary>
    /// Reads the next record from the stream and stores its fields in <paramref name="result"/>.
    /// </summary>
    /// <param name="sr">Reader to consume characters from.</param>
    /// <param name="result">
    /// Cleared on entry, then filled with the fields of the record. It is left empty when
    /// no field could be produced, e.g. when the reader is already at the end of the stream.
    /// </param>
    /// <param name="current">Scratch buffer for the field being built; cleared on entry.</param>
    /// <param name="delimiter">Field separator character.</param>
    private static void ParseNextRecord(
        StreamReader sr,
        List<string> result,
        StringBuilder current,
        char delimiter)
    {
        result.Clear();

        // current string
        current.Clear();

        // True while we are between an opening quote and its matching closing quote.
        bool seenBeginQuote = false;

        while (!sr.EndOfStream)
        {
            // Read the next character
            char ch = ReadChar(sr);

            // The delimiter test comes first, so a delimiter that is also a blank or a
            // quote character is handled here rather than by the branches below.
            if (ch == delimiter)
            {
                if (seenBeginQuote)
                {
                    // Delimiter inside double quotes is part of string.
                    // Ex:
                    // "foo, bar"
                    // is parsed as
                    // ->foo, bar<-
                    current.Append(ch);
                }
                else
                {
                    // Delimiter outside quotes is end of current word.
                    result.Add(current.ToString());
                    current.Remove(0, current.Length);
                }
            }
            else if (ch == '"')
            {
                if (seenBeginQuote)
                {
                    if (PeekNextChar(sr, '"'))
                    {
                        // "" inside double quote are single quote
                        // ex: "foo""bar"
                        // is read as
                        // ->foo"bar<-

                        // PeekNextChar only peeks. Read the next char.
                        ReadChar(sr);
                        current.Append('"');
                    }
                    else
                    {
                        // We have seen a matching end quote.
                        seenBeginQuote = false;

                        // Read
                        // everything till we hit next delimiter.
                        // In correct CSV,1) end quote is followed by delimiter
                        // 2)end quote is followed some whitespaces and
                        // then delimiter.
                        // We eat the whitespaces seen after the ending quote.
                        // However if there are other characters, we add all of them
                        // to string. The surrounding quotes themselves are never added.
                        // Ex: ->"foo bar"<- is read as ->foo bar<-
                        // ->"foo bar"  <- is read as ->foo bar<-
                        // ->"foo bar" ab <- is read as ->foo bar ab <-
                        bool endofRecord = false;
                        ReadTillNextDelimiter(sr, current, ref endofRecord, true, delimiter);
                        result.Add(current.ToString());
                        current.Remove(0, current.Length);
                        if (endofRecord)
                            break;
                    }
                }
                else if (current.Length == 0)
                {
                    // We are at the beginning of a new word.
                    // This quote is the first quote.
                    seenBeginQuote = true;
                }
                else
                {
                    // We are seeing a quote after the start of
                    // the word. This is error, however we will be
                    // lenient here and do what excel does:
                    // Ex: foo "ba,r"
                    // In above example word read is ->foo "ba<-
                    // Basically we read till next delimiter
                    bool endOfRecord = false;
                    current.Append(ch);
                    ReadTillNextDelimiter(sr, current, ref endOfRecord, false, delimiter);
                    result.Add(current.ToString());
                    current.Remove(0, current.Length);
                    if (endOfRecord)
                        break;
                }
            }
            else if (ch == ' ' || ch == '\t')
            {
                if (seenBeginQuote)
                {
                    // Spaces in side quote are valid
                    current.Append(ch);
                }
                else if (current.Length == 0)
                {
                    // ignore leading spaces
                    continue;
                }
                else
                {
                    // We are not in quote and we are not at the
                    // beginning of a word. We should not be seeing
                    // spaces here. This is an error condition, however
                    // we will be lenient here and do what excel does,
                    // that is read till next delimiter.
                    // The blank that brought us here is appended first; any blanks after
                    // it are dropped only when nothing but blanks precedes the delimiter.
                    // Ex: ->foo   <- is read as ->foo <-
                    // Ex: ->foo bar<- is read as ->foo bar<-
                    // Ex: ->foo bar <- is read as ->foo bar <-
                    // Ex: ->foo bar "er,ror"<- is read as ->foo bar "er<-
                    bool endOfRecord = false;
                    current.Append(ch);
                    ReadTillNextDelimiter(sr, current, ref endOfRecord, true, delimiter);
                    result.Add(current.ToString());
                    current.Remove(0, current.Length);

                    if (endOfRecord)
                    {
                        break;
                    }
                }
            }
            else if (IsNewLine(sr, ch, out string newLine))
            {
                if (seenBeginQuote)
                {
                    // newline inside quote are valid
                    current.Append(newLine);
                }
                else
                {
                    result.Add(current.ToString());
                    current.Remove(0, current.Length);

                    // New line outside quote is end of word and end of record
                    break;
                }
            }
            else
            {
                current.Append(ch);
            }
        }

        // Flush a field that was still being built when the loop ended (stream ended
        // without a newline). An empty pending field is not added here.
        if (current.Length != 0)
        {
            result.Add(current.ToString());
        }
    }

    /// <summary>
    /// Determines whether <paramref name="ch"/> starts a line break and reports which one.
    /// </summary>
    /// <param name="sr">Reader positioned just after <paramref name="ch"/>.</param>
    /// <param name="ch">Character that was just read.</param>
    /// <param name="newLine">
    /// The line break as text ("\r", "\n" or "\r\n"), or an empty string when
    /// <paramref name="ch"/> is not a line break.
    /// </param>
    /// <returns><see langword="true"/> when a line break was recognised.</returns>
    private static bool IsNewLine(StreamReader sr, char ch, out string newLine)
    {
        switch (ch)
        {
            case '\n':
                newLine = "\n";
                return true;

            case '\r':
                // A carriage return may be followed by a line feed; if so, consume it as well
                // so that the pair counts as a single line break.
                if (PeekNextChar(sr, '\n'))
                {
                    ReadChar(sr);
                    newLine = "\r\n";
                }
                else
                {
                    newLine = "\r";
                }

                return true;

            default:
                newLine = string.Empty;
                return false;
        }
    }

    /// <summary>
    /// Consumes characters up to the next delimiter, line break or end of stream and appends
    /// them to <paramref name="current"/>. The terminating delimiter or line break is consumed
    /// but not appended.
    /// </summary>
    /// <param name="sr">Reader to consume characters from.</param>
    /// <param name="current">Buffer of the field being built; receives the consumed text.</param>
    /// <param name="endOfRecord">
    /// Set to <see langword="true"/> when reading stopped at a line break or at the end of the
    /// stream. It is left untouched when reading stopped at the delimiter.
    /// </param>
    /// <param name="eatTrailingBlanks">
    /// When <see langword="true"/> and the consumed text holds only spaces and tabs, that text
    /// is trimmed before being appended. If any other character is present, the text is
    /// appended in full, blanks included.
    /// </param>
    /// <param name="delimiter">Field separator character.</param>
    private static void ReadTillNextDelimiter(
        StreamReader sr,
        StringBuilder current,
        ref bool endOfRecord,
        bool eatTrailingBlanks,
        char delimiter)
    {
        StringBuilder pending = new();

        // Stays true for as long as every consumed character is a space or a tab.
        bool onlyBlanks = true;

        for (; ; )
        {
            if (sr.EndOfStream)
            {
                endOfRecord = true;
                break;
            }

            char next = ReadChar(sr);

            // The delimiter is tested before the line-break check, as in the caller.
            if (next == delimiter)
            {
                break;
            }

            if (IsNewLine(sr, next, out _))
            {
                endOfRecord = true;
                break;
            }

            pending.Append(next);
            onlyBlanks &= next is ' ' or '\t';
        }

        string tail = pending.ToString();
        current.Append(eatTrailingBlanks && onlyBlanks ? tail.Trim() : tail);
    }
 }
diff --git a/Usage.md b/Usage.md
	using System;
	using System.Collections.Generic;
	using System.IO;
	using System.Linq.Expressions;
	using System.Management.Automation;
	using System.Reflection;
	using System.Text;

	/// <summary>
	/// Helper class to import single CSV file.
	/// </summary>
	public static class ImportCsvHelper
	{
	// Initial sizes of the value list and the line stringbuilder.
	// Set to reasonable initial sizes. They may grow beyond these,
	// but this will prevent a few reallocations.
	private const int ValueCountGuestimate = 16;
	private const int LineLengthGuestimate = 256;

	public static IEnumerable<T> Read<T>(string path, Encoding encoding) => Read<T>(path, encoding: encoding);

	public static IEnumerable<T> Read<T>(string path, char delimiter = ',', Encoding? encoding = null)
	where T : class, new()
	{
	using StreamReader sr = new(path, encoding ?? Encoding.UTF8);
	string[] header = ReadHeader(sr, delimiter);
	List<string> values = new(ValueCountGuestimate);
	StringBuilder builder = new(LineLengthGuestimate);
	Setter<T>[] setters = [.. GetSetters<T>(header)];

	while (true)
	{
	ParseNextRecord(sr, values, builder, delimiter);
	if (values.Count == 0)
	break;

	if (values.Count == 1 && string.IsNullOrEmpty(values[0]))
	{
	// skip the blank lines
	continue;
	}

	T outobj = new();
	for (int i = 0; i < setters.Length; i++)
	{
	setters[i].SetValue(outobj, values[i]);
	}

	yield return outobj;
	}
	}

	record struct Setter<T>(Action<T, object> Action, Type TargetType)
	{
	private static readonly Type s_stringType = typeof(string);
	private static readonly Type s_objectType = typeof(object);
	internal readonly void SetValue(T target, string value) =>
	Action(target, TargetType == s_stringType || TargetType == s_objectType
	? value : LanguagePrimitives.ConvertTo(value, TargetType));
	}

	private static IEnumerable<Setter<T>> GetSetters<T>(string[] properties)
	{
	Type type = typeof(T);
	foreach (string property in properties)
	{
	PropertyInfo info = type.GetProperty(
	property,
	BindingFlags.IgnoreCase | BindingFlags.Instance | BindingFlags.Public)
	?? throw new ArgumentNullException($"Property '{property}' not found on type '{type}'.");

	if (!info.CanWrite)
	throw new ArgumentException($"Property '{property}' on '{type}' is read-only.");

	MethodInfo setter = info.GetSetMethod()
	?? throw new ArgumentException($"Setter for '{property}' is inaccessible.");

	ParameterExpression target = Expression.Parameter(type, "target");
	ParameterExpression value = Expression.Parameter(typeof(object), "value");
	MethodCallExpression body = Expression.Call(target, setter, Expression.Convert(value, info.PropertyType));
	yield return new(
	Expression.Lambda<Action<T, object>>(body, target, value).Compile(),
	info.PropertyType);
	}
	}

	private static char ReadChar(StreamReader sr)
	{
	if (sr.EndOfStream)
	{
	throw new InvalidOperationException("EOF is reached.");
	}

	int i = sr.Read();
	return (char)i;
	}

	/// <summary>
	/// Peeks the next character in the stream and returns true if it is same as passed in character.
	/// </summary>
	/// <param name="c"></param>
	/// <returns></returns>
	private static bool PeekNextChar(StreamReader sr, char c)
	{
	int i = sr.Peek();
	if (i == -1)
	{
	return false;
	}

	return c == (char)i;
	}

	internal static string[] ReadHeader(StreamReader sr, char delimiter)
	{
	List<string> values = new(ValueCountGuestimate);
	StringBuilder builder = new(LineLengthGuestimate);
	while (!sr.EndOfStream)
	{
	ParseNextRecord(sr, values, builder, delimiter);

	// Trim all trailing blankspaces and delimiters ( single/multiple ).
	// If there is only one element in the row and if its a blankspace we dont trim it.
	// A trailing delimiter is represented as a blankspace while being added to result collection
	// which is getting trimmed along with blankspaces supplied through the CSV in the below loop.
	while (values.Count > 1 && values[^1].Equals(string.Empty))
	{
	values.RemoveAt(values.Count - 1);
	}

	// File starts with '#' and contains '#Fields:' is W3C Extended Log File Format
	if (values.Count != 0 && values[0].StartsWith("#Fields: "))
	{
	values[0] = values[0][9..];
	break;
	}
	else if (values.Count != 0 && values[0].StartsWith('#'))
	{
	// Skip all lines starting with '#'
	}
	else
	{
	// This is not W3C Extended Log File Format
	// By default first line is Header
	break;
	}
	}

	ValidatePropertyNames(values);
	return [.. values];
	}

	/// <summary>
	/// Validate the names of properties.
	/// </summary>
	/// <param name="names"></param>
	private static void ValidatePropertyNames(List<string> names)
	{
	if (names != null)
	{
	if (names.Count == 0)
	{
	// If there are no names, it is an error
	}
	else
	{
	HashSet<string> headers = new(StringComparer.OrdinalIgnoreCase);
	foreach (string currentHeader in names)
	{
	if (!string.IsNullOrEmpty(currentHeader))
	{
	if (!headers.Add(currentHeader))
	{
	// throw a terminating error as there are duplicate headers in the input.
	throw new ArgumentOutOfRangeException($"'{currentHeader}' is duplicated in header.");
	}
	}
	}
	}
	}
	}

	/// <summary>
	/// Reads the next record from the file and returns parsed collection of string.
	/// </summary>
	/// <returns>
	/// Parsed collection of strings.
	/// </returns>
	private static void ParseNextRecord(
	StreamReader sr,
	List<string> result,
	StringBuilder current,
	char delimiter)
	{
	result.Clear();

	// current string
	current.Clear();

	bool seenBeginQuote = false;

	while (!sr.EndOfStream)
	{
	// Read the next character
	char ch = ReadChar(sr);

	if (ch == delimiter)
	{
	if (seenBeginQuote)
	{
	// Delimiter inside double quotes is part of string.
	// Ex:
	// "foo, bar"
	// is parsed as
	// ->foo, bar<-
	current.Append(ch);
	}
	else
	{
	// Delimiter outside quotes is end of current word.
	result.Add(current.ToString());
	current.Remove(0, current.Length);
	}
	}
	else if (ch == '"')
	{
	if (seenBeginQuote)
	{
	if (PeekNextChar(sr, '"'))
	{
	// "" inside double quote are single quote
	// ex: "foo""bar"
	// is read as
	// ->foo"bar<-

	// PeekNextChar only peeks. Read the next char.
	ReadChar(sr);
	current.Append('"');
	}
	else
	{
	// We have seen a matching end quote.
	seenBeginQuote = false;

	// Read
	// everything till we hit next delimiter.
	// In correct CSV,1) end quote is followed by delimiter
	// 2)end quote is followed some whitespaces and
	// then delimiter.
	// We eat the whitespaces seen after the ending quote.
	// However if there are other characters, we add all of them
	// to string.
	// Ex: ->"foo bar"<- is read as ->foo bar<-
	// ->"foo bar" <- is read as ->foo bar<-
	// ->"foo bar" ab <- is read as ->"foo bar" ab <-
	bool endofRecord = false;
	ReadTillNextDelimiter(sr, current, ref endofRecord, true, delimiter);
	result.Add(current.ToString());
	current.Remove(0, current.Length);
	if (endofRecord)
	break;
	}
	}
	else if (current.Length == 0)
	{
	// We are at the beginning of a new word.
	// This quote is the first quote.
	seenBeginQuote = true;
	}
	else
	{
	// We are seeing a quote after the start of
	// the word. This is error, however we will be
	// lenient here and do what excel does:
	// Ex: foo "ba,r"
	// In above example word read is ->foo "ba<-
	// Basically we read till next delimiter
	bool endOfRecord = false;
	current.Append(ch);
	ReadTillNextDelimiter(sr, current, ref endOfRecord, false, delimiter);
	result.Add(current.ToString());
	current.Remove(0, current.Length);
	if (endOfRecord)
	break;
	}
	}
	else if (ch == ' ' || ch == '\t')
	{
	if (seenBeginQuote)
	{
	// Spaces in side quote are valid
	current.Append(ch);
	}
	else if (current.Length == 0)
	{
	// ignore leading spaces
	continue;
	}
	else
	{
	// We are not in quote and we are not at the
	// beginning of a word. We should not be seeing
	// spaces here. This is an error condition, however
	// we will be lenient here and do what excel does,
	// that is read till next delimiter.
	// Ex: ->foo <- is read as ->foo<-
	// Ex: ->foo bar<- is read as ->foo bar<-
	// Ex: ->foo bar <- is read as ->foo bar <-
	// Ex: ->foo bar "er,ror"<- is read as ->foo bar "er<-
	bool endOfRecord = false;
	current.Append(ch);
	ReadTillNextDelimiter(sr, current, ref endOfRecord, true, delimiter);
	result.Add(current.ToString());
	current.Remove(0, current.Length);

	if (endOfRecord)
	{
	break;
	}
	}
	}
	else if (IsNewLine(sr, ch, out string newLine))
	{
	if (seenBeginQuote)
	{
	// newline inside quote are valid
	current.Append(newLine);
	}
	else
	{
	result.Add(current.ToString());
	current.Remove(0, current.Length);

	// New line outside quote is end of word and end of record
	break;
	}
	}
	else
	{
	current.Append(ch);
	}
	}

	if (current.Length != 0)
	{
	result.Add(current.ToString());
	}
	}

	// If we detect a newline we return it as a string "\r", "\n" or "\r\n"
	private static bool IsNewLine(StreamReader sr, char ch, out string newLine)
	{
	newLine = string.Empty;
	if (ch == '\r')
	{
	if (PeekNextChar(sr, '\n'))
	{
	ReadChar(sr);
	newLine = "\r\n";
	}
	else
	{
	newLine = "\r";
	}
	}
	else if (ch == '\n')
	{
	newLine = "\n";
	}

	return newLine != string.Empty;
	}

	/// <summary>
	/// This function reads the characters till next delimiter and adds them to current.
	/// </summary>
	/// <param name="current"></param>
	/// <param name="endOfRecord">
	/// This is true if end of record is reached
	/// when delimiter is hit. This would be true if delimiter is NewLine.
	/// </param>
	/// <param name="eatTrailingBlanks">
	/// If this is true, eat the trailing blanks. Note:if there are non
	/// whitespace characters present, then trailing blanks are not consumed.
	/// </param>
	private static void ReadTillNextDelimiter(
	StreamReader sr,
	StringBuilder current,
	ref bool endOfRecord,
	bool eatTrailingBlanks,
	char delimiter)
	{
	StringBuilder temp = new();

	// Did we see any non-whitespace character
	bool nonWhiteSpace = false;

	while (true)
	{
	if (sr.EndOfStream)
	{
	endOfRecord = true;
	break;
	}

	char ch = ReadChar(sr);

	if (ch == delimiter)
	{
	break;
	}
	else if (IsNewLine(sr, ch, out string _))
	{
	endOfRecord = true;
	break;
	}
	else
	{
	temp.Append(ch);
	if (ch != ' ' && ch != '\t')
	{
	nonWhiteSpace = true;
	}
	}
	}

	if (eatTrailingBlanks && !nonWhiteSpace)
	{
	string s = temp.ToString();
	s = s.Trim();
	current.Append(s);
	}
	else
	{
	current.Append(temp);
	}
	}
	}
No results found