c# - 验证 csv 文件的内容

标签 c# csv file-upload

我有一个需求:用户将上传一个 csv 文件,其中包含大约 180 万到 200 万条记录,格式如下

SITE_ID,HOUSE,STREET,CITY,STATE,ZIP,APARTMENT  
44,545395,PORT ROYAL,CORPUS CHRISTI,TX,78418,2  
44,608646,TEXAS AVE,ODESSA,TX,79762,  
44,487460,EVERHART RD,CORPUS CHRISTI,TX,78413,  
44,275543,EDWARD GARY,SAN MARCOS,TX,78666,4  
44,136811,MAGNOLIA AVE,SAN ANTONIO,TX,78212  

我要做的是,首先验证文件,然后将其保存在数据库中(前提是验证成功且没有错误)。我必须应用的验证对于每一列都是不同的。例如,

SITE_ID: it can only be an integer and it is required.  
HOUSE: integer, required  
STREET: alphanumeric, required  
CITY: alphabets only, required  
State: 2 alphabets only, required  
zip: 5 digits only, required  
APARTMENT: integer only, optional  

我需要一种将这些验证应用于各个列的通用方法。到目前为止我尝试过的是我将 csv 文件转换为数据表,我计划尝试通过正则表达式验证每个单元格,但这对我来说似乎不是一个通用或好的解决方案。谁能在这方面帮助我并指出正确的方向?

最佳答案

这是一种有效的方法:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Text.RegularExpressions;
using System.IO;


namespace ConsoleApplication23
{
    /// <summary>
    /// Console entry point: loads the CSV file, validates every row and
    /// reports how many rows failed validation.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.csv";

        /// <summary>
        /// Reads <see cref="FILENAME"/>, runs the column validation rules over
        /// the first loaded table and prints a one-line summary.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            CSVReader csvReader = new CSVReader();
            DataSet ds = csvReader.ReadCSVFile(FILENAME, true);

            // ReadCSVFile reports failures on the console and hands back an
            // empty DataSet, so Tables[0] must not be indexed blindly.
            if (ds == null || ds.Tables.Count == 0)
            {
                Console.WriteLine("No data could be read from " + FILENAME);
                return;
            }

            RegexCompare compare = new RegexCompare();
            DataTable errors = compare.Get_Error_Rows(ds.Tables[0]);

            // Get_Error_Rows returns null (not an empty table) when every row is valid.
            int errorCount = errors == null ? 0 : errors.Rows.Count;
            if (errorCount == 0)
            {
                Console.WriteLine("Validation succeeded: no invalid rows found.");
            }
            else
            {
                Console.WriteLine("Validation failed: " + errorCount + " invalid row(s) found.");
            }
        }
    }
    /// <summary>
    /// A validation rule for one CSV column, plus the entry point
    /// (<see cref="Get_Error_Rows"/>) that applies all rules in <see cref="dict"/>
    /// to a table and collects the rows that violate them.
    /// </summary>
    class RegexCompare
    {
        /// <summary>
        /// Validation rules keyed by column name. Lookup is case-insensitive so a
        /// header written as "State" still finds the "STATE" rule.
        /// Digit classes are spelled [0-9] rather than \d because \d also accepts
        /// non-ASCII Unicode digits.
        /// </summary>
        public static Dictionary<string,RegexCompare> dict = new Dictionary<string,RegexCompare>(StringComparer.OrdinalIgnoreCase) {
               { "SITE_ID", new RegexCompare("SITE_ID", @"[^0-9]+", false, true) },
               { "HOUSE", new RegexCompare("HOUSE", @"[^0-9]+", false, true) },
               { "STREET", new RegexCompare("STREET", @"[A-Za-z0-9 ]+", true, true) },
               { "CITY", new RegexCompare("CITY", @"[A-Za-z ]+", true, true) },
               { "STATE", new RegexCompare("STATE", @"[A-Za-z]{2}", true, true) },
               { "ZIP", new RegexCompare("ZIP", @"[0-9]{5}", true, true) },
               { "APARTMENT", new RegexCompare("APARTMENT", @"[0-9]*", true, false) },
            };

        // Name of the column this rule applies to.
        private string columnName { get; set; }
        // Regular expression source text of the rule.
        private string pattern { get; set; }
        // true: the whole value must match the pattern.
        // false: the pattern describes forbidden content and must not occur anywhere.
        private Boolean positiveMatch { get; set; }
        // true: a missing (null / DBNull / blank) value is an error.
        private Boolean required { get; set; }
        // Pre-built matcher; null for instances created with the public constructor.
        private Regex regex { get; set; }

        /// <summary>
        /// Creates an instance that carries no rule of its own; it is used only to
        /// call <see cref="Get_Error_Rows"/>.
        /// </summary>
        public RegexCompare()
        {
        }

        /// <summary>Creates a rule and builds its regex once, up front.</summary>
        /// <param name="columnName">Column the rule applies to.</param>
        /// <param name="pattern">Regular expression source.</param>
        /// <param name="positiveMatch">true if the whole value must match; false if the pattern must not occur.</param>
        /// <param name="required">true if the column must have a non-blank value.</param>
        private RegexCompare(string columnName, string pattern, Boolean positiveMatch, Boolean required)
        {
            this.columnName = columnName;
            this.pattern = pattern;
            this.positiveMatch = positiveMatch;
            this.required = required;

            // Positive rules are anchored so the regex engine itself decides whether
            // the ENTIRE value matches (comparing the first match's length is wrong
            // for patterns with alternation). Negative rules search anywhere.
            string source = positiveMatch ? @"\A(?:" + pattern + @")\z" : pattern;
            this.regex = new Regex(source, RegexOptions.CultureInvariant);
        }

        /// <summary>
        /// Validates every row of <paramref name="dt"/> against the rules in
        /// <see cref="dict"/>.
        /// </summary>
        /// <param name="dt">Table to validate. Columns without a rule are ignored.</param>
        /// <returns>
        /// A table with the same schema as <paramref name="dt"/> containing a copy of
        /// every invalid row, or null when all rows are valid.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
        public DataTable Get_Error_Rows(DataTable dt)
        {
            if (dt == null)
            {
                throw new ArgumentNullException("dt");
            }

            // A required column that is absent from the table makes every row invalid.
            Boolean requiredColumnMissing = false;
            foreach (KeyValuePair<string, RegexCompare> entry in dict)
            {
                if (entry.Value != null && entry.Value.required && !dt.Columns.Contains(entry.Key))
                {
                    requiredColumnMissing = true;
                    break;
                }
            }

            DataTable dtError = null;
            foreach (DataRow row in dt.Rows)
            {
                if (!requiredColumnMissing && IsRowValid(row, dt.Columns))
                {
                    continue;
                }

                // The error table is created lazily so that "no errors" stays null.
                if (dtError == null)
                {
                    dtError = dt.Clone();
                }
                dtError.Rows.Add(row.ItemArray);
            }
            return dtError;
        }

        /// <summary>Returns true when every ruled column of the row holds an acceptable value.</summary>
        private static Boolean IsRowValid(DataRow row, DataColumnCollection columns)
        {
            foreach (DataColumn col in columns)
            {
                RegexCompare rule;
                if (!dict.TryGetValue(col.ColumnName, out rule) || rule == null)
                {
                    // No rule registered for this column: nothing to validate.
                    continue;
                }

                object raw = row[col];
                string text = (raw == null || raw == DBNull.Value)
                    ? null
                    : Convert.ToString(raw, System.Globalization.CultureInfo.InvariantCulture);

                if (!rule.Accepts(text))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Applies this rule to a single cell value (null means "no value").</summary>
        private Boolean Accepts(string value)
        {
            // Blank counts as missing: an error for required columns, fine otherwise.
            if (string.IsNullOrWhiteSpace(value))
            {
                return !required;
            }

            // A rule without a pattern only enforces the "required" check.
            if (regex == null)
            {
                return true;
            }

            Boolean matched = regex.IsMatch(value);
            return positiveMatch ? matched : !matched;
        }
    }

    /// <summary>
    /// Loads a delimited text file into a <see cref="DataSet"/> through the
    /// Jet OLE DB text driver.
    /// </summary>
    public class CSVReader
    {

        /// <summary>
        /// Reads the CSV file at <paramref name="fullPath"/> into a table named "Table1".
        /// Spaces in column names are replaced with underscores.
        /// </summary>
        /// <param name="fullPath">Path of the CSV file; relative paths are resolved against the current directory.</param>
        /// <param name="headerRow">true if the first line of the file holds the column names.</param>
        /// <returns>
        /// A DataSet containing "Table1" on success. On any failure (file not found,
        /// provider error, ...) the problem is written to the console and an empty
        /// DataSet is returned, so callers must check <c>Tables.Count</c>.
        /// </returns>
        public DataSet ReadCSVFile(string fullPath, bool headerRow)
        {
            DataSet ds = new DataSet();

            try
            {
                // File.Exists is false for null/empty paths too, so this guard also
                // covers those inputs.
                if (!File.Exists(fullPath))
                {
                    Console.WriteLine("CSV file not found: " + fullPath);
                    return ds;
                }

                // The text driver treats the directory as the "database" and the
                // file as a table inside it.
                string absolutePath = Path.GetFullPath(fullPath);
                string directory = Path.GetDirectoryName(absolutePath);
                string filename = Path.GetFileName(absolutePath);

                // NOTE(review): Microsoft.Jet.OLEDB.4.0 is presumably only available to
                // 32-bit processes - confirm the build target. Column types are inferred
                // by the driver, so values such as ZIP codes may lose leading zeros
                // unless a schema.ini is supplied - verify against real data.
                string ConStr = string.Format(
                    "Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Text;HDR={1};FMT=Delimited\"",
                    directory,
                    headerRow ? "Yes" : "No");

                // Brackets keep file names containing spaces valid as a table name.
                string SQL = string.Format("SELECT * FROM [{0}]", filename);

                using (OleDbDataAdapter adapter = new OleDbDataAdapter(SQL, ConStr))
                {
                    adapter.Fill(ds, "Table1");
                }

                DataTable table = ds.Tables["Table1"];
                if (table != null)
                {
                    foreach (DataColumn col in table.Columns)
                    {
                        col.ColumnName = col.ColumnName.Replace(" ", "_");
                    }
                }
            }

            catch (Exception ex)
            {
                // Best effort by design: report the problem and return whatever was loaded.
                Console.WriteLine(ex.Message);
            }
            return ds;
        }
    }

}

关于“c# - 验证 csv 文件的内容”,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48788682/

相关文章:

c# - 发布方法并使用某些值重定向而不使用 Form C#

python - Pandas 读取 csv 正在移动列

java - 在 Tomcat 中使用 JSP 上传图像

php - 将文件上传到同一服务器上的子域 - 权限被拒绝

python - 为 CSV Sqlite Python 脚本提供的绑定(bind)数量不正确

带有多个进度条的 HTML5 文件上传

c# - C# .net 中的 Json 对象

c# - 删除 TabPage : Dispose or Clear or both?

c# - 单个 NSTabView 中的多个 NSTabViewItem

python - 有没有更快的方法使用 python 从 CSV 中分离重复和不同的数据?