我有一个需求:用户会上传一个 CSV 文件,其中包含大约 180 万到 200 万条记录,格式如下:
SITE_ID,HOUSE,STREET,CITY,STATE,ZIP,APARTMENT
44,545395,PORT ROYAL,CORPUS CHRISTI,TX,78418,2
44,608646,TEXAS AVE,ODESSA,TX,79762,
44,487460,EVERHART RD,CORPUS CHRISTI,TX,78413,
44,275543,EDWARD GARY,SAN MARCOS,TX,78666,4
44,136811,MAGNOLIA AVE,SAN ANTONIO,TX,78212
我要做的是,首先验证文件,然后将其保存在数据库中(前提是验证成功且没有错误)。我必须应用的验证对于每一列都是不同的。例如,
SITE_ID: it can only be an integer and it is required.
HOUSE: integer, required
STREET: alphanumeric, required
CITY: letters only, required
STATE: exactly 2 letters, required
ZIP: exactly 5 digits, required
APARTMENT: integer only, optional
我需要一种通用的方法,把这些验证规则分别应用到各个列上。目前我的做法是先把 CSV 文件转换成 DataTable,然后打算用正则表达式逐个单元格进行验证,但我觉得这既不通用,也算不上好的解决方案。有谁能在这方面帮帮我,给我指出正确的方向吗?
最佳答案
这是一种有效的方法:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Text.RegularExpressions;
using System.IO;
namespace ConsoleApplication23
{
/// <summary>
/// Console entry point: loads the CSV file named by <see cref="FILENAME"/> into a
/// DataSet and runs the per-column validation rules over its first table.
/// </summary>
class Program
{
    const string FILENAME = @"c:\temp\test.csv";

    /// <summary>
    /// Reads the CSV file, validates every row and prints how many rows failed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        CSVReader csvReader = new CSVReader();
        DataSet ds = csvReader.ReadCSVFile(FILENAME, true);

        // ReadCSVFile swallows its own failures and then returns a DataSet that
        // contains no tables; indexing Tables[0] in that state would throw.
        if (ds == null || ds.Tables.Count == 0)
        {
            Console.WriteLine("No data could be loaded from " + FILENAME);
            return;
        }

        RegexCompare compare = new RegexCompare();
        DataTable errors = compare.Get_Error_Rows(ds.Tables[0]);

        // Get_Error_Rows returns null (not an empty table) when every row is valid.
        int errorCount = errors == null ? 0 : errors.Rows.Count;
        Console.WriteLine("Rows failing validation: " + errorCount);
    }
}
/// <summary>
/// A single per-column validation rule, plus the static rule table (<see cref="dict"/>)
/// and the row scanner (<see cref="Get_Error_Rows"/>) that applies the rules to a DataTable.
/// </summary>
class RegexCompare
{
    /// <summary>
    /// Validation rules keyed by column name. Keys are compared case-insensitively so
    /// that a header such as "zip" or "State" still picks up its rule.
    /// </summary>
    public static Dictionary<string,RegexCompare> dict = new Dictionary<string,RegexCompare>(StringComparer.OrdinalIgnoreCase) {
        { "SITE_ID", new RegexCompare() { columnName = "SITE_ID", pattern = @"[^\d]+", positveNegative = false, required = true}},
        { "HOUSE", new RegexCompare() { columnName = "HOUSE", pattern = @"[^\d]+", positveNegative = false, required = true}},
        { "STREET", new RegexCompare() { columnName = "STREET", pattern = @"[A-Za-z0-9 ]+", positveNegative = true, required = true}},
        { "CITY", new RegexCompare() { columnName = "CITY", pattern = @"[A-Za-z ]+", positveNegative = true, required = true}},
        { "STATE", new RegexCompare() { columnName = "STATE", pattern = @"[A-Za-z]{2}", positveNegative = true, required = true}},
        { "ZIP", new RegexCompare() { columnName = "ZIP", pattern = @"\d{5}", positveNegative = true, required = true}},
        { "APARTMENT", new RegexCompare() { columnName = "APARTMENT", pattern = @"\d*", positveNegative = true, required = false}},
    };

    // Name of the column this rule applies to.
    string columnName { get; set;}

    // Regular expression used by the rule (see positveNegative for how it is applied).
    string pattern { get; set; }

    // true  -> the whole value must be matched by the pattern;
    // false -> the value must not contain any match of the pattern.
    Boolean positveNegative { get; set; }

    // true -> a missing (null / DBNull / blank) value is an error.
    Boolean required { get; set; }

    /// <summary>
    /// Checks every row of <paramref name="dt"/> against the rules in <see cref="dict"/>
    /// and collects the rows that violate at least one rule.
    /// </summary>
    /// <param name="dt">Table to validate. Columns without an entry in <see cref="dict"/> are not validated.</param>
    /// <returns>
    /// A table with the same schema as <paramref name="dt"/> holding a copy of each invalid
    /// row, or <c>null</c> when no row is invalid.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
    public DataTable Get_Error_Rows(DataTable dt)
    {
        if (dt == null)
        {
            throw new ArgumentNullException("dt");
        }

        // Resolve the rule for each column once instead of once per cell. A null slot
        // means "no rule for this column", which is skipped rather than throwing.
        RegexCompare[] rules = new RegexCompare[dt.Columns.Count];
        for (int i = 0; i < rules.Length; i++)
        {
            RegexCompare rule;
            dict.TryGetValue(dt.Columns[i].ColumnName, out rule);
            rules[i] = rule;
        }

        DataTable dtError = null;
        foreach (DataRow row in dt.Rows)
        {
            if (RowIsValid(row, rules))
            {
                continue;
            }

            // The error table is created lazily so that "no errors" stays null.
            if (dtError == null)
            {
                dtError = dt.Clone();
            }
            dtError.Rows.Add(row.ItemArray);
        }
        return dtError;
    }

    // Returns true when every cell of the row satisfies the rule of its column.
    private static bool RowIsValid(DataRow row, RegexCompare[] rules)
    {
        for (int i = 0; i < rules.Length; i++)
        {
            if (rules[i] != null && !rules[i].Accepts(row[i]))
            {
                return false;
            }
        }
        return true;
    }

    // Returns true when a single cell value satisfies this rule.
    private bool Accepts(object rawValue)
    {
        string text = (rawValue == null || rawValue is DBNull) ? null : rawValue.ToString();

        // Null, DBNull and the empty string all count as "no value": an error for a
        // required column, acceptable for an optional one.
        if (string.IsNullOrEmpty(text))
        {
            return !required;
        }

        // A whitespace-only value is not a real value for a required column, even if
        // the column's pattern happens to allow spaces.
        if (required && text.Trim().Length == 0)
        {
            return false;
        }

        Match match = Regex.Match(text, pattern);
        if (positveNegative)
        {
            // The first match has to span the complete value.
            return match.Success && match.Length == text.Length;
        }

        // Negative rule: any occurrence of the pattern is an error.
        return !match.Success;
    }
}
/// <summary>
/// Loads a delimited text file into a DataSet through the Jet OLE DB text driver.
/// </summary>
public class CSVReader
{
    /// <summary>
    /// Reads the CSV file at <paramref name="fullPath"/> into a DataSet whose single
    /// table is named "Table1"; spaces in column names are replaced with underscores.
    /// </summary>
    /// <param name="fullPath">Path of the CSV file to read.</param>
    /// <param name="headerRow">true when the first line of the file holds the column names.</param>
    /// <returns>
    /// The populated DataSet. When the file does not exist or reading fails, the problem
    /// is written to the console and a DataSet without tables is returned.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fullPath"/> is null.</exception>
    public DataSet ReadCSVFile(string fullPath, bool headerRow)
    {
        if (fullPath == null)
        {
            throw new ArgumentNullException("fullPath");
        }

        DataSet ds = new DataSet();
        try
        {
            // Bail out early: without a file there is no table to rename or post-process.
            if (!File.Exists(fullPath))
            {
                Console.WriteLine("CSV file not found: " + fullPath);
                return ds;
            }

            // The text driver takes the folder as the data source and the file name as
            // the table name. Resolving the full path first makes relative paths and
            // either kind of directory separator work.
            string absolutePath = Path.GetFullPath(fullPath);
            string folder = Path.GetDirectoryName(absolutePath);
            string filename = Path.GetFileName(absolutePath);

            string ConStr = string.Format(
                "Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Text;HDR={1};FMT=Delimited\"",
                folder,
                headerRow ? "Yes" : "No");

            // Brackets keep file names containing spaces or punctuation valid as a table name.
            string SQL = string.Format("SELECT * FROM [{0}]", filename);

            using (OleDbDataAdapter adapter = new OleDbDataAdapter(SQL, ConStr))
            {
                adapter.Fill(ds, "TextFile");
            }
            ds.Tables[0].TableName = "Table1";

            foreach (DataColumn col in ds.Tables["Table1"].Columns)
            {
                col.ColumnName = col.ColumnName.Replace(" ", "_");
            }
        }
        catch (Exception ex)
        {
            // Best-effort reader: report the failure and return whatever was loaded.
            Console.WriteLine(ex.Message);
        }
        return ds;
    }
}
}
关于 C# - 验证 CSV 文件的内容,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/48788682/