我的硕士论文是关于通过分析元数据和存储的数据来发现糟糕的数据库设计。为此,我们从给定的 DBMS 中提取元数据模型,然后在该元数据上运行一组规则。
要通过数据分析扩展这个过程,我们需要允许规则直接查询数据库,但我们必须保持 DBMS 独立性,以便查询可以应用于 PostgreSQL、MSSQL 和 MySQL。
我们讨论了一种查询的函数式构造,例如:
new Query(new Select(columnID), new From(tableID), new Where(new Equality(columnID1, columnID2)))
然后使用特定于 DBMS 的序列化程序。
另一种方法是让规则自己处理:
public Query QueryDatabase(DBMS dbms)
{
if (dbms == PostgreSQL) { return "select count(1) from Users"}
if (dbms == MSSQL) {return ....}
}
我们是否遗漏了什么?所有这些实际上都存在于某个不错的图书馆中吗?是的,我们已经研究了 Entity Framework ,但它们似乎依赖于数据库的静态类型模型,由于显而易见的原因无法创建。
我应该提一下,我们维护着一个可扩展的规则架构,允许最终用户实现他们自己的规则。
为了弄清楚我们要实现什么,看下面的查询(mssql),它需要两个参数,表名(@table)和列名(@column):
DECLARE @TotalCount FLOAT;
SELECT @TotalCount = COUNT(1) FROM [@table];
SELECT SUM(pcount * LOG10(@TotalCount / pcount)) / (LOG10(2) * @TotalCount)
FROM (SELECT (Count([@column])) as pcount
FROM [@table]
GROUP BY [@column]) as exp1
查询通过估计熵来测量存储在给定属性中的信息量。它需要访问表中的所有行。为避免从数据库中提取所有行并通过慢速网络连接传输它们,最好用 SQL 表示它们,只传输一个数字。
注意:我们确实拥有我们需要的所有元数据。本题仅用于访问数据!
我不太确定是将这个添加到我已经很长的问题中,编辑现有答案还是要做什么。请随时指教。 ;)
基于我的回答:
new Query()
.Variable(varname => FLOAT)
.Set(varname => new Query().Count(1).From(table) )
.Select(new Aggregate().Sum(varname => "pcount * LOG10(varname / pcount)"))
.From(
new Query()
.Select(pcount => new Aggregate().Count(column)
.From(table)
.GroupBy(column)
)
抛开语法错误和 lambda 语句的误用不谈,我尝试使用一些扩展方法来构建查询。这看起来确实是一种相当复杂的方法。您如何看待这种方法?
基于 LINQ 答案:
let totalCount = Table.Count
from uv un from r in Table
group r by r["attr"]
select r.Count
select r.Count * Log2((totalCount / r.Count))
看起来相当不错,但是要实现的东西太多了......
最佳答案
您可以通过实现自定义 LINQ provider 来实现相同的目的基础设施。查询是通用的,但是 AST tree visitors生成 SQL 查询的函数可以插入。您甚至可以使用内存数据存储来模拟数据库,并将您的自定义 LINQ 查询转换为 LINQ to objects 查询!
您需要创建一个知道如何从对象的索引器中提取列名的提供程序。这是您可以扩展的基本框架:
// Runs in LinqPad!
public class TableQueryObject
{
private readonly Dictionary<string, object> _data = new Dictionary<string, object>();
public string TableName { get; set; }
public object this[string column]
{
get { return _data.ContainsKey(column) ? _data[column] : null; }
set { if (_data.ContainsKey(column)) _data[column] = value; else _data.Add(column, value); }
}
}
public interface ITableQuery : IEnumerable<TableQueryObject>
{
string TableName { get; }
string ConnectionString { get; }
Expression Expression { get; }
ITableQueryProvider Provider { get; }
}
public interface ITableQueryProvider
{
ITableQuery Query { get; }
IEnumerable<TableQueryObject> Execute(Expression expression);
}
public interface ITableQueryFactory
{
ITableQuery Query(string tableName);
}
public static class ExtensionMethods
{
class TableQueryContext : ITableQuery
{
private readonly ITableQueryProvider _queryProvider;
private readonly Expression _expression;
public TableQueryContext(ITableQueryProvider queryProvider, Expression expression)
{
_queryProvider = queryProvider;
_expression = expression;
}
public string TableName { get { return _queryProvider.Query.TableName; } }
public string ConnectionString { get { return _queryProvider.Query.ConnectionString; } }
public Expression Expression { get { return _expression; } }
public ITableQueryProvider Provider { get { return _queryProvider; } }
public IEnumerator<TableQueryObject> GetEnumerator() { return Provider.Execute(Expression).GetEnumerator(); }
IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }
}
public static MethodInfo MakeGeneric(MethodBase method, params Type[] parameters)
{
return ((MethodInfo)method).MakeGenericMethod(parameters);
}
public static Expression StaticCall(MethodInfo method, params Expression[] expressions)
{
return Expression.Call(null, method, expressions);
}
public static ITableQuery CreateQuery(this ITableQueryProvider source, Expression expression)
{
return new TableQueryContext(source, expression);
}
public static IEnumerable<TableQueryObject> Select<TSource>(this ITableQuery source, Expression<Func<TSource, TableQueryObject>> selector)
{
return source.Provider.CreateQuery(StaticCall(MakeGeneric(MethodBase.GetCurrentMethod(), typeof(TSource)), source.Expression, Expression.Quote(selector)));
}
public static ITableQuery Where(this ITableQuery source, Expression<Func<TableQueryObject, bool>> predicate)
{
return source.Provider.CreateQuery(StaticCall((MethodInfo)MethodBase.GetCurrentMethod(), source.Expression, Expression.Quote(predicate)));
}
}
class SqlTableQueryFactory : ITableQueryFactory
{
class SqlTableQuery : ITableQuery
{
private readonly string _tableName;
private readonly string _connectionString;
private readonly ITableQueryProvider _provider;
private readonly Expression _expression;
public SqlTableQuery(string tableName, string connectionString)
{
_connectionString = connectionString;
_tableName = tableName;
_provider = new SqlTableQueryProvider(this);
_expression = Expression.Constant(this);
}
public IEnumerator<TableQueryObject> GetEnumerator() { return Provider.Execute(Expression).GetEnumerator(); }
IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }
public string TableName { get { return _tableName; } }
public string ConnectionString { get { return _connectionString; } }
public Expression Expression { get { return _expression; } }
public ITableQueryProvider Provider { get { return _provider; } }
}
class SqlTableQueryProvider : ITableQueryProvider
{
private readonly ITableQuery _query;
public ITableQuery Query { get { return _query; } }
public SqlTableQueryProvider(ITableQuery query) { _query = query; }
public IEnumerable<TableQueryObject> Execute(Expression expression)
{
//var connecitonString = _query.ConnectionString;
//var tableName = _query.TableName;
// TODO visit expression AST (generate any sql dialect you want) and execute resulting sql
// NOTE of course the query can be easily parameterized!
// NOTE here the fun begins, just return some dummy data for now :)
for (int i = 0; i < 100; i++)
{
var obj = new TableQueryObject();
obj["a"] = i;
obj["b"] = "blah " + i;
yield return obj;
}
}
}
private readonly string _connectionString;
public SqlTableQueryFactory(string connectionString) { _connectionString = connectionString; }
public ITableQuery Query(string tableName)
{
return new SqlTableQuery(tableName, _connectionString);
}
}
static void Main()
{
ITableQueryFactory database = new SqlTableQueryFactory("SomeConnectionString");
var result = from row in database.Query("myTbl")
where row["someColumn"] == "1" && row["otherColumn"] == "2"
where row["thirdColumn"] == "2" && row["otherColumn"] == "4"
select row["a"]; // NOTE select executes as linq to objects! FTW
foreach(var a in result)
{
Console.WriteLine(a);
}
}
关于c# - 独立于 DBMS 的查询,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/5063760/