是否有现成的函数可以展开 C# 正则表达式风格的字符集?
例如,expand("a-z1")
将返回一个字符串,其中依次包含 a 到 z 的所有字符,后跟数字 1。
这是我到目前为止所得到的:
/// <summary>
/// Expands a simple regex-style character set body, e.g. <c>a-fA-F0-9</c>, into
/// the string of all characters it denotes (<c>abcdefABCDEF0123456789</c>).
/// </summary>
/// <param name="set">
/// The set body without enclosing brackets. A <c>x-y</c> triple is expanded to
/// every character from <c>x</c> through <c>y</c> inclusive; every other character
/// is copied as a literal. A '-' in the first or last position is a literal.
/// Escapes and negation are not interpreted.
/// </param>
/// <returns>
/// The expanded characters in the order they appear in <paramref name="set"/>.
/// Duplicates are not removed. A descending range such as <c>z-a</c> contributes
/// no characters.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
    {
        throw new ArgumentNullException("set");
    }

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so that a '-' sitting at 'start' itself is kept as a literal.
        int dash = set.IndexOf('-', start + 1);

        // No dash left, or the dash is the final character: the remainder is all literals.
        if (dash < 0 || dash >= set.Length - 1)
        {
            break;
        }

        // Copy the literals that precede the range's lower bound (which is at dash - 1).
        sb.Append(set, start, dash - start - 1);

        // Use an int counter: a char counter would wrap from '\uFFFF' back to '\0'
        // and the loop would never terminate when the upper bound is '\uFFFF'.
        int last = set[dash + 1];
        for (int c = set[dash - 1]; c <= last; ++c)
        {
            sb.Append((char)c);
        }

        start = dash + 2;
    }

    // Whatever is left after the last range is literal text.
    sb.Append(set, start, set.Length - start);
    return sb.ToString();
}
有什么我忽略的吗?
PS:我们暂时先不考虑否定字符集(例如 [^a-z])。
我认为我的例子已经很清楚了……让我们再试一次。这就是我想要的:
ExpandCharacterSet("a-fA-F0-9") == "abcdefABCDEF0123456789"
最佳答案
得到这个结果费了一些功夫,下面是我整理出来的方案。当然,它不具备可移植性,因为我通过反射调用了 .NET 的内部实现;但对于简单的测试用例来说已经足够了。它可以接受任意正则表达式字符类,但不适用于否定类:那样的取值范围太宽,没有任何限制。我不确定它是否适用于所有情况,而且它完全不处理重复字符,但这至少是一个起点,您也不必自己编写解析器。以下代码针对 .NET Framework 4.0:
/// <summary>
/// Expands a regex character class into the string of characters it contains by
/// driving the regex engine's non-public parser types through reflection.
/// </summary>
/// <remarks>
/// The reflected type and member names are looked up by string in the assembly that
/// defines <see cref="Regex"/>. NOTE(review): these are implementation details, so the
/// lookups presumably return null on runtimes whose internals differ — confirm before
/// using this anywhere other than the framework version it was written against.
/// </remarks>
public static class RegexHelper
{
/// <summary>
/// Parses <paramref name="charClass"/> with the internal parser and concatenates the
/// expansion of every range the parser reports, except the first one.
/// </summary>
/// <param name="charClass">The character class text handed to the internal parser.</param>
/// <returns>The expanded ranges, joined in the order the parser reports them.</returns>
public static string ExpandCharClass(string charClass)
{
var regexParser = new RegexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
// false is passed as the wrapper's caseInsensitive argument.
var regexCharClass = regexParser.ScanCharClass(false);
int count = regexCharClass.RangeCount();
List<string> ranges = new List<string>();
// Range 0 is skipped on purpose (the loop starts at 1).
// NOTE(review): presumably range 0 is the literal '[' that opens the pattern,
// because parsing starts at the very first character — confirm.
for (int i = 1; i < count; i++)
{
var range = regexCharClass.GetRangeAt(i);
ranges.Add(ExpandRange(range));
}
return String.Concat(ranges);
}
/// <summary>
/// Returns every character from <c>range._first</c> through <c>range._last</c>
/// inclusive, in ascending order, as one string.
/// </summary>
static string ExpandRange(SingleRange range)
{
char first = range._first;
char last = range._last;
return String.Concat(Enumerable.Range(first, last - first + 1).Select(i => (char)i));
}
/// <summary>
/// Reflection wrapper around the non-public type
/// <c>System.Text.RegularExpressions.RegexParser</c>.
/// </summary>
internal class RegexParser
{
static readonly Type RegexParserType;
static readonly ConstructorInfo RegexParser_Ctor;
static readonly MethodInfo RegexParser_SetPattern;
static readonly MethodInfo RegexParser_ScanCharClass;
// Resolves the internal type, its (CultureInfo) constructor and the
// SetPattern(string) / ScanCharClass(bool) methods once, as non-public instance members.
static RegexParser()
{
RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
}
// The wrapped internal parser object.
private readonly object instance;
/// <summary>Creates the internal parser by invoking its (CultureInfo) constructor.</summary>
internal RegexParser(CultureInfo culture)
{
instance = RegexParser_Ctor.Invoke(new object[] { culture });
}
/// <summary>Forwards <paramref name="pattern"/> to the internal SetPattern method.</summary>
internal void SetPattern(string pattern)
{
RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
}
/// <summary>
/// Invokes the internal ScanCharClass method and wraps its result in a
/// <see cref="RegexCharClass"/>.
/// </summary>
internal RegexCharClass ScanCharClass(bool caseInsensitive)
{
return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
}
}
/// <summary>
/// Reflection wrapper around the non-public type
/// <c>System.Text.RegularExpressions.RegexCharClass</c>.
/// </summary>
internal class RegexCharClass
{
static readonly Type RegexCharClassType;
static readonly MethodInfo RegexCharClass_RangeCount;
static readonly MethodInfo RegexCharClass_GetRangeAt;
// Resolves the internal type and its parameterless RangeCount and
// GetRangeAt(int) methods once, as non-public instance members.
static RegexCharClass()
{
RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
}
// The wrapped internal character-class object.
private readonly object instance;
/// <summary>Wraps an existing internal character-class object.</summary>
/// <exception cref="ArgumentNullException"><paramref name="regexCharClass"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="regexCharClass"/> is not exactly of the resolved internal type.
/// </exception>
internal RegexCharClass(object regexCharClass)
{
if (regexCharClass == null)
throw new ArgumentNullException("regexCharClass");
if (regexCharClass.GetType() != RegexCharClassType)
throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
instance = regexCharClass;
}
/// <summary>Returns the int produced by the internal RangeCount method.</summary>
internal int RangeCount()
{
return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
}
/// <summary>
/// Invokes the internal GetRangeAt method with <paramref name="i"/> and copies the
/// result into a <see cref="SingleRange"/>.
/// </summary>
internal SingleRange GetRangeAt(int i)
{
return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
}
}
/// <summary>
/// Value copy of the <c>_first</c> and <c>_last</c> fields of the non-public nested
/// type <c>System.Text.RegularExpressions.RegexCharClass+SingleRange</c>.
/// </summary>
internal struct SingleRange
{
static readonly Type RegexCharClassSingleRangeType;
static readonly FieldInfo SingleRange_first;
static readonly FieldInfo SingleRange_last;
// Resolves the nested internal type and its non-public instance fields
// _first and _last once.
static SingleRange()
{
RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
}
// Copied from the internal object's _first field; used by ExpandRange as the lower bound.
internal char _first;
// Copied from the internal object's _last field; used by ExpandRange as the upper bound.
internal char _last;
/// <summary>Reads both fields out of an internal single-range object.</summary>
/// <exception cref="ArgumentNullException"><paramref name="singleRange"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="singleRange"/> is not exactly of the resolved internal type.
/// </exception>
internal SingleRange(object singleRange)
{
if (singleRange == null)
throw new ArgumentNullException("singleRange");
if (singleRange.GetType() != RegexCharClassSingleRangeType)
throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
_first = (char)SingleRange_first.GetValue(singleRange);
_last = (char)SingleRange_last.GetValue(singleRange);
}
}
}
// Usage example: the character class is passed together with its enclosing brackets.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// Expected result: "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
关于c# - 扩展字符集的算法?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4272748/