I would consider the following RegExp a reasonable approximation.
/(\\/|[^/])+/([a-zA-Z])*
The rules are formally defined:
RegularExpressionLiteral :: See 7.8.5
/ RegularExpressionBody / RegularExpressionFlags
RegularExpressionBody :: See 7.8.5
RegularExpressionFirstChar RegularExpressionChars
RegularExpressionChars :: See 7.8.5
[empty]
RegularExpressionChars RegularExpressionChar
RegularExpressionFirstChar :: See 7.8.5
RegularExpressionNonTerminator but not one of * or \ or / or [
RegularExpressionBackslashSequence
RegularExpressionClass
RegularExpressionChar :: See 7.8.5
RegularExpressionNonTerminator but not \ or / or [
RegularExpressionBackslashSequence
RegularExpressionClass
RegularExpressionBackslashSequence :: See 7.8.5
\ RegularExpressionNonTerminator
RegularExpressionNonTerminator :: See 7.8.5
SourceCharacter but not LineTerminator
RegularExpressionClass :: See 7.8.5
[ RegularExpressionClassChars ]
RegularExpressionClassChars :: See 7.8.5
[empty]
RegularExpressionClassChars RegularExpressionClassChar
RegularExpressionClassChar :: See 7.8.5
RegularExpressionNonTerminator but not ] or \
RegularExpressionBackslashSequence
RegularExpressionFlags :: See 7.8.5
[empty]
RegularExpressionFlags IdentifierPart
Full Specification
Here is some quick and dirty code that might get you started.
/// <summary>
/// A cursor over a string with a stack of saved positions, intended for
/// backtracking parsers.
/// </summary>
/// <remarks>
/// The cursor starts at -1, i.e. before the first character, so
/// <see cref="Current"/> is only valid after a successful call to
/// <see cref="Next"/> or <see cref="ExpectNext"/>.
/// </remarks>
class CharStream
{
    // Saved cursor positions; the most recently pushed one is on top.
    private readonly Stack<int> _states;
    private readonly string _input;
    private readonly int _length;
    // Index of the current character; -1 until the first successful advance.
    private int _index;

    /// <summary>
    /// The character at the cursor. Indexes the input directly, so it throws
    /// <c>IndexOutOfRangeException</c> while the cursor is still at -1.
    /// </summary>
    public char Current
    {
        get { return _input[_index]; }
    }

    /// <summary>Creates a stream positioned before the first character of <paramref name="input"/>.</summary>
    /// <param name="input">Text to read; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
    public CharStream(string input)
    {
        if (input == null)
        {
            throw new System.ArgumentNullException("input");
        }

        _states = new Stack<int>();
        _input = input;
        _length = input.Length;
        _index = -1;
    }

    /// <summary>Advances the cursor by one character.</summary>
    /// <returns>
    /// <c>true</c> if the cursor now rests on a valid character; <c>false</c>
    /// (cursor unchanged) if there is no further character.
    /// </returns>
    public bool Next()
    {
        // Test the *next* position so the cursor can never leave the input,
        // and so the first call lands on index 0 instead of skipping it.
        if (_index + 1 >= _length)
        {
            return false;
        }

        _index++;
        return true;
    }

    /// <summary>Advances the cursor only if the next character equals <paramref name="c"/>.</summary>
    /// <param name="c">The character that must come next.</param>
    /// <returns>
    /// <c>true</c> if the character matched and was consumed; <c>false</c>
    /// (cursor unchanged) on a mismatch or at the end of the input.
    /// </returns>
    public bool ExpectNext(char c)
    {
        // The bounds test must come first: _index + 1 is the slot being read.
        if (_index + 1 >= _length || _input[_index + 1] != c)
        {
            return false;
        }

        _index++;
        return true;
    }

    /// <summary>Moves the cursor back by one character.</summary>
    /// <returns>
    /// <c>true</c> if the cursor moved; <c>false</c> if it is on the first
    /// character or has not advanced yet.
    /// </returns>
    public bool Back()
    {
        // "<=" rather than "==": a fresh stream sits at -1 and must not go lower.
        if (_index <= 0)
        {
            return false;
        }

        _index--;
        return true;
    }

    /// <summary>Saves the current cursor position on the state stack.</summary>
    public void PushState()
    {
        _states.Push(_index);
    }

    /// <summary>
    /// Restores the cursor to the most recently saved position and removes
    /// that position from the stack.
    /// </summary>
    /// <typeparam name="T">Type of the value to return.</typeparam>
    /// <returns>
    /// Always <c>default(T)</c>, so a caller can restore and signal failure in
    /// one statement, e.g. <c>return cs.PopState&lt;string&gt;();</c>.
    /// </returns>
    public T PopState<T>()
    {
        _index = _states.Pop();
        return default(T);
    }
}
/// <summary>
/// Parses the production
/// <c>RegularExpressionLiteral :: / RegularExpressionBody / RegularExpressionFlags</c>.
/// </summary>
/// <param name="cs">Stream positioned immediately before the opening slash.</param>
/// <returns>
/// The literal text rebuilt as <c>"/" + body + "/" + flags</c>, or <c>null</c>
/// when any part fails to match; on failure the stream is rewound to where
/// it was on entry.
/// </returns>
/// <remarks>
/// NOTE(review): the position saved on entry is only popped on the failure
/// paths; on success it stays on the stream's state stack.
/// </remarks>
static string ParseRegularExpressionLiteral(CharStream cs)
{
    cs.PushState();

    // Opening delimiter.
    if (!cs.ExpectNext('/'))
    {
        return cs.PopState<string>();
    }

    string pattern = ParseRegularExpressionBody(cs);
    if (pattern == null)
    {
        return cs.PopState<string>();
    }

    // Closing delimiter.
    if (!cs.ExpectNext('/'))
    {
        return cs.PopState<string>();
    }

    string modifiers = ParseRegularExpressionFlags(cs);
    if (modifiers == null)
    {
        return cs.PopState<string>();
    }

    return "/" + pattern + "/" + modifiers;
}
/// <summary>
/// Parses the production
/// <c>RegularExpressionBody :: RegularExpressionFirstChar RegularExpressionChars</c>.
/// </summary>
/// <param name="cs">Stream positioned immediately before the body.</param>
/// <returns>
/// The first character's text followed by the remaining characters' text, or
/// <c>null</c> when either part fails; on failure the stream is rewound to
/// where it was on entry.
/// </returns>
/// <remarks>
/// NOTE(review): the position saved on entry is only popped on the failure
/// paths; on success it stays on the stream's state stack.
/// </remarks>
static string ParseRegularExpressionBody(CharStream cs)
{
    cs.PushState();

    string head = ParseRegularExpressionFirstChar(cs);
    if (head == null)
    {
        return cs.PopState<string>();
    }

    string tail = ParseRegularExpressionChars(cs);
    if (tail == null)
    {
        return cs.PopState<string>();
    }

    return head + tail;
}
/// <summary>
/// Parses the production <c>RegularExpressionChars</c>: zero or more
/// <c>RegularExpressionChar</c> items, read until one fails to match.
/// </summary>
/// <param name="cs">Stream to read from.</param>
/// <returns>
/// The concatenated text of every matched item; the empty string when none
/// matched. Never <c>null</c>.
/// </returns>
static string ParseRegularExpressionChars(CharStream cs)
{
    var collected = new StringBuilder();

    while (true)
    {
        string piece = ParseRegularExpressionChar(cs);
        if (piece == null)
        {
            // A null item marks the end of the run ([empty] in the grammar).
            break;
        }

        collected.Append(piece);
    }

    return collected.ToString();
}
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionFirstChar</c>: a RegularExpressionNonTerminator other
/// than <c>*</c>, <c>\</c>, <c>/</c> or <c>[</c>; a
/// RegularExpressionBackslashSequence; or a RegularExpressionClass.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>, which ParseRegularExpressionBody treats as "no match".</returns>
static string ParseRegularExpressionFirstChar(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionChar</c>: a RegularExpressionNonTerminator other than
/// <c>\</c>, <c>/</c> or <c>[</c>; a RegularExpressionBackslashSequence; or a
/// RegularExpressionClass.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>, which ends the loop in ParseRegularExpressionChars.</returns>
static string ParseRegularExpressionChar(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionBackslashSequence :: \ RegularExpressionNonTerminator</c>.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>.</returns>
static string ParseRegularExpressionBackslashSequence(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionNonTerminator :: SourceCharacter but not LineTerminator</c>.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>.</returns>
static string ParseRegularExpressionNonTerminator(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionClass :: [ RegularExpressionClassChars ]</c>.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>.</returns>
static string ParseRegularExpressionClass(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionClassChars</c>: either [empty] or
/// RegularExpressionClassChars followed by a RegularExpressionClassChar.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>.</returns>
static string ParseRegularExpressionClassChars(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionClassChar</c>: a RegularExpressionNonTerminator other
/// than <c>]</c> or <c>\</c>, or a RegularExpressionBackslashSequence.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>.</returns>
static string ParseRegularExpressionClassChar(CharStream cs) => null;
/// <summary>
/// Unimplemented placeholder for the production
/// <c>RegularExpressionFlags</c>: either [empty] or RegularExpressionFlags
/// followed by an IdentifierPart.
/// </summary>
/// <param name="cs">Stream to read from; this stub does not touch it.</param>
/// <returns>Always <c>null</c>, which ParseRegularExpressionLiteral treats as "no match".</returns>
static string ParseRegularExpressionFlags(CharStream cs) => null;
As for how you find the end of the literal: the trick is to recursively follow the productions I have listed. Consider the production RegularExpressionBody. Simply reading the production tells me that it requires a RegularExpressionFirstChar followed by RegularExpressionChars. Notice that RegularExpressionChars is either [empty] or RegularExpressionChars RegularExpressionChar; essentially, it is defined in terms of itself. Once that production terminates with [empty], you know that the only valid next character is the closing /. If that is not found, this is not a valid literal.