author  wenzelm 
Mon, 31 Mar 2014 17:41:45 +0200  
changeset 56338  f968f4e3d520 
parent 56335  8953d4cc060a 
child 56471  2293a4350716 
permissions  rwrr 
27901  1 
/* Title: Pure/General/symbol.scala 
2 
Author: Makarius 

3 

27924  4 
Detecting and recoding Isabelle symbols. 
27901  5 
*/ 
6 

7 
package isabelle 

8 

55618  9 

36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1  with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset

10 
import scala.collection.mutable 
31522  11 
import scala.util.matching.Regex 
48922  12 
import scala.annotation.tailrec 
27901  13 

14 

31522  15 
object Symbol 
16 
{ 

43696  17 
type Symbol = String 
18 

55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

19 
// counting Isabelle symbols, starting from 1 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

20 
type Offset = Text.Offset 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

21 
type Range = Text.Range 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

22 

43696  23 

43418  24 
/* ASCII characters */ 
25 

26 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z'  'a' <= c && c <= 'z' 

55497  27 

43418  28 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' 
55497  29 

30 
def is_ascii_hex(c: Char): Boolean = 

31 
'0' <= c && c <= '9'  'A' <= c && c <= 'F'  'a' <= c && c <= 'f' 

32 

43418  33 
def is_ascii_quasi(c: Char): Boolean = c == '_'  c == '\'' 
34 

55497  35 
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) 
36 

43418  37 
def is_ascii_letdig(c: Char): Boolean = 
38 
is_ascii_letter(c)  is_ascii_digit(c)  is_ascii_quasi(c) 

39 

40 
def is_ascii_identifier(s: String): Boolean = 

50238
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
wenzelm
parents:
50233
diff
changeset

41 
s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) 
43418  42 

43 

48775  44 
/* symbol matching */ 
27901  45 

48775  46 
private val symbol_total = new Regex("""(?xs) 
47 
[\ud800\udbff][\udc00\udfff]  \r\n  

48 
\\ < (?: \^raw: [\x20\x7e\u0100\uffff && [^.>]]*  \^? ([AZaz][AZaz09_']*)? ) >?  

49 
.""") 

27924  50 

48775  51 
private def is_plain(c: Char): Boolean = 
52 
!(c == '\r'  c == '\\'  Character.isHighSurrogate(c)) 

48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

53 

0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

54 
def is_malformed(s: Symbol): Boolean = 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

55 
s.length match { 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

56 
case 1 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

57 
val c = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

58 
Character.isHighSurrogate(c)  Character.isLowSurrogate(c)  c == '\ufffd' 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

59 
case 2 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

60 
val c1 = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

61 
val c2 = s(1) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

62 
!(c1 == '\r' && c2 == '\n'  Character.isSurrogatePair(c1, c2)) 
48774  63 
case _ => !s.endsWith(">")  s == "\\<>"  s == "\\<^>" 
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

64 
} 
34137  65 

54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset

66 
def is_newline(s: Symbol): Boolean = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

67 
s == "\n"  s == "\r"  s == "\r\n" 
38877  68 

34137  69 
class Matcher(text: CharSequence) 
70 
{ 

48775  71 
private val matcher = symbol_total.pattern.matcher(text) 
34137  72 
def apply(start: Int, end: Int): Int = 
73 
{ 

74 
require(0 <= start && start < end && end <= text.length) 

34316
f879b649ac4c
clarified Symbol.is_plain/is_wellformed  is_closed was rejecting plain backslashes;
wenzelm
parents:
34193
diff
changeset

75 
if (is_plain(text.charAt(start))) 1 
34138  76 
else { 
34137  77 
matcher.region(start, end).lookingAt 
78 
matcher.group.length 

79 
} 

80 
} 

31522  81 
} 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

82 

fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

83 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

84 
/* iterator */ 
33998  85 

43696  86 
private val char_symbols: Array[Symbol] = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

87 
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

88 

43696  89 
def iterator(text: CharSequence): Iterator[Symbol] = 
90 
new Iterator[Symbol] 

40522  91 
{ 
43489  92 
private val matcher = new Matcher(text) 
93 
private var i = 0 

94 
def hasNext = i < text.length 

95 
def next = 

96 
{ 

97 
val n = matcher(i, text.length) 

43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

98 
val s = 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

99 
if (n == 0) "" 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

100 
else if (n == 1) { 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

101 
val c = text.charAt(i) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

102 
if (c < char_symbols.length) char_symbols(c) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

103 
else text.subSequence(i, i + n).toString 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

104 
} 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

105 
else text.subSequence(i, i + n).toString 
43489  106 
i += n 
107 
s 

108 
} 

33998  109 
} 
43489  110 

44949  111 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList 
112 

48922  113 
def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) = 
114 
{ 

115 
var (line, column) = pos 

116 
for (sym < iterator(text)) { 

54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset

117 
if (is_newline(sym)) { line += 1; column = 1 } 
48922  118 
else column += 1 
119 
} 

120 
(line, column) 

121 
} 

122 

33998  123 

124 
/* decoding offsets */ 

125 

52507  126 
object Index 
127 
{ 

128 
def apply(text: CharSequence): Index = new Index(text) 

129 
} 

130 

131 
final class Index private(text: CharSequence) 

31929  132 
{ 
55430  133 
private sealed case class Entry(chr: Int, sym: Int) 
134 
private val index: Array[Entry] = 

31929  135 
{ 
34137  136 
val matcher = new Matcher(text) 
31929  137 
val buf = new mutable.ArrayBuffer[Entry] 
138 
var chr = 0 

139 
var sym = 0 

33998  140 
while (chr < text.length) { 
34137  141 
val n = matcher(chr, text.length) 
142 
chr += n 

31929  143 
sym += 1 
34137  144 
if (n > 1) buf += Entry(chr, sym) 
31929  145 
} 
146 
buf.toArray 

147 
} 

55430  148 

55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

149 
def decode(symbol_offset: Offset): Text.Offset = 
31929  150 
{ 
55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

151 
val sym = symbol_offset  1 
31929  152 
val end = index.length 
48922  153 
@tailrec def bisect(a: Int, b: Int): Int = 
31929  154 
{ 
155 
if (a < b) { 

156 
val c = (a + b) / 2 

157 
if (sym < index(c).sym) bisect(a, c) 

158 
else if (c + 1 == end  sym < index(c + 1).sym) c 

159 
else bisect(c + 1, b) 

160 
} 

161 
else 1 

162 
} 

163 
val i = bisect(0, end) 

164 
if (i < 0) sym 

165 
else index(i).chr + sym  index(i).sym 

166 
} 

55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

167 
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_)) 
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

168 

56338
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
wenzelm
parents:
56335
diff
changeset

169 
private val hash: Int = index.toList.hashCode 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
wenzelm
parents:
56335
diff
changeset

170 
override def hashCode: Int = hash 
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

171 
override def equals(that: Any): Boolean = 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

172 
that match { 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

173 
case other: Index => index.sameElements(other.index) 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

174 
case _ => false 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

175 
} 
31929  176 
} 
177 

178 

33998  179 
/* recoding text */ 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

180 

31522  181 
private class Recoder(list: List[(String, String)]) 
182 
{ 

183 
private val (min, max) = 

184 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

185 
var min = '\uffff' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

186 
var max = '\u0000' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

187 
for ((x, _) < list) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

188 
val c = x(0) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

189 
if (c < min) min = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

190 
if (c > max) max = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

191 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

192 
(min, max) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

193 
} 
40443  194 
private val table = 
195 
{ 

196 
var tab = Map[String, String]() 

197 
for ((x, y) < list) { 

198 
tab.get(x) match { 

199 
case None => tab += (x > y) 

200 
case Some(z) => 

44181  201 
error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) 
40443  202 
} 
203 
} 

204 
tab 

205 
} 

31522  206 
def recode(text: String): String = 
207 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

208 
val len = text.length 
48775  209 
val matcher = symbol_total.pattern.matcher(text) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

210 
val result = new StringBuilder(len) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

211 
var i = 0 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

212 
while (i < len) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

213 
val c = text(i) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

214 
if (min <= c && c <= max) { 
31929  215 
matcher.region(i, len).lookingAt 
27938  216 
val x = matcher.group 
52888  217 
result.append(table.getOrElse(x, x)) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

218 
i = matcher.end 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

219 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

220 
else { result.append(c); i += 1 } 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

221 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

222 
result.toString 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

223 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

224 
} 
27924  225 

27918  226 

27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset

227 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

228 
/** symbol interpretation **/ 
27927  229 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

230 
private lazy val symbols = 
50564
c6fde2fc4217
allow to suppress ISABELLE_SYMBOLS for experiments;
wenzelm
parents:
50291
diff
changeset

231 
new Interpretation(File.try_read(Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")))) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

232 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

233 
private class Interpretation(symbols_spec: String) 
29569
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents:
29174
diff
changeset

234 
{ 
31522  235 
/* read symbols */ 
236 

50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

237 
private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

238 
private val Key = new Regex("""(?xs) (.+): """) 
31522  239 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

240 
private def read_decl(decl: String): (Symbol, Properties.T) = 
31522  241 
{ 
242 
def err() = error("Bad symbol declaration: " + decl) 

243 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

244 
def read_props(props: List[String]): Properties.T = 
31522  245 
{ 
246 
props match { 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

247 
case Nil => Nil 
31522  248 
case _ :: Nil => err() 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

249 
case Key(x) :: y :: rest => (x > y) :: read_props(rest) 
31522  250 
case _ => err() 
251 
} 

252 
} 

253 
decl.split("\\s+").toList match { 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

254 
case sym :: props if sym.length > 1 && !is_malformed(sym) => 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

255 
(sym, read_props(props)) 
34193  256 
case _ => err() 
31522  257 
} 
258 
} 

259 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

260 
private val symbols: List[(Symbol, Properties.T)] = 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

261 
(((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

262 
split_lines(symbols_spec).reverse) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

263 
{ case (res, No_Decl()) => res 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

264 
case ((list, known), decl) => 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

265 
val (sym, props) = read_decl(decl) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

266 
if (known(sym)) (list, known) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

267 
else ((sym, props) :: list, known + sym) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

268 
})._1 
31522  269 

270 

53400  271 
/* basic properties */ 
272 

273 
val properties: Map[Symbol, Properties.T] = Map(symbols: _*) 

31651  274 

43696  275 
val names: Map[Symbol, String] = 
34134  276 
{ 
43456
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
wenzelm
parents:
43455
diff
changeset

277 
val name = new Regex("""\\<\^?([AZaz][AZaz09_']*)>""") 
31651  278 
Map((for ((sym @ name(a), _) < symbols) yield (sym > a)): _*) 
279 
} 

280 

50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

281 
val groups: List[(String, List[Symbol])] = 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

282 
symbols.map({ case (sym, props) => 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

283 
val gs = for (("group", g) < props) yield g 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

284 
if (gs.isEmpty) List(sym > "unsorted") else gs.map(sym > _) 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

285 
}).flatten 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

286 
.groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) }) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

287 
.sortBy(_._1) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

288 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

289 
val abbrevs: Multi_Map[Symbol, String] = 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

290 
Multi_Map(( 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

291 
for { 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

292 
(sym, props) < symbols 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

293 
("abbrev", a) < props.reverse 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

294 
} yield (sym > a)): _*) 
43488  295 

296 

43490  297 
/* recoding */ 
31522  298 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

299 
private val Code = new Properties.String("code") 
31522  300 
private val (decoder, encoder) = 
301 
{ 

302 
val mapping = 

303 
for { 

304 
(sym, props) < symbols 

46997  305 
code = 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

306 
props match { 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

307 
case Code(s) => 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

308 
try { Integer.decode(s).intValue } 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

309 
catch { case _: NumberFormatException => error("Bad code for symbol " + sym) } 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

310 
case _ => error("Missing code for symbol " + sym) 
31522  311 
} 
46997  312 
ch = new String(Character.toChars(code)) 
34193  313 
} yield { 
314 
if (code < 128) error("Illegal ASCII code for symbol " + sym) 

315 
else (sym, ch) 

316 
} 

31545
5f1f0a20af4d
discontinued escaped symbols such as \\<forall>  only one backslash should be used;
wenzelm
parents:
31523
diff
changeset

317 
(new Recoder(mapping), 
31548  318 
new Recoder(mapping map { case (x, y) => (y, x) })) 
31522  319 
} 
27918  320 

34098  321 
def decode(text: String): String = decoder.recode(text) 
322 
def encode(text: String): String = encoder.recode(text) 

34134  323 

43490  324 
private def recode_set(elems: String*): Set[String] = 
325 
{ 

326 
val content = elems.toList 

327 
Set((content ::: content.map(decode)): _*) 

328 
} 

329 

330 
private def recode_map[A](elems: (String, A)*): Map[String, A] = 

331 
{ 

332 
val content = elems.toList 

333 
Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) 

334 
} 

335 

336 

337 
/* user fonts */ 

338 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

339 
private val Font = new Properties.String("font") 
43696  340 
val fonts: Map[Symbol, String] = 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

341 
recode_map((for ((sym, Font(font)) < symbols) yield (sym > font)): _*) 
43490  342 

343 
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList 

344 
val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) 

345 

34134  346 

347 
/* classification */ 

348 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

349 
val letters = recode_set( 
34134  350 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", 
351 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", 

352 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", 

353 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", 

354 

355 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", 

356 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", 

357 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", 

358 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", 

359 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", 

360 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", 

361 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", 

362 
"\\<x>", "\\<y>", "\\<z>", 

363 

364 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", 

365 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", 

366 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", 

367 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", 

368 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", 

369 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", 

370 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", 

371 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", 

372 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", 

373 

374 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", 

375 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", 

376 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", 

377 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", 

378 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", 

379 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", 

52616
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
wenzelm
parents:
52507
diff
changeset

380 
"\\<Psi>", "\\<Omega>") 
34134  381 

54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset

382 
val blanks = recode_set(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n") 
34138  383 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

384 
val sym_chars = 
34138  385 
Set("!", "#", "$", "%", "&", "*", "+", "", "/", "<", "=", ">", "?", "@", "^", "_", "", "~") 
34134  386 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

387 
val symbolic = recode_set((for { (sym, _) < symbols; if raw_symbolic(sym) } yield sym): _*) 
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

388 

43455  389 

55033  390 
/* cartouches */ 
391 

392 
val open_decoded = decode(open) 

393 
val close_decoded = decode(close) 

394 

395 

43488  396 
/* control symbols */ 
397 

43696  398 
val ctrl_decoded: Set[Symbol] = 
43488  399 
Set((for ((sym, _) < symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) 
400 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

401 
val sub_decoded = decode("\\<^sub>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

402 
val sup_decoded = decode("\\<^sup>") 
43511  403 
val bsub_decoded = decode("\\<^bsub>") 
404 
val esub_decoded = decode("\\<^esub>") 

405 
val bsup_decoded = decode("\\<^bsup>") 

406 
val esup_decoded = decode("\\<^esup>") 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

407 
val bold_decoded = decode("\\<^bold>") 
27918  408 
} 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

409 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

410 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

411 
/* tables */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

412 

53400  413 
def properties: Map[Symbol, Properties.T] = symbols.properties 
43696  414 
def names: Map[Symbol, String] = symbols.names 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

415 
def groups: List[(String, List[Symbol])] = symbols.groups 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

416 
def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

417 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

418 
def decode(text: String): String = symbols.decode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

419 
def encode(text: String): String = symbols.encode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

420 

53337
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset

421 
def decode_string: XML.Decode.T[String] = (x => decode(XML.Decode.string(x))) 
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset

422 
def encode_string: XML.Encode.T[String] = (x => XML.Encode.string(encode(x))) 
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset

423 

50291
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

424 
def decode_strict(text: String): String = 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

425 
{ 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

426 
val decoded = decode(text) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

427 
if (encode(decoded) == text) decoded 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

428 
else { 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

429 
val bad = new mutable.ListBuffer[Symbol] 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

430 
for (s < iterator(text) if encode(decode(s)) != s && !bad.contains(s)) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

431 
bad += s 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

432 
error("Bad Unicode symbols in text: " + commas_quote(bad)) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

433 
} 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

434 
} 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

435 

43696  436 
def fonts: Map[Symbol, String] = symbols.fonts 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

437 
def font_names: List[String] = symbols.font_names 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

438 
def font_index: Map[String, Int] = symbols.font_index 
43696  439 
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

440 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

441 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

442 
/* classification */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

443 

43696  444 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) 
445 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' 

446 
def is_quasi(sym: Symbol): Boolean = sym == "_"  sym == "'" 

447 
def is_letdig(sym: Symbol): Boolean = is_letter(sym)  is_digit(sym)  is_quasi(sym) 

448 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

449 

55033  450 

451 
/* cartouches */ 

452 

453 
val open = "\\<open>" 

454 
val close = "\\<close>" 

455 

456 
def open_decoded: Symbol = symbols.open_decoded 

457 
def close_decoded: Symbol = symbols.close_decoded 

458 

459 
def is_open(sym: Symbol): Boolean = sym == open_decoded  sym == open 

460 
def is_close(sym: Symbol): Boolean = sym == close_decoded  sym == close 

461 

462 

463 
/* symbols for symbolic identifiers */ 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

464 

aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

465 
private def raw_symbolic(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

466 
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

467 

55033  468 
def is_symbolic(sym: Symbol): Boolean = 
469 
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym)  symbols.symbolic.contains(sym)) 

470 

471 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) 

472 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

473 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

474 
/* control symbols */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

475 

43696  476 
def is_ctrl(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

477 
sym.startsWith("\\<^")  symbols.ctrl_decoded.contains(sym) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

478 

43696  479 
def is_controllable(sym: Symbol): Boolean = 
55033  480 
!is_blank(sym) && !is_ctrl(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

481 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

482 
def sub_decoded: Symbol = symbols.sub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

483 
def sup_decoded: Symbol = symbols.sup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

484 
def bsub_decoded: Symbol = symbols.bsub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

485 
def esub_decoded: Symbol = symbols.esub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

486 
def bsup_decoded: Symbol = symbols.bsup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

487 
def esup_decoded: Symbol = symbols.esup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

488 
def bold_decoded: Symbol = symbols.bold_decoded 
27901  489 
} 