(* make sure to load the Seq module in sequence.ml first *)
(*
.o8
.o888oo oooo ooo oo.ooooo. .ooooo. .oooo.o
888 `88. .8' 888' `88b d88' `88b d88( "8
888 `88..8' 888 888 888ooo888 `"Y88b.
888 . `888' 888 888 888 .o o. )88b
"888" .8' 888bod8P' `Y8bod8P' 8""888P'
.o..P' 888
`Y8P' o888o
*)
(* an outcome is a result and a remaining symbol list *)
type ('symbol,'result) outcome = 'result * ('symbol list)
(* an analyzer is a function from remaining symbols to a stream of outcomes *)
type ('symbol,'result) analyzer = 'symbol list -> ('symbol, 'result) outcome Seq.t
(* trees are a typical kind of value for parsers to return *)
type 'a tree = Node of 'a * 'a tree list
(* some handy constructors for typical tree configurations *)
let branch0 name = Node (name,[])
let branch1 name x = Node (name,[x])
let branch2 name (x,y) = Node (name,[x;y])
let branch3 name ((x,y),z) = Node (name,[x;y;z])
(* if we see a terminal spelled `name',
then return a one-element sequence consisting just of one tree node
otherwise, either it wasn't spelled right or we didn't see it, so return and empty stream *)
let (terminal : 'a -> ('a,'a tree) analyzer) = fun name inp ->
match inp with
first::rest ->
if first=name
then Seq.fromList [ ((branch0 name),rest) ]
else Seq.empty
| [] -> Seq.empty
(* either p1 can parse the input, or p2 can *)
let (alternative : ('a,'b) analyzer -> ('a,'b) analyzer -> ('a,'b) analyzer) = fun p1 p2 inp
-> Seq.append ((p1 inp),(p2 inp))
(* use p2 to parse all the remainders p1 leaves behind. pair-up the results *)
let (sequence : ('a,'b) analyzer -> ('a,'c) analyzer -> ('a,('b * 'c)) analyzer) = fun p1 p2 inp ->
Seq.multiply (function (result1,remainder1) ->
Seq.map
(function (result2,remainder2) -> ((result1,result2),remainder2))
(p2 remainder1)
) (* outermost function takes individual outcomes to sequences of outcomes *)
(p1 inp)
(* map the function f : 'b->'c over the result of parsing inp with p *)
let (gives : ('a,'b) analyzer -> ('b -> 'c) -> ('a,'c) analyzer)= fun p f inp ->
Seq.map (function (result,remainder) -> (f result,remainder)) (p inp)
(* try for bigger and bigger analyses of the same substring *)
let (skipIfFirstFails : ('a,'b) analyzer -> ('a,'b) analyzer -> ('a,'b) analyzer) = fun p1 p2 inp ->
let s1 = p1 inp in
if (s1=Seq.empty)
then Seq.empty
else Seq.append (s1,(p2 inp))
let ( |. ) = alternative
let ( &. ) = sequence
let ( >. ) = gives
let rec adjoin2 postmodifier label base =
function words ->
(skipIfFirstFails base (adjoin2 postmodifier label (base &. postmodifier >. branch2 label))
) words
let rec s words = ( np &. vp >. branch2 "s" ) words
and vp words =
let basevp = (v &. np) >. branch2 "vp" in
( adjoin2 pp "vp" basevp ) words
and np words =
let basenp = (terminal "binoculars" >. branch1 "np") |. ((det &. n) >. branch2 "np") in
( adjoin2 pp "np" basenp ) words
and pp words = ((p &. np) >. branch2 "pp" ) words
and det = terminal "the" >. branch1 "det"
and n = (terminal "cop" >. branch1 "n") |. (terminal "spy" >. branch1 "n") |. (terminal "baton" >. branch1 "n")
and p = terminal "with" >. branch1 "p"
and v = terminal "sees" >. branch1 "v"
(* back and forth from characters to string to char lists *)
let explode s =
let rec explode s i = if i< String.length s
then s.[i]::(explode s (i+1))
else []
in (explode s 0)
let implode l =
begin
let v = (String.create (List.length l)) in
for i = 0 to ((List.length l)-1) do
String.set v i (List.nth l i)
done; v
end
let lex s =
let rec tok c w = match c,w with
[], w -> [implode w]
| (c::cs), w -> match c with
' ' -> [implode w]@(tok cs [])
| '\n' -> [implode w]@(["\n"]@(tok cs []))
| '\t' -> [implode w]@(["\t"]@(tok cs []))
| _ -> tok cs (w@[c])
in
tok (explode s) []
let finished x = (Pervasives.snd x) = []
let testsuite = [
"the spy sees the cop with the baton with binoculars"; (* 2 *)
"the spy sees the cop with the baton with binoculars with binoculars"; (* 3 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars"; (* 4 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars"; (* 5 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 6 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 7 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 8 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 9 *)
"the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars" (* 10 *)
]
(*
Aaccording to Mathworld, the answers should be {1,2,5,14,42,132,429,1430,4862,16796}
make sure to tell Mathematica, e.g.
< Seq.count finished (s (lex x))) (List.nth ts i)) in
Printf.printf "%d PPs\t%d trees %2.6f seconds\n" (i+2) count seconds;
Pervasives.flush stdout
done;;
timeme testsuite