(* make sure to load the Seq module in sequence.ml first *) (* .o8 .o888oo oooo ooo oo.ooooo. .ooooo. .oooo.o 888 `88. .8' 888' `88b d88' `88b d88( "8 888 `88..8' 888 888 888ooo888 `"Y88b. 888 . `888' 888 888 888 .o o. )88b "888" .8' 888bod8P' `Y8bod8P' 8""888P' .o..P' 888 `Y8P' o888o *) (* an outcome is a result and a remaining symbol list *) type ('symbol,'result) outcome = 'result * ('symbol list) (* an analyzer is a function from remaining symbols to a stream of outcomes *) type ('symbol,'result) analyzer = 'symbol list -> ('symbol, 'result) outcome Seq.t (* trees are a typical kind of value for parsers to return *) type 'a tree = Node of 'a * 'a tree list (* some handy constructors for typical tree configurations *) let branch0 name = Node (name,[]) let branch1 name x = Node (name,[x]) let branch2 name (x,y) = Node (name,[x;y]) let branch3 name ((x,y),z) = Node (name,[x;y;z]) (* if we see a terminal spelled `name', then return a one-element sequence consisting just of one tree node otherwise, either it wasn't spelled right or we didn't see it, so return and empty stream *) let (terminal : 'a -> ('a,'a tree) analyzer) = fun name inp -> match inp with first::rest -> if first=name then Seq.fromList [ ((branch0 name),rest) ] else Seq.empty | [] -> Seq.empty (* either p1 can parse the input, or p2 can *) let (alternative : ('a,'b) analyzer -> ('a,'b) analyzer -> ('a,'b) analyzer) = fun p1 p2 inp -> Seq.append ((p1 inp),(p2 inp)) (* use p2 to parse all the remainders p1 leaves behind. pair-up the results *) let (sequence : ('a,'b) analyzer -> ('a,'c) analyzer -> ('a,('b * 'c)) analyzer) = fun p1 p2 inp -> Seq.multiply (function (result1,remainder1) -> Seq.map (function (result2,remainder2) -> ((result1,result2),remainder2)) (p2 remainder1) ) (* outermost function takes individual outcomes to sequences of outcomes *) (p1 inp) (* map the function f : 'b->'c over the result of parsing inp with p *) let (gives : ('a,'b) analyzer -> ('b -> 'c) -> ('a,'c) analyzer)= fun p f inp -> Seq.map (function (result,remainder) -> (f result,remainder)) (p inp) (* try for bigger and bigger analyses of the same substring *) let (skipIfFirstFails : ('a,'b) analyzer -> ('a,'b) analyzer -> ('a,'b) analyzer) = fun p1 p2 inp -> let s1 = p1 inp in if (s1=Seq.empty) then Seq.empty else Seq.append (s1,(p2 inp)) let ( |. ) = alternative let ( &. ) = sequence let ( >. ) = gives let rec adjoin2 postmodifier label base = function words -> (skipIfFirstFails base (adjoin2 postmodifier label (base &. postmodifier >. branch2 label)) ) words let rec s words = ( np &. vp >. branch2 "s" ) words and vp words = let basevp = (v &. np) >. branch2 "vp" in ( adjoin2 pp "vp" basevp ) words and np words = let basenp = (terminal "binoculars" >. branch1 "np") |. ((det &. n) >. branch2 "np") in ( adjoin2 pp "np" basenp ) words and pp words = ((p &. np) >. branch2 "pp" ) words and det = terminal "the" >. branch1 "det" and n = (terminal "cop" >. branch1 "n") |. (terminal "spy" >. branch1 "n") |. (terminal "baton" >. branch1 "n") and p = terminal "with" >. branch1 "p" and v = terminal "sees" >. branch1 "v" (* back and forth from characters to string to char lists *) let explode s = let rec explode s i = if i< String.length s then s.[i]::(explode s (i+1)) else [] in (explode s 0) let implode l = begin let v = (String.create (List.length l)) in for i = 0 to ((List.length l)-1) do String.set v i (List.nth l i) done; v end let lex s = let rec tok c w = match c,w with [], w -> [implode w] | (c::cs), w -> match c with ' ' -> [implode w]@(tok cs []) | '\n' -> [implode w]@(["\n"]@(tok cs [])) | '\t' -> [implode w]@(["\t"]@(tok cs [])) | _ -> tok cs (w@[c]) in tok (explode s) [] let finished x = (Pervasives.snd x) = [] let testsuite = [ "the spy sees the cop with the baton with binoculars"; (* 2 *) "the spy sees the cop with the baton with binoculars with binoculars"; (* 3 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars"; (* 4 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars"; (* 5 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 6 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 7 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 8 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars"; (* 9 *) "the spy sees the cop with the baton with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars with binoculars" (* 10 *) ] (* Aaccording to Mathworld, the answers should be {1,2,5,14,42,132,429,1430,4862,16796} make sure to tell Mathematica, e.g. < Seq.count finished (s (lex x))) (List.nth ts i)) in Printf.printf "%d PPs\t%d trees %2.6f seconds\n" (i+2) count seconds; Pervasives.flush stdout done;; timeme testsuite