(*------------------------------------------------------------------------
 * (c) Microsoft Corporation. All rights reserved 
 *
 * Helper functions for the F# lexer lex.mll
 *-----------------------------------------------------------------------*)

(*F# 
module Microsoft.FSharp.Compiler.Lexhelp
open Microsoft.Research.AbstractIL
open Microsoft.Research.AbstractIL.Internal
open Microsoft.FSharp.Compiler
module Ildiag = Microsoft.Research.AbstractIL.Diagnostics 
F#*) 
open Ildiag
open Pars
open Parsing
open Lexing
open Range
open Ast

(*------------------------------------------------------------------------
!* Setting the current lexer mark differs between F# and OCaml (Ocaml
 * reveals the data structure implementation of the lexer state, F# does not)
 *-----------------------------------------------------------------------*)

let set_pos (lexbuf:lexbuf) p = 
  (*F# lexbuf.EndPos <- p F#*)
  (*IF-OCAML*) lexbuf.lex_curr_p <- p (*ENDIF-OCAML*)

(*------------------------------------------------------------------------
!* Lexer args: status of #light processing.  Mutated when a #light
 * directive is processed. This alters the behaviour of the lexfilter.
 *-----------------------------------------------------------------------*)

type lightSyntaxStatus = LightSyntaxStatus of bool ref
let usingLightSyntax (LightSyntaxStatus(r)) = !r
let setLightSyntax (LightSyntaxStatus(r)) = r := true
let setDarkSyntax (LightSyntaxStatus(r)) = r := false

(*------------------------------------------------------------------------
!* Lexer args: status of #if/#endif processing.  
 *-----------------------------------------------------------------------*)

type ifdefStackEntry = IfDefIf | IfDefElse 
type ifdefStack = ifdefStackEntry list ref

(*------------------------------------------------------------------------
!* Lexer args
 *-----------------------------------------------------------------------*)

type lexargs =  
    { defines: string list;
      ifdefStack: ifdefStack;
      getSourceDirectory: (unit -> string); 
      lightSyntaxStatus : lightSyntaxStatus; }

let resetLexbufPos filename lexbuf = 
    set_pos lexbuf {(lexeme_end_p lexbuf) with pos_fname= encode_file_idx (file_idx_of_file filename); 
                                               pos_cnum=0;
                                               pos_lnum=1 }

let mkLexargs (srcdir,filename,defines,lightSyntaxStatus) =
    (* resetLexbufPos filename lexbuf; *) (* called explicitly from usingLexbuf *)
    { defines = defines;
      ifdefStack= ref [];
      lightSyntaxStatus=lightSyntaxStatus;
      getSourceDirectory=srcdir; }

(*------------------------------------------------------------------------
!* Set some global variables assocaited with the currently active lexbuf.
 *-----------------------------------------------------------------------*)

let registerLexbuf lexbuf concreteSyntaxSink = 
    currentLexbuf := Some lexbuf;
    concreteSyntaxSinkRef := concreteSyntaxSink;
    revXMLs := []

let reusingLexbuf lexbuf concreteSyntaxSink f = 
    registerLexbuf lexbuf concreteSyntaxSink;
    try
      f () 
    with e ->
      raise (WrappedError(e,(try get_lex_range lexbuf with _ -> range0)))

let usingLexbuf lexbuf filename concreteSyntaxSink f =
    resetLexbufPos filename lexbuf;
    reusingLexbuf lexbuf concreteSyntaxSink (fun () -> f lexbuf)

(*------------------------------------------------------------------------
!* Functions to manipulate lexer transient state
 *-----------------------------------------------------------------------*)

let default_string_finish = (fun endm b s -> STRING (s))

let call_string_finish fin buf endm b = fin endm b (Bytes.Bytebuf.close buf)

let add_string buf x = Bytes.Bytebuf.emit_bytes buf (Bytes.string_as_unicode_bytes x)

let add_int_char buf c = 
    Bytes.Bytebuf.emit_int_as_byte buf (c mod 256);
    Bytes.Bytebuf.emit_int_as_byte buf (c / 256)

let add_unichar buf c = add_int_char buf (Nums.u16_to_int c)
let add_byte_char buf c = add_int_char buf (Char.code c mod 256)

(* When lexing bytearrays we don't expect to see any unicode stuff. *)
(* Likewise when lexing string constants we shouldn't see any trigraphs > 127 *)
(* So to turn the bytes collected in the string buffer back into a bytearray *)
(* we just take every second byte we stored.  Note all bytes > 127 should have been *)
(* stored using add_int_char *)
let stringbuf_as_bytes buf = 
    let bytes = Bytes.Bytebuf.close buf in 
    Bytes.make (fun i -> Bytes.get bytes (i*2)) (Bytes.length bytes / 2)

let inc_lnum bol pos = 
    {pos with pos_lnum =pos.pos_lnum+1; pos_bol = bol }

let newline lexbuf = 
    set_pos lexbuf (inc_lnum (lexeme_end lexbuf) (lexeme_end_p lexbuf))

let trigraph c1 c2 c3 =
    let digit d = Char.code d - Char.code '0' in 
    Char.chr (digit c1 * 100 + digit c2 * 10 + digit c3)

let digit d = 
    if d >= '0' && d <= '9' then Char.code d - Char.code '0'   
    else failwith "digit" 

let hexdigit d = 
    if d >= '0' && d <= '9' then digit d 
    else if d >= 'a' && d <= 'f' then Char.code d - Char.code 'a' + 10
    else if d >= 'A' && d <= 'F' then Char.code d - Char.code 'A' + 10
    else failwith "hexdigit" 

let unicodegraph_short s =
    if String.length s <> 4 then failwith "unicodegraph";
    Nums.int_to_u16 (hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3])

let hexgraph_short s =
    if String.length s <> 2 then failwith "hexgraph";
    Nums.int_to_u16 (hexdigit s.[0] * 16 + hexdigit s.[1])

let unicodegraph_long s =
    if String.length s <> 8 then failwith "unicodegraph_long";
    let high = hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3] in 
    let low = hexdigit s.[4] * 4096 + hexdigit s.[5] * 256 + hexdigit s.[6] * 16 + hexdigit s.[7] in 
    if high = 0 then None, Nums.int_to_u16 low 
    else 
      (* A surrogate pair - see http://www.unicode.org/unicode/uni2book/ch03.pdf, section 3.7 *)
      Some (Nums.int_to_u16 (0xD800 + ((high * 0x10000 + low - 0x10000) / 0x400))),
      Nums.int_to_u16 (0xDF30 + ((high * 0x10000 + low - 0x10000) mod 0x400))

let char_to_u16 c = Nums.int_to_u16 (Char.code c)

let escape c = 
    match c with
    | '\\' -> '\\'
    | '\'' -> '\''
    | 'n' -> '\n'
    | 't' -> '\t'
    | 'b' -> '\b'
    | 'r' -> '\r'
    | c -> c

let unexpected_char lexbuf =
    LEX_FAILURE ("Unexpected character '"^(lexeme lexbuf)^"'")

(*------------------------------------------------------------------------
!* Keyword table
 *-----------------------------------------------------------------------*)
    
type compatibilityMode =
    | ALWAYS  (* keyword *)
    | FSHARP  (* keyword, but an identifier under --ml-compatibility mode *)

let keywords = 
 [ FSHARP, "abstract", ABSTRACT;
  ALWAYS, "and"        ,AND;
  ALWAYS, "as"         ,AS;
  ALWAYS, "assert"     ,ASSERT;
  ALWAYS, "asr"        ,INFIX_STAR_STAR_OP "asr";
  ALWAYS, "begin"      ,BEGIN;
  ALWAYS, "class"      ,CLASS;
  FSHARP, "default"    ,DEFAULT;
  FSHARP, "delegate"   ,DELEGATE;
  ALWAYS, "do"         ,DO;
  ALWAYS, "done"       ,DONE;
  FSHARP, "downcast"   ,DOWNCAST;
  ALWAYS, "downto"     ,DOWNTO;
  FSHARP, "elif"       ,ELIF;
  ALWAYS, "else"       ,ELSE;
  ALWAYS, "end"        ,END;
  ALWAYS, "exception"  ,EXCEPTION;
  FSHARP, "extern"     ,EXTERN;
  ALWAYS, "false"      ,FALSE;
  ALWAYS, "finally"    ,FINALLY;
  ALWAYS, "for"        ,FOR;
  ALWAYS, "fun"        ,FUN;
  ALWAYS, "function"   ,FUNCTION;
  ALWAYS, "if"         ,IF;
  ALWAYS, "in"         ,IN;
  ALWAYS, "inherit"    ,INHERIT;
  FSHARP, "inline"     ,INLINE;
  FSHARP, "_instance"  ,INSTANCE;
  FSHARP, "interface"  ,INTERFACE;
  FSHARP, "internal"   ,INTERNAL;
  ALWAYS, "land"       ,INFIX_STAR_DIV_MOD_OP "land";
  ALWAYS, "lazy"       ,LAZY;
  ALWAYS, "let"        ,LET(false);
  ALWAYS, "lor"        ,INFIX_STAR_DIV_MOD_OP "lor";
  ALWAYS, "lsl"        ,INFIX_STAR_STAR_OP "lsl";
  ALWAYS, "lsr"        ,INFIX_STAR_STAR_OP "lsr";
  ALWAYS, "lxor"       ,INFIX_STAR_DIV_MOD_OP "lxor";
  ALWAYS, "match"      ,MATCH;
  FSHARP, "member"     ,MEMBER;
  ALWAYS, "method"     ,METHOD;
  ALWAYS, "mod"        ,INFIX_STAR_DIV_MOD_OP "mod";
  ALWAYS, "module"     ,MODULE;
  ALWAYS, "mutable"    ,MUTABLE;
  FSHARP, "namespace"  ,NAMESPACE;
  ALWAYS, "new"        ,NEW;
  FSHARP, "null"       ,NULL;
  ALWAYS, "of"         ,OF;
  ALWAYS, "open"       ,OPEN;
  ALWAYS, "or"         ,OR;
  FSHARP, "override"   ,OVERRIDE;
  FSHARP, "public"     ,PUBLIC;
  ALWAYS, "rec"        ,REC;
  FSHARP, "return"      ,YIELD(false);
  FSHARP, "static"     ,STATIC;
  ALWAYS, "struct"     ,STRUCT;
  ALWAYS, "sig"        ,SIG;
  ALWAYS, "then"       ,THEN;
  ALWAYS, "to"         ,TO;
  ALWAYS, "true"       ,TRUE;
  ALWAYS, "try"        ,TRY;
  ALWAYS, "type"       ,TYPE;
  FSHARP, "upcast"     ,UPCAST;
  FSHARP, "use"        ,LET(true);
  ALWAYS, "val"        ,VAL;
  ALWAYS, "virtual"    ,VIRTUAL;
  FSHARP, "void"       ,VOID;
  ALWAYS, "when"       ,WHEN;
  ALWAYS, "while"      ,WHILE;
  ALWAYS, "with"       ,WITH;
  FSHARP, "yield"      ,YIELD(true);
  ALWAYS, "_"          ,UNDERSCORE;
  ALWAYS, "private"    ,PRIVATE;  
(*------- for prototyping and explaining offside rule *)
  FSHARP, "_OBLOCKSEP" ,OBLOCKSEP;
  FSHARP, "_OWITH"     ,OWITH;
  FSHARP, "_ODECLEND"  ,ODECLEND;
  FSHARP, "_OTHEN"     ,OTHEN;
  FSHARP, "_OELSE"     ,OELSE;
  FSHARP, "_OEND"      ,OEND;
  FSHARP, "_ODO"       ,ODO;
  FSHARP, "_OLET"      ,OLET(true);
  FSHARP, "_constraint",CONSTRAINT;
  ]
(*------- reserved keywords which are ml-compatibility ids *) 
@ List.map (fun s -> (FSHARP,s,RESERVED)) 
    [ "atomic"; "break"; 
      "checked"; "component"; "const"; "constructor"; "continue"; 
      "eager"; 
      "fixed"; "functor"; "global";
      "include";  (* "instance"; *)
      "mixin"; 
      "object"; "parallel"; "params";  "process"; "protected"; "pure"; (* "pattern"; *)
      "sealed"; "trait";  
      "volatile"; ]

(*------------------------------------------------------------------------
!* Keywords
 *-----------------------------------------------------------------------*)

let unreserve_words = 
    Lib.chooseList (function (mode,keyword,_) -> if mode = FSHARP then Some keyword else None) keywords

let kwd_table = 
    let tab = Hashtbl.create 1000 in
    List.iter (fun (mode,keyword,token) -> Hashtbl.add tab keyword token) keywords;
    tab
let kwd s = Hashtbl.find kwd_table s

(* REVIEW: get rid of this element of global state *)
let permitFsharpKeywords = ref true

exception ReservedKeyword of string * range
exception IndentationProblem of string * range

let kwd_or_id args lexbuf s =
  if not !permitFsharpKeywords && List.mem s unreserve_words then
    IDENT s
  else if Hashtbl.mem kwd_table s then 
    let v = kwd s in 
    if v = RESERVED then
      begin
        let m = get_lex_range lexbuf in 
        warning(ReservedKeyword("The keyword '"^s^"' is reserved for future use by F#.",m));
          (* This will give a proper syntax error at the right location for true F# files. *)
        IDENT s 
      end
    else v
  else 
    match s with 
    | "__SOURCE_DIRECTORY__" -> 
       STRING (Bytes.string_as_unicode_bytes (args.getSourceDirectory()))
    | "__SOURCE_FILE__" -> 
       STRING (Bytes.string_as_unicode_bytes (file_of_file_idx (decode_file_idx (Lexing.lexeme_start_p lexbuf).Lexing.pos_fname)))
    | "__LINE__" -> 
       STRING (Bytes.string_as_unicode_bytes (string_of_int ((Lexing.lexeme_start_p lexbuf).Lexing.pos_lnum)))
    | _ -> IDENT s

(*------------------------------------------------------------------------
!* Token skipper.  Colorizers for language modes such as Visual Studio see some tokens 
 * that the parser does not see.  
 * 
 * NOTE: The "lexer lexbuf" call MUST be a tailcall - this is a 
 * recursive loop back to the lexer. However, when F# is compiled with --no-generics, tailcalls 
 * are not taken if the return type is polymorphic. 
 * Hence we constrain the return type even though this could be polymorphic.
 *-----------------------------------------------------------------------*)

(*F#
#if CLI_AT_MOST_1_1
let inline skipToken (skip:bool) (skippedToken: token) (lexer: bool -> lexbuf -> token) lexbuf =
  lexer skip lexbuf
#else
F#*)
let (*F# inline F#*) skipToken skip (skippedToken: token) (lexer: bool -> lexbuf -> token) lexbuf =
  if skip then lexer skip lexbuf else skippedToken 
(*F#
#endif
F#*)
