// UTF-CHECK2.VDM - check a file for Unicode UTF-16 and its endianness (byte order)
//
// 24 Jul 2004 by Christian Ziemski
// Last change: 28 Jul 2004 by Christian Ziemski, new algorithm (by Pauli Lindgren)
//
//
// To do: more tests and optimization
//
//----------------------------------------------------------------------------------------
//
// Requires: VEDIT for Windows 6.02 or later.
//
//----------------------------------------------------------------------------------------
//
// Numeric Register Usage:
//
//    #103   endianness via BOM    (0=no BOM, 1=big endian, 2=little endian)
//    #104   guessed endianness    (0=not UTF-16, 1=big endian, 2=little endian)
//    #105   number of 0x00 bytes seen as the first byte of a sampled byte pair
//    #106   number of 0x00 bytes seen as the second byte of a sampled byte pair
//
// Text Register Usage:
//
//     103   result as text for displaying
//
//----------------------------------------------------------------------------------------

// Check whether this file is possibly a Unicode UTF-16 file and
// determine its byte order (endianness) if possible
//  - via the (optional) BOM
//  - or by counting 0x00 bytes in the first and second byte of each byte pair
//
Save_Pos()
Begin_Of_File

#104 = 0
#105 = 0
#106 = 0

if (match("|HFE|HFF")==0) {                 // big-endian BOM
    #103=1
} else {
    if (match("|HFF|HFE")==0) {             // little-endian BOM
        #103=2
    } else {
        // No UTF BOM found (this is no error: Unicode does not require a BOM),
        // so try to guess the endianness (not foolproof!)
        #103=0                              // No BOM

        // Sample up to 50 byte pairs (100 bytes) from the start of the file.
        // Each pass consumes two bytes, so the pass count is limited to half
        // the file size; this keeps the scan from running past the end of
        // files shorter than 100 bytes. Files with fewer than two bytes are
        // not scanned at all.
        if (File_Size > 1) {
            Repeat(Min(50, File_Size/2)) {
                if (Cur_Char==0) {
                    #105++                  // zero in the first byte of the pair
                }
                Char
                if (Cur_Char==0) {
                    #106++                  // zero in the second byte of the pair
                }
                Char
            }
        }

        // The scan started at the beginning of the file and advanced one byte
        // per Char, so Cur_Pos is used here as the number of bytes sampled.
        // A guess is made only if one counter exceeds a third of the sampled
        // bytes while the other stays below a fifth of them.
        if (#105*3 > Cur_Pos && #106*5 < Cur_Pos) {
            #104 = 1                        // Big endian
        } else {
            if (#106*3 > Cur_Pos && #105*5 < Cur_Pos) {
                #104 = 2                    // Little endian
            }
        }
    }
}

Begin_Of_File

// Build the result message in text register 103.
// Every branch below starts with a non-appending Reg_Set, so no default
// message needs to be stored beforehand.
if (#103==1) {
    Reg_Set(103, "The file is UTF-16 big-endian per BOM")
} else {
    if (#103==2) {
        Reg_Set(103, "The file is UTF-16 little-endian per BOM")
    } else {
        Reg_Set(103, "The file seems to be ")
        if (#104==1) {
            Reg_Set(103, "UTF-16 big-endian ", APPEND)
        } else {
            if (#104==2) {
                Reg_Set(103, "UTF-16 little-endian ", APPEND)
            } else {
                Reg_Set(103, "not UTF-16 ", APPEND)
            }
        }
    }
}

// Display the result
Dialog_Input_1(121,"`Unicode UTF-16 Check`, `|@(103)`, `[OK]`",APP+CENTER,0,0)

Restore_Pos()