// UTF_ANSI.vdm - Unicode (UTF-16) to ANSI conversion of entire file.
// Author:-
//      Ian Binnie
//      ian_binnie at optusnet dot com dot au ( replace "at" "dot" by the normal characters.)
// Last Change: 31 May 2004
// based on unic-asc.vdm by Christian Ziemski
//
// Requires: VEDIT 6.02 or later.
//
// Description: This macro converts UTF-16 to ANSI (Windows Code Page 1252).
//   Only little-endian UTF-16 (Microsoft Unicode, UTF-16LE) is handled:
//   either the file starts with the BOM (FF FE), or the first few carriage
//   returns found must be UTF-16LE CR/LF pairs.
//   If the Unicode file contains characters for which there is no mapping
//   the result is undefined, but an error is indicated.
//   See http://www.alanwood.net/demos/ansi.html
//
// Numeric registers used:
//   #103  file type passed to Config(F_F_TYPE)
//   #104  dialog/key result; later the OR of all discarded high bytes
//   #106  cursor position on entry (byte offset in the UTF-16 file)
//

#106 = Cur_Pos                          // remember where the user was

// ---------------------------------------------------------------------------
// Check that the file is recognized as Unicode.
// ---------------------------------------------------------------------------
Begin_Of_File
if (Cur_Char != 255 || CC(+1) != 254) {
    // No BOM. This is not an error: Unicode does not require a BOM.
    // Instead, inspect up to 5 carriage returns; each must be the start of
    // a little-endian UTF-16 CR/LF pair (0D 00 0A 00).
    Repeat(5) {
        if (Search("|H0D", NOERR)) {
            if (Match("|H0D|000|H0A|000") == 0) {
                Char(1)                 // step past this CR so the next search moves on
                Continue
            } else {
                Alert()
                if (OS_TYPE == 1) {
                    #104 = Dialog_Input_1(123,"`ERROR - Unicode to ANSI`,`This file is either not Unicode or is\na Unicode file that is not supported by VEDIT`",APP+CENTER,0,0)
                } else {
                    #104 = Get_Key("ERROR - Not recognized as a Unicode file.",STATLINE)
                }
                Goto_Pos(#106)
                Return
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Give confirmation prompt unless run via "-x" invocation option.
// ---------------------------------------------------------------------------
if (!(Is_Auto_Execution && Macro_Num == 100)) {
    Alert()
    if (OS_TYPE == 1) {
        #104 = Dialog_Input_1(123,"`Confirmation`,`OK to translate entire file from Unicode to ANSI?`,`[&Yes]`,`[&No]`", APP+CENTER,0,0)
        // Only the first button (Yes) continues. Any other result, including
        // a dismissed dialog, leaves the file untouched.
        // NOTE(review): assumes a dismissed dialog returns a value other than 1.
        if (#104 != 1) {
            Goto_Pos(#106)
            Return
        }
    } else {
        // "& 0xFFDF" clears bit 5 so that 'y'/'n' compare equal to 'Y'/'N'.
        // Keep asking until a valid answer is given.
        while ((#104=Get_Key("OK to translate entire file from Unicode to ANSI? [N]o [Y]es ",STATLINE)&0xFFDF)!='Y' && #104!='N') {
            Alert
        }
        if (#104 == 'N') {
            Goto_Pos(#106)
            Return
        }
    }
}

// ---------------------------------------------------------------------------
// Remove the BOM, if present.
// ---------------------------------------------------------------------------
Begin_Of_File()
if (Match("|HFF|HFE") == 0) {
    Del_Char(2)
    // Keep the saved cursor offset in step with the two deleted bytes,
    // never letting it go below the start of the file.
    if (#106 >= 2) {
        #106 = #106 - 2
    } else {
        #106 = 0
    }
}

// ---------------------------------------------------------------------------
// Translate Unicode codepoints corresponding to ANSI 0x80 - 0x9F.
//
// Each pattern is the little-endian byte pair of the code point; it is
// replaced by the ANSI byte followed by a zero high byte.
//
// Replace() matches bytes, not 16-bit units, so a pattern can also match the
// high byte of one character followed by the low byte of the next. Among the
// mappable characters this can only happen for the patterns whose first byte
// is 0x20 or 0x21 (DAGGER and DOUBLE DAGGER), e.g. U+2019 + space is
// 19 20 20 00, which contains "20 20" at an odd offset. Those two are
// therefore done last, after every other U+20xx / U+21xx character has
// already been rewritten with a zero high byte:
//   TRADE MARK (22 21) before DOUBLE DAGGER (21 20) before DAGGER (20 20).
// ---------------------------------------------------------------------------
Replace("|HAC|H20","|H80|000",BEGIN|ALL|NOERR)   //EURO SIGN
Replace("|H1A|H20","|H82|000",BEGIN|ALL|NOERR)   //SINGLE LOW-9 QUOTATION MARK
Replace("|H92|H01","|H83|000",BEGIN|ALL|NOERR)   //LATIN SMALL LETTER F WITH HOOK
Replace("|H1E|H20","|H84|000",BEGIN|ALL|NOERR)   //DOUBLE LOW-9 QUOTATION MARK
Replace("|H26|H20","|H85|000",BEGIN|ALL|NOERR)   //HORIZONTAL ELLIPSIS
Replace("|HC6|H02","|H88|000",BEGIN|ALL|NOERR)   //MODIFIER LETTER CIRCUMFLEX ACCENT
Replace("|H30|H20","|H89|000",BEGIN|ALL|NOERR)   //PER MILLE SIGN
Replace("|H60|H01","|H8A|000",BEGIN|ALL|NOERR)   //LATIN CAPITAL LETTER S WITH CARON
Replace("|H39|H20","|H8B|000",BEGIN|ALL|NOERR)   //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
Replace("|H52|H01","|H8C|000",BEGIN|ALL|NOERR)   //LATIN CAPITAL LIGATURE OE
Replace("|H7D|H01","|H8E|000",BEGIN|ALL|NOERR)   //LATIN CAPITAL LETTER Z WITH CARON
Replace("|H18|H20","|H91|000",BEGIN|ALL|NOERR)   //LEFT SINGLE QUOTATION MARK
Replace("|H19|H20","|H92|000",BEGIN|ALL|NOERR)   //RIGHT SINGLE QUOTATION MARK
Replace("|H1C|H20","|H93|000",BEGIN|ALL|NOERR)   //LEFT DOUBLE QUOTATION MARK
Replace("|H1D|H20","|H94|000",BEGIN|ALL|NOERR)   //RIGHT DOUBLE QUOTATION MARK
Replace("|H22|H20","|H95|000",BEGIN|ALL|NOERR)   //BULLET
Replace("|H13|H20","|H96|000",BEGIN|ALL|NOERR)   //EN DASH
Replace("|H14|H20","|H97|000",BEGIN|ALL|NOERR)   //EM DASH
Replace("|HDC|H02","|H98|000",BEGIN|ALL|NOERR)   //SMALL TILDE
Replace("|H22|H21","|H99|000",BEGIN|ALL|NOERR)   //TRADE MARK SIGN
Replace("|H61|H01","|H9A|000",BEGIN|ALL|NOERR)   //LATIN SMALL LETTER S WITH CARON
Replace("|H3A|H20","|H9B|000",BEGIN|ALL|NOERR)   //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
Replace("|H53|H01","|H9C|000",BEGIN|ALL|NOERR)   //LATIN SMALL LIGATURE OE
Replace("|H7E|H01","|H9E|000",BEGIN|ALL|NOERR)   //LATIN SMALL LETTER Z WITH CARON
Replace("|H78|H01","|H9F|000",BEGIN|ALL|NOERR)   //LATIN CAPITAL LETTER Y WITH DIAERESIS
// Order-sensitive patterns, see the note above.
Replace("|H21|H20","|H87|000",BEGIN|ALL|NOERR)   //DOUBLE DAGGER
Replace("|H20|H20","|H86|000",BEGIN|ALL|NOERR)   //DAGGER

// ---------------------------------------------------------------------------
// Main loop to convert file: keep each low byte, delete each high byte.
// ---------------------------------------------------------------------------
Begin_Of_File()
#104 = 0
while (!At_EOF) {
    Char(1)                 // skip the low byte, which is kept
    #104 |= CC              // a non-zero high byte means an untranslated character
    Del_Char(1)             // drop the high byte
}

// If run via "-x" invocation option, save file and exit.
if (Is_Auto_Execution && Macro_Num == 100) {
    Xall
}

// Report characters that had no ANSI equivalent.
if (#104) {
    if (OS_TYPE == 1) {
        Dialog_Input_1(123,"`ERROR - Unicode to ANSI`,`This file contains at least one Unicode character\n that could not be translated to ANSI`",APP+CENTER,0,0)
    } else {
        Get_Key("ERROR - File contains at least one Unicode character that could not be translated to ANSI",STATLINE)
    }
}

// ---------------------------------------------------------------------------
// Go to end of first line and determine file type from its line break:
//   CR+LF -> 0,  LF -> 1,  CR -> 2;  64 is used when no line break is found.
// ---------------------------------------------------------------------------
Begin_Of_File()
#103 = 64
if (Search("|{|H0D,|H0A}", NOERR)) {
    if (Match("|H0D|H0A") == 0) {
        #103 = 0
    } else {
        if (Match("|H0A") == 0) {
            #103 = 1
        } else {
            if (Match("|H0D") == 0) {
                #103 = 2
            }
        }
    }
}
Config(F_F_TYPE, #103)

// If file size < 1 meg, restore (approximate) cursor position.
// #106 is a byte offset into the UTF-16 text (BOM already subtracted if one
// was removed); each character shrank from two bytes to one.
if (File_Size < 1000000) {
    Goto_Pos(#106/2)
}
Return