// UNIC-ASC.VDM - Simple Unicode to ASCII conversion of entire file.
//                Implements {EDIT, Translate, Unicode to ASCII}.
//
// Originally by: Christian Ziemski
// Last Change:   10-March-2002 by Greenview Data, Inc.
//
// Last Change:   23-July-2003 Ch. Ziemski
//
//      The distributed version is *very* slow on big files.
//      So I tried:
//          some changes in checking for correct UNICODE
//          new way of deleting every second byte
//
//      !!! Still not perfect.
//      !!! Still not stress tested (due to lack of test files)
//      !!! Contains some DI1()'s for testing
//      !!! Not checked by Ted
//      !!! Not ready for DOS usage too (some dialogues)
//      !!! error messages not ready for -x invocation
//
//      Ted: ??????? Is "123" a good choice here in DI1() ?
//           I would have used "120" or so to be sure not to alter
//           the {Tools} menu.
//
// Requires:    VEDIT 6.02 or later.
//
// From VEDIT:  Select {EDIT, Translate, Unicode to ASCII}.
//              It will prompt for confirmation to convert entire file.
//
// From OS:     VPW -X UNIC-ASC.VDM filename
//
//              This will convert the entire file specified as 'filename'.
//
// Description: This macro converts simple Unicode to ASCII by stripping
//              the upper byte of each 2-byte Unicode character to create
//              1-byte ASCII characters.
//
//              The upper byte of each 2-byte Unicode character is assumed
//              to be 00 Hex. It does not support languages that require
//              true 2-byte characters, i.e. where the upper byte is not 00.
//
//              CZ: It does support those files. A bit ...  <<<<<<<<<<<<<
//
// Note:        Restores the cursor position for blocks < 1 Meg, but not
//              for larger blocks. (It takes extra time and disk space.)
//
////////////////

// Remember where the cursor was when the macro started; the later
// Goto_Pos(#106) calls in this file use this value to put it back.
#106 = Cur_Pos                          //#106 current position
//
// Check that the file is recognized as Unicode.
//
// ----- CZ new begin ------------------------------------------------------------
//
// Step 1: the file must start with a byte order mark (BOM).
//         Alternative 1 of the pattern is FF FE, alternative 2 is FE FF;
//         Match_Item reports which one was found.
//
// NOTE(review): none of the DI1() dialogs in this CZ section are guarded by
//               the Is_Auto_Execution test used further down, so they also
//               appear when the macro is started with "-x".
//
BOF()
if (Match("|{|hFF|hFE,|hFE|hFF}") != 0) {
    // Neither BOM is present: tell the user, restore the cursor and stop.
    DI1(123,"`Unicode to ASCII`,`This file has no (optional) BOM`",APP+CENTER,0,0)
    #105 = 0                            // flag for no BOM
    DI1(123,"`Unicode to ASCII`,`Error: File seems to be no Unicode `",APP+CENTER,0,0)
    Goto_Pos(#106)
    return
}

// A BOM was found: report the byte order (test dialogs), then step over it.
if (Match_Item == 1) {
    DI1(123,"`Unicode to ASCII`,`This file has a FFFE BOM (little-endian, byte swapped, xx00)`",APP+CENTER,0,0)
} else {
    DI1(123,"`Unicode to ASCII`,`This file has a FEFF BOM (big-endian, 00xx)`",APP+CENTER,0,0)
}
Char(2)                                 // cursor now sits just behind the 2-byte BOM
#105 = Match_Item                       // 1 = FF FE (xx00), 2 = FE FF (00xx)

//
// Step 2: warn if any two adjacent bytes are both non-zero, i.e. at least
//         one character would lose its upper byte in the conversion.
//
Search("|!|H00|!|H00", NOERR)           // search for a two-byte value with both bytes valid
if (!EM) {
    #104 = DI1(123,"`Unicode to ASCII`,`Error:\nFile has at least one UNICODE-character\nwith both bytes valid.\nConversion would mean: Loss of information!\nDo you really want? `,`[&Yes]`,`[&No]`",APP+CENTER,0,0)
    if (#104 != 1) {
        // Anything but the first button ([Yes]) aborts. The cursor is
        // intentionally NOT restored here (call left disabled by CZ):
        //   Goto_Pos(#106)
        return
    }
}
// ----- CZ new end --------------------------------------------------------------

//
// Give confirmation prompt unless run via "-x" invocation option.
//
if (!(Is_Auto_Execution && Macro_Num == 100)) {
    Alert()
    if (OS_TYPE==1) {
        // Dialog version of the prompt. [No] is the first button, [Yes] the
        // second, so a result below 2 means "do not convert".
        #104 = DI1(123,"`Confirmation`,`OK to translate entire file from Unicode to ASCII?`,`[&No]`,`[&Yes]`",APP+CENTER,0,0)
        if (#104 < 2) {
            Goto_Pos(#106)
            return
        }
    } else {
        // Status line version of the prompt. "& 0xFFDF" clears bit 5 of the
        // key code so that 'y'/'n' compare equal to 'Y'/'N'.
        while ((#104=Get_Key("OK to translate entire file from Unicode to ASCII? [N]o [Y]es ",STATLINE)&0xFFDF)!='Y' && #104!='N') {
            Alert                       //Get valid input "Y" or "N"
        }
        if (#104 == 'N') {
            Goto_Pos(#106)
            return
        }
    }
}

//
// Confirmed: remove the BOM from the start of the file.
//
BOF()
Del_Char(2)                             // Delete header

//
// Main loop to convert file
//
//--- CZ begin
if (#105 == 2) {
    Char(1)                             // byte order 00xx => skip the first null byte
}

// Former in-place loop, kept for reference only:
//      While (! AT_EOF) {              // Delete every second byte of the rest
//          Del_Char(1)
//          Char(1, ERRBREAK)
//      }
// above loop is very slow for big files!
// trying it with that: (not really much faster?!)
//
// Copy the significant byte of every 2-byte character into a second edit
// buffer, which is written to "<pathname>.u2a". On entry the cursor of the
// current buffer must already be on the first significant byte.
//
//   T-Reg 103 = pathname of the file being converted
//   #103      = number of the source buffer (reused below for the file type)
//   #104      = number of the destination buffer
//   #107      = byte currently being copied
//
Reg_Set(103, PATHNAME)
#103 = Buf_Num
#104 = Buf_Free
Buf_Switch(#104)
File_Save_As("|@(103).u2a", OK+NOMSG)
Buf_Switch(#103)
while (! AT_EoF) {                      // copy only the valid bytes to the other buffer
    #107 = Cur_Char
    Buf_Switch(#104)
    Ins_Char(#107)
    Buf_Switch(#103)
    Char(2, NOERR)                      // skip the byte that is dropped
}

//
// Close both files and move the converted copy over the original name.
//
Buf_Switch(#103)
File_Close(NOMSG)
Buf_Switch(#104)
File_Close(NOMSG)
File_Rename("|@(103).u2a","|@(103)", OK)
//--- CZ end

//
// If run via "-x" invocation option, save file and exit.
//
if (Is_Auto_Execution && Macro_Num == 100) {
    Xall
}

//
// FIX: both buffers were closed above, so the converted file is no longer
// open. Without re-opening it, the file type detection and the cursor
// restore below would act on whatever buffer happens to be current, and the
// user would be left without the file they just converted.
//
// NOTE(review): the name is passed unquoted, like in File_Save_As() and
//               File_Rename() above - confirm this is fine for pathnames
//               that contain spaces.
//
File_Open("|@(103)")

//
// Go to end of first line and determine file type
//
BOF()
#103 = 64                               // value used when no CR or LF is found at all
if (Search("|{|H0D,|H0A}",NOERR)) {
    if (Match("|H0D|H0A")==0) {
        #103 = 0                        // first line ends with CR+LF
    } else {
        if (Match("|H0A")==0) {
            #103 = 1                    // first line ends with LF only
        } else {
            if (Match("|H0D")==0) {
                #103 = 2                // first line ends with CR only
            }
        }
    }
}
Config(F_F_TYPE,#103)

//
// If file size < 1 meg, restore (approximate) cursor position.
// The converted file has half as many bytes, hence the old position / 2.
//
//if (File_Size < 1000000) { Goto_Pos(#106/2 -1) }
if (File_Size < 1000000) {
    Goto_Pos(#106/2)                    // CZ: the "-1" could have led to no restore if at BOF
}
return