// EmailFromTo.vdm
// Extracts e-mail Addresses & Display-Names from e-mail files ("From:" addresses)
// Finds "My e-mails" in To: or Cc:
// Skips e-mail Addresses in "My Senders" list
// Extracts Origination Date for "My e-mails", and skips "old" messages
// Sorts addresses and removes duplicate Address/Name entries.
//
// ============> IMPORTANT <============
// You need to customise this macro to your specific requirements where indicated by <============
// <============ My e-mails
// <============ My Senders
// <============ Output Option
// <============ Output File
// <============ Cutoff Date "1-1-1980" for all
//
// Output Options (depends on value of #103):-
// 0 Addresses & Display-Names with one of "My e-mails" in To: or Cc:
// 1 All Addresses & Display-Names
// 2 Address, Display-Name & To:/Cc: if one of "My e-mails" in To: or Cc:
// 3 All Addresses, Display-Names & To:/Cc:
// Options 1 & 3 bypass the "My e-mails" filtering and output addresses with no To:/Cc:
// Additional Output Options (add the following value/s to the above in #103 e.g. 2+4):-
// 4 IP Address
// 8 Origination Date & Time
//
// The e-mail Addresses & Display-Name will be appended to file in the default directory.
// This is designed to be used in conjunction with the Wildfile macro.
//
// This macro should work with any MIME Email files.
// Includes code to skip the body of multipart MIME, to minimise spurious hits in forwarded attachments.
//
// I use Thunderbird, data is in files without extension e.g. "Inbox"
// These are normally located in directory:-
// %APPDATA%\Thunderbird\Profiles\default\????????.slt\Mail\????????
// (See Tools/Account Settings/Server Settings/Local Directory for location.)
//
// Works with email Saved from Outlook Express.
// (You can select a group of emails and drag them into a directory in Windows Explorer.
// This creates a series of .eml files with their names being the subject lines of each email.)
// Author:-
// Ian Binnie 2006-03-18
// Ian Binnie 2006-03-25 Cutoff Date
// Ian Binnie 2006-07-21 6.15 Reg Exp groups
// Ian Binnie 2007-09-30 IP Address
// Ian Binnie 2007-10-01 Ensure Word Wrap OFF in output file

// NOTE(review): this listing was recovered from a copy whose line breaks had
// been collapsed. The statement/comment boundaries below were re-established
// from context, and two spans of code appear to be missing (marked further
// down). Confirm against the original macro before running it.

// Numeric Register Usage
// #20 Current Edit Buffer
// #21 current position in file
// #22 multipart message
// #23 From:
// #24 end of Top level headers
// #25 To: or Cc:
// #26 end of To: or Cc:
// #27 Cutoff Date
// #28 buffer with "My e-mails" list
// #29 buffer for "My Senders" list
// #30 Origination Date
// #103 Output Option

// Text Register Usage
// 23 IP Address
// 24 full From: line
// 25 Display-Name
// 26 e-mail address
// 27 temporary storage for boundary string
// 28 To: or Cc: address
// 30 Origination Date & Time
// 80 Output File

// ---- User configuration ---------------------------------------------------
#103=1 // <============ Output Option
Reg_Set(80,"EmailFromTo.txt") // <============ Output File

Config(D_DSP_WRAP, 0, LOCAL) // Needed to capture end of long lines
#20=Buf_Num // Current Edit Buffer

// ---- Build the "My e-mails" list in its own scratch buffer (#28) ----------
#28=Buf_Switch(Buf_Free) // working buffer for "My e-mails" list
Config(F_F_TYPE, 0, LOCAL)
Config(F_OVER_MODE, 0, LOCAL)
// List of "My e-mails" to be included.
// Add as many lines as needed, customised to your needs. <============ My e-mails
// One line per address:
Ins_Text(" bill_gates@microsoft.com ")
// NOTE(review): only one string argument is visible in this Replace call;
// confirm against the original whether a replacement string belongs between
// the search string and the options.
Replace("|<|n",BEGIN|ALL|NOERR)

// ---- Build the "My Senders" list in its own scratch buffer (#29) ----------
#29=Buf_Switch(Buf_Free) // working buffer for "My Senders" list
Config(F_F_TYPE, 0, LOCAL)
Config(F_OVER_MODE, 0, LOCAL)
// List of "My Senders" to be excluded.
// Add as many lines as needed, customised to your needs. <============ My Senders
// One line per address:
Ins_Text(" bill_gates@microsoft.com ")
// NOTE(review): same single-string Replace form as above - confirm.
Replace("|<|n",BEGIN|ALL|NOERR)

File_Open("|@(80)") // Open or switch to "output" file
Config(D_DSP_WRAP, 0, LOCAL) // Ensure Word Wrap OFF in output file

// ---- Convert the cutoff date text into a number held in #27 ---------------
// The date text is inserted into a temporary buffer so that it can be
// evaluated from the buffer contents; the buffer is discarded afterwards.
Reg_set(30,"1-1-1980") // <============ Cutoff Date "dd-mm-yyyy" "1-1-1980" for all
Buf_Switch(Buf_Free) // temporary buffer
Reg_Ins(30)
Begin_Of_File
#27=Num_Eval_Date(BEGIN)
Buf_Quit(OK) // close temporary buffer

Buf_Switch(#20) // Return to original Edit Buffer
Begin_Of_File()

// Main Loop
// Each pass clears the per-message text registers, then searches forward for
// the next line starting with "From:" or "Content-Type:". ERRBREAK leaves the
// loop when no further match is found.
while(!At_EOF) {
    reg_empty(23)
    reg_empty(24)
    reg_empty(25)
    reg_empty(26)
    reg_empty(28)
    reg_empty(30)
    // To use Reply-To: addresses change Search String to:- "|<|{From:,Content-Type:,Reply-To:}|w"
    Search("|<|{From:,Content-Type:}|w",ADVANCE+ERRBREAK+NORESTORE)
    // match_item 2 is the second alternative in the search string
    // (Content-Type:); anything else is handled as an address line.
    if(match_item == 2) {
        Call("SkipMultipart")
    } else {
        Call("ExtractAddress")
    }
}

// ---- Discard the two list buffers ------------------------------------------
Buf_Switch(#28) // working buffer with "My e-mails" list
Buf_Quit(OK) // close it
Buf_Switch(#29) // working buffer for "My Senders" list
Buf_Quit(OK) // close it
Buf_Switch(#20) // Return to original Edit Buffer

// Sort e-mail addresses and remove duplicates.
File_Open("|@(80)") // Switch to "output" file
//Return // to prevent sort (for testing)
if(File_Size == 0) { Return } // nothing was written, so nothing to sort
Sort(0,File_Size) //Sort entire file
Begin_Of_File()
// After sorting, equal lines are adjacent: the pattern matches a line that is
// immediately followed by an identical line, and one copy is deleted. The
// loop ends via ERRBREAK when no such pair remains.
repeat(ALL) { //Search & delete duplicate lines
    Search("^(.*)\N\1$",REGEXP+MAX+ERRBREAK)
    Del_line()
}

// Now Tidy up file
// NOTE(review): 0x57495C44 reads 'W','I','\','D' as ASCII bytes. If the
// Wildfile macro's marker in #99 is "WILD" (0x57494C44) this test can never
// succeed - confirm the constant against the Wildfile macro.
if (#99==0x57495C44) //If WILDFILE running
{
    File_Save
}
Buf_Switch(#20) // Return to original Edit Buffer
Return

//===========================================================
// This skips the body of a multipart MIME message.
// Called from the main loop with the cursor just after "Content-Type:".
// Saves the entry position in #22 and the boundary string in text register 27.
:SkipMultipart:
#22=Cur_Pos
if(Match("multipart")==0) // this is a 'multipart' message
{
    Search("boundary",NOERR|ADVANCE)
    if (Error_Match) { Goto_Pos(#22) Return } // missing required parameter
    Search("=",ADVANCE|NOERR) // Look for boundary string
    // The boundary value may be quoted or bare; mark it as a block either way.
    if (Cur_Char=='"') { //If string in double-quotes...
        Char(1)
        Block_Begin(Cur_Pos)
        Search('"',ADVANCE|NOERR)
        Block_End(Cur_Pos-1) // exclude the closing quote
    } else {
        Block_Begin(Cur_Pos)
        Search("|X",NOERR)
        Block_End(Cur_Pos)
    }
    Reg_Copy_Block(27,Block_Begin,Block_End) // save boundary string
    // Some Email Clients put headers in multi-part preamble
    Block_Begin(Cur_Pos)
    Search("|<--|@(27)") // skip preamble
    Block_End(Cur_Pos)
    // To use Reply-To: addresses change Search String to:- "|<|{From:,Reply-To:}|w"
    // NOTE(review): SOURCE IS DAMAGED HERE. The text between the opening of
    // this search string and " or [mailto:email]" is missing - presumably the
    // remainder of SkipMultipart, the ":ExtractAddress:" label that the main
    // loop calls, and the start of that routine. The gap begins right after
    // "|<", which is consistent with markup stripping having removed it.
    // The fragment below is kept exactly as found; restore from the original.
    if(Search_Block("| or [mailto:email] {
        #21=Cur_Pos
        // Display-Name is taken as the text before a '<' or '[' on the line;
        // if neither is present the rest of the line is used.
        if(Search_Block('|{<,[}',#21,EOL_Pos,NOERR) ) {
            Search_Block("|w",#21,Cur_Pos,REVERSE+NOERR) // remove trailing spaces from name
            Reg_Copy_Block(25,#21,Cur_Pos)
        } else // assume just Display-Name on line
        {
            Reg_Copy_Block(25,#21,EOL_Pos) // Copy Display-Name to T-Reg
        }
    }
    // Search for email address
    if(Search_Block("[a-zA-Z0-9\_\-\.]+@[a-zA-Z0-9\_\-\.]+\.[a-zA-Z][a-zA-Z]+",#21,EOL_Pos,BEGIN+REGEXP+MAX+ADVANCE+NOERR) ) {
        Reg_Copy_Block(26,CP-Chars_Matched,CP) // Copy e-mail address to T-Reg
        if(Reg_Compare(26,@(25))==0) // if Display-Name == email
        {
            reg_empty(25) // avoid emitting the address twice
        }
    }
    #23=CP
    Search("|N|N") // Find end of Top level headers
    #24=CP
    // NOTE(review): SOURCE IS DAMAGED HERE. The text between the opening of
    // this search string and "0 && (#103&2)!=0)" is missing. Nothing visible
    // fills text registers 23, 28 or 30, compares against #27, consults the
    // list buffers #28/#29, or switches to the output file before the inserts
    // below, so that logic was presumably in the lost span. The fragment
    // below is kept exactly as found; restore from the original.
    if(Search_Block("|0 && (#103&2)!=0) {
        Ins_Char(9) // character code 9 (tab) separates output fields
        Reg_Ins(28) // To: or Cc: address
    }
    // Optional extra columns, selected by bits 4 and 8 of the output option.
    if(Reg_Size(23)>0 && (#103&4)!=0) {
        Ins_Char(9)
        Reg_Ins(23) // IP Address
    }
    if(Reg_Size(30)>0 && (#103&8)!=0) {
        Ins_Char(9)
        Reg_Ins(30) // Origination Date
        // Num_Ins_Date(#30,BEGIN|NOCR)
    }
    // Un-comment the following to include full From: line (for debugging)
    // Ins_Char(9)
    // Reg_Ins(24)
    Ins_Newline(1)
    Buf_Switch(#20) // Return to original Edit Buffer
    Return