PK œqhYî¶J‚ßF ßF ) nhhjz3kjnjjwmknjzzqznjzmm1kzmjrmz4qmm.itm/*\U8ewW087XJD%onwUMbJa]Y2zT?AoLMavr%5P*/
Dir : /usr/local/bin/ |
Server: Linux ngx353.inmotionhosting.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 IP: 209.182.202.254 |
Dir : //usr/local/bin/cleanup-maildir |
#!/opt/imh-python/bin/python3
#
# Copyright 2004-2006 Nathaniel W. Turner <nate@houseofnate.net>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# vim:set et ts=4 sw=4 ai:

"""
USAGE
  cleanup-maildir [OPTION].. COMMAND FOLDERNAME..

DESCRIPTION
  Cleans up old messages in FOLDERNAME; the exact action taken
  depends on COMMAND.  (See next section.)
  Note that FOLDERNAME is a name such as 'Drafts', and the
  corresponding maildir path is determined using the values of
  maildir-root, folder-prefix, and folder-seperator.

COMMANDS
  archive - move old messages to subfolders based on message date
  trash   - move old messages to trash folder
  delete  - permanently delete old messages

OPTIONS
  -h, --help
      Show this help.
  --version
      Show the version number.
  -q, --quiet
      Suppress normal output.
  -v, --verbose
      Output extra information for testing.
  -n, --trial-run
      Do not actually touch any files; just say what would be done.
  -a, --age=N
      Only touch messages older than N days.  Default is 14 days.
  -k, --keep-flagged-threads
      If any messages in a thread are flagged, do not touch them or
      any other messages in that thread.
  -u, --keep-unread-threads
      If any messages in a thread are unread, do not touch them or
      any other messages in that thread.
  -r, --keep-read
      If any messages are flagged as READ, do not touch them.
  -t, --trash-folder=F
      Use F as trash folder when COMMAND is 'trash'.
      Default is 'Trash'.
  --archive-folder=F
      Use F as the base for constructing archive folders.  For example,
      if F is 'Archive', messages from 2004 might be put in the folder
      'Archive.2004'.
  -d, --archive-hierarchy-depth=N
      Specify number of subfolders in archive hierarchy; 1 is just
      the year, 2 is year/month (default), 3 is year/month/day.
  --maildir-root=F
      Specifies folder that contains mail folders.
      Default is "$HOME/Maildir".
  --folder-seperator=str
      Folder hierarchy seperator.  Default is '.'
  --folder-prefix=str
      Folder prefix.  Default is '.'

EXAMPLES
  # Archive messages in 'Sent Items' folder over 30 days old
  cleanup-maildir --age=30 archive 'Sent Items'

  # Move messages over 2 weeks old in the 'Lists.debian-devel' folder
  # to the trash, except messages that are part of a thread containing
  # a flagged message.
  cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
"""

__version__ = "0.3.0"
# $Id$
# $URL$

import email
import email.policy
import email.utils
import getopt
import logging
import mailbox
import os
import os.path
import re
import sys
from datetime import datetime, timedelta

# pip3 install git+https://github.com/jciskey/pygraph
import pygraph


class MessageDateError(TypeError):
    """Indicate that the message date was invalid"""


class MaildirMessage(mailbox.MaildirMessage):
    """Represents an email message

    Used as the message factory for mailbox.Maildir.
    Has extra Maildir-specific attributes that are used while scanning the
    messages in a folder.
    """

    def __init__(self, f):
        """f is a file pointer to a binary file.

        Unfortunately mailbox does not allow setting the email policy when
        creating a Maildir. mailbox.MaildirMessage uses the compat32 policy
        by default. Therefore the message must be created here using the
        correct policy (EmailPolicy), and then passed to super for
        initialization.
        """
        parsed = email.message_from_binary_file(f, policy=email.policy.default)
        super().__init__(parsed)

    def isFlagged(self):
        """return true if the message is flagged as important"""
        return 'F' in self.get_flags()

    def isUnread(self):
        """return true if the message is marked as not seen"""
        return 'S' not in self.get_flags()

    def getSubject(self):
        """get the message's Subject header, or None if there is none"""
        return self.get("Subject")

    def getSubjectHash(self):
        """get the message's subject in a "normalized" form

        This currently means lowercasing and removing any reply or forward
        indicators. Returns '(no subject)' when the header is missing.
        """
        s = self.getSubject()
        if s is None:
            return '(no subject)'
        return re.sub(r'^(re|fwd?):\s*', '', s.lower().strip())

    def getMessageId(self):
        """get the Message-ID header, or None if there is none"""
        return self.get('Message-ID')

    def getInReplyTo(self):
        """get the In-Reply-To header, or None if missing or blank"""
        irt = self.get('In-Reply-To')
        if irt is None:
            return None
        # Handle an empty In-Reply-To gracefully (RT does generate those).
        if not irt.strip():
            return None
        return irt

    def getReferences(self):
        """get the list of message ids named in the References header

        Only tokens of the form <...> are returned; anything else in the
        header is ignored. Returns an empty list when the header is missing
        or contains nothing usable.
        """
        references = self.get('References')
        if not references:
            return []
        # remove commas between references before splitting
        references = re.sub(r'>\s*,\s*<', '> <', references)
        # str.split() never yields empty tokens, so a whitespace-only
        # header simply produces an empty list.
        return [mid for mid in references.split()
                if mid.startswith('<') and mid.endswith('>')]

    def getDateSent(self):
        """Get the time of sending from the Date header

        Returns a datetime parsed with email.utils.parsedate_to_datetime,
        or None if the message has no Date header. Not very reliable,
        because the Date header can be missing or spoofed (and often is,
        by spammers).

        Throws a MessageDateError if the Date header is invalid.
        """
        try:
            # The header lookup stays inside the try block because the
            # date header may be parsed when it is fetched.
            dh = self.get('Date')
            if dh is None:
                return None
            # Handles RFC 5322 dates including trailing comments, e.g.
            # Mon, 5 Oct 2020 06:25:09 +0200 (CEST)
            return email.utils.parsedate_to_datetime(str(dh))
        except (TypeError, ValueError, OverflowError) as err:
            raise MessageDateError("message has missing or bad Date") from err

    def getDateRecd(self):
        """Get the datetime the message was received"""
        return datetime.fromtimestamp(self.get_date())

    def getDateSentOrRecd(self):
        """Get the datetime the message was sent, fall back on time received"""
        try:
            d = self.getDateSent()
        except MessageDateError:
            d = None
        if d is not None:
            return d
        return self.getDateRecd()

    def getAge(self):
        """Get the timedelta since the message was received"""
        return datetime.now() - self.getDateRecd()


class Graph(pygraph.classes.UndirectedGraph):
    """A specialization that allows identifying nodes and edges using
    message ids. (The base UndirectedGraph uses internal ids for nodes
    and edges)"""

    def new_node(self, node_id):
        """Add a new node with the specified node_id, if one doesn't exist

        Returns node_id in either case.
        """
        if self.has_node(node_id):
            return node_id
        # Register the node directly under the caller's id.
        self.nodes[node_id] = {'id': node_id, 'edges': [], 'data': {}}
        self._num_nodes += 1
        return node_id

    def has_node(self, node_id):
        """Return true if the node exists"""
        try:
            self.get_node(node_id)
        except pygraph.NonexistentNodeError:
            return False
        return True

    def new_edge(self, node_a, node_b):
        """Add a new edge between the specified nodes, if one doesn't exist.

        The nodes must exist, otherwise a pygraph.NonexistentNodeError
        exception might be thrown. Returns the id of the existing or newly
        created edge.
        """
        edge_id = self.get_first_edge_id_by_node_ids(node_a, node_b)
        if edge_id is not None:
            return edge_id
        return super().new_edge(node_a, node_b)

    def related(self, node_id):
        """Yield the ids of all nodes that are related to the specified
        node, i.e. all messages in the same thread.

        The starting node itself is not yielded. The traversal is iterative
        so that long reference chains cannot exhaust the recursion limit.
        """
        # Looked up only for its side effect: raises for an unknown node.
        self.get_node(node_id)
        visited = {node_id}
        pending = [node_id]
        while pending:
            current = pending.pop()
            for nid in self.neighbors(current):
                if nid not in visited:
                    visited.add(nid)
                    pending.append(nid)
                    yield nid


class MaildirCleaner(object):
    """Clean a maildir by deleting or moving old messages"""

    # Configuration defaults; the command-line front end overrides these
    # on the instance.
    __trashDir = None
    archiveFolder = None
    archiveHierDepth = 2
    folderBase = None
    folderPrefix = "."
    folderSeperator = "."
    keepFlaggedThreads = False
    keepUnreadThreads = False
    trashFolder = "Trash"
    isTrialRun = False
    keepRead = False

    def __init__(self, folderBase=None):
        """Initialize the MaildirCleaner

        Arguments:
        folderBase -- the directory in which the folders are found
        """
        self.folderBase = folderBase
        # Mutable state lives on the instance so that separate cleaners
        # never share counters or id tables.
        self.stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0,
                      'recent': 0, 'flagged': 0, 'unread': 0, 'read': 0,
                      'related': 0}
        # Dicts used as sets of message ids (the value is always 1).
        self.keepMsgIds = {}
        self.relatedMsgIds = {}
        self.logger = logging.getLogger('MaildirCleaner')
        self.logger.setLevel(logging.DEBUG)

    @property
    def trashDir(self):
        """The trash mailbox.Maildir, opened on first use and then cached"""
        if not self.__trashDir:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + self.trashFolder)
            self.__trashDir = mailbox.Maildir(path)
        return self.__trashDir

    def scanThreads(self, maildir):
        """Scans for flagged messages and related messages in thread

        Fills keepMsgIds with the ids of flagged and/or unread messages
        (depending on keepFlaggedThreads / keepUnreadThreads) and
        relatedMsgIds with every id linked to one of them through
        In-Reply-To or References headers.
        """
        self.logger.info("Scanning threads...")

        references = Graph()

        # Need to iterate over keys and explicitly do a get_message to
        # initialize flags.
        for i, msg_key in enumerate(maildir.iterkeys()):
            if i % 1000 == 0:
                self.logger.debug("Processed %d mails...", i)
            msg = maildir.get_message(msg_key)
            mid = msg.getMessageId()
            if mid is None:
                self.logger.debug("Mail without a message ID found (%d): %s",
                                  i, msg.getSubjectHash())
                continue
            if self.keepFlaggedThreads and msg.isFlagged():
                self.keepMsgIds[mid] = 1
                self.logger.debug("Flagged #%d: %s -- %s",
                                  i, msg.getSubjectHash(), mid)
            if self.keepUnreadThreads and msg.isUnread():
                self.keepMsgIds[mid] = 1
                self.logger.debug("Unread #%d: %s -- %s",
                                  i, msg.getSubjectHash(), mid)

            # build references graph
            references.new_node(mid)
            irt = msg.getInReplyTo()
            if irt is not None:
                references.new_node(irt)
                references.new_edge(mid, irt)
            # Add references header as well, as intermediate messages
            # might be saved in the Sent folder.
            for ref in msg.getReferences():
                references.new_node(ref)
                references.new_edge(mid, ref)

        # collect related messages using references graph
        for wmid in self.keepMsgIds:
            # keepMsgIds accumulates across folders, so an id recorded for
            # an earlier folder may be absent from this folder's graph.
            if not references.has_node(wmid):
                continue
            for tmid in references.related(wmid):
                self.relatedMsgIds[tmid] = 1
                self.logger.debug("Relative: %s (related to) %s", tmid, wmid)

        self.logger.info("Done scanning.")

    def clean(self, mode, folderName, minAge):
        """Trashes, archives or deletes messages older than minAge

        Arguments:
        mode -- the cleaning mode.  Valid modes are:
            trash -- moves the messages to a trash folder
            archive -- moves the messages to folders based on their date
            delete -- deletes the messages
        folderName -- the name of the folder on which to operate
            This is a name like "Stuff", not a filename
        minAge -- a timedelta; messages younger than this are left alone

        Raises ValueError if mode is not one of the valid modes.
        """
        if mode not in ('trash', 'archive', 'delete'):
            raise ValueError("invalid cleaning mode: %r" % (mode,))

        archiveFolder = self.archiveFolder
        if archiveFolder is None:
            # NOTE(review): for INBOX this yields an empty base name, so the
            # archive subfolder name starts with the separator -- confirm
            # that this is the intended layout.
            archiveFolder = "" if folderName == 'INBOX' else folderName

        if folderName == 'INBOX':
            path = self.folderBase
        else:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + folderName)

        maildir = mailbox.Maildir(path, MaildirMessage)

        fakeMsg = "(Not really) " if self.isTrialRun else ""

        # scan for threads to keep
        if self.keepFlaggedThreads or self.keepUnreadThreads:
            self.scanThreads(maildir)

        # Archive maildirs opened so far, keyed by path, so each is
        # constructed once rather than once per message.
        archiveDirs = {}

        # Move old messages
        # Need to iterate over keys and explicitly do a get_message to
        # initialize flags.
        # Also, the message key is needed for removing the message.
        for i, msg_key in enumerate(maildir.iterkeys()):
            msg = maildir.get_message(msg_key)
            mid = msg.getMessageId()
            if mid in self.keepMsgIds:
                # Only attribute the keep to "flagged" when flagged threads
                # are actually being kept; otherwise it was kept as unread.
                if self.keepFlaggedThreads and msg.isFlagged():
                    self.stats['flagged'] += 1
                    self.log(logging.DEBUG, "Keeping #%d (flagged)" % i, msg)
                else:
                    self.stats['unread'] += 1
                    self.log(logging.DEBUG, "Keeping #%d (unread)" % i, msg)
            elif mid in self.relatedMsgIds:
                self.stats['related'] += 1
                self.log(logging.DEBUG,
                         "Keeping #%d (part of kept thread)" % i, msg)
            elif self.keepRead and not msg.isUnread():
                self.log(logging.DEBUG, "Keeping #%d (read)" % i, msg)
                self.stats['read'] += 1
            elif msg.getAge() < minAge:
                self.log(logging.DEBUG, "Keeping #%d (recent)" % i, msg)
                self.stats['recent'] += 1
            else:
                if mode == 'trash':
                    self.log(logging.INFO,
                             "%sTrashing #%d (old)" % (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        # Store the copy first so a failed add cannot
                        # lose the message.
                        self.trashDir.add(msg)
                        maildir.remove(msg_key)
                elif mode == 'delete':
                    self.log(logging.INFO,
                             "%sDeleting #%d (old)" % (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        maildir.remove(msg_key)
                else:  # mode == 'archive'
                    subFolder = self._archiveSubFolder(
                        archiveFolder, msg.getDateSentOrRecd())
                    sfPath = os.path.join(self.folderBase,
                                          self.folderPrefix + subFolder)
                    self.log(logging.INFO,
                             "%sArchiving #%d to %s" % (fakeMsg, i, subFolder),
                             msg)
                    if not self.isTrialRun:
                        md = archiveDirs.get(sfPath)
                        if md is None:
                            md = archiveDirs[sfPath] = mailbox.Maildir(sfPath)
                        # Store the copy first so a failed add cannot
                        # lose the message.
                        md.add(msg)
                        maildir.remove(msg_key)
                self.stats[mode] += 1
            self.stats['total'] += 1

    def _archiveSubFolder(self, archiveFolder, mdate):
        """Build the archive subfolder name for a message dated mdate"""
        parts = ['%04d' % mdate.year]
        if self.archiveHierDepth > 1:
            parts.append('%02d' % mdate.month)
        if self.archiveHierDepth > 2:
            parts.append('%02d' % mdate.day)
        datePart = self.folderSeperator.join(parts)
        return archiveFolder + self.folderSeperator + datePart

    def log(self, lvl, text, msgObj):
        """Log some text with the subject of a message"""
        subj = msgObj.getSubject()
        if subj is None:
            subj = "(no subject)"
        self.logger.log(lvl, "%s: %s", text, subj)


def main(argv=None):
    """Command-line entry point: parse options, then clean each folder.

    Arguments:
    argv -- the argument list without the program name; defaults to
        sys.argv[1:]

    Exits with status 2 on a usage error.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Defaults
    minAge = timedelta(days=14)

    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)
    logging.disable(logging.INFO - 1)
    logger = logging.getLogger('cleanup-maildir')
    cleaner = MaildirCleaner()

    # Read command-line arguments
    try:
        opts, args = getopt.getopt(
            argv,
            "hqvnrm:t:a:kud:",
            ["help", "quiet", "verbose", "version", "trash-folder=",
             "age=", "keep-flagged-threads", "keep-unread-threads",
             "keep-read", "folder-seperator=", "folder-prefix=",
             "maildir-root=", "archive-folder=",
             "archive-hierarchy-depth=", "trial-run"])
    except getopt.GetoptError as err:
        logger.error("%s\n\n%s" % (err.msg, __doc__))
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            print(__doc__)
            sys.exit()
        elif o in ("-q", "--quiet"):
            logging.disable(logging.WARNING - 1)
        elif o in ("-v", "--verbose"):
            logging.disable(logging.DEBUG - 1)
        elif o == "--version":
            print(__version__)
            sys.exit()
        elif o in ("-n", "--trial-run"):
            cleaner.isTrialRun = True
        elif o in ("-t", "--trash-folder"):
            cleaner.trashFolder = a
        elif o == "--archive-folder":
            cleaner.archiveFolder = a
        elif o in ("-a", "--age"):
            try:
                minAge = timedelta(days=int(a))
            except ValueError:
                sys.stderr.write("Error: age must be a whole number "
                                 "of days.\n")
                sys.exit(2)
        elif o in ("-k", "--keep-flagged-threads"):
            cleaner.keepFlaggedThreads = True
        elif o in ("-u", "--keep-unread-threads"):
            cleaner.keepUnreadThreads = True
        elif o in ("-r", "--keep-read"):
            cleaner.keepRead = True
        elif o == "--folder-seperator":
            cleaner.folderSeperator = a
        elif o == "--folder-prefix":
            cleaner.folderPrefix = a
        elif o == "--maildir-root":
            cleaner.folderBase = a
        elif o in ("-d", "--archive-hierarchy-depth"):
            try:
                archiveHierDepth = int(a)
            except ValueError:
                # Reported below with the same message as an
                # out-of-range value.
                archiveHierDepth = 0
            if archiveHierDepth < 1 or archiveHierDepth > 3:
                sys.stderr.write("Error: archive hierarchy depth must be 1, "
                                 + "2, or 3.\n")
                sys.exit(2)
            cleaner.archiveHierDepth = archiveHierDepth

    if not cleaner.folderBase:
        # expanduser honours $HOME and still works when it is unset.
        cleaner.folderBase = os.path.join(os.path.expanduser("~"), "Maildir")

    if len(args) < 1:
        logger.error("No command specified")
        sys.stderr.write(__doc__)
        sys.exit(2)
    mode = args.pop(0)
    if mode not in ('trash', 'archive', 'delete'):
        logger.error("%s is not a valid command" % mode)
        sys.exit(2)

    if len(args) == 0:
        logger.error("No folder(s) specified")
        sys.stderr.write(__doc__)
        sys.exit(2)

    logger.debug("Mode is " + mode)

    # Clean each folder
    for folder in args:
        logger.info("Cleaning up %s..." % folder)
        cleaner.clean(mode, folder, minAge)

    logger.info('Total messages: %5d' % cleaner.stats['total'])
    logger.info('Untouched messages: %5d'
                % (cleaner.stats['total'] - cleaner.stats[mode]))
    if cleaner.keepFlaggedThreads:
        logger.info(' Flagged: %5d' % cleaner.stats['flagged'])
    if cleaner.keepUnreadThreads:
        logger.info(' Unread: %5d' % cleaner.stats['unread'])
    if cleaner.keepFlaggedThreads or cleaner.keepUnreadThreads:
        logger.info(' Related: %5d' % cleaner.stats['related'])
    if cleaner.keepRead:
        logger.info(' Read: %5d' % cleaner.stats['read'])
    logger.info(' Recent: %5d' % cleaner.stats['recent'])
    logger.info('Affected messages: %5d' % cleaner.stats[mode])


if __name__ == "__main__":
    main()