diff -r 99a436de7029 Lib/mailbox.py --- a/Lib/mailbox.py +++ b/Lib/mailbox.py @@ -21,6 +21,8 @@ import email.message import email.generator import io import contextlib +import shutil +import tempfile try: if sys.platform == 'os2emx': # OS/2 EMX fcntl() not adequate @@ -587,8 +589,10 @@ class _singlefileMailbox(Mailbox): self._file = f self._toc = None self._next_key = 0 - self._pending = False # No changes require rewriting the file. - self._pending_sync = False # No need to sync the file + # None: No changes require rewriting the file + # True: The file needs flushing and fsyncing + # (key, offset): Rewrite required starting from this key and file offset + self._pending = None self._locked = False self._file_length = None # Used to record mailbox size @@ -598,21 +602,26 @@ class _singlefileMailbox(Mailbox): self._toc[self._next_key] = self._append_message(message) self._next_key += 1 # _append_message appends the message to the mailbox file. We - # don't need a full rewrite + rename, sync is enough. - self._pending_sync = True + # don't need a rewrite, sync is enough. + if self._pending is None: + self._pending = True return self._next_key - 1 def remove(self, key): """Remove the keyed message; raise KeyError if it doesn't exist.""" self._lookup(key) + if self._pending in (None, True) or key < self._pending[0]: + # Record the key of the first changed message and it's position + self._pending = (key, self._toc[key][0]) del self._toc[key] - self._pending = True def __setitem__(self, key, message): """Replace the keyed message; raise KeyError if it doesn't exist.""" self._lookup(key) + if self._pending in (None, True) or key < self._pending[0]: + # Record the key of the first changed message and it's position + self._pending = (key, self._toc[key][0]) self._toc[key] = self._append_message(message) - self._pending = True def iterkeys(self): """Return an iterator over keys.""" @@ -644,17 +653,20 @@ class _singlefileMailbox(Mailbox): def flush(self): """Write any pending changes to disk.""" - if not self._pending: - if self._pending_sync: - # Messages have only been added, so syncing the file - # is enough. - _sync_flush(self._file) - self._pending_sync = False + if self._pending is None: + # Nothing has changed + return + + if self._pending == True: + # Messages have only been added, so syncing the file + # is enough. + _sync_flush(self._file) + self._pending = None return # In order to be writing anything out at all, self._toc must - # already have been generated (and presumably has been modified - # by adding or deleting an item). + # already have been generated and modified by deleting or + # replacing an item. assert self._toc is not None # Check length of self._file; if it's changed, some other process @@ -666,49 +678,52 @@ class _singlefileMailbox(Mailbox): '(expected %i, found %i)' % (self._file_length, cur_len)) - new_file = _create_temporary(self._path) - try: - new_toc = {} - self._pre_mailbox_hook(new_file) - for key in sorted(self._toc.keys()): - start, stop = self._toc[key] - self._file.seek(start) - self._pre_message_hook(new_file) - new_start = new_file.tell() - while True: - buffer = self._file.read(min(4096, - stop - self._file.tell())) - if not buffer: - break - new_file.write(buffer) - new_toc[key] = (new_start, new_file.tell()) - self._post_message_hook(new_file) - self._file_length = new_file.tell() - except: - new_file.close() - os.remove(new_file.name) - raise - _sync_close(new_file) - # self._file is about to get replaced, so no need to sync. - self._file.close() - # Make sure the new file's mode is the same as the old file's - mode = os.stat(self._path).st_mode - os.chmod(new_file.name, mode) - try: - os.rename(new_file.name, self._path) - except OSError as e: - if e.errno == errno.EEXIST or \ - (os.name == 'os2' and e.errno == errno.EACCES): - os.remove(self._path) - os.rename(new_file.name, self._path) - else: - raise - self._file = open(self._path, 'rb+') - self._toc = new_toc - self._pending = False - self._pending_sync = False - if self._locked: - _lock_file(self._file, dotlock=False) + if len(self._toc) == 0: + # No messages, truncate the file + self._file.truncate(0) + self._pending = None + else: + start_key, start_offset = self._pending + toc_updates = {} + with tempfile.TemporaryFile('w+b') as tmp: + # Write messages starting from the first changed message + # (updated or removed) to a temporary file. + first = True + for key in range(start_key, self._next_key): + if key not in self._toc: + continue + start, stop = self._toc[key] + self._file.seek(start) + if not first: + # start_offset is the offset of the data of + # the first message, after the delimiter + # written by _pre_message_hook(), so the the + # hook isn't required for the first message + # anymore. + self._pre_message_hook(tmp) + else: + first = False + new_start = tmp.tell() + start_offset + while True: + buffer = self._file.read(min(4096, + stop - self._file.tell())) + if not buffer: + break + tmp.write(buffer) + new_end = tmp.tell() + start_offset + toc_updates[key] = (new_start, new_end) + self._post_message_hook(tmp) + # After this point, the flushing cannot be restarted + # in case of a failure. + self._pending = None + self._toc.update(toc_updates) + # Copy the changes back from the temporary file + tmp.seek(0) + self._file.seek(start_offset) + shutil.copyfileobj(tmp, self._file) + self._file.truncate() + _sync_flush(self._file) + self._file_length = self._file.tell() def _pre_mailbox_hook(self, f): """Called before writing the mailbox to file f.""" diff -r 99a436de7029 Lib/test/test_mailbox.py --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -943,25 +943,43 @@ class TestMaildir(TestMailbox, unittest.TestCase): class _TestSingleFile(TestMailbox): '''Common tests for single-file mailboxes''' - def test_add_doesnt_rewrite(self): - # When only adding messages, flush() should not rewrite the - # mailbox file. See issue #9559. + def test_flush_doesnt_rewrite(self): + # flush() should never replace the mailbox file. See issues + # #9559 and #15122. # Inode number changes if the contents are written to another # file which is then renamed over the original file. So we # must check that the inode number doesn't change. inode_before = os.stat(self._path).st_ino - self._box.add(self._template % 0) + for i in range(10): + self._box.add(self._template % i) self._box.flush() inode_after = os.stat(self._path).st_ino self.assertEqual(inode_before, inode_after) - # Make sure the message was really added + # Make sure the messages were really added self._box.close() self._box = self._factory(self._path) - self.assertEqual(len(self._box), 1) + self.assertEqual(len(self._box), 10) + + # Replace and delete some messages + keys = self._box.keys() + for key in keys[2::3]: + self._box.remove(key) + for i, key in enumerate(keys[3::3], start=10): + self._box[key] = self._template % i + self._box.flush() + + inode_after = os.stat(self._path).st_ino + self.assertEqual(inode_before, inode_after) + + # Make sure the messages are as expected + self._box.close() + self._box = self._factory(self._path) + for key, i in zip(self._box.keys(), [0, 1, 10, 4, 11, 7, 12]): + self.assertEqual(self._box.get_string(key), self._template % i) def test_permissions_after_flush(self): # See issue #5346