/*-------------------------------------------------------------------------
 * vmadump.c: Virtual Memory Area dump/restore routines
 *
 * Copyright (C) 1999-2001 by Erik Hendriks
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: vmadump_common.c,v 1.86.4.8 2011/10/04 21:07:22 phargrov Exp $
 *
 * THIS VERSION MODIFIED FOR BLCR
 *-----------------------------------------------------------------------*/
#include
#include
#include
#include "blcr_config.h"
#include
#include
#include
#include
#include
#include
#if !HAVE_FILE_F_LOCK
#include
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38)
#include
#endif
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define __VMADUMP_INTERNAL__
#include "vmadump.h"

static char vmad_magic[3] = VMAD_MAGIC;

MODULE_AUTHOR("Erik Hendriks and the BLCR Team http://ftg.lbl.gov/checkpoint");
MODULE_DESCRIPTION("VMADump - Virtual Memory Area Dumper (modified for " PACKAGE_STRING ")");
MODULE_LICENSE("GPL");

/* A note about symbols...
 *
 * This module requires the following extra symbols from the kernel:
 *
 *   sys_mprotect
 *   do_exit
 *   do_sigaction
 *   do_execve
 *   [and more w/ BLCR...]
 */

#if 0 /* BLCR uses its own I/O paths instead */
/*--------------------------------------------------------------------
 * Some utility stuff for reading and writing and misc kernel syscalls.
 *------------------------------------------------------------------*/
static ssize_t default_read_file(struct vmadump_map_ctx *ctx,
                                 struct file *file, void *buf, size_t count)
{
    return vfs_read(file, buf, count, &file->f_pos);
}

static ssize_t read_user(struct vmadump_map_ctx *ctx, struct file *file,
                         void *buf, size_t count)
{
    ssize_t r, bytes = count;
    ssize_t (*rfunc)(struct vmadump_map_ctx *ctx, struct file *file,
                     void *buf, size_t count);

    rfunc = (ctx && ctx->read_file) ? ctx->read_file : default_read_file;
    while (bytes) {
#if BITS_PER_LONG == 32
        /* Avoid accidentally negative return values */
        r = rfunc(ctx, file, buf, ((bytes > 0x40000000) ? 0x40000000 : bytes));
#elif BITS_PER_LONG == 64
        r = rfunc(ctx, file, buf, bytes);
#else
#error "Unknown BITS_PER_LONG"
#endif
        if (r < 0) return r;
        if (r == 0) return count - bytes;
        bytes -= r;
        buf = (char *)buf + r;
    }
    return count;
}

ssize_t read_kern(struct vmadump_map_ctx *ctx, struct file *file,
                  void *buf, size_t count)
{
    ssize_t err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = read_user(ctx, file, buf, count);
    set_fs(oldfs);
    return err;
}

static ssize_t default_write_file(struct vmadump_map_ctx *ctx,
                                  struct file *file,
                                  const void *buf, size_t count)
{
    return vfs_write(file, buf, count, &file->f_pos);
}

static ssize_t write_user(struct vmadump_map_ctx *ctx, struct file *file,
                          const void *buf, size_t count)
{
    ssize_t w, bytes = count;
    ssize_t (*wfunc)(struct vmadump_map_ctx *ctx, struct file *file,
                     const void *buf, size_t count);

    wfunc = (ctx && ctx->write_file) ? ctx->write_file : default_write_file;
    while (bytes) {
#if BITS_PER_LONG == 32
        /* Avoid accidentally negative return values */
        w = wfunc(ctx, file, buf, ((bytes > 0x40000000) ? 0x40000000 : bytes));
#elif BITS_PER_LONG == 64
        w = wfunc(ctx, file, buf, bytes);
#else
#error "Unknown BITS_PER_LONG"
#endif
        if (w < 0) return w;
        if (w == 0) return count - bytes;
        bytes -= w;
        buf = (char *)buf + w;
    }
    return count;
}

ssize_t write_kern(struct vmadump_map_ctx *ctx, struct file *file,
                   const void *buf, size_t count)
{
    ssize_t err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = write_user(ctx, file, buf, count);
    set_fs(oldfs);
    return err;
}
#endif
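/* The read_user()/write_user() helpers above loop until the full count is
 * transferred, capping each request at 1GB on 32-bit kernels so a huge
 * size_t can never be reported back as a negative ssize_t.  A minimal
 * userspace sketch of the same pattern (names illustrative, not part of
 * vmadump):
 */
#if 0 /* illustrative sketch only */
#include <unistd.h>
#include <sys/types.h>

static ssize_t read_all(int fd, void *buf, size_t count)
{
    size_t done = 0;
    while (done < count) {
        size_t chunk = count - done;
        if (chunk > 0x40000000) chunk = 0x40000000; /* keep ssize_t positive */
        ssize_t r = read(fd, (char *)buf + done, chunk);
        if (r < 0) return r;    /* error */
        if (r == 0) break;      /* EOF: return the short count */
        done += r;
    }
    return done;
}
#endif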
#if 0 /* Not needed/maintained for BLCR */
/*--------------------------------------------------------------------
 * Library list stuff.
 *------------------------------------------------------------------*/
/* XXX This should be more configurable than this - this is the idea of
 * figuring out what's a library */
struct liblist_entry {
    struct list_head list;
    char libname[0];
};

static LIST_HEAD(liblist);

static void liblist_clear(void)
{
    struct liblist_entry *entry;
    while (!list_empty(&liblist)) {
        entry = list_entry(liblist.next, struct liblist_entry, list);
        list_del(liblist.next);
        kfree(entry);
    }
}

static struct liblist_entry *liblist_find(const char *filename)
{
    struct list_head *p;
    struct liblist_entry *entry;

    p = liblist.next;
    while (p != &liblist) {
        entry = list_entry(p, struct liblist_entry, list);
        if (strcmp(entry->libname, filename) == 0)
            return entry;
        p = p->next;
    }
    return 0;
}

static int liblist_add(char *filename)
{
    struct liblist_entry *entry;

    /* Don't add things twice */
    if (liblist_find(filename)) return 0;

    entry = kmalloc(sizeof(*entry)+strlen(filename)+1, GFP_KERNEL);
    if (!entry) return -ENOMEM;
    strcpy(entry->libname, filename);
    list_add(&entry->list, &liblist);
    return 0;
}

static int liblist_del(char *filename)
{
    struct liblist_entry *entry;

    entry = liblist_find(filename);
    if (entry) {
        list_del(&entry->list);
        kfree(entry);
        return 0;
    }
    return -ENOENT;
}

/* Returns the size of our library list converted to text */
static int liblist_size(void)
{
    int len;
    struct list_head *p;
    struct liblist_entry *entry;

    len = 0;
    p = liblist.next;
    while (p != &liblist) {
        entry = list_entry(p, struct liblist_entry, list);
        len += strlen(entry->libname)+1;
        p = p->next;
    }
    len++;                      /* for trailing null. */
    return len;
}

static int do_lib_op(int request, char *buf, int size)
{
    int err, len;
    char *filename;
    struct list_head *p;
    struct liblist_entry *entry;

    switch(request) {
    case VMAD_LIB_CLEAR:
        if (!capable(CAP_SYS_ADMIN)) return -EPERM;
        liblist_clear();
        return 0;
    case VMAD_LIB_ADD:
        if (!capable(CAP_SYS_ADMIN)) return -EPERM;
        filename = getname(buf);
        if (IS_ERR(filename)) return PTR_ERR(filename);
        err = liblist_add(filename);
        putname(filename);
        return err;
    case VMAD_LIB_DEL:
        if (!capable(CAP_SYS_ADMIN)) return -EPERM;
        filename = getname(buf);
        if (IS_ERR(filename)) return PTR_ERR(filename);
        err = liblist_del(filename);
        putname(filename);
        return err;
    case VMAD_LIB_SIZE:
        return liblist_size();
    case VMAD_LIB_LIST:
        len = liblist_size();
        if (len > size) return -ENOSPC;
        size = len;

        /* Copy all those strings out to user space. */
        p = liblist.next;
        while (p != &liblist) {
            entry = list_entry(p, struct liblist_entry, list);
            len = strlen(entry->libname);
            if (copy_to_user(buf, entry->libname, len)) return -EFAULT;
            buf += len;
            put_user('\0', buf++);
            p = p->next;
        }
        put_user('\0', buf);
        return size;
    default:
        return -EINVAL;
    }
}
#endif

static char *default_map_name(struct file *f, char *buffer, int size)
{
#if HAVE_NAMEIDATA_DENTRY
    return d_path(f->f_dentry, f->f_vfsmnt, buffer, size);
#elif HAVE_NAMEIDATA_PATH
    return d_path(&f->f_path, buffer, size);
#else
#error
#endif
}

#if 0 /* Not needed/maintained for BLCR */
/* this is gonna be handled with contexts too */
static int is_library(const char *filename)
{
    return (liblist_find(filename) != 0);
}
#else
#define is_library(FILE) 0
#endif

static struct file *default_map_open(cr_rstrt_proc_req_t *ctx,
                                     const char *filename, int flags)
{
    struct file *filp;
    const char *reloc_filename;

    reloc_filename = cr_relocate_path(ctx->req->relocate, filename, 0);
    if (IS_ERR(reloc_filename)) {
        filp = (struct file *)reloc_filename;
        goto out;
    }

    filp = filp_open(reloc_filename, flags, 0);
    if (IS_ERR(filp)) {
        if (reloc_filename != filename) {
            CR_ERR_CTX(ctx, "failed to open '%s', relocated from '%s'",
                       reloc_filename, filename);
        }
        /* Caller prints flags and return value */
    }
    if (reloc_filename != filename) {
        __putname(reloc_filename);
    }
out:
    return filp;
}
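/* default_map_open() above consults cr_relocate_path() so that a file saved
 * under one pathname can be reopened under another at restart time.  A
 * minimal userspace sketch of prefix-based path relocation (hypothetical
 * helper, not BLCR's implementation):
 */
#if 0 /* illustrative sketch only */
#include <stdio.h>
#include <string.h>

/* if 'path' starts with 'from', rewrite it into 'out' under 'to';
 * returns 1 when a relocation was performed, 0 otherwise */
static int relocate(const char *path, const char *from, const char *to,
                    char *out, size_t outlen)
{
    size_t flen = strlen(from);
    if (strncmp(path, from, flen) != 0) return 0;   /* no match */
    snprintf(out, outlen, "%s%s", to, path + flen);
    return 1;
}
#endif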
#if 0 /* Hooks code not needed/maintained for BLCR */
/*--------------------------------------------------------------------
 * Dump hook stuff.
 *------------------------------------------------------------------*/
struct hook_t {
    struct list_head list;
    struct vmadump_hook *hook;
};

struct vmadump_hook_handle {
    int rw;                     /* for checking on the user */
    struct vmadump_map_ctx *ctx;
    struct file *file;
};

static struct rw_semaphore hook_lock;
static LIST_HEAD(hooks);

int vmadump_add_hook(struct vmadump_hook *hook)
{
    struct hook_t *h, *new_h;
    struct list_head *l;

    new_h = kmalloc(sizeof(*new_h), GFP_KERNEL);
    new_h->hook = hook;

    down_write(&hook_lock);
    /* check to make sure that this hook isn't being added twice */
    for (l = hooks.next; l != &hooks; l = l->next) {
        h = list_entry(l, struct hook_t, list);
        if (h->hook == hook) {
            up_write(&hook_lock);
            kfree(new_h);
            return -EEXIST;
        }
    }
    list_add_tail(&new_h->list, &hooks);
    up_write(&hook_lock);

    printk(KERN_INFO "vmadump: Registered hook \"%s\"\n", hook->tag);
    return 0;
}

int vmadump_del_hook(struct vmadump_hook *hook)
{
    struct hook_t *h;
    struct list_head *l;

    down_write(&hook_lock);
    for (l = hooks.next; l != &hooks; l = l->next) {
        h = list_entry(l, struct hook_t, list);
        if (h->hook == hook) {
            list_del(&h->list);
            up_write(&hook_lock);
            printk(KERN_INFO "vmadump: Unregistered hook \"%s\"\n", hook->tag);
            kfree(h);
            return 0;
        }
    }
    up_write(&hook_lock);
    return -ENOENT;
}

struct vmadump_callback_handle {
    int rw;
    struct vmadump_map_ctx *ctx;
    struct file *file;
};

/* Call every hook freeze function */
static int do_freeze_hooks(struct vmadump_map_ctx *ctx, struct file *file,
                           struct pt_regs *regs, int flags)
{
    long bytes = 0, r;
    struct hook_t *h;
    struct list_head *l;
    struct vmadump_hook_header hookhdr;
    struct vmadump_hook_handle hh = { 1, ctx, file };

    down_read(&hook_lock);
    for (l = hooks.next; l != &hooks; l = l->next) {
        h = list_entry(l, struct hook_t, list);
        r = h->hook->freeze(&hh, regs, flags);
        if (r < 0) {
            up_read(&hook_lock);
            return r;
        }
        bytes += r;
    }
    up_read(&hook_lock);

    /* Terminate the list of hooks */
    memset(&hookhdr, 0, sizeof(hookhdr));
    r = write_kern(ctx, file, &hookhdr, sizeof(hookhdr));
    if (r < 0) return r;
    if (r != sizeof(hookhdr)) return -EIO;
    bytes += r;
    return bytes;
}

static long skip_data(struct vmadump_map_ctx *ctx, struct file *file, long len)
{
    long r = 0;
    void *page;

    page = (void *) __get_free_page(GFP_KERNEL);
    if (!page) return -ENOMEM;
    while (len > 0) {
        r = read_kern(ctx, file, page, (len > PAGE_SIZE) ? PAGE_SIZE : len);
        if (r <= 0) break;
        len -= r;
    }
    free_page((long) page);
    if (r == 0) r = -EIO;       /* end of file.... */
    return (len > 0) ? r : 0;
}

static int do_thaw_hooks(struct vmadump_map_ctx *ctx, struct file *file,
                         struct pt_regs *regs)
{
    long r;
    struct hook_t *h;
    struct list_head *l;
    struct vmadump_hook_header hookhdr;
    struct vmadump_hook_handle hh = { 0, ctx, file };

    r = read_kern(ctx, file, &hookhdr, sizeof(hookhdr));
    if (r != sizeof(hookhdr)) goto err;
    while (hookhdr.tag[0]) {
        /* Do a bit of sanity checking on this dump header */
        hookhdr.tag[VMAD_HOOK_TAG_LEN-1] = 0;   /* null terminate that string... */
        if (hookhdr.size <= 0) {
            r = -EINVAL;
            goto err;
        }

        /* See if we find a matching hook */
        down_read(&hook_lock);
        for (l = hooks.next; l != &hooks; l = l->next) {
            h = list_entry(l, struct hook_t, list);
            if (strcmp(hookhdr.tag, h->hook->tag) == 0) {
                r = h->hook->thaw(&hh, &hookhdr, regs);
                break;
            }
        }
        if (l == &hooks)
            r = skip_data(ctx, file, hookhdr.size);
        up_read(&hook_lock);
        if (r) goto err;

        r = read_kern(ctx, file, &hookhdr, sizeof(hookhdr));
        if (r != sizeof(hookhdr)) goto err;
    }
    return 0;

err:
    if (r >= 0) r = -EIO;
    return r;
}

/* read/write calls for use by hooks */
ssize_t vmadump_read_u(struct vmadump_hook_handle *h, void *buf, size_t count)
{
    if (h->rw != 0) return -EINVAL;
    return read_user(h->ctx, h->file, buf, count);
}

ssize_t vmadump_read_k(struct vmadump_hook_handle *h, void *buf, size_t count)
{
    ssize_t err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = vmadump_read_u(h, buf, count);
    set_fs(oldfs);
    return err;
}

ssize_t vmadump_write_u(struct vmadump_hook_handle *h, const void *buf, size_t count)
{
    if (h->rw != 1) return -EINVAL;
    return write_user(h->ctx, h->file, buf, count);
}

ssize_t vmadump_write_k(struct vmadump_hook_handle *h, const void *buf, size_t count)
{
    ssize_t err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = vmadump_write_u(h, buf, count);
    set_fs(oldfs);
    return err;
}
#endif
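/* directio_start() (below) only flips O_DIRECT on; the file offset and the
 * buffers involved must already be suitably aligned (vmadump aligns to
 * PAGE_SIZE).  A userspace sketch of the alignment discipline O_DIRECT
 * demands (illustrative only; 4096 stands in for the page size):
 */
#if 0 /* illustrative sketch only */
#define _GNU_SOURCE             /* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

static int write_aligned(const char *path, size_t len)
{
    void *buf;
    ssize_t w;
    int fd = open(path, O_WRONLY | O_CREAT | O_DIRECT, 0600);
    if (fd < 0) return -1;
    /* buffer address, length and file offset must all be aligned */
    if (posix_memalign(&buf, 4096, len)) { close(fd); return -1; }
    w = write(fd, buf, len);    /* len must be a multiple of 4096 */
    free(buf);
    close(fd);
    return (w == (ssize_t)len) ? 0 : -1;
}
#endif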
/*
 * directio_start
 *
 * The caller MUST ensure that the filp is aligned properly (usually to
 * 512 bytes) before calling this routine.  I've aligned everything to page
 * size.
 */
static unsigned int directio_start(struct file *filp)
{
    unsigned int saved_flags = filp->f_flags;

    /*
     * This was basically ripped out of setfl() in 2.6.22, with some #if's
     * added based on 2.6.1->2 and 2.6.7->8 changes to that function.
     * And for 2.6.29->30 changes (file.f_lock)
     *
     * The alternative is to call sys_fcntl() directly.  This is a good idea.
     * But right now I'm not in the mood to walk the fd array to identify the
     * proper file descriptor.
     *
     * XXX: Do that.
     */
#if HAVE_FILE_F_MAPPING
    /* Order matters, if kernel has both f_mapping and i_mapping use f_mapping */
    if (!filp->f_mapping || !filp->f_mapping->a_ops ||
        !filp->f_mapping->a_ops->direct_IO)
        goto out;
#elif HAVE_INODE_I_MAPPING
    struct inode *inode = filp->f_dentry->d_inode;
    if (!inode->i_mapping || !inode->i_mapping->a_ops ||
        !inode->i_mapping->a_ops->direct_IO)
        goto out;
#else
#error
#endif

#if HAVE_FILE_OPERATIONS_CHECK_FLAGS
    if (filp->f_op && filp->f_op->check_flags &&
        filp->f_op->check_flags(saved_flags | O_DIRECT))
        goto out;
#else
    /* OK */
#endif

#if HAVE_FILE_F_LOCK
    spin_lock(&filp->f_lock);
    filp->f_flags |= O_DIRECT;
    spin_unlock(&filp->f_lock);
#else
    /* The lock_kernel() does appear necessary here because both
     * setfl() and ioctl() use it when modifying f_flags.  So, to
     * prevent corruption of f_flags, we take the lock too.
     * Of course if somebody is changing f_flags on the context
     * fd while we are actively writing... not good.
     */
    lock_kernel();
    filp->f_flags |= O_DIRECT;
    unlock_kernel();
#endif
out:
    return saved_flags;
}

static void directio_stop(struct file *filp, unsigned int saved_flags)
{
#if HAVE_FILE_F_LOCK
    spin_lock(&filp->f_lock);
    filp->f_flags = saved_flags;
    spin_unlock(&filp->f_lock);
#else
    lock_kernel();
    filp->f_flags = saved_flags;
    unlock_kernel();
#endif
}

/* sys_prctl "by value" */
int vmadump_prctl_v(int option, unsigned long arg2)
{
    return sys_prctl(option, arg2, 0, 0, 0);
}

/* sys_prctl "by reference" */
int vmadump_prctl_k(int option, void *arg2)
{
    int err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = sys_prctl(option, (unsigned long)arg2, 0, 0, 0);
    set_fs(oldfs);
    return err;
}

int vmadump_sigaltstack_k(const stack_t *ss, stack_t *oss)
{
    int err;
    mm_segment_t oldfs;

    oldfs = get_fs();
    set_fs(KERNEL_DS);
    err = do_sigaltstack(ss, oss, 0);
    set_fs(oldfs);
    return err;
}

/*--------------------------------------------------------------------
 * Process "thawing" routines.
 *------------------------------------------------------------------*/
long vmad_remap(cr_rstrt_proc_req_t *ctx, unsigned long from_addr,
                unsigned long to_addr, long len)
{
    long r;
    unsigned long new_addr;
    long diff = (from_addr > to_addr) ? (from_addr - to_addr)
                                      : (to_addr - from_addr);

    if (diff < len) {
        /* Check for overlap case */
        unsigned long tmp_addr;
        /*CR_WARN_CTX(ctx, "vmad_remap using temporary");*/
        tmp_addr = do_mmap(NULL, 0, len, PROT_NONE,
                           MAP_PRIVATE|MAP_ANONYMOUS, 0);
        r = (tmp_addr & (PAGE_SIZE-1));
        if (r) {
            CR_ERR_CTX(ctx, "vmad_remap failed to allocate temporary %d", (int)r);
            goto err;
        }
        new_addr = sys_mremap(from_addr, len, len,
                              MREMAP_FIXED|MREMAP_MAYMOVE, tmp_addr);
        if (new_addr != tmp_addr) {
            r = (new_addr & (PAGE_SIZE-1)) ? new_addr : -ENOMEM;
            CR_ERR_CTX(ctx, "vmad_remap failed to use temporary %d", (int)r);
            goto err;
        }
        from_addr = tmp_addr;
    }

    new_addr = sys_mremap(from_addr, len, len,
                          MREMAP_FIXED|MREMAP_MAYMOVE, to_addr);
    if (new_addr != to_addr) {
        r = (new_addr & (PAGE_SIZE-1)) ? new_addr : -ENOMEM;
        CR_ERR_CTX(ctx, "vmad_remap failed %d", (int)r);
        goto err;
    }

    r = 0;
err:
    return r;
}

static int mmap_file(cr_rstrt_proc_req_t *ctx,
                     const struct vmadump_vma_header *head,
                     char *filename, unsigned long flags)
{
    struct file *file;
    long mapaddr;
    int open_flags;
    unsigned long prot;

    if (head->flags & VM_MAYSHARE) {
        if (head->flags & VM_MAYWRITE) {
            open_flags = (head->flags & (VM_MAYREAD|VM_MAYEXEC)) ? O_RDWR : O_WRONLY;
            prot = PROT_WRITE;
        } else {
            open_flags = O_RDONLY;
            prot = 0;
        }
        if (head->flags & VM_MAYREAD) prot |= PROT_READ;
        if (head->flags & VM_MAYEXEC) prot |= PROT_EXEC;
    } else {
        open_flags = O_RDONLY;
        prot = PROT_READ|PROT_WRITE|PROT_EXEC;
    }

    /* This is a lot like open w/o a file descriptor */
#if 0 /* Not supported in BLCR */
    if (ctx && ctx->map_open)
        file = ctx->map_open(ctx, filename, open_flags);
    else
#endif
        file = default_map_open(ctx, filename, open_flags);
    if (IS_ERR(file)) {
        CR_ERR_CTX(ctx, "open('%s', 0x%x) failed: %d",
                   filename, open_flags, (int)PTR_ERR(file));
        return PTR_ERR(file);
    }

    down_write(&current->mm->mmap_sem);
    mapaddr = do_mmap(file, head->start, head->end - head->start,
                      prot, flags, head->offset);
    up_write(&current->mm->mmap_sem);
    fput(file);
    if (mapaddr != head->start)
        CR_ERR_CTX(ctx, "do_mmap(, %p, %p, ...) failed: %p",
                   (void *) head->start,
                   (void *) (head->end-head->start),
                   (void *) mapaddr);
    return (mapaddr == head->start) ? 0 : mapaddr;
}

/* Reads in the header giving the number of bytes of "fill" to
 * achieve alignment, and returns that value in *buf_len.
 * ONLY if "fill" is less than VMAD_CHUNKHEADER_MIN bytes is the
 * corresponding padding read here.
 */
static long load_page_list_header(cr_rstrt_proc_req_t *ctx, struct file *file,
                                  void *buf, unsigned int *buf_len)
{
    struct vmadump_page_list_header header;
    long bytes = 0;
    long r;

    /* load in the header so we know how many bytes of alignment there are */
    r = read_kern(ctx, file, &header, sizeof(header));
    if (r != sizeof(header)) goto err;
    bytes += r;

    /* now read in the padding if too small to fit page headers */
    if (!header.fill) {
        /* No padding at all */
    } else if (header.fill < VMAD_CHUNKHEADER_MIN) {
        /* TODO: use cr_skip(), which may someday seek */
        r = read_kern(ctx, file, buf, header.fill);
        if (r != header.fill) goto err;
        bytes += r;
    } else {
        /* Padding is large enough to use for the page headers */
        *buf_len = header.fill;
    }
    return bytes;

err:
    if (r >= 0) r = -EIO;       /* map short reads to EIO */
    return r;
}

/*
 * Returns 0 or 1 on success.
 *  0 = EOF, meaning no more pages are available
 *  1 = Caller must call again to load more pages
 * <0  Some error
 */
long load_page_chunks(cr_rstrt_proc_req_t *ctx, struct file *file,
                      struct vmadump_page_header *headers,
                      int sizeof_headers, int is_exec)
{
    unsigned long old_filp_flags;
    long r = 1;
    const int max_chunks = sizeof_headers/sizeof(*headers);
    int i;

    /* Now load the chunk page */
    r = read_kern(ctx, file, headers, sizeof_headers);
    if (r != sizeof_headers) {
        goto bad_read;
    }

    /* spin up direct IO */
    old_filp_flags = directio_start(file);

    /* load each chunk */
    for (i = 0; i < max_chunks; ++i) {
        const long len = headers[i].num_pages << PAGE_SHIFT;
        const unsigned long page_start = headers[i].start;

        if (page_start == VMAD_END_OF_CHUNKS) {
            /* EOF */
            r = 0;
            break;
        }
        r = read_user(ctx, file, (void *) page_start, len);
        if (r != len) {
            goto bad_read;
        }
        if (is_exec) {
            flush_icache_range(page_start, page_start + len);
        }
        r = 1;
    }

    /* disable direct IO */
    directio_stop(file, old_filp_flags);
    return r;

bad_read:
    if (r >= 0) { r = -EIO; }   /* map short reads to EIO */
    return r;
}

int vmadump_load_page_list(cr_rstrt_proc_req_t *ctx, struct file *file, int is_exec)
{
    struct vmadump_page_header *chunks;
    long r;
    unsigned int sizeof_chunks = VMAD_CHUNKHEADER_SIZE;

    chunks = (struct vmadump_page_header *) kmalloc(sizeof_chunks, GFP_KERNEL);
    if (chunks == NULL) {
        r = -ENOMEM;
        goto err;
    }

    /* handle alignment padding - either skip it or use it for first batch of chunks */
    r = load_page_list_header(ctx, file, chunks, &sizeof_chunks);
    if (r < 0) {
        goto out_free;
    }

    /* now load all the page chunks */
    do {
        r = load_page_chunks(ctx, file, chunks, sizeof_chunks, is_exec);
        if (r < 0) {
            goto out_free;
        }
        sizeof_chunks = VMAD_CHUNKHEADER_SIZE;  /* After the first, all chunk arrays are this size */
    } while (r > 0);

out_free:
    kfree(chunks);
err:
    return r;
}
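/* The page-list stream is padded so that page data starts page-aligned for
 * direct I/O: the header records how many "fill" bytes follow it, and when
 * that fill is at least VMAD_CHUNKHEADER_MIN it is reused to hold the first
 * array of chunk headers instead of being skipped.  A sketch of the fill
 * computation used by load_page_list_header() above and its mirror
 * store_page_list_header() below (sizes illustrative):
 */
#if 0 /* illustrative sketch only */
#include <stdio.h>

#define EX_PAGE_SIZE 4096u

/* bytes of padding needed after the header so page data is page-aligned */
static unsigned int fill_after(unsigned long long f_pos, unsigned int hdr_size)
{
    return EX_PAGE_SIZE - ((f_pos + hdr_size) & (EX_PAGE_SIZE - 1));
}

int main(void)
{
    /* e.g. an 8-byte header written at file offset 100 */
    printf("fill = %u\n", fill_after(100, 8));  /* 4096 - 108 = 3988 */
    return 0;
}
#endif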
static int load_map(cr_rstrt_proc_req_t *ctx, struct file *file,
                    struct vmadump_vma_header *head)
{
    long r;
    unsigned long mmap_prot, mmap_flags, addr;

    if (head->namelen == VMAD_NAMELEN_ARCH) {
#if VMAD_HAVE_ARCH_MAPS
        return vmad_load_arch_map(ctx, file, head);
#else
        CR_ERR_CTX(ctx, "Found an arch-specific mapping in a kernel without any???");
        return -EINVAL;
#endif
    }

    mmap_prot  = 0;
    mmap_flags = MAP_FIXED | ((head->flags & VM_MAYSHARE) ? MAP_SHARED : MAP_PRIVATE);
    if (head->flags & VM_READ)       mmap_prot  |= PROT_READ;
    if (head->flags & VM_WRITE)      mmap_prot  |= PROT_WRITE;
    if (head->flags & VM_EXEC)       mmap_prot  |= PROT_EXEC;
    if (head->flags & VM_GROWSDOWN)  mmap_flags |= MAP_GROWSDOWN;
    if (head->flags & VM_EXECUTABLE) mmap_flags |= MAP_EXECUTABLE;
    if (head->flags & VM_DENYWRITE)  mmap_flags |= MAP_DENYWRITE;

    if (head->namelen > 0) {
        char *filename;
        if (head->namelen > PAGE_SIZE) {
            CR_ERR_CTX(ctx, "thaw: bogus namelen %d", (int) head->namelen);
            return -EINVAL;
        }
        filename = kmalloc(head->namelen+1, GFP_KERNEL);
        if (!filename) {
            r = -ENOMEM;
            goto err;
        }
        r = read_kern(ctx, file, filename, head->namelen);
        if (r != head->namelen) {
            kfree(filename);
            goto err;
        }
        filename[head->namelen] = 0;

        r = mmap_file(ctx, head, filename, mmap_flags);
        if (r) {
            CR_ERR_CTX(ctx, "mmap failed: %s", filename);
            kfree(filename);
            return r;
        }
        kfree(filename);
    } else {
        /* Load the data from the dump file */
        down_write(&current->mm->mmap_sem);
        addr = do_mmap(0, head->start, head->end - head->start,
                       mmap_prot|PROT_WRITE, mmap_flags, 0);
        up_write(&current->mm->mmap_sem);
        if (addr != head->start) {
            CR_ERR_CTX(ctx, "do_mmap(0, %08lx, %08lx, ...) = 0x%08lx (failed)",
                       head->start, head->end - head->start, addr);
            if ((addr != head->start) && IS_ERR((void *) addr)) {
                r = PTR_ERR((void *) addr);
            } else {
                r = -EINVAL;
            }
            return r;
        }
    }

    /* Read in patched pages */
    r = vmadump_load_page_list(ctx, file, (mmap_prot & PROT_EXEC));
    if (r) goto err;

    if (sys_mprotect(head->start, head->end - head->start, mmap_prot))
        CR_ERR_CTX(ctx, "thaw: mprotect failed. (ignoring)");
    return 0;

err:
    if (r >= 0) r = -EIO;       /* map short reads to EIO */
    return r;
}

static int vmadump_load_sigpending(cr_rstrt_proc_req_t *ctx, struct file *file,
                                   struct sigpending *sigpending, int shared)
{
    sigset_t pending_set;
    int flags;
    long r;

    r = read_kern(ctx, file, &flags, sizeof(flags));
    if (r != sizeof(flags)) goto bad_read;
    while (flags != -1) {
        struct siginfo info;
        r = read_kern(ctx, file, &info, sizeof(info));
        if (r != sizeof(info)) goto bad_read;

        /* XXX: if/when we do support posix timers directly, we might
         * need to deal with SIGQUEUE_PREALLOC in flags.
         * For now there is no harm in ignoring it.
         */
        if (shared) {
            read_lock(&tasklist_lock);
            r = group_send_sig_info(info.si_signo, &info, current);
            read_unlock(&tasklist_lock);
        } else {
            r = send_sig_info(info.si_signo, &info, current);
        }

        r = read_kern(ctx, file, &flags, sizeof(flags));
        if (r != sizeof(flags)) goto bad_read;
    }

    /* Corresponding signal set comes last */
    r = read_kern(ctx, file, &pending_set, sizeof(pending_set));
    if (r != sizeof(pending_set)) goto bad_read;
    spin_lock_irq(&current->sighand->siglock);
    sigorsets(&sigpending->signal, &sigpending->signal, &pending_set);
    recalc_sigpending();
    spin_unlock_irq(&current->sighand->siglock);

    return 0;

bad_read:
    if (r >= 0) r = -EIO;       /* Map short reads to EIO */
    return r;
}

/* Values of (struct vmad_prctl).type */
enum vmad_prctl_type {
    vmad_prctl_int_ref, /* integer read by reference, written by value */
    vmad_prctl_int_val, /* integer read and written by value */
    vmad_prctl_bool,    /* integer read and written by value, write must be 0 or 1 */
    vmad_prctl_comm,    /* string of length TASK_COMM_LEN, set by reference */
};

struct vmad_prctl {
    int option;         /* First argument to prctl() to set */
    int type;           /* How to handle 2nd argument */
};

static int vmadump_load_prctl(cr_rstrt_proc_req_t *ctx, struct file *file)
{
    struct vmad_prctl header;
    long r;

    r = read_kern(ctx, file, &header, sizeof(header));
    if (r != sizeof(header)) goto bad_read;
    while (header.option) { /* option==0 is end of the list */
        switch (header.type) {
        case vmad_prctl_bool:
        case vmad_prctl_int_val:
        case vmad_prctl_int_ref: {
            unsigned int value;
            r = read_kern(ctx, file, &value, sizeof(value));
            if (r != sizeof(value)) goto bad_read;
            r = vmadump_prctl_v(header.option, value);
            break;
        }
        case vmad_prctl_comm: {
#if defined(PR_SET_NAME)
            char value[TASK_COMM_LEN];
            r = read_kern(ctx, file, &value, sizeof(value));
            if (r != sizeof(value)) goto bad_read;
            r = vmadump_prctl_k(header.option, &value);
#else
            if (sizeof(current->comm) != TASK_COMM_LEN) {
                CR_ERR_CTX(ctx, "vmadump: TASK_COMM_LEN changed?");
                r = -EINVAL;
                goto bad_read;
            }
            r = read_kern(ctx, file, current->comm, TASK_COMM_LEN);
            if (r != TASK_COMM_LEN) goto bad_read;
#endif
            break;
        }
        default:
            CR_ERR_CTX(ctx, "vmadump: prctl %d has unknown type code %d",
                       header.option, header.type);
            goto bad_read;
        }
#if 0 /* Confusing when issued, for instance, for PR_SET_SECCOMP where permission is denied */
        if (r < 0) {
            CR_WARN_CTX(ctx, "vmadump warning: failed to set prctl(%d) err %ld",
                        header.option, r);
        }
#endif
        r = read_kern(ctx, file, &header, sizeof(header));
        if (r != sizeof(header)) goto bad_read;
    }
    return 0;

bad_read:
    if (r >= 0) r = -EIO;       /* Map short reads to EIO */
    return r;
}
long vmadump_thaw_proc(cr_rstrt_proc_req_t *ctx, struct file *file,
                       struct pt_regs *regs, int flags)
{
    long r;
    pid_t pid;
#if BITS_PER_LONG == 64
    unsigned long orig_personality = personality(current->personality);
#endif

    {
        struct vmadump_header header;

        /*--- First some sanity checking ---*/
        r = read_kern(ctx, file, &header, sizeof(header));
        if (r != sizeof(header)) {
            CR_ERR_CTX(ctx, "vmadump: failed to read header: %ld", r);
            return -EINVAL;
        }
        if (memcmp(header.magic, vmad_magic, sizeof(header.magic))) {
            CR_ERR_CTX(ctx, "vmadump: invalid signature");
            return -EINVAL;
        }
        if (header.fmt_vers != VMAD_FMT_VERS) {
            CR_ERR_CTX(ctx, "vmadump: dump version mismatch. dump=%d; "
                       "kernel=%d", (int)header.fmt_vers, (int)VMAD_FMT_VERS);
            return -EINVAL;
        }
        if (header.arch != VMAD_ARCH) {
            CR_ERR_CTX(ctx, "vmadump: architecture mismatch.");
            return -EINVAL;
        }
        if (header.major != ((LINUX_VERSION_CODE >> 16) & 0xFF) ||
            header.minor != ((LINUX_VERSION_CODE >> 8) & 0xFF)
#if defined(STRICT_VERSION_CHECK) && STRICT_VERSION_CHECK
            || header.patch != (LINUX_VERSION_CODE & 0xFF)
#endif
            ) {
            CR_ERR_CTX(ctx, "vmadump: kernel version mismatch.");
            return -EINVAL;
        }
    }

    /* Ummm... Point of no-return is here.... maybe try to move this
     * down a bit? */

#if 0 /* Now handled w/ other prctl values */
    if (!(flags & VMAD_DUMP_REGSONLY)) {
        /* Read our new comm */
        r = read_kern(ctx, file, current->comm, sizeof(current->comm));
        if (r != sizeof(current->comm)) goto bad_read;
    }
#endif
    vmadump_load_prctl(ctx, file);

    /* PID, provided only for the benefit of our caller */
    {
        r = read_kern(ctx, file, &pid, sizeof(pid));
        if (r != sizeof(pid)) goto bad_read;
    }

    /* Credentials, important to do before we try to open any files */
    {
        r = cr_load_creds(ctx);
        if (r < 0) goto bad_read;
    }

    /*
     * CPU-specific register restore stuff
     *
     * Note that we're not presuming that our current regs pointer
     * points to anything even vaguely reasonable.  This is done to
     * support bproc type kernel threads that have never been user
     * processes before.
     */
    r = vmadump_restore_cpu(ctx, file, regs);
    if (r) goto bad_read;

    /*--- Signal information ---------------------------------------*/
    {
        int i;
        stack_t sig_stack;
        sigset_t sig_blocked;
        struct k_sigaction sig_action;

        /* Restore sigaltstack */
        r = read_kern(ctx, file, &sig_stack, sizeof(sig_stack));
        if (r != sizeof(sig_stack)) goto bad_read;
        r = vmadump_sigaltstack_k(&sig_stack, NULL);
        if (r < 0) goto bad_read;

        /* Install sets of blocked and pending signals */
        r = read_kern(ctx, file, &sig_blocked, sizeof(sig_blocked));
        if (r != sizeof(sig_blocked)) goto bad_read;
        sigdelsetmask(&sig_blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
        spin_lock_irq(&current->sighand->siglock);
        memcpy(&current->blocked, &sig_blocked, sizeof(sig_blocked));
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);

        r = vmadump_load_sigpending(ctx, file, &current->pending, 0);
        if (r < 0) goto bad_read;
        if (!(flags & VMAD_DUMP_REGSONLY)) {
            /* Restore shared queue if not actually shared, or if we are the "leader" */
            r = vmadump_load_sigpending(ctx, file, &current->signal->shared_pending, 1);
            if (r < 0) goto bad_read;
        }

        if (!(flags & VMAD_DUMP_REGSONLY)) {
            for (i=0; i < _NSIG; i++) {
                r = read_kern(ctx, file, &sig_action, sizeof(sig_action));
                if (r != sizeof(sig_action)) goto bad_read;

                if (i != SIGKILL-1 && i != SIGSTOP-1) {
                    r = do_sigaction(i+1, &sig_action, 0);
                    if (r) goto bad_read;
                }
            }
        }
    }

    /*--- Misc other stuff -----------------------------------------*/
    {
        /* our tid ptr */
        r = read_kern(ctx, file, &current->clear_child_tid,
                      sizeof(current->clear_child_tid));
        if (r != sizeof(current->clear_child_tid)) {
            goto bad_read;
        }
    }
    {
        /* personality */
        unsigned long tmp_personality;
        r = read_kern(ctx, file, &tmp_personality, sizeof(tmp_personality));
        if (r != sizeof(tmp_personality)) {
            goto bad_read;
        }
        set_personality(tmp_personality);
    }

    /*--- Memory map meta data -------------------------------------*/
    if (!(flags & VMAD_DUMP_REGSONLY)) {
        struct mm_struct *mm;
        struct vm_area_struct *map;
        struct vmadump_mm_info mm_info;
        struct vmadump_vma_header mapheader;
#if HAVE_MM_MMAP_BASE
        unsigned long mmap_base;
#endif
        int map_count;
#if BITS_PER_LONG == 64
        /* If restarting a 32-bit application from a 64-bit cr_restart, we need
         * to reset TIF_32BIT when unmapping the pages, otherwise do_munmap
         * enters an infinite loop!
         */
#if !defined(TIF_32BIT) && defined(TIF_IA32)
#define TIF_32BIT TIF_IA32
#endif
        int fiddle_tif_32bit = test_thread_flag(TIF_32BIT) &&
                               (orig_personality != PER_LINUX32);
#endif

        mm = current->mm;

        r = read_kern(ctx, file, &mm_info, sizeof(mm_info));
        if (r != sizeof(mm_info)) {
            goto bad_read;
        }
#if HAVE_MM_MMAP_BASE
        r = read_kern(ctx, file, &mmap_base, sizeof(mmap_base));
        if (r != sizeof(mmap_base)) {
            goto bad_read;
        }
#endif

#if BITS_PER_LONG == 64
        if (fiddle_tif_32bit) {
            clear_thread_flag(TIF_32BIT);
        }
#endif
        /* Purge current maps - I'm sure there's a way to keep these around
         * in case creation of the new ones fails in some unfortunate way... */
        while (mm->mmap) {
            map = mm->mmap;
            map_count = mm->map_count;
            r = do_munmap(mm, map->vm_start, map->vm_end - map->vm_start);
            if (r) {
                CR_ERR_CTX(ctx, "do_munmap(%lu, %lu) = %d",
                           map->vm_start, map->vm_end-map->vm_start, (int)r);
            }
            if (map_count == mm->map_count) {
                CR_ERR_CTX(ctx, "do_munmap() loop stuck.");
                r = -EINVAL;
                goto bad_read;
            }
        }
#if BITS_PER_LONG == 64
        if (fiddle_tif_32bit) {
            set_thread_flag(TIF_32BIT);
        }
#endif

        /* (re)set mm parameters which influence get_unmapped_area() */
        down_write(&current->mm->mmap_sem);
#if HAVE_MM_TASK_SIZE
        mm->task_size = TASK_SIZE;
#endif
#if defined(CR_KCODE_arch_pick_mmap_layout)
        arch_pick_mmap_layout(mm);
#endif
#if HAVE_MM_MMAP_BASE
        /* want to restore these even if arch_pick_mmap_layout() set them */
        mm->mmap_base = mmap_base;
        mm->free_area_cache = mmap_base;
#else
        mm->free_area_cache = TASK_UNMAPPED_BASE;
#endif
#if HAVE_MM_CACHED_HOLE_SIZE
        mm->cached_hole_size = ~0UL;
#endif
        up_write(&current->mm->mmap_sem);

        /* Load new map data */
        r = read_kern(ctx, file, &mapheader, sizeof(mapheader));
        while (r == sizeof(mapheader) &&
               (mapheader.start != ~0 || mapheader.end != ~0)) {
            if ((r = load_map(ctx, file, &mapheader))) goto bad_read;
            r = read_kern(ctx, file, &mapheader, sizeof(mapheader));
        }
        if (r != sizeof(mapheader)) goto bad_read;

        down_write(&current->mm->mmap_sem);
        mm->start_code = mm_info.start_code;
        mm->end_code   = mm_info.end_code;
        mm->start_data = mm_info.start_data;
        mm->end_data   = mm_info.end_data;
        mm->start_brk  = mm_info.start_brk;
        mm->brk        = mm_info.brk;
        mm->start_stack= mm_info.start_stack;
        /* FIX ME: validate these pointers */
        mm->arg_start  = mm_info.arg_start;
        mm->arg_end    = mm_info.arg_end;
        mm->env_start  = mm_info.env_start;
        mm->env_end    = mm_info.env_end;
        up_write(&current->mm->mmap_sem);
    }

#if 0 /* Hooks code not needed/maintained for BLCR */
    /*--- Call external thaw hooks ---------------------------------*/
    r = do_thaw_hooks(ctx, file, regs);
    if (r) goto bad_read;
#endif
    return pid;

bad_read:
    if (r >= 0) r = -EIO;
    return r;
}
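/* vmadump_thaw_proc() above refuses dumps whose magic, format version,
 * architecture or kernel version disagree with the running kernel.  A
 * userspace sketch of the same check, assuming a hypothetical mirror of
 * the header fields in the order used by vmadump_freeze_proc()'s
 * initializer below (the real field types live in vmadump.h):
 */
#if 0 /* illustrative sketch only */
#include <string.h>

struct ex_header {              /* hypothetical mirror of vmadump_header */
    char magic[3];
    unsigned char fmt_vers;
    unsigned char arch;
    unsigned char major, minor, patch;
};

static int ex_header_ok(const struct ex_header *h,
                        const char *magic, int fmt_vers, int arch,
                        int kmajor, int kminor)
{
    if (memcmp(h->magic, magic, sizeof(h->magic))) return 0;
    if (h->fmt_vers != fmt_vers || h->arch != arch) return 0;
    return h->major == kmajor && h->minor == kminor; /* patch only if strict */
}
#endif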
/*--------------------------------------------------------------------
 * Process "freezing" routines
 *------------------------------------------------------------------*/
/* This routine walks the page tables.
 * If addr is a huge page, returns NULL and sets *pagep non-null
 * If addr is not in the page tables, returns NULL and sets *pagep=NULL
 * Otherwise returns non-NULL (w/ *pagep undefined) and caller must pte_unmap().
 */
static inline pte_t *vmad_follow_addr(struct page **pagep,
                                      struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd;
#ifdef PTRS_PER_PUD
    pud_t *pud;
#endif
    pmd_t *pmd;

#if !defined(CONFIG_HUGETLBFS)
    /* Nothing to do here */
#elif HAVE_4_ARG_FOLLOW_HUGE_ADDR
    struct vm_area_struct *vma = hugepage_vma(mm, addr);
    if (vma) {
        *pagep = follow_huge_addr(mm, vma, addr, 0);
        return NULL;
    }
#elif HAVE_3_ARG_FOLLOW_HUGE_ADDR
    struct page *pg = follow_huge_addr(mm, addr, 0);
    if (!IS_ERR(pg)) {
        *pagep = pg;
        return NULL;
    }
#else
#error "No way to call follow_huge_addr()"
#endif
    *pagep = NULL;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd)) return NULL;
#ifdef PTRS_PER_PUD
    pud = pud_offset(pgd, addr);
    if (pud_none(*pud)) return NULL;
    pmd = pmd_offset(pud, addr);
#else
    pmd = pmd_offset(pgd, addr);
#endif
    if (pmd_none(*pmd)) return NULL;
#ifdef CONFIG_HUGETLBFS
    if (pmd_huge(*pmd)) {
        *pagep = follow_huge_pmd(mm, addr, pmd, 0);
        return NULL;
    }
#endif
    return pte_offset_map(pmd, addr);
}

/* This routine checks if a page from a filemap has been copied via
 * copy on write.  Basically, this is just checking to see if the page
 * is still a member of the map or not.  Note that this should not end
 * up copying random things from VM_IO regions.
 */
static int addr_copied(struct mm_struct *mm, unsigned long addr)
{
    pte_t *ptep;
    struct page *pg;
    int ret;
#if !HAVE_PAGEANON
#define PageAnon(pg) (!(pg)->mapping)
#endif

    spin_lock(&mm->page_table_lock);
    ptep = vmad_follow_addr(&pg, mm, addr);
    if (ptep) {
        pte_t pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
            pg = pte_page(pte);
            ret = PageAnon(pg);
        } else {
            /* pte_none is false for a swapped (written) page */
            ret = !pte_none(pte);
        }
    } else {
        ret = pg && PageAnon(pg);
    }
    spin_unlock(&mm->page_table_lock);
    return ret;
}

/* This is the version for working on a region that is a file map.  In
 * this case we need to fault the page in to check for zero.  This
 * isn't a big deal since we'll be faulting in for sending anyway if
 * it's not.
 */
static int addr_nonzero_file(struct mm_struct *mm, unsigned long addr)
{
    int i;
    unsigned long val = 0;

    /* Simple zero check */
    for (i=0; i < (PAGE_SIZE/sizeof(long)); i++) {
        /* We ignore EFAULT and presume that it's zero here */
        get_user(val, (((long*)addr)+i));
        if (val) return 1;
    }
    return 0;
}

/* This version is for use on regions which are *NOT* file maps.  Here
 * we look at the page tables to see if a page is zero.  If it's never
 * been faulted in, we know it's zero - and we don't fault it in while
 * checking for this.
 */
static int addr_nonzero(struct mm_struct *mm, unsigned long addr)
{
    int i;
    struct page *pg;
    pte_t *ptep;
    unsigned long val;

    spin_lock(&mm->page_table_lock);
    ptep = vmad_follow_addr(&pg, mm, addr);
    if (ptep) {
        pte_t pte = *ptep;
        pte_unmap(ptep);
        if (pte_none(pte)) goto out_zero;       /* Never faulted */
        if (pte_present(pte) && (pte_page(pte) == ZERO_PAGE(addr)))
            goto out_zero;                      /* Only READ faulted */
    } else if (!pg) {
        goto out_zero;
    }
    spin_unlock(&mm->page_table_lock);

    /* Ok, the page could be non-zero - check it... */
    for (i=0; i < (PAGE_SIZE/sizeof(long)); i++) {
        get_user(val, (((long*)addr)+i));
        if (val) return 1;
    }
    return 0;

out_zero:
    spin_unlock(&mm->page_table_lock);
    return 0;
}

/* Writes out the header giving the reader the number of bytes of
 * "fill", and returns that value in *buf_len.
 * ONLY if "fill" is smaller than VMAD_CHUNKHEADER_MIN bytes is
 * the corresponding padding written here.
 */
static long store_page_list_header(cr_chkpt_proc_req_t *ctx, struct file *file,
                                   void *buf, unsigned int *buf_len)
{
    struct vmadump_page_list_header header;
    long bytes = 0;
    long r;
    unsigned int fill;

    fill = PAGE_SIZE - ((file->f_pos + sizeof(header)) & (PAGE_SIZE - 1));
    header.fill = fill;
    r = write_kern(ctx, file, &header, sizeof(header));
    if (r != sizeof(header)) goto bad_write;
    bytes += r;

    if (!fill) {
        /* no padding at all */
    } else if (fill < VMAD_CHUNKHEADER_MIN) {
        /* TODO: seek when possible? */
        r = write_kern(ctx, file, buf, fill);
        if (r != fill) goto bad_write;
        bytes += r;
    } else {
        /* Padding is large enough to use for the page headers */
        *buf_len = fill;
    }
    return bytes;

bad_write:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}

/*
 * Returns < 0 on failure, or written byte count on success.
 */
static long store_page_chunks(cr_chkpt_proc_req_t *ctx, struct file *file,
                              struct vmadump_page_header *headers,
                              int sizeof_headers)
{
    unsigned long old_filp_flags;
    unsigned long chunk_start;
    long r, bytes = 0;
    int i;
    const int num_headers = sizeof_headers/sizeof(*headers);

    r = write_kern(ctx, file, headers, sizeof_headers);
    if (r != sizeof_headers) goto bad_write;
    bytes += r;

    /* Avoid directio_start/stop if there is no page I/O */
    if (headers[0].start == VMAD_END_OF_CHUNKS) goto empty;

    /*
     * This routine attempts to set up direct IO for the chunk writes.
     * If direct IO is not available, it does nothing.
     */
    old_filp_flags = directio_start(file);

    for (i = 0; i < num_headers; i++) {
        long len;
        chunk_start = headers[i].start;
        if (chunk_start == VMAD_END_OF_CHUNKS) break;
        len = headers[i].num_pages << PAGE_SHIFT;
        r = write_user(ctx, file, (void *) chunk_start, len);
        if (r != len) {
            directio_stop(file, old_filp_flags);
            goto bad_write;
        }
        bytes += r;
    }

    directio_stop(file, old_filp_flags);

empty:
    return bytes;

bad_write:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}

/* Add an element to the chunks array, and if it's full or "finished" then
 * write the chunks array.  Elements with zero pages are omitted.
 * "Finished" is defined as having start == VMAD_END_OF_CHUNKS in this chunk.
 *
 * Returns the number of bytes written (could be zero), or <0 on error.
 * If data was written, then sizeof_chunks and chunk_number are reset.
 *
 * chunks         - the chunks array to write
 * *sizeof_chunks - length of chunks in BYTES
 * *chunk_number  - next unoccupied entry in array
 */
static inline loff_t write_chunk(cr_chkpt_proc_req_t *ctx, struct file *file,
                                 struct vmadump_page_header *chunks,
                                 unsigned int *sizeof_chunks, int *chunk_number,
                                 unsigned long start, unsigned long num_pages)
{
    long r = 0;

    /* Add an element to the array unless it would be empty */
    if (num_pages || (start == VMAD_END_OF_CHUNKS)) {
        const int max_chunks = *sizeof_chunks/sizeof(*chunks);
        int index;

        index = (*chunk_number)++;
        chunks[index].start = start;
        chunks[index].num_pages = num_pages;

        /* Write the array if full or finished */
        if (((index + 1) >= max_chunks) || (start == VMAD_END_OF_CHUNKS)) {
            r = store_page_chunks(ctx, file, chunks, *sizeof_chunks);
            *sizeof_chunks = VMAD_CHUNKHEADER_SIZE;
            *chunk_number = 0;
        }
    }
    return r;
}
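/* store_page_list() below walks a VMA page by page and coalesces the pages
 * worth saving into (start, num_pages) runs before handing them to
 * write_chunk().  A self-contained sketch of that run coalescing over a toy
 * "needed" predicate (all names illustrative):
 */
#if 0 /* illustrative sketch only */
#include <stdio.h>

#define EX_PAGE 4096ul

static int needed(unsigned long addr) { return (addr / EX_PAGE) % 3 != 0; }

static void emit_runs(unsigned long start, unsigned long end)
{
    unsigned long run_start = 0, npages = 0, addr;
    for (addr = start; addr < end; addr += EX_PAGE) {
        if (!needed(addr)) continue;
        if (npages && addr == run_start + npages * EX_PAGE) {
            npages++;                   /* extends the current run */
        } else {
            if (npages) printf("run 0x%lx x%lu\n", run_start, npages);
            run_start = addr;           /* start a new run */
            npages = 1;
        }
    }
    if (npages) printf("run 0x%lx x%lu\n", run_start, npages);
}
#endif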
/*
 * SOMEDAY: we may want a field in vmadump_page_header for page hashes
 *
 * SOMEDAY: add a buffer argument through some of these routines
 * so that we can put compression in there.
 */
static loff_t store_page_list(cr_chkpt_proc_req_t *ctx, struct file *file,
                              unsigned long start, unsigned long end,
                              int (*need_to_save)(struct mm_struct *mm, unsigned long))
{
    long r;
    loff_t bytes = 0;
    unsigned long addr;
    unsigned long chunk_start, chunk_end, num_contig_pages;
    struct vmadump_page_header *chunks;
    int chunk_number;
    unsigned int sizeof_chunks = VMAD_CHUNKHEADER_SIZE;

    /* A page 'chunk' is a contiguous range of pages in virtual memory.
     *
     * We identify the chunks, and store them in the chunks array.
     * Our calls to write_chunk() will write all of the chunks out when needed,
     * and will start filling the chunks array back from the beginning.
     */
    chunks = (struct vmadump_page_header *) cr_kzalloc(sizeof_chunks, GFP_KERNEL);
    if (chunks == NULL) {
        r = -ENOMEM;
        goto out_kfree;
    }

    /*
     * TODO: Move this alignment step outward, perhaps as far as freeze_proc?
     */
    /* Handle alignment of the current file pointer to a page boundary for direct IO.
     * The "fill" required to achieve alignment is used for the first array of chunks
     * if large enough.  Otherwise it is written as zeros for padding.
     */
    r = store_page_list_header(ctx, file, chunks, &sizeof_chunks);
    if (r < 0) {
        goto out_kfree;
    }
    bytes += r;

    /*
     * Ok, this loop does all the work.  We're going to make a pass through.
     */
    chunk_start = chunk_end = start;
    num_contig_pages = 0;
    chunk_number = 0;
    for (addr = start; addr < end; addr += PAGE_SIZE) {
        /* The topmost if clause (need_to_save) is to identify things like
         * unmodified pages that can be reread from disk, or pages that were
         * allocated and never touched (zero pages).
         */
        if (need_to_save(current->mm, addr)) {
            /* test for contiguous pages. (chunk_end == addr)
             *
             * break up a contiguous page range if too large, (num < ...)
             */
            if (chunk_end == addr) {
                num_contig_pages++;
            } else {
                r = write_chunk(ctx, file, chunks, &sizeof_chunks,
                                &chunk_number, chunk_start, num_contig_pages);
                if (r < 0) goto out_io;
                bytes += r;
                /* Start a new chunk */
                chunk_start = addr;
                num_contig_pages = 1;
            }
            /* this is part of the current chunk */
            chunk_end = addr + PAGE_SIZE;
        }
    }

    /* store the last chunk */
    r = write_chunk(ctx, file, chunks, &sizeof_chunks, &chunk_number,
                    chunk_start, num_contig_pages);
    if (r < 0) goto out_io;
    bytes += r;

    /* now store a record marking the end of the chunks,
     * which should force writing of the chunks array */
    r = write_chunk(ctx, file, chunks, &sizeof_chunks, &chunk_number,
                    VMAD_END_OF_CHUNKS, 0);
    if (r < 0) goto out_io;
    if (r == 0) {
        /* This absolutely should not happen.  At the very least an EOF
         * record should have been written. */
        printk("write_chunks unexpectedly returned 0!\n");
    }
    bytes += r;

out_io:
out_kfree:
    kfree(chunks);
    if (r < 0) {
        return r;
    } else {
        return bytes;
    }
}

loff_t vmadump_store_page_list(cr_chkpt_proc_req_t *ctx, struct file *file,
                               unsigned long start, unsigned long end)
{
    return store_page_list(ctx, file, start, end, addr_nonzero_file);
}

loff_t vmadump_store_dirty_page_list(cr_chkpt_proc_req_t *ctx, struct file *file,
                                     unsigned long start, unsigned long end)
{
    return store_page_list(ctx, file, start, end, addr_copied);
}

static loff_t store_map(cr_chkpt_proc_req_t *ctx, struct file *file,
                        struct vm_area_struct *map, int flags)
{
    loff_t bytes;
    struct vmadump_vma_header head;
    char *filename=0;
    char *buffer = 0;
    loff_t r;
    unsigned long start, end;
    int isfilemap = 0;

#if VMAD_HAVE_ARCH_MAPS
    r = vmad_store_arch_map(ctx, file, map, flags);
    if (r) return r;
#endif

    head.start   = map->vm_start;
    head.end     = map->vm_end;
    head.flags   = map->vm_flags;
    head.namelen = 0;
    head.offset  = map->vm_pgoff << PAGE_SHIFT; /* XXX LFS! */

    /* Decide whether or not we're gonna store the map's contents or
     * a reference to the file they came from */
    if (map->vm_file) {
        unsigned short vmflags = map->vm_flags;
        buffer = (char *) __get_free_page(GFP_KERNEL);
        if (!buffer) {
            return -ENOMEM;
        }
#if 0 /* Not supported in BLCR */
        if (ctx && ctx->map_name)
            filename = ctx->map_name(ctx, map->vm_file, buffer, PAGE_SIZE);
        else
#endif
            filename = default_map_name(map->vm_file, buffer, PAGE_SIZE);
        head.namelen = strlen(filename);

        if (vmflags & VM_IO) {
            /* Region is an IO map. */
            /* Never store the contents of a VM_IO region */
        } else if (vmad_is_special_mmap(map, flags)) {
            /* Let BLCR deal with it */
            free_page((long)buffer);
            return 0;
        } else if (vmad_dentry_unlinked(map->vm_file->f_dentry)) {
            /* Region is an unlinked file - store contents, not filename */
            head.namelen = 0;
        } else if (vmflags & VM_EXECUTABLE) {
            /* Region is an executable */
            if (flags & VMAD_DUMP_EXEC)
                head.namelen = 0;
        } else if (is_library(filename)) {
            /* Region is a library */
            if (flags & VMAD_DUMP_LIBS)
                head.namelen = 0;
        } else {
            /* Region is something else */
            if (flags & VMAD_DUMP_OTHER)
                head.namelen=0;
        }
        isfilemap = 1;
    }
    start = map->vm_start;
    end   = map->vm_end;

    /* Release the mm_sem here to avoid deadlocks with page faults and
     * write locks that may happen during the writes.  (We can't use
     * the "map" pointer beyond this point.) */
    up_read(&current->mm->mmap_sem);

    /* Spit out the section header */
    r = write_kern(ctx, file, &head, sizeof(head));
    if (r != sizeof(head)) goto err;
    bytes = r;

    if (head.namelen > 0) {
        /* Store the filename */
        r = write_kern(ctx, file, filename, head.namelen);
        if (r != head.namelen) goto err;
        bytes += r;
        r = store_page_list(ctx, file, start, end, addr_copied);
        if (r < 0) goto err;
        bytes += r;
    } else {
        /* Store the contents of the VMA as defined by start, end */
        r = store_page_list(ctx, file, start, end,
                            isfilemap ? addr_nonzero_file : addr_nonzero);
        if (r < 0) goto err;
        bytes += r;
    }

    if (buffer) free_page((long)buffer);
    down_read(&current->mm->mmap_sem);
    return bytes;

err:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    if (buffer) free_page((long)buffer);
    down_read(&current->mm->mmap_sem);
    return r;
}
static long vmadump_store_sigpending(cr_chkpt_proc_req_t *ctx, struct file *file,
                                     struct sigpending *sigpending)
{
    sigset_t pending_set;
    long bytes = 0, r;
    struct list_head *elem;
    int flags;

    /* We know the list can't shrink (as all threads are stopped),
     * so we are playing a bit loose w/ the locking.
     */
    spin_lock_irq(&current->sighand->siglock);
    list_for_each(elem, &sigpending->list) {
        struct sigqueue *q = list_entry(elem, struct sigqueue, list);
        struct siginfo tmp_info, info;
        mm_segment_t oldfs;

        flags = q->flags;
        tmp_info = q->info;
        spin_unlock_irq(&current->sighand->siglock);

        /* copy_siginfo_to_user() copies only non-padding fields */
        memset(&info, 0, sizeof(info));
        oldfs = get_fs();
        set_fs(KERNEL_DS);
        r = copy_siginfo_to_user(&info, &tmp_info);
        set_fs(oldfs);
        if (r) goto err;

        r = write_kern(ctx, file, &flags, sizeof(flags));
        if (r != sizeof(flags)) goto err;
        bytes += r;
        r = write_kern(ctx, file, &info, sizeof(info));
        if (r != sizeof(info)) goto err;
        bytes += r;

        spin_lock_irq(&current->sighand->siglock);
    }
    memcpy(&pending_set, &sigpending->signal, sizeof(pending_set));
    spin_unlock_irq(&current->sighand->siglock);

    /* Flags == -1 marks end of the list */
    flags = -1;
    r = write_kern(ctx, file, &flags, sizeof(flags));
    if (r != sizeof(flags)) goto err;
    bytes += r;

    /* Corresponding signal set comes last */
    r = write_kern(ctx, file, &pending_set, sizeof(pending_set));
    if (r != sizeof(pending_set)) goto err;
    bytes += r;

    return bytes;

err:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}

static long vmadump_store_prctl_aux(cr_chkpt_proc_req_t *ctx, struct file *file,
                                    int option, int type, void *addr, int len)
{
    struct vmad_prctl header;
    long r;
    long bytes = 0;

    header.option = option;
    header.type = type;
    r = write_kern(ctx, file, &header, sizeof(header));
    if (r != sizeof(header)) goto err;
    bytes += r;

    if (len) {
        r = write_kern(ctx, file, addr, len);
        if (r != len) goto err;
        bytes += r;
    }
    return bytes;

err:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}

static int vmadump_store_prctl(cr_chkpt_proc_req_t *ctx, struct file *file)
{
    long r;
    long bytes = 0;
    int i;
    const static struct {
        int type;
        int get_option;
        int set_option;
    } vmad_prctl_tbl[] = {
#if defined(PR_GET_PDEATHSIG)
        {vmad_prctl_int_ref, PR_GET_PDEATHSIG, PR_SET_PDEATHSIG},
#endif
#if defined(PR_GET_DUMPABLE)
        {vmad_prctl_bool, PR_GET_DUMPABLE, PR_SET_DUMPABLE},
#endif
#if defined(PR_GET_UNALIGN) && defined(GET_UNALIGN_CTL)
        {vmad_prctl_int_ref, PR_GET_UNALIGN, PR_SET_UNALIGN},
#endif
#if defined(PR_GET_KEEPCAPS)
        {vmad_prctl_bool, PR_GET_KEEPCAPS, PR_SET_KEEPCAPS},
#endif
#if defined(PR_GET_FPEMU) && defined(GET_FPEMU_CTL)
        {vmad_prctl_int_ref, PR_GET_FPEMU, PR_SET_FPEMU},
#endif
#if defined(PR_GET_FPEXC) && defined(GET_FPEXC_CTL)
        {vmad_prctl_int_ref, PR_GET_FPEXC, PR_SET_FPEXC},
#endif
#if defined(PR_GET_TIMING)
        {vmad_prctl_int_val, PR_GET_TIMING, PR_SET_TIMING},
#endif
#if defined(PR_GET_ENDIAN) && defined(GET_ENDIAN)
        {vmad_prctl_int_ref, PR_GET_ENDIAN, PR_SET_ENDIAN},
#endif
#if defined(PR_GET_SECCOMP)
        {vmad_prctl_int_val, PR_GET_SECCOMP, PR_SET_SECCOMP},
#endif
#if defined(PR_GET_TSC) && defined(GET_TSC_CTL)
        {vmad_prctl_int_ref, PR_GET_TSC, PR_SET_TSC},
#endif
#if defined(PR_GET_TIMERSLACK)
        {vmad_prctl_int_val, PR_GET_TIMERSLACK, PR_SET_TIMERSLACK},
#endif
    };

    /* First the comm, which was not handled via prctl prior to 2.6.9 */
#if defined(PR_GET_NAME)
    {
        char value[TASK_COMM_LEN];
        r = vmadump_prctl_k(PR_GET_NAME, &value);
        if (r < 0) goto err;
        r = vmadump_store_prctl_aux(ctx, file, PR_SET_NAME, vmad_prctl_comm,
                                    &value, sizeof(value));
        if (r < 0) goto err;
        bytes += r;
    }
#else
    /* Value of header.option is ignored, but use PR_SET_NAME to lessen confusion */
    if (sizeof(current->comm) != TASK_COMM_LEN) {
        CR_ERR_CTX(ctx, "vmadump: TASK_COMM_LEN changed?");
        r = -EINVAL;
        goto err;
    }
    r = vmadump_store_prctl_aux(ctx, file, 15 /* PR_SET_NAME */, vmad_prctl_comm,
                                &current->comm, TASK_COMM_LEN);
    if (r < 0) goto err;
    bytes += r;
#endif

    /* Now all the rest */
    for (i = 0; i < sizeof(vmad_prctl_tbl)/sizeof(vmad_prctl_tbl[0]); ++i) {
        unsigned int value;
        const int type = vmad_prctl_tbl[i].type;
        const int get  = vmad_prctl_tbl[i].get_option;
        const int set  = vmad_prctl_tbl[i].set_option;

        switch (type) {
        case vmad_prctl_bool:
            r = vmadump_prctl_v(get, 0);
            if (!IS_ERR((void *)r)) { /* ICK */
                value = r ? 1 : 0;
                r = 0;
            }
            break;
        case vmad_prctl_int_val:
            r = vmadump_prctl_v(get, 0);
            if (!IS_ERR((void *)r)) { /* ICK */
                value = (unsigned int)r;
                r = 0;
            }
            break;
        case vmad_prctl_int_ref:
            r = vmadump_prctl_k(get, &value);
            break;
        }
        if (r < 0) {
#if 0 /* Confusing when issued, for instance, for PR_GET_ENDIAN where CPU lacks support */
            CR_WARN_CTX(ctx, "vmadump warning: failed to read prctl(%d) err %ld",
                        get, r);
#endif
            continue;
        }

        r = vmadump_store_prctl_aux(ctx, file, set, type, &value, sizeof(value));
        if (r < 0) goto err;
        bytes += r;
    }

    /* Write the list terminator */
    r = vmadump_store_prctl_aux(ctx, file, 0, 0, NULL, 0);
    if (r < 0) goto err;
    bytes += r;

    return bytes;

err:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}
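/* vmadump_store_prctl() above emits a simple record stream: a
 * struct vmad_prctl {option, type} header followed by the value bytes for
 * that type, terminated by a header with option == 0 (the format that
 * vmadump_load_prctl() consumes).  A sketch of scanning such a stream in
 * memory (struct layout mirrored from this file; helper names illustrative):
 */
#if 0 /* illustrative sketch only */
#include <stdio.h>
#include <string.h>

struct ex_prctl { int option; int type; };

static void walk_prctl_stream(const unsigned char *p)
{
    struct ex_prctl h;
    unsigned int value;

    memcpy(&h, p, sizeof(h));
    while (h.option) {
        p += sizeof(h);
        /* this sketch assumes an int value per record; the comm record
         * in the real stream carries TASK_COMM_LEN bytes instead */
        memcpy(&value, p, sizeof(value));
        p += sizeof(value);
        printf("option %d type %d value %u\n", h.option, h.type, value);
        memcpy(&h, p, sizeof(h));
    }
}
#endif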
loff_t vmadump_freeze_proc(cr_chkpt_proc_req_t *ctx, struct file *file,
                           struct pt_regs *regs, int flags)
{
    loff_t r, bytes=0;
    static struct vmadump_header header =
        {VMAD_MAGIC, VMAD_FMT_VERS, VMAD_ARCH,
         (LINUX_VERSION_CODE >> 16) & 0xFF,
         (LINUX_VERSION_CODE >> 8) & 0xFF,
         LINUX_VERSION_CODE & 0xFF };

    /*--- Write out the file header ---*/
    r = write_kern(ctx, file, &header, sizeof(header));
    if (r != sizeof(header)) goto err;
    bytes += r;

#if 0 /* Now handled w/ other prctl values */
    if (!(flags & VMAD_DUMP_REGSONLY)) {
        r = write_kern(ctx, file, current->comm, sizeof(current->comm));
        if (r != sizeof(current->comm)) goto err;
        bytes += r;
    }
#endif
    vmadump_store_prctl(ctx, file);

    /*--- PID ---*/
    r = write_kern(ctx, file, &current->pid, sizeof(current->pid));
    if (r != sizeof(current->pid)) goto err;
    bytes += r;

    /*--- Credentials (uids, gids and supplemental ids) ---*/
    r = cr_save_creds(ctx);
    if (r < 0) goto err;
    bytes += r;

    /*--- CPU State Information ------------------------------------*/
    r = vmadump_store_cpu(ctx, file, regs);
    if (r < 0) goto err;
    bytes += r;

    /*--- Signal information ---------------------------------------*/
    {
        int i;
        stack_t sig_stack;
        sigset_t sig_blocked;
        struct k_sigaction sig_action;

        r = vmadump_sigaltstack_k(NULL, &sig_stack);
        if (r < 0) goto err;
        r = write_kern(ctx, file, &sig_stack, sizeof(sig_stack));
        if (r != sizeof(sig_stack)) goto err;
        bytes += r;

        spin_lock_irq(&current->sighand->siglock);
        memcpy(&sig_blocked, &current->blocked, sizeof(sig_blocked));
        spin_unlock_irq(&current->sighand->siglock);
        r = write_kern(ctx, file, &sig_blocked, sizeof(sig_blocked));
        if (r != sizeof(sig_blocked)) goto err;
        bytes += r;

        r = vmadump_store_sigpending(ctx, file, &current->pending);
        if (r < 0) goto err;
        bytes += r;
        if (!(flags & VMAD_DUMP_REGSONLY)) {
            /* Dump shared queue if not actually shared, or if we are the "leader" */
            r = vmadump_store_sigpending(ctx, file, &current->signal->shared_pending);
            if (r < 0) goto err;
            bytes += r;
        }

        if (!(flags & VMAD_DUMP_REGSONLY)) {
            for (i=0; i < _NSIG; i++) {
                spin_lock_irq(&current->sighand->siglock);
                memcpy(&sig_action, &current->sighand->action[i], sizeof(sig_action));
                spin_unlock_irq(&current->sighand->siglock);

                r = write_kern(ctx, file, &sig_action, sizeof(sig_action));
                if (r != sizeof(sig_action)) goto err;
                bytes += r;
            }
        }
    }

    /*--- Misc other stuff -----------------------------------------*/
    r = write_kern(ctx, file, &current->clear_child_tid,
                   sizeof(current->clear_child_tid));
    if (r != sizeof(current->clear_child_tid)) goto err;
    bytes += r;
    {
        /* personality */
        unsigned long tmp_personality = current->personality;
        r = write_kern(ctx, file, &tmp_personality, sizeof(tmp_personality));
        if (r != sizeof(tmp_personality)) goto err;
        bytes += r;
    }

    /* XXX Will we need FUTEX related stuff here as well? */

    /*--- Memory Information ---------------------------------------*/
    if (!(flags & VMAD_DUMP_REGSONLY)) {
        struct vm_area_struct *map, *next_map;
        struct vmadump_mm_info mm_info;
        struct mm_struct *mm = current->mm;
        struct vmadump_vma_header term;
        unsigned long next_addr;
#if HAVE_MM_MMAP_BASE
        unsigned long mmap_base;
#endif

        down_read(&mm->mmap_sem);
        mm_info.start_code  = mm->start_code;
        mm_info.end_code    = mm->end_code;
        mm_info.start_data  = mm->start_data;
        mm_info.end_data    = mm->end_data;
        mm_info.start_brk   = mm->start_brk;
        mm_info.brk         = mm->brk;
        mm_info.start_stack = mm->start_stack;
        mm_info.arg_start   = mm->arg_start;
        mm_info.arg_end     = mm->arg_end;
        mm_info.env_start   = mm->env_start;
        mm_info.env_end     = mm->env_end;
#if HAVE_MM_MMAP_BASE
        mmap_base = mm->mmap_base;
#endif
        up_read(&mm->mmap_sem);

        r = write_kern(ctx, file, &mm_info, sizeof(mm_info));
        if (r != sizeof(mm_info)) goto err;
        bytes += r;
#if HAVE_MM_MMAP_BASE
        r = write_kern(ctx, file, &mmap_base, sizeof(mmap_base));
        if (r != sizeof(mmap_base)) goto err;
        bytes += r;
#endif

        down_read(&mm->mmap_sem);
        next_map = mm->mmap;
        next_addr = next_map ? next_map->vm_start : 0;
        while (next_map) {
            /* Call find_vma() to find the map we're looking for.  We
             * have to do it this way because store_map needs to release
             * the mmap_sem. */
            map = find_vma(mm, next_addr);
            if (map != next_map) break;
            next_map = map->vm_next;
            next_addr = next_map ? next_map->vm_start : 0;
            r = store_map(ctx, file, map, flags);
            if (r < 0) {
                up_read(&mm->mmap_sem);
                goto err;
            }
            bytes += r;
        }
        up_read(&mm->mmap_sem);

        /* Terminate maps list */
        term.start = term.end = ~0L;
        r = write_kern(ctx, file, &term, sizeof(term));
        if (r != sizeof(term)) goto err;
        bytes += r;
    }

#if 0 /* Hooks code not needed/maintained for BLCR */
    /*--- Call freeze hooks ----------------------------------------*/
    r = do_freeze_hooks(ctx, file, regs, flags);
    if (r < 0) goto err;
    bytes += r;
#endif

    return bytes;

err:
    if (r >= 0) r = -EIO;       /* Map short writes to EIO */
    return r;
}

/*--------------------------------------------------------------------
 * syscall interface
 *------------------------------------------------------------------*/
#if 0 /* Not needed/maintained for BLCR */
long do_vmadump(long op, long arg0, long arg1, struct pt_regs *regs)
{
    long retval;
    struct file *file;

    switch (op) {
    case VMAD_DO_DUMP:
        if (arg1 & ~VMAD_FLAG_USER_MASK) {
            retval = -EINVAL;
            break;
        }
        if ((file = fget(arg0))) {
            retval = vmadump_freeze_proc(0, file, regs, arg1, 1);
            fput(file);
        } else
            retval = -EBADF;
        break;
    case VMAD_DO_UNDUMP:
        if ((file = fget(arg0))) {
            retval = vmadump_thaw_proc(0, file, regs, 0, NULL);
            fput(file);
        } else
            retval = -EBADF;

        /* Un-dump is a whole lot like exec() */
        if (retval == 0) {
            if (current->euid == current->uid && current->egid == current->gid)
                current->mm->dumpable = 1;
            current->did_exec = 1;
            current->self_exec_id++;
            if (current->ptrace & PT_PTRACED)
                send_sig(SIGTRAP, current, 0);
        }
        break;
    case VMAD_DO_EXECDUMP: {
        struct vmadump_execdump_args args;
        char *filename;

        if (copy_from_user(&args, (void *) arg0, sizeof(args))) {
            retval = -EFAULT;
            break;
        }
        if (args.flags & ~VMAD_FLAG_USER_MASK) {
            retval = -EINVAL;
            break;
        }

        filename = getname(args.arg0);
        retval = PTR_ERR(filename);
        if (IS_ERR(filename)) break;

        file = fget(args.fd);
        if (!file) {
            retval = -EBADF;
            putname(filename);
            break;
        }

        retval = do_execve(filename, (char **)args.argv,
                           (char **)args.envp, regs);
        putname(filename);
        if (retval) {
            fput(file);
            break;
        }

        /* Check to make sure we're actually allowed to dump :) */
        if (!current->mm->dumpable) {
            fput(file);
            do_exit(-EPERM);
        }

        retval = vmadump_freeze_proc(0, file, regs, args.flags, 1);
        fput(file);
        if (retval > 0) retval = 0;
        do_exit(retval);        /* Ok, we're done... */
        /* NOT REACHED */
    } break;
    case VMAD_LIB_CLEAR:
    case VMAD_LIB_ADD:
    case VMAD_LIB_DEL:
    case VMAD_LIB_LIST:
    case VMAD_LIB_SIZE:
        lock_kernel();          /* very very lazy... */
        retval = do_lib_op(op, (char *) arg0, arg1);
        unlock_kernel();
        break;
    default:
        retval = -EINVAL;
    }
    return retval;
}
#endif

/*--------------------------------------------------------------------
 * New binary format code
 *------------------------------------------------------------------*/
#ifdef HAVE_BINFMT_VMADUMP
static int load_vmadump(struct linux_binprm *bprm, struct pt_regs *regs)
{
    int retval;
    struct vmadump_header *header;

    header = (struct vmadump_header *) bprm->buf;
    if (memcmp(header->magic, vmad_magic, sizeof(header->magic)) != 0 ||
        header->fmt_vers != VMAD_FMT_VERS ||
        header->major != ((LINUX_VERSION_CODE >> 16) & 0xFF) ||
        header->minor != ((LINUX_VERSION_CODE >> 8) & 0xFF)
#if defined(STRICT_VERSION_CHECK) && STRICT_VERSION_CHECK
        || header->patch != (LINUX_VERSION_CODE & 0xFF)
#endif
        )
        return -ENOEXEC;

    if (!bprm->file->f_op || !bprm->file->f_op->mmap)
        return -ENOEXEC;

    retval = vmadump_thaw_proc(0, bprm->file, regs, 0, NULL);
    if (retval == 0) {
        if (current->euid == current->uid && current->egid == current->gid)
            current->mm->dumpable = 1;
        current->did_exec = 1;
        current->self_exec_id++;
#if 0
        if (current->exec_domain && current->exec_domain->module)
            __MOD_DEC_USE_COUNT(current->exec_domain->module);
        if (current->binfmt && current->binfmt->module)
            __MOD_DEC_USE_COUNT(current->binfmt->module);
        current->exec_domain = 0;
#endif
        current->binfmt = 0;
    }
    return retval;
}

struct linux_binfmt vmadump_fmt = {
    0, THIS_MODULE, load_vmadump, 0, 0, 0
};
#endif

/* This is some stuff that allows vmadump to latch onto the BProc
 * syscall for testing purposes. */
#ifdef CONFIG_BPROC
static int syscall = 0;
MODULE_PARM(syscall, "i");
MODULE_PARM_DESC(syscall,
    "Syscall number to allow calling VMADump directly. The default (0) means "
    "no VMADump syscall. There is no bounds check on the syscall number so "
    "be careful with this option.");
#endif
" "Can't attach to BProc syscall.\n"); else { do_bproc_ptr = do_vmadump; printk("vmadump: Attached to BProc syscall.\n"); } } /*up_write(&do_bproc_lock);*/ } #endif #ifdef HAVE_BINFMT_VMADUMP register_binfmt(&vmadump_fmt); #endif return 0; } void __exit vmadump_cleanup_module(void) { #if 0 /* Not needed/maintained for BLCR */ liblist_clear(); #endif #ifdef HAVE_BINFMT_VMADUMP unregister_binfmt(&vmadump_fmt); #endif #ifdef CONFIG_BPROC { /*extern struct rw_semaphore do_bproc_lock;*/ extern long (*do_bproc_ptr)(long,long,long,struct pt_regs *); if (syscall) { /*down_write(&do_bproc_lock);*/ if (do_bproc_ptr == do_vmadump) do_bproc_ptr = 0; /*up_write(&do_bproc_lock);*/ } } #endif } #if 0 /* EXPORTs are not needed/maintained for BLCR */ EXPORT_SYMBOL(vmadump_freeze_proc); EXPORT_SYMBOL(vmadump_thaw_proc); EXPORT_SYMBOL(do_vmadump); EXPORT_SYMBOL(vmadump_add_hook); EXPORT_SYMBOL(vmadump_del_hook); EXPORT_SYMBOL(vmadump_read_u); EXPORT_SYMBOL(vmadump_read_k); EXPORT_SYMBOL(vmadump_write_u); EXPORT_SYMBOL(vmadump_write_k); #endif /* * Local variables: * c-basic-offset: 4 * End: */