WARNING: Very dodgy hack just to demonstrate how ghex handles large files with mmap. As expected, results are dramatic, but this patch is far too simplistic and buggy to consider using for other purposes... Gianni Tedesco diff --git a/src/hex-document.c b/src/hex-document.c index 394f805..11e1cc4 100644 --- a/src/hex-document.c +++ b/src/hex-document.c @@ -21,6 +21,7 @@ Author: Jaka Mocnik */ +#define _GNU_SOURCE #include #include @@ -33,6 +34,28 @@ #include #include +#include +#include +#include + +#if 1 +#define dprintf printf +#else +#define dprintf(x...) do{}while(0); +#endif + + +static size_t page_size; +static size_t page_mask; +static size_t pa_up(size_t val) +{ + return (val + page_size - 1) & page_mask; +} +static size_t pa_down(size_t val) +{ + return val & page_mask; +} + static void hex_document_class_init (HexDocumentClass *); static void hex_document_init (HexDocument *doc); static void hex_document_finalize (GObject *obj); @@ -195,12 +218,12 @@ get_document_attributes(HexDocument *doc) { static struct stat stats; - if(doc->file_name == NULL) + if(doc->fd < 0 ) return FALSE; - if(!stat(doc->file_name, &stats) && + if(!fstat(doc->fd, &stats) && S_ISREG(stats.st_mode) && - stats.st_size > 0) { + stats.st_size > 0 && stats.st_size < ~0UL) { doc->file_size = stats.st_size; return TRUE; @@ -215,43 +238,122 @@ move_gap_to(HexDocument *doc, guint offset, gint min_size) { guchar *tmp, *buf_ptr, *tmp_ptr; + dprintf("Move gap to %u (size = %i, pos = %u)\n", + offset, doc->gap_size, + doc->gap_pos - doc->map); + if(doc->gap_size < min_size) { + void *ret; + void *new_gap = doc->gap_map; + void *new_map = doc->map; + + /* Add a new page on the end */ + if ( doc->gap_map ) { + size_t osz; + + osz = doc->map_size - pa_up(doc->file_size); + ret = mremap(doc->gap_map, osz, + osz + page_size, 0); + dprintf("remap-gap: %u %u: %p\n", + osz / page_size, + (osz / page_size) + 1, ret); + }else{ + dprintf("map new page at: %p\n", + doc->map + doc->map_size); + ret = 
mmap(doc->map + doc->map_size, + page_size, + PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + new_gap = ret; + } + + /* Try to re-base if we run out of vm space */ + if ( ret == MAP_FAILED ) { + void *ng; + + /* Map new base */ + ret = mmap(NULL, doc->map_size + page_size, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + g_assert(ret != MAP_FAILED); + dprintf("Re-base from %p to %p\n", doc->map, ret); + + /* remap file-backed part */ + ng = mremap(doc->map, + pa_up(doc->file_size), + pa_up(doc->file_size), + MREMAP_FIXED, ret); + g_assert(ng == ret); + dprintf(" remap: %p -> %p: %u pages\n", + doc->map, ret, + pa_up(doc->file_size) / page_size); + + new_map = ret; + + /* move anonymous part */ + if ( doc->gap_map ) { + size_t osz = (doc->map + doc->map_size) + - doc->map; + ng = mremap(doc->gap_map, + osz, osz, + MREMAP_FIXED, + new_map + osz); + g_assert(ng == (new_map + osz)); + dprintf(" remap: %p -> %p : %u pages", + doc->gap_map, ng, + osz / page_size); + new_gap = ng; + } + } + + doc->map = new_map; + doc->gap_map = new_gap; + doc->gap_pos = doc->map + doc->file_size; + doc->gap_size = page_size; + doc->map_size = doc->file_size + doc->gap_size; + + g_assert((doc->map_size & ~page_mask) == 0); + if ( doc->gap_map ) + g_assert(doc->gap_map > doc->map); + + /* tmp = g_malloc(sizeof(guchar)*doc->file_size); - buf_ptr = doc->buffer; + buf_ptr = doc->map; tmp_ptr = tmp; while(buf_ptr < doc->gap_pos) *tmp_ptr++ = *buf_ptr++; buf_ptr += doc->gap_size; - while(buf_ptr < doc->buffer + doc->buffer_size) + while(buf_ptr < doc->map + doc->map_size) *tmp_ptr++ = *buf_ptr++; doc->gap_size = MAX(min_size, 32); - doc->buffer_size = doc->file_size + doc->gap_size; - doc->buffer = g_realloc(doc->buffer, sizeof(guchar)*doc->buffer_size); - doc->gap_pos = doc->buffer + offset; + doc->map_size = doc->file_size + doc->gap_size; + doc->map = g_realloc(doc->map, sizeof(guchar)*doc->map_size); + doc->gap_pos = doc->map + offset; - buf_ptr = 
doc->buffer; + buf_ptr = doc->map; tmp_ptr = tmp; while(buf_ptr < doc->gap_pos) *buf_ptr++ = *tmp_ptr++; buf_ptr += doc->gap_size; - while(buf_ptr < doc->buffer + doc->buffer_size) + while(buf_ptr < doc->map + doc->map_size) *buf_ptr++ = *tmp_ptr++; g_free(tmp); + */ } - else { - if(doc->buffer + offset < doc->gap_pos) { - buf_ptr = doc->gap_pos + doc->gap_size - 1; - while(doc->gap_pos > doc->buffer + offset) - *buf_ptr-- = *(--doc->gap_pos); - } - else if(doc->buffer + offset > doc->gap_pos) { - buf_ptr = doc->gap_pos + doc->gap_size; - while(doc->gap_pos < doc->buffer + offset) - *doc->gap_pos++ = *buf_ptr++; - } + + if(doc->map + offset < doc->gap_pos) { + buf_ptr = doc->gap_pos + doc->gap_size - 1; + while(doc->gap_pos > doc->map + offset) + *buf_ptr-- = *(--doc->gap_pos); + } + else if(doc->map + offset > doc->gap_pos) { + buf_ptr = doc->gap_pos + doc->gap_size; + while(doc->gap_pos < doc->map + offset) + *doc->gap_pos++ = *buf_ptr++; } } @@ -289,8 +391,9 @@ hex_document_finalize(GObject *obj) hex = HEX_DOCUMENT(obj); - if(hex->buffer) - g_free(hex->buffer); + if(hex->map) + munmap(hex->map, hex->file_size); + //g_free(hex->map); if(hex->file_name) g_free(hex->file_name); @@ -330,6 +433,9 @@ static void hex_document_class_init (HexDocumentClass *klass) { GObjectClass *gobject_class = G_OBJECT_CLASS(klass); + + page_size = getpagesize(); + page_mask = ~(page_size - 1); parent_class = g_type_class_peek_parent(klass); @@ -382,8 +488,8 @@ hex_document_class_init (HexDocumentClass *klass) static void hex_document_init (HexDocument *doc) { - doc->buffer = NULL; - doc->buffer_size = 0; + doc->map = NULL; + doc->map_size = 0; doc->file_size = 0; doc->gap_pos = NULL; doc->gap_size = 0; @@ -439,8 +545,9 @@ hex_document_new() doc->gap_size = 100; doc->file_size = 0; - doc->buffer_size = doc->file_size + doc->gap_size; - doc->gap_pos = doc->buffer = (guchar *)g_malloc(doc->buffer_size); + doc->map_size = doc->file_size + doc->gap_size; + doc->gap_pos = doc->map = (guchar 
*)g_malloc(doc->map_size); + g_assert_not_reached(); /* FIXME */ doc->path_end = g_strdup(_("New document")); @@ -459,11 +566,7 @@ hex_document_new_from_file(const gchar *name) g_return_val_if_fail (doc != NULL, NULL); doc->file_name = (gchar *)g_strdup(name); - if(get_document_attributes(doc)) { - doc->gap_size = 100; - doc->buffer_size = doc->file_size + doc->gap_size; - doc->buffer = (guchar *)g_malloc(doc->buffer_size); - + if(hex_document_read(doc)) { /* find the start of the filename without path */ for(i = strlen(doc->file_name); (i >= 0) && (doc->file_name[i] != '/'); i--) ; @@ -473,10 +576,8 @@ hex_document_new_from_file(const gchar *name) path_end = doc->file_name; doc->path_end = g_filename_to_utf8 (path_end, -1, NULL, NULL, NULL); - if(hex_document_read(doc)) { - doc_list = g_list_append(doc_list, doc); - return doc; - } + doc_list = g_list_append(doc_list, doc); + return doc; } g_object_unref(G_OBJECT(doc)); @@ -487,9 +588,9 @@ guchar hex_document_get_byte(HexDocument *doc, guint offset) { if(offset < doc->file_size) { - if(doc->gap_pos <= doc->buffer + offset) + if(doc->gap_pos <= doc->map + offset) offset += doc->gap_size; - return doc->buffer[offset]; + return doc->map[offset]; } else return 0; @@ -501,7 +602,7 @@ hex_document_get_data(HexDocument *doc, guint offset, guint len) guchar *ptr, *data, *dptr; guint i; - ptr = doc->buffer + offset; + ptr = doc->map + offset; if(ptr >= doc->gap_pos) ptr += doc->gap_size; dptr = data = g_malloc(sizeof(guchar)*len); @@ -541,16 +642,16 @@ hex_document_set_nibble(HexDocument *doc, guchar val, guint offset, doc->file_size++; change_data.rep_len = 0; if(offset == doc->file_size) - doc->buffer[offset] = 0; + doc->map[offset] = 0; } else { - if(doc->buffer + offset >= doc->gap_pos) + if(doc->map + offset >= doc->gap_pos) offset += doc->gap_size; change_data.rep_len = 1; } - change_data.v_byte = doc->buffer[offset]; - doc->buffer[offset] = (doc->buffer[offset] & (lower_nibble?0xF0:0x0F)) | (lower_nibble?val:(val << 
4)); + change_data.v_byte = doc->map[offset]; + doc->map[offset] = (doc->map[offset] & (lower_nibble?0xF0:0x0F)) | (lower_nibble?val:(val << 4)); hex_document_changed(doc, &change_data, undoable); } @@ -580,11 +681,14 @@ hex_document_set_byte(HexDocument *doc, guchar val, guint offset, doc->gap_pos++; doc->file_size++; } - else if(doc->buffer + offset >= doc->gap_pos) + else if(doc->map + offset >= doc->gap_pos) { + dprintf("byte gap size = %u (offset=%u)\n", + doc->gap_size, offset); offset += doc->gap_size; + } - change_data.v_byte = doc->buffer[offset]; - doc->buffer[offset] = val; + change_data.v_byte = doc->map[offset]; + doc->map[offset] = val; hex_document_changed(doc, &change_data, undoable); } @@ -612,7 +716,7 @@ hex_document_set_data(HexDocument *doc, guint offset, guint len, change_data.lower_nibble = FALSE; i = 0; - ptr = &doc->buffer[offset]; + ptr = &doc->map[offset]; if(ptr >= doc->gap_pos) ptr += doc->gap_size; while(offset + i < doc->file_size && i < rep_len) { @@ -623,7 +727,7 @@ hex_document_set_data(HexDocument *doc, guint offset, guint len, } if(rep_len == len) { - if(doc->buffer + offset >= doc->gap_pos) + if(doc->map + offset >= doc->gap_pos) offset += doc->gap_size; } else { @@ -638,9 +742,9 @@ hex_document_set_data(HexDocument *doc, guint offset, guint len, doc->file_size += (gint)len - (gint)rep_len; } - ptr = &doc->buffer[offset]; + ptr = &doc->map[offset]; i = 0; - while(offset + i < doc->buffer_size && i < len) { + while(offset + i < doc->map_size && i < len) { *ptr++ = *data++; i++; } @@ -658,22 +762,60 @@ hex_document_delete_data(HexDocument *doc, guint offset, guint len, gboolean und gint hex_document_read(HexDocument *doc) { - FILE *file; static HexChangeData change_data; if(doc->file_name == NULL) return FALSE; - if(!get_document_attributes(doc)) + if((doc->fd = open(doc->file_name, O_RDONLY)) < 0) return FALSE; - if((file = fopen(doc->file_name, "r")) == NULL) - return FALSE; + if(!get_document_attributes(doc)) + goto err_close; + 
+ doc->gap_size = (doc->file_size + 100 + (page_size - 1)) & page_mask; + doc->gap_size -= doc->file_size; + doc->map_size = doc->file_size + doc->gap_size; + + g_assert((doc->map_size % page_size) == 0); + g_assert(doc->map_size == (doc->gap_size + doc->file_size)); + + dprintf("filesz=%.8x gapsz=%.8x mapsz=0x%.8x\n", + doc->file_size, doc->gap_size, doc->map_size); + + if ( doc->gap_size >= page_size ) { + doc->gap_map = mmap(NULL, doc->map_size, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (doc->gap_map == MAP_FAILED ) { + perror("mmap gap"); + goto err_close; + } + doc->map = mmap(doc->gap_map, doc->file_size, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_FIXED, + doc->fd, 0); + doc->gap_map += pa_up(doc->file_size); + }else{ + doc->map = mmap(NULL, doc->map_size, + PROT_READ|PROT_WRITE, MAP_PRIVATE, + doc->fd, 0); + doc->gap_map = NULL; + } - doc->gap_size = doc->buffer_size - doc->file_size; - fread(doc->buffer + doc->gap_size, 1, doc->file_size, file); - doc->gap_pos = doc->buffer; - fclose(file); + if (doc->map == MAP_FAILED ) { + perror("mmap"); + goto err_close; + } + + doc->gap_pos = doc->map + doc->file_size; + dprintf("mmap: %p : %p : %p (file:%3u / gap:%3u + %u bytes)\n", + doc->map, doc->gap_map, doc->map + doc->map_size, + (pa_up(doc->file_size) / page_size), + (doc->gap_map) ? 
+ (doc->map_size - (doc->gap_map - doc->map)) / page_size : 0, + doc->gap_size & ~page_mask); undo_stack_free(doc); change_data.start = 0; @@ -682,6 +824,9 @@ hex_document_read(HexDocument *doc) hex_document_changed(doc, &change_data, FALSE); return TRUE; +err_close: + close(doc->fd); + return FALSE; } gint @@ -690,13 +835,13 @@ hex_document_write_to_file(HexDocument *doc, FILE *file) gint ret = TRUE; size_t exp_len; - if(doc->gap_pos > doc->buffer) { - exp_len = MIN(doc->file_size, doc->gap_pos - doc->buffer); - ret = fwrite(doc->buffer, 1, exp_len, file); + if(doc->gap_pos > doc->map) { + exp_len = MIN(doc->file_size, doc->gap_pos - doc->map); + ret = fwrite(doc->map, 1, exp_len, file); ret = (ret == exp_len)?TRUE:FALSE; } - if(doc->gap_pos < doc->buffer + doc->file_size) { - exp_len = doc->file_size - (size_t)(doc->gap_pos - doc->buffer); + if(doc->gap_pos < doc->map + doc->file_size) { + exp_len = doc->file_size - (size_t)(doc->gap_pos - doc->map); ret = fwrite(doc->gap_pos + doc->gap_size, 1, exp_len, file); ret = (ret == exp_len)?TRUE:FALSE; } @@ -707,20 +852,40 @@ hex_document_write_to_file(HexDocument *doc, FILE *file) gint hex_document_write(HexDocument *doc) { - FILE *file; gint ret = FALSE; + FILE *file; + gchar *ofn; + size_t sz; + int fd; if(doc->file_name == NULL) return FALSE; - if((file = fopen(doc->file_name, "w")) != NULL) { + sz = strlen(doc->file_name); + ofn = g_malloc(sz + 8); + memcpy(ofn, doc->file_name, sz); + memcpy(ofn + sz, ".XXXXXX", 8); + fd = mkstemp(ofn); + + if((file = fdopen(fd, "w")) != NULL) { + ret = TRUE; ret = hex_document_write_to_file(doc, file); - fclose(file); - if(ret) { - doc->changed = FALSE; - } + if ( ferror(file) ) + ret = FALSE; + /* FIXME: Re-map new file */ + if ( fclose(file) == EOF ) + ret = FALSE; } + if ( ret ) + rename(ofn, doc->file_name); + else + unlink(ofn); + + g_free(ofn); + + if ( ret ) + doc->changed = FALSE; return ret; } diff --git a/src/hex-document.h b/src/hex-document.h index 684b544..08eb001 
100644 --- a/src/hex-document.h +++ b/src/hex-document.h @@ -61,11 +61,13 @@ struct _HexDocument gchar *file_name; gchar *path_end; - guchar *buffer; /* data buffer */ + gint fd; /* file descriptor */ + guchar *map; /* map start */ guchar *gap_pos; /* pointer to the start of insertion gap */ gint gap_size; /* insertion gap size */ - guint buffer_size; /* buffer size = file size + gap size */ guint file_size; /* real file size */ + guint map_size; /* total map size */ + guchar *gap_map; /* gap anon map pointer */ gboolean changed;