From: Heng Li Date: Fri, 28 Oct 2011 18:49:37 +0000 (-0400) Subject: added the BGZF library X-Git-Tag: spawn-final~84 X-Git-Url: http://www.kaiwu.me/postgresql/commit/static/gitweb.js?a=commitdiff_plain;h=189e688b47809488e45bd9e0a5293a18bc4e2f0c;p=klib.git added the BGZF library --- diff --git a/bgzf.c b/bgzf.c new file mode 100644 index 0000000..76e266c --- /dev/null +++ b/bgzf.c @@ -0,0 +1,555 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011 Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include +#include "bgzf.h" + +#ifdef _USE_KNETFILE +#include "knetfile.h" +typedef knetFile *_bgzf_file_t; +#define _bgzf_open(fn, mode) knet_open(fn, mode) +#define _bgzf_dopen(fp, mode) knet_dopen(fp, mode) +#define _bgzf_close(fp) knet_close(fp) +#define _bgzf_fileno(fp) ((fp)->fd) +#define _bgzf_tell(fp) knet_tell(fp) +#define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence) +#define _bgzf_read(fp, buf, len) knet_read(fp, buf, len) +#define _bgzf_write(fp, buf, len) knet_write(fp, buf, len) +#else // ~defined(_USE_KNETFILE) +#if defined(_WIN32) || defined(_MSC_VER) +#define ftello(fp) ftell(fp) +#define fseeko(fp, offset, whence) fseek(fp, offset, whence) +#else // ~defined(_WIN32) +extern off_t ftello(FILE *stream); +extern int fseeko(FILE *stream, off_t offset, int whence); +#endif // ~defined(_WIN32) +typedef FILE *_bgzf_file_t; +#define _bgzf_open(fn, mode) fopen(fn, mode) +#define _bgzf_dopen(fp, mode) fdopen(fp, mode) +#define _bgzf_close(fp) fclose(fp) +#define _bgzf_fileno(fp) fileno(fp) +#define _bgzf_tell(fp) ftello(fp) +#define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence) +#define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp) +#define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp) +#endif // ~define(_USE_KNETFILE) + +#define BLOCK_HEADER_LENGTH 18 +#define BLOCK_FOOTER_LENGTH 8 + +/* BGZF/GZIP header (speciallized from RFC 1952; little endian): + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ +*/ +static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; + +#ifdef BGZF_CACHE +typedef struct { + int size; + uint8_t *block; + int64_t end_offset; +} cache_t; +#include "khash.h" +KHASH_MAP_INIT_INT64(cache, cache_t) +#endif + +static inline void packInt16(uint8_t *buffer, uint16_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; +} + +static inline int unpackInt16(const uint8_t *buffer) +{ + return buffer[0] | buffer[1] << 8; +} + +static inline void packInt32(uint8_t *buffer, uint32_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; + buffer[2] = value >> 16; + buffer[3] = value >> 24; +} + +static BGZF *bgzf_read_init() +{ + BGZF *fp; + fp = calloc(1, sizeof(BGZF)); + fp->open_mode = 'r'; + fp->uncompressed_block = malloc(BGZF_BLOCK_SIZE); + fp->compressed_block = malloc(BGZF_BLOCK_SIZE); +#ifdef BGZF_CACHE + fp->cache = kh_init(cache); +#endif + return fp; +} + +static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level +{ + BGZF *fp; + fp = calloc(1, sizeof(BGZF)); + fp->open_mode = 'w'; + fp->uncompressed_block = malloc(BGZF_BLOCK_SIZE); + fp->compressed_block = malloc(BGZF_BLOCK_SIZE); + fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 + if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; + return fp; +} +// get the compress level from the mode string +static int mode2level(const char *__restrict mode) +{ + int i, compress_level = -1; + for (i = 0; mode[i]; ++i) + if (mode[i] >= '0' && mode[i] <= '9') break; + if (mode[i]) compress_level = (int)mode[i] - '0'; + if (strchr(mode, 'u')) compress_level = 0; + return compress_level; +} + +BGZF *bgzf_open(const char *path, const char *mode) +{ + BGZF *fp = 0; + if (strchr(mode, 'r') || strchr(mode, 'R')) { + _bgzf_file_t fpr; + if ((fpr = _bgzf_open(path, "r")) == 0) return 0; + fp = bgzf_read_init(); + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'W')) { + FILE *fpw; + if ((fpw = fopen(path, "w")) == 0) return 0; + fp = bgzf_write_init(mode2level(mode)); + fp->fp = fpw; + } + return fp; +} + +BGZF *bgzf_dopen(int fd, const char *mode) +{ + BGZF *fp = 0; + if (strchr(mode, 'r') || strchr(mode, 'R')) { + _bgzf_file_t fpr; + if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0; + fp = bgzf_read_init(); + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'W')) { + FILE *fpw; + if ((fpw = fdopen(fd, "w")) == 0) return 0; + fp = bgzf_write_init(mode2level(mode)); + fp->fp = fpw; + } + return fp; +} + +// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. +static int deflate_block(BGZF *fp, int block_length) +{ + uint8_t *buffer = fp->compressed_block; + int buffer_size = BGZF_BLOCK_SIZE; + int input_length = block_length; + int compressed_length = 0; + int remaining; + uint32_t crc; + + assert(block_length <= BGZF_BLOCK_SIZE); // guaranteed by the caller + memcpy(buffer, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block + while (1) { // loop to retry for blocks that do not compress enough + int status; + z_stream zs; + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = fp->uncompressed_block; + zs.avail_in = input_length; + zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH]; + zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; + status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer + if (status != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + status = deflate(&zs, Z_FINISH); + if (status != Z_STREAM_END) { // not compressed enough + deflateEnd(&zs); // reset the stream + if (status == Z_OK) { // reduce the size and recompress + input_length -= 1024; + assert(input_length > 0); // logically, this should not happen + continue; + } + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + if (deflateEnd(&zs) != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + compressed_length = zs.total_out; + compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + assert(compressed_length <= BGZF_BLOCK_SIZE); + break; + } + + assert(compressed_length > 0); + packInt16((uint8_t*)&buffer[16], compressed_length - 1); // write the compressed_length; -1 to fit 2 bytes + crc = crc32(0L, NULL, 0L); + crc = crc32(crc, fp->uncompressed_block, input_length); + packInt32((uint8_t*)&buffer[compressed_length-8], crc); + packInt32((uint8_t*)&buffer[compressed_length-4], input_length); + + remaining = block_length - input_length; + if (remaining > 0) { + assert(remaining <= input_length); + memcpy(fp->uncompressed_block, fp->uncompressed_block + input_length, remaining); + } + fp->block_offset = remaining; + return compressed_length; +} + +// Inflate the block in fp->compressed_block into fp->uncompressed_block +static int inflate_block(BGZF* fp, int block_length) +{ + z_stream zs; + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = fp->compressed_block + 18; + zs.avail_in = block_length - 16; + zs.next_out = fp->uncompressed_block; + zs.avail_out = BGZF_BLOCK_SIZE; + + if (inflateInit2(&zs, -15) != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + if (inflate(&zs, Z_FINISH) != Z_STREAM_END) { + inflateEnd(&zs); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + if (inflateEnd(&zs) != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + return zs.total_out; +} + +static int check_header(const uint8_t *header) +{ + return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0 + && unpackInt16((uint8_t*)&header[10]) == 6 + && header[12] == 'B' && header[13] == 'C' + && unpackInt16((uint8_t*)&header[14]) == 2); +} + +#ifdef BGZF_CACHE +static void free_cache(BGZF *fp) +{ + khint_t k; + khash_t(cache) *h = (khash_t(cache)*)fp->cache; + if (fp->open_mode != 'r') return; + for (k = kh_begin(h); k < kh_end(h); ++k) + if (kh_exist(h, k)) free(kh_val(h, k).block); + kh_destroy(cache, h); +} + +static int load_block_from_cache(BGZF *fp, int64_t block_address) +{ + khint_t k; + cache_t *p; + khash_t(cache) *h = (khash_t(cache)*)fp->cache; + k = kh_get(cache, h, block_address); + if (k == kh_end(h)) return 0; + p = &kh_val(h, k); + if (fp->block_length != 0) fp->block_offset = 0; + fp->block_address = block_address; + fp->block_length = p->size; + memcpy(fp->uncompressed_block, p->block, BGZF_BLOCK_SIZE); + _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET); + return p->size; +} + +static void cache_block(BGZF *fp, int size) +{ + int ret; + khint_t k; + cache_t *p; + khash_t(cache) *h = (khash_t(cache)*)fp->cache; + if (BGZF_BLOCK_SIZE >= fp->cache_size) return; + if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) { + /* A better way would be to remove the oldest block in the + * cache, but here we remove a random one for simplicity. This + * should not have a big impact on performance. */ + for (k = kh_begin(h); k < kh_end(h); ++k) + if (kh_exist(h, k)) break; + if (k < kh_end(h)) { + free(kh_val(h, k).block); + kh_del(cache, h, k); + } + } + k = kh_put(cache, h, fp->block_address, &ret); + if (ret == 0) return; // if this happens, a bug! + p = &kh_val(h, k); + p->size = fp->block_length; + p->end_offset = fp->block_address + size; + p->block = malloc(BGZF_BLOCK_SIZE); + memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_BLOCK_SIZE); +} +#else +static void free_cache(BGZF *fp) {} +static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} +static void cache_block(BGZF *fp, int size) {} +#endif + +static int bgzf_read_block(BGZF *fp) +{ + uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; + int count, size = 0, block_length, remaining; + int64_t block_address; + block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + if (load_block_from_cache(fp, block_address)) return 0; + count = _bgzf_read(fp->fp, header, sizeof(header)); + if (count == 0) { // no data read + fp->block_length = 0; + return 0; + } + if (count != sizeof(header) || !check_header(header)) { + fp->errcode |= BGZF_ERR_HEADER; + return -1; + } + size = count; + block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" + compressed_block = (uint8_t*)fp->compressed_block; + memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); + remaining = block_length - BLOCK_HEADER_LENGTH; + count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); + if (count != remaining) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + size += count; + if (inflate_block(fp, block_length) < 0) return -1; + if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. + fp->block_address = block_address; + fp->block_length = count; + cache_block(fp, size); + return 0; +} + +ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length) +{ + ssize_t bytes_read = 0; + uint8_t *output = data; + if (length <= 0) return 0; + assert(fp->open_mode == 'r'); + while (bytes_read < length) { + int copy_length, available = fp->block_length - fp->block_offset; + uint8_t *buffer; + if (available <= 0) { + if (bgzf_read_block(fp) != 0) return -1; + available = fp->block_length - fp->block_offset; + if (available <= 0) break; + } + copy_length = length - bytes_read < available? length - bytes_read : available; + buffer = fp->uncompressed_block; + memcpy(output, buffer + fp->block_offset, copy_length); + fp->block_offset += copy_length; + output += copy_length; + bytes_read += copy_length; + } + if (fp->block_offset == fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = fp->block_length = 0; + } + return bytes_read; +} + +int bgzf_flush(BGZF *fp) +{ + assert(fp->open_mode == 'w'); + while (fp->block_offset > 0) { + int block_length; + block_length = deflate_block(fp, fp->block_offset); + if (block_length < 0) return -1; + if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) { + fp->errcode |= BGZF_ERR_IO; // possibly truncated file + return -1; + } + fp->block_address += block_length; + } + return 0; +} + +int bgzf_flush_try(BGZF *fp, ssize_t size) +{ + if (fp->block_offset + size > BGZF_BLOCK_SIZE) + return bgzf_flush(fp); + return -1; +} + +ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length) +{ + const uint8_t *input = data; + int block_length = BGZF_BLOCK_SIZE, bytes_written; + assert(fp->open_mode == 'w'); + input = data; + bytes_written = 0; + while (bytes_written < length) { + uint8_t* buffer = fp->uncompressed_block; + int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written; + memcpy(buffer + fp->block_offset, input, copy_length); + fp->block_offset += copy_length; + input += copy_length; + bytes_written += copy_length; + if (fp->block_offset == block_length && bgzf_flush(fp)) break; + } + return bytes_written; +} + +int bgzf_close(BGZF* fp) +{ + int ret, count, block_length; + if (fp == 0) return -1; + if (fp->open_mode == 'w') { + if (bgzf_flush(fp) != 0) return -1; + block_length = deflate_block(fp, 0); // write an empty block + count = fwrite(fp->compressed_block, 1, block_length, fp->fp); + if (fflush(fp->fp) != 0) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + } + ret = fp->open_mode == 'w'? fclose(fp->fp) : _bgzf_close(fp->fp); + if (ret != 0) return -1; + free(fp->uncompressed_block); + free(fp->compressed_block); + free_cache(fp); + free(fp); + return 0; +} + +void bgzf_set_cache_size(BGZF *fp, int cache_size) +{ + if (fp) fp->cache_size = cache_size; +} + +int bgzf_check_EOF(BGZF *fp) +{ + static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; + uint8_t buf[28]; + off_t offset; + offset = _bgzf_tell((_bgzf_file_t)fp->fp); + if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0; + _bgzf_read(fp->fp, buf, 28); + _bgzf_seek(fp->fp, offset, SEEK_SET); + return (memcmp(magic, buf, 28) == 0)? 1 : 0; +} + +int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) +{ + int block_offset; + int64_t block_address; + + if (fp->open_mode != 'r' || where != SEEK_SET) { + fp->errcode |= BGZF_ERR_MISUSE; + return -1; + } + block_offset = pos & 0xFFFF; + block_address = pos >> 16; + if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + fp->block_length = 0; // indicates current block has not been loaded + fp->block_address = block_address; + fp->block_offset = block_offset; + return 0; +} + +int bgzf_is_bgzf(const char *fn) +{ + uint8_t buf[16]; + int n; + _bgzf_file_t fp; + if ((fp = _bgzf_open(fn, "r")) == 0) return 0; + n = _bgzf_read(fp, buf, 16); + _bgzf_close(fp); + if (n != 16) return 0; + return memcmp(g_magic, buf, 16) == 0? 1 : 0; +} + +int bgzf_getc(BGZF *fp) +{ + int c; + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) return -2; /* error */ + if (fp->block_length == 0) return -1; /* end-of-file */ + } + c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + if (fp->block_offset == fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = 0; + fp->block_length = 0; + } + return c; +} + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +int bgzf_getline(BGZF *fp, int delim, kstring_t *str) +{ + int l, state = 0; + unsigned char *buf = (unsigned char*)fp->uncompressed_block; + str->l = 0; + do { + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) { state = -2; break; } + if (fp->block_length == 0) { state = -1; break; } + } + for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l); + if (l < fp->block_length) state = 1; + l -= fp->block_offset; + if (str->l + l + 1 >= str->m) { + str->m = str->l + l + 2; + kroundup32(str->m); + str->s = (char*)realloc(str->s, str->m); + } + memcpy(str->s + str->l, buf + fp->block_offset, l); + str->l += l; + fp->block_offset += l + 1; + if (fp->block_offset >= fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = 0; + fp->block_length = 0; + } + } while (state == 0); + if (str->l == 0 && state < 0) return state; + str->s[str->l] = 0; + return str->l; +} diff --git a/bgzf.h b/bgzf.h new file mode 100644 index 0000000..1a3fe21 --- /dev/null +++ b/bgzf.h @@ -0,0 +1,188 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011 Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* The BGZF library was originally written by Bob Handsaker from the Broad + * Institute. It was later improved by the SAMtools developers. */ + +#ifndef __BGZF_H +#define __BGZF_H + +#include +#include +#include + +#define BGZF_BLOCK_SIZE 0x10000 // 64k + +#define BGZF_ERR_ZLIB 1 +#define BGZF_ERR_HEADER 2 +#define BGZF_ERR_IO 4 +#define BGZF_ERR_MISUSE 8 + +typedef struct { + int open_mode:8, compress_level:8, errcode:16; + int cache_size; + int block_length, block_offset; + int64_t block_address; + void *uncompressed_block, *compressed_block; + void *cache; // a pointer to a hash table + void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading +} BGZF; + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + /****************** + * Basic routines * + ******************/ + + /** + * Open an existing file descriptor for reading or writing. + * + * @param fd file descriptor + * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies + * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored. + * @return BGZF file handler; 0 on error + */ + BGZF* bgzf_dopen(int fd, const char *mode); + + /** + * Open the specified file for reading or writing. + */ + BGZF* bgzf_open(const char* path, const char *mode); + + /** + * Close the BGZF and free all associated resources. + * + * @param fp BGZF file handler + * @return 0 on success and -1 on error + */ + int bgzf_close(BGZF *fp); + + /** + * Read up to _length_ bytes from the file storing into _data_. + * + * @param fp BGZF file handler + * @param data data array to read into + * @param length size of data to read + * @return number of bytes actually read; 0 on end-of-file and -1 on error + */ + ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length); + + /** + * Write _length_ bytes from _data_ to the file. + * + * @param fp BGZF file handler + * @param data data array to write + * @param length size of data to write + * @return number of bytes actually written; -1 on error + */ + ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length); + + /** + * Write the data in the buffer to the file. + */ + int bgzf_flush(BGZF *fp); + + /** + * Return a virtual file pointer to the current location in the file. + * No interpetation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + */ + #define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) + + /** + * Set the file to read from the location specified by _pos_. + * + * @param fp BGZF file handler + * @param pos virtual file offset returned by bgzf_tell() + * @param whence must be SEEK_SET + * @return 0 on success and -1 on error + */ + int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence); + + /** + * Check if the BGZF end-of-file (EOF) marker is present + * + * @param fp BGZF file handler opened for reading + * @return 1 if EOF is present; 0 if not or on I/O error + */ + int bgzf_check_EOF(BGZF *fp); + + /** + * Check if a file is in the BGZF format + * + * @param fn file name + * @return 1 if _fn_ is BGZF; 0 if not or on I/O error + */ + int bgzf_is_bgzf(const char *fn); + + /********************* + * Advanced routines * + *********************/ + + /** + * Set the cache size. Only effective when compiled with -DBGZF_CACHE. + * + * @param fp BGZF file handler + * @param size size of cache in bytes; 0 to disable caching (default) + */ + void bgzf_set_cache_size(BGZF *fp, int size); + + /** + * Flush the file if the remaining buffer size is smaller than _size_ + */ + int bgzf_flush_try(BGZF *fp, ssize_t size); + + /** + * Read one byte from a BGZF file. It is faster than bgzf_read() + * @param fp BGZF file handler + * @return byte read; -1 on end-of-file or error + */ + int bgzf_getc(BGZF *fp); + + /** + * Read one line from a BGZF file. It is faster than bgzf_getc() + * + * @param fp BGZF file handler + * @param delim delimitor + * @param str string to write to; must be initialized + * @return length of the string; 0 on end-of-file; negative on error + */ + int bgzf_getline(BGZF *fp, int delim, kstring_t *str); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/knetfile.c b/knetfile.c index af09146..2511395 100644 --- a/knetfile.c +++ b/knetfile.c @@ -541,10 +541,8 @@ off_t knet_seek(knetFile *fp, int64_t off, int whence) return -1; } fp->offset = offset; - return 0; - } - else if (fp->type == KNF_TYPE_FTP) - { + return off; + } else if (fp->type == KNF_TYPE_FTP) { if (whence==SEEK_CUR) fp->offset += off; else if (whence==SEEK_SET) @@ -552,10 +550,8 @@ off_t knet_seek(knetFile *fp, int64_t off, int whence) else if ( whence==SEEK_END) fp->offset = fp->file_size+off; fp->is_ready = 0; - return 0; - } - else if (fp->type == KNF_TYPE_HTTP) - { + return off; + } else if (fp->type == KNF_TYPE_HTTP) { if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future? fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n"); errno = ESPIPE; @@ -566,7 +562,7 @@ off_t knet_seek(knetFile *fp, int64_t off, int whence) else if (whence==SEEK_SET) fp->offset = off; fp->is_ready = 0; - return 0; + return off; } errno = EINVAL; fprintf(stderr,"[knet_seek] %s\n", strerror(errno));