From dd4e7007259c2dc0f5d1fe6b788bf5b41a936686 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 18 Nov 2013 11:48:26 -0500 Subject: [PATCH] better interface and documentations --- kurl.c | 90 ++++++++++++++++++++++++++++++++++++++-------------------- kurl.h | 30 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 30 deletions(-) diff --git a/kurl.c b/kurl.c index 4bb66fd..0685220 100644 --- a/kurl.c +++ b/kurl.c @@ -7,25 +7,36 @@ #include #include #include -#include #include "kurl.h" #define KU_DEF_BUFLEN 0x10000 -//#define KU_MAX_SKIP (KU_DEF_BUFLEN<<1) // if seek step is smaller than this, skip -#define KU_MAX_SKIP 0 - -typedef struct { - CURLM *multi; - CURL *curl; - char *url; - off_t off0; // offset of the first byte in the buffer - uint8_t *buf; - int fd, l_buf, m_buf, p_buf; - int err, done_reading; -} kurl_t; +#define KU_MAX_SKIP (KU_DEF_BUFLEN<<1) // if seek step is smaller than this, skip #define kurl_isfile(u) ((u)->fd >= 0) +struct kurl_t { + CURLM *multi; // cURL multi handler + CURL *curl; // cURL easy handle + uint8_t *buf; // buffer + off_t off0; // offset of the first byte in the buffer; the actual file offset equals off0 + p_buf + int fd; // file descriptor for a normal file; <0 for a network file + int m_buf; // max buffer size; for a network file, set to CURL_MAX_WRITE_SIZE*2 + int l_buf; // length of the buffer; l_buf == 0 iff the input read entirely; l_buf <= m_buf + int p_buf; // file position in the buffer; p_buf <= l_buf + int done_reading; // true if we can read nothing from the file; buffer may not be empty even if done_reading is set + int err; // error code +}; + +void kurl_init(void) // required for SSL and win32 socket; NOT thread safe +{ + curl_global_init(CURL_GLOBAL_DEFAULT); +} + +void kurl_destroy(void) +{ + curl_global_cleanup(); +} + static int prepare(kurl_t *ku) { if (kurl_isfile(ku)) { @@ -41,24 +52,35 @@ static int prepare(kurl_t *ku) return 0; } -static size_t write_cb(char *ptr, size_t size, size_t nmemb, void *data) +static size_t write_cb(char *ptr, size_t size, size_t nmemb, void *data) // callback required by cURL { kurl_t *ku = (kurl_t*)data; - size_t nbytes = size * nmemb; + ssize_t nbytes = size * nmemb; assert(nbytes + ku->l_buf < ku->m_buf); memcpy(ku->buf + ku->l_buf, ptr, nbytes); ku->l_buf += nbytes; return nbytes; } -static int fill_buffer(kurl_t *ku) +static int fill_buffer(kurl_t *ku) // fill the buffer { assert(ku->p_buf == ku->l_buf); // buffer is always used up when fill_buffer() is called; otherwise a bug ku->off0 += ku->l_buf; - ku->p_buf = 0; + ku->p_buf = ku->l_buf = 0; + // For a normal file, it is not necessary to test ->done_read. For a cURL connection, however, + // if we have already read the entire content, this function will be blocked at "select()" below. + // Probably there are better means to check if reading is over. if (ku->done_reading) return 0; - if (ku->fd >= 0) { - ku->l_buf = read(ku->fd, ku->buf, ku->m_buf); + if (kurl_isfile(ku)) { + // The following block is equivalent to "ku->l_buf = read(ku->fd, ku->buf, ku->m_buf)" on Mac. + // On Linux, the man page does not specify whether read() guarantees to read ku->m_buf bytes + // even if ->fd references a normal file with sufficient remaining bytes. + while (ku->l_buf < ku->m_buf) { + int l; + l = read(ku->fd, ku->buf + ku->l_buf, ku->m_buf - ku->l_buf); + if (l == 0) break; + ku->l_buf += l; + } if (ku->l_buf < ku->m_buf) ku->done_reading = 1; } else { int n_running, rc; @@ -94,7 +116,7 @@ int kurl_close(kurl_t *ku) curl_easy_cleanup(ku->curl); curl_multi_cleanup(ku->multi); } else close(ku->fd); - free(ku->url); free(ku->buf); + free(ku->buf); free(ku); return 0; } @@ -113,20 +135,19 @@ kurl_t *kurl_open(const char *url) } if (is_file && (fd = open(url, O_RDONLY)) < 0) return 0; - ku = calloc(1, sizeof(kurl_t)); + ku = (kurl_t*)calloc(1, sizeof(kurl_t)); ku->fd = is_file? fd : -1; - ku->url = strdup(url); if (!kurl_isfile(ku)) { ku->multi = curl_multi_init(); ku->curl = curl_easy_init(); - curl_easy_setopt(ku->curl, CURLOPT_URL, ku->url); + curl_easy_setopt(ku->curl, CURLOPT_URL, url); curl_easy_setopt(ku->curl, CURLOPT_WRITEDATA, ku); curl_easy_setopt(ku->curl, CURLOPT_VERBOSE, 0L); curl_easy_setopt(ku->curl, CURLOPT_WRITEFUNCTION, write_cb); } ku->m_buf = kurl_isfile(ku)? KU_DEF_BUFLEN : CURL_MAX_WRITE_SIZE<<1; - ku->buf = calloc(ku->m_buf, 1); - if (prepare(ku) < 0 || fill_buffer(ku) == 0) { + ku->buf = (uint8_t*)calloc(ku->m_buf, 1); + if (prepare(ku) < 0 || fill_buffer(ku) <= 0) { kurl_close(ku); return 0; } @@ -139,13 +160,13 @@ ssize_t kurl_read(kurl_t *ku, void *buf, size_t nbytes) if (ku->l_buf == 0) return 0; // end-of-file while (rest) { if (ku->l_buf - ku->p_buf >= rest) { - if (buf) memcpy(buf + (nbytes - rest), ku->buf + ku->p_buf, rest); + if (buf) memcpy((uint8_t*)buf + (nbytes - rest), ku->buf + ku->p_buf, rest); ku->p_buf += rest; rest = 0; } else { int ret; if (buf && ku->l_buf > ku->p_buf) - memcpy(buf + (nbytes - rest), ku->buf + ku->p_buf, ku->l_buf - ku->p_buf); + memcpy((uint8_t*)buf + (nbytes - rest), ku->buf + ku->p_buf, ku->l_buf - ku->p_buf); rest -= ku->l_buf - ku->p_buf; ku->p_buf = ku->l_buf; ret = fill_buffer(ku); @@ -164,7 +185,11 @@ off_t kurl_seek(kurl_t *ku, off_t offset, int whence) if (whence == SEEK_SET) new_off = offset; else if (whence == SEEK_CUR) new_off += cur_off + offset; else { // not supported whence - ku->err = EINVAL; + ku->err = KURL_INV_WHENCE; + return -1; + } + if (new_off < 0) { // negtive absolute offset + ku->err = KURL_SEEK_OUT; return -1; } if (new_off - cur_off + ku->p_buf < ku->l_buf) { @@ -180,27 +205,31 @@ off_t kurl_seek(kurl_t *ku, off_t offset, int whence) r = kurl_read(ku, 0, new_off - cur_off); if (r + cur_off != new_off) failed = 1; // out of range } - if (failed) ku->err = EINVAL, ku->l_buf = ku->p_buf = 0, new_off = -1; + if (failed) ku->err = KURL_SEEK_OUT, ku->l_buf = ku->p_buf = 0, new_off = -1; return new_off; } off_t kurl_tell(const kurl_t *ku) { + if (ku == 0) return -1; return ku->off0 + ku->p_buf; } int kurl_eof(const kurl_t *ku) { + if (ku == 0) return 1; return (ku->l_buf == 0); // unless file end, buffer should never be empty } int kurl_fileno(const kurl_t *ku) { + if (ku == 0) return -1; return ku->fd; } int kurl_error(const kurl_t *ku) { + if (ku == 0) return KURL_NULL; return ku->err; } @@ -210,7 +239,8 @@ int main() int i, l, rc; char buf[0x10000]; // f = kurl_open("f.c"); - f = kurl_open("http://lh3lh3.users.sourceforge.net/"); +// f = kurl_open("http://lh3lh3.users.sourceforge.net/"); + f = kurl_open("https://github.com/"); rc = kurl_seek(f, 0x100, SEEK_SET); fprintf(stderr, "rc=%d\n", rc); l = kurl_read(f, buf, 0x10000); diff --git a/kurl.h b/kurl.h index 2ea0fcf..66ac804 100644 --- a/kurl.h +++ b/kurl.h @@ -1,4 +1,34 @@ #ifndef KURL_H #define KURL_H +#include + +#define KURL_NULL 1 +#define KURL_INV_WHENCE 2 +#define KURL_SEEK_OUT 3 + +struct kurl_t; +typedef struct kurl_t kurl_t; + +#ifdef __cplusplus +extern "C" { +#endif + +void kurl_init(void); +void kurl_destroy(void); + +kurl_t *kurl_open(const char *url); +int kurl_close(kurl_t *ku); +ssize_t kurl_read(kurl_t *ku, void *buf, size_t nbytes); +off_t kurl_seek(kurl_t *ku, off_t offset, int whence); + +off_t kurl_tell(const kurl_t *ku); +int kurl_eof(const kurl_t *ku); +int kurl_fileno(const kurl_t *ku); +int kurl_error(const kurl_t *ku); + +#ifdef __cplusplus +} +#endif + #endif -- 2.47.3