git.kaiwu.me - klib.git/commitdiff
compatibility with the Windows line separator
author: Heng Li <lh3@me.com>
Tue, 6 Mar 2012 02:23:01 +0000 (21:23 -0500)
committer: Heng Li <lh3@me.com>
Tue, 6 Mar 2012 02:23:01 +0000 (21:23 -0500)
kseq.h

diff --git a/kseq.h b/kseq.h
index 9b4b00170ed010d4b4b121533d4b1e94c093a332..99b9a678e518936cc90828ab11af765a229575fe 100644 (file)
--- a/kseq.h
+++ b/kseq.h
@@ -23,7 +23,7 @@
    SOFTWARE.
 */
 
-/* Last Modified: 29DEC2011 */
+/* Last Modified: 05MAR2012 */
 
 #ifndef AC_KSEQ_H
 #define AC_KSEQ_H
@@ -34,7 +34,8 @@
 
 #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
 #define KS_SEP_TAB   1 // isspace() && !' '
-#define KS_SEP_MAX   1
+#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
+#define KS_SEP_MAX   2
 
 #define __KS_TYPE(type_t)                                              \
        typedef struct __kstream_t {                            \
@@ -103,7 +104,10 @@ typedef struct __kstring_t {
                                        if (ks->end == 0) break;                                                        \
                                } else break;                                                                                   \
                        }                                                                                                                       \
-                       if (delimiter > KS_SEP_MAX) {                                                           \
+                       if (delimiter == KS_SEP_LINE) { \
+                               for (i = ks->begin; i < ks->end; ++i) \
+                                       if (ks->buf[i] == '\n') break; \
+                       } else if (delimiter > KS_SEP_MAX) {                                            \
                                for (i = ks->begin; i < ks->end; ++i)                                   \
                                        if (ks->buf[i] == delimiter) break;                                     \
                        } else if (delimiter == KS_SEP_SPACE) {                                         \
@@ -129,7 +133,7 @@ typedef struct __kstring_t {
                if (str->s == 0) {                                                                                              \
                        str->m = 1;                                                                                                     \
                        str->s = (char*)calloc(1, 1);                                                           \
-               }                                                                                                                               \
+               } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
                str->s[str->l] = '\0';                                                                                  \
                return str->l;                                                                                                  \
        } \
@@ -142,6 +146,8 @@ typedef struct __kstring_t {
        __KS_GETC(__read, __bufsize)                            \
        __KS_GETUNTIL(__read, __bufsize)
 
+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
+
 #define __KSEQ_BASIC(SCOPE, type_t)                                                                            \
        SCOPE kseq_t *kseq_init(type_t fd)                                                                      \
        {                                                                                                                                       \
@@ -149,11 +155,6 @@ typedef struct __kstring_t {
                s->f = ks_init(fd);                                                                                             \
                return s;                                                                                                               \
        }                                                                                                                                       \
-       SCOPE void kseq_rewind(kseq_t *ks)                                                                      \
-       {                                                                                                                                       \
-               ks->last_char = 0;                                                                                              \
-               ks->f->is_eof = ks->f->begin = ks->f->end = 0;                                  \
-       }                                                                                                                                       \
        SCOPE void kseq_destroy(kseq_t *ks)                                                                     \
        {                                                                                                                                       \
                if (!ks) return;                                                                                                \
@@ -179,14 +180,14 @@ typedef struct __kstring_t {
                } /* else: the first header char has been read in the previous call */ \
                seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
                if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
-               if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* read FASTA/Q comment */ \
+               if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
                if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
                        seq->seq.m = 256; \
                        seq->seq.s = (char*)malloc(seq->seq.m); \
                } \
                while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
                        seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
-                       ks_getuntil2(ks, '\n', &seq->seq, 0, 1); /* read the rest of the line */ \
+                       ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
                } \
                if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
                if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
@@ -202,7 +203,7 @@ typedef struct __kstring_t {
                } \
                while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
                if (c == -1) return -2; /* error: no quality string */ \
-               while (ks_getuntil2(ks, '\n', &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+               while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
                seq->last_char = 0;     /* we have not come to the next header line */ \
                if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
                return seq->seq.l; \