diff options
Diffstat (limited to 'jscompress.c')
-rw-r--r-- | jscompress.c | 918 |
1 files changed, 0 insertions, 918 deletions
diff --git a/jscompress.c b/jscompress.c deleted file mode 100644 index a68c0e8..0000000 --- a/jscompress.c +++ /dev/null @@ -1,918 +0,0 @@ -/* - * Javascript Compressor - * - * Copyright (c) 2008-2018 Fabrice Bellard - * Copyright (c) 2017-2018 Charlie Gordon - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include <stdlib.h> -#include <stdio.h> -#include <getopt.h> -#include <stdarg.h> -#include <string.h> -#include <inttypes.h> -#include <unistd.h> - -#include "cutils.h" - -typedef struct JSToken { - int tok; - char buf[20]; - char *str; - int len; - int size; - int line_num; /* line number for start of token */ - int lines; /* number of embedded linefeeds in token */ -} JSToken; - -enum { - TOK_EOF = 256, - TOK_IDENT, - TOK_STR1, - TOK_STR2, - TOK_STR3, - TOK_NUM, - TOK_COM, - TOK_LCOM, -}; - -void tok_reset(JSToken *tt) -{ - if (tt->str != tt->buf) { - free(tt->str); - tt->str = tt->buf; - tt->size = sizeof(tt->buf); - } - tt->len = 0; -} - -void tok_add_ch(JSToken *tt, int c) -{ - if (tt->len + 1 > tt->size) { - tt->size *= 2; - if (tt->str == tt->buf) { - tt->str = malloc(tt->size); - memcpy(tt->str, tt->buf, tt->len); - } else { - tt->str = realloc(tt->str, tt->size); - } - } - tt->str[tt->len++] = c; -} - -FILE *infile; -const char *filename; -int output_line_num; -int line_num; -int ch; -JSToken tokc; - -int skip_mask; -#define DEFINE_MAX 20 -char *define_tab[DEFINE_MAX]; -int define_len; - -void error(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - if (filename) { - fprintf(stderr, "%s:%d: ", filename, line_num); - } else { - fprintf(stderr, "jscompress: "); - } - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - va_end(ap); - exit(1); -} - -void define_symbol(const char *def) -{ - int i; - for (i = 0; i < define_len; i++) { - if (!strcmp(tokc.str, define_tab[i])) - return; - } - if (define_len >= DEFINE_MAX) - error("too many defines"); - define_tab[define_len++] = strdup(def); -} - -void undefine_symbol(const char *def) -{ - int i, j; - for (i = j = 0; i < define_len; i++) { - if (!strcmp(tokc.str, define_tab[i])) { - free(define_tab[i]); - } else { - define_tab[j++] = define_tab[i]; - } - } - define_len = j; -} - -const char *find_symbol(const char *def) -{ - int i; - for (i = 0; i < define_len; i++) { - if (!strcmp(tokc.str, define_tab[i])) - return "1"; - } - return NULL; -} - -void next(void); - -void nextch(void) -{ - ch = fgetc(infile); - if (ch == '\n') - line_num++; -} - -int skip_blanks(void) -{ - for (;;) { - next(); - if (tokc.tok != ' ' && tokc.tok != '\t' && - tokc.tok != TOK_COM && tokc.tok != TOK_LCOM) - return tokc.tok; - } -} - -void parse_directive(void) -{ - int ifdef, mask = skip_mask; - /* simplistic preprocessor: - #define / #undef / #ifdef / #ifndef / #else / #endif - no symbol substitution. - */ - skip_mask = 0; /* disable skipping to parse preprocessor line */ - nextch(); - if (skip_blanks() != TOK_IDENT) - error("expected preprocessing directive after #"); - - if (!strcmp(tokc.str, "define")) { - if (skip_blanks() != TOK_IDENT) - error("expected identifier after #define"); - define_symbol(tokc.str); - } else if (!strcmp(tokc.str, "undef")) { - if (skip_blanks() != TOK_IDENT) - error("expected identifier after #undef"); - undefine_symbol(tokc.str); - } else if ((ifdef = 1, !strcmp(tokc.str, "ifdef")) || - (ifdef = 0, !strcmp(tokc.str, "ifndef"))) { - if (skip_blanks() != TOK_IDENT) - error("expected identifier after #ifdef/#ifndef"); - mask = (mask << 2) | 2 | ifdef; - if (find_symbol(tokc.str)) - mask ^= 1; - } else if (!strcmp(tokc.str, "else")) { - if (!(mask & 2)) - error("#else without a #if"); - mask ^= 1; - } else if (!strcmp(tokc.str, "endif")) { - if (!(mask & 2)) - error("#endif without a #if"); - mask >>= 2; - } else { - error("unsupported preprocessing directive"); - } - if (skip_blanks() != '\n') - error("extra characters on preprocessing line"); - skip_mask = mask; -} - -/* return -1 if invalid char */ -static int hex_to_num(int ch) -{ - if (ch >= 'a' && ch <= 'f') - return ch - 'a' + 10; - else if (ch >= 'A' && ch <= 'F') - return ch - 'A' + 10; - else if (ch >= '0' && ch <= '9') - return ch - '0'; - else - return -1; -} - -void next(void) -{ -again: - tok_reset(&tokc); - tokc.line_num = line_num; - tokc.lines = 0; - switch(ch) { - case EOF: - tokc.tok = TOK_EOF; - if (skip_mask) - error("missing #endif"); - break; - case 'a' ... 'z': - case 'A' ... 'Z': - case '_': - case '$': - tok_add_ch(&tokc, ch); - nextch(); - while ((ch >= 'a' && ch <= 'z') || - (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || - (ch == '_' || ch == '$')) { - tok_add_ch(&tokc, ch); - nextch(); - } - tok_add_ch(&tokc, '\0'); - tokc.tok = TOK_IDENT; - break; - case '.': - nextch(); - if (ch >= '0' && ch <= '9') { - tok_add_ch(&tokc, '.'); - goto has_dot; - } - tokc.tok = '.'; - break; - case '0': - tok_add_ch(&tokc, ch); - nextch(); - if (ch == 'x' || ch == 'X') { - /* hexa */ - tok_add_ch(&tokc, ch); - nextch(); - while ((ch >= 'a' && ch <= 'f') || - (ch >= 'A' && ch <= 'F') || - (ch >= '0' && ch <= '9')) { - tok_add_ch(&tokc, ch); - nextch(); - } - tok_add_ch(&tokc, '\0'); - tokc.tok = TOK_NUM; - break; - } - goto has_digit; - - case '1' ... '9': - tok_add_ch(&tokc, ch); - nextch(); - has_digit: - /* decimal */ - while (ch >= '0' && ch <= '9') { - tok_add_ch(&tokc, ch); - nextch(); - } - if (ch == '.') { - tok_add_ch(&tokc, ch); - nextch(); - has_dot: - while (ch >= '0' && ch <= '9') { - tok_add_ch(&tokc, ch); - nextch(); - } - } - if (ch == 'e' || ch == 'E') { - tok_add_ch(&tokc, ch); - nextch(); - if (ch == '+' || ch == '-') { - tok_add_ch(&tokc, ch); - nextch(); - } - while (ch >= '0' && ch <= '9') { - tok_add_ch(&tokc, ch); - nextch(); - } - } - tok_add_ch(&tokc, '\0'); - tokc.tok = TOK_NUM; - break; - case '`': - { - nextch(); - while (ch != '`' && ch != EOF) { - if (ch == '\\') { - tok_add_ch(&tokc, ch); - nextch(); - if (ch == EOF) { - error("unexpected char after '\\'"); - } - tok_add_ch(&tokc, ch); - } else { - tok_add_ch(&tokc, ch); - nextch(); - } - } - nextch(); - tok_add_ch(&tokc, 0); - tokc.tok = TOK_STR3; - } - break; - case '\"': - case '\'': - { - int n, i, c, hex_digit_count; - int quote_ch; - quote_ch = ch; - nextch(); - while (ch != quote_ch && ch != EOF) { - if (ch == '\\') { - nextch(); - switch(ch) { - case 'n': - tok_add_ch(&tokc, '\n'); - nextch(); - break; - case 'r': - tok_add_ch(&tokc, '\r'); - nextch(); - break; - case 't': - tok_add_ch(&tokc, '\t'); - nextch(); - break; - case 'v': - tok_add_ch(&tokc, '\v'); - nextch(); - break; - case '\"': - case '\'': - case '\\': - tok_add_ch(&tokc, ch); - nextch(); - break; - case '0' ... '7': - n = 0; - while (ch >= '0' && ch <= '7') { - n = n * 8 + (ch - '0'); - nextch(); - } - tok_add_ch(&tokc, n); - break; - case 'x': - case 'u': - if (ch == 'x') - hex_digit_count = 2; - else - hex_digit_count = 4; - nextch(); - n = 0; - for(i = 0; i < hex_digit_count; i++) { - c = hex_to_num(ch); - if (c < 0) - error("unexpected char after '\\x'"); - n = n * 16 + c; - nextch(); - } - if (n >= 256) - error("unicode is currently unsupported"); - tok_add_ch(&tokc, n); - break; - - default: - error("unexpected char after '\\'"); - } - } else { - /* XXX: should refuse embedded newlines */ - tok_add_ch(&tokc, ch); - nextch(); - } - } - nextch(); - tok_add_ch(&tokc, 0); - if (quote_ch == '\'') - tokc.tok = TOK_STR1; - else - tokc.tok = TOK_STR2; - } - break; - case '/': - nextch(); - if (ch == '/') { - tok_add_ch(&tokc, '/'); - tok_add_ch(&tokc, ch); - nextch(); - while (ch != '\n' && ch != EOF) { - tok_add_ch(&tokc, ch); - nextch(); - } - tok_add_ch(&tokc, '\0'); - tokc.tok = TOK_LCOM; - } else if (ch == '*') { - int last; - tok_add_ch(&tokc, '/'); - tok_add_ch(&tokc, ch); - last = 0; - for(;;) { - nextch(); - if (ch == EOF) - error("unterminated comment"); - if (ch == '\n') - tokc.lines++; - tok_add_ch(&tokc, ch); - if (last == '*' && ch == '/') - break; - last = ch; - } - nextch(); - tok_add_ch(&tokc, '\0'); - tokc.tok = TOK_COM; - } else { - tokc.tok = '/'; - } - break; - case '#': - parse_directive(); - goto again; - case '\n': - /* adjust line number */ - tokc.line_num--; - tokc.lines++; - /* fall thru */ - default: - tokc.tok = ch; - nextch(); - break; - } - if (skip_mask & 1) - goto again; -} - -void print_tok(FILE *f, JSToken *tt) -{ - /* keep output lines in sync with input lines */ - while (output_line_num < tt->line_num) { - putc('\n', f); - output_line_num++; - } - - switch(tt->tok) { - case TOK_IDENT: - case TOK_COM: - case TOK_LCOM: - fprintf(f, "%s", tt->str); - break; - case TOK_NUM: - { - unsigned long a; - char *p; - a = strtoul(tt->str, &p, 0); - if (*p == '\0' && a <= 0x7fffffff) { - /* must be an integer */ - fprintf(f, "%d", (int)a); - } else { - fprintf(f, "%s", tt->str); - } - } - break; - case TOK_STR3: - fprintf(f, "`%s`", tt->str); - break; - case TOK_STR1: - case TOK_STR2: - { - int i, c, quote_ch; - if (tt->tok == TOK_STR1) - quote_ch = '\''; - else - quote_ch = '\"'; - fprintf(f, "%c", quote_ch); - for(i = 0; i < tt->len - 1; i++) { - c = (uint8_t)tt->str[i]; - switch(c) { - case '\r': - fprintf(f, "\\r"); - break; - case '\n': - fprintf(f, "\\n"); - break; - case '\t': - fprintf(f, "\\t"); - break; - case '\v': - fprintf(f, "\\v"); - break; - case '\"': - case '\'': - if (c == quote_ch) - fprintf(f, "\\%c", c); - else - fprintf(f, "%c", c); - break; - case '\\': - fprintf(f, "\\\\"); - break; - default: - /* XXX: no utf-8 support! */ - if (c >= 32 && c <= 255) { - fprintf(f, "%c", c); - } else if (c <= 255) - fprintf(f, "\\x%02x", c); - else - fprintf(f, "\\u%04x", c); - break; - } - } - fprintf(f, "%c", quote_ch); - } - break; - default: - if (tokc.tok >= 256) - error("unsupported token in print_tok: %d", tt->tok); - fprintf(f, "%c", tt->tok); - break; - } - output_line_num += tt->lines; -} - -/* check if token pasting could occur */ -static BOOL compat_token(int c1, int c2) -{ - if ((c1 == TOK_IDENT || c1 == TOK_NUM) && - (c2 == TOK_IDENT || c2 == TOK_NUM)) - return FALSE; - - if ((c1 == c2 && strchr("+-<>&|=*/.", c1)) - || (c2 == '=' && strchr("+-<>&|!*/^%", c1)) - || (c1 == '=' && c2 == '>') - || (c1 == '/' && c2 == '*') - || (c1 == '.' && c2 == TOK_NUM) - || (c1 == TOK_NUM && c2 == '.')) - return FALSE; - - return TRUE; -} - -void js_compress(const char *filename, const char *outfilename, - BOOL do_strip, BOOL keep_header) -{ - FILE *outfile; - int ltok, seen_space; - - line_num = 1; - infile = fopen(filename, "rb"); - if (!infile) { - perror(filename); - exit(1); - } - - output_line_num = 1; - outfile = fopen(outfilename, "wb"); - if (!outfile) { - perror(outfilename); - exit(1); - } - - nextch(); - next(); - ltok = 0; - seen_space = 0; - if (do_strip) { - if (keep_header) { - while (tokc.tok == ' ' || - tokc.tok == '\n' || - tokc.tok == '\t' || - tokc.tok == '\v' || - tokc.tok == '\b' || - tokc.tok == '\f') { - seen_space = 1; - next(); - } - if (tokc.tok == TOK_COM) { - print_tok(outfile, &tokc); - //fprintf(outfile, "\n"); - ltok = tokc.tok; - seen_space = 0; - next(); - } - } - - for(;;) { - if (tokc.tok == TOK_EOF) - break; - if (tokc.tok == ' ' || - tokc.tok == '\r' || - tokc.tok == '\t' || - tokc.tok == '\v' || - tokc.tok == '\b' || - tokc.tok == '\f' || - tokc.tok == TOK_LCOM || - tokc.tok == TOK_COM) { - /* don't print spaces or comments */ - seen_space = 1; - } else if (tokc.tok == TOK_STR3) { - print_tok(outfile, &tokc); - ltok = tokc.tok; - seen_space = 0; - } else if (tokc.tok == TOK_STR1 || tokc.tok == TOK_STR2) { - int count, i; - /* find the optimal quote char */ - count = 0; - for(i = 0; i < tokc.len; i++) { - if (tokc.str[i] == '\'') - count++; - else if (tokc.str[i] == '\"') - count--; - } - if (count > 0) - tokc.tok = TOK_STR2; - else if (count < 0) - tokc.tok = TOK_STR1; - print_tok(outfile, &tokc); - ltok = tokc.tok; - seen_space = 0; - } else { - if (seen_space && !compat_token(ltok, tokc.tok)) { - fprintf(outfile, " "); - } - print_tok(outfile, &tokc); - ltok = tokc.tok; - seen_space = 0; - } - next(); - } - } else { - /* just handle preprocessing */ - while (tokc.tok != TOK_EOF) { - print_tok(outfile, &tokc); - next(); - } - } - - fclose(outfile); - fclose(infile); -} - -#define HASH_SIZE 30011 -#define MATCH_LEN_MIN 3 -#define MATCH_LEN_MAX (4 + 63) -#define DIST_MAX 65535 - -static int find_longest_match(int *pdist, const uint8_t *src, int src_len, - const int *hash_next, int cur_pos) -{ - int pos, i, match_len, match_pos, pos_min, len_max; - - len_max = min_int(src_len - cur_pos, MATCH_LEN_MAX); - match_len = 0; - match_pos = 0; - pos_min = max_int(cur_pos - DIST_MAX - 1, 0); - pos = hash_next[cur_pos]; - while (pos >= pos_min) { - for(i = 0; i < len_max; i++) { - if (src[cur_pos + i] != src[pos + i]) - break; - } - if (i > match_len) { - match_len = i; - match_pos = pos; - } - pos = hash_next[pos]; - } - *pdist = cur_pos - match_pos - 1; - return match_len; -} - -int lz_compress(uint8_t **pdst, const uint8_t *src, int src_len) -{ - int *hash_table, *hash_next; - uint32_t h, v; - int i, dist, len, len1, dist1; - uint8_t *dst, *q; - - /* build the hash table */ - - hash_table = malloc(sizeof(hash_table[0]) * HASH_SIZE); - for(i = 0; i < HASH_SIZE; i++) - hash_table[i] = -1; - hash_next = malloc(sizeof(hash_next[0]) * src_len); - for(i = 0; i < src_len; i++) - hash_next[i] = -1; - - for(i = 0; i < src_len - MATCH_LEN_MIN + 1; i++) { - h = ((src[i] << 16) | (src[i + 1] << 8) | src[i + 2]) % HASH_SIZE; - hash_next[i] = hash_table[h]; - hash_table[h] = i; - } - for(;i < src_len; i++) { - hash_next[i] = -1; - } - free(hash_table); - - dst = malloc(src_len + 4); /* never larger than the source */ - q = dst; - *q++ = src_len >> 24; - *q++ = src_len >> 16; - *q++ = src_len >> 8; - *q++ = src_len >> 0; - /* compress */ - i = 0; - while (i < src_len) { - if (src[i] >= 128) - return -1; - len = find_longest_match(&dist, src, src_len, hash_next, i); - if (len >= MATCH_LEN_MIN) { - /* heuristic: see if better length just after */ - len1 = find_longest_match(&dist1, src, src_len, hash_next, i + 1); - if (len1 > len) - goto no_match; - } - if (len < MATCH_LEN_MIN) { - no_match: - *q++ = src[i]; - i++; - } else if (len <= (3 + 15) && dist < (1 << 10)) { - v = 0x8000 | ((len - 3) << 10) | dist; - *q++ = v >> 8; - *q++ = v; - i += len; - } else if (len >= 4 && len <= (4 + 63) && dist < (1 << 16)) { - v = 0xc00000 | ((len - 4) << 16) | dist; - *q++ = v >> 16; - *q++ = v >> 8; - *q++ = v; - i += len; - } else { - goto no_match; - } - } - free(hash_next); - *pdst = dst; - return q - dst; -} - -static int load_file(uint8_t **pbuf, const char *filename) -{ - FILE *f; - uint8_t *buf; - int buf_len; - - f = fopen(filename, "rb"); - if (!f) { - perror(filename); - exit(1); - } - fseek(f, 0, SEEK_END); - buf_len = ftell(f); - fseek(f, 0, SEEK_SET); - buf = malloc(buf_len + 1); - fread(buf, 1, buf_len, f); - buf[buf_len] = '\0'; - fclose(f); - *pbuf = buf; - return buf_len; -} - -static void save_file(const char *filename, const uint8_t *buf, int buf_len) -{ - FILE *f; - - f = fopen(filename, "wb"); - if (!f) { - perror(filename); - exit(1); - } - fwrite(buf, 1, buf_len, f); - fclose(f); -} - -static void save_c_source(const char *filename, const uint8_t *buf, int buf_len, - const char *var_name) -{ - FILE *f; - int i; - - f = fopen(filename, "wb"); - if (!f) { - perror(filename); - exit(1); - } - fprintf(f, "/* This file is automatically generated - do not edit */\n\n"); - fprintf(f, "const uint8_t %s[] = {\n", var_name); - for(i = 0; i < buf_len; i++) { - fprintf(f, " 0x%02x,", buf[i]); - if ((i % 8) == 7 || (i == buf_len - 1)) - fprintf(f, "\n"); - } - fprintf(f, "};\n"); - fclose(f); -} - -#define DEFAULT_OUTPUT_FILENAME "out.js" - -void help(void) -{ - printf("jscompress version 1.0 Copyright (c) 2008-2018 Fabrice Bellard\n" - "usage: jscompress [options] filename\n" - "Javascript compressor\n" - "\n" - "-h print this help\n" - "-n do not compress spaces\n" - "-H keep the first comment\n" - "-c compress to file\n" - "-C name compress to C source ('name' is the variable name)\n" - "-D symbol define preprocessor symbol\n" - "-U symbol undefine preprocessor symbol\n" - "-o outfile set the output filename (default=%s)\n", - DEFAULT_OUTPUT_FILENAME); - exit(1); -} - -int main(int argc, char **argv) -{ - int c, do_strip, keep_header, compress; - const char *out_filename, *c_var, *fname; - char tmpfilename[1024]; - - do_strip = 1; - keep_header = 0; - out_filename = DEFAULT_OUTPUT_FILENAME; - compress = 0; - c_var = NULL; - for(;;) { - c = getopt(argc, argv, "hno:HcC:D:U:"); - if (c == -1) - break; - switch(c) { - case 'h': - help(); - break; - case 'n': - do_strip = 0; - break; - case 'o': - out_filename = optarg; - break; - case 'H': - keep_header = 1; - break; - case 'c': - compress = 1; - break; - case 'C': - c_var = optarg; - compress = 1; - break; - case 'D': - define_symbol(optarg); - break; - case 'U': - undefine_symbol(optarg); - break; - } - } - if (optind >= argc) - help(); - - filename = argv[optind++]; - - if (compress) { -#if defined(__ANDROID__) - /* XXX: use another directory ? */ - snprintf(tmpfilename, sizeof(tmpfilename), "out.%d", getpid()); -#else - snprintf(tmpfilename, sizeof(tmpfilename), "/tmp/out.%d", getpid()); -#endif - fname = tmpfilename; - } else { - fname = out_filename; - } - js_compress(filename, fname, do_strip, keep_header); - - if (compress) { - uint8_t *buf1, *buf2; - int buf1_len, buf2_len; - - buf1_len = load_file(&buf1, fname); - unlink(fname); - buf2_len = lz_compress(&buf2, buf1, buf1_len); - if (buf2_len < 0) { - fprintf(stderr, "Could not compress file (UTF8 chars are forbidden)\n"); - exit(1); - } - - if (c_var) { - save_c_source(out_filename, buf2, buf2_len, c_var); - } else { - save_file(out_filename, buf2, buf2_len); - } - free(buf1); - free(buf2); - } - return 0; -} |