The zlib library and file compression in Node.js

Three compression algorithms supported by zlib: gzip, deflate, and brotli. Basic usage examples.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import zlib from 'zlib';
 
/**
 * Synchronously compress a value with one of zlib's algorithms.
 *
 * Strings and binary inputs (Buffer, TypedArray, DataView, ArrayBuffer) are
 * compressed as-is; any other value is serialized with JSON.stringify first.
 *
 * @param {*} str - Data to compress.
 * @param {string} [encoding='gzip'] - 'deflate', 'brotli' or 'gzip'. Any
 *   other value falls back to gzip.
 * @returns {Buffer} The compressed bytes.
 */
function zip(str, encoding = 'gzip') {
  // Binary data must be handed to zlib untouched: JSON.stringify would turn a
  // Buffer into its '{"type":"Buffer","data":[...]}' text representation.
  const isBinary = ArrayBuffer.isView(str) || str instanceof ArrayBuffer;
  const input = typeof str === 'string' || isBinary ? str : JSON.stringify(str);

  if (encoding === 'deflate') return zlib.deflateSync(input);
  if (encoding === 'brotli') return zlib.brotliCompressSync(input);
  return zlib.gzipSync(input);
}
 
/**
 * Asynchronously decompress data produced by gzip, deflate or brotli.
 *
 * The returned promise never rejects: failures are reported through the
 * `error` field of the resolved object.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer|number[]|string} buffer - Compressed
 *   data. Anything that is not already a Buffer is converted with Buffer.from.
 * @param {string} [encoding='gzip'] - 'gzip', 'deflate' or 'brotli'.
 * @returns {Promise<{error: Error|null, buffer: Buffer|null}>} On success
 *   `buffer` holds the decompressed bytes; on failure `buffer` is null and
 *   `error` describes the problem.
 */
function upzip(buffer, encoding = 'gzip') {
  const decompressors = new Map([
    ['gzip', zlib.gunzip],
    ['deflate', zlib.inflate],
    ['brotli', zlib.brotliDecompress],
  ]);

  return new Promise((resolve) => {
    const decompress = decompressors.get(encoding);
    if (!decompress) {
      // Without this branch the promise would stay pending forever.
      resolve({ error: new Error(`Unsupported encoding: ${encoding}`), buffer: null });
      return;
    }

    try {
      const input = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
      decompress(input, (error, buf) => {
        resolve({ error, buffer: error ? null : buf });
      });
    } catch (error) {
      // Buffer.from / zlib throw synchronously on invalid argument types;
      // report that through the same result shape instead of rejecting.
      resolve({ error, buffer: null });
    }
  });
}

The code above is a simple implementation of compression and decompression with three different algorithms: gzip, deflate and brotli. How efficient is each of them? Let's write a simple script to test.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/**
 * Generate a string of random characters.
 *
 * Each character code is 32 plus Math.ceil(Math.random() * range), i.e. a
 * value between 32 and 32 + range.
 *
 * @param {number} [length=100] - Number of characters to generate.
 * @param {number} [range=32] - Width of the character-code window above 32.
 * @returns {string} The generated string ('' when length is not positive).
 */
function genRandomStrs(length = 100, range = 32) {
  let result = '';
  for (let remaining = length; remaining > 0; remaining--) {
    const offset = Math.ceil(Math.random() * range);
    result += String.fromCharCode(offset + 32);
  }
  return result;
}
 
/**
 * Compare the compression ratio of gzip, deflate and brotli on random strings.
 *
 * For every character range and every test length, a random string is
 * generated with genRandomStrs, compressed with zip, and the ratio
 * compressed-bytes / original-length is recorded as a rounded-up percentage.
 * The collected statistics are printed with console.log.
 *
 * @returns {Promise<Object>} The statistics object, keyed by
 *   `charsRange-<range>`; each entry holds a `size` array plus one array of
 *   percentage strings per algorithm.
 */
async function zlibTest() {
  const charsRange = [5, 15, 30, 90];
  const testDataLens = [100, 1_000, 10_000, 100_000]; // 1_000_000
  const algorithmList = ['gzip', 'deflate', 'brotli'];
  const stats = {};

  for (const range of charsRange) {
    const item = { size: [] };
    stats[`charsRange-${range}`] = item;

    for (const len of testDataLens) {
      const testStr = genRandomStrs(len, range);
      item.size.push(len);

      for (const algo of algorithmList) {
        const buf = zip(testStr, algo);
        // Measure the compressed size in bytes. Decoding the binary output
        // with toString() and counting characters does not give the size.
        const cLen = buf.length;
        const rate = `${Math.ceil((100 * cLen) / len)}%`;
        if (!item[algo]) item[algo] = [];
        item[algo].push(rate);
      }
    }
  }
  console.log(stats);
  return stats;
}
 
zlibTest();

The results show that:

  • The compression efficiency is in the order of: brotli > deflate > gzip when the character range is small (high repetition)
  • Under the same conditions, the larger the text (more repetitive characters), the higher the compression rate
  • When the character range is large (low repetition), the compression efficiency of the three algorithms is basically the same.

Simply put, the more repetition a string contains, the better it compresses. If conditions allow, brotli is the appropriate choice, but gzip is more widely supported across scenarios.

Implementing zip compression in the browser using the third-party library pako

pako is a zlib implementation that can be used in the browser, and it claims to be faster than the zlib bindings that ship with Node.js 12 and 14.

Here is an example of compressing and decompressing text in the browser.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
import pako from 'pako';
// Alternatively, load the script from a CDN directly in the page for testing
// <script src="https://cdn.staticfile.org/pako/1.0.10/pako.min.js"></script>
 
/**
 * Compress a value with pako.deflate and return the result base64-encoded.
 *
 * @param {*} str - Value to compress. Non-string values are serialized with
 *   JSON.stringify first.
 * @returns {string} Base64 text of the compressed data, or '' when `str` is
 *   falsy.
 */
function gzip(str) {
  if (!str) return '';
  const text = typeof str === 'string' ? str : JSON.stringify(str);

  // With `to: 'string'` the result is a string; per the original note it is
  // UTF-16 encoded, and reading/writing such a string in the browser may
  // misbehave — hence the base64 step below.
  // The original note also says pako's gzip-style wrapper around pako.deflate
  // could be used here instead.
  // NOTE(review): `to: 'string'` on deflate is the pako 1.x API referenced by
  // this article — confirm before upgrading pako.
  const compressed = pako.deflate(text, { to: 'string' });
  // Convert to base64.
  const strBase64 = btoa(compressed);

  return strBase64;
}
 
/**
 * Decode a base64 string and decompress it with pako.inflate.
 *
 * @param {string} strBase64 - Base64 text of the compressed data.
 * @returns {string} The decompressed text, or '' when `strBase64` is falsy
 *   (compressing an empty value yields '', so the empty string round-trips).
 */
function unGzip(strBase64) {
  if (!strBase64) return '';

  // Per the original note, pako.ungzip can be used as well; it runs the same logic.
  return pako.inflate(atob(strBase64), { to: 'string' });
}

More details can be found in their official documentation: http://nodeca.github.io/pako/

Compressing and decompressing folders using compressing

The sections above covered zlib-based compression of data and single files. Compressing a folder additionally involves traversing the directory and its subdirectories and archiving all of the files, which requires handling many more details. We can implement this easily with the help of a third-party toolkit.

compressing is a relatively popular Node.js compression and decompression toolkit that provides an easy-to-use API. The following are examples.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
import compressing from 'compressing';
 
// NOTE: every call below is an independent usage example. They all start
// immediately and are not sequenced, so when adapting them run one at a time
// or await each step before starting the next.

// zip: compress a directory
compressing.zip.compressDir('./lzwme-test', 'lzwme-test.zip').then(() => console.log('success')).catch(e => console.error(e.stack));
// zip: extract
compressing.zip.uncompress('lzwme-test.zip', './lzwme-test').then(() => console.log('success')).catch(e => console.error(e.stack));
 
// tar: archive a directory
compressing.tar.compressDir('./lzwme-test', 'lzwme-test.tar').then(() => console.log('success')).catch(e => console.error(e.stack));
// tar: extract the archive
compressing.tar.uncompress('lzwme-test.tar', './lzwme-test').then(() => console.log('success')).catch(e => console.error(e.stack));
 
// gzip: compress a single file
compressing.gzip.compressFile('./lzwme-test.tar', 'lzwme-test.tar.gz').then(() => console.log('success')).catch(e => console.error(e.stack));
// gzip: decompress. A gzip stream holds exactly one file, so the destination
// must be a file path (here the original tar), not a directory.
compressing.gzip.uncompress('lzwme-test.tar.gz', './lzwme-test.tar').then(() => console.log('success')).catch(e => console.error(e.stack));
 
// tgz: compress (tar archives the directory first, then gzip compresses the tar file)
compressing.tgz.compressDir('./lzwme-test', 'lzwme-test.tar.gz').then(() => console.log('success')).catch(e => console.error(e.stack));
// tgz: extract
compressing.tgz.uncompress('lzwme-test.tar.gz', './lzwme-test').then(() => console.log('success')).catch(e => console.error(e.stack));

Implementing zip file compression in the browser using JSZip

JSZip supports creating zip archives and downloading them entirely in the browser, with no server involved. Its API is easy to understand, and it can also be used under Node.js.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
import JSZip from 'jszip';
 
// Build an archive in memory: one text file at the root and one image inside
// an "images" folder.
const zip = new JSZip();
zip.file("Hello.txt", "Hello World\n");
 
const img = zip.folder("images");
// imgData is not defined in this snippet; it has to be supplied by the
// surrounding code. The `base64: true` option marks it as base64-encoded.
img.file("smile.gif", imgData, {base64: true});
 
// Generate the archive as a Blob and hand it to saveAs for download.
zip.generateAsync({type:"blob"})
  .then((content) => {
    // saveAs is not defined here either — see FileSaver.js
    saveAs(content, "example.zip");
  })
  // Report failures instead of leaving the rejection unhandled.
  .catch((error) => console.error(error));

For more detailed usage, please refer to its official example: https://stuk.github.io/jszip/documentation/examples.html