Add module gzip

tinylambda · tinylambda · commit 2a67c04edcff · 2020-05-17T22:49:32.000+08:00
diff --git a/module_gzip/__init__.py b/module_gzip/__init__.py
@@ -0,0 +1,3 @@
+# The gzip module provides a file-like interface
+# to GNU zip files, using zlib to compress and uncompress the data
+
diff --git a/module_gzip/gzip_bytesio.py b/module_gzip/gzip_bytesio.py
@@ -0,0 +1,34 @@
+import gzip
+from io import BytesIO
+import binascii
+
+
+# Sample payload: one short line repeated ten times.
+uncompressed_data = b'The same line, over and over.\n' * 10
+print('UNCOMPRESSED: ', len(uncompressed_data))
+print(uncompressed_data)
+
+# Compress into an in-memory buffer rather than a file on disk.
+buf = BytesIO()
+with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+    f.write(uncompressed_data)
+
+compressed_data = buf.getvalue()
+print('COMPRESSED: ', len(compressed_data))
+print(binascii.hexlify(compressed_data))
+
+# Decompress from a fresh buffer, requesting exactly the original length.
+expected_length = len(uncompressed_data)
+inbuffer = BytesIO(compressed_data)
+with gzip.GzipFile(fileobj=inbuffer, mode='rb') as f:
+    reread_data = f.read(expected_length)
+
+print('\nREREAD: ', len(reread_data))
+print(reread_data)
+
+
+# One benefit of using GzipFile over zlib is that it supports the file API. However, when re-reading the previously
+# compressed data, an explicit length is passed to read(). Leaving the length off resulted in a CRC error, possibly
+# because BytesIO returned an empty string before reporting EOF. When working with streams of compressed data, either
+# prefix the data with an integer representing the actual amount of data to be read or use the incremental decompression
+# API in zlib.
diff --git a/module_gzip/gzip_compresslevel.py b/module_gzip/gzip_compresslevel.py
@@ -0,0 +1,34 @@
+import gzip
+import io
+import os
+import hashlib
+
+
+def get_hash(data):
+    """Return the hexadecimal MD5 digest of the bytes in *data*."""
+    return hashlib.md5(data).hexdigest()
+
+
+# Build the payload by repeating the sample file's text 1024 times. The file
+# is read inside a ``with`` block so the handle is closed straight away.
+with open('/tmp/stop_easyconnect.sh', 'r') as source:
+    data = source.read() * 1024
+cksum = get_hash(data.encode('utf-8'))
+
+print('Level  Size       Checksum')
+print('-----  ---------- ------------------')
+print('data {:>10} {}'.format(len(data), cksum))
+
+# Write the same payload at every compression level (0-9) and report the
+# size and checksum of each resulting file.
+for i in range(0, 10):
+    filename = '/tmp/compress-level-{}.gz'.format(i)
+    with gzip.open(filename, 'wb', compresslevel=i) as output:
+        with io.TextIOWrapper(output, encoding='utf-8') as enc:
+            enc.write(data)
+    size = os.stat(filename).st_size
+    # Close the compressed file explicitly instead of leaking the handle.
+    with open(filename, 'rb') as compressed:
+        cksum = get_hash(compressed.read())
+    print('{:>5d}  {:>10d}  {}'.format(i, size, cksum))
+
diff --git a/module_gzip/gzip_read.py b/module_gzip/gzip_read.py
@@ -0,0 +1,12 @@
+import gzip
+import io
+
+
+# Decompress the example archive and decode its bytes as UTF-8 text.
+with gzip.open('/tmp/example.txt.gz', 'rb') as input_file, \
+        io.TextIOWrapper(input_file, encoding='utf-8') as dec:
+    contents = dec.read()
+    print(contents)
+
+
+
diff --git a/module_gzip/gzip_seek.py b/module_gzip/gzip_seek.py
@@ -0,0 +1,28 @@
+import gzip
+# The seek() position is relative to the uncompressed data, so the caller does not need to know
+# that the data file is compressed.
+
+# Offset and length of the slice read back after seeking.
+start, length = 5, 10
+
+with gzip.open('/tmp/example.txt.gz', 'rb') as input_file:
+    print('Entire file: ')
+    all_data = input_file.read()
+    print(all_data)
+
+    # The same slice taken from the fully decompressed bytes, for comparison.
+    expected = all_data[start:start + length]
+
+    # rewind to beginning
+    input_file.seek(0)
+
+    # move ahead to the start offset
+    input_file.seek(start)
+    print('Starting at position 5 for 10 bytes:')
+    partial = input_file.read(length)
+    print(partial)
+
+    print()
+    # True when seeking plus a bounded read matches the slice.
+    print(partial == expected)
+
diff --git a/module_gzip/gzip_write.py b/module_gzip/gzip_write.py
@@ -0,0 +1,16 @@
+import gzip
+import io
+import os
+
+
+# Write one line of UTF-8 text into a new gzip file.
+outfilename = '/tmp/example.txt.gz'
+with gzip.open(outfilename, 'wb') as output:
+    with io.TextIOWrapper(output, encoding='utf-8') as enc:
+        enc.write('Contents of the example file goes here.\n')
+
+print(outfilename, 'contains', os.stat(outfilename).st_size, 'bytes')
+# ``--mime`` is the long option that makes file(1) print the MIME type; the
+# single-dash ``-mime`` is parsed as ``-m ime`` (use "ime" as the magic file).
+os.system('file -b --mime {}'.format(outfilename))
+
diff --git a/module_gzip/gzip_writelines.py b/module_gzip/gzip_writelines.py
@@ -0,0 +1,15 @@
+import gzip
+import io
+import itertools
+import os
+
+
+# Ten copies of the same line, produced lazily.
+lines = itertools.repeat('The same line, over and over.\n', 10)
+
+with gzip.open('/tmp/example_lines.txt.gz', 'wb') as output, \
+        io.TextIOWrapper(output, encoding='utf-8') as enc:
+    enc.writelines(lines)
+
+# Show the decompressed contents using the external gzcat command.
+os.system('gzcat /tmp/example_lines.txt.gz')

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# The gzip module provides a file-like interface`
	`2`	`+# to GNU zip files, using zlib to compress and uncompress the data`
	`3`	`+`