Add module gzip · tinylambda/keep@2a67c04 · GitHub
Skip to content

Commit 2a67c04

Browse files
committed
Add module gzip
1 parent 6443722 commit 2a67c04

7 files changed

Lines changed: 118 additions & 0 deletions

File tree

module_gzip/__init__.py

Lines changed: 3 additions & 0 deletions

module_gzip/gzip_bytesio.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import gzip
2+
from io import BytesIO
3+
import binascii
4+
5+
6+
# Round-trip a payload through gzip entirely in memory, using BytesIO objects
# in place of files on disk.
uncompressed_data = b'The same line, over and over.\n' * 10
print('UNCOMPRESSED: ', len(uncompressed_data))
print(uncompressed_data)

# GzipFile writes the compressed stream into the object passed as fileobj.
# Leaving the with-block closes the GzipFile (flushing the gzip trailer) but
# not the BytesIO, so its contents can still be fetched afterwards.
buf = BytesIO()
with gzip.GzipFile(mode='wb', fileobj=buf) as f:
    f.write(uncompressed_data)

compressed_data = buf.getvalue()
print('COMPRESSED: ', len(compressed_data))
print(binascii.hexlify(compressed_data))

inbuffer = BytesIO(compressed_data)
with gzip.GzipFile(mode='rb', fileobj=inbuffer) as f:
    # No length argument: read through to the end of the stream.
    reread_data = f.read()

print('\nREREAD: ', len(reread_data))
print(reread_data)


# One benefit of using GzipFile over zlib is that it supports the file API, so
# the compressed bytes held in a BytesIO can be read back like any other file.
# read() is called without a length so the stream is consumed through to its
# end; that is the point at which the gzip trailer (CRC-32 and uncompressed
# size) is checked against what was decompressed. When working with streams of
# compressed data where the end of the payload is not the end of the stream,
# either prefix the data with an integer representing the actual amount of
# data to be read or use the incremental decompression API in zlib.

module_gzip/gzip_compresslevel.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import gzip
2+
import io
3+
import os
4+
import hashlib
5+
6+
7+
def get_hash(data: bytes) -> str:
    """Return the MD5 digest of *data* as a hexadecimal string.

    The digest is used here only as a checksum for comparing outputs.

    Args:
        data: The bytes to hash.

    Returns:
        The 32-character lowercase hex digest.
    """
    return hashlib.md5(data).hexdigest()
9+
10+
11+
# Compress the same payload at every gzip level (0-9) and report the size and
# MD5 checksum of each resulting file.
with open('/tmp/stop_easyconnect.sh', 'r') as source:
    # The file's text is repeated 1024 times to build the payload.
    data = source.read() * 1024

# Encode once: the byte string feeds both the size column and the checksum, so
# the "data" row is measured in bytes like the on-disk sizes printed below it.
raw = data.encode('utf-8')
cksum = get_hash(raw)

print('Level Size Checksum')
print('----- ---------- ------------------')
print('data {:>10} {}'.format(len(raw), cksum))

for i in range(10):
    filename = '/tmp/compress-level-{}.gz'.format(i)
    with gzip.open(filename, 'wb', compresslevel=i) as output:
        with io.TextIOWrapper(output, encoding='utf-8') as enc:
            enc.write(data)
    # Both writers are closed at this point, so the file on disk is complete
    # before it is measured and hashed.
    size = os.stat(filename).st_size
    with open(filename, 'rb') as compressed:
        cksum = get_hash(compressed.read())
    print('{:>5d} {:>10d} {}'.format(i, size, cksum))
26+

module_gzip/gzip_read.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import gzip
2+
import io
3+
4+
5+
# Read the compressed example file back as text: gzip.open yields the
# decompressed bytes and TextIOWrapper decodes them as UTF-8.
with gzip.open('/tmp/example.txt.gz', 'rb') as input_file:
    with io.TextIOWrapper(input_file, encoding='utf-8') as dec:
        print(dec.read())
8+
9+
10+

module_gzip/gzip_seek.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import gzip
2+
# The seek() position is relative to the uncompressed data, so the caller does
# not need to know that the data file is compressed.

with gzip.open('/tmp/example.txt.gz', 'rb') as input_file:
    print('Entire file: ')
    all_data = input_file.read()
    print(all_data)

    # Slice of the full contents that the seek-and-read below should match.
    expected = all_data[5:15]

    # rewind to beginning
    input_file.seek(0)

    # move ahead 5 bytes
    input_file.seek(5)
    print('Starting at position 5 for 10 bytes:')
    partial = input_file.read(10)
    print(partial)

print()
print(expected == partial)
23+

module_gzip/gzip_write.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import gzip
2+
import io
3+
import os
4+
5+
6+
# Write one line of UTF-8 text into a gzip-compressed file, then report the
# size of the result and what the `file` utility identifies it as.
outfilename = '/tmp/example.txt.gz'
with gzip.open(outfilename, 'wb') as output:
    with io.TextIOWrapper(output, encoding='utf-8') as enc:
        enc.write('Contents of the example file goes here.\n')

print(outfilename, 'contains', os.stat(outfilename).st_size, 'bytes')
# --mime needs two dashes: with a single dash, `file` reads "-mime" as the
# short option -m followed by the argument "ime".
os.system('file -b --mime {}'.format(outfilename))
13+

module_gzip/gzip_writelines.py

Lines changed: 13 additions & 0 deletions

0 commit comments

Comments
 (0)