{{ message }}
Commit c6461a4
authored
perf(storage): use google_crc32c.value for checksums (#16719)
Updates _ReadResumptionStrategy to use google_crc32c.value(data) instead
of manually converting Checksum(data).digest() to an integer.
Updated unit tests to mock google_crc32c.value accordingly.
Test code
```
import timeit
import os
import google_crc32c
from google_crc32c import Checksum
def method1(data):
    """Return the checksum of ``data`` as an int, going through the digest bytes.

    Builds a ``Checksum`` object for ``data``, takes its ``digest()`` bytes,
    and interprets them as a big-endian unsigned integer.
    """
    digest_bytes = Checksum(data).digest()
    return int.from_bytes(digest_bytes, byteorder="big")
def method2(data):
    """Return the checksum of ``data`` from a single ``google_crc32c.value`` call."""
    checksum = google_crc32c.value(data)
    return checksum
def benchmark():
    """Time ``method1`` against ``method2`` on random payloads of several sizes.

    For each payload size, the two methods are first asserted to return the
    same value. Each is then timed with ``timeit`` and the total time, the
    per-call average, and the relative improvement of ``method2`` over
    ``method1`` are printed.
    """
    kib = 1024
    mib = 1024 * 1024

    # Payload sizes under test: 1KB, 1MB, 10MB, 100MB.
    for size in (kib, mib, 10 * mib, 100 * mib):
        data = os.urandom(size)

        # Label the size in MB from 1 MiB upwards, otherwise in KB.
        if size >= 1024*1024:
            print(f"\nData size: {size / (1024*1024):.2f} MB")
        else:
            print(f"\nData size: {size / 1024:.2f} KB")

        # Both methods must agree before their timings are worth comparing.
        res1 = method1(data)
        res2 = method2(data)
        assert res1 == res2, f"Failed for size {size}: {res1} != {res2}"
        print("Assertion passed: results are equal.")

        # Smaller payloads get more iterations so the totals stay measurable;
        # the largest payloads still run 1000 times.
        if size > mib:
            iterations = 1000
        elif size > kib:
            iterations = 10000
        else:
            iterations = 100000

        t1 = timeit.timeit(lambda: method1(data), number=iterations)
        t2 = timeit.timeit(lambda: method2(data), number=iterations)
        avg1 = t1 / iterations
        avg2 = t2 / iterations

        print(f"Method 1 (Checksum(data).digest()): {t1:.6f} s total ({avg1:.8f} s/call)")
        print(f"Method 2 (google_crc32c.value(data)): {t2:.6f} s total ({avg2:.8f} s/call)")
        print(f"Improvement: {(t1 - t2) / t1 * 100:.2f}%")
if __name__ == "__main__":
    # Print the value of google_crc32c.implementation first so the timings
    # below can be read in the context of the implementation in use.
    print(f"google_crc32c implementation: {google_crc32c.implementation}")
    benchmark()
```
Output
```
google_crc32c implementation: c
Data size: 1.00 KB
Assertion passed: results are equal.
Method 1 (Checksum(data).digest()): 0.046088 s total (0.00000046 s/call)
Method 2 (google_crc32c.value(data)): 0.016062 s total (0.00000016 s/call)
Improvement: 65.15%
Data size: 1.00 MB
Assertion passed: results are equal.
Method 1 (Checksum(data).digest()): 0.464044 s total (0.00004640 s/call)
Method 2 (google_crc32c.value(data)): 0.439121 s total (0.00004391 s/call)
Improvement: 5.37%
Data size: 10.00 MB
Assertion passed: results are equal.
Method 1 (Checksum(data).digest()): 0.450953 s total (0.00045095 s/call)
Method 2 (google_crc32c.value(data)): 0.445793 s total (0.00044579 s/call)
Improvement: 1.14%
Data size: 100.00 MB
Assertion passed: results are equal.
Method 1 (Checksum(data).digest()): 6.287893 s total (0.00628789 s/call)
Method 2 (google_crc32c.value(data)): 6.095833 s total (0.00609583 s/call)
Improvement: 3.05%
```
1 parent 7c8412a commit c6461a4
2 files changed
Lines changed: 12 additions & 20 deletions
File tree
- packages/google-cloud-storage
- google/cloud/storage/asyncio/retry
- tests/unit/asyncio
Lines changed: 2 additions & 2 deletions
Lines changed: 10 additions & 18 deletions

0 commit comments