- Handle exceptions gracefully
- Work with files and paths
- Use regular expressions for pattern matching
- Manipulate dates and times
- Apply object-oriented programming
┌─────────────────────────────────────────────────────────────────────┐
│ EXCEPTION HANDLING │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ try: │
│ risky_operation() │
│ except ValueError as e: # Specific exception │
│ handle_value_error(e) │
│ except (TypeError, KeyError): # Multiple exceptions │
│ handle_type_or_key() │
│ except Exception as e: # Catch-all (use sparingly) │
│ log_error(e) │
│ raise # Re-raise │
│ else: # No exception occurred │
│ success_action() │
│ finally: # Always runs │
│ cleanup() │
│ │
│ COMMON EXCEPTIONS │
│ ───────────────── │
│ ValueError - Invalid value KeyError - Dict key miss │
│ TypeError - Wrong type IndexError - List index │
│ FileNotFoundError - No file AttributeError - No attr │
│ │
└─────────────────────────────────────────────────────────────────────┘
# Read file (pick one approach - each one consumes the file)
with open('file.txt', 'r') as f:
    content = f.read() # All at once
    lines = f.readlines() # List of lines
    for line in f: # Memory efficient
        process(line)
# Write file
with open('file.txt', 'w') as f:
    f.write("Hello\n")
    f.writelines(["line1\n", "line2\n"])
# Path operations (pathlib)
from pathlib import Path
path = Path('/data/file.csv')
path.exists() # True/False
path.name # 'file.csv'
path.stem # 'file'
path.suffix # '.csv'
path.parent # Path('/data')
list(path.parent.glob('*.csv')) # All CSVs in dir

import re
# Common patterns
r'\d+' # One or more digits
r'\w+' # Word characters
r'\s+' # Whitespace
r'^start' # Start of string
r'end$' # End of string
r'[a-zA-Z]+' # Letters only
# Methods
re.search(pattern, text) # Find first match
re.findall(pattern, text) # Find all matches
re.sub(pattern, repl, text) # Replace
re.split(pattern, text) # Split on pattern
# Examples
emails = re.findall(r'\S+@\S+', text)
cleaned = re.sub(r'\s+', ' ', text)
parts = re.split(r'[,;]\s*', text)

from datetime import datetime, date, timedelta
# Current
now = datetime.now()
today = date.today()
# Parse/format
dt = datetime.strptime("2024-03-15", "%Y-%m-%d")
s = dt.strftime("%B %d, %Y") # "March 15, 2024"
# Arithmetic
tomorrow = today + timedelta(days=1)
diff = date1 - date2 # timedelta
# Components
dt.year, dt.month, dt.day
dt.hour, dt.minute, dt.second
dt.weekday() # 0=Monday

class DataProcessor:
    """Process data records."""
    default_batch = 1000 # Class attribute
    def __init__(self, name):
        self.name = name # Instance attribute
    def process(self, records):
        """Instance method."""
        return [self._transform(r) for r in records]
    def _transform(self, record): # Private method
        return record
    @property
    def info(self): # Property
        return f"Processor: {self.name}"
    @classmethod
    def create(cls, name): # Class method
        return cls(name)
    @staticmethod
    def validate(record): # Static method
        return 'id' in record

- Always use `with` for files - Ensures proper cleanup
- Catch specific exceptions - Avoid bare `except:`
- Use pathlib over os.path - More Pythonic
- Compile regex for reuse - `re.compile(pattern)`
- Prefer composition - Over deep inheritance
← Back to Module 03: Functions & Modules
→ Continue to Module 05: Data Engineering Essentials
