scrapingant-client is the official library for accessing the ScrapingAnt API from your
Python applications. It provides useful features, such as parameter encoding, to improve the ScrapingAnt usage experience.
Requires Python 3.6+.
from scrapingant_client import ScrapingAntClient
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
# Scrape the example.com site.
result = client.general_request('https://example.com')
print(result.content)

In order to get an API token, you'll need to register at the ScrapingAnt Service.
All public classes, methods and their parameters can be inspected in this API reference.
Main class of this library.
| Param | Type |
|---|---|
| token | string |
https://docs.scrapingant.com/request-response-format#available-parameters
| Param | Type | Default |
|---|---|---|
| url | string | |
| cookies | List[Cookie] | None |
| js_snippet | string | None |
| proxy_type | ProxyType | datacenter |
| proxy_country | str | None |
| return_text | boolean | False |
| wait_for_selector | str | None |
| browser | boolean | True |
IMPORTANT NOTE: js_snippet will be encoded to Base64 automatically by the ScrapingAnt client library.
Class defining a cookie. Currently it supports only name and value.
| Param | Type |
|---|---|
| name | string |
| value | string |
Class defining response from API.
| Param | Type |
|---|---|
| content | string |
| cookies | List[Cookie] |
ScrapingantClientException is the base exception class, used for all errors.
from scrapingant_client import ScrapingAntClient
from scrapingant_client import Cookie

# Example: send custom cookies with a request and read back the cookies
# returned by the API.
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
result = client.general_request(
    'https://httpbin.org/cookies',
    cookies=[
        Cookie(name='cookieName1', value='cookieVal1'),
        Cookie(name='cookieName2', value='cookieVal2'),
    ]
)
print(result.content)
# Response cookies is a list of Cookie objects
# They can be used in next requests
response_cookies = result.cookies

from scrapingant_client import ScrapingAntClient
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
# Example: run custom JavaScript on the page before the content is returned.
# Pass the snippet as plain JavaScript source; the client library encodes it
# to Base64 automatically.
custom_js_snippet = """
var str = 'Hello, world!';
var htmlElement = document.getElementsByTagName('html')[0];
htmlElement.innerHTML = str;
"""
result = client.general_request(
    'https://example.com',
    js_snippet=custom_js_snippet,
)
print(result.content)

from scrapingant_client import ScrapingAntClient, ScrapingantClientException
# Example: retry a request a fixed number of times, handling both API errors
# and failures in your own parsing code.
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
RETRIES_COUNT = 3


def parse_html(html: str):
    """Extract the data you need from the scraped HTML.

    Must return a non-None value on success: the retry loop below treats
    ``None`` as "nothing parsed yet".
    """
    ...  # Implement your data extraction here


# None doubles as the "not parsed yet" sentinel checked after the loop.
parsed_data = None
for retry_number in range(RETRIES_COUNT):
    try:
        scrapingant_response = client.general_request(
            'https://example.com',
        )
    except ScrapingantClientException as e:
        print(f'Got ScrapingAnt exception {repr(e)}')
    except Exception as e:
        print(f'Got unexpected exception {repr(e)}')  # please report this kind of exceptions by creating a new issue
    else:
        # Parsing lives in the `else` branch so it only runs when the request
        # itself succeeded, and parsing errors are reported separately.
        try:
            parsed_data = parse_html(scrapingant_response.content)
            break  # Data is parsed successfully, so we dont need to retry
        except Exception as e:
            print(f'Got exception while parsing data {repr(e)}')


if parsed_data is None:
    print(f'Failed to retrieve and parse data after {RETRIES_COUNT} tries')
    # Can sleep and retry later, or stop the script execution, and research the reason
else:
    print(f'Successfully parsed data: {parsed_data}')