OnPage API checks websites for 60+ customizable on-page parameters, and defines and displays all found flaws and opportunities for optimization so that you can easily fix them. It checks meta tags, duplicate content, image tags, response codes, and other parameters on every page. You can find the full list of OnPage API check-up parameters in the Pages section.
Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-access
Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
# Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access \
login="login"
password="password"
cred="$(printf ${login}:${password} | base64)"
curl --location --request POST "https://api.dataforseo.com/v3/on_page/task_post" \
<?php
// RestClient can be downloaded from https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
$client = new RestClient($api_url, null, 'login', 'password');

// Each element of $post_array describes one crawl task for /v3/on_page/task_post.
$post_array = array();

// example #1 - a simple way to set a task
$task_simple = array(
    "target" => "dataforseo.com",
    "max_crawl_pages" => 10
);
$post_array[] = $task_simple;

// example #2 - a way to set a task with additional parameters
$task_extended = array(
    "target" => "dataforseo.com",
    "max_crawl_pages" => 10,
    "load_resources" => true,
    "enable_javascript" => true,
    "custom_js" => "meta = {}; meta.url = document.URL; meta;",
    "tag" => "some_string_123",
    "pingback_url" => 'https://your-server.com/pingscript?id=$id&tag=$tag'
);
$post_array[] = $task_extended;

// this example has 2 elements; for a large number of tasks, send up to 100 elements per POST request
if (!empty($post_array)) {
    try {
        // POST /v3/on_page/task_post
        // the full list of possible parameters is available in documentation
        $result = $client->post('/v3/on_page/task_post', $post_array);
        // do something with the post result
        print_r($result);
    } catch (RestClientException $e) {
        echo "\n";
        echo "HTTP code: " . $e->getHttpCode() . "\n";
        echo "Error code: " . $e->getCode() . "\n";
        echo "Message: " . $e->getMessage() . "\n";
        echo $e->getTraceAsString();
        echo "\n";
    }
}
$client = null;
?>
<?php
// You can download RestClient here: https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
$client = new RestClient($api_url, null, 'login', 'password');

$post_array = [];
// example #1 - a simple way to set a task
$post_array[] = [
    "target" => "dataforseo.com",
    "max_crawl_pages" => 10,
];
// example #2 - a way to set a task with additional parameters
$post_array[] = [
    "target" => "dataforseo.com",
    "max_crawl_pages" => 10,
    "load_resources" => true,
    "enable_javascript" => true,
    "custom_js" => "meta = {}; meta.url = document.URL; meta;",
    "tag" => "some_string_123",
    "pingback_url" => 'https://your-server.com/pingscript?id=$id&tag=$tag',
];

// this example has 2 elements; in the case of a large number of tasks, send up to 100 elements per POST request
if (count($post_array) > 0) {
    try {
        // POST /v3/on_page/task_post
        // the full list of possible parameters is available in documentation
        $result = $client->post('/v3/on_page/task_post', $post_array);
        print_r($result); // do something with the post result
    } catch (RestClientException $e) {
        echo "\n";
        print "HTTP code: {$e->getHttpCode()}\n";
        print "Error code: {$e->getCode()}\n";
        print "Message: {$e->getMessage()}\n";
        print $e->getTraceAsString();
        echo "\n";
    }
}
$client = null;
?>
Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
# NOTE(review): this snippet appears to be a truncated duplicate of the full
# example that follows in the document — it stops right after the "example #2"
# comment without the second task's body.
from random import Random
from client import RestClient
# You can download this file from here https://api.dataforseo.com/v3/_examples/python/_python_Client.zip
# Instead of "login" and "password" use your credentials from https://app.dataforseo.com/api-access
client = RestClient("login", "password")
# Random generator used to produce task-identifier keys for the POST payload.
rnd = Random()
# Maps an arbitrary integer task id -> task definition dict.
post_data = dict()
# example #1 - a simple way to set a task
post_data[rnd.randint(1, 30000000)] = dict(
target="dataforseo.com",
max_crawl_pages=10
)
# example #2 - a way to set a task with additional parameters
from random import Random
from client import RestClient
# You can download this file from here https://api.dataforseo.com/v3/_examples/python/_python_Client.zip
# Instead of "login" and "password" use your credentials from https://app.dataforseo.com/api-access
client = RestClient("login", "password")
rnd = Random()
# post_data maps an integer task id to its task definition; the id is echoed
# back by the API so the response can be matched to the request.
post_data = dict()


def _unique_task_key(existing):
    """Return a random int key not already present in *existing*.

    Calling rnd.randint() directly for each task could (rarely) produce the
    same key twice; duplicate dict keys silently overwrite earlier entries,
    so the colliding task would never be sent. This loop guarantees unique
    keys while keeping the same random-id style as the original example.
    """
    while True:
        key = rnd.randint(1, 30000000)
        if key not in existing:
            return key


# example #1 - a simple way to set a task
post_data[_unique_task_key(post_data)] = dict(
    target="dataforseo.com",
    max_crawl_pages=10
)
# example #2 - a way to set a task with additional parameters
post_data[_unique_task_key(post_data)] = dict(
    target="dataforseo.com",
    max_crawl_pages=10,
    load_resources=True,
    enable_javascript=True,
    custom_js="meta = {}; meta.url = document.URL; meta;",
    tag="some_string_123",
    pingback_url="https://your-server.com/pingscript?id=$id&tag=$tag"
)
# POST /v3/on_page/task_post
# the full list of possible parameters is available in documentation
response = client.post("/v3/on_page/task_post", post_data)
# you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
if response["status_code"] == 20000:
    print(response)
    # do something with result
else:
    print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))
from random import Random
from client import RestClient
# RestClient download: https://api.dataforseo.com/v3/_examples/python/_python_Client.zip
# Instead of "login" and "password" use your credentials from https://app.dataforseo.com/api-access
client = RestClient("login", "password")
rng = Random()
post_data = {}

# example #1 - a simple way to set a task
post_data[rng.randint(1, 30000000)] = {
    "target": "dataforseo.com",
    "max_crawl_pages": 10,
}

# example #2 - a way to set a task with additional parameters
post_data[rng.randint(1, 30000000)] = {
    "target": "dataforseo.com",
    "max_crawl_pages": 10,
    "load_resources": True,
    "enable_javascript": True,
    "custom_js": "meta = {}; meta.url = document.URL; meta;",
    "tag": "some_string_123",
    "pingback_url": "https://your-server.com/pingscript?id=$id&tag=$tag",
}

# POST /v3/on_page/task_post
# the full list of possible parameters is available in documentation
response = client.post("/v3/on_page/task_post", post_data)
# full list of response codes: https://docs.dataforseo.com/v3/appendix/errors
if response["status_code"] == 20000:
    # do something with result
    print(response)
else:
    print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
    public static partial class Demos
    {
        // Posts two example crawl tasks to /v3/on_page/task_post and prints the result.
        public static async Task on_page_task_post()
        {
            // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
            var credentials = Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"));
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/")
            };
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", credentials);

            var postData = new List<object>
            {
                // example #1 - a simple way to set a task
                new
                {
                    target = "dataforseo.com",
                    max_crawl_pages = 10
                },
                // example #2 - a way to set a task with additional parameters
                new
                {
                    target = "dataforseo.com",
                    max_crawl_pages = 10,
                    load_resources = true,
                    enable_javascript = true,
                    custom_js = "meta = {}; meta.url = document.URL; meta;",
                    tag = "some_string_123",
                    pingback_url = "https://your-server.com/pingscript?id=$id&tag=$tag"
                }
            };

            // POST /v3/on_page/task_post
            // the full list of possible parameters is available in documentation
            var payload = new StringContent(JsonConvert.SerializeObject(postData));
            var taskPostResponse = await httpClient.PostAsync("/v3/on_page/task_post", payload);
            var body = await taskPostResponse.Content.ReadAsStringAsync();
            var result = JsonConvert.DeserializeObject<dynamic>(body);

            // you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
            if (result.status_code == 20000)
            {
                // do something with result
                Console.WriteLine(result);
            }
            else
            {
                Console.WriteLine($"error. Code: {result.status_code} Message: {result.status_message}");
            }
        }
    }
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
    public static partial class Demos
    {
        // Demo: queue two OnPage crawl tasks and report the API's answer.
        public static async Task on_page_task_post()
        {
            // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
            var httpClient = new HttpClient { BaseAddress = new Uri("https://api.dataforseo.com/") };
            httpClient.DefaultRequestHeaders.Authorization =
                new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password")));

            var taskList = new List<object>();
            // example #1 - a simple way to set a task
            taskList.Add(new { target = "dataforseo.com", max_crawl_pages = 10 });
            // example #2 - a way to set a task with additional parameters
            taskList.Add(new
            {
                target = "dataforseo.com",
                max_crawl_pages = 10,
                load_resources = true,
                enable_javascript = true,
                custom_js = "meta = {}; meta.url = document.URL; meta;",
                tag = "some_string_123",
                pingback_url = "https://your-server.com/pingscript?id=$id&tag=$tag"
            });

            // POST /v3/on_page/task_post
            // the full list of possible parameters is available in documentation
            var taskPostResponse = await httpClient.PostAsync("/v3/on_page/task_post", new StringContent(JsonConvert.SerializeObject(taskList)));
            var result = JsonConvert.DeserializeObject<dynamic>(await taskPostResponse.Content.ReadAsStringAsync());

            // you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
            if (result.status_code != 20000)
            {
                Console.WriteLine($"error. Code: {result.status_code} Message: {result.status_message}");
                return;
            }
            // do something with result
            Console.WriteLine(result);
        }
    }
}
The above command returns JSON structured like this:
All POST data should be sent in the JSON format (UTF-8 encoding). Task setting is done using the POST method. When setting a task, you should send all task parameters in the task array of the generic POST array. You can send up to 2000 API calls per minute, with each POST call containing no more than 100 tasks. If your POST call contains over 100 tasks, the tasks over this limit will return the 40006 error.
The maximum number of simultaneous requests you can send is limited to 30.
Visit DataForSEO Help Center to get practical tips for request handling depending on your OnPage API payload volume.
Description of the fields for setting a task:
Field name
Type
Description
target
string
target domain required field
domain name should be specified without https:// and www.
if you specify the page URL, the results will be returned for the domain included in the URL
max_crawl_pages
integer
crawled pages limit required field
the number of pages to crawl on the specified domain Note:
if you set max_crawl_pages to 1 and do not specify start_url or set a homepage in it, the following sitewide checks will be disabled: test_canonicalization, enable_www_redirect_check, test_hidden_server_signature, test_page_not_found, test_directory_browsing, test_https_redirect
to enable them anyway, set force_sitewide_checks to true;
if you set max_crawl_pages to 1 and specify a start_url other than a homepage, all sitewide checks will be disabled;
to enable them anyway, set force_sitewide_checks to true
start_url
string
the first url to crawl
optional field Note: you should specify an absolute URL
if you want to crawl a single page, specify its URL in this field and additionally set the max_crawl_pages parameter to 1
you can also use the live Instant Pages endpoint to get page-specific data
force_sitewide_checks
boolean
enable sitewide checks when crawling a single page
optional field
set to true to get data on sitewide checks when crawling a single page;
default value: false
priority_urls
array
urls to be crawled bypassing the queue
optional field
URLs specified in this array will be crawled in the first instance, bypassing the crawling queue; Note: you should specify the absolute URL;
you can specify up to 20 URLs;
all URLs in the array must belong to the target domain;
subdomains will be ignored unless the allow_subdomains parameter is set to true;
example: "priority_urls": [
"https://dataforseo.com/apis/serp-api",
"https://dataforseo.com/contact"
]
max_crawl_depth
integer
crawl depth
optional field
the linking depth of the pages to crawl;
for example, starting page of the crawl is level 0, pages that have links from that page are level 1, etc.
crawl_delay
integer
delay between hits, ms
optional field
the custom delay between crawler hits to the server
default value: 2000
store_raw_html
boolean
store HTML of crawled pages
optional field
set to true if you want to get the HTML of the page using the OnPage Raw HTML endpoint
default value: false
support cookies on crawled pages
optional field
set to true to support cookies when crawling the pages
default value: false
accept_language
string
language header for accessing the website
optional field
all locale formats are supported (xx, xx-XX, xxx-XX, etc.) Note: if you do not specify this parameter, some websites may deny access; in this case, pages will be returned with the "type":"broken" value in the response array
custom_robots_txt
string
custom robots.txt settings
optional field
example: Disallow: /directory1/
robots_txt_merge_mode
string
merge with or override robots.txt settings
optional field
possible values: merge, override;
set to override if you want to ignore website crawling restrictions and other robots.txt settings
default value: merge; Note: if set to override, specify the custom_robots_txt parameter
custom_user_agent
string
custom user agent
optional field
custom user agent for crawling a website
example: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36
browser_preset
string
preset for browser screen parameters
optional field
if you use this field, you don’t need to indicate browser_screen_width, browser_screen_height, browser_screen_scale_factor;
possible values: desktop, mobile, tablet;
the desktop preset will apply the following values: browser_screen_width: 1920, browser_screen_height: 1080, browser_screen_scale_factor: 1;
the mobile preset will apply the following values: browser_screen_width: 390, browser_screen_height: 844, browser_screen_scale_factor: 3;
the tablet preset will apply the following values: browser_screen_width: 1024, browser_screen_height: 1366, browser_screen_scale_factor: 2
Note: to use this parameter, set enable_javascript or enable_browser_rendering to true
browser_screen_width
integer
browser screen width
optional field
you can set a custom browser screen width to perform audit for a particular device;
if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true
minimum value, in pixels: 240
maximum value, in pixels: 9999
browser_screen_height
integer
browser screen height
optional field
you can set a custom browser screen height to perform an audit for a particular device;
if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true
minimum value, in pixels: 240
maximum value, in pixels: 9999
browser_screen_scale_factor
float
browser screen scale factor
optional field
you can set a custom browser screen resolution ratio to perform audit for a particular device;
if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true
minimum value: 0.5
maximum value: 3
respect_sitemap
boolean
respect sitemap when crawling
optional field
set to true if you want to follow the order of pages indicated in the primary sitemap when crawling;
default value: false Note: if set to true, the click_depth value in the API response will equal 0;
the max_crawl_depth field of the request will be ignored, you can specify the number of pages to crawl using the max_crawl_pages parameter
custom_sitemap
string
custom sitemap url
optional field
the URL of the page where the alternative sitemap is located Note: if you want to use this parameter, respect_sitemap should be true
crawl_sitemap_only
boolean
crawl only pages indicated in the sitemap
optional field
set to true if you want to crawl only the pages indicated in the sitemap
if you set this parameter to true and do not specify custom_sitemap, we will crawl the default sitemap
default value: false Note: if you want to use this parameter, respect_sitemap should be true
load_resources
boolean
load resources
optional field
set to true if you want to load image, stylesheets, scripts, and broken resources
default value: false Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
enable_www_redirect_check
boolean
check if the domain implemented the www redirection
optional field
set to true if you want to check if the requested domain implemented the www to non-www redirect;
default value: false
enable_javascript
boolean
load javascript on a page
optional field
set to true if you want to load the scripts available on a page
default value: false Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
enable_xhr
boolean
enable XMLHttpRequest on a page
optional field
set to true if you want our crawler to request data from a web server using the XMLHttpRequest object
default value: false;
if you use this field, enable_javascript must be set to true
enable_browser_rendering
boolean
emulate browser rendering to measure Core Web Vitals
optional field
by using this parameter you will be able to emulate a browser when loading a web page; enable_browser_rendering loads styles, images, fonts, animations, videos, and other resources on a page;
default value: false
set to true to obtain Core Web Vitals (FID, CLS, LCP) metrics in the response; if you use this field, enable_javascript, and load_resources parameters must be set to true Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
disable_cookie_popup
boolean
disable the cookie popup
optional field
set to true if you want to disable the popup requesting cookie consent from the user;
default value: false
custom_js
string
custom javascript
optional field Note that the execution time for the script you enter here should be 700 ms maximum; for example, you can use the following JS snippet to check if the website contains Google Tag Manager as a src attribute: let meta = { haveGoogleAnalytics: false, haveTagManager: false };\r\nfor (var i = 0; i < document.scripts.length; i++) {\r\n let src = document.scripts[i].getAttribute(\"src\");\r\n if (src != undefined) {\r\n if (src.indexOf(\"analytics.js\") >= 0)\r\n meta.haveGoogleAnalytics = true;\r\n\tif (src.indexOf(\"gtm.js\") >= 0)\r\n meta.haveTagManager = true;\r\n }\r\n}\r\nmeta;
the returned value depends on what you specified in this field. For instance, if you specify the following script: meta = {}; meta.url = document.URL; meta.test = 'test'; meta;
as a response you will receive the following data: "custom_js_response": {
"url": "https://dataforseo.com/",
"test": "test"
} Note: the length of the script you enter must be no more than 2000 characters Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
validate_micromarkup
boolean
enable microdata validation
optional field
set to true if you want to use the OnPage API Microdata endpoint
default value: false
allow_subdomains
boolean
include pages on subdomains
optional field
set to true if you want to crawl all subdomains of a target website
default value: false
allowed_subdomains
array
subdomains to crawl
optional field
specify subdomains that you want to crawl
example: ["blog.site.com", "my.site.com", "shop.site.com"] Note: to use this parameter, the allow_subdomains parameter should be set to false;
otherwise, the content of allowed_subdomains field will be ignored and the results will be returned for all subdomains
disallowed_subdomains
array
subdomains not to crawl
optional field
specify subdomains that you don’t want to crawl
example: ["status.site.com", "docs.site.com"] Note: to use this parameter, the allow_subdomains parameter should be set to true
check_spell
boolean
check spelling
optional field
set to true to check spelling on a website using Hunspell library
default value: false
check_spell_language
string
language of the spell check
optional field
supported languages: ‘hy’, ‘eu’, ‘bg’, ‘ca’, ‘hr’, ‘cs’, ‘da’, ‘nl’, ‘en’, ‘eo’, ‘et’, ‘fo’, ‘fa’, ‘fr’, ‘fy’, ‘gl’, ‘ka’, ‘de’, ‘el’, ‘he’, ‘hu’, ‘is’, ‘ia’, ‘ga’, ‘it’, ‘rw’, ‘la’, ‘lv’, ‘lt’, ‘mk’, ‘mn’, ‘ne’, ‘nb’, ‘nn’, ‘pl’, ‘pt’, ‘ro’, ‘gd’, ‘sr’, ‘sk’, ‘sl’, ‘es’, ‘sv’, ‘tr’, ‘tk’, ‘uk’, ‘vi’ Note: if no language is specified, it will be set automatically based on page content
check_spell_exceptions
array
words excluded from spell check
optional field
specify the words that you want to exclude from spell check
maximum word length: 100 characters
maximum amount of words: 1000
example: "SERP", "minifiers", "JavaScript"
calculate_keyword_density
boolean
calculate keyword density for the target domain
optional field
set to true if you want to calculate keyword density for website pages
default value: false Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article
once the crawl is completed, you can obtain keyword density values with the Keyword Density endpoint
checks_threshold
object
custom threshold values for checks
optional field
you can specify custom threshold values for the parameters included in the checks object of OnPage API responses; Note: only integer threshold values can be modified;
for example, the high_loading_time and large_page_size parameters are set to 3 seconds and 1 megabyte respectively by default;
if you want to change these thresholds to 1 second and 1000 kbytes, use the following snippet: "checks_threshold": {
"high_loading_time": 1,
"large_page_size": 1000
}
available customizable parameters with default values: "title_too_short", default value: 30, type: "int"
"title_too_long", default value: 65, type: "int"
"small_page_size", default value: 1024, type: "int"
"large_page_size", default value: 1048576 (1024 * 1024), type: "int"
"low_character_count", default value: 1024, type: "int"
"high_character_count", default value: 256000 (250 * 1024), type: "int"
"low_content_rate", default value: 0.1, type: "float"
"high_content_rate", default value: 0.9, type: "float"
"high_loading_time", default value: 3000, type: "int"
"high_waiting_time", default value: 1500, type: "int"
"low_readability_rate", default value: 15.0, type: "float"
"irrelevant_description", default value: 0.2, type: "float"
"irrelevant_title", default value: 0.3, type: "float"
"irrelevant_meta_keywords", default value: 0.6, type: "float"
disable_sitewide_checks
array
prevent certain sitewide checks from running
optional field
specify the following checks to prevent them from running on the target website: "test_page_not_found", "test_canonicalization", "test_https_redirect", "test_directory_browsing";
example: "disable_sitewide_checks": ["test_directory_browsing", "test_page_not_found"];
learn more on our help center
disable_page_checks
array
prevent certain page checks from running
optional field
specify certain checks to prevent them from running and impacting the onpage_score;
example: "disable_page_checks": ["is_5xx_code", "is_4xx_code"]
switch_pool
boolean
switch proxy pool
optional field
if true, additional proxy pools will be used to obtain the requested data;
the parameter can be used if a multitude of tasks is set simultaneously, resulting in occasional rate-limit and/or site_unreachable errors
return_despite_timeout
boolean
return data on pages despite the timeout error
optional field
if true, the data will be provided on pages that failed to load within 120 seconds and responded with a timeout error;
default value: false
tag
string
user-defined task identifier
optional field
the character limit is 255
you can use this parameter to identify the task and match it with the result
you will find the specified tag value in the data object of the response
pingback_url
string
notification URL of a completed task
optional field
when a task is completed we will notify you by GET request sent to the URL you have specified
you can use the ‘$id’ string as a $id variable and ‘$tag’ as urlencoded $tag variable. We will set the necessary values before sending the request.
example: http://your-server.com/pingscript?id=$id http://your-server.com/pingscript?id=$id&tag=$tag Note: special characters in pingback_url will be urlencoded;
e.g., the # character will be encoded into %23
As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.
Description of the fields in the results array:
Field name
Type
Description
version
string
the current version of the API
status_code
integer
general status code
you can find the full list of the response codes here Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message
string
general informational message
you can find the full list of general informational messages here
time
string
execution time, seconds
cost
float
total tasks cost, USD
tasks_count
integer
the number of tasks in the tasks array
tasks_error
integer
the number of tasks in the tasks array returned with an error
tasks
array
array of tasks
id
string
task identifier unique task identifier in our system in the UUID format
status_code
integer
status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
status_message
string
informational message of the task
you can find the full list of general informational messages here
time
string
execution time, seconds
cost
float
cost of the task, USD
result_count
integer
number of elements in the result array
path
array
URL path
data
object
contains the same parameters that you specified in the POST request
result
array
array of results
in this case, the value will be null