on_page/task_post – DataForSEO API v.3

Setting OnPage Tasks

‌
OnPage API checks websites for 60+ customizable on-page parameters defines and displays all found flaws and opportunities for optimization so that you can easily fix them. It checks meta tags, duplicate content, image tags, response codes, and other parameters on every page. You can find the full list of OnPage API check-up parameters in the Pages section.

Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-access

# Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access \
login="login" 
password="password" 
cred="$(printf ${login}:${password} | base64)" 
curl --location --request POST "https://api.dataforseo.com/v3/on_page/task_post" \
--header "Authorization: Basic ${cred}"  \
--header "Content-Type: application/json" \
--data-raw '[
  {
    "target": "dataforseo.com",
    "max_crawl_pages": 10,
    "load_resources": true,
    "enable_javascript": true,
    "custom_js": "meta = {}; meta.url = document.URL; meta;",
    "tag": "some_string_123",
    "pingback_url": "https://your-server.com/pingscript?id=$id&tag=$tag"
  }
]'

<?php
// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
$client = new RestClient($api_url, null, 'login', 'password');

$post_array = array();
// example #1 - a simple way to set a task
$post_array[] = array(
   "target" => "dataforseo.com",
   "max_crawl_pages" => 10
);
// example #2 - a way to set a task with additional parameters
$post_array[] = array(
   "target" => "dataforseo.com",
   "max_crawl_pages" => 10,
   "load_resources" => true,
   "enable_javascript" => true,
   "custom_js" => "meta = {}; meta.url = document.URL; meta;",
   "tag" => "some_string_123",
   "pingback_url" => 'https://your-server.com/pingscript?id=$id&tag=$tag'
);

// this example has a 2 elements, but in the case of large number of tasks - send up to 100 elements per POST request
if (count($post_array) > 0) {
   try {
      // POST /v3/on_page/task_post
      // the full list of possible parameters is available in documentation
      $result = $client->post('/v3/on_page/task_post', $post_array);
      print_r($result);
      // do something with post result
   } catch (RestClientException $e) {
      echo "\n";
      print "HTTP code: {$e->getHttpCode()}\n";
      print "Error code: {$e->getCode()}\n";
      print "Message: {$e->getMessage()}\n";
      print  $e->getTraceAsString();
      echo "\n";
   }
}
$client = null;
?>

from random import Random
from client import RestClient
# You can download this file from here https://api.dataforseo.com/v3/_examples/python/_python_Client.zip
client = RestClient("login", "password")

rnd = Random()
post_data = dict()
# example #1 - a simple way to set a task
post_data[rnd.randint(1, 30000000)] = dict(
    target="dataforseo.com",
    max_crawl_pages=10
)
# example #2 - a way to set a task with additional parameters
post_data[rnd.randint(1, 30000000)] = dict(
    target="dataforseo.com",
    max_crawl_pages=10,
    load_resources=True,
    enable_javascript=True,
    custom_js="meta = {}; meta.url = document.URL; meta;",
    tag="some_string_123",
    pingback_url="https://your-server.com/pingscript?id=$id&tag=$tag"
)
# POST /v3/on_page/task_post
# the full list of possible parameters is available in documentation
response = client.post("/v3/on_page/task_post", post_data)
# you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
if response["status_code"] == 20000:
    print(response)
    # do something with result
else:
    print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))

const post_array = [];

post_array.push({
  "target": "dataforseo.com",
  "max_crawl_pages": 10,
  "load_resources": true,
  "enable_javascript": true,
  "custom_js": "meta = {}; meta.url = document.URL; meta;",
  "tag": "some_string_123",
  "pingback_url": "https://your-server.com/pingscript?id=$id&tag=$tag"
});

const axios = require('axios');

axios({
  method: 'post',
  url: 'https://api.dataforseo.com/v3/on_page/task_post',
  auth: {
    username: 'login',
    password: 'password'
  },
  data: post_array,
  headers: {
    'content-type': 'application/json'
  }
}).then(function (response) {
  var result = response['data']['tasks'];
  // Result data
  console.log(result);
}).catch(function (error) {
  console.log(error);
});

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task on_page_task_post()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-access
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };
            var postData = new List<object>();
            // example #1 - a simple way to set a task
            postData.Add(new
            {
                target = "dataforseo.com",
                max_crawl_pages = 10
            });
            // example #2 - a way to set a task with additional parameters
            postData.Add(new
            {
                target = "dataforseo.com",
                max_crawl_pages = 10,
                load_resources = true,
                enable_javascript = true,
                custom_js = "meta = {}; meta.url = document.URL; meta;",
                tag = "some_string_123",
                pingback_url = "https://your-server.com/pingscript?id=$id&tag=$tag"
            });
            // POST /v3/on_page/task_post
            // the full list of possible parameters is available in documentation
            var taskPostResponse = await httpClient.PostAsync("/v3/on_page/task_post", new StringContent(JsonConvert.SerializeObject(postData)));
            var result = JsonConvert.DeserializeObject<dynamic>(await taskPostResponse.Content.ReadAsStringAsync());
            // you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
            if (result.status_code == 20000)
            {
                // do something with result
                Console.WriteLine(result);
            }
            else
                Console.WriteLine($"error. Code: {result.status_code} Message: {result.status_message}");
        }
    }
}

The above command returns JSON structured like this:

{
  "version": "0.1.20200805",
  "status_code": 20000,
  "status_message": "Ok.",
  "time": "0.0815 sec.",
  "cost": 0.00125,
  "tasks_count": 1,
  "tasks_error": 0,
  "tasks": [
    {
      "id": "08071719-1535-0216-0000-3aabdf68a6ef",
      "status_code": 20100,
      "status_message": "Task Created.",
      "time": "0.0044 sec.",
      "cost": 0.00125,
      "result_count": 0,
      "path": [
        "v3",
        "on_page",
        "task_post"
      ],
      "data": {
        "api": "on_page",
        "function": "task_post",
        "target": "dataforseo.com",
        "max_crawl_pages": 10
      },
      "result": null
    }
  ]
}

All POST data should be sent in the JSON format (UTF-8 encoding). Task setting is done using the POST method. When setting a task, you should send all task parameters in the task array of the generic POST array. You can send up to 2000 API calls per minute, with each POST call containing no more than 100 tasks. If your POST call contains over 100 tasks, the tasks over this limit will return the 40006 error.
The maximum number of simultaneous requests you can send is limited to 30.

Visit DataForSEO Help Center to get practical tips for request handling depending on your OnPage API payload volume.

Description of the fields for setting a task:

Field name	Type	Description
`target`	string	target domain required field domain name should be specified without `https://` and `www.` if you specify the page URL, the results will be returned for the domain included in the URL
`max_crawl_pages`	integer	crawled pages limit required field the number of pages to crawl on the specified domain Note: if you set `max_crawl_pages` to 1 and do not specify `start_url` or set a homepage in it, the following sitewide checks will be disabled: `test_canonicalization`, `enable_www_redirect_check`, `test_hidden_server_signature`, `test_page_not_found`, `test_directory_browsing`, `test_https_redirect` to enable them anyway, set `force_sitewide_checks` to `true`if you set `max_crawl_pages` to 1 and specify `start_url` other than a homepage, all sitewide checks will be disabled; to enable them anyway, set `force_sitewide_checks` to `true`
`start_url`	string	the first url to crawl optional field Note: you should specify an absolute URL if you want to crawl a single page, specify its URL in this field and additionally set the `max_crawl_pages` parameter to `1` you can also use the live Instant Pages endpoint to get page-specific data
`force_sitewide_checks`	boolean	enable sitewide checks when crawling a single page optional field set to `true` to get data on sitewide checks when crawling a single page; default value: `false`
`priority_urls`	array	urls to be crawled bypassing the queue optional field URLs specified in this array will be crawled in the first instance, bypassing the crawling queue; Note: you should specify the absolute URL; you can specify up to 20 URLs; all URLs in the array must belong to the `target` domain; subdomains will be ignored unless the `allow_subdomains` parameter is set to `true`example: `"priority_urls": [ "https://dataforseo.com/apis/serp-api", "https://dataforseo.com/contact" ]`
`max_crawl_depth`	integer	crawl depth optional field the linking depth of the pages to crawl; for example, starting page of the crawl is level 0, pages that have links from that page are level 1, etc.
`crawl_delay`	integer	delay between hits, ms optional field the custom delay between crawler hits to the server default value: `2000`
`store_raw_html`	boolean	store HTML of crawled pages optional field set to `true` if you want to get the HTML of the page using the OnPage Raw HTML endpoint default value: `false`
`enable_content_parsing`	boolean	parse content on crawled pages optional field set to `true` to use the OnPage Content Parsing endpoint default value: `false`
`support_cookies`	boolean	support cookies on crawled pages optional field set to `true` to support cookies when crawling the pages default value: `false`
`accept_language`	string	language header for accessing the website optional field all locale formats are supported (xx, xx-XX, xxx-XX, etc.) Note: if you do not specify this parameter, some websites may deny access; in this case, pages will be returned with the `"type":"broken` in the response array
`custom_robots_txt`	string	custom robots.txt settings optional field example: `Disallow: /directory1/`
`robots_txt_merge_mode`	string	merge with or override robots.txt settings optional field possible values: `merge`, `override`; set to `override` if you want to ignore website crawling restrictions and other robots.txt settings default value: `merge`; Note: if set to `override`, specify the `custom_robots_txt` parameter
`custom_user_agent`	string	custom user agent optional field custom user agent for crawling a website example: `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36` default value: `Mozilla/5.0 (compatible; RSiteAuditor)`
`browser_preset`	string	preset for browser screen parameters optional field if you use this field, you don’t need to indicate `browser_screen_width`, `browser_screen_height`, `browser_screen_scale_factor`possible values: `desktop`, `mobile`, `tabletdesktop` preset will apply the following values:`browser_screen_width: 1920` `browser_screen_height: 1080` `browser_screen_scale_factor: 1mobile` preset will apply the following values:`browser_screen_width: 390` `browser_screen_height: 844` `browser_screen_scale_factor: 3tablet` preset will apply the following values:`browser_screen_width: 1024` `browser_screen_height: 1366` `browser_screen_scale_factor: 2` Note: to use this parameter, set `enable_javascript` or `enable_browser_rendering` to `true`
`browser_screen_width`	integer	browser screen width optional field you can set a custom browser screen width to perform audit for a particular device; if you use this field, you don’t need to indicate `browser_preset` as it will be ignored; Note: to use this parameter, set `enable_javascript` or `enable_browser_rendering` to `true`minimum value, in pixels: `240` maximum value, in pixels: `9999`
`browser_screen_height`	integer	browser screen height optional field you can set a custom browser screen height to perform an audit for a particular device; if you use this field, you don’t need to indicate `browser_preset` as it will be ignored; Note: to use this parameter, set `enable_javascript` or `enable_browser_rendering` to `true`minimum value, in pixels: `240` maximum value, in pixels: `9999`
`browser_screen_scale_factor`	float	browser screen scale factor optional field you can set a custom browser screen resolution ratio to perform audit for a particular device; if you use this field, you don’t need to indicate `browser_preset` as it will be ignored; Note: to use this parameter, set `enable_javascript` or `enable_browser_rendering` to `true`minimum value: `0.5` maximum value: `3`
`respect_sitemap`	boolean	respect sitemap when crawling optional field set to `true` if you want to follow the order of pages indicated in the primary sitemap when crawling; default value: `false` Note: if set to `true`, the `click_depth` value in the API response will equal `0`; the `max_crawl_depth` field of the request will be ignored, you can specify the number of pages to crawl using the `max_crawl_pages` parameter
`custom_sitemap`	string	custom sitemap url optional field the URL of the page where the alternative sitemap is located Note: if you want to use this parameter, `respect_sitemap` should be `true`
`crawl_sitemap_only`	boolean	crawl only pages indicated in the sitemap optional field set to `true` if you want to crawl only the pages indicated in the sitemap if you set this parameter to `true` and do not specify `custom_sitemap`, we will crawl the default sitemap default value: `false` Note: if you want to use this parameter, `respect_sitemap` should be `true`
`load_resources`	boolean	load resources optional field set to `true` if you want to load image, stylesheets, scripts, and broken resources default value: `false` Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
`enable_www_redirect_check`	boolean	check if the domain implemented the www redirection optional field set to `true` if you want to check if the requested domain implemented the www to non-www or non-www to www redirect; default value: `false`
`enable_javascript`	boolean	load javascript on a page optional field set to `true` if you want to load the scripts available on a page default value: `false` Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
`enable_xhr`	boolean	enable XMLHttpRequest on a page optional field set to `true` if you want our crawler to request data from a web server using the XMLHttpRequest object default value: `false`;if you use this field, `enable_javascript` must be set to `true`;
`enable_browser_rendering`	boolean	emulate browser rendering to measure Core Web Vitals optional field by using this parameter you will be able to emulate a browser when loading a web page; `enable_browser_rendering` loads styles, images, fonts, animations, videos, and other resources on a page; default value: `false` set to `true` to obtain Core Web Vitals (FID, CLS, LCP) metrics in the response; if you use this field, `enable_javascript`, and `load_resources` parameters must be set to `true` Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
`disable_cookie_popup`	boolean	disable the cookie popup optional field set to `true` if you want to disable the popup requesting cookie consent from the user; default value: `false`
`custom_js`	string	custom javascript optional field Note that the execution time for the script you enter here should be 700 ms maximum, for example, you can use the following JS snippet to check if the website contains Google Tag Manager as a `scr` attribute: `let meta = { haveGoogleAnalytics: false, haveTagManager: false };\r\nfor (var i = 0; i < document.scripts.length; i++) {\r\n let src = document.scripts[i].getAttribute(\"src\");\r\n if (src != undefined) {\r\n if (src.indexOf(\"analytics.js\") >= 0)\r\n meta.haveGoogleAnalytics = true;\r\n\tif (src.indexOf(\"gtm.js\") >= 0)\r\n meta.haveTagManager = true;\r\n }\r\n}\r\nmeta;`the returned value depends on what you specified in this field. For instance, if you specify the following script: `meta = {}; meta.url = document.URL; meta.test = 'test'; meta;` as a response you will receive the following data: `"custom_js_response": { "url": "https://dataforseo.com/", "test": "test" }` Note: the length of the script you enter must be no more than 2000 characters Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page
`validate_micromarkup`	boolean	enable microdata validation optional field set to `true` if you want to use the OnPage API Microdata endpoint default value: `false`
`allow_subdomains`	boolean	include pages on subdomains optional field set to `true` if you want to crawl all subdomains of a target website default value: `false`
`allowed_subdomains`	array	subdomains to crawl optional field specify subdomains that you want to crawl example: `["blog.site.com", "my.site.com", "shop.site.com"]` Note: to use this parameter, the `allow_subdomains` parameter should be set to `false`; otherwise, the content of `allowed_subdomains` field will be ignored and the results will be returned for all subdomains
`disallowed_subdomains`	array	subdomains not to crawl optional field specify subdomains that you don’t want to crawl example: `["status.site.com", "docs.site.com"]` Note: to use this parameter, the `allow_subdomains` parameter should be set to `true`
`check_spell`	boolean	check spelling optional field set to `true` to check spelling on a website using Hunspell library default value: `false`
`check_spell_language`	string	language of the spell check optional field supported languages: ‘hy’, ‘eu’, ‘bg’, ‘ca’, ‘hr’, ‘cs’, ‘da’, ‘nl’, ‘en’, ‘eo’, ‘et’, ‘fo’, ‘fa’, ‘fr’, ‘fy’, ‘gl’, ‘ka’, ‘de’, ‘el’, ‘he’, ‘hu’, ‘is’, ‘ia’, ‘ga’, ‘it’, ‘rw’, ‘la’, ‘lv’, ‘lt’, ‘mk’, ‘mn’, ‘ne’, ‘nb’, ‘nn’, ‘pl’, ‘pt’, ‘ro’, ‘gd’, ‘sr’, ‘sk’, ‘sl’, ‘es’, ‘sv’, ‘tr’, ‘tk’, ‘uk’, ‘vi’ Note: if no language is specified, it will be set automatically based on page content
`check_spell_exceptions`	array	words excluded from spell check optional field specify the words that you want to exclude from spell check maximum word length: 100 characters maximum amount of words: 1000 example: `"SERP", "minifiers", "JavaScript"`
`calculate_keyword_density`	boolean	calculate keyword density for the target domain optional field set to `true` if you want to calculate keyword density for website pages default value: `false` Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article once the crawl is completed, you can obtain keyword density values with the Keyword Density endpoint
`checks_threshold`	object	custom threshold values for checks optional field you can specify custom threshold values for the parameters included in the `checks` object of OnPage API responses; Note: only integer threshold values can be modified; for example, the `high_loading_time` and `large_page_size` parameters are set to 3 seconds and 1 megabyte respectively by default; if you want to change these thresholds to 1 second and 1000 kbytes, use the following snippet: `"checks_threshold": { "high_loading_time": 1, "large_page_size": 1000 }`available customizable parameters with default values: "title_too_short", default value: 30, type: "int" "title_too_long", default value: 65, type: "int" "small_page_size", default value: 1024, type: "int" "large_page_size", default value: 1048576 (1024 * 1024), type: "int" "low_character_count", default value: 1024, type: "int" "high_character_count", default value: 256000 (250 * 1024), type: "int" "low_content_rate", default value: 0.1, type: "float" "high_content_rate", default value: 0.9, type: "float" "high_loading_time", default value: 3000, type: "int" "high_waiting_time", default value: 1500, type: "int" "low_readability_rate", default value: 15.0, type: "float" "irrelevant_description", default value: 0.2, type: "float" "irrelevant_title", default value: 0.3, type: "float" "irrelevant_meta_keywords", default value: 0.6, type: "float"
`disable_sitewide_checks`	array	prevent certain sitewide checks from running optional field specify the following `checks` to prevent them from running on the `target` website: `"test_page_not_found"` `"test_canonicalization"` `"test_https_redirect"` `"test_directory_browsing"`example: `"disable_sitewide_checks": ["test_directory_browsing", "test_page_not_found"]`learn more on our help center
`disable_page_checks`	array	prevent certain page checks from running optional field specify certain `checks` to prevent them from running and impacting the `onpage_score`example: `"disable_page_checks": ["is_5xx_code", "is_4xx_code"]`
`switch_pool`	boolean	switch proxy pool optional field if `true`, additional proxy pools will be used to obtain the requested data; the parameter can be used if a multitude of tasks is set simultaneously, resulting in occasional `rate-limit` and/or `site_unreachable` errors
`return_despite_timeout`	boolean	return data on pages despite the timeout error optional field if `true`, the data will be provided on pages that failed to load within 120 seconds and responded with a timeout error; default value: `false`
`tag`	string	user-defined task identifier optional field the character limit is 255 you can use this parameter to identify the task and match it with the result you will find the specified `tag` value in the `data` object of the response
`pingback_url`	string	notification URL of a completed task optional field when a task is completed we will notify you by GET request sent to the URL you have specified you can use the ‘$id’ string as a `$id` variable and ‘$tag’ as urlencoded `$tag` variable. We will set the necessary values before sending the request. example: `http://your-server.com/pingscript?id=$id` `http://your-server.com/pingscript?id=$id&tag=$tag` Note: special characters in `pingback_url` will be urlencoded; i.a., the `#` character will be encoded into `%23` learn more on our Help Center

As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.

Description of the fields in the results array:

Field name	Type	Description
`version`	string	the current version of the API
`status_code`	integer	general status code you can find the full list of the response codes here Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
`status_message`	string	general informational message you can find the full list of general informational messages here
`time`	string	execution time, seconds
`cost`	float	total tasks* cost, USD*
`tasks_count`	integer	the number of tasks in the `tasks`* array*
`tasks_error`	integer	the number of tasks in the `tasks`* array returned with an error*
`tasks`	array	array of tasks
`id`	string	task identifier unique task identifier in our system in the UUID format
`status_code`	integer	status code of the task generated by DataForSEO; can be within the following range: 10000-60000 you can find the full list of the response codes here
`status_message`	string	informational message of the task you can find the full list of general informational messages here
`time`	string	execution time, seconds
`cost`	float	cost of the task, USD
`result_count`	integer	number of elements in the `result` array
`path`	array	URL path
`data`	object	contains the same parameters that you specified in the POST request
`result`	array	array of results in this case, the value will be `null`

‌‌