
OnPage API

The OnPage API is designed to improve your website’s performance and help it rank better in SERPs. Crawl diagnostics is one of the most powerful tools for internal site optimization.

OnPage API checks your website against 60+ parameters and displays all detected flaws so that you can easily fix them. It checks meta tags, content, image tags, response codes, and other parameters of every page. The full list of parameters your website is checked against can be found in the page result fields section.

Setting Tasks

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com/', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

$post_array = array();
$my_unq_id = mt_rand(0,30000000); //your unique ID (like your DB "id" field, type 'string'); it will be returned with all results
$post_array[$my_unq_id] = array(
    "site" => "ranksonic.com",
    "crawl_max_pages" => 10
);

try {
    // POST /v2/op_tasks_post/$data
    // the POST data must be an array with the key 'data'
    $task_post_result = $client->post("v2/op_tasks_post", array('data' => $post_array));
    print_r($task_post_result);

    //do something with post results

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from random import Random
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

#Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
client = RestClient("login", "password")
rnd = Random() #you can use your own ID (e.g. a DB key or a string) as the index of post_data; it will be returned with all results.
post_data = dict()
post_data[rnd.randint(1, 30000000)] = dict(
    site="ranksonic.com",
    crawl_max_pages=10
)

response = client.post("/v2/op_tasks_post", dict(data=post_data))
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_post()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };
            var rnd = new Random(); //you can use your own ID (e.g. a DB key or a string) as the index of postObject; it will be returned with all results.
            var postObject = new Dictionary<int, object>
            {
                [rnd.Next(1, 30000000)] = new
                {
                    site = "ranksonic.com",
                    crawl_max_pages = 10
                }
            };
            var taskPostResponse = await httpClient.PostAsync("v2/op_tasks_post", new StringContent(JsonConvert.SerializeObject(new { data = postObject })));
            var obj = JsonConvert.DeserializeObject<dynamic>(await taskPostResponse.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else
            {
                foreach (var result in obj.results)
                {
                    var taskState = ((IEnumerable<dynamic>)result).First();
                    if (taskState.status == "error")
                        Console.WriteLine($"Error in task with post_id {taskState.post_id}. Code: {taskState.error.code} Message: {taskState.error.message}");
                    Console.WriteLine(taskState);
                }
            }
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_post() throws JSONException, IOException, URISyntaxException {
        URI url = new URI("https://api.dataforseo.com/v2/op_tasks_post");
        HttpClient client = HttpClientBuilder.create().build();
        HttpPost post = new HttpPost(url);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        Map<Integer, Map<String, Object>> postValues = new HashMap<>();

        Random rnd = new Random();
        Map<String, Object> postObj = new HashMap<>();
        postObj.put("site", "ranksonic.com");
        postObj.put("crawl_max_pages", 10);
        postValues.put(rnd.nextInt(30000000), postObj);
        JSONObject json = new JSONObject().put("data", postValues);

        StringEntity input = new StringEntity(json.toString());
        input.setContentType("application/json");
        post.setHeader("Content-type", "application/json");
        post.setHeader("Authorization", "Basic " + basicAuth);
        post.setEntity(input);
        HttpResponse taskPostResponse = client.execute(post);
        JSONObject taskPostObj = new JSONObject(EntityUtils.toString(taskPostResponse.getEntity()));

        if (taskPostObj.get("status").equals("error")) {
            System.out.println("error. Code:" + taskPostObj.getJSONObject("error").get("code") + " Message:" + taskPostObj.getJSONObject("error").get("message"));
        } else {
            JSONObject results = taskPostObj.getJSONObject("results");
            Iterator<String> jkeys = results.keys();
            while (jkeys.hasNext()) {
                String key = jkeys.next();
                String status = "";
                if (!results.getJSONObject(key).isNull("status")) {
                    status = results.getJSONObject(key).get("status").toString();
                }
                if (status.equals("error"))
                    System.out.println("Error in task with post_id " + results.getJSONObject(key).get("post_id") + ". Code: " + results.getJSONObject(key).getJSONObject("error").get("code") + " Message: " + results.getJSONObject(key).getJSONObject("error").get("message"));
                else {
                    System.out.println(results.getJSONObject(key).toString());
                }
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.1029 sec.",
    "results_count": 1,
    "results": {
        "104574": {
            "post_id": 104574,
            "post_site": "ranksonic.com",            
            "task_id": 130491671,
            "status": "ok"
        }
    }
}

Using this function, you can set tasks for scanning a website. After a task has been set, it passes through three stages, each reflected in the task’s status field. First, the task is queued (status="in_queue"). Then the website is crawled (status="crawling"). Finally, when the scanning process is finished, the task gets status="crawled". Once a task is completed (status="crawled"), you can use the analysis functions to work with the received results. The current status of a task can be found in the results of the Get Tasks Status function. The results of a completed task remain available for 30 days after its completion.
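
To track these stages programmatically, you can poll the Get Tasks Status endpoint until the task reaches status="crawled". A minimal Python sketch, using the same RestClient as the samples above (the task_id value is hypothetical):

import time
from client import RestClient

client = RestClient("login", "password")
task_id = 130491671 #hypothetical task_id returned by the Setting Tasks call

while True:
    response = client.get("/v2/op_tasks_get/%d" % task_id)
    if response["status"] == "error":
        print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
        break
    task = response["results"][0]
    if task["status"] == "crawled":
        print("crawl finished, the results are ready")
        break
    print("current status: %s" % task["status"]) #"in_queue" or "crawling"
    time.sleep(60) #crawling may take a while, so poll sparingly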

Task completion time depends on many factors: the number of scanned pages (the crawl_max_pages field you specify when setting a task), the response time of the server hosting the website, the volume of the pages being analyzed, etc.

If the number of pages crawled is lower than the specified crawl_max_pages value, the remaining credits will be refunded.

All POST data should be sent in JSON format (UTF-8 encoding). Tasks are set using the POST method by sending an array of tasks in the data field. Each element of the array has the following structure:

Name of a field Type Description
site string site
required field
crawl_max_pages integer maximum number of pages to crawl
required field
credits will be withdrawn on the basis of this parameter
if the number of pages crawled is lower than this value, the remaining credits will be refunded.
crawl_max_depth integer crawl depth
optional field
crawl depth of the website. for example: the homepage is level 0, links from the homepage are level 1, etc. only unique links are taken into account (a link from level 1 that was already found on level 0 counts as level 0).
default value: 0.
crawl_delay float delay between queries, sec
optional field
this parameter lets you adjust the frequency of queries to the server in order to reduce the load and avoid a DDoS-like effect.
default value: 6.
cookies_use integer usage of cookies when a website is being scanned
optional field
can take the values: 0 - no, 1 - yes.
default value: 1.
robots string user robots.txt
optional field
You can set up your robots.txt for this crawling task.
robots_mode string merge mode with the website’s robots.txt
optional field
can take values: ‘merge’, ‘override’.
default value: ‘merge’.
string_search_containment string presence of the text on the page
optional field
the result of the search will be shown in the string_containment_check field.
default value: ‘null’.
pingback_url string notification URL for a completed task
optional field
when a task is completed, we will notify you with a GET request sent to the URL you have specified
you can use the strings ‘$task_id’ and ‘$post_id’ as placeholders; we will substitute the actual values before sending the request. for example:
  http://your-server.com/pingscript?taskId=$task_id
  http://your-server.com/pingscript?taskId=$task_id&postId=$post_id
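
For instance, a task with a pingback could be set like this (a Python sketch mirroring the Setting Tasks example above; the pingscript URL is a placeholder for your own endpoint):

from random import Random
from client import RestClient

client = RestClient("login", "password")
rnd = Random()
post_data = dict()
post_data[rnd.randint(1, 30000000)] = dict(
    site="ranksonic.com",
    crawl_max_pages=10,
    #the literal strings $task_id and $post_id are substituted by the API before the request is sent
    pingback_url="http://your-server.com/pingscript?taskId=$task_id&postId=$post_id"
)
response = client.post("/v2/op_tasks_post", dict(data=post_data))
print(response)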

In response, the API server returns a JSON array in the results field with the information relevant to the tasks you have set.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
the list of possible errors can be found below.
      code integer error code
      message string text description of the error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array of tasks setting
      post_id string index in the array received in a POST request
      post_site string site received in a POST request
      task_id integer unique task identifier in our system (UInt64)
in the future you will be able to use it within 30 days to request results of this task any time.
      status string results of this task setting
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
      error array informational array of error
only if status=“error”
the list of possible errors can be found below.
            code integer error code
            message string text description of an error

Possible error codes

Error Code Meaning
404 “not found or not enough data: site” - you didn’t specify a website in the task
404 “not found or not enough data: crawl_max_pages” - you didn’t specify a crawl_max_pages field in the task
501 “invalid ‘data’ field” - probably you haven’t passed data for the tasks in the field data. POST data should be represented as an array and added to the field data: array(‘data’ => $post_array_for_tasks)
501 “invalid data” - Data in the field data isn’t an array with the required structure.
500 “internal error” - some internal error. We did our best to avoid this type of error.

Get Tasks Status

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

//get ALL results status
try {
    //GET /v2/op_tasks_get
    $task_get_result = $client->get("v2/op_tasks_get");
    print_r($task_get_result);

    //do something with results

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                DefaultRequestHeaders = {Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password")))}
            };

            var response = await httpClient.GetAsync("v2/op_tasks_get");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    var resultItem = ((IEnumerable<dynamic>) result).First();
                    Console.WriteLine(resultItem);
                }
            }
            else
                Console.WriteLine("no results");

        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get() throws JSONException, IOException, URISyntaxException {
        URI url = new URI("https://api.dataforseo.com/v2/op_tasks_get");
        HttpClient client;
        client = HttpClientBuilder.create().build();
        HttpGet get = new HttpGet(url);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse completedTasksResponse = client.execute(get);
        JSONObject completedTasksObj = new JSONObject(EntityUtils.toString(completedTasksResponse.getEntity()));

        if (completedTasksObj.get("status").equals("error")) {
            JSONObject errorObj = completedTasksObj.getJSONObject("error");
            System.out.println("error. Code: " + errorObj.get("code") + " Message: " + errorObj.get("message"));
        } else if (!completedTasksObj.get("results_count").equals(0)) {
            JSONArray results = completedTasksObj.getJSONArray("results");
            for (int i = 0; i < results.length(); i++) {
                System.out.println(results.getJSONObject(i));
            }
        } else {
            System.out.println("no results");
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0043 sec.",
    "results_count": 1,
    "results": [
        {
            "post_id": "104574",
            "post_site": "ranksonic.com",
            "task_id": 130434581,
            "string_search_containment": "the implementation",
            "crawl_max_pages": 100,
            "crawl_start": "2017-09-07 18:09:02.609802+03",
            "crawl_end": "2017-09-09 12:56:49.164561+03",
            "status": "crawled"
        }
    ]
}

Using this function, you can get the current completion status of a task. If a task has status="crawled", you can use the analysis functions to work with the received results.

You can receive the results status in two different ways:

  1. GET https://api.dataforseo.com/v2/op_tasks_get
    you will receive the status of all your tasks.
  2. When setting a task (see Setting Tasks), specify a pingback_url. As soon as the task is completed, we will send a GET request to the URL you specified as pingback_url; a minimal receiver sketch follows below.
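
A minimal sketch of such a pingback receiver, using only Python’s standard library (the /pingscript path and port 8080 are assumptions matching the example pingback_url above):

from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs

class PingbackHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        #the API substitutes the real values for $task_id and $post_id before calling this URL
        query = parse_qs(urlparse(self.path).query)
        task_id = query.get("taskId", [None])[0]
        post_id = query.get("postId", [None])[0]
        print("task %s (post_id %s) is crawled; its results can be fetched now" % (task_id, post_id))
        self.send_response(200)
        self.end_headers()

HTTPServer(("0.0.0.0", 8080), PingbackHandler).serve_forever()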

The API server returns an array in the results field where you will find the results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of the error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST request
      post_site string site received in a POST request
      task_id integer unique task identifier in our system (UInt64)
in the future you will be able to use it within 30 days to request results of this task any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
if the task status is “crawled” you can get the results for this task

Get Task Result Summary

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

try {

    // GET /v2/op_tasks_get/$task_id
    $task_get_result = $client->get("v2/op_tasks_get/123456789");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get/123456789")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_by_task_id()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };
            var taskid = 123456789;
            var response = await httpClient.GetAsync($"v2/op_tasks_get/{taskid}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_by_task_id() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get/" + taskId);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0395 sec.",
    "results_count": 1,
    "results": [
        {
            "post_id": "1214",
            "post_site": "webbysite.co.uk",
            "task_id": 123456789,
            "string_search_containment": "the implementation",
            "crawl_max_pages": 4,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "summary": [
                {
                    "absent_doctype": 0,
                    "absent_encoding_meta_tag": 0,
                    "absent_h1_tags": 0,
                    "canonical_another": 0,
                    "canonical_recursive": 0,
                    "cms": "wordpress 4.8.2",
                    "compression_disabled": 0,
                    "content_invalid_rate": 4,
                    "content_invalid_size": 0,
                    "content_readability_bad": 0,
                    "crawl_end": "2017-10-12T08:14:56.766+00:00",
                    "crawl_start": "2017-10-12T08:14:24.762+00:00",
                    "deprecated_html_tags": 0,
                    "domain": "webbysite.co.uk",
                    "duplicate_meta_descriptions": 0,
                    "duplicate_meta_tags": 0,
                    "duplicate_titles": 0,
                    "favicon_invalid": 0,
                    "have_robots": true,
                    "have_sitemap": true,
                    "images_invalid_alt": 0,
                    "images_invalid_title": 4,
                    "ip": "37.61.232.138",
                    "links_broken": 0,
                    "links_external": 8,
                    "links_internal": 54,
                    "meta_description_empty": 3,
                    "meta_description_inappropriate": 0,
                    "meta_keywords_empty": 4,
                    "meta_keywords_inappropriate": 0,
                    "pages_broken": 0,
                    "pages_http": 4,
                    "pages_https": 0,
                    "pages_invalid_size": 0,
                    "pages_non_www": 4,
                    "pages_total": 4,
                    "pages_with_flash": 0,
                    "pages_with_frame": 0,
                    "pages_with_lorem_ipsum": 0,
                    "pages_www": 0,
                    "response_code_1xx": 0,
                    "response_code_2xx": 4,
                    "response_code_3xx": 0,
                    "response_code_4xx": 0,
                    "response_code_5xx": 0,
                    "response_code_other": 0,
                    "seo_friendly_url": 4,
                    "seo_non_friendly_url": 0,
                    "server": "Apache",
                    "ssl": false,
                    "ssl_certificate_expiration": "0001-01-01T00:00:00+00:00",
                    "ssl_certificate_hash_algorithm": null,
                    "ssl_certificate_issuer": null,
                    "ssl_certificate_subject": null,
                    "ssl_certificate_valid": false,
                    "ssl_certificate_x509_version": 0,
                    "start_page_has_deny_flag": false,
                    "string_containment_check": 1,
                    "test_canonicalization": 200,
                    "test_directory_browsing": true,
                    "test_server_signature": false,
                    "test_trash_page": 404,
                    "time_load_high": 0,
                    "time_waiting_high": 0,
                    "title_duplicate_tag": 0,
                    "title_empty": 0,
                    "title_inappropriate": 0,
                    "title_long": 0,
                    "title_short": 2,
                    "www": false
                }
            ]
        }
    ]
}

Using this function, you can get the overall information about a website. This information allows you to detect the exact on-page issues of the website that has been scanned, so you will know which functions to use to receive detailed data on each of the problems found.

The data for this function becomes available only after the scanning is over (status="crawled"). If a task is still being processed, you will only see the data collected up to the moment you check.
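
For instance, a guarded fetch of the summary in Python, using the RestClient from the samples above (the task_id is hypothetical):

from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get/123456789") #hypothetical task_id
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    task = response["results"][0]
    if task["status"] != "crawled":
        print("status is '%s'; the summary covers only the pages collected so far" % task["status"])
    summary = task["summary"][0]
    print("pages total: %d, broken pages: %d" % (summary["pages_total"], summary["pages_broken"]))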

The API server returns an array in the results field where you will find the results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of the error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST request
      post_site string site received in a POST request
      task_id integer unique task identifier in our system (UInt64)
in the future you will be able to use it within 30 days to request results of this task any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
if the task status is “crawled”, you will get the composite result in the summary array
      summary array composite result
            absent_doctype integer number of pages without <!DOCTYPE html>
            absent_encoding_meta_tag integer number of pages without <meta charset=...>, but only if the encoding is not explicit in the header Content-Type (for example Content-Type: "text/html; charset=utf8")
            absent_h1_tags integer number of pages without H1
only for canonical pages
            canonical_another integer number of pages with the canonical to another page
only for pages with 200 response code
            canonical_recursive integer number of pages with recursive canonicals
            cms string content of generator meta tag
the data is taken from the first random page with 200 response code
            compression_disabled integer number of pages without enabled gzip or deflate compression
only for pages with 200 response code
            content_readability_bad integer pages that scored less than 15 points on Flesch–Kincaid readability tests
only for canonical pages
            content_invalid_rate integer number of pages whose plaintext-to-page-size ratio (plain_text_size / page_size) is less than 0.1 or more than 0.9
the data is available only for canonical pages
            content_invalid_size integer number of pages whose plain text size is less than 1024 bytes or more than 256 kbytes
the data is available only for canonical pages
            crawl_end string date and time of the end of crawling
in the format year-month-day:minutes:GMT_difference_hours:GMT_difference_minutes
for example: ‘2017-12-13 15:30:34 +00:00’
            crawl_start string date and time of the start of crawling
in the format year-month-day:minutes:GMT_difference_hours:GMT_difference_minutes
for example: ‘2017-12-14 11:50:01 +00:00’
            deprecated_html_tags integer number of pages with deprecated html tags
the data is available only for canonical pages
more info: list of deprecated tags
            domain string root domain without subdomains
for example: if ‘blog.example.com’ is checked, its value would be ‘example.com’
            duplicate_meta_descriptions integer number of pages with duplicate meta description
only for canonical pages
            duplicate_meta_tags integer number of pages with 2 or more meta tags of the same type
only for canonical pages
            duplicate_pages integer number of pages with duplicate content
only for canonical pages
            duplicate_titles integer number of pages with duplicated tag <title>
only for canonical pages
            favicon_invalid integer number of pages that don’t contain link rel="icon"
the data is available only for canonical pages
            have_robots boolean presence of robots.txt
            have_sitemap boolean presence of sitemap.xml
            images_invalid_alt integer number of pages that have at least one image with empty or absent alt attribute of <img> tag
the data is available only for canonical pages
            images_invalid_title integer number of pages that have at least one image with empty or absent title attribute of <img> tag
the data is available only for canonical pages
            ip string IP address of the website
            links_broken integer number of pages that have at least one reference to the page with a broken link
the link is considered as a broken one if it leads to the page which response code is >=400 and <500
the data is available for all pages
            links_external integer total number of external links
the data is available for all pages
            links_internal integer total number of internal links
the data is available for all pages
            meta_description_empty integer number of pages with empty or absent meta tag description
the data is available only for canonical pages
            meta_description_inappropriate integer number of pages with content of description tag that is irrelevant to the content of a page (only for canonical pages)
the relevance threshold is 0.2
the data is not available for the pages that don’t have description tag
            meta_keywords_empty integer number of pages with empty keywords in meta tags
the data is available only for canonical pages
            meta_keywords_inappropriate integer number of pages with content of keywords tag that is irrelevant to the content of a page (only for canonical pages)
the relevance threshold is 0.6
the data is not available for the pages that don’t have keywords tag
            pages_broken integer number of pages which response code is >=400 or <200
the data is available for all pages
            pages_http integer number of pages with HTTP protocol
the data is available for all pages
            pages_https integer number of pages with HTTPS protocol
the data is available for all pages
            pages_invalid_size integer number of pages with the page size less than 1024 bytes or more than 256 kbytes
the data is available only for canonical pages
            pages_non_www integer number of pages without subdomain www
the data is available for all pages
            pages_total integer total number of scanned HTML pages
            pages_with_flash integer number of pages with flash elements
the data is available for all pages
            pages_with_frame integer number of pages that contain frame, iframe, frameset tags
the data is available for all pages
            pages_with_lorem_ipsum integer number of pages that probably contain ‘lorem ipsum’
the data is available for all pages
            pages_www integer number of pages with subdomain www
the data is available for all pages
            response_code_1xx integer number of pages which response code is >=100 and <200
the data is available for all pages
            response_code_2xx integer number of pages which response code is >=200 and <300
the data is available for all pages
            response_code_3xx integer number of pages which response code is >=300 and <400
the data is available for all pages
            response_code_4xx integer number of pages which response code is >=400 and <500
the data is available for all pages
            response_code_5xx integer number of pages which response code is >=500 and <600
the data is available for all pages
            response_code_other integer number of pages which response code is >=600 or <100
also, the number includes those pages which response code was not retrieved
the data is available for all pages
            seo_friendly_url integer number of pages with an ‘SEO-friendly URL’
the ‘SEO-friendliness’ of a page URL is checked by four parameters:
- length of relative path is less than 120 symbols
- no special characters
- no dynamic parameters
- relevance of URL to the page
if at least one of these checks fails, the URL is considered not ‘SEO-friendly’
the data is available only for canonical pages
            seo_non_friendly_url integer number of pages that don’t have an ‘SEO-friendly URL’
the ‘SEO-friendliness’ of a page URL is checked by four parameters:
- length of relative path is less than 120 symbols
- no special characters
- no dynamic parameters
- relevance of URL to the page
if at least one of these checks fails, the URL is considered not ‘SEO-friendly’
the data is available only for canonical pages
            server string content of header server
the information is taken from the first page which response code is 200
            ssl boolean usage of the secure SSL protocol
true - if there is at least one HTTPS page
relevant fields will contain data if ssl = true
the information about the certificate is taken from the first page that uses HTTPS
            ssl_certificate_expiration string expiration date and time of the SSL certificate
in the format year-month-day:minutes:GMT_difference_hours:GMT_difference_minutes
for example: ‘2017-12-25 05:10:34 +00:00’
            ssl_certificate_hash_algorithm string encryption algorithm of the SSL certificate
if the website does not support SSL (field ssl=false), the value is always empty
            ssl_certificate_issuer string issuer of the SSL certificate
if the website does not support SSL (field ssl=false), the value is always empty
            ssl_certificate_subject string subject of the SSL certificate
if the website does not support SSL (field ssl=false), the value is always empty
            ssl_certificate_valid boolean validation of the SSL certificate
if the website does not support SSL (field ssl=false), the value is always ‘false’
            ssl_certificate_x509_version integer version of the SSL certificate
if the website does not support SSL (field ssl=false), the value is always empty
            start_page_has_deny_flag boolean checks whether the start page of the website can be scanned
true - if scanning of the start page is disallowed in robots.txt
            string_containment_check integer the number of pages that contain text specified in the string_search_containment field
            test_canonicalization integer the checkup of server behavior when our crawler tries to access the website via its IP address
the field contains the status code of the server response
normally, a server returns a 301 response code
            test_directory_browsing boolean the checkup of the possibility to access a content directory of the website
some web servers may expose the contents of their directories
the checkup is conducted if the website has at least one page whose response code is 200
            test_server_signature boolean the checkup of the Server header
if the server version is specified along with the server name, the test is considered failed
knowing the version of the server, an attacker can exploit vulnerabilities specific to that version to attack the site
the test is conducted after the information about the Server header is received
            test_trash_page integer the checkup of website behavior when the crawler requests a non-existent page
the field contains the status code of the server response
normally, a server returns a 404 response code
            time_load_high integer number of pages with the loading time of more than 3 seconds
the data is available for all pages
            time_waiting_high integer number of pages with a waiting time (time spent waiting for the initial response, also known as Time To First Byte) of more than 1.5 seconds
the data is available for all pages
            title_duplicate_tag integer number of pages with more than one tag <title> on a page
the data is available only for canonical pages
            title_empty integer number of pages with empty or absent tag <title>
the data is available only for canonical pages
            title_inappropriate integer number of pages with content of <title> tag that is irrelevant to the content of a page (only for canonical pages)
the relevance threshold is 0.3

the data is not available for the pages that don’t have <title> tag
            title_long integer number of pages with too long tag <title>
in case the length is more than 65 characters
the maximum number of displayed characters is 255; if the number is bigger, only the first 255 characters will be shown
the data is available only for canonical pages
            title_short integer number of pages with too short tag <title>
in case the length is less than 30 characters
the data is available only for canonical pages
            www boolean usage of subdomain www
true - if there is at least one page on the subdomain ‘www’ which response code is 200
‘www’ is the only subdomain that is parsed by our crawler within the specified domain
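
One rough way to turn this summary into a working checklist is to print every non-zero integer counter (a sketch using the RestClient from the samples above; note that not every non-zero field is a problem, e.g. pages_total or links_internal, and the task_id is hypothetical):

from client import RestClient

client = RestClient("login", "password")
task = client.get("/v2/op_tasks_get/123456789")["results"][0] #hypothetical task_id
summary = task["summary"][0]
#list every integer counter with a non-zero value as a starting point for triage
for field, value in sorted(summary.items()):
    if isinstance(value, int) and not isinstance(value, bool) and value > 0:
        print("%s: %d" % (field, value))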

Get Pages

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

try {

    // GET /v2/op_tasks_get_pages/$task_id
    $task_get_result = $client->get("v2/op_tasks_get_pages/123456789");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_pages/123456789")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_pages()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var response = await httpClient.GetAsync($"v2/op_tasks_get_pages/{taskid}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_pages() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_pages/" + taskId);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0642 sec.",
    "results_total": 50,
    "results_count": 50,
    "results": [
        {
            "post_id": "0",
            "post_site": "dataforseo.com",
            "task_id": 2987626526,
            "string_search_containment": "",
            "crawl_max_pages": 50,
            "crawl_start": "2019-06-19 11:07:30.65506+03",
            "crawl_end": "2019-06-19 11:14:38.512563+03",
            "status": "crawled",
            "pages": [
                {
                    "address_full": "https://dataforseo.com/",
                    "address_relative": "/",
                    "canonical_another": false,
                    "canonical_page": "/",
                    "canonical_page_recursive": "",
                    "content_charset": 65001,
                    "content_count_words": 1199,
                    "content_encoding": "gzip",
                    "content_readability_ari": 8.29543,
                    "content_readability_coleman_liau": 9.585588,
                    "content_readability_dale_chall": 7.13011169,
                    "content_readability_flesh_kincaid": 57.5031776,
                    "content_readability_smog": 17.5158157,
                    "crawl_depth": 0,
                    "crawl_end": "2019-06-19T08:07:38+00:00",
                    "crawled": true,
                    "deprecated_html_tags": [],
                    "duplicate_meta_tags": [
                        "generator"
                    ],
                    "favicon": "/wp-content/uploads/2016/11/cropped-Favicon_512-180x180.png",
                    "h1_count": 0,
                    "h2_count": 9,
                    "h3_count": 0,
                    "have_deprecated_tags": false,
                    "have_doctype": true,
                    "have_enc_meta_tag": true,
                    "have_flash": false,
                    "have_frame": false,
                    "have_lorem_ipsum": false,
                    "have_meta_description_duplicates": false,
                    "have_page_duplicates": false,
                    "have_recursive_canonical": false,
                    "have_title_duplicates": false,
                    "images_count": 49,
                    "images_invalid_alt": 17,
                    "images_invalid_title": 46,
                    "links_broken": 0,
                    "links_external": 9,
                    "links_internal": 41,
                    "links_referring": 49,
                    "meta_description": "DataForSEO ➤➤➤ SEO Software API ➤➤➤ SEO API data Provider built for SEO-Software companies and agencies. ✓✓✓ Great Speed, Clear Stats, Simple Pricing. Try for free now!",
                    "meta_description_consistency": 0.8,
                    "meta_description_length": 168,
                    "meta_keywords": "",
                    "meta_keywords_consistency": -1,
                    "page_allowed": true,
                    "page_redirect": null,
                    "page_size": 123243,
                    "plain_text_rate": 0.0573562235,
                    "plain_text_size": 7066,
                    "relative_path_length": 1,
                    "response_code": 200,
                    "seo_friendly_url": true,
                    "seo_friendly_url_characters_check": true,
                    "seo_friendly_url_dynamic_check": true,
                    "seo_friendly_url_keywords_check": true,
                    "seo_friendly_url_relative_length_check": true,
                    "ssl": true,
                    "ssl_handshake_time": 2,
                    "string_containment_check": false,
                    "time_connection": 1,
                    "time_download": 0,
                    "time_sending_request": 0,
                    "time_total_load": 155,
                    "time_waiting": 152,
                    "title": "SEO software API ⓴⓳. API for agencies. Best SEO data API provider.",
                    "title_consistency": 1,
                    "title_duplicate_tag": false,
                    "title_length": 66,
                    "www": false
                }
            ]
        }
    ]
}

Using this function, you can retrieve structured data for each page of a website that has been scanned. To get a list of pages filtered by specific parameters, use the extended version of this function, Get Filtered Pages.

All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will only see the data collected up to the moment you check.
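
The page-level data lends itself to client-side filtering; for example, this Python sketch (using the RestClient from the samples above and a hypothetical task_id) flags broken or slow pages:

from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_pages/123456789") #hypothetical task_id
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    for page in response["results"][0]["pages"]:
        #flag pages with error response codes or total load times above 3 seconds (time_total_load is in milliseconds)
        if page["response_code"] >= 400 or page["time_total_load"] > 3000:
            print(page["address_full"], page["response_code"], page["time_total_load"])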

The API server returns an array in the results field where you will find the results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST request
      post_site string site received in a POST request
      task_id integer unique task identifier in our system (UInt64)
in the future you will be able to use it within 30 days to request results of this task any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      pages array array of scanned pages
            address_full string full page address
            address_relative string relative page address
more info about relative URLs
            canonical_another boolean presence of another canonical page
‘true’ - if the page is non-canonical
            canonical_page string canonical link
            canonical_page_recursive string canonical link if it is recursive
            content_charset integer content character encoding
list of charset
            content_count_words integer number of words in the content of the page
the text present in the body tag is parsed; text inside script, style, a, noscript, select, button, embed, frameset tags, as well as comments, is ignored
            content_encoding string compression algorithm of the content of the page
more information
            content_readability_ari float readability score according to the Automated Readability Index (ARI) algorithm
            content_readability_coleman_liau float readability score according to the Coleman–Liau Index algorithm
            content_readability_dale_chall float readability score according to the Dale–Chall Readability algorithm
            content_readability_flesh_kincaid float readability score according to the Flesch–Kincaid readability tests
            content_readability_smog float readability score according to the SMOG algorithm
            crawl_depth integer level of the page in the website hierarchy
            crawl_end string date and time of the end of crawling
in the format year-month-day:minutes:GMT_difference_hours:GMT_difference_minutes
for example: ‘2017-12-13 15:30:34 +00:00’
            crawled boolean status of the page
            deprecated_html_tags array array of deprecated html tags of the page
            duplicate_meta_tags array array of meta tags that are duplicated
            favicon string favicon of the page
            h1_count integer count of H1 tags
            h2_count integer count of H2 tags
            h3_count integer count of H3 tags
            have_deprecated_tags boolean presence of deprecated tags on the page
            have_doctype boolean presence of <!DOCTYPE html> on the page
            have_page_duplicates boolean presence of duplicate pages of the page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning - the value of this field will be ‘null’
            have_enc_meta_tag boolean presence of tag <charset> on the page
            have_flash boolean presence of flash elements on the page
            have_frame boolean presence of frames on the page
            have_lorem_ipsum boolean presence of ‘lorem ipsum’ text on the page
            have_meta_description_duplicates boolean there are pages that duplicate the meta tag description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
            have_recursive_canonical boolean presence of recursive canonical
if you request the data during the scanning - the value of this field will be ‘null’
            have_title_duplicates boolean there are pages that duplicate the content of this page’s tag <title>
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
            images_count integer number of images on the page
            images_invalid_alt integer number of images with an empty or missing alt attribute
            images_invalid_title integer number of images with an empty or missing title attribute
            links_broken integer number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning - the value of this field will be ‘null’
            links_external integer number of external links on the page
            links_referring integer number of referring links to the page
            links_internal integer number of internal links on the page
            meta_description string content of meta tag description
            meta_description_consistency float consistency of meta tag description with page content
from 0 to 1
            meta_description_length integer length of meta tag description content
            meta_keywords string content of meta tag keywords
            meta_keywords_consistency float consistency of meta tag keywords with page content
from 0 to 1
            page_allowed boolean page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
            page_redirect string URL of the page to which the specified page redirects
the field is non-empty only if the status code is 3xx
            page_size integer page size in bytes
            plain_text_rate float plaintext rate value (plain_text_size / page_size)
            plain_text_size integer plain text size in characters
            relative_path_length integer relative path length of the page URL
            response_code integer HTTP response code
            seo_friendly_url boolean page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true (see the sketch after this table)
            seo_friendly_url_characters_check boolean checks the characters of the URL against Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ - if the test is passed.
            seo_friendly_url_dynamic_check boolean presence of dynamic parameters for a resource
for example: ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL, the value will be ‘false’
            seo_friendly_url_keywords_check boolean consistency of page url with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag instead. if the title tag is also absent, the test is considered failed
            seo_friendly_url_relative_length_check boolean checks the length of the relative path
the URL should not be longer than 120 characters
            ssl boolean usage of the secure SSL protocol
            ssl_handshake_time integer time (in milliseconds) spent on the ‘SSL handshake’
            string_containment_check boolean shows the presence or absence of the text specified in the string_search_containment field on the page
if no text is specified in the string_search_containment field, the string_containment_check field will be ‘false’
            time_connection integer time (in milliseconds) spent on establishing the connection
            time_download integer time (in milliseconds) spent on the loading of resources
            time_total_load integer total time
time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
            time_sending_request integer time (in milliseconds) spent on sending a request to a server
            time_waiting integer time (in milliseconds) spent waiting for the initial response, also known as the Time To First Byte
            title string content of tag <title>
            title_consistency float consistency of tag <title> with page content
from 0 to 1
            title_duplicate_tag boolean tag <title> is duplicated
(specified more than once within the same page)
            title_length integer length of tag <title> content
            www boolean usage of the subdomain www
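
As noted in the table above, two of these fields are derived from others: seo_friendly_url is the conjunction of the four seo_friendly_url_*_check flags, time_total_load is the sum of the timing fields, and plain_text_rate is plain_text_size divided by page_size. A minimal Python sketch of recomputing (or sanity-checking) them from a returned page record; page stands for one element of the pages array:

def seo_friendly(page):
    # seo_friendly_url is true only if all four individual checks passed
    return all(page[check] for check in (
        "seo_friendly_url_characters_check",
        "seo_friendly_url_dynamic_check",
        "seo_friendly_url_keywords_check",
        "seo_friendly_url_relative_length_check",
    ))

def total_load_time(page):
    # time_total_load = time_connection + time_sending_request
    #                 + time_waiting + time_download + ssl_handshake_time
    return (page["time_connection"] + page["time_sending_request"]
            + page["time_waiting"] + page["time_download"]
            + page["ssl_handshake_time"])

def plain_text_rate(page):
    # plain_text_rate = plain_text_size / page_size
    return page["plain_text_size"] / page["page_size"] if page["page_size"] else 0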

Get Filtered Pages

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    $post_array = array();
    $post_array[] = array(
        "task_id" => 151668277,
        "limit" => 1000,
        "offset" => 0,
        "filters" => array(
            array("h1_count", "=", 0),
            array("content_count_words", ">", 200)
        )
    );

    // POST /api/v2/op_tasks_get_pages_filter/$data
    $pages_post_result = $client->post("/v2/op_tasks_get_pages_filter", array('data' => $post_array));
    print_r($pages_post_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
post_data = [
    dict(
        task_id=151668277,
        limit=1000,
        offset=0,
        filters=[
            ["h1_count", "=", 0],
            ["content_count_words", ">", 200]
        ]
    )
]
response = client.post("/v2/op_tasks_get_pages_filter", dict(data=post_data))
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_pages_filter()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var postObject = new[]
            {
                new
                {
                    task_id = 123456789,
                    limit = 1000,
                    offset = 0,
                    filters = new[]
                    {
                        new object[] { "h1_count", ">", 0 },
                        new object[] { "content_count_words", ">", 100 }
                    }
                }
            };
            var pagePostResponse = await httpClient.PostAsync("v2/op_tasks_get_pages_filter", new StringContent(JsonConvert.SerializeObject(new { data = postObject })));
            var obj = JsonConvert.DeserializeObject<dynamic>(await pagePostResponse.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_pages_filter() throws JSONException, IOException, URISyntaxException {
        URI url = new URI("https://api.dataforseo.com/v2/op_tasks_get_pages_filter");
        HttpClient client = HttpClientBuilder.create().build();
        HttpPost post = new HttpPost(url);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        Map<Integer, Map<String, Object>> postValues = new HashMap<>();

        Random rnd = new Random();
        Map<String, Object> postObj = new HashMap<>();
        postObj.put("task_id", 151668277);
        postObj.put("limit", 1000);
        postObj.put("offset", 0);
        postObj.put("filters", new Object[]{
            new Object[]{"h1_count", "=", 0},
            new Object[]{"content_count_words", ">", 200}
        });
        postValues.put(rnd.nextInt(30000000), postObj);

        JSONObject json = new JSONObject().put("data", postValues);
        StringEntity input = new StringEntity(json.toString());
        input.setContentType("application/json");
        post.setHeader("Content-type", "application/json");
        post.setHeader("Authorization", "Basic " + basicAuth);
        post.setEntity(input);
        HttpResponse pagePostResponse = client.execute(post);
        JSONObject obj = new JSONObject(EntityUtils.toString(pagePostResponse.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message:" + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    if (results.getJSONObject(i).get("status").equals("error"))
                        System.out.println("Error in task with post_id " + results.getJSONObject(i).get("post_id") + ". Code: " + results.getJSONObject(i).getJSONObject("error").get("code") + " Message: " + results.getJSONObject(i).getJSONObject("error").get("message"));
                    else {
                        System.out.println(results.get(i));
                    }
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0402 sec.",
    "results_count": 5,
    "results": [
        {
            "post_id": "9#999#555",
            "post_site": "ranksonic.com",
            "task_id": 151668277,
            "string_search_containment": "the implementation",
            "crawl_max_pages": 10,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "pages": [
                {
                    "address_full": "https:\/\/ranksonic.com\/compare.html",
                    "address_relative": "\/compare.html",
                    "canonical_another": false,
                    "canonical_page": "\/compare.html",
                    "canonical_page_recursive": "",
                    "content_charset": 65001,
                    "content_count_words": 203,
                    "content_encoding": "gzip",
                    "content_readability_ari": 8.418127,
                    "content_readability_coleman_liau": 11.8246012,
                    "content_readability_dale_chall": 8.241003,
                    "content_readability_flesh_kincaid": 44.30253,
                    "content_readability_smog": 16.4731522,
                    "crawl_depth": 1,
                    "crawl_end": "2017-10-12T08:15:00+00:00",
                    "crawled": true,
                    "deprecated_html_tags": [],
                    "duplicate_meta_tags": [],
                    "favicon": "\/themes\/default\/images\/favicon\/favicon_152x152.png",
                    "h1_count": 0,
                    "h2_count": 0,
                    "h3_count": 0,
                    "have_deprecated_tags": false,
                    "have_doctype": true,
                    "have_enc_meta_tag": true,
                    "have_flash": false,
                    "have_frame": false,
                    "have_lorem_ipsum": false,
                    "have_meta_description_duplicates": false,
                    "have_page_duplicates": false,
                    "have_recursive_canonical": false,
                    "have_title_duplicates": false,
                    "images_count": 5,
                    "images_invalid_alt": 0,
                    "images_invalid_title": 5,
                    "links_broken": 0,
                    "links_external": 10,
                    "links_internal": 16,
                    "links_referring": 20,
                    "meta_description": "SEO software comparison. Compare SEO Platforms. RankSonic SEO Platform.",
                    "meta_description_consistency": 0.428571433,
                    "meta_description_length": 71,
                    "meta_keywords": "ranksonic, seo, seo comparison",
                    "meta_keywords_consistency": 0.6666667,
                    "page_allowed": true,
                    "page_redirect": null,
                    "page_size": 107829,
                    "plain_text_rate": 0.224707633,
                    "plain_text_size": 24230,
                    "relative_path_length": 13,
                    "response_code": 200,
                    "seo_friendly_url": false,
                    "seo_friendly_url_characters_check": true,
                    "seo_friendly_url_dynamic_check": true,
                    "seo_friendly_url_keywords_check": false,
                    "seo_friendly_url_relative_length_check": true,
                    "ssl": true,
                    "ssl_handshake_time": 1,
                    "string_containment_check": true,
                    "time_connection": 3,
                    "time_download": 0,
                    "time_total_load": 48,
                    "time_sending_request": 0,
                    "time_waiting": 44,
                    "title": "SEO software comparison. Compare SEO Platforms. RankSonic SEO Platform.",
                    "title_consistency": 0.428571433,
                    "title_length": 71,
                    "www": false
                }
            ]
        }
    ]
}

Using this function, you can get a list of pages based on the parameters you specify. It is the primary function for finding pages with on-page errors: for instance, you can set the parameters to receive the list of pages with non-SEO-friendly URLs, pages with too high a loading time, pages with a low readability score, etc.

All results for this function will be available only after the scanning is complete (status="crawled"). If a task is still being processed, you will only see the data collected up to the moment of your request.

All POST data should be sent in the JSON format (UTF-8 encoding). The page-filtering request is made with the POST method, with an array of filtering requests sent in the data field. Each of the array elements has the following structure (a request sketch follows the table):

Name of a field Type Description
task_id integer unique identifier returned to you in the response from our service when you set a task
required field
limit integer maximum number of returned pages
offset integer offset in results array of returned pages
filters array array of filters
required field
      $field string the name of the field to filter by
required field
list of all available fields you can see in the results of Get Pages
      $operator string comparison operator
required field
available operators: >, =, !=, <>, <, <=, >=, contains, notcontains, startswith, endswith
      $value string comparison value
required field
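
As an illustration of the structure above, here is a minimal Python sketch (using the same RestClient as in the examples) that combines several of the listed operators; the task_id and the filter values are placeholders, not recommendations:

from client import RestClient

client = RestClient("login", "password")
post_data = [
    dict(
        task_id=151668277,  # placeholder: the task_id returned when you set the task
        limit=100,
        offset=0,
        filters=[
            ["content_count_words", "<", 100],
            ["time_total_load", ">", 1000],
            ["address_relative", "startswith", "/blog"]
        ]
    )
]
response = client.post("/v2/op_tasks_get_pages_filter", dict(data=post_data))
if response["status"] == "ok":
    for result in response["results"]:
        # partial data may be returned while the task is still crawling
        if result["status"] == "crawled":
            for page in result["pages"]:
                print(page["address_full"])
else:
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))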

You will receive an array from the API server in the results field, where you will find your results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST array
      post_site string site received in a POST array
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      pages array array of pages
            address_full string full page address
            address_relative string relative page address
more info about relative
            absent_h1_tags integer number of pages without H1 tags
            canonical_another boolean presence of another canonical page
‘true’ - if page is non-canonical
            canonical_page string canonical page
            canonical_page_recursive string recursive canonical page
if you request the data during the scanning - the value of this field will be ‘null’
            content_charset integer content character encoding
list of charset
            content_count_words integer number of words in the content of the page
the text inside the body tag is parsed; text inside the script, style, a, noscript, select, button, embed, and frameset tags, as well as comments, is ignored
            content_encoding string compression algorithm of the content of the page
more information
            content_readability_ari float readability score according to the Automated Readability Index (ARI) algorithm
            content_readability_coleman_liau float readability score according to the Coleman–Liau Index algorithm
            content_readability_dale_chall float readability score according to the Dale–Chall Readability algorithm
            content_readability_flesh_kincaid float readability score according to the Flesch–Kincaid readability tests
            content_readability_smog float readability score according to the SMOG algorithm
            crawl_depth integer level of the page in the website hierarchy
            crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +00:00’
            crawled boolean indicates whether the page has been crawled
            deprecated_html_tags array array of deprecated html tags of the page
            duplicate_meta_tags array array of meta tags that are duplicated
            favicon string favicon of the page
            h1_count integer count of H1 tags
            h2_count integer count of H2 tags
            h3_count integer count of H3 tags
            have_deprecated_tags boolean presence of deprecated tags on the page
            have_doctype boolean presence of <!DOCTYPE html> on the page
            have_page_duplicates boolean presence of pages that duplicate this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning - the value of this field will be ‘null’
            have_enc_meta_tag boolean presence of the <meta charset> tag on the page
            have_flash boolean presence of flash elements on the page
            have_frame boolean presence of frames on the page
            have_lorem_ipsum boolean presence of ‘lorem ipsum’ text on the page
            have_meta_description_duplicates boolean presence of pages that duplicate the meta tag description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
            have_recursive_canonical boolean presence of recursive canonical
if you request the data during the scanning - the value of this field will be ‘null’
            have_title_duplicates boolean presence of pages that duplicate the <title> tag of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
            images_count integer number of images on the page
            images_invalid_alt integer number of images with an empty or missing alt attribute
            images_invalid_title integer number of images with an empty or missing title attribute
            links_broken integer number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning - the value of this field will be ‘null’
            links_external integer number of external links on the page
            links_referring integer number of referring links to the page
if you request the data during the scanning - the value of this field will be ‘null’
            links_internal integer number of internal links on the page
            meta_description string content of meta tag description
            meta_description_consistency float consistency of meta tag description with page content
from 0 to 1
            meta_description_length integer length of meta tag description content
            meta_keywords string content of meta tag keywords
            meta_keywords_consistency float consistency of meta tag keywords with page content
from 0 to 1
            page_allowed boolean page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
            page_redirect string URL of the page to which the specified page redirects
the field is non-empty only if the status code is 3xx
            page_size integer page size in bytes
            plain_text_rate float plaintext rate value (plain_text_size / page_size)
            plain_text_size integer plain text size in characters
            relative_path_length integer relative path length of the page URL
            response_code integer HTTP response code
            seo_friendly_url boolean page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
            seo_friendly_url_characters_check boolean checks the characters of the URL against Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ - if the test is passed.
            seo_friendly_url_dynamic_check boolean presence of dynamic parameters for a resource
for example: ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL, the value will be ‘false’
            seo_friendly_url_keywords_check boolean consistency of page url with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag instead. if the title tag is also absent, the test is considered failed
            seo_friendly_url_relative_length_check boolean checks the length of the relative path
the URL should not be longer than 120 characters
            ssl boolean usage of the secure SSL protocol
            ssl_handshake_time integer time (in milliseconds) spent on the ‘SSL handshake’
            string_containment_check boolean shows the presence or absence of the text specified in the string_search_containment field on the page
if no text is specified in the string_search_containment field, the string_containment_check field will be ‘false’
            time_connection integer time (in milliseconds) spent on establishing the connection
            time_download integer time (in milliseconds) spent on the loading of resources
            time_total_load integer total time
time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
            time_sending_request integer time (in milliseconds) spent on sending a request to a server
            time_waiting integer time (in milliseconds) spent waiting for the initial response, also known as the Time To First Byte
            title string content of tag <title>
            title_consistency float consistency of tag <title> with page content
from 0 to 1
            title_length integer length of tag <title> content
            www boolean usage of the subdomain www
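
Because limit and offset are part of the request, larger result sets can be paged through in batches. A minimal Python sketch of this pattern, with a placeholder task_id and an illustrative filter:

from client import RestClient

client = RestClient("login", "password")
task_id = 151668277  # placeholder: the task_id returned when you set the task
limit, offset = 1000, 0
pages = []
while True:
    post_data = [dict(task_id=task_id, limit=limit, offset=offset,
                      filters=[["response_code", "=", 200]])]
    response = client.post("/v2/op_tasks_get_pages_filter", dict(data=post_data))
    if response["status"] != "ok" or not response["results"]:
        break
    batch = response["results"][0]["pages"]
    pages.extend(batch)
    if len(batch) < limit:  # last, partially filled batch
        break
    offset += limit
print(len(pages), "pages collected")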

Get Broken Pages

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /api/v2/op_tasks_get_broken_pages/$task_id
    $task_get_result = $client->get("v2/op_tasks_get_broken_pages/123456789");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_broken_pages/123456789")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_broken_pages()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var response = await httpClient.GetAsync($"v2/op_tasks_get_broken_pages/{taskid}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_broken_pages() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_broken_pages/" + taskId);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0675 sec.",
    "results_count": 1,
    "results": [
        {
            "post_id": "10##4",
            "post_site": "ranksonic.com",
            "task_id": 130491671,
            "string_search_containment": "the implementation",
            "crawl_max_pages": 10,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "broken_pages": [
                {
                    "address_full": "https:\/\/ranksonic.com\/features-common.html",
                    "address_relative": "\/features-common.html",
                    "canonical_another": false,
                    "canonical_page": null,
                    "canonical_page_recursive": "",
                    "content_charset": 0,
                    "content_count_words": 0,
                    "content_encoding": "none",
                    "content_readability_ari": 0,
                    "content_readability_coleman_liau": 0,
                    "content_readability_dale_chall": 0,
                    "content_readability_flesh_kincaid": 0,
                    "content_readability_smog": 0,
                    "crawl_depth": 2,
                    "crawl_end": "2017-10-12T08:15:00+00:00",
                    "crawled": true,
                    "deprecated_html_tags": [],
                    "duplicate_meta_tags": [],
                    "favicon": "",
                    "h1_count": 0,
                    "h2_count": 0,
                    "h3_count": 0,
                    "have_deprecated_tags": false,
                    "have_doctype": false,
                    "have_enc_meta_tag": false,
                    "have_flash": false,
                    "have_frame": false,
                    "have_lorem_ipsum": false,
                    "have_meta_description_duplicates": false,
                    "have_page_duplicates": false,
                    "have_recursive_canonical": false,
                    "have_title_duplicates": false,
                    "images_count": 0,
                    "images_invalid_alt": 0,
                    "images_invalid_title": 0,
                    "links_broken": 0,
                    "links_external": 0,
                    "links_internal": 0,
                    "links_referring": 14,
                    "meta_description": null,
                    "meta_description_consistency": 0,
                    "meta_description_length": 0,
                    "meta_keywords": "",
                    "meta_keywords_consistency": 0,
                    "page_allowed": true,
                    "page_redirect": null,
                    "page_size": 0,
                    "plain_text_rate": 0,
                    "plain_text_size": 0,
                    "relative_path_length": 21,
                    "response_code": 404,
                    "seo_friendly_url": false,
                    "seo_friendly_url_characters_check": false,
                    "seo_friendly_url_dynamic_check": false,
                    "seo_friendly_url_keywords_check": false,
                    "seo_friendly_url_relative_length_check": false,
                    "ssl": false,
                    "ssl_handshake_time": 0,
                    "string_containment_check": true,
                    "time_connection": 35,
                    "time_download": 0,
                    "time_total_load": 461,
                    "time_sending_request": 0,
                    "time_waiting": 426,
                    "title": null,
                    "title_consistency": 0,
                    "title_length": 0,
                    "www": false
                }
            ]
        }
    ]
}

Using this function, you can get a list of broken pages (pages that return a 4xx response code). Other pages of the website may still contain referring links to these non-existent pages.

All results for this function will be available only after the scanning is complete (status="crawled"). If a task is still being processed, you will only see the data collected up to the moment of your request.
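
A minimal Python sketch (again via the RestClient from the examples) that lists each broken page together with the number of links still pointing to it; the task_id is a placeholder:

from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_broken_pages/123456789")  # placeholder task_id
if response["status"] == "ok":
    for result in response["results"]:
        for page in result["broken_pages"]:
            # links_referring shows how many pages still link to this missing URL
            print(page["response_code"], page["address_full"],
                  "- referring links:", page["links_referring"])
else:
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))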

You will receive an array from the API server in the results field, where you will find your results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST array
      post_site string site received in a POST array
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      broken_pages array array of broken pages
            address_full string full page address
            address_relative string relative page address
more info about relative
            absent_h1_tags integer number of pages without H1 tags
            canonical_another boolean presence of another canonical page
‘true’ - if page is non-canonical
            canonical_page string canonical page
            canonical_page_recursive string recursive canonical page
if you request the data during the scanning - the value of this field will be ‘null’
            content_charset integer content character encoding
list of charset
            content_count_words integer number of words in the content of the page
the text inside the body tag is parsed; text inside the script, style, a, noscript, select, button, embed, and frameset tags, as well as comments, is ignored
            content_encoding string compression algorithm of the content of the page
more information
            content_readability_ari float readability score according to the Automated Readability Index (ARI) algorithm
            content_readability_coleman_liau float readability score according to the Coleman–Liau Index algorithm
            content_readability_dale_chall float readability score according to the Dale–Chall Readability algorithm
            content_readability_flesh_kincaid float readability score according to the Flesch–Kincaid readability tests
            content_readability_smog float readability score according to the SMOG algorithm
            crawl_depth integer level of the page in the website hierarchy
            crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +00:00’
            crawled boolean indicates whether the page has been crawled
            deprecated_html_tags array array of deprecated html tags of the page
            duplicate_meta_tags array array of meta tags that are duplicated
            favicon string favicon of the page
            h1_count integer count of H1 tags
            h2_count integer count of H2 tags
            h3_count integer count of H3 tags
            have_deprecated_tags boolean presence of deprecated tags on the page
            have_doctype boolean presence of <!DOCTYPE html> on the page
            have_page_duplicates boolean presence of pages that duplicate this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning - the value of this field will be ‘null’
            have_enc_meta_tag boolean presence of the <meta charset> tag on the page
            have_flash boolean presence of flash elements on the page
            have_frame boolean presence of frames on the page
            have_lorem_ipsum boolean presence of ‘lorem ipsum’ text on the page
            have_meta_description_duplicates boolean presence of pages that duplicate the meta tag description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
            have_recursive_canonical boolean presence of recursive canonical
if you request the data during the scanning - the value of this field will be ‘null’
            have_title_duplicates boolean presence of pages that duplicate the <title> tag of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
            images_count integer number of images on the page
            images_invalid_alt integer number of images with an empty or missing alt attribute
            images_invalid_title integer number of images with an empty or missing title attribute
            links_broken integer number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning - the value of this field will be ‘null’
            links_external integer number of external links on the page
            links_referring integer number of referring links to the page
if you request the data during the scanning - the value of this field will be ‘null’
            links_internal integer number of internal links on the page
            meta_description string content of meta tag description
            meta_description_consistency float consistency of meta tag description with page content
from 0 to 1
            meta_description_length integer length of meta tag description content
            meta_keywords string content of meta tag keywords
            meta_keywords_consistency float consistency of meta tag keywords with page content
from 0 to 1
            page_allowed boolean page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
            page_redirect string URL of the page to which the specified page redirects
the field is non-empty only if the status code is 3xx
            page_size integer page size in bytes
            plain_text_rate float plaintext rate value (plain_text_size / page_size)
            plain_text_size integer plain text size in characters
            relative_path_length integer relative path length of the page URL
            response_code integer HTTP response code
            seo_friendly_url boolean page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
            seo_friendly_url_characters_check boolean checks the characters of the URL against Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ - if the test is passed
            seo_friendly_url_dynamic_check boolean presence of dynamic parameters for a resource
for example: ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL, the value will be ‘false’
            seo_friendly_url_keywords_check boolean consistency of page url with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag instead. if the title tag is also absent, the test is considered failed
            seo_friendly_url_relative_length_check boolean checks the length of the relative path
the URL should not be longer than 120 characters
            ssl boolean usage of the secure SSL protocol
            ssl_handshake_time integer time (in milliseconds) spent on the ‘SSL handshake’
            string_containment_check boolean shows the presence or absence of the text specified in the string_search_containment field on the page
if no text is specified in the string_search_containment field, the string_containment_check field will be ‘false’
            time_connection integer time (in milliseconds) spent on establishing the connection
            time_download integer time (in milliseconds) spent on the loading of resources
            time_total_load integer total time
time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
            time_sending_request integer time (in milliseconds) spent on sending a request to a server
            time_waiting integer time (in milliseconds) spent waiting for the initial response, also known as the Time To First Byte
            title string content of tag <title>
            title_consistency float consistency of tag <title> with page content
from 0 to 1
            title_length integer length of tag <title> content
            www boolean usage of the subdomain www

Get Duplicate Pages

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /api/v2/op_tasks_get_duplicates/$task_id
    $task_get_result = $client->get("v2/op_tasks_get_duplicates/123456789");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_duplicates/123456789")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_duplicates()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var response = await httpClient.GetAsync($"v2/op_tasks_get_duplicates/{taskid}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_duplicates() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_duplicates/" + taskId);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.1450 sec.",
    "results_count": 21,
    "results": [
        {
            "post_id": "9999555",
            "post_site": "ranksonic.com",            
            "task_id": 136371534,
            "string_search_containment": null,
            "crawl_max_pages": 100,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "duplicates": [
                {
                    "accumulator": "Keyword generator",
                    "pages": [
                        {
                            "accumulator": "Keyword generator",
                            "address_full": "http:\/\/ranksonic.com\/keyword-generator\/",
                            "address_relative": "\/keyword-generator\/",
                            "canonical_another": false,
                            "canonical_page": "\/keyword-generator\/",
                            "canonical_page_recursive": "",
                            "content_charset": 65001,
                            "content_count_words": 68,
                            "content_encoding": "gzip",
                            "content_readability_ari": 6.24265766,
                            "content_readability_coleman_liau": 2.14972973,
                            "content_readability_dale_chall": 4.42414236,
                            "content_readability_flesh_kincaid": 66.9806747,
                            "content_readability_smog": 15.9031887,
                            "crawl_depth": 2,
                            "crawl_end": "2017-10-12T08:15:00+00:00",
                            "crawled": true,
                            "deprecated_html_tags": [],
                            "duplicate_meta_tags": [],
                            "favicon": "\/wp-content\/themes\/theme1760\/favicon.ico",
                            "h1_count": 1,
                            "h2_count": 1,
                            "h3_count": 2,
                            "have_deprecated_tags": false,
                            "have_doctype": true,
                            "have_enc_meta_tag": true,
                            "have_flash": false,
                            "have_frame": false,
                            "have_lorem_ipsum": false,
                            "have_meta_description_duplicates": false,
                            "have_page_duplicates": false,
                            "have_recursive_canonical": false,
                            "have_title_duplicates": false,
                            "images_count": 2,
                            "images_invalid_alt": 0,
                            "images_invalid_title": 0,
                            "links_broken": 0,
                            "links_external": 3,
                            "links_internal": 35,
                            "links_referring": 0,
                            "meta_description": "» keyword generator",
                            "meta_description_consistency": 0,
                            "meta_description_length": 34,
                            "meta_keywords": "",
                            "meta_keywords_consistency": 0,
                            "page_allowed": true,
                            "page_redirect": null,
                            "page_size": 25476,
                            "plain_text_rate": 0.07878837,
                            "plain_text_size": 1956,
                            "relative_path_length": 19,
                            "response_code": 200,
                            "seo_friendly_url": false,
                            "seo_friendly_url_characters_check": false,
                            "seo_friendly_url_dynamic_check": false,
                            "seo_friendly_url_keywords_check": false,
                            "seo_friendly_url_relative_length_check": false,
                            "ssl": false,
                            "ssl_handshake_time": 0,
                            "string_containment_check": false,
                            "time_connection": 35,
                            "time_download": 1,
                            "time_total_load": 1069,
                            "time_sending_request": 0,
                            "time_waiting": 1033,
                            "title": "ranksonic - keyword generator",
                            "title_consistency": 0,
                            "title_length": 60,
                            "www": false
                        }
                    ]
                }
            ]
        }
    ]
}

Using this function, you can get a list of duplicate pages. Duplicates are grouped by an accumulator, for instance, the title, description, or content of a page.

All results for this function will be available only after the scanning is complete (status="crawled"). If a task is still being processed, you will only see the data collected up to the moment of your request.

$duplicate_type can have the following string values: ‘title’, ‘description’. The default value is ‘title’.
If you want to find duplicated content, specify the relative URL of the page whose content will be used to run the search. For example:
https://api.dataforseo.com/v2/op_tasks_get_duplicates/$task_id/'/your-content-page'
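
A minimal Python sketch of the three request variants. It assumes, based on the content example above, that $duplicate_type is passed as a path segment after the task id; the task_id and the content page are placeholders:

from client import RestClient

client = RestClient("login", "password")
task_id = 123456789  # placeholder: the task_id returned when you set the task

# default: duplicates grouped by title
by_title = client.get("/v2/op_tasks_get_duplicates/%d" % task_id)

# duplicates grouped by meta description (assumed path segment, see note above)
by_description = client.get("/v2/op_tasks_get_duplicates/%d/description" % task_id)

# duplicates of the content of a specific page; the quoting of the relative URL
# follows the documentation example above
by_content = client.get("/v2/op_tasks_get_duplicates/%d/'/your-content-page'" % task_id)

for response in (by_title, by_description, by_content):
    if response["status"] == "ok":
        for result in response["results"]:
            for group in result["duplicates"]:
                print(group["accumulator"], "-", len(group["pages"]), "duplicate pages")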

You will receive an array from the API server in the results field, where you will find your results.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in a POST array
      post_site string site received in a POST array
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of pages to crawl
      crawl_start string date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      duplicates array array of duplicate pages
            accumulator string the attribute by which the pages were grouped
            pages array array of duplicate pages sharing this accumulator
                  address_full string full page address
                  address_relative string relative page address
                  absent_h1_tags integer number of pages without H1 tags
                  canonical_another boolean presence of another canonical page
‘true’ - if page is non-canonical
                  canonical_page string canonical page
                  canonical_page_recursive string recursive canonical page
if you request the data during the scanning - the value of this field will be ‘null’
                  content_charset integer content character encoding
list of charset
                  content_count_words integer number of words in the content of the page
the text inside the body tag is parsed; text inside the script, style, a, noscript, select, button, embed, and frameset tags, as well as comments, is ignored
                  content_encoding string compression algorithm of the content of the page
more information
                  content_readability_ari float readability score according to the Automated Readability Index (ARI) algorithm
                  content_readability_coleman_liau float readability score according to the Coleman–Liau Index algorithm
                  content_readability_dale_chall float readability score according to the Dale–Chall Readability algorithm
                  content_readability_flesh_kincaid float readability score according to the Flesch–Kincaid readability tests
                  content_readability_smog float readability score according to the SMOG algorithm
                  crawl_depth integer level of the page in the website hierarchy
                  crawl_end string date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +time_zone’
for example: ‘2017-12-13 15:30:34 +00:00’
                  crawled boolean indicates whether the page has been crawled
                  deprecated_html_tags array array of deprecated html tags of the page
                  duplicate_meta_tags array array of meta tags that are duplicated
                  favicon string favicon of the page
                  h1_count integer count of H1 tags
                  h2_count integer count of H2 tags
                  h3_count integer count of H3 tags
                  have_deprecated_tags boolean presence of deprecated tags on the page
                  have_doctype boolean presence of <!DOCTYPE html> on the page
                  have_page_duplicates boolean presence of duplicate pages of the page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning - the value of this field will be ‘null’
                  have_enc_meta_tag boolean presence of the charset meta tag on the page
                  have_flash boolean presence of flash elements on the page
                  have_frame boolean presence of frames on the page
                  have_lorem_ipsum boolean presence of ‘lorem ipsum’ text on the page
                  have_meta_description_duplicates boolean presence of pages that duplicate the content of this page’s meta description tag
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
                  have_recursive_canonical boolean presence of recursive canonical
if you request the data during the scanning - the value of this field will be ‘null’
                  have_title_duplicates boolean presence of pages that duplicate the content of this page’s <title> tag
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
                  images_count integer number of images on the page
                  images_invalid_alt integer number of images with an empty or missing alt attribute
                  images_invalid_title integer number of images with an empty or missing title attribute
                  links_broken integer number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning - the value of this field will be ‘null’
                  links_external integer number of external links on the page
                  links_referring integer number of referring links to the page
if you request the data during the scanning - the value of this field will be ‘null’
                  links_internal integer number of internal links on the page
                  meta_description string content of meta tag description
                  meta_description_consistency float consistency of meta tag description with page content
from 0 to 1
                  meta_description_length integer length of meta tag description content
                  meta_keywords string content of meta tag keywords
                  meta_keywords_consistency float consistency of meta tag keywords with page content
from 0 to 1
                  page_allowed boolean page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
                  page_redirect string url of page where the specified page is redirected to
the field is not empty only if a status code is 3xx
                  page_size integer page size in bytes
                  plain_text_rate float plaintext rate value (plain_text_size / page_size)
                  plain_text_size integer size of the plain text on the page, in characters
                  relative_path_length integer relative path length of the page URL
                  response_code integer HTTP response code
                  seo_friendly_url boolean page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true (a verification sketch follows this table)
                  seo_friendly_url_characters_check boolean checking for symbols in accordance with Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ - if the test is passed.
                  seo_friendly_url_dynamic_check boolean presence of dynamic parameters for a resource
like ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL, the value will be ‘false’
                  seo_friendly_url_keywords_check boolean consistency of page url with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag; if the title tag is also absent, the test is considered failed
                  seo_friendly_url_relative_length_check boolean checking the length of the relative path
the URL should not be longer than 120 characters
                  ssl boolean usage of the secure SSL protocol
                  ssl_handshake_time integer time (in milliseconds) spent on the ‘SSL handshake’
                  string_containment_check boolean shows the presence or absence of the text specified in string_search_containment on the page
if no text was specified in the string_search_containment field, the string_containment_check field will be ‘false’
                  time_connection integer time (in milliseconds) spent on establishing the connection
                  time_download integer time (in milliseconds) spent on the loading of resources
                  time_sending_request integer time (in milliseconds) spent on sending a request to a server
                  time_total_load integer total load time, in milliseconds
time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time (see the sketch after this table)
                  time_waiting integer time (in milliseconds) spent waiting for the initial response, also known as Time To First Byte (TTFB)
                  title string content of tag <title>
                  title_consistency float consistency of tag <title> with page content
from 0 to 1
                  title_length integer length of tag <title> content
                  www boolean usage of the subdomain www
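
The two derived fields above can be re-checked on the client side. Below is a minimal Python sketch (the helper names are ours, not part of the API or its client): it walks the duplicates array of one results item and re-derives seo_friendly_url and time_total_load for every page record, following the definitions in this table. Run it only on finished tasks, since some fields are ‘null’ while the task is still crawling.

def check_page_record(page):
    # seo_friendly_url is documented as the AND of the four sub-checks
    expected_seo_friendly = all([
        page["seo_friendly_url_characters_check"],
        page["seo_friendly_url_dynamic_check"],
        page["seo_friendly_url_keywords_check"],
        page["seo_friendly_url_relative_length_check"],
    ])
    assert page["seo_friendly_url"] == expected_seo_friendly

    # time_total_load is documented as the sum of the timing components
    expected_total = (page["time_connection"] + page["time_sending_request"]
                      + page["time_waiting"] + page["time_download"]
                      + page["ssl_handshake_time"])
    assert page["time_total_load"] == expected_total

def walk_duplicates(result_item):
    for group in result_item["duplicates"]:
        print("grouped by:", group["accumulator"])
        for page in group["pages"]:
            print("  " + page["address_relative"])
            check_page_record(page)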

Get Links To Page

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /v2/op_tasks_get_links_to/$task_id/$page
    $task_get_result = $client->get("v2/op_tasks_get_links_to/123456789/'/relative/page/on/site.html'");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_to/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_links_to()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var pageonsite = "'/relative/page/on/site.html'";
            var response = await httpClient.GetAsync($"v2/op_tasks_get_links_to/{taskid}/{pageonsite}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_links_to() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        String pageonsite = "'/relative/page/on/site.html'";
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_links_to/" + taskId + "/" + pageonsite);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0741 sec.",
    "results_count": 14,
    "results": [
        {
            "post_id": "9999555",
            "post_site": "ranksonic.com",
            "task_id": 136371534,
            "string_search_containment": null,
            "crawl_max_pages": 100,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "links_to": [
                {
                    "alt": "DAIQUIRI - DSC_0081",
                    "anchor": "",
                    "link_from": "http:\/\/ranksonic.com\/2017\/10\/",
                    "link_to": "http:\/\/ranksonic.com\/letter-d\/",
                    "nofollow": false,
                    "page_from": "\/2017\/10\/",
                    "page_to": "\/letter-d\/",
                    "relative": true,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": "",
                    "type": "image",
                    "www_from_use": false,
                    "www_to_use": false
                },
                {
                    "alt": null,
                    "anchor": "...",
                    "link_from": "http:\/\/ranksonic.com\/2017\/10\/",
                    "link_to": "http:\/\/ranksonic.com\/letter-d\/",
                    "nofollow": false,
                    "page_from": "\/2017\/10\/",
                    "page_to": "\/letter-d\/",
                    "relative": true,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": ".",
                    "type": "href",
                    "www_from_use": false,
                    "www_to_use": false
                },
                {
                    "alt": null,
                    "anchor": null,
                    "link_from": "http:\/\/ranksonic.com\/letter-d\/",
                    "link_to": "http:\/\/ranksonic.com\/letter-d\/",
                    "nofollow": false,
                    "page_from": "\/letter-d\/",
                    "page_to": "\/letter-d\/",
                    "relative": true,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": null,
                    "text_pre": null,
                    "type": "canonical",
                    "www_from_use": false,
                    "www_to_use": false
                }
            ]
        }
    ]
}

Using this function, you can get a list of all referring links to a certain page.

All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment you check.

$page is a relative page address on the site; the page value must be enclosed in single quotes, like this:
https://api.dataforseo.com/v2/op_tasks_get_links_to/12345/'/page/on/site.html'
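
A short Python sketch of building such a request with the client used throughout these docs (the get_links_to helper and the percent-encoding of the path are our own additions, not documented requirements):

from urllib.parse import quote
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

def get_links_to(client, task_id, relative_page):
    # wrap the relative page address in single quotes, as required;
    # percent-encoding the path segment is our own precaution
    page_segment = "'%s'" % quote(relative_page, safe="/")
    return client.get("/v2/op_tasks_get_links_to/%d/%s" % (task_id, page_segment))

client = RestClient("login", "password")
response = get_links_to(client, 123456789, "/relative/page/on/site.html")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])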

You will receive an array from the API server in the results field.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in the POST request
      post_site string site received in the POST request
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of test pages
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      links_to array array of referring links
            alt string alt attribute of an element
            anchor string an anchor of a link
            link_from string full page address of referring page
            link_to string full page address of requested page
            nofollow boolean presence of the nofollow attribute on the link
information about “nofollow”: “nofollow” provides a way for webmasters to tell search engines “don’t follow links on this page” or “don’t follow this specific link.”
            page_from string relative page address of referring page
            page_to string relative page address of requested page
            relative boolean indicates whether the link is relative
            ssl_from_use boolean ssl used on the referring page
            ssl_to_use boolean ssl used on the requested page
            state string current link state
possible values: ‘dead’ or ‘alive’
            text_post string text after anchor
            text_pre string text before anchor
            type string type of link
possible values:
‘href’ - standard link, for instance <a href...>...</a>,
‘image’ - a link where <img> tag was found,
                for instance <a href...>...<img>...</img>...</a>,
‘canonical’ - meta canonical link,
‘external’ - external link,
‘http-equiv’ - link from redirect meta http-equiv refresh.
            www_from_use boolean www. subdomain used on the referring page
            www_to_use boolean www. subdomain used on the requested page
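
A small post-processing sketch in Python (our own helper, assuming the links_to structure documented above): it tallies the link types and collects the referring pages of dead links.

from collections import Counter

def summarize_links_to(result_item):
    # count links per documented type: href, image, canonical, external, http-equiv
    types = Counter(link["type"] for link in result_item["links_to"])
    # collect the pages that still link to a dead target
    dead = [link["link_from"] for link in result_item["links_to"]
            if link["state"] == "dead"]
    return types, dead

For the sample response above this would return Counter({'image': 1, 'href': 1, 'canonical': 1}) and an empty list of dead links.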

Get Links From Page

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /v2/op_tasks_get_links_from/$task_id/$page
    $task_get_result = $client->get("v2/op_tasks_get_links_from/123456789/'/relative/page/on/site.html'");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_from/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_get_links_from()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var pageonsite = "'/relative/page/on/site.html'";
            var response = await httpClient.GetAsync($"v2/op_tasks_get_links_from/{taskid}/{pageonsite}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_get_links_from() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        String pageonsite = "'/relative/page/on/site.html'";
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_links_from/" + taskId + "/" + pageonsite);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0668 sec.",
    "results_count": 38,
    "results": [
        {
            "post_id": "9999555",
            "post_site": "ranksonic.com",
            "task_id": 136371534,
            "string_search_containment": null,
            "crawl_max_pages": 100,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "links_from": [
                {
                    "alt": null,
                    "anchor": "Facebook",
                    "link_from": "http:\/\/ranksonic.com\/letter-d\/",
                    "link_to": "https:\/\/facebook.com\/nina.babaeva.14",
                    "nofollow": false,
                    "page_from": "\/letter-d\/",
                    "page_to": "facebook.com\/nina.babaeva.14",
                    "relative": false,
                    "ssl_from_use": false,
                    "ssl_to_use": true,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": "",
                    "type": "href",
                    "www_from_use": false,
                    "www_to_use": true
                },
                {
                    "alt": null,
                    "anchor": "gavr1l0",
                    "link_from": "http:\/\/ranksonic.com\/letter-d\/",
                    "link_to": "http:\/\/gavr1l0.pro\/",
                    "nofollow": false,
                    "page_from": "\/letter-d\/",
                    "page_to": "gavr1l0.pro\/",
                    "relative": false,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": "bsp;      site development:",
                    "type": "href",
                    "www_from_use": false,
                    "www_to_use": false
                },
                {
                    "alt": null,
                    "anchor": "ranksonic",
                    "link_from": "http:\/\/ranksonic.com\/letter-d\/",
                    "link_to": "http:\/\/ranksonic.com\/",
                    "nofollow": false,
                    "page_from": "\/letter-d\/",
                    "page_to": "\/",
                    "relative": true,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": "",
                    "type": "href",
                    "www_from_use": false,
                    "www_to_use": false
                },
                {
                    "alt": null,
                    "anchor": "General",
                    "link_from": "http:\/\/ranksonic.com\/letter-d\/",
                    "link_to": "http:\/\/ranksonic.com\/",
                    "nofollow": false,
                    "page_from": "\/letter-d\/",
                    "page_to": "\/",
                    "relative": true,
                    "ssl_from_use": false,
                    "ssl_to_use": false,
                    "state": "alive",
                    "text_post": "",
                    "text_pre": "",
                    "type": "href",
                    "www_from_use": false,
                    "www_to_use": false
                }
            ]
        }
    ]
}

Using this function, you can get a list of external and internal links from a certain page.

All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment you check.

$page is a relative page address on the site; the page value must be enclosed in single quotes, like this:
https://api.dataforseo.com/v2/op_tasks_get_links_from/12345/'/page/on/site.html'

You will receive an array from the API server in the results field.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in the POST request
      post_site string site received in the POST request
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of test pages
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      links_from array array of links
            alt string alt attribute of an element
            anchor string an anchor of a link
            link_from string full page address of the requested page
            link_to string full page address of a link from the page
            nofollow boolean presence of the nofollow attribute on the link
information about “nofollow”: “nofollow” provides a way for webmasters to tell search engines “don’t follow links on this page” or “don’t follow this specific link.”
            page_from string relative page address of the requested page
            page_to string relative page address of a link from the page
            relative boolean indicates whether the link is relative
            ssl_from_use boolean ssl used on the requested page
            ssl_to_use boolean ssl used on the result page
            state string current link state
possible values: ‘dead’ or ‘alive’
            text_post string text after anchor
            text_pre string text before anchor
            type string type of link
possible values:
‘href’ - standard link, for instance <a href...>...</a>,
‘image’ - a link where <img> tag was found,
                for instance <a href...>...<img>...</img>...</a>,
‘canonical’ - meta canonical link,
‘external’ - external link,
‘http-equiv’ - link from redirect meta http-equiv refresh.
            www_from_use boolean www. subdomain used on the requested page
            www_to_use boolean www. subdomain used on the result page
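
A Python sketch (our own helper) that splits the documented links_from array into internal and external links using the boolean relative field:

def split_links_from(result_item):
    # relative=true means the link points within the crawled site
    internal, external = [], []
    for link in result_item["links_from"]:
        (internal if link["relative"] else external).append(link["link_to"])
    return internal, external

For the sample response above, the Facebook and gavr1l0.pro links would land in external, and the two links to http://ranksonic.com/ in internal.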

Get H Tags On Page

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /v2/op_tasks_htags_on_page/$task_id/$page
    $task_get_result = $client->get("v2/op_tasks_htags_on_page/123456789/'/relative/page/on/site.html'");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_htags_on_page/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_htags_on_page()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var pageonsite = "'/relative/page/on/site.html'";
            var response = await httpClient.GetAsync($"v2/op_tasks_htags_on_page/{taskid}/{pageonsite}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else
            {
                foreach (var result in obj.results)
                {
                    Console.WriteLine(result);
                }
            }
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_htags_on_page() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        String pageonsite = "'/relative/page/on/site.html'";
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_htags_on_page/" + taskId + "/" + pageonsite);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.0343 sec.",
    "results_count": 9,
    "results": [
        {
            "post_id": "0",
            "post_site": "rankactive.com",
            "task_id": 136371534,
            "string_search_containment": "",
            "crawl_max_pages": 20,
            "crawl_start": "2018-04-02 19:02:24.659055+03",
            "crawl_end": "2018-04-02 19:05:14.710688+03",
            "status": "crawled",
            "htags_on_page": [
                {
                    "h1": [
                        "Notification Manager"
                    ],
                    "h2": [
                        "Notification manager: video overview",
                        "How does it help?",
                        "See it in action",
                        "Amazing opportunities for your success",
                        "Ready to try?",
                        "Customers reviews"  
                    ],
                    "h3": [
                        "Ready to try?",
                        "Featured on"
                    ]
                }
            ]
        }
    ]
}

Using this function, you can get a list of H tags (H1, H2, H3) on a page.

All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment you check.

$page is a relative page address on the site; the page value must be enclosed in single quotes, like this:
https://api.dataforseo.com/v2/op_tasks_htags_on_page/12345/'/page/on/site.html'

You will receive an array from the API server in the results field.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in the POST request
      post_site string site received in the POST request
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of test pages
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      htags_on_page array array of H tags
            h1 array array of H1 tags
            h2 array array of H2 tags
            h3 array array of H3 tags
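
A quick Python sketch (our own helper) that flags common heading problems using the htags_on_page structure documented above:

def heading_issues(result_item):
    issues = []
    for entry in result_item["htags_on_page"]:
        # each entry holds arrays of H1, H2 and H3 texts
        h1 = entry.get("h1", [])
        if len(h1) == 0:
            issues.append("missing H1")
        elif len(h1) > 1:
            issues.append("multiple H1 tags: %r" % h1)
    return issues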

Get Images On Page

Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/login

<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip

try {
    $client = new RestClient('https://api.dataforseo.com', null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
    exit();
}

try {

    // GET /v2/op_tasks_images_on_page/$task_id/$page
    $task_get_result = $client->get("v2/op_tasks_images_on_page/123456789/'/relative/page/on/site.html'");
    print_r($task_get_result);

    //do something with result

} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print  $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_images_on_page/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace DataForSeoDemos
{
    public static partial class Demos
    {
        public static async Task op_tasks_images_on_page()
        {
            var httpClient = new HttpClient
            {
                BaseAddress = new Uri("https://api.dataforseo.com/"),
                //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
                DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
            };

            var taskid = 123456789;
            var pageonsite = "'/relative/page/on/site.html'";
            var response = await httpClient.GetAsync($"v2/op_tasks_images_on_page/{taskid}/{pageonsite}");
            var obj = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
            if (obj.status == "error")
                Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
            else if (obj.results_count != 0)
            {
                foreach (var result in obj.results)
                {
                    var resultItem = ((IEnumerable<dynamic>)result).First();
                    Console.WriteLine(resultItem);
                }
            }
            else
                Console.WriteLine("no results");
        }
    }
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;

public class Demos {
    public static void op_tasks_images_on_page() throws JSONException, IOException {
        HttpClient client;
        client = HttpClientBuilder.create().build();
        int taskId = 123456789;
        String pageonsite = "'/relative/page/on/site.html'";
        HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_images_on_page/" + taskId + "/" + pageonsite);
        //Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/login
        String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));

        get.setHeader("Content-type", "application/json");
        get.setHeader("Authorization", "Basic " + basicAuth);
        HttpResponse response = client.execute(get);
        JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));

        if (obj.get("status").equals("error")) {
            System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
        } else {
            JSONArray results = obj.getJSONArray("results");
            if (results.length() > 0) {
                for (int i = 0; i < results.length(); i++) {
                    System.out.println(results.get(i));
                }
            } else {
                System.out.println("no results");
            }
        }
    }
}

The above command returns JSON structured like this:

{
    "status": "ok",
    "results_time": "0.1068 sec.",
    "results_count": 12,
    "results": [
        {
            "post_id": "9#999#555",
            "post_site": "ranksonic.com",
            "task_id": 151668277,
            "string_search_containment": null,
            "crawl_max_pages": 12,
            "crawl_start": "2017-10-12 11:14:23.624803+03",
            "crawl_end": "2017-10-12 11:15:08.908063+03",
            "status": "crawled",
            "images_on_page": [
                {
                    "alt": "RankS",
                    "src": "\/assets\/images\/logo_ranks.png",
                    "title": ""
                },
                {
                    "alt": "O",
                    "src": "\/assets\/images\/logo_o.png",
                    "title": ""
                },
                {
                    "alt": "nic",
                    "src": "\/assets\/images\/logo_nic.png",
                    "title": ""
                },
                {
                    "alt": "menu",
                    "src": "\/assets\/images\/icon-menu.png",
                    "title": ""
                },
                {
                    "alt": "Generate keywords",
                    "src": "\/assets\/images\/gk-img.png",
                    "title": ""
                },
                {
                    "alt": "700 keywords",
                    "src": "\/assets\/images\/gk-find.png",
                    "title": ""
                },
                {
                    "alt": "According to the region",
                    "src": "\/assets\/images\/gk-location.png",
                    "title": ""
                },
                {
                    "alt": "Export results",
                    "src": "\/assets\/images\/gk-export.png",
                    "title": ""
                },
                {
                    "alt": "menu",
                    "src": "\/assets\/images\/close-icon.png",
                    "title": ""
                },
                {
                    "alt": "close",
                    "src": "\/themes\/default\/images\/icons\/close-ticket.png",
                    "title": ""
                },
                {
                    "alt": "close",
                    "src": "\/themes\/default\/images\/icons\/close-ticket.png",
                    "title": ""
                },
                {
                    "alt": "close",
                    "src": "\/themes\/default\/images\/icons\/close-ticket.png",
                    "title": ""
                }
            ]
        }
    ]
}

Using this function, you can get a list of images on a page.

All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment you check.

$page is a relative page address on the site; the page value must be enclosed in single quotes, like this:
https://api.dataforseo.com/v2/op_tasks_images_on_page/12345/'/page/on/site.html'

You will receive an array from the API server in the results field.

Name of a field Type Description
status string general result
“ok” - successful
“error” - error
if status=“error”, then you can see more detailed information in the error array
error array informational array of error
only if status=“error”
      code integer error code
      message string text description of an error
results_time string execution time, seconds
results_count string number of elements in the results array
results array results array
      post_id string index in the array received in the POST request
      post_site string site received in the POST request
      task_id integer unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
      string_search_containment string string_search_containment received in a POST request
default value: ‘null’.
      crawl_max_pages integer maximum number of test pages
      crawl_start string date and time of the start of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
      crawl_end string date and time of the end of crawling
in the format year-month-day:GMT_hours:GMT_minutes:time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning - the value of this field will be ‘null’
      status string current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
      images_on_page array array of images
            alt string alt attribute
            src string src attribute
            title string title attribute
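
A Python sketch (our own helper) that lists images whose alt or title attribute is empty or missing, based on the images_on_page structure documented above:

def images_missing_attributes(result_item):
    flagged = []
    for image in result_item["images_on_page"]:
        # an empty string or a missing key both count as a problem
        if not image.get("alt") or not image.get("title"):
            flagged.append(image["src"])
    return flagged

For the sample response above, every image has an empty title attribute, so all twelve src values would be returned.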