NAVNavbar
Logo
cURL php NodeJS Python cSharp cURL php NodeJS Python cSharp

OnPage API Summary


Using this function, you can get the overall information on a website as well as drill down into exact on-page issues of a website that has been scanned. As a result, you will know what functions to use for receiving detailed data for each of the found issues.

Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-access

<?php
// RestClient.php is available for download at https://api.dataforseo.com/v3/_examples/php/_php_RestClient.zip
require('RestClient.php');

// Replace 'login' and 'password' below with your credentials from https://app.dataforseo.com/api-access
$api_url = 'https://api.dataforseo.com/';
$client = new RestClient($api_url, null, 'login', 'password');

try {
   // Identifier of the On-Page task whose summary is requested
   $id = "07281559-0695-0216-0000-c269be8b7592";
   // Collects the responses; the summary is retrieved with
   // GET /v3/on_page/summary/$id
   $result = [];
   $result[] = $client->get("/v3/on_page/summary/{$id}");
   print_r($result);
   // do something with result
} catch (RestClientException $e) {
   // Print the HTTP code, error code, message and stack trace, framed by newlines
   echo "\n"
      . "HTTP code: {$e->getHttpCode()}\n"
      . "Error code: {$e->getCode()}\n"
      . "Message: {$e->getMessage()}\n"
      . $e->getTraceAsString()
      . "\n";
}
// Drop the reference to the client object
$client = null;
?>

The above command returns JSON structured like this:

{
  "version": "0.1.20221214",
  "status_code": 20000,
  "status_message": "Ok.",
  "time": "1.0297 sec.",
  "cost": 0,
  "tasks_count": 1,
  "tasks_error": 0,
  "tasks": [
    {
      "id": "05021654-1535-0216-0000-bb170c8efb5b",
      "status_code": 20000,
      "status_message": "Ok.",
      "time": "1.0125 sec.",
      "cost": 0,
      "result_count": 1,
      "path": [
        "v3",
        "on_page",
        "summary",
        "05021654-1535-0216-0000-bb170c8efb5b"
      ],
      "data": {
        "api": "on_page",
        "function": "summary",
        "target": "dataforseo.com",
        "max_crawl_pages": 1000,
        "enable_www_redirect_check": "true"
      },
      "result": [
        {
          "crawl_progress": "in_progress",
          "crawl_status": {
            "max_crawl_pages": 1000,
            "pages_in_queue": 142,
            "pages_crawled": 15
          },
          "crawl_gateway_address": "168.119.141.170",
          "crawl_stop_reason": null,
          "domain_info": {
            "name": "dataforseo.com",
            "cms": "powered by wpbakery page builder - drag and drop page builder for wordpress.",
            "ip": "104.26.6.202",
            "server": "cloudflare",
            "crawl_start": "2023-05-02 13:54:55 +00:00",
            "crawl_end": null,
            "extended_crawl_status": "no_errors",
            "ssl_info": {
              "valid_certificate": true,
              "certificate_issuer": "CN=GTS CA 1P5, O=Google Trust Services LLC, C=US",
              "certificate_subject": "CN=dataforseo.com",
              "certificate_version": 3,
              "certificate_hash": "sha256RSA",
              "certificate_expiration_date": "2023-07-23 14:16:19 +00:00"
            },
            "checks": {
              "sitemap": true,
              "robots_txt": true,
              "start_page_deny_flag": false,
              "ssl": true,
              "http2": true,
              "test_canonicalization": false,
              "test_www_redirect": true,
              "test_hidden_server_signature": true,
              "test_page_not_found": true,
              "test_directory_browsing": true,
              "test_https_redirect": true
            },
            "total_pages": 15,
            "page_not_found_status_code": 404,
            "canonicalization_status_code": 403,
            "directory_browsing_status_code": 301,
            "www_redirect_status_code": 301,
            "main_domain": "dataforseo.com"
          },
          "page_metrics": {
            "links_external": 386,
            "links_internal": 1407,
            "duplicate_title": 0,
            "duplicate_description": 0,
            "duplicate_content": 4,
            "broken_links": 0,
            "broken_resources": 0,
            "links_relation_conflict": 0,
            "redirect_loop": 0,
            "onpage_score": 96.93,
            "non_indexable": 45,
            "checks": {
              "canonical": 15,
              "duplicate_meta_tags": 15,
              "no_description": 1,
              "frame": 0,
              "large_page_size": 0,
              "irrelevant_description": 0,
              "irrelevant_meta_keywords": 15,
              "is_https": 15,
              "is_http": 0,
              "title_too_long": 1,
              "low_content_rate": 15,
              "small_page_size": 0,
              "no_h1_tag": 0,
              "recursive_canonical": 0,
              "no_favicon": 0,
              "no_image_alt": 0,
              "no_image_title": 0,
              "seo_friendly_url": 15,
              "seo_friendly_url_characters_check": 15,
              "seo_friendly_url_dynamic_check": 15,
              "seo_friendly_url_keywords_check": 15,
              "seo_friendly_url_relative_length_check": 15,
              "title_too_short": 2,
              "no_content_encoding": 0,
              "high_waiting_time": 0,
              "high_loading_time": 0,
              "is_redirect": 0,
              "is_broken": 0,
              "is_4xx_code": 0,
              "is_5xx_code": 0,
              "is_www": 0,
              "no_doctype": 0,
              "no_encoding_meta_tag": 0,
              "high_content_rate": 0,
              "low_character_count": 0,
              "high_character_count": 0,
              "low_readability_rate": 0,
              "irrelevant_title": 0,
              "deprecated_html_tags": 0,
              "duplicate_title_tag": 0,
              "no_title": 0,
              "flash": 0,
              "lorem_ipsum": 0,
              "has_misspelling": null,
              "canonical_to_broken": 0,
              "canonical_to_redirect": 0,
              "has_links_to_redirects": 0,
              "is_orphan_page": 0,
              "has_meta_refresh_redirect": 0,
              "meta_charset_consistency": 15,
              "size_greater_than_3mb": 0,
              "has_html_doctype": 15,
              "https_to_http_links": 1,
              "has_render_blocking_resources": 15,
              "redirect_chain": 0,
              "canonical_chain": 0,
              "is_link_relation_conflict": 0
            }
          }
        }
      ]
    }
  ]
}

Description of the fields for sending a request:

Field name Type Description
id string task identifier
required field
you can get this ID in the response of the Task POST endpoint
example:
“07131248-1535-0216-1000-17384017ad04”

As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.

Description of the fields in the results array:

Field name Type Description
version string the current version of the API
status_code integer general status code
you can find the full list of the response codes here
Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message string general informational message
you can find the full list of general informational messages here
time string execution time, seconds
cost float total tasks cost, USD
tasks_count integer the number of tasks in the tasks array
tasks_error integer the number of tasks in the tasks array returned with an error
tasks array array of tasks
        id string task identifier
unique task identifier in our system in the UUID format
        status_code integer status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
        status_message string informational message of the task
you can find the full list of general informational messages here
        time string execution time, seconds
        cost float cost of the task, USD
        result_count integer number of elements in the result array
        path array URL path
        data object contains the same parameters that you specified in the POST request
        result array array of results
            crawl_progress string status of the crawling session
possible values: in_progress, finished
            crawl_status object details of the crawling session
               max_crawl_pages integer maximum number of pages to crawl
indicates the max_crawl_pages limit you specified when setting a task
               pages_in_queue integer number of pages that are currently in the crawling queue
               pages_crawled integer number of crawled pages
            crawl_gateway_address string crawler ip address
displays the IP address used by the crawler to initiate the current crawling session
you can find the full list of IPs used by our crawler in the Overview section
            crawl_stop_reason string reason why the crawling stopped
information about the reason why the crawling process stopped;
possible values:
limit_exceeded – the limit set in the max_crawl_pages was exceeded;
empty_queue – all URLs in the queue were crawled;
force_stopped – the crawling process was halted using the On Page API Force Stop function;
unexpected_exception – an internal error was encountered while crawling the target, contact support for more info
            domain_info object domain-wide info
on-page information about the target domain and crawling process
                name string domain name
                cms string content management system
content management system identified on a website
the content of the generator meta tag
the data is taken from the first random page that returns the 200 response code
if our crawler was unable to identify the cms, the value would be null
                ip string domain ip address
                server string website server
the version of the server detected on a website
the content of the server header
the information is taken from the first page whose response code is 200
                crawl_start string time when the crawling started
date and time when the website was sent for crawling
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
                crawl_end string time when the crawling ended
date and time when the crawling was finished
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
note: informative only if "crawl_progress" is "finished"
if "crawl_progress" is in_progress, the value will be null
                extended_crawl_status string crawl status and errors
indicates the reason why a website was not crawled;
can take the following values:
no_errors – no crawling errors were detected;
site_unreachable – our crawler could not reach a website and thus was not able to obtain a status code;
invalid_page_status_code – status code of the first crawled page >= 400;
forbidden_meta_tag – the first crawled page contains the <meta name="robots" content="noindex"> tag;
forbidden_robots – robots.txt forbids crawling the page;
forbidden_http_header – HTTP header of the page contains “X-Robots-Tag: noindex” ;
too_many_redirects – the first crawled page has more than 10 redirects;
unknown – the reason is unknown
                ssl_info object ssl certificate info
information about the Secure Sockets Layer protocol detected on a website
                      valid_certificate boolean ssl certificate validity
indicates whether the ssl certificate detected on a website is not expired, suspended, revoked or invalid
                      certificate_issuer string ssl certificate authority
the entity that issued the detected ssl certificate
                      certificate_subject string ssl certificate subject
the entity associated with the public key
                      certificate_version string ssl certificate version
indicates the version of X.509 used by an ssl certificate
                      certificate_hash string ssl certificate hash
the version of the ssl certificate’s hash function
                      certificate_expiration_date string ssl certificate expiration date
the date and time when the ssl certificate expires
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
                checks object website checks
other on-page check-ups related to the website
                      sitemap boolean website sitemap
indicates whether a sitemap was detected on a target website
                      robots_txt boolean robots.txt file
indicates whether a target website has a robots.txt file
                      start_page_deny_flag boolean deny flag on a start page
indicates whether a start page on a target website has a [F] flag which causes the server to return a 403 Forbidden status code to the client
                      ssl boolean ssl certificate
indicates whether a target website has an SSL certificate
                      http2 boolean HTTP/2 protocol
indicates whether a target website is using the HTTP/2 protocol
                     test_canonicalization boolean canonical tags
the checkup of the server behavior when our crawler tries to access the website via IP;
is true if the canonicalization_status_code returns 301
                     test_www_redirect boolean www to non-www redirect
is true if the www to non-www redirect is implemented by the requested domain
                     test_hidden_server_signature boolean hidden server signature
indicates whether the server signature is hidden from crawlers
if the value is false, our crawler was able to access the website’s server signature
                      test_page_not_found boolean 404 status for a page that cannot be found
indicates whether a target responds with a 404 status code when the requested resource cannot be found
                      test_directory_browsing boolean directory browsing not accessible
indicates whether a target website doesn’t allow accessing a file directory without authentication
if the directory is not accessible, the value is true
                      test_https_redirect boolean http requests are redirected to https
indicates whether a target website redirects http requests to the https version;
if the website’s home page redirects http requests to https, the value is true
                total_pages integer total crawled pages
the total number of crawled pages
                page_not_found_status_code integer status code returned by a non-existent page
in most cases, it is recommended a server returns a 404 response code
                canonicalization_status_code integer status code returned by a canonicalized page
the checkup of the server behavior when our crawler tries to access the website via IP;
in most cases, it is recommended that canonicalized pages respond with a 301 or 302 status code
                directory_browsing_status_code integer status code returned by a directory
the status code returned by a directory page on a target website
in most cases, it is recommended that directories respond with a 403 or 401 status code
                www_redirect_status_code integer redirect status code
the status code of the www to non-www redirect
in most cases, it is recommended that redirect returns a 301 status code
                main_domain string root domain name
            page_metrics object page-specific info
metrics information on the target website pages
                links_external integer number of external links
the number of links pointing to other websites
                links_internal integer number of internal links
the number of links pointing to other pages within the target website
                duplicate_title integer number of pages with duplicate titles
                duplicate_description integer number of pages with duplicate descriptions
                duplicate_content integer number of pages with duplicate content
                broken_links integer number of broken links
number of broken links across all crawled pages on a target website
                broken_resources integer number of broken resources
the number of images and other resources with broken links
                links_relation_conflict integer number of links present on the target website that may have a conflict
for example, if "links_relation_conflict": 2, the target website is referring to the same source by at least one internal link with the rel="nofollow" attribute and by at least one dofollow link
                redirect_loop integer number of redirect chains that start and end at the same URL
number of redirect chains where the destination URL redirects back to the original URL
                onpage_score float shows how well the website is optimized on a 100-point scale
this field shows how well the website is optimized, considering the critical on-page issues and warnings detected;
100 is the highest possible score, which means the website does not have any critical on-page issues or important warnings;
note that this value depends on the number of crawled pages;
learn more about how the metric is calculated in this help center article
                non_indexable integer number of non-indexable pages
number of pages that are blocked from being indexed by Google and other search engines by robots.txt, HTTP headers, or meta tags settings;
you can receive a list of non-indexable URLs using this endpoint
                checks object page-specific on-page check-ups
                      canonical integer number of canonical pages
                      duplicate_meta_tags integer number of pages with duplicate meta tags
the number of pages with more than one meta tag of the same type
available for canonical pages only
                      no_description integer number of pages with no description
the number of pages with an empty or absent description meta tag
available for canonical pages only
                      frame integer number of pages with frames
the number of pages that contain frame, iframe, frameset tags
                      large_page_size integer number of heavy pages
the number of pages that have a size exceeding 1 megabyte
                      irrelevant_description integer number of pages with irrelevant description
the number of pages with description tags that are irrelevant to the content of a page
the relevance threshold is 0.2
available for canonical pages only
                      irrelevant_meta_keywords integer number of pages with irrelevant meta keywords
the number of pages with keywords tags that are irrelevant to the content of a page
the relevance threshold is 0.6
available for canonical pages only
                      is_https integer number of pages with the https protocol
                      is_http integer number of pages with the http protocol
                      title_too_long integer number of pages with long titles
the number of pages with the content of title tag exceeding 65 characters
                      low_content_rate integer number of pages with a low content rate
number of pages, which have the plaintext size to page size ratio of less than 0.1 or more than 0.9
                      small_page_size integer number of small pages
the number of pages that have the size smaller than 1024 bytes
                      no_h1_tag integer number of pages with empty or absent h1 tags
                      recursive_canonical integer recursive canonical error
indicates the number of pages that contain rel="canonical" tag to another page, which in turn, refers back to the initial page
                      no_favicon integer number of pages with no favicon
                      no_image_alt integer number of pages containing images without alt tags
                      no_image_title integer number of pages containing images without title tags
                      seo_friendly_url integer number of pages with seo-friendly urls
the ‘SEO-friendliness’ of a page URL is checked by four parameters:
– the length of the relative path is less than 120 characters
– no special characters
– no dynamic parameters
– relevance of the URL to the page
if at least one of these checks fails, the URL is considered not ‘SEO-friendly’
the data is available for canonical pages only
                      seo_friendly_url_characters_check integer url characters check-up
the number of pages with URLs containing only uppercase and lowercase Latin characters, digits and dashes
                      seo_friendly_url_dynamic_check integer url dynamic check-up
the number of pages with no dynamic parameters in the url
                      seo_friendly_url_keywords_check integer url keyword check-up
the number of pages that have URLs consistent with the title meta tag
                      seo_friendly_url_relative_length_check integer url length check-up
the number of pages with URLs no longer than 120 characters
                      title_too_short integer pages with short titles
the number of pages that have titles shorter than 30 characters
                      no_content_encoding integer pages with no content encoding
the number of pages with no compression algorithm of the content
                      high_waiting_time integer pages with high waiting time
the number of pages with waiting time (aka Time to First Byte) exceeding 1.5 seconds
                      high_loading_time integer pages with high loading time
the number of pages with loading time exceeding 3 seconds
                      is_redirect integer pages with redirects
the number of pages with 3XX redirects to other pages
                      is_broken integer broken pages
the number of pages with response codes less than 200 or greater than 400
                      is_4xx_code integer pages with 4xx status codes
the number of pages with 4xx response codes
                      is_5xx_code integer pages with 5xx status codes
the number of pages with 5xx response codes
                      is_www integer pages with www
the number of pages on a www subdomain
                      no_doctype integer pages with no doctype
the number of pages without the DOCTYPE declaration
                      no_encoding_meta_tag integer pages with no meta tag encoding
the number of pages without Content-Type
informative only if the encoding is not explicit in the Content-Type header
for example: Content-Type: "text/html; charset=utf8"
                      high_content_rate integer pages with high content rate
number of pages, which have the plaintext size to page size ratio of more than 0.9
available for canonical pages only
                      low_character_count integer pages with low character count
the number of pages containing less than 1024 characters
                      high_character_count integer pages with high character count
the number of pages containing more than 256,000 characters
                      low_readability_rate integer pages with low readability rate
the number of pages that scored less than 15 points on the Flesch–Kincaid readability test
                      irrelevant_title integer pages with irrelevant titles
the number of pages with title tags that are irrelevant to the content of the page
the relevance threshold is 0.3
available for canonical pages only
                      deprecated_html_tags integer pages with deprecated tags
the number of pages with deprecated HTML tags
                      duplicate_title_tag integer pages with more than one title tag
the number of pages that have more than one title tag
                      no_title integer pages with no title
the number of pages with empty or absent title tags
                      flash integer pages with flash
the number of pages with flash elements
                      lorem_ipsum integer pages with lorem ipsum
the number of pages with lorem ipsum content
                      has_misspelling integer pages with misspelling
the number of pages with spelling mistakes
informative if the check_spell was set to true in the POST array
                      canonical_to_broken integer canonical pages pointing to broken pages
the number of pages with a canonical link element pointing to a page that responds with a 404 error
                      canonical_to_redirect integer canonical pages pointing to pages that redirect elsewhere
the number of pages with a canonical link element pointing to a page that responds with a 3XX redirect
                      has_links_to_redirects integer pages pointing to pages that redirect elsewhere
the number of pages pointing to a page that responds with a 3XX redirect
                      is_orphan_page integer pages with no internal links pointing to them
the number of pages with no reference from other pages of the domain
                      has_meta_refresh_redirect integer pages with meta refresh redirect
the number of pages with the <meta http-equiv="refresh"> tag that instructs a browser to load another page after a specified time span
                      meta_charset_consistency integer pages with meta charset tag
the number of pages with meta charset tag that sets character encoding for a page
                      size_greater_than_3mb integer pages with size larger than 3 MB
the number of pages with size exceeding 3 MB
                      has_html_doctype integer pages with HTML doctype declaration
the number of pages with the DOCTYPE declaration
                      https_to_http_links integer pages with HTTPS protocol that link to pages with HTTP protocol
the number of pages with secure HTTPS protocol that link to pages with unsecure HTTP protocol
                      has_render_blocking_resources integer pages with render-blocking resources
the number of pages with render-blocking resources
                      redirect_chain integer pages with multiple redirects
the number of pages with at least two redirects between the original page and the destination page
                      canonical_chain integer pages with canonical pointing to a page that has a canonical pointing elsewhere
the number of pages with a canonical link element pointing to a page that has a canonical pointing to a different page
e.g. page a is canonicalized to page b, which is canonicalized to page c
                      is_link_relation_conflict integer pages on the target website that may have a link relation conflict
for example, if "is_link_relation_conflict": 1, the target website has 1 page receiving at least one internal link with the rel="nofollow" attribute and at least one dofollow link

‌‌