NAVNavbar
Logo
cURL php NodeJS Python cSharp

OnPage API Resources

‌‌
This endpoint will provide you with a list of resources, including images, scripts, stylesheets, and broken elements.
You will get a detailed overview of every resource found on the crawled pages.

If you would like to receive a list of pages that contain a specific resource, please refer to the Pages By Resource endpoint.

Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-dashboard

<?php
// The RestClient helper used below can be downloaded from
// https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');

// Replace 'login' and 'password' with your credentials from https://app.dataforseo.com/api-dashboard
$client = new RestClient('https://api.dataforseo.com/', null, 'login', 'password');

// The request body is a list of tasks; this example sends a single task that
// asks for image resources with size above 100000, sorted by size in
// descending order and capped at 10 items.
$post_array = [
    [
        "id" => "07281559-0695-0216-0000-c269be8b7592",
        "filters" => [
            ["resource_type", "=", "image"],
            "and",
            ["size", ">", 100000],
        ],
        "order_by" => ["size,desc"],
        "limit" => 10,
    ],
];

try {
    // POST /v3/on_page/resources
    // (see the documentation for the full list of supported parameters)
    $result = $client->post('/v3/on_page/resources', $post_array);
    // Dump the response; replace this with your own handling of the result.
    print_r($result);
} catch (RestClientException $e) {
    // Report the HTTP status, the error code, the message and the stack trace.
    echo "\n",
        "HTTP code: {$e->getHttpCode()}\n",
        "Error code: {$e->getCode()}\n",
        "Message: {$e->getMessage()}\n",
        $e->getTraceAsString(),
        "\n";
}

// Drop the reference to the client once the request is done.
$client = null;
?>

The above command returns JSON structured like this:

{
  "version": "0.1.20200805",
  "status_code": 20000,
  "status_message": "Ok.",
  "time": "4.8323 sec.",
  "cost": 0,
  "tasks_count": 1,
  "tasks_error": 0,
  "tasks": [
    {
      "id": "08091838-1535-0216-0000-cee52596d188",
      "status_code": 20000,
      "status_message": "Ok.",
      "time": "4.7640 sec.",
      "cost": 0,
      "result_count": 1,
      "path": [
        "v3",
        "on_page",
        "resources"
      ],
      "data": {
        "api": "on_page",
        "function": "resources",
        "limit": 100
      },
      "result": [
        {
          "crawl_progress": "finished",
          "crawl_status": {
            "max_crawl_pages": 10,
            "pages_in_queue": 0,
            "pages_crawled": 10
          },
          "total_items_count": 15,
          "items_count": 100,
          "items": [
            {
              "resource_type": "stylesheet",
              "meta": null,
              "status_code": 200,
              "location": null,
              "url": "https://dataforseo.com/wp-content/themes/startit/assets/css/style_dynamic.css?ver=1566565352",
              "size": 1216,
              "encoded_size": 0,
              "total_transfer_size": 275,
              "fetch_time": "2021-02-17 13:54:15 +00:00",
              "fetch_timing": {
                "duration_time": 0,
                "fetch_start": 0,
                "fetch_end": 0
              },
              "cache_control": {
                "cachable": false,
                "ttl": 0
              },
              "checks": {
                "no_content_encoding": false,
                "high_loading_time": false,
                "is_redirect": false,
                "is_4xx_code": false,
                "is_5xx_code": false,
                "is_broken": false,
                "is_www": false,
                "is_https": true,
                "is_http": false,
                "is_minified": false,
                "has_subrequests": false
              },
              "content_encoding": "gzip",
              "media_type": "text/css",
              "accept_type": "stylesheet",
              "server": "nginx/1.10.1 (Ubuntu)",
              "last_modified": {
                "header": "2021-10-21 14:11:10 +00:00",
                "sitemap": null,
                "meta_tag": "2021-03-15 00:00:00 +00:00"
              }
            },
            {
              "resource_type": "script",
              "meta": null,
              "status_code": 200,
              "location": null,
              "url": "https://dataforseo.com/wp-includes/js/jquery/jquery-migrate.min.js?ver=1.4.1",
              "size": 10056,
              "encoded_size": 0,
              "total_transfer_size": 385,
              "fetch_time": "2021-02-17 13:54:15 +00:00",
              "fetch_timing": {
                "duration_time": 0,
                "fetch_start": 0,
                "fetch_end": 0
              },
              "cache_control": {
                "cachable": true,
                "ttl": 2592000
              },
              "checks": {
                "no_content_encoding": false,
                "high_loading_time": false,
                "is_redirect": false,
                "is_4xx_code": false,
                "is_5xx_code": false,
                "is_broken": false,
                "is_www": false,
                "is_https": true,
                "is_http": false,
                "is_minified": false,
                "has_redirect": false,
                "has_subrequests": false
              },
              "content_encoding": "gzip",
              "media_type": "application/javascript",
              "accept_type": "script",
              "server": "nginx/1.10.1 (Ubuntu)",
              "last_modified": {
                "header": "2021-10-21 14:11:10 +00:00",
                "sitemap": null,
                "meta_tag": "2021-03-15 00:00:00 +00:00"
              }
            },
            {
              "resource_type": "image",
              "meta": {
                "alternative_text": "sean-cooney-review",
                "title": null,
                "original_width": 250,
                "original_height": 250,
                "width": 250,
                "height": 250
              },
              "status_code": 200,
              "location": null,
              "url": "https://dataforseo.com/wp-content/uploads/2020/06/sean-cooney-review-1.png",
              "size": 94695,
              "encoded_size": 94695,
              "total_transfer_size": 95036,
              "fetch_time": "2021-02-17 13:54:15 +00:00",
              "fetch_timing": {
                "duration_time": 0,
                "fetch_start": 0,
                "fetch_end": 0
              },
              "cache_control": {
                "cachable": true,
                "ttl": 2592000
              },
              "checks": {
                "no_content_encoding": true,
                "high_loading_time": false,
                "is_redirect": false,
                "is_4xx_code": false,
                "is_5xx_code": false,
                "is_broken": false,
                "is_www": false,
                "is_https": true,
                "is_http": false,
                "has_redirect": false,
                "original_size_displayed": true
              },
              "content_encoding": null,
              "media_type": "image/png",
              "accept_type": "image",
              "server": "nginx/1.10.1 (Ubuntu)",
              "last_modified": {
                "header": "2021-10-21 14:11:10 +00:00",
                "sitemap": null,
                "meta_tag": "2021-03-15 00:00:00 +00:00"
              }
            },
            {
              "resource_type": "broken",
              "meta": null,
              "status_code": 404,
              "location": null,
              "url": "https://dataforseo.com/css/bootstrap.css",
              "size": 75924,
              "encoded_size": 0,
              "total_transfer_size": 464,
              "fetch_time": "2021-02-17 13:54:15 +00:00",
              "fetch_timing": {
                "duration_time": 0,
                "fetch_start": 0,
                "fetch_end": 0
              },
              "cache_control": {
                "cachable": true,
                "ttl": 0
              },
              "checks": {
                "no_content_encoding": false,
                "high_loading_time": false,
                "is_redirect": false,
                "is_4xx_code": true,
                "is_5xx_code": false,
                "is_broken": true,
                "is_www": false,
                "is_https": true,
                "is_http": false
              },
              "content_encoding": "gzip",
              "media_type": "text/html",
              "accept_type": "stylesheet",
              "server": "nginx/1.10.1 (Ubuntu)",
              "last_modified": {
                "header": "2021-10-21 14:11:10 +00:00",
                "sitemap": null,
                "meta_tag": "2021-03-15 00:00:00 +00:00"
              }
            }
          ]
        }
      ]
    }
  ]
}

All POST data should be sent in the JSON format (UTF-8 encoding). The task setting is done using the POST method. When setting a task, you should send all task parameters in the task array of the generic POST array.

Description of the fields for setting a task:

Field name Type Description
id string ID of the task
required field
you can get this ID in the response of the Task POST endpoint
example:
“07131248-1535-0216-1000-17384017ad04”
url string page URL
optional field
specify this field if you want to get the resources for a specific page
note that to obtain resource’s meta from a particular URL, you should specify the URL in this field;
if you do not indicate a url when setting a task, resource’s meta in the results will be returned based on the data from the page where our crawler first saw the resource
limit integer the maximum number of returned resources
optional field
default value: 100
maximum value: 1000
offset integer offset in the results array of returned resources
optional field
default value: 0
if you specify the 10 value, the first ten resources in the results array will be omitted and the data will be provided for the successive resources
filters array array of results filtering parameters
optional field
you can add several filters at once (8 filters maximum)
you should set a logical operator and, or between the conditions
the following operators are supported:
regex, not_regex, <, <=, >, >=, =, <>, in, not_in, like, not_like
you can use the % operator with like and not_like to match any string of zero or more characters
example:
["resource_type","=","stylesheet"]

[["resource_type","=","image"],
"and",["checks.is_https","=",false]]

[["fetch_timing.duration_time",">",1],"and",[["total_transfer_size",">",100],"or",["checks.high_loading_time","=",true]]]

The full list of possible filters is available at this link.

relevant_pages_filters array filter the resources by relevant pages
optional field
you can use this field to obtain resources from pages matching the defined parameters
you can apply the same filters here as available for the pages endpoint
you can add several filters at once (8 filters maximum)
you should set a logical operator and, or between the conditions
the following operators are supported:
regex, not_regex, <, <=, >, >=, =, <>, in, not_in, like, not_like
you can use the % operator with like and not_like to match any string of zero or more characters
example:
["checks.no_image_title","=",true]
order_by array results sorting rules
optional field
you can use the same values as in the filters array to sort the results
possible sorting types:
asc – results will be sorted in the ascending order
desc – results will be sorted in the descending order
you should use a comma to set up a sorting type
example:
["size,desc"]
note that you can set no more than three sorting rules in a single request
you should use a comma to separate several sorting rules
example:
["size,desc","fetch_timing.fetch_end,desc"]
tag string user-defined task identifier
optional field
the character limit is 255
you can use this parameter to identify the task and match it with the result
you will find the specified tag value in the data object of the response

‌‌‌‌‌‌
As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.

Description of the fields in the results array:

Field name Type Description
version string the current version of the API
status_code integer general status code
you can find the full list of the response codes here
Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message string general informational message
you can find the full list of general informational messages here
time string execution time, seconds
cost float total tasks cost, USD
tasks_count integer the number of tasks in the tasks array
tasks_error integer the number of tasks in the tasks array returned with an error
tasks array array of tasks
        id string task identifier
unique task identifier in our system in the UUID format
        status_code integer status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
        status_message string informational message of the task
you can find the full list of general informational messages here
        time string execution time, seconds
        cost float cost of the task, USD
        result_count integer number of elements in the result array
        path array URL path
        data object contains the same parameters that you specified in the POST request
        result array array of results
            crawl_progress string status of the crawling session
possible values: in_progress, finished
            crawl_status object details of the crawling session
               max_crawl_pages integer maximum number of pages to crawl
indicates the max_crawl_pages limit you specified when setting a task
               pages_in_queue integer number of pages that are currently in the crawling queue
               pages_crawled integer number of crawled pages
            total_items_count integer total number of relevant items crawled
            items_count integer number of items in the results array
            items array items array
                resource_type string type of the returned resource
possible types: script, image, stylesheet, broken
                meta object resource properties
the value depends on the resource_type
note that if you do not indicate a url when setting a task, resource’s meta is returned based on the data from the page where our crawler first saw the resource;
to obtain resource’s meta from a particular url, specify that URL when setting a task
                    alternative_text string content of the image alt attribute
the value depends on the resource_type
                    title string title
                    original_width integer original image width in px
                    original_height integer original image height in px
                    width integer image width in px
                    height integer image height in px
                status_code integer status code of the page where a given resource is located
                location string location header
indicates the URL to redirect a page to
                url string resource URL
                size integer resource size
indicates the size of a given resource measured in bytes
                encoded_size integer resource size after encoding
indicates the size of the encoded resource measured in bytes
                total_transfer_size integer compressed resource size
indicates the compressed size of a given resource in bytes
                fetch_time string date and time when a resource was fetched
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2021-02-17 13:54:15 +00:00
                fetch_timing object resource fetching time range
                    duration_time integer indicates how many milliseconds it took to fetch a resource
                   fetch_start integer time to start downloading the resource
the amount of time a browser needs to start downloading a resource
                    fetch_end integer time to complete downloading the resource
the amount of time a browser needs to complete downloading a resource
                cache_control object instructions for caching
                   cachable boolean indicates whether the resource is cacheable
                   ttl integer time to live
the amount of time it takes for the browser to cache a resource; measured in milliseconds
                checks object resource check-ups
contents of the object depend on the resource_type
                    no_content_encoding boolean resource with no content encoding
indicates whether a page has no compression algorithm of the content;
available for items with the following resource_type: script, image, stylesheet, broken
                    high_loading_time boolean resource with high loading time
indicates whether a resource loading time exceeds 3 seconds;
available for items with the following resource_type: script, image, stylesheet, broken
                    is_redirect boolean resource with redirects
indicates whether a page with this resource has 3XX redirects to other pages;
available for items with the following resource_type: script, image, stylesheet, broken
                    is_4xx_code boolean resource with 4xx status code
indicates whether a page with this resource has 4XX response code
                    is_5xx_code boolean resource with 5xx status code
indicates whether a page with this resource has 5XX response code
                    is_broken boolean broken resource
indicates whether a page with this resource returns 4xx, 5xx response codes or has broken elements inside the resource;
available for items with the following resource_type: script, image, stylesheet, broken
                    is_www boolean page with www
indicates whether a page with this resource is on a www subdomain;
available for items with the following resource_type: script, image, stylesheet, broken
                    is_https boolean page with the https protocol
available for items with the following resource_type: script, image, stylesheet, broken
                    is_http boolean page with the http protocol
available for items with the following resource_type: script, image, stylesheet, broken
                    original_size_displayed boolean image displayed in its original size
indicates whether the image is displayed in its original size;
available for items with the following resource_type: image
                    is_minified boolean resource is minified
indicates whether the content of a stylesheet or script is minified;
available for items with the following resource_type: stylesheet, script
                    has_redirect boolean resource has a redirect
available for items with the following resource_type: script, image;
if the resource_type is image, this field will indicate whether other pages and/or resources have redirects pointing at the image;
if the resource_type is script, this field will indicate whether the script contains a redirect
                    has_subrequests boolean resource contains subrequests
indicates whether the content of a stylesheet or script contains additional requests;
available for items with the following resource_type: stylesheet, script
              resource_errors object resource errors and warnings
                  errors array resource errors
                      line integer line where the error was found
                      column integer column where the error was found
                      message string text message of the error
the full list of possible HTML errors can be found here
                      status_code integer status code of the error
possible values:
0 — Unidentified Error;
501 — Html Parse Error;
1501 — JS Parse Error;
2501 — CSS Parse Error;
3501 — Image Parse Error;
3502 — Image Scale Is Zero;
3503 — Image Size Is Zero;
3504 — Image Format Invalid
                  warnings array resource warnings
                      line integer line the warning relates to
note that if "line": 0, the warning relates to the whole page
                      column integer column the warning relates to
note that if "column": 0, the warning relates to the whole page
                      message string text message of the warning
possible messages:
"Has node with more than 60 childs." – HTML page has at least 1 tag nesting over 60 tags of the same level
"Has more that 1500 nodes." – DOM tree contains over 1,500 elements
"HTML depth more than 32 tags." – DOM depth exceeds 32 nodes
                      status_code integer status code of the warning
possible values:
0 — Unidentified Warning;
1 — Has node with more than 60 childs;
2 — Has more that 1500 nodes;
3 — HTML depth more than 32 tags
                content_encoding string type of encoding
                media_type string types of media used to display a resource
                accept_type string indicates the expected type of resource
for example, if "resource_type": "broken", accept_type will indicate the type of the broken resource
possible values:
any, none, image, sitemap, robots, script, stylesheet, redirect, html, text, other, font
                server string server version
                last_modified object contains data on changes related to the resource
if there is no data, the value will be null
                   header string date and time when the header was last modified
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
if there is no data, the value will be null
                   sitemap string date and time when the sitemap was last modified
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
if there is no data, the value will be null
                   meta_tag string date and time when the meta tag was last modified
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
if there is no data, the value will be null

‌‌