NAVNavbar
Logo
cURL php NodeJS Python cSharp

Get Google Dataset Search Advanced Results by id

Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-dashboard

<?php
/**
 * Example: collect Google Dataset Search "advanced" results for completed tasks.
 *
 * Steps performed by this script:
 *   1. Create a RestClient for the DataForSEO API base URL with your credentials.
 *   2. Request the list of completed tasks (tasks_ready).
 *   3. For every completed task, request its results and append the response to $result.
 *   4. Dump everything that was collected with print_r().
 *
 * Any RestClientException is reported on standard output (HTTP code, error code,
 * message and stack trace), one item per line.
 */
// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
try {
	// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
	$client = new RestClient($api_url, null, 'login', 'password');
} catch (RestClientException $e) {
	// Without a client nothing below can run, so report the failure and stop.
	echo "\n";
	print "HTTP code: {$e->getHttpCode()}\n";
	print "Error code: {$e->getCode()}\n";
	print "Message: {$e->getMessage()}\n";
	print  $e->getTraceAsString();
	echo "\n";
	exit();
}
try {
	// Responses of the per-task result requests, in the order they were fetched.
	$result = [];
	// #1 - using this method you can get a list of completed tasks
	// GET /v3/serp/google/dataset_search/tasks_ready
	$tasks_ready = $client->get('/v3/serp/google/dataset_search/tasks_ready');
	// you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
	// 20000 is the "Ok." status code; anything else means the list cannot be trusted.
	$is_ok = isset($tasks_ready['status_code']) && $tasks_ready['status_code'] === 20000;
	// Guard against a missing or null "tasks" entry so foreach never receives a non-array.
	if ($is_ok && isset($tasks_ready['tasks']) && is_array($tasks_ready['tasks'])) {
		foreach ($tasks_ready['tasks'] as $task) {
			// "result" is skipped when absent or null (isset() is false for null).
			if (isset($task['result'])) {
				foreach ($task['result'] as $task_ready) {
					// #2 - using this method you can get results of each completed task
					// GET /v3/serp/google/dataset_search/task_get/advanced/$id
					if (isset($task_ready['endpoint_advanced'])) {
						$result[] = $client->get($task_ready['endpoint_advanced']);
					}
					// #3 - another way to get the task results by id
					// GET /v3/serp/google/dataset_search/task_get/advanced/$id
					/*
					if (isset($task_ready['id'])) {
						$result[] = $client->get('/v3/serp/google/dataset_search/task_get/advanced/' . $task_ready['id']);
					}
					*/
				}
			}
		}
	}
	print_r($result);
	// do something with result
} catch (RestClientException $e) {
	// A failed request is reported but does not abort the script; cleanup below still runs.
	echo "\n";
	print "HTTP code: {$e->getHttpCode()}\n";
	print "Error code: {$e->getCode()}\n";
	print "Message: {$e->getMessage()}\n";
	print  $e->getTraceAsString();
	echo "\n";
}
// Drop the reference to the client once the work is done.
$client = null;
?>

The above command returns JSON structured like this:

{
  "version": "0.1.20220819",
  "status_code": 20000,
  "status_message": "Ok.",
  "time": "0.2755 sec.",
  "cost": 0,
  "tasks_count": 1,
  "tasks_error": 0,
  "tasks": [
    {
      "id": "12141749-4426-0066-0000-ec6c038930d0",
      "status_code": 20000,
      "status_message": "Ok.",
      "time": "0.0272 sec.",
      "cost": 0,
      "result_count": 1,
      "path": [
        "v3",
        "serp",
        "google",
        "dataset_search",
        "task_get",
        "advanced",
        "12141749-4426-0066-0000-ec6c038930d0"
      ],
      "data": {
        "api": "serp",
        "function": "task_get",
        "se": "google",
        "se_type": "dataset_search",
        "keyword": "water quality",
        "last_updated": "1m",
        "file_formats": [
          "archive",
          "image"
        ],
        "usage_rights": "noncommercial",
        "is_free": true,
        "topics": [
          "natural_sciences",
          "geo"
        ],
        "device": "desktop",
        "os": "windows"
      },
      "result": [
        {
          "keyword": "water quality",
          "se_domain": "datasetsearch.research.google.com",
          "language_code": "en",
          "check_url": "https://datasetsearch.research.google.com/search?query=water%20quality&hl=en&filters=WyJbXCJ1cGRhdGVkX2RhdGVcIixbXCIxbVwiXV0iLCJbXCJmaWxlX2Zvcm1hdF9jbGFzc1wiLFtcIjdcIixcIjVcIl1dIiwiW1wibGljZW5zZV9jbGFzc1wiLFtcIm5vbmNvbW1lcmNpYWxcIl1dIiwiW1wiaXNfYWNjZXNzaWJsZV9mb3JfZnJlZVwiLFtdXSIsIltcImZpZWxkX29mX3N0dWR5XCIsW1wibmF0dXJhbF9zY2llbmNlc1wiLFwiZ2VvXCJdXSJd",
          "datetime": "2023-01-16 15:41:03 +00:00",
          "spell": null,
          "item_types": [
            "dataset"
          ],
          "se_results_count": 11,
          "items_count": 11,
          "items": [
            {
              "type": "dataset",
              "rank_group": 1,
              "rank_absolute": 1,
              "position": "left",
              "xpath": null,
              "dataset_id": "L2cvMTFwYzA4cmhqeg==",
              "title": "Logan River Observatory: South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West Aquatic Site (SLB_600W_CNL) Quality Controlled Data",
              "image_url": null,
              "scholarly_citations_count": null,
              "links": [
                {
                  "type": "link_element",
                  "title": "hydroshare.org",
                  "description": null,
                  "url": "http://www.hydroshare.org/",
                  "domain": "www.hydroshare.org"
                },
                {
                  "type": "link_element",
                  "title": "dataone.org",
                  "description": null,
                  "url": "http://search.dataone.org/",
                  "domain": "search.dataone.org"
                }
              ],
              "dataset_providers": [
                {
                  "type": "dataset_providers_element",
                  "title": "HydroShare",
                  "url": null,
                  "domain": null
                }
              ],
              "formats": [
                {
                  "type": "formats_element",
                  "format": "zip",
                  "size": null
                }
              ],
              "authors": [
                {
                  "type": "authors_element",
                  "name": "Logan River Observatory",
                  "url": null,
                  "domain": null
                }
              ],
              "licenses": [
                {
                  "type": "licenses_element",
                  "title": "Attribution 4.0 (CC BY 4.0)",
                  "url": "https://creativecommons.org/licenses/by/4.0/",
                  "domain": "creativecommons.org"
                }
              ],
              "updated_date": "2022-12-27 02:00:00 +00:00",
              "area_covered": [
                "2300 North 600 West",
                "South Logan Benson Canal at Benson Irrigation Company Flume",
                "Logan",
                "North America",
                "Rocky Mountains"
              ],
              "period_covered": null,
              "dataset_description": {
                "text": "This dataset contains quality control level 1 (QC1) data for all of the variables measured for the aquatic site on the South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West (SLB_600W_CNL). Each file contains all available QC1 data for a specific variable. Files will be updated as new data become available, but no more than once daily. These data have passed QA/QC procedures such as sensor calibration and visual inspection and removal of obvious errors. These data are approved by Technicians as the best available version of the data. See published script for correction steps specific to this data series. Each file header contains detailed metadata for site information, variable and method information, source information, and qualifiers referenced in the data. This site is currently operated as part of the Logan River Observatory.\n",
                "links": null
              }
            },
            {
              "type": "dataset",
              "rank_group": 2,
              "rank_absolute": 2,
              "position": "left",
              "xpath": null,
              "dataset_id": "L2cvMTFuMDQ3X3B6aA==",
              "title": "Lake Simcoe Monitoring",
              "image_url": null,
              "scholarly_citations_count": 31,
              "links": [
                {
                  "type": "link_element",
                  "title": "canada.ca",
                  "description": null,
                  "url": "http://open.canada.ca/",
                  "domain": "open.canada.ca"
                },
                {
                  "type": "link_element",
                  "title": "arctic-sdi.org",
                  "description": null,
                  "url": "http://catalogue.arctic-sdi.org/",
                  "domain": "catalogue.arctic-sdi.org"
                }
              ],
              "dataset_providers": [
                {
                  "type": "dataset_providers_element",
                  "title": "Government of Ontario",
                  "url": null,
                  "domain": null
                }
              ],
              "formats": [
                {
                  "type": "formats_element",
                  "format": "pdf",
                  "size": null
                },
                {
                  "type": "formats_element",
                  "format": "html",
                  "size": null
                },
                {
                  "type": "formats_element",
                  "format": "zip",
                  "size": null
                }
              ],
              "authors": null,
              "licenses": [
                {
                  "type": "licenses_element",
                  "title": "Open Government Licence - Canada 2.0",
                  "url": "https://open.canada.ca/en/open-government-licence-canada",
                  "domain": "open.canada.ca"
                }
              ],
              "updated_date": "2022-12-30 02:00:00 +00:00",
              "area_covered": null,
              "period_covered": {
                "start_date": "1980-01-01 03:00:00 +00:00",
                "end_date": "2021-12-31 02:00:00 +00:00",
                "displayed_date": "Jan 1, 1980 - Dec 31, 2021"
              },
              "dataset_description": {
                "text": "The Lake Simcoe lake monitoring program provides measurements of chemical and physical water quality limits such as total phosphorus, nitrogen, chlorophyll a, pH, alkalinity, conductivity, dissolved organic and inorganic carbon, silica, other ions, water transparency, temperature and dissolved oxygen. Samples are collected biweekly during the spring, summer and fall. *[pH]: potential of hydrogen\n",
                "links": null
              }
            }
          ]
        }
      ]
    }
  ]
}

Description of the fields for sending a request:

Field name Type Description
id string task identifier
unique task identifier in our system in the UUID format
you will be able to use it within 30 days to request the results of the task at any time

‌‌
In response, the API server returns JSON-encoded data containing a tasks array with information specific to the tasks you set.

Description of the fields in the results array:

Field name Type Description
version string the current version of the API
status_code integer general status code
you can find the full list of the response codes here
Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message string general informational message
you can find the full list of general informational messages here
time string execution time, seconds
cost float total tasks cost, USD
tasks_count integer the number of tasks in the tasks array
tasks_error integer the number of tasks in the tasks array returned with an error
tasks array array of tasks
        id string task identifier
unique task identifier in our system in the UUID format
        status_code integer status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
        status_message string informational message of the task
you can find the full list of general informational messages here
        time string execution time, seconds
        cost float cost of the task, USD
        result_count integer number of elements in the result array
        path array URL path
        data object contains the same parameters that you specified in the POST request
        result array array of results
            keyword string keyword received in a POST array
the keyword is returned with decoded %## (plus symbol ‘+’ will be decoded to a space character)
            se_domain string search engine domain in a POST array
            language_code string language code in a POST array
            check_url string direct URL to search engine results
you can use it to make sure that we provided accurate results
            datetime string date and time when the result was received
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
            spell object autocorrection of the search engine
if the search engine provided results for a keyword that was corrected, we will specify the keyword corrected by the search engine and the type of autocorrection
            item_types array types of search results in SERP
contains types of search results (items) found in SERP.
possible item type: dataset
            se_results_count integer total number of results in SERP
            items_count integer the number of results returned in the items array
            items array elements of search results found in SERP
                type string type of element = ‘dataset’
                rank_group integer group rank in SERP
position within a group of elements with identical type values
positions of elements with different type values are omitted from rank_group
                rank_absolute integer absolute rank in SERP
absolute position among all the elements in SERP
                position string the alignment of the element in SERP
can take the following values:
left, right
                xpath string the XPath of the element
                dataset_id string ID of the dataset
                title string title of the result in SERP
                image_url string URL of the image
the URL leading to the image on the original resource or DataForSEO storage (in case the original source is not available)
                scholarly_citations_count integer count of articles that refer to the dataset
                links array sitelinks
the links shown below some of Google Dataset’s search results
if there are none, equals null
                    type string type of element = ‘link_element’
                    title string title of the result in SERP
                    description string description of the results element in SERP
                    url string sitelink URL
                    domain string domain in SERP
                dataset_providers array the list of institutions that provided the dataset
                    type string type of element = ‘dataset_providers_element’
                    title string name of the dataset provider
                    url string site URL of the dataset provider
                    domain string site domain of the dataset provider
                formats array the list of file formats of the dataset
                    type string type of element = ‘formats_element’
                    format string type of file format of the dataset
for example: zip, html, csv
                    size string file size in bytes
                authors array the list of authors of the dataset
                    type string type of element = ‘authors_element’
                    name string name of the dataset author
                    url string author’s link URL
                    domain string author’s link domain
                licenses array the list of licenses issued to the dataset
                    type string type of element = ‘licenses_element’
                    title string name of the license
                    url string license URL
                    domain string license page domain
                updated_date string date and time when the result was last updated
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2022-11-27 02:00:00 +00:00
                area_covered array the list of areas covered in the dataset
for example: Africa, Global
                period_covered object period covered in the dataset
                    start_date string date and time when the period starts
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2020-03-02 02:00:00 +00:00
                    end_date string date and time when the period ends
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2022-12-09 02:00:00 +00:00
                    displayed_date string period displayed in SERP
example:
Mar 2, 2020 - Dec 9, 2022
                dataset_description object description of the dataset
                    text string text of the description
                    links array links featured in the ‘dataset_description’
                       type string type of element = ‘link_element’
                       title string link anchor text
                       description string description of the results element in SERP
                       url string URL link
                       domain string domain in SERP

‌‌