{
    "openapi": "3.1.0",
    "info": {
        "title": "Extract APIs",
        "description": "Diffbot's Extraction APIs include various endpoints to extract JSON data/fields from different types of web pages.  The Analyze endpoint is used when the page type is unknown, and will attempt to identify the page type and send it to the appropriate extraction API.",
        "termsOfService": "https://www.diffbot.com/terms/",
        "contact": {
            "email": "support@diffbot.com"
        },
        "version": "1.1.0"
    },
    "servers": [
        {
            "url": "https://api.diffbot.com/v3"
        }
    ],
    "tags": [
        {
            "name": "Extract APIs",
            "description": "Extract APIs"
        }
    ],
    "paths": {
        "/analyze": {
            "get": {
                "tags": [
                    "Analyze"
                ],
                "summary": "Analyze",
                "description": "Automatically classify a page and extract data according to its type.",
                "operationId": "extract-analyze",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Web page URL of the analyze to process (URL encoded)",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/"
                        }
                    },
                    {
                        "name": "mode",
                        "in": "query",
                        "description": "By default the Analyze API will fully extract all pages that match an existing Extract API. Set mode to a specific Extract API (e.g., `mode=\"article\"`) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields.",
                        "schema": {
                            "type": "string",
                            "format": "enum",
                            "enum": [
                                "article",
                                "product",
                                "discussion",
                                "image",
                                "video",
                                "list",
                                "event"
                            ]
                        }
                    },
                    {
                        "name": "fallback",
                        "in": "query",
                        "description": "If an appropriate API cannot be determined (pages classified with type \"other\"), fall back to this API.",
                        "schema": {
                            "type": "string",
                            "format": "enum",
                            "enum": [
                                "article",
                                "product",
                                "discussion",
                                "image",
                                "video",
                                "list",
                                "event"
                            ]
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "discussion",
                        "in": "query",
                        "description": "Pass `discussion=false` to disable automatic extraction of comments or reviews from pages identified as articles or products. This will not affect pages identified as discussions.",
                        "schema": {
                            "type": "boolean"
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/",
                                        "api": "analyze",
                                        "version": 3
                                    },
                                    "humanLanguage": "en",
                                    "objects": [
                                        {
                                            "date": "Fri, 04 Sep 2020 00:00:00 GMT",
                                            "sentiment": 0.153,
                                            "images": [
                                                {
                                                    "naturalHeight": 869,
                                                    "width": 654,
                                                    "diffbotUri": "image|3|1663647584",
                                                    "url": "https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640",
                                                    "naturalWidth": 1366,
                                                    "height": 418
                                                },
                                                {
                                                    "naturalHeight": 1900,
                                                    "width": 460,
                                                    "diffbotUri": "image|3|683243517",
                                                    "url": "https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=1006,1400",
                                                    "naturalWidth": 1366,
                                                    "height": 294
                                                }
                                            ],
                                            "author": "Will Douglas Heaven",
                                            "estimatedDate": "Fri, 04 Sep 2020 00:00:00 GMT",
                                            "publisherRegion": "North America",
                                            "icon": "https://www.technologyreview.com/static/media/favicon.1cfcdb44.ico",
                                            "diffbotUri": "article|3|973247980",
                                            "siteName": "MIT Technology Review",
                                            "type": "article",
                                            "title": "This know-it-all AI learns by reading the entire web nonstop",
                                            "tags": [
                                                {
                                                    "score": 0.998680055141449,
                                                    "sentiment": 0,
                                                    "count": 10,
                                                    "label": "artificial intelligence",
                                                    "uri": "https://diffbot.com/entity/E_lYDrjmAMlKKwXaDf958zg",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Skill",
                                                        "http://dbpedia.org/ontology/Activity"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9686350226402283,
                                                    "sentiment": 0.889,
                                                    "count": 7,
                                                    "label": "Diffbot",
                                                    "uri": "https://diffbot.com/entity/EYX1i02YVPsuT7fPLUYgRhQ",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9306924939155579,
                                                    "sentiment": 0,
                                                    "count": 2,
                                                    "label": "Michigan",
                                                    "uri": "https://diffbot.com/entity/E2eIrTt0jPUmGmuV6N2O3KQ",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Place",
                                                        "http://dbpedia.org/ontology/PopulatedPlace",
                                                        "http://dbpedia.org/ontology/State"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9025880098342896,
                                                    "sentiment": 0,
                                                    "count": 1,
                                                    "label": "Paul Katsen",
                                                    "uri": "https://diffbot.com/entity/EqUim_ci0ObmrK2gZM3UfNA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8933213353157043,
                                                    "sentiment": 0.48,
                                                    "count": 4,
                                                    "label": "Katy Perry",
                                                    "uri": "https://diffbot.com/entity/E_6rhi_PEOD6vGencwOxd2A",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8848651051521301,
                                                    "sentiment": 0,
                                                    "count": 4,
                                                    "label": "Mike Tung",
                                                    "uri": "https://diffbot.com/entity/ESGMaGV9uP0SuTmfPTtNEoA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8562507629394531,
                                                    "sentiment": 0,
                                                    "count": 4,
                                                    "label": "Google",
                                                    "uri": "https://diffbot.com/entity/EUFq-3WlpNsq0pvfUYWXOEA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7750672101974487,
                                                    "sentiment": 0,
                                                    "count": 2,
                                                    "label": "Alaska",
                                                    "uri": "https://diffbot.com/entity/E4odwkG_xMNeZTbHrnNrojA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Place",
                                                        "http://dbpedia.org/ontology/PopulatedPlace",
                                                        "http://dbpedia.org/ontology/State"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7653270959854126,
                                                    "sentiment": 0,
                                                    "count": 1,
                                                    "label": "Zola",
                                                    "uri": "https://diffbot.com/entity/E0qGTA2o5NjaeezggjMsoVw",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7643865942955017,
                                                    "sentiment": 0.75,
                                                    "count": 3,
                                                    "label": "GUID Partition Table",
                                                    "uri": "https://diffbot.com/entity/EReKbXuSJMYmoM8lawtgEsA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Skill",
                                                        "http://dbpedia.org/ontology/Activity"
                                                    ]
                                                }
                                            ],
                                            "publisherCountry": "United States",
                                            "humanLanguage": "en",
                                            "authorUrl": "https://www.technologyreview.com/author/will-douglas-heaven/",
                                            "pageUrl": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/",
                                            "html": "<figure><img alt=\"knowledge graph illustration\" sizes=\"(max-width: 32rem) 472px,(max-width: 48rem) 728px,(max-width: 64rem) 808px,(max-width: 80rem) 1064px,(max-width: 90rem) 1126px,1080px\" src=\"https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2252,1266\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=944,530 944w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=472,265 472w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1456,818 1456w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=728,409 728w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1616,908 1616w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=808,454 808w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2128,1196 2128w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1064,598 1064w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2252,1266 2252w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1126,633 1126w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2160,1214 2160w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1080,607 1080w\"></img></figure>\n<p>Back in July, OpenAI&rsquo;s <a href=\"https://www.technologyreview.com/2020/07/20/1005454/openai-machine-learning-language-generator-gpt-3-nlp/\">latest language model, GPT-3</a>, dazzled with its ability to churn out paragraphs that look as if they could have been written by a human. People started showing off how GPT-3 could also autocomplete code or fill in blanks in spreadsheets.</p>\n<p>In one example, Twitter employee Paul Katsen tweeted &ldquo;the spreadsheet function to rule them all,&rdquo; in which<a href=\"https://twitter.com/pavtalk/status/1285410751092416513\"> GPT-3 fills out columns</a> by itself, pulling in data for US states: the population of Michigan is 10.3 million, Alaska became a state in 1906, and so on.</p>\n<p>Except that GPT-3 can be a bit of a bullshitter. The population of Michigan has never been 10.3 million, and Alaska became a state in 1959.</p>\n<p>Language models like GPT-3 are <a href=\"https://www.technologyreview.com/2020/07/31/1005876/natural-language-processing-evaluation-ai-opinion/\">amazing mimics</a>, but they have little sense of what they&rsquo;re actually saying. &ldquo;They&rsquo;re really good at generating stories about unicorns,&rdquo; says Mike Tung, CEO of Stanford startup Diffbot. &ldquo;But they&rsquo;re not trained to be factual.&rdquo;</p>\n<p>This is a problem if we want <a href=\"https://forms.technologyreview.com/in-machines-we-trust/\">AIs to be trustworthy</a>. That&rsquo;s why Diffbot takes a different approach. It is building an AI that reads every page on the entire public web, in multiple languages, and extracts as many facts from those pages as it can.</p>\n<p>Like GPT-3, Diffbot&rsquo;s system learns by vacuuming up vast amounts of human-written text found online. But instead of using that data to train a language model, Diffbot turns what it reads into a series of three-part factoids that relate one thing to another: subject, verb, object.</p>\n<p>Pointed at <a href=\"https://www.technologyreview.com/author/will-douglas-heaven/\">my bio</a>, for example, Diffbot learns that Will Douglas Heaven is a journalist; Will Douglas Heaven works at MIT Technology Review; MIT Technology Review is a media company; and so on. Each of these factoids gets joined up with billions of others in a sprawling, interconnected network of facts. This is known as a knowledge graph.</p>\n<p>Knowledge graphs are not new. They have been around for decades, and were a fundamental concept in early AI research. But constructing and maintaining knowledge graphs has typically been done by hand, which is hard. This also stopped Tim Berners-Lee from realizing what he called the semantic web, which would have included information for machines as well as humans, so that bots could book our flights, do our shopping, or give smarter answers to questions than search engines.</p>\n<p>A few years ago, Google started using knowledge graphs too. Search for &ldquo;Katy Perry&rdquo; and you will get a box next to the main search results telling you that Katy Perry is an American singer-songwriter with music available on YouTube, Spotify, and Deezer. You can see at a glance that she is married to Orlando Bloom, she&rsquo;s 35 and worth $125 million, and so on. Instead of giving you a list of links to pages about Katy Perry, Google gives you a set of facts about her drawn from its knowledge graph.</p>\n<p>But Google only does this for its most popular search terms. Diffbot wants to do it for everything. By fully automating the construction process, Diffbot has been able to build what may be the largest knowledge graph ever.</p>\n<p>Alongside Google and Microsoft, it is one of only three US companies that crawl the entire public web. &ldquo;It definitely makes sense to crawl the web,&rdquo; says Victoria Lin, a research scientist at Salesforce who works on natural-language processing and knowledge representation. &ldquo;A lot of human effort can otherwise go into making a large knowledge base.&rdquo; Heiko Paulheim at the University of Mannheim in Germany agrees: &ldquo;Automation is the only way to build large-scale knowledge graphs.&rdquo;</p>\n<h3>Super surfer</h3>\n<p>To collect its facts, Diffbot&rsquo;s AI reads the web as a human would&mdash;but much faster. Using a super-charged version of the Chrome browser, the AI views the raw pixels of a web page and uses image-recognition algorithms to categorize the page as one of 20 different types, including video, image, article, event, and discussion thread. It then identifies key elements on the page, such as headline, author, product description, or price, and uses NLP to extract facts from any text.</p>\n<p>Every three-part factoid gets added to the knowledge graph. Diffbot extracts facts from pages written in any language, which means that it can answer queries about Katy Perry, say, using facts taken from articles in Chinese or Arabic even if they do not contain the term &ldquo;Katy Perry.&rdquo;</p>\n<p>Browsing the web like a human lets the AI see the same facts that we see. It also means it has had to learn to navigate the web like us. The AI must scroll down, switch between tabs, and click away pop-ups. &ldquo;The AI has to play the web like a video game just to experience the pages,&rdquo; says Tung.</p>\n<p>Diffbot crawls the web nonstop and rebuilds its knowledge graph every four to five days. According to Tung, the AI adds 100 million to 150 million entities each month as new people pop up online, companies are created, and products are launched. It uses more machine-learning algorithms to fuse new facts with old, creating new connections or overwriting out-of-date ones. Diffbot has to add new hardware to its data center as the knowledge graph grows.</p>\n<p>Researchers can access Diffbot&rsquo;s knowledge graph for free. But Diffbot also has around 400 paying customers. The search engine DuckDuckGo uses it to generate its own Google-like boxes. Snapchat uses it to extract highlights from news pages. The popular wedding-planner app Zola uses it to help people make wedding lists, pulling in images and prices. NASDAQ, which provides information about the stock market, uses it for financial research.</p>\n<h3>Fake shoes</h3>\n<p>Adidas and Nike even use it to search the web for counterfeit shoes. A search engine will return a long list of sites that mention Nike trainers. But Diffbot lets these companies look for sites that are actually selling their shoes, rather just talking about them.</p>\n<p>For now, these companies must interact with Diffbot using code. But Tung plans to add a natural-language interface. Ultimately, he wants to build what he calls a &ldquo;universal factoid question answering system&rdquo;: an AI that could answer almost anything you asked it, with sources to back up its response.</p>\n<p>Tung and Lin agree that this kind of AI cannot be built with language models alone. But better yet would be to combine the technologies, using a language model like GPT-3 to craft a human-like front end for a know-it-all bot.</p>\n<p>Still, even an AI that has its facts straight is not necessarily smart. &ldquo;We&rsquo;re not trying to define what intelligence is, or anything like that,&rdquo; says Tung. &ldquo;We&rsquo;re just trying to build something useful.&rdquo;</p>\n<figure><img alt=\"NLP maps hallucinogenic experience\" sizes=\"(max-width: 32rem) 287px,(max-width: 48rem) 503px,100vw\" src=\"https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=574,574 574w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=287,287 287w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640 1006w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=503,320 503w\"></img></figure>\n<figure><img alt=\"Demis Hassabis\" sizes=\"(max-width: 32rem) 287px,(max-width: 48rem) 503px,100vw\" src=\"https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=1006,1400\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=574,574 574w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=287,287 287w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=1006,1400 1006w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=503,700 503w\"></img></figure>",
                                            "categories": [
                                                {
                                                    "score": 0.962,
                                                    "name": "Technology & Computing",
                                                    "id": "iabv2-596"
                                                },
                                                {
                                                    "score": 0.962,
                                                    "name": "Artificial Intelligence",
                                                    "id": "iabv2-597"
                                                }
                                            ],
                                            "text": "Back in July, OpenAI’s latest language model, GPT-3, dazzled with its ability to churn out paragraphs that look as if they could have been written by a human. People started showing off how GPT-3 could also autocomplete code or fill in blanks in spreadsheets.\nIn one example, Twitter employee Paul Katsen tweeted “the spreadsheet function to rule them all,” in which GPT-3 fills out columns by itself, pulling in data for US states: the population of Michigan is 10.3 million, Alaska became a state in 1906, and so on.\nExcept that GPT-3 can be a bit of a bullshitter. The population of Michigan has never been 10.3 million, and Alaska became a state in 1959.\nLanguage models like GPT-3 are amazing mimics, but they have little sense of what they’re actually saying. “They’re really good at generating stories about unicorns,” says Mike Tung, CEO of Stanford startup Diffbot. “But they’re not trained to be factual.”\nThis is a problem if we want AIs to be trustworthy. That’s why Diffbot takes a different approach. It is building an AI that reads every page on the entire public web, in multiple languages, and extracts as many facts from those pages as it can.\nLike GPT-3, Diffbot’s system learns by vacuuming up vast amounts of human-written text found online. But instead of using that data to train a language model, Diffbot turns what it reads into a series of three-part factoids that relate one thing to another: subject, verb, object.\nPointed at my bio, for example, Diffbot learns that Will Douglas Heaven is a journalist; Will Douglas Heaven works at MIT Technology Review; MIT Technology Review is a media company; and so on. Each of these factoids gets joined up with billions of others in a sprawling, interconnected network of facts. This is known as a knowledge graph.\nKnowledge graphs are not new. They have been around for decades, and were a fundamental concept in early AI research. But constructing and maintaining knowledge graphs has typically been done by hand, which is hard. This also stopped Tim Berners-Lee from realizing what he called the semantic web, which would have included information for machines as well as humans, so that bots could book our flights, do our shopping, or give smarter answers to questions than search engines.\nA few years ago, Google started using knowledge graphs too. Search for “Katy Perry” and you will get a box next to the main search results telling you that Katy Perry is an American singer-songwriter with music available on YouTube, Spotify, and Deezer. You can see at a glance that she is married to Orlando Bloom, she’s 35 and worth $125 million, and so on. Instead of giving you a list of links to pages about Katy Perry, Google gives you a set of facts about her drawn from its knowledge graph.\nBut Google only does this for its most popular search terms. Diffbot wants to do it for everything. By fully automating the construction process, Diffbot has been able to build what may be the largest knowledge graph ever.\nAlongside Google and Microsoft, it is one of only three US companies that crawl the entire public web. “It definitely makes sense to crawl the web,” says Victoria Lin, a research scientist at Salesforce who works on natural-language processing and knowledge representation. “A lot of human effort can otherwise go into making a large knowledge base.” Heiko Paulheim at the University of Mannheim in Germany agrees: “Automation is the only way to build large-scale knowledge graphs.”\nSuper surfer\nTo collect its facts, Diffbot’s AI reads the web as a human would—but much faster. Using a super-charged version of the Chrome browser, the AI views the raw pixels of a web page and uses image-recognition algorithms to categorize the page as one of 20 different types, including video, image, article, event, and discussion thread. It then identifies key elements on the page, such as headline, author, product description, or price, and uses NLP to extract facts from any text.\nEvery three-part factoid gets added to the knowledge graph. Diffbot extracts facts from pages written in any language, which means that it can answer queries about Katy Perry, say, using facts taken from articles in Chinese or Arabic even if they do not contain the term “Katy Perry.”\nBrowsing the web like a human lets the AI see the same facts that we see. It also means it has had to learn to navigate the web like us. The AI must scroll down, switch between tabs, and click away pop-ups. “The AI has to play the web like a video game just to experience the pages,” says Tung.\nDiffbot crawls the web nonstop and rebuilds its knowledge graph every four to five days. According to Tung, the AI adds 100 million to 150 million entities each month as new people pop up online, companies are created, and products are launched. It uses more machine-learning algorithms to fuse new facts with old, creating new connections or overwriting out-of-date ones. Diffbot has to add new hardware to its data center as the knowledge graph grows.\nResearchers can access Diffbot’s knowledge graph for free. But Diffbot also has around 400 paying customers. The search engine DuckDuckGo uses it to generate its own Google-like boxes. Snapchat uses it to extract highlights from news pages. The popular wedding-planner app Zola uses it to help people make wedding lists, pulling in images and prices. NASDAQ, which provides information about the stock market, uses it for financial research.\nFake shoes\nAdidas and Nike even use it to search the web for counterfeit shoes. A search engine will return a long list of sites that mention Nike trainers. But Diffbot lets these companies look for sites that are actually selling their shoes, rather just talking about them.\nFor now, these companies must interact with Diffbot using code. But Tung plans to add a natural-language interface. Ultimately, he wants to build what he calls a “universal factoid question answering system”: an AI that could answer almost anything you asked it, with sources to back up its response.\nTung and Lin agree that this kind of AI cannot be built with language models alone. But better yet would be to combine the technologies, using a language model like GPT-3 to craft a human-like front end for a know-it-all bot.\nStill, even an AI that has its facts straight is not necessarily smart. “We’re not trying to define what intelligence is, or anything like that,” says Tung. “We’re just trying to build something useful.”",
                                            "authors": [
                                                {
                                                    "name": "Will Douglas Heavenarchive page",
                                                    "link": "technologyreview.com/author/will-douglas-heaven"
                                                }
                                            ]
                                        }
                                    ],
                                    "type": "article",
                                    "title": "This know-it-all AI learns by reading the entire web nonstop | MIT Technology Review"
                                },
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "humanLanguage": {
                                            "type": "string"
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "date": {
                                                        "type": "string"
                                                    },
                                                    "sentiment": {
                                                        "type": "number"
                                                    },
                                                    "images": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "naturalHeight": {
                                                                    "type": "integer"
                                                                },
                                                                "width": {
                                                                    "type": "integer"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "url": {
                                                                    "type": "string"
                                                                },
                                                                "naturalWidth": {
                                                                    "type": "integer"
                                                                },
                                                                "height": {
                                                                    "type": "integer"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "author": {
                                                        "type": "string"
                                                    },
                                                    "estimatedDate": {
                                                        "type": "string"
                                                    },
                                                    "publisherRegion": {
                                                        "type": "string"
                                                    },
                                                    "icon": {
                                                        "type": "string"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "siteName": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "tags": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "score": {
                                                                    "type": "number"
                                                                },
                                                                "sentiment": {
                                                                    "type": "number"
                                                                },
                                                                "count": {
                                                                    "type": "integer"
                                                                },
                                                                "label": {
                                                                    "type": "string"
                                                                },
                                                                "uri": {
                                                                    "type": "string"
                                                                },
                                                                "rdfTypes": {
                                                                    "type": "array",
                                                                    "items": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "publisherCountry": {
                                                        "type": "string"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "authorUrl": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "html": {
                                                        "type": "string"
                                                    },
                                                    "categories": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "score": {
                                                                    "type": "number"
                                                                },
                                                                "name": {
                                                                    "type": "string"
                                                                },
                                                                "id": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "text": {
                                                        "type": "string"
                                                    },
                                                    "authors": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "name": {
                                                                    "type": "string"
                                                                },
                                                                "link": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        },
                                        "type": {
                                            "type": "string"
                                        },
                                        "title": {
                                            "type": "string"
                                        }
                                    }
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/article": {
            "get": {
                "tags": [
                    "Article"
                ],
                "summary": "Article",
                "description": "Automatically extract clean article text and other data from news articles, blog posts and other text-heavy pages.",
                "operationId": "article",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb",
                                "quotes"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "paging",
                        "in": "query",
                        "description": "Pass `paging=false` to disable automatic concatenation multiple-page articles.",
                        "schema": {
                            "type": "boolean"
                        }
                    },
                    {
                        "name": "maxTags",
                        "in": "query",
                        "description": "Set the maximum number of automatically-generated tags to return. (Default: 10)",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "tagConfidence",
                        "in": "query",
                        "description": "Set the minimum relevance `score` of tags to return, between 0.0 and 1.0. By default only tags with a `score` equal to or above 0.5 will be returned.",
                        "schema": {
                            "type": "number",
                            "format": "float"
                        }
                    },
                    {
                        "name": "categoryConfidence",
                        "in": "query",
                        "description": "Set the minimum relevance `score` of categories to return, between 0.0 and 1.0. By default only categories with a `score` equal to or above 0.5 will be returned.",
                        "schema": {
                            "type": "number",
                            "format": "float"
                        }
                    },
                    {
                        "name": "discussion",
                        "in": "query",
                        "description": "Pass `discussion=false` to disable automatic extraction of article comments.",
                        "schema": {
                            "type": "boolean"
                        }
                    },
                    {
                        "name": "naturalLanguage",
                        "in": "query",
                        "description": "Run extracted text and title through the Diffbot Natural Language API. Example: `&naturalLanguage=entities,facts,categories,sentiment`.",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "entities",
                                "sentiment",
                                "summary",
                                "facts",
                                "openFacts",
                                "records",
                                "categories",
                                "sentences",
                                "language"
                            ]
                        }
                    },
                    {
                        "name": "summaryNumSentences",
                        "in": "query",
                        "description": "Sets the maximum number of sentences for summary generation when using naturalLanguage=summary (Default: 3).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "humanLanguage": {
                                            "type": "string"
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "date": {
                                                        "type": "string"
                                                    },
                                                    "sentiment": {
                                                        "type": "number"
                                                    },
                                                    "images": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "naturalHeight": {
                                                                    "type": "integer"
                                                                },
                                                                "width": {
                                                                    "type": "integer"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "url": {
                                                                    "type": "string"
                                                                },
                                                                "naturalWidth": {
                                                                    "type": "integer"
                                                                },
                                                                "height": {
                                                                    "type": "integer"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "author": {
                                                        "type": "string"
                                                    },
                                                    "estimatedDate": {
                                                        "type": "string"
                                                    },
                                                    "publisherRegion": {
                                                        "type": "string"
                                                    },
                                                    "icon": {
                                                        "type": "string"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "siteName": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "tags": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "score": {
                                                                    "type": "number"
                                                                },
                                                                "sentiment": {
                                                                    "type": "number"
                                                                },
                                                                "count": {
                                                                    "type": "integer"
                                                                },
                                                                "label": {
                                                                    "type": "string"
                                                                },
                                                                "uri": {
                                                                    "type": "string"
                                                                },
                                                                "rdfTypes": {
                                                                    "type": "array",
                                                                    "items": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "publisherCountry": {
                                                        "type": "string"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "authorUrl": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "html": {
                                                        "type": "string"
                                                    },
                                                    "categories": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "score": {
                                                                    "type": "number"
                                                                },
                                                                "name": {
                                                                    "type": "string"
                                                                },
                                                                "id": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "text": {
                                                        "type": "string"
                                                    },
                                                    "authors": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "name": {
                                                                    "type": "string"
                                                                },
                                                                "link": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        },
                                        "type": {
                                            "type": "string"
                                        },
                                        "title": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/",
                                        "api": "article",
                                        "version": 3
                                    },
                                    "humanLanguage": "en",
                                    "objects": [
                                        {
                                            "date": "Fri, 04 Sep 2020 00:00:00 GMT",
                                            "sentiment": 0.153,
                                            "images": [
                                                {
                                                    "naturalHeight": 869,
                                                    "width": 654,
                                                    "diffbotUri": "image|3|1663647584",
                                                    "url": "https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640",
                                                    "naturalWidth": 1366,
                                                    "height": 418
                                                }
                                            ],
                                            "author": "Will Douglas Heaven",
                                            "estimatedDate": "Fri, 04 Sep 2020 00:00:00 GMT",
                                            "publisherRegion": "North America",
                                            "icon": "https://www.technologyreview.com/static/media/favicon.1cfcdb44.ico",
                                            "diffbotUri": "article|3|973247980",
                                            "siteName": "MIT Technology Review",
                                            "type": "article",
                                            "title": "This know-it-all AI learns by reading the entire web nonstop",
                                            "tags": [
                                                {
                                                    "score": 0.998680055141449,
                                                    "sentiment": 0,
                                                    "count": 10,
                                                    "label": "artificial intelligence",
                                                    "uri": "https://diffbot.com/entity/E_lYDrjmAMlKKwXaDf958zg",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Skill",
                                                        "http://dbpedia.org/ontology/Activity"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9686350226402283,
                                                    "sentiment": 0.889,
                                                    "count": 7,
                                                    "label": "Diffbot",
                                                    "uri": "https://diffbot.com/entity/EYX1i02YVPsuT7fPLUYgRhQ",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9306924939155579,
                                                    "sentiment": 0,
                                                    "count": 2,
                                                    "label": "Michigan",
                                                    "uri": "https://diffbot.com/entity/E2eIrTt0jPUmGmuV6N2O3KQ",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Place",
                                                        "http://dbpedia.org/ontology/PopulatedPlace",
                                                        "http://dbpedia.org/ontology/State"
                                                    ]
                                                },
                                                {
                                                    "score": 0.9025880098342896,
                                                    "sentiment": 0,
                                                    "count": 1,
                                                    "label": "Paul Katsen",
                                                    "uri": "https://diffbot.com/entity/EqUim_ci0ObmrK2gZM3UfNA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8933213353157043,
                                                    "sentiment": 0.48,
                                                    "count": 4,
                                                    "label": "Katy Perry",
                                                    "uri": "https://diffbot.com/entity/E_6rhi_PEOD6vGencwOxd2A",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8848651051521301,
                                                    "sentiment": 0,
                                                    "count": 4,
                                                    "label": "Mike Tung",
                                                    "uri": "https://diffbot.com/entity/ESGMaGV9uP0SuTmfPTtNEoA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Person"
                                                    ]
                                                },
                                                {
                                                    "score": 0.8562507629394531,
                                                    "sentiment": 0,
                                                    "count": 4,
                                                    "label": "Google",
                                                    "uri": "https://diffbot.com/entity/EUFq-3WlpNsq0pvfUYWXOEA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7750672101974487,
                                                    "sentiment": 0,
                                                    "count": 2,
                                                    "label": "Alaska",
                                                    "uri": "https://diffbot.com/entity/E4odwkG_xMNeZTbHrnNrojA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Place",
                                                        "http://dbpedia.org/ontology/PopulatedPlace",
                                                        "http://dbpedia.org/ontology/State"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7653270959854126,
                                                    "sentiment": 0,
                                                    "count": 1,
                                                    "label": "Zola",
                                                    "uri": "https://diffbot.com/entity/E0qGTA2o5NjaeezggjMsoVw",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Organisation"
                                                    ]
                                                },
                                                {
                                                    "score": 0.7643865942955017,
                                                    "sentiment": 0.75,
                                                    "count": 3,
                                                    "label": "GUID Partition Table",
                                                    "uri": "https://diffbot.com/entity/EReKbXuSJMYmoM8lawtgEsA",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Skill",
                                                        "http://dbpedia.org/ontology/Activity"
                                                    ]
                                                }
                                            ],
                                            "publisherCountry": "United States",
                                            "humanLanguage": "en",
                                            "authorUrl": "https://www.technologyreview.com/author/will-douglas-heaven/",
                                            "pageUrl": "https://www.technologyreview.com/2020/09/04/1008156/knowledge-graph-ai-reads-web-machine-learning-natural-language-processing/",
                                            "html": "<figure><img alt=\"knowledge graph illustration\" sizes=\"(max-width: 32rem) 472px,(max-width: 48rem) 728px,(max-width: 64rem) 808px,(max-width: 80rem) 1064px,(max-width: 90rem) 1126px,1080px\" src=\"https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2252,1266\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=944,530 944w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=472,265 472w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1456,818 1456w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=728,409 728w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1616,908 1616w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=808,454 808w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2128,1196 2128w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1064,598 1064w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2252,1266 2252w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1126,633 1126w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=2160,1214 2160w,https://wp.technologyreview.com/wp-content/uploads/2020/09/knowledge-graph2_web.jpg?fit=1080,607 1080w\"></img></figure>\n<p>Back in July, OpenAI&rsquo;s <a href=\"https://www.technologyreview.com/2020/07/20/1005454/openai-machine-learning-language-generator-gpt-3-nlp/\">latest language model, GPT-3</a>, dazzled with its ability to churn out paragraphs that look as if they could have been written by a human. People started showing off how GPT-3 could also autocomplete code or fill in blanks in spreadsheets.</p>\n<p>In one example, Twitter employee Paul Katsen tweeted &ldquo;the spreadsheet function to rule them all,&rdquo; in which<a href=\"https://twitter.com/pavtalk/status/1285410751092416513\"> GPT-3 fills out columns</a> by itself, pulling in data for US states: the population of Michigan is 10.3 million, Alaska became a state in 1906, and so on.</p>\n<p>Except that GPT-3 can be a bit of a bullshitter. The population of Michigan has never been 10.3 million, and Alaska became a state in 1959.</p>\n<p>Language models like GPT-3 are <a href=\"https://www.technologyreview.com/2020/07/31/1005876/natural-language-processing-evaluation-ai-opinion/\">amazing mimics</a>, but they have little sense of what they&rsquo;re actually saying. &ldquo;They&rsquo;re really good at generating stories about unicorns,&rdquo; says Mike Tung, CEO of Stanford startup Diffbot. &ldquo;But they&rsquo;re not trained to be factual.&rdquo;</p>\n<p>This is a problem if we want <a href=\"https://forms.technologyreview.com/in-machines-we-trust/\">AIs to be trustworthy</a>. That&rsquo;s why Diffbot takes a different approach. It is building an AI that reads every page on the entire public web, in multiple languages, and extracts as many facts from those pages as it can.</p>\n<p>Like GPT-3, Diffbot&rsquo;s system learns by vacuuming up vast amounts of human-written text found online. But instead of using that data to train a language model, Diffbot turns what it reads into a series of three-part factoids that relate one thing to another: subject, verb, object.</p>\n<p>Pointed at <a href=\"https://www.technologyreview.com/author/will-douglas-heaven/\">my bio</a>, for example, Diffbot learns that Will Douglas Heaven is a journalist; Will Douglas Heaven works at MIT Technology Review; MIT Technology Review is a media company; and so on. Each of these factoids gets joined up with billions of others in a sprawling, interconnected network of facts. This is known as a knowledge graph.</p>\n<p>Knowledge graphs are not new. They have been around for decades, and were a fundamental concept in early AI research. But constructing and maintaining knowledge graphs has typically been done by hand, which is hard. This also stopped Tim Berners-Lee from realizing what he called the semantic web, which would have included information for machines as well as humans, so that bots could book our flights, do our shopping, or give smarter answers to questions than search engines.</p>\n<p>A few years ago, Google started using knowledge graphs too. Search for &ldquo;Katy Perry&rdquo; and you will get a box next to the main search results telling you that Katy Perry is an American singer-songwriter with music available on YouTube, Spotify, and Deezer. You can see at a glance that she is married to Orlando Bloom, she&rsquo;s 35 and worth $125 million, and so on. Instead of giving you a list of links to pages about Katy Perry, Google gives you a set of facts about her drawn from its knowledge graph.</p>\n<p>But Google only does this for its most popular search terms. Diffbot wants to do it for everything. By fully automating the construction process, Diffbot has been able to build what may be the largest knowledge graph ever.</p>\n<p>Alongside Google and Microsoft, it is one of only three US companies that crawl the entire public web. &ldquo;It definitely makes sense to crawl the web,&rdquo; says Victoria Lin, a research scientist at Salesforce who works on natural-language processing and knowledge representation. &ldquo;A lot of human effort can otherwise go into making a large knowledge base.&rdquo; Heiko Paulheim at the University of Mannheim in Germany agrees: &ldquo;Automation is the only way to build large-scale knowledge graphs.&rdquo;</p>\n<h3>Super surfer</h3>\n<p>To collect its facts, Diffbot&rsquo;s AI reads the web as a human would&mdash;but much faster. Using a super-charged version of the Chrome browser, the AI views the raw pixels of a web page and uses image-recognition algorithms to categorize the page as one of 20 different types, including video, image, article, event, and discussion thread. It then identifies key elements on the page, such as headline, author, product description, or price, and uses NLP to extract facts from any text.</p>\n<p>Every three-part factoid gets added to the knowledge graph. Diffbot extracts facts from pages written in any language, which means that it can answer queries about Katy Perry, say, using facts taken from articles in Chinese or Arabic even if they do not contain the term &ldquo;Katy Perry.&rdquo;</p>\n<p>Browsing the web like a human lets the AI see the same facts that we see. It also means it has had to learn to navigate the web like us. The AI must scroll down, switch between tabs, and click away pop-ups. &ldquo;The AI has to play the web like a video game just to experience the pages,&rdquo; says Tung.</p>\n<p>Diffbot crawls the web nonstop and rebuilds its knowledge graph every four to five days. According to Tung, the AI adds 100 million to 150 million entities each month as new people pop up online, companies are created, and products are launched. It uses more machine-learning algorithms to fuse new facts with old, creating new connections or overwriting out-of-date ones. Diffbot has to add new hardware to its data center as the knowledge graph grows.</p>\n<p>Researchers can access Diffbot&rsquo;s knowledge graph for free. But Diffbot also has around 400 paying customers. The search engine DuckDuckGo uses it to generate its own Google-like boxes. Snapchat uses it to extract highlights from news pages. The popular wedding-planner app Zola uses it to help people make wedding lists, pulling in images and prices. NASDAQ, which provides information about the stock market, uses it for financial research.</p>\n<h3>Fake shoes</h3>\n<p>Adidas and Nike even use it to search the web for counterfeit shoes. A search engine will return a long list of sites that mention Nike trainers. But Diffbot lets these companies look for sites that are actually selling their shoes, rather just talking about them.</p>\n<p>For now, these companies must interact with Diffbot using code. But Tung plans to add a natural-language interface. Ultimately, he wants to build what he calls a &ldquo;universal factoid question answering system&rdquo;: an AI that could answer almost anything you asked it, with sources to back up its response.</p>\n<p>Tung and Lin agree that this kind of AI cannot be built with language models alone. But better yet would be to combine the technologies, using a language model like GPT-3 to craft a human-like front end for a know-it-all bot.</p>\n<p>Still, even an AI that has its facts straight is not necessarily smart. &ldquo;We&rsquo;re not trying to define what intelligence is, or anything like that,&rdquo; says Tung. &ldquo;We&rsquo;re just trying to build something useful.&rdquo;</p>\n<figure><img alt=\"NLP maps hallucinogenic experience\" sizes=\"(max-width: 32rem) 287px,(max-width: 48rem) 503px,100vw\" src=\"https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=574,574 574w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=287,287 287w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=1006,640 1006w,https://wp.technologyreview.com/wp-content/uploads/2022/03/Flower-Trip-style.jpeg?resize=503,320 503w\"></img></figure>\n<figure><img alt=\"Demis Hassabis\" sizes=\"(max-width: 32rem) 287px,(max-width: 48rem) 503px,100vw\" src=\"https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=1006,1400\" srcset=\"https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=574,574 574w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=287,287 287w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=1006,1400 1006w,https://wp.technologyreview.com/wp-content/uploads/2022/02/MA22_Demis-Hassabis-99-v1.jpg?resize=503,700 503w\"></img></figure>",
                                            "categories": [
                                                {
                                                    "score": 0.962,
                                                    "name": "Technology & Computing",
                                                    "id": "iabv2-596"
                                                },
                                                {
                                                    "score": 0.962,
                                                    "name": "Artificial Intelligence",
                                                    "id": "iabv2-597"
                                                }
                                            ],
                                            "text": "Back in July, OpenAI’s latest language model, GPT-3, dazzled with its ability to churn out paragraphs that look as if they could have been written by a human. People started showing off how GPT-3 could also autocomplete code or fill in blanks in spreadsheets.\nIn one example, Twitter employee Paul Katsen tweeted “the spreadsheet function to rule them all,” in which GPT-3 fills out columns by itself, pulling in data for US states: the population of Michigan is 10.3 million, Alaska became a state in 1906, and so on.\nExcept that GPT-3 can be a bit of a bullshitter. The population of Michigan has never been 10.3 million, and Alaska became a state in 1959.\nLanguage models like GPT-3 are amazing mimics, but they have little sense of what they’re actually saying. “They’re really good at generating stories about unicorns,” says Mike Tung, CEO of Stanford startup Diffbot. “But they’re not trained to be factual.”\nThis is a problem if we want AIs to be trustworthy. That’s why Diffbot takes a different approach. It is building an AI that reads every page on the entire public web, in multiple languages, and extracts as many facts from those pages as it can.\nLike GPT-3, Diffbot’s system learns by vacuuming up vast amounts of human-written text found online. But instead of using that data to train a language model, Diffbot turns what it reads into a series of three-part factoids that relate one thing to another: subject, verb, object.\nPointed at my bio, for example, Diffbot learns that Will Douglas Heaven is a journalist; Will Douglas Heaven works at MIT Technology Review; MIT Technology Review is a media company; and so on. Each of these factoids gets joined up with billions of others in a sprawling, interconnected network of facts. This is known as a knowledge graph.\nKnowledge graphs are not new. They have been around for decades, and were a fundamental concept in early AI research. But constructing and maintaining knowledge graphs has typically been done by hand, which is hard. This also stopped Tim Berners-Lee from realizing what he called the semantic web, which would have included information for machines as well as humans, so that bots could book our flights, do our shopping, or give smarter answers to questions than search engines.\nA few years ago, Google started using knowledge graphs too. Search for “Katy Perry” and you will get a box next to the main search results telling you that Katy Perry is an American singer-songwriter with music available on YouTube, Spotify, and Deezer. You can see at a glance that she is married to Orlando Bloom, she’s 35 and worth $125 million, and so on. Instead of giving you a list of links to pages about Katy Perry, Google gives you a set of facts about her drawn from its knowledge graph.\nBut Google only does this for its most popular search terms. Diffbot wants to do it for everything. By fully automating the construction process, Diffbot has been able to build what may be the largest knowledge graph ever.\nAlongside Google and Microsoft, it is one of only three US companies that crawl the entire public web. “It definitely makes sense to crawl the web,” says Victoria Lin, a research scientist at Salesforce who works on natural-language processing and knowledge representation. “A lot of human effort can otherwise go into making a large knowledge base.” Heiko Paulheim at the University of Mannheim in Germany agrees: “Automation is the only way to build large-scale knowledge graphs.”\nSuper surfer\nTo collect its facts, Diffbot’s AI reads the web as a human would—but much faster. Using a super-charged version of the Chrome browser, the AI views the raw pixels of a web page and uses image-recognition algorithms to categorize the page as one of 20 different types, including video, image, article, event, and discussion thread. It then identifies key elements on the page, such as headline, author, product description, or price, and uses NLP to extract facts from any text.\nEvery three-part factoid gets added to the knowledge graph. Diffbot extracts facts from pages written in any language, which means that it can answer queries about Katy Perry, say, using facts taken from articles in Chinese or Arabic even if they do not contain the term “Katy Perry.”\nBrowsing the web like a human lets the AI see the same facts that we see. It also means it has had to learn to navigate the web like us. The AI must scroll down, switch between tabs, and click away pop-ups. “The AI has to play the web like a video game just to experience the pages,” says Tung.\nDiffbot crawls the web nonstop and rebuilds its knowledge graph every four to five days. According to Tung, the AI adds 100 million to 150 million entities each month as new people pop up online, companies are created, and products are launched. It uses more machine-learning algorithms to fuse new facts with old, creating new connections or overwriting out-of-date ones. Diffbot has to add new hardware to its data center as the knowledge graph grows.\nResearchers can access Diffbot’s knowledge graph for free. But Diffbot also has around 400 paying customers. The search engine DuckDuckGo uses it to generate its own Google-like boxes. Snapchat uses it to extract highlights from news pages. The popular wedding-planner app Zola uses it to help people make wedding lists, pulling in images and prices. NASDAQ, which provides information about the stock market, uses it for financial research.\nFake shoes\nAdidas and Nike even use it to search the web for counterfeit shoes. A search engine will return a long list of sites that mention Nike trainers. But Diffbot lets these companies look for sites that are actually selling their shoes, rather just talking about them.\nFor now, these companies must interact with Diffbot using code. But Tung plans to add a natural-language interface. Ultimately, he wants to build what he calls a “universal factoid question answering system”: an AI that could answer almost anything you asked it, with sources to back up its response.\nTung and Lin agree that this kind of AI cannot be built with language models alone. But better yet would be to combine the technologies, using a language model like GPT-3 to craft a human-like front end for a know-it-all bot.\nStill, even an AI that has its facts straight is not necessarily smart. “We’re not trying to define what intelligence is, or anything like that,” says Tung. “We’re just trying to build something useful.”",
                                            "authors": [
                                                {
                                                    "name": "Will Douglas Heavenarchive page",
                                                    "link": "technologyreview.com/author/will-douglas-heaven"
                                                }
                                            ]
                                        }
                                    ],
                                    "type": "article",
                                    "title": "This know-it-all AI learns by reading the entire web nonstop | MIT Technology Review"
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/product": {
            "get": {
                "tags": [
                    "Product"
                ],
                "summary": "Product",
                "description": "Automatically extract pricing, product specs, images, and more from an e-commerce product page.",
                "operationId": "product",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.microcenter.com/product/628738/evga-nvidia-geforce-rtx-3090-ftw3-ultra-triple-fan-24gb-gddr6x-pcie-40-graphics-card"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "discussion",
                        "in": "query",
                        "description": "Pass `discussion=false` to disable automatic extraction of product reviews.",
                        "schema": {
                            "type": "boolean"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "humanLanguage": {
                                            "type": "string"
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "images": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "xpath": {
                                                                    "type": "string"
                                                                },
                                                                "naturalHeight": {
                                                                    "type": "integer"
                                                                },
                                                                "width": {
                                                                    "type": "integer"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "title": {
                                                                    "type": "string"
                                                                },
                                                                "url": {
                                                                    "type": "string"
                                                                },
                                                                "naturalWidth": {
                                                                    "type": "integer"
                                                                },
                                                                "primary": {
                                                                    "type": "boolean"
                                                                },
                                                                "height": {
                                                                    "type": "integer"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "offerPrice": {
                                                        "type": "string"
                                                    },
                                                    "productId": {
                                                        "type": "string"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "upc": {
                                                        "type": "string"
                                                    },
                                                    "productOrigin": {
                                                        "type": "string"
                                                    },
                                                    "mpn": {
                                                        "type": "string"
                                                    },
                                                    "gs1Code": {
                                                        "type": "string"
                                                    },
                                                    "multipleProducts": {
                                                        "type": "boolean"
                                                    },
                                                    "availability": {
                                                        "type": "boolean"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "offerPriceDetails": {
                                                        "type": "object",
                                                        "properties": {
                                                            "symbol": {
                                                                "type": "string"
                                                            },
                                                            "amount": {
                                                                "type": "number"
                                                            },
                                                            "text": {
                                                                "type": "string"
                                                            }
                                                        }
                                                    },
                                                    "specs": {
                                                        "type": "object",
                                                        "properties": {
                                                            "mfr_part_": {
                                                                "type": "string"
                                                            },
                                                            "upc": {
                                                                "type": "string"
                                                            },
                                                            "sku": {
                                                                "type": "string"
                                                            }
                                                        }
                                                    },
                                                    "normalizedSpecs": {
                                                        "type": "object",
                                                        "properties": {
                                                            "sku": {
                                                                "type": "array",
                                                                "items": {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "cleanLiteral": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "text": {
                                                        "type": "string"
                                                    },
                                                    "category": {
                                                        "type": "string"
                                                    },
                                                    "sku": {
                                                        "type": "string"
                                                    },
                                                    "brand": {
                                                        "type": "string"
                                                    }
                                                }
                                            }
                                        },
                                        "type": {
                                            "type": "string"
                                        },
                                        "title": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.microcenter.com/product/628738/evga-nvidia-geforce-rtx-3090-ftw3-ultra-triple-fan-24gb-gddr6x-pcie-40-graphics-card",
                                        "api": "product",
                                        "version": 3
                                    },
                                    "humanLanguage": "en",
                                    "objects": [
                                        {
                                            "images": [
                                                {
                                                    "xpath": "/html[1]/body[1]/main[1]/article[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]/img[1]",
                                                    "naturalHeight": 200,
                                                    "width": 260,
                                                    "diffbotUri": "image|3|735477840",
                                                    "title": "Product Image View 2",
                                                    "url": "https://90a1c75758623581b3f8-5c119c3de181c9857fcb2784776b17ef.ssl.cf2.rackcdn.com/628738_177923_01_front_thumbnail.jpg",
                                                    "naturalWidth": 200,
                                                    "primary": true,
                                                    "height": 260
                                                }
                                            ],
                                            "offerPrice": "$1,919.99",
                                            "productId": "843368067137",
                                            "diffbotUri": "product|3|-1003574237",
                                            "upc": "843368067137",
                                            "productOrigin": "us",
                                            "mpn": "24G-P5-3987-KR",
                                            "gs1Code": "U.S. and Canada",
                                            "multipleProducts": true,
                                            "availability": true,
                                            "type": "product",
                                            "title": "EVGA NVIDIA GeForce RTX 3090 FTW3 Ultra Triple-Fan 24GB GDDR6X PCIe 4.0 Graphics Card",
                                            "offerPriceDetails": {
                                                "symbol": "$",
                                                "amount": 1919.99,
                                                "text": "$1,919.99"
                                            },
                                            "specs": {
                                                "mfr_part_": "24G-P5-3987-KR",
                                                "upc": "843368067137",
                                                "sku": "177923"
                                            },
                                            "normalizedSpecs": {
                                                "sku": [
                                                    {
                                                        "cleanLiteral": "843368067137"
                                                    }
                                                ]
                                            },
                                            "humanLanguage": "en",
                                            "pageUrl": "https://www.microcenter.com/product/628738/evga-nvidia-geforce-rtx-3090-ftw3-ultra-triple-fan-24gb-gddr6x-pcie-40-graphics-card",
                                            "text": "The EVGA GeForce RTX 3090 is colossally powerful in every way imaginable, giving you a whole new tier of performance at 8K resolution. It's powered by the NVIDIA Ampere architecture, which doubles down on ray tracing and AI performance with enhanced RT Cores, Tensor Cores, and new streaming multiprocessors. Combined with the next generation of design, cooling, and overclocking with EVGA Precision X1, the EVGA GeForce RTX 3090 Series redefines the definition of ultimate performance.",
                                            "category": "Computers",
                                            "sku": "177923",
                                            "brand": "EVGA"
                                        }
                                    ],
                                    "type": "product",
                                    "title": "EVGA NVIDIA GeForce RTX 3090 FTW3 Ultra Triple-Fan 24GB GDDR6X PCIe 4.0 Graphics Card - Micro Center"
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/discussion": {
            "get": {
                "tags": [
                    "Discussion"
                ],
                "summary": "Discussion",
                "description": "Automatically structure and extract entire threads of reviews/comments from articles, product pages, and forum threads.",
                "operationId": "discussion",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://forum.gl-inet.com/t/ssh-connection-protocol-does-not-work-well-between-vpn-clients/22779"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb",
                                "sentiment"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "paging",
                        "in": "query",
                        "description": "Pass `paging=false` to disable automatic concatenation multiple-page articles.",
                        "schema": {
                            "type": "boolean"
                        }
                    },
                    {
                        "name": "maxPages",
                        "in": "query",
                        "description": "Set the maximum number of pages in a thread to automatically concatenate in a single response. Default = 1 (no concatenation). Set `maxPages=all` to retrieve all pages of a thread regardless of length. Each individual page will count as a separate API call.",
                        "schema": {
                            "type": "string",
                            "default": "20"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "numPages": {
                                                        "type": "integer"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "confidence": {
                                                        "type": "number"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "numPosts": {
                                                        "type": "integer"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "posts": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "date": {
                                                                    "type": "string"
                                                                },
                                                                "images": {
                                                                    "type": "array",
                                                                    "items": {
                                                                        "type": "object",
                                                                        "properties": {
                                                                            "naturalHeight": {
                                                                                "type": "integer"
                                                                            },
                                                                            "width": {
                                                                                "type": "integer"
                                                                            },
                                                                            "diffbotUri": {
                                                                                "type": "string"
                                                                            },
                                                                            "pageUrl": {
                                                                                "type": "string"
                                                                            },
                                                                            "url": {
                                                                                "type": "string"
                                                                            },
                                                                            "naturalWidth": {
                                                                                "type": "integer"
                                                                            },
                                                                            "height": {
                                                                                "type": "integer"
                                                                            }
                                                                        }
                                                                    }
                                                                },
                                                                "humanLanguage": {
                                                                    "type": "string"
                                                                },
                                                                "author": {
                                                                    "type": "string"
                                                                },
                                                                "authorUrl": {
                                                                    "type": "string"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "html": {
                                                                    "type": "string"
                                                                },
                                                                "pageUrl": {
                                                                    "type": "string"
                                                                },
                                                                "id": {
                                                                    "type": "integer"
                                                                },
                                                                "text": {
                                                                    "type": "string"
                                                                },
                                                                "type": {
                                                                    "type": "string"
                                                                },
                                                                "title": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "tags": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "score": {
                                                                    "type": "number"
                                                                },
                                                                "count": {
                                                                    "type": "integer"
                                                                },
                                                                "label": {
                                                                    "type": "string"
                                                                },
                                                                "uri": {
                                                                    "type": "string"
                                                                },
                                                                "rdfTypes": {
                                                                    "type": "array",
                                                                    "items": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "participants": {
                                                        "type": "integer"
                                                    },
                                                    "rssUrl": {
                                                        "type": "string"
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                        "api": "discussion",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "numPages": 1,
                                            "humanLanguage": "en",
                                            "confidence": 0.05500000089407453,
                                            "diffbotUri": "discussion|3|-870809033",
                                            "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                            "numPosts": 13,
                                            "type": "discussion",
                                            "title": "[OC] 66% of Top 50 Russian Exposed Companies Have Announced Sanctions",
                                            "posts": [
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "images": [
                                                        {
                                                            "naturalHeight": 767,
                                                            "width": 457,
                                                            "diffbotUri": "image|3|-804821395",
                                                            "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                            "url": "https://preview.redd.it/l76k59t8jsm81.png?width=457&auto=webp&s=632efa1f24e607358bbec99c161a6aa579aebfe1",
                                                            "naturalWidth": 457,
                                                            "height": 767
                                                        }
                                                    ],
                                                    "humanLanguage": "en",
                                                    "author": "hicheoo",
                                                    "authorUrl": "https://old.reddit.com/user/hicheoo",
                                                    "diffbotUri": "post|3|29462830",
                                                    "html": "<figure><a href=\"https://i.redd.it/l76k59t8jsm81.png\"><img src=\"https://preview.redd.it/l76k59t8jsm81.png?width=457&auto=webp&s=632efa1f24e607358bbec99c161a6aa579aebfe1\"></img></a></figure>\n<h2>Want to add to the discussion?</h2>\n<p>Post a comment!</p>\n<p>Create an account</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 0,
                                                    "text": "Want to add to the discussion?\nPost a comment!\n\n \nCreate an account",
                                                    "type": "post",
                                                    "title": "[OC] 66% of Top 50 Russian Exposed Companies Have Announced Sanctions"
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "not_mig",
                                                    "authorUrl": "https://old.reddit.com/user/not_mig",
                                                    "diffbotUri": "post|3|-720375378",
                                                    "html": "<p>What's the difference between blue, yellow, and green?</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 1,
                                                    "text": "What's the difference between blue, yellow, and green?",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "hicheoo",
                                                    "authorUrl": "https://old.reddit.com/user/hicheoo",
                                                    "diffbotUri": "post|3|-148816221",
                                                    "html": "<p>They're exemptions. I should've clarified up top, but they're basically all in the description.</p>\n<p>Green: Typical Sanctions<br>\n Yellow: Sanctions, but might be a PR move.<br>\n Blue: Healthcare</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 2,
                                                    "text": "They're exemptions. I should've clarified up top, but they're basically all in the description.\nGreen: Typical Sanctions\nYellow: Sanctions, but might be a PR move.\nBlue: Healthcare",
                                                    "type": "post",
                                                    "parentId": 1
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "Zealousideal-Lie7255",
                                                    "authorUrl": "https://old.reddit.com/user/Zealousideal-Lie7255",
                                                    "diffbotUri": "post|3|-683402068",
                                                    "html": "<p>A lot of oil service companies have no reported sanctions. Like Schlumberger, Baker Hughes. Some Chinese companies too.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 3,
                                                    "text": "A lot of oil service companies have no reported sanctions. Like Schlumberger, Baker Hughes. Some Chinese companies too.",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "varnima",
                                                    "authorUrl": "https://old.reddit.com/user/varnima",
                                                    "diffbotUri": "post|3|-603833918",
                                                    "html": "<p>JetBrains changed and imposed sanctions <a href=\"https://blog.jetbrains.com/blog/2022/03/11/jetbrains-statement-on-ukraine/\">https://blog.jetbrains.com/blog/2022/03/11/jetbrains-statement-on-ukraine/</a></p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 4,
                                                    "text": "JetBrains changed and imposed sanctions https://blog.jetbrains.com/blog/2022/03/11/jetbrains-statement-on-ukraine/",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "hicheoo",
                                                    "authorUrl": "https://old.reddit.com/user/hicheoo",
                                                    "diffbotUri": "post|3|-296888207",
                                                    "html": "<p>Yeah, they're green in the chart.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 5,
                                                    "text": "Yeah, they're green in the chart.",
                                                    "type": "post",
                                                    "parentId": 4
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "hicheoo",
                                                    "authorUrl": "https://old.reddit.com/user/hicheoo",
                                                    "diffbotUri": "post|3|624793084",
                                                    "html": "<p><strong>Sources:</strong> - Diffbot Sanctions Tracker (<a href=\"https://www.diffbot.com/insights/every-company-affected-by-sanctions/\">https://www.diffbot.com/insights/every-company-affected-by-sanctions/</a>) - Diffbot Knowledge Graph (more detail on query below)</p>\n<p><strong>Data Viz Tool:</strong> Infogram</p>\n<p><strong>Disclaimer:</strong> I work for Diffbot</p>\n<p>I started by querying the Knowledge Graph for people who live in Russia but work for a non-Russian company. Faceting this query by their employer provides me with a list of non-Russian companies ranked by # of Russian employees.</p>\n<p><code>\ntype:Person location.country.name:&quot;Russia&quot; employments.{employer.{location.country.name!=&quot;Russia&quot; nbLocations&gt;0} isCurrent:true} facet:employments.{employer.name isCurrent:true}\n</code></p>\n<p>This data underrepresents actual employment figures, as there are many employees who do not maintain an internet presence linking them to their employer. Underrepresentation should be fairly equal across all companies, and relative position in the rankings should be accurate.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 6,
                                                    "text": "Sources: - Diffbot Sanctions Tracker (https://www.diffbot.com/insights/every-company-affected-by-sanctions/) - Diffbot Knowledge Graph (more detail on query below)\nData Viz Tool: Infogram\nDisclaimer: I work for Diffbot\nI started by querying the Knowledge Graph for people who live in Russia but work for a non-Russian company. Faceting this query by their employer provides me with a list of non-Russian companies ranked by # of Russian employees.\ntype:Person location.country.name:\"Russia\" employments.{employer.{location.country.name!=\"Russia\" nbLocations>0} isCurrent:true} facet:employments.{employer.name isCurrent:true}\nThis data underrepresents actual employment figures, as there are many employees who do not maintain an internet presence linking them to their employer. Underrepresentation should be fairly equal across all companies, and relative position in the rankings should be accurate.",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "zzzmick",
                                                    "authorUrl": "https://old.reddit.com/user/zzzmick",
                                                    "diffbotUri": "post|3|-130810969",
                                                    "html": "<p>epam had over 10k employees in Russia</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 7,
                                                    "text": "epam had over 10k employees in Russia",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "hicheoo",
                                                    "authorUrl": "https://old.reddit.com/user/hicheoo",
                                                    "diffbotUri": "post|3|-1458692070",
                                                    "html": "<p>Yup. The data underrepresents actual employment figures, as there are many employees who do not maintain an internet presence linking them to their employer. Underrepresentation should be fairly equal across all companies, and relative position in the rankings should be accurate.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 8,
                                                    "text": "Yup. The data underrepresents actual employment figures, as there are many employees who do not maintain an internet presence linking them to their employer. Underrepresentation should be fairly equal across all companies, and relative position in the rankings should be accurate.",
                                                    "type": "post",
                                                    "parentId": 7
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "JanitorKarl",
                                                    "authorUrl": "https://old.reddit.com/user/JanitorKarl",
                                                    "diffbotUri": "post|3|-149138223",
                                                    "html": "<p>Schlumberger and Baker Hughes are both in the oilfield services industry.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 9,
                                                    "text": "Schlumberger and Baker Hughes are both in the oilfield services industry.",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "flumenia",
                                                    "authorUrl": "https://old.reddit.com/user/flumenia",
                                                    "diffbotUri": "post|3|889762151",
                                                    "html": "<p>What if Microsoft stops to extend licenses of Microsoft Office to Russia? That would make the biggest impact, I guess</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 10,
                                                    "text": "What if Microsoft stops to extend licenses of Microsoft Office to Russia? That would make the biggest impact, I guess",
                                                    "type": "post",
                                                    "parentId": 0
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "Imperial_Empirical",
                                                    "authorUrl": "https://old.reddit.com/user/Imperial_Empirical",
                                                    "diffbotUri": "post|3|-179317804",
                                                    "html": "<p>Putin ordered the development of Russian alternatives after the Crimean annexation due to dependancy/spying fears. I believe from 2016 onwards Microsoft was largely fased out internally.</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 11,
                                                    "text": "Putin ordered the development of Russian alternatives after the Crimean annexation due to dependancy/spying fears. I believe from 2016 onwards Microsoft was largely fased out internally.",
                                                    "type": "post",
                                                    "parentId": 10
                                                },
                                                {
                                                    "date": "Fri, 11 Mar 2022 00:00:00 GMT",
                                                    "humanLanguage": "en",
                                                    "author": "Nightblood83",
                                                    "authorUrl": "https://old.reddit.com/user/Nightblood83",
                                                    "diffbotUri": "post|3|-901046006",
                                                    "html": "<p>A lot of accountants for commies...</p>",
                                                    "pageUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/",
                                                    "id": 12,
                                                    "text": "A lot of accountants for commies...",
                                                    "type": "post",
                                                    "parentId": 0
                                                }
                                            ],
                                            "tags": [
                                                {
                                                    "score": 0.8428076505661011,
                                                    "count": 5,
                                                    "label": "economic sanctions",
                                                    "uri": "https://diffbot.com/entity/EWnXSPtH6Osi0pmx8-WPKAg",
                                                    "rdfTypes": [
                                                        "http://dbpedia.org/ontology/Miscellaneous"
                                                    ]
                                                }
                                            ],
                                            "participants": 9,
                                            "rssUrl": "https://old.reddit.com/r/dataisbeautiful/comments/tbvdhu/oc_66_of_top_50_russian_exposed_companies_have/.rss"
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/job": {
            "get": {
                "tags": [
                    "Job"
                ],
                "summary": "Job",
                "description": "Automatically extracts structured information from job postings.",
                "operationId": "job",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.pathai.com/careers/DirectorClinicalOperations-6715804002"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "requirements": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "string"
                                                        }
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "postedDate": {
                                                        "type": "string"
                                                    },
                                                    "remote": {
                                                        "type": "string"
                                                    },
                                                    "skills": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "salience": {
                                                                    "type": "number"
                                                                },
                                                                "skill": {
                                                                    "type": "string"
                                                                },
                                                                "confidence": {
                                                                    "type": "number"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "employer": {
                                                        "type": "object",
                                                        "properties": {
                                                            "name": {
                                                                "type": "string"
                                                            },
                                                            "diffbotUri": {
                                                                "type": "string"
                                                            }
                                                        }
                                                    },
                                                    "text": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "tasks": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "string"
                                                        }
                                                    },
                                                    "locations": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "country": {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "summary": {
                                                                            "type": "string"
                                                                        },
                                                                        "image": {
                                                                            "type": "string"
                                                                        },
                                                                        "types": {
                                                                            "type": "array",
                                                                            "items": {
                                                                                "type": "string"
                                                                            }
                                                                        },
                                                                        "name": {
                                                                            "type": "string"
                                                                        },
                                                                        "diffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotId": {
                                                                            "type": "string"
                                                                        },
                                                                        "type": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                },
                                                                "address": {
                                                                    "type": "string"
                                                                },
                                                                "city": {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "summary": {
                                                                            "type": "string"
                                                                        },
                                                                        "image": {
                                                                            "type": "string"
                                                                        },
                                                                        "types": {
                                                                            "type": "array",
                                                                            "items": {
                                                                                "type": "string"
                                                                            }
                                                                        },
                                                                        "name": {
                                                                            "type": "string"
                                                                        },
                                                                        "diffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotId": {
                                                                            "type": "string"
                                                                        },
                                                                        "type": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                },
                                                                "region": {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "summary": {
                                                                            "type": "string"
                                                                        },
                                                                        "image": {
                                                                            "type": "string"
                                                                        },
                                                                        "types": {
                                                                            "type": "array",
                                                                            "items": {
                                                                                "type": "string"
                                                                            }
                                                                        },
                                                                        "name": {
                                                                            "type": "string"
                                                                        },
                                                                        "diffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotId": {
                                                                            "type": "string"
                                                                        },
                                                                        "type": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                },
                                                                "subregion": {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "summary": {
                                                                            "type": "string"
                                                                        },
                                                                        "image": {
                                                                            "type": "string"
                                                                        },
                                                                        "types": {
                                                                            "type": "array",
                                                                            "items": {
                                                                                "type": "string"
                                                                            }
                                                                        },
                                                                        "name": {
                                                                            "type": "string"
                                                                        },
                                                                        "diffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotUri": {
                                                                            "type": "string"
                                                                        },
                                                                        "targetDiffbotId": {
                                                                            "type": "string"
                                                                        },
                                                                        "type": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                },
                                                                "latitude": {
                                                                    "type": "number"
                                                                },
                                                                "precision": {
                                                                    "type": "number"
                                                                },
                                                                "longitude": {
                                                                    "type": "number"
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.pathai.com/careers/DirectorClinicalOperations-6715804002",
                                        "api": "job",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "requirements": [
                                                "Experience in histologic-based or other medical imaging in a regulated environment is strongly preferred."
                                            ],
                                            "diffbotUri": "job|3|1793551482",
                                            "type": "job",
                                            "title": "Director, Clinical Operations",
                                            "remote": "REMOTE",
                                            "skills": [
                                                {
                                                    "salience": 0.7316089,
                                                    "skill": "DDT",
                                                    "confidence": 0.6169359,
                                                    "diffbotUri": "https://diffbot.com/entity/Ey44naXoPNamQWWt5PK0JEQ"
                                                }
                                            ],
                                            "humanLanguage": "en",
                                            "employer": {
                                                "name": "PathAI",
                                                "diffbotUri": "https://diffbot.com/entity/EIf6kAhRvMq2mBsM6DV2Krw"
                                            },
                                            "pageUrl": "https://www.pathai.com/careers/DirectorClinicalOperations-6715804002",
                                            "text": "Who We Are\nPathAI is on a mission to improve patient outcomes with AI-powered pathology. We are transforming traditional pathology methods into powerful, new technologies. These innovations in pathology can help accelerate drug development, improve confidence in the accuracy of diagnosis, and get life-saving therapies to patients more quickly. At PathAI, you'll work with a diverse and talented team of people, who are dedicated to solving complex problems and making a huge impact.\nWhere You Fit\nThe Director of Clinical Operations will be responsible for programmatic tactical execution of regulated research and clinical products, including companion diagnostic (CDx), non-CDx-IVD, Drug Development Tools (DDT) and RUO strategies. This is a highly matrixed role that requires outstanding emotional intelligence combined with detailed subject matter expertise in order to help coordinate the program across all functional areas including quality, clinical, business development, scientific programs, engineering and product teams. The Director of Clinical Operations will bring together all stakeholders internally and externally to successfully design, execute on all aspects of our clinical development programs. Outstanding detailed organizational skills with a proactive mindset toward identification of all programmatic risks and mitigations is essential.\nWhat You’ll Do\nLead the execution of research and clinical development strategies for new clinical trial, research, and medical device product development, and operationalize these strategies from development to commercialization.\nAccountable for oversight of all assigned studies within the country in accordance with the overall development plan and clinical operations plan, including adherence to quality, timelines and budget.\nEstablish and continually communicate detailed program and project plans with critical paths and dependencies clearly identified across each area and communicate timeline progress and risks through defined operating mechanisms.\nEducate relevant leads from each functional area on the regulations impacting development programs.\nClearly communicate strategy, progress, and risks cross functionally and proactively drive/facilitate the relationship between internal project teams and external stakeholders.\nEffectively prioritize the projects within our clinical development programs to ensure adequate resourcing and efficient delivery on priority projects for PathAI.\nInform leadership of any identified gaps, risks, solutions to the program as a whole.\nWhat You Bring\nOur employees' skills come in all shapes and sizes, but to be successful in this role with us, you'll at least need:\nPh.D (preferred) or M.S. in a Life Science or equivalent field, plus at least 5-7 years of relevant industry experience, including at least 3 years in a role of primary responsibility for clinical trial execution and management.\nYou have a solid understanding of and experience in drug development (clinical trials, biomarker testing), assay development, and co-development for CDx programs.\nDirect management experience and supervisory responsibility.\nYou have knowledge (training preferred) of the software development life cycle (IEC 62304), design control, and experience in leading in vitro diagnostic development teams.\nYou have supported projects for FDA and other regulatory body submissions from development to validation to approval.\nYou have cross therapeutic area clinical biomarker testing and diagnostic testing experience in global clinical trials.\nYou have experience and a deep understanding of biomarker testing and reporting requirements globally.\nYou excel in leading multidisciplinary teams of scientists and engineers in highly matrixed organization is strongly preferred\nYou are a strategic thinker, pro-active and agile, and you have a strong interest in bringing teams together at PathAI, as well as external partners, to bring better treatments to patients.\nExperience in histologic-based or other medical imaging in a regulated environment is strongly preferred.\nWe Want To Hear From You\nAt PathAI, we are looking for individuals who are team players, are willing to do the work no matter how big or small it may be, and who are passionate about everything they do. If this sounds like you, even if you may not match the job description to a tee, we encourage you to apply. You could be exactly what we're looking for.\nPathAI is an equal opportunity employer, dedicated to creating a workplace that is free of harassment and discrimination. We base our employment decisions on business needs, job requirements, and qualifications — that's all. We do not discriminate based on race, gender, religion, health, personal beliefs, age, family or parental status, or any other status. We don't tolerate any kind of discrimination or bias, and we are looking for teammates who feel the same way.\n#LI-Remote\n",
                                            "datePosted": "Sat, 07 May 2022 00:00:00 GMT",
                                            "tasks": [
                                                "Lead the execution of research and clinical development strategies for new clinical trial, research, and medical device product development, and operationalize these strategies from development to commercialization.",
                                                "Accountable for oversight of all assigned studies within the country in accordance with the overall development plan and clinical operations plan, including adherence to quality, timelines and budget.",
                                                "Establish and continually communicate detailed program and project plans with critical paths and dependencies clearly identified across each area and communicate timeline progress and risks through defined operating mechanisms.\n",
                                                "Educate relevant leads from each functional area on the regulations impacting development programs.\n",
                                                "Clearly communicate strategy, progress, and risks cross functionally and proactively drive/facilitate the relationship between internal project teams and external stakeholders.",
                                                "Effectively prioritize the projects within our clinical development programs to ensure adequate resourcing and efficient delivery on priority projects for PathAI.\n",
                                                "Inform leadership of any identified gaps, risks, solutions to the program as a whole.\n",
                                                "Ph.D (preferred) or M.S. in a Life Science or equivalent field, plus at least 5-7 years of relevant industry experience, including at least 3 years in a role of primary responsibility for clinical trial execution and management.",
                                                "You have a solid understanding of and experience in drug development (clinical trials, biomarker testing), assay development, and co-development for CDx programs.\n",
                                                "Direct management experience and supervisory responsibility.",
                                                "You have knowledge (training preferred) of the software development life cycle (IEC 62304), design control, and experience in leading in vitro diagnostic development teams.",
                                                "You have supported projects for FDA and other regulatory body submissions from development to validation to approval.",
                                                "You have cross therapeutic area clinical biomarker testing and diagnostic testing experience in global clinical trials.",
                                                "You have experience and a deep understanding of biomarker testing and reporting requirements globally.",
                                                "You excel in leading multidisciplinary teams of scientists and engineers in highly matrixed organization is strongly preferred",
                                                "You are a strategic thinker, pro-active and agile, and you have a strong interest in bringing teams together at PathAI, as well as external partners, to bring better treatments to patients.\n"
                                            ]
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/image": {
            "get": {
                "tags": [
                    "Image"
                ],
                "summary": "Image",
                "description": "Automatically identifies the primary image(s) on any web page and returns comprehensive information and metadata for each image.",
                "operationId": "image",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.diffbot.com/products/extract/"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb",
                                "displayHeight",
                                "displayWidth"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "xpath": {
                                                        "type": "string"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "naturalHeight": {
                                                        "type": "integer"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "url": {
                                                        "type": "string"
                                                    },
                                                    "naturalWidth": {
                                                        "type": "integer"
                                                    },
                                                    "tags": {
                                                        "type": "array",
                                                        "items": {
                                                            "oneOf": [
                                                                {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "typeHierarchy": {
                                                                            "type": "array",
                                                                            "items": {
                                                                                "type": "string"
                                                                            }
                                                                        },
                                                                        "id": {
                                                                            "type": "integer"
                                                                        },
                                                                        "label": {
                                                                            "type": "string"
                                                                        },
                                                                        "type": {
                                                                            "type": "string"
                                                                        },
                                                                        "uri": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                },
                                                                {
                                                                    "type": "object",
                                                                    "properties": {
                                                                        "id": {
                                                                            "type": "integer"
                                                                        },
                                                                        "label": {
                                                                            "type": "string"
                                                                        },
                                                                        "uri": {
                                                                            "type": "string"
                                                                        }
                                                                    }
                                                                }
                                                            ]
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.diffbot.com/products/extract/",
                                        "api": "image",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "xpath": "/HTML/BODY/MAIN/DIV[@id='slice-readslikehumans']/DIV[@class='container px-3 py-5 py-md-6 mx-auto']/DIV[@class='row justify-content-center']/DIV[@class='col-12  col-md  d-flex justify-content-center align-items-center  order-md-1']/IMG[@class='img-fluid mb-4 align-self-start']",
                                            "humanLanguage": "en",
                                            "naturalHeight": 1023,
                                            "diffbotUri": "image|3|666824882",
                                            "pageUrl": "https://www.diffbot.com/products/extract/",
                                            "type": "image",
                                            "url": "https://www.diffbot.com/assets/img/products/extract_screenshot.png",
                                            "naturalWidth": 897,
                                            "tags": [
                                                {
                                                    "typeHierarchy": [
                                                        "http://www.w3.org/2002/07/owl#Thing",
                                                        "http://dbpedia.org/ontology/Work",
                                                        "http://dbpedia.org/ontology/Website"
                                                    ],
                                                    "id": 33898,
                                                    "label": "Website",
                                                    "type": "http://dbpedia.org/ontology/Website",
                                                    "uri": "https://www.diffbot.com/entity/Xd90vp_U4MJOoRHIja3quxg"
                                                },
                                                {
                                                    "id": 1404579,
                                                    "label": "Ring binder",
                                                    "uri": "http://diffbot.com/entity/XYwPij6UgPdaDEY6fXSrrRA"
                                                }
                                            ]
                                        },
                                        {
                                            "xpath": "/HTML/BODY/MAIN/DIV[@id='slice-lookslikeahuman']/DIV[@class='container px-3 py-5 py-md-6 mx-auto']/DIV[@class='row justify-content-center']/DIV[@class='col-12  col-md  d-flex justify-content-center align-items-center  order-md-1']/IMG[@class='img-fluid mb-4']",
                                            "humanLanguage": "en",
                                            "naturalHeight": 270,
                                            "diffbotUri": "image|3|1865428953",
                                            "pageUrl": "https://www.diffbot.com/products/extract/",
                                            "type": "image",
                                            "url": "https://www.diffbot.com/assets/img/products/any_language.png",
                                            "naturalWidth": 554,
                                            "tags": [
                                                {
                                                    "id": 5462349,
                                                    "label": "Sachet",
                                                    "uri": "http://diffbot.com/entity/X1_7MIKBoPpuRnAPVQXqQxA"
                                                },
                                                {
                                                    "id": 479373,
                                                    "label": "Eraser",
                                                    "uri": "http://diffbot.com/entity/XGPnWNBUZPyuNyfK9rJr6vQ"
                                                },
                                                {
                                                    "id": 57260,
                                                    "label": "Envelope",
                                                    "uri": "http://diffbot.com/entity/XK5gEV93iP6SqKMDjDr0YBQ"
                                                }
                                            ]
                                        },
                                        {
                                            "xpath": "/HTML/BODY/MAIN/DIV[@id='slice-one-click-crawling']/DIV[@class='container px-3 py-5 py-md-6 mx-auto']/DIV[@class='row justify-content-center']/DIV[@class='col-12  offset-md-1 col-md  d-flex justify-content-center align-items-center  order-md-2']/IMG[@class='img-fluid mb-4']",
                                            "humanLanguage": "en",
                                            "naturalHeight": 372,
                                            "diffbotUri": "image|3|1699007329",
                                            "pageUrl": "https://www.diffbot.com/products/extract/",
                                            "type": "image",
                                            "url": "https://www.diffbot.com/assets/img/products/analyze_two.png",
                                            "naturalWidth": 451,
                                            "tags": [
                                                {
                                                    "id": 268267,
                                                    "label": "Tray",
                                                    "uri": "http://diffbot.com/entity/XwXgQ3A7sNmypX2VX3oZCgg"
                                                },
                                                {
                                                    "id": 467731,
                                                    "label": "Spatula",
                                                    "uri": "http://diffbot.com/entity/XZohZtDoDPvq1wusJQCSZZA"
                                                },
                                                {
                                                    "id": 2649730,
                                                    "label": "Measuring cup",
                                                    "uri": "http://diffbot.com/entity/XhwdQ4HnKOsCVrf9YU3hyWw"
                                                }
                                            ]
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/video": {
            "get": {
                "tags": [
                    "Video"
                ],
                "summary": "Video",
                "description": "The Video API automatically extracts detailed video information—including most metadata, thumbnail images, direct video URL and embed code from nearly any video page or video platform on the web.",
                "operationId": "video",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.youtube.com/watch?v=hFZFjoX2cGg"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "date": {
                                                        "type": "string"
                                                    },
                                                    "images": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "title": {
                                                                    "type": "string"
                                                                },
                                                                "url": {
                                                                    "type": "string"
                                                                },
                                                                "primary": {
                                                                    "type": "boolean"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "author": {
                                                        "type": "string"
                                                    },
                                                    "mime": {
                                                        "type": "string"
                                                    },
                                                    "naturalHeight": {
                                                        "type": "integer"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "url": {
                                                        "type": "string"
                                                    },
                                                    "naturalWidth": {
                                                        "type": "integer"
                                                    },
                                                    "duration": {
                                                        "type": "integer"
                                                    },
                                                    "provider": {
                                                        "type": "string"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "html": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "text": {
                                                        "type": "string"
                                                    },
                                                    "viewCount": {
                                                        "type": "integer"
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.youtube.com/watch?v=hFZFjoX2cGg",
                                        "api": "video",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "date": "Sun, 24 May 2020 07:00:00 GMT",
                                            "images": [
                                                {
                                                    "diffbotUri": "image|3|231854607",
                                                    "title": "Backyard Squirrel Maze 1.0- Ninja Warrior Course",
                                                    "url": "https://i.ytimg.com/vi/hFZFjoX2cGg/maxresdefault.jpg",
                                                    "primary": true
                                                }
                                            ],
                                            "author": "Mark Rober",
                                            "mime": "video/mp4",
                                            "naturalHeight": 720,
                                            "diffbotUri": "video|3|1870173316",
                                            "type": "video",
                                            "title": "Backyard Squirrel Maze 1.0- Ninja Warrior Course",
                                            "url": "https://rr6---sn-5uaeznks.googlevideo.com/videoplayback?expire=1649742262&ei=Vr1UYoLhFpDm8wTUt76wAQ&ip=23.229.39.25&id=o-AOUTPxsk0l8eAnvqC6G9PtYnkFK4S-lYoQ1G-mt8W40U&itag=399&aitags=133%2C134%2C135%2C136%2C137%2C160%2C242%2C243%2C244%2C247%2C248%2C278%2C394%2C395%2C396%2C397%2C398%2C399&source=youtube&requiressl=yes&mh=s4&mm=31%2C29&mn=sn-5uaeznks%2Csn-5ualdnl7&ms=au%2Crdu&mv=u&mvi=6&pl=23&spc=4ocVC0JfKoZvX0FSPklAcHZ7LBTF&vprv=1&mime=video%2Fmp4&ns=ZSXKPHKFPvaGHi2sm3qErYcG&gir=yes&clen=246339629&dur=1220.093&lmt=1637823031873234&mt=1649720229&fvip=3&keepalive=yes&fexp=24001373%2C24007246&c=WEB&txp=5531432&n=b2VCd9pxXsFyoV_m&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRgIhAKiKoHk2DfvaKA3Adu0CkNNF_lOavT4Mk0CYkQvbG0wpAiEAgiAc4h_1GqjzjGXgdiKdgdWZXWM0M9ubY-ns1qhGyns%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl&lsig=AG3C_xAwRQIhAM5DsCeq2RzEblNQ3qRq-3xEydeAidszCKAg8PvahEK1AiAzkjRXQQmDItpKWMxLq1TTb_32NWjPxiTDIVeOrBvxLw%3D%3D",
                                            "naturalWidth": 1280,
                                            "duration": 1220,
                                            "provider": "YouTube",
                                            "humanLanguage": "en",
                                            "html": "<iframe src=\"https://www.youtube.com/embed/hFZFjoX2cGg\" frameborder=\"0\" allowfullscreen></iframe>",
                                            "pageUrl": "https://www.youtube.com/watch?v=hFZFjoX2cGg",
                                            "text": "Squirrels were stealing my bird seed so I solved the problem with mechanical engineering :)\n\nHere is an explanation of the illusion dish thing!- https://demos.smu.ca/index.php/demos/optics/69-mirage-mirror\n\nHere is a link to the illusion dish (not sponsored :) https://www.amazon.com/dp/B0718XCG7F/ref=cm_sw_em_r_mt_dp_U_D9PYEbC85X14F\n\n*MUSIC*\n0:02 - Arrow (Instrumental) - Andrew Applepie http://andrewapplepie.com/\n0:27 - Kalimba Jam - Blue Wednesday https://soundcloud.com/bluewednesday/\n3:21 - Zambo - Devil in Disguise https://danijel-zambo.bandcamp.com/track/devil-in-disguise-2 \n3:47 - Cereal Killa - Blue Wednesday https://soundcloud.com/bluewednesday/\n5:28 - J. Thompson - Real Quick Lovin' https://www.amazon.com/Real-Quick-Lovin/dp/B0010YGE3W\n5:39 - New Shoes - Blue Wednesday https://soundcloud.com/bluewednesday/\n7:51- Chi- Ponder- https://www.prodbyponder.com/5-free-beats32029775\n8:31 - Marimba Idea - Blue Wednesday https://soundcloud.com/bluewednesday/ \n9:25 - Josef Falkenskold - Tiny Tumble https://www.epidemicsound.com/artists/josef-falkenskold\n19:07 - Nik- Ponder- https://www.prodbyponder.com/5-free-beats32029775",
                                            "viewCount": 92199455
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/list": {
            "get": {
                "tags": [
                    "List"
                ],
                "summary": "List",
                "description": "Automatically structures a list of items from news index pages, product listings pages, search engine results pages, and other \"list-like\" pages.",
                "operationId": "list",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://slashdot.org/"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "icon": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "items": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "date": {
                                                                    "type": "string"
                                                                },
                                                                "summary": {
                                                                    "type": "string"
                                                                },
                                                                "link": {
                                                                    "type": "string"
                                                                },
                                                                "title": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://slashdot.org/",
                                        "api": "list",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "diffbotUri": "list|3|-768312769",
                                            "icon": "https://slashdot.org/favicon.ico",
                                            "pageUrl": "https://slashdot.org/",
                                            "type": "list",
                                            "title": "News for nerds, stuff that matters",
                                            "items": [
                                                {
                                                    "date": "Tue, 05 Oct 2021 00:00:00 GMT",
                                                    "summary": "According to fluent Korean speaker Youngmi Mayer, Squid Game features \"botched\" subtitles that have changed the show's meaning for English-speaking viewers.\nFor those unaware of Squid Game, it's a Korean-language drama about an alternative world where people in debt compete in deadly games. The plot sees a group of people tempted into a survival game where they have the chance to walk away with 45.6 billion Korean won ($38 million) if they win a series of six games. According to a BBC article, it's currently on track to become Netflix's biggest original series. From the report:  \"The dialogue was so well written and zero of it was preserved [in the subtitles],\" Youngmi said in a Twitter post. In a TikTok video that's had almost nine million views, Youngmi gave several examples of mistranslation. In one scene a character tries to convince people to play the game with her, and the closed-caption subtitles read: \"I'm not a genius, but I still got it worked out.\" But what the character actually says, Youngmi explains, is: \"I am very smart, I just never got a chance to study.\" That translation puts more emphasis on the wealth disparity in society -- which is also a theme in the Oscar-winning 2019 Korean film, Parasite. \"Almost everything she says is being botched translation-wise... the writers, all they want you to know about her is that,\" Youngmi said. \"[It] seems so small, but it's the entire character's purpose of being in the show.\"  Youngmi later clarifies that her initial comments were about the automatically generated closed-caption subtitles rather than the English language subtitles, which are \"substantially better.\" But she added: \"The misses in the metaphors -- and what the writers were trying to actually say -- are still pretty present.\"",
                                                    "link": "https://entertainment.slashdot.org/story/21/10/05/0146245/squid-game-subtitles-change-meaning-of-netflix-show",
                                                    "title": "Squid Game Subtitles 'Change Meaning' of Netflix Show"
                                                },
                                                {
                                                    "date": "Mon, 04 Oct 2021 00:00:00 GMT",
                                                    "summary": "An anonymous reader quotes a report from The Verge:  Sarah was the patient in a proof-of-concept trial of a new approach to treating severe, treatment-resistant depression, published today in the journal Nature Medicine. The findings open up another possible strategy for helping people with the disorder. The study only involved Sarah, and it's still not clear how well it might work in other people. The lessons from the trial, though, helped the researchers understand more about the nature of depression and could apply to other efforts to treat the disease. The trial used a technique called deep brain stimulation, where electrodes implanted within the brain deliver electrical impulses in an attempt to change or regulate abnormal brain activity. It's common for conditions like epilepsy and Parkinson's disease. Research over the past decade has shown that it can sometimes help with depression, but the findings have been inconsistent. Most previous efforts delivered stimulation to individual regions of the brain thought to be involved in depression. This study, though, was targeted at regions that were part of specific brain circuits -- interconnected parts of the brain that are responsible for specific functions.\nIn addition, the circuits involved might be different for each person. So in this trial, the study team personalized the treatment approach to the specific patient's depression. They mapped out the type of brain activity that occurred when Sarah's depression symptoms flared. Then, they surgically implanted a device that could detect that brain activity and send stimulation to the circuit where the activity was happening. For Sarah, the procedure was highly effective. Her scores on depression rating scales dropped the morning after the device was turned on. And perhaps more importantly, she felt dramatic changes in her mood. During her first time getting the stimulation, she laughed out loud in the lab. \"And everyone in the room went, 'Oh my god,' because that's the first time I spontaneously laughed and smiled, where it wasn't faked, in five years,\" she said. Sarah's depression circuit flares up hundreds of times a day, and each time, the implanted device delivers a brief stimulating pulse. In total, she gets around 30 minutes of stimulation each day [...]. Sarah can't feel the pulses, but she said she does have a general idea of when they're happening throughout the day. \"There's a sense of alertness and energy or positivity that I'll feel,\" she said.",
                                                    "link": "https://science.slashdot.org/story/21/10/04/2241244/a-surgically-implanted-brain-stimulation-device-could-help-treat-severe-depression",
                                                    "title": "A Surgically Implanted Brain Stimulation Device Could Help Treat Severe Depression"
                                                },
                                                {
                                                    "date": "Mon, 04 Oct 2021 00:00:00 GMT",
                                                    "summary": "German officials on Monday unveiled what they said is the world's first commercial plant for making synthetic kerosene, touted as a climate-friendly fuel of the future. The Associated Press reports:  The facility in Werlte, near Germany's northwestern border with the Netherlands, will use water and electricity from four nearby wind farms to produce hydrogen. In a century-old process, the hydrogen is combined with carbon dioxide to make crude oil, which can then be refined into jet fuel. Burning that synthetic kerosene releases only as much CO2 into the atmosphere as was previously removed to produce the fuel, making it \"carbon neutral.\"\nThe amount of fuel that the plant can produce beginning early next year is modest: just eight barrels a day, or about 336 gallons of jet fuel. That would be enough to fill up one small passenger plane every three weeks. By comparison, total fuel consumption of commercial airlines worldwide reached 95 billion gallons in 2019, before the pandemic hit the travel industry, according to the International Air Transport Association, or IATA. But Atmosfair, a German non-profit group behind the project, says its purpose is to show that the process is technologically feasible and -- once it is scaled up and with sufficient demand -- economically viable.\nInitially the price of synthetic kerosene produced in Werlte will be far higher than that of regular jet fuel, though Atmosfair won't divulge how much it will be charging its first customer, the German airline Lufthansa. However Atmosfair's chief executive, Dietrich Brockhagen, says a price of 5 euros ($5.80) per liter (0.26 gallons) is possible. That's still several times what kerosene currently costs, but Atsmofair is banking on carbon taxes driving up the price of fossil fuels, making his product more competitive. Additionally, authorities at the national and European level are putting in place quotas for the amount of e-fuel that airlines will have to use in future. That will create demand, making it more attractive to invest in bigger and better plants. Ueckerdt said 5 euros per liter is feasible by 2030, when the European Union's executive may require airlines to meet 0.7% of their kerosene needs with e-fuels. Under current plans, that would rise to 28% by 2050.",
                                                    "link": "https://news.slashdot.org/story/21/10/04/2236211/germany-unveils-worlds-first-commercial-plant-for-making-synthetic-kerosene",
                                                    "title": "Germany Unveils World's First Commercial Plant For Making Synthetic Kerosene"
                                                },
                                                {
                                                    "date": "Mon, 04 Oct 2021 00:00:00 GMT",
                                                    "summary": "We thought the carnage was over for popular decentralized finance, or DeFi, staking protocol Compound, but as it turns out, millions more than we thought are at risk. About $162 million is up for grabs after an upgrade gone very wrong, according to Robert Leshner, founder of Compound Labs. CNBC reports:  At first, the Compound chief tweeted Friday that there was a cap to how many comp tokens could be accidentally distributed, noting that âoethe impact is bounded, at worst, 280,000 comp tokens,â or about $92.6 million. But on Sunday morning, Leshner revealed that the pool of cash that had already been emptied once had been replenished â\" exposing another 202,472.5 comp tokens to exploit, or roughly $66.9 million at its current price.\nOn Wednesday, Compound rolled out what should have been a pretty standard upgrade. Soon after implementation, however, it was clear that something had gone seriously wrong, once users started to receive millions of dollars in comp tokens. For example, $30 million worth of comp tokens were claimed in one transaction. The saving grace of the entire debacle, however, was the fact that the pool of cash that was open to exploit -- something called the Comptroller contract -- had a finite amount of tokens. The problem is that this leaky pool got a fresh influx of cash, and 0.5 comp tokens are being added roughly every 15 seconds, according to Gupta. \"When the drip() function was called this morning, it sent the backlog (202,472.5, about two months of COMP since the last time the function was called) into the protocol for distribution to users,\" Leshner wrote in a tweet Sunday morning. Leshner noted that this brought the total comp at risk to 490,000 comp tokens, or about $162 million.\nThere are a few proposals to fix the bug, but Compound's governance model is such that any changes to the protocol require a multiday voting window, and Gupta said it takes another week for the successful proposal to be executed. In the meantime, this pool of cash is once again up for grabs for users who know how to exploit the bug. Compound made clear that no supplied or borrowed funds were at risk, which is some consolation. \"No user funds are or were at risk so it's not that big of a deal,\" said Gupta. \"Everyone kinda got diluted but didn't lose anything directly.\"",
                                                    "link": "https://news.slashdot.org/story/21/10/04/2228209/bug-puts-162-million-up-for-grabs-says-founder-of-defi-platform-compound",
                                                    "title": "Bug Puts $162 Million Up For Grabs, Says Founder of DeFi Platform Compound"
                                                }
                                            ]
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/event": {
            "get": {
                "tags": [
                    "Event"
                ],
                "summary": "Event",
                "description": "Automatically extracts dates, location and address information, images and event descriptions from event pages.",
                "operationId": "event",
                "parameters": [
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract",
                        "required": true,
                        "schema": {
                            "type": "string",
                            "default": "https://www.eventbrite.com/e/graphql-summit-2018-tickets-46601841362"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "venue": {
                                                        "type": "string"
                                                    },
                                                    "images": {
                                                        "type": "array",
                                                        "items": {
                                                            "type": "object",
                                                            "properties": {
                                                                "naturalHeight": {
                                                                    "type": "integer"
                                                                },
                                                                "width": {
                                                                    "type": "integer"
                                                                },
                                                                "diffbotUri": {
                                                                    "type": "string"
                                                                },
                                                                "url": {
                                                                    "type": "string"
                                                                },
                                                                "naturalWidth": {
                                                                    "type": "integer"
                                                                },
                                                                "primary": {
                                                                    "type": "boolean"
                                                                },
                                                                "height": {
                                                                    "type": "integer"
                                                                }
                                                            }
                                                        }
                                                    },
                                                    "timezoneOffset": {
                                                        "type": "string"
                                                    },
                                                    "endDate": {
                                                        "type": "string"
                                                    },
                                                    "endDateLocalTimezone": {
                                                        "type": "string"
                                                    },
                                                    "humanLanguage": {
                                                        "type": "string"
                                                    },
                                                    "diffbotUri": {
                                                        "type": "string"
                                                    },
                                                    "description": {
                                                        "type": "string"
                                                    },
                                                    "pageUrl": {
                                                        "type": "string"
                                                    },
                                                    "location": {
                                                        "type": "object",
                                                        "properties": {
                                                            "country": {
                                                                "type": "object",
                                                                "properties": {
                                                                    "name": {
                                                                        "type": "string"
                                                                    },
                                                                    "diffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "websiteUris": {
                                                                        "type": "array",
                                                                        "items": {
                                                                            "type": "string"
                                                                        }
                                                                    },
                                                                    "targetDiffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "surfaceForm": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            },
                                                            "isCurrent": {
                                                                "type": "boolean"
                                                            },
                                                            "address": {
                                                                "type": "string"
                                                            },
                                                            "city": {
                                                                "type": "object",
                                                                "properties": {
                                                                    "name": {
                                                                        "type": "string"
                                                                    },
                                                                    "diffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "websiteUris": {
                                                                        "type": "array",
                                                                        "items": {
                                                                            "type": "string"
                                                                        }
                                                                    },
                                                                    "targetDiffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "surfaceForm": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            },
                                                            "street": {
                                                                "type": "string"
                                                            },
                                                            "subregion": {
                                                                "type": "object",
                                                                "properties": {
                                                                    "name": {
                                                                        "type": "string"
                                                                    },
                                                                    "diffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "websiteUris": {
                                                                        "type": "array",
                                                                        "items": {
                                                                            "type": "string"
                                                                        }
                                                                    },
                                                                    "targetDiffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "surfaceForm": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            },
                                                            "latitude": {
                                                                "type": "number"
                                                            },
                                                            "precision": {
                                                                "type": "number"
                                                            },
                                                            "postalCode": {
                                                                "type": "string"
                                                            },
                                                            "region": {
                                                                "type": "object",
                                                                "properties": {
                                                                    "name": {
                                                                        "type": "string"
                                                                    },
                                                                    "diffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "websiteUris": {
                                                                        "type": "array",
                                                                        "items": {
                                                                            "type": "string"
                                                                        }
                                                                    },
                                                                    "targetDiffbotUri": {
                                                                        "type": "string"
                                                                    },
                                                                    "surfaceForm": {
                                                                        "type": "string"
                                                                    }
                                                                }
                                                            },
                                                            "longitude": {
                                                                "type": "number"
                                                            }
                                                        }
                                                    },
                                                    "type": {
                                                        "type": "string"
                                                    },
                                                    "title": {
                                                        "type": "string"
                                                    },
                                                    "startDate": {
                                                        "type": "string"
                                                    },
                                                    "startDateLocalTimezone": {
                                                        "type": "string"
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "request": {
                                        "pageUrl": "https://www.eventbrite.com/e/graphql-summit-2018-tickets-46601841362",
                                        "api": "event",
                                        "version": 3
                                    },
                                    "objects": [
                                        {
                                            "venue": "The Regency Center",
                                            "images": [
                                                {
                                                    "naturalHeight": 0,
                                                    "width": 720,
                                                    "diffbotUri": "image|3|2088675194",
                                                    "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F45534724%2F182934487936%2F1%2Foriginal.jpg?w=800&auto=compress&rect=0%2C0%2C2160%2C1080&s=e73c0be202c2cd939dcd3e809d96e093",
                                                    "naturalWidth": 0,
                                                    "primary": true,
                                                    "height": 360
                                                }
                                            ],
                                            "timezoneOffset": "-07:00",
                                            "endDate": "Fri, 09 Nov 2018 01:00:00 GMT",
                                            "endDateLocalTimezone": "Thu, 08 Nov 2018 17:00:00",
                                            "humanLanguage": "en",
                                            "diffbotUri": "event|3|-2069254052",
                                            "description": "​Join over 850 developers at the 3rd annual GraphQL Summit, the world's largest conference dedicated to GraphQL. Whether you're just getting started or a veteran GraphQL practitioner, this is the must-attend developer conference of the year.\nChoose from 40+ sessions across two tracks at one of San Francisco's most historic event venues\n​See how teams at top startups and enterprises are modernizing their platforms by using GraphQL in complex production environments\nHear where the technology is heading next, directly from the leaders of GraphQL's major open source projects\nMeet top solution providers from around the world - all under one roof.\nEnroll in hands-on workshops and classroom training to boost your skills to get the most out of GraphQL.\nGraphQL Summit Day Code of Conduct\nWe believe our community and events should be truly open for everyone. As such, we are committed to providing a friendly, safe, and welcoming environment for all, regardless of gender, sexual orientation, disability, ethnicity, or religion. Our event code of conduct outlines our expectations for participant behavior, as well as the consequences for unacceptable behavior.\n*GraphQL Summit is hosted at the Regency Center which is an accessible space with elevator access*",
                                            "pageUrl": "https://archives.diffbot.com/replay/4F02A5A651C6767544362C27B1D9BAC1/",
                                            "location": {
                                                "country": {
                                                    "name": "United States",
                                                    "diffbotUri": "http://diffbot.com/entity/A01d4EK33MmCosgI2KXa4-A",
                                                    "websiteUris": [
                                                        "wikidata.org/entity/Q30"
                                                    ],
                                                    "targetDiffbotUri": "http://diffbot.com/entity/A01d4EK33MmCosgI2KXa4-A",
                                                    "surfaceForm": "United States"
                                                },
                                                "isCurrent": true,
                                                "address": "1300 Van Ness Ave, San Francisco, California",
                                                "city": {
                                                    "name": "San Francisco",
                                                    "diffbotUri": "http://diffbot.com/entity/A8AV-O2v0MBuACAY7pFZzbg",
                                                    "websiteUris": [
                                                        "wikidata.org/entity/Q62"
                                                    ],
                                                    "targetDiffbotUri": "http://diffbot.com/entity/A8AV-O2v0MBuACAY7pFZzbg",
                                                    "surfaceForm": "San Francisco"
                                                },
                                                "street": "1300 Van Ness Ave",
                                                "subregion": {
                                                    "name": "San Francisco County",
                                                    "diffbotUri": "http://diffbot.com/entity/A8AV-O2v0MBuACAY7pFZzbg",
                                                    "websiteUris": [
                                                        "wikidata.org/entity/Q62"
                                                    ],
                                                    "targetDiffbotUri": "http://diffbot.com/entity/A8AV-O2v0MBuACAY7pFZzbg",
                                                    "surfaceForm": "San Francisco County"
                                                },
                                                "latitude": 37.78785705566406,
                                                "precision": 0.10000000149011612,
                                                "postalCode": "94109",
                                                "region": {
                                                    "name": "California",
                                                    "diffbotUri": "http://diffbot.com/entity/Al0_8ehooNxOPT36Y4RdV9w",
                                                    "websiteUris": [
                                                        "wikidata.org/entity/Q99"
                                                    ],
                                                    "targetDiffbotUri": "http://diffbot.com/entity/Al0_8ehooNxOPT36Y4RdV9w",
                                                    "surfaceForm": "California"
                                                },
                                                "longitude": -122.42138671875
                                            },
                                            "type": "event",
                                            "title": "GraphQL Summit 2018",
                                            "startDate": "Wed, 07 Nov 2018 16:30:00 GMT",
                                            "startDateLocalTimezone": "Wed, 07 Nov 2018 08:30:00"
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/{api}": {
            "get": {
                "summary": "Extract with Custom API",
                "tags": [
                    "Custom"
                ],
                "description": "Extracts a page using a modified Extract API or a custom ruleset.",
                "operationId": "customget",
                "parameters": [
                    {
                        "name": "api",
                        "in": "path",
                        "description": "Name of your Custom API",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "url",
                        "in": "query",
                        "description": "Target URL to extract (url encoded)",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "fields",
                        "in": "query",
                        "description": "Specify optional fields to be returned from any fully-extracted pages (e.g. `fields=querystring,links`)",
                        "schema": {
                            "type": "string",
                            "enum": [
                                "links",
                                "extlinks",
                                "meta",
                                "querystring",
                                "breadcrumb"
                            ]
                        }
                    },
                    {
                        "name": "timeout",
                        "in": "query",
                        "description": "Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000).",
                        "schema": {
                            "type": "integer",
                            "format": "int32"
                        }
                    },
                    {
                        "name": "callback",
                        "in": "query",
                        "description": "Use for jsonp requests. Needed for cross-domain ajax.",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "proxy",
                        "in": "query",
                        "description": "Specify an IP address of a [custom proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) that will be used to fetch the target page. (Ex: `&proxy` or `&proxy=0.0.0.0`)",
                        "schema": {
                            "type": "string",
                            "default": ""
                        }
                    },
                    {
                        "name": "proxyAuth",
                        "in": "query",
                        "description": "Used to specify the authentication parameters that will be used with a custom proxy specified in the &proxy parameter. (Ex: `proxyAuth=username:password`)",
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "useProxy",
                        "in": "query",
                        "description": "Set to `default` to use [Diffbot's datacenter proxy](https://docs.diffbot.com/reference/using-proxies#how-to-use-proxies) for this request. `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally.",
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "request": {
                                            "type": "object",
                                            "properties": {
                                                "pageUrl": {
                                                    "type": "string"
                                                },
                                                "api": {
                                                    "type": "string"
                                                },
                                                "version": {
                                                    "type": "integer"
                                                }
                                            }
                                        },
                                        "objects": {
                                            "type": "array",
                                            "items": {
                                                "type": "object"
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        },
        "/custom": {
            "get": {
                "summary": "Retrieve Custom APIs",
                "tags": [
                    "Custom"
                ],
                "description": "Get all the Custom APIs and their rules currently defined on your token",
                "operationId": "retrieve-a-custom-api",
                "parameters": [],
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "array",
                                    "items": {
                                        "type": "object",
                                        "properties": {
                                            "notes": {
                                                "type": "array",
                                                "items": {
                                                    "type": "string"
                                                }
                                            },
                                            "xForwardHeaders": {
                                                "type": "object",
                                                "properties": {
                                                    "X-Evaluate": {
                                                        "type": "string"
                                                    }
                                                },
                                                "required": [
                                                    "X-Evaluate"
                                                ]
                                            },
                                            "rules": {
                                                "type": "array",
                                                "items": {
                                                    "type": "object",
                                                    "properties": {
                                                        "name": {
                                                            "type": "string"
                                                        },
                                                        "selector": {
                                                            "type": "string"
                                                        }
                                                    }
                                                }
                                            },
                                            "api": {
                                                "type": "string"
                                            },
                                            "urlPattern": {
                                                "type": "string"
                                            },
                                            "testUrl": {
                                                "type": "string"
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "errorCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "errorCode": 500,
                                    "error": "Internal Server Error"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            },
            "post": {
                "summary": "Create or Update a Custom API",
                "tags": [
                    "Custom"
                ],
                "description": "Create or update the parameters and ruleset of an existing Custom API",
                "operationId": "create-a-custom-api",
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {
                                "type": "object",
                                "properties": {
                                    "notes": {
                                        "type": "array",
                                        "items": {
                                            "type": "string"
                                        },
                                        "description": "An array of strings that can be added manually.  The API automatically adds a notes specifying when the API was last updated."
                                    },
                                    "xForwardHeaders": {
                                        "type": "object",
                                        "properties": {
                                            "X-Evaluate": {
                                                "type": "string"
                                            }
                                        },
                                        "description": "Allows you to pass in X-Forward headers by name, including X-Evaluate (aka. X-Forward-X-Evaluate; omit \"X-Forward\" in the header name)"
                                    },
                                    "rules": {
                                        "type": "array",
                                        "items": {
                                            "type": "object",
                                            "properties": {
                                                "name": {
                                                    "type": "string"
                                                },
                                                "selector": {
                                                    "type": "string"
                                                },
                                                "filters": {
                                                    "type": "array",
                                                    "items": {
                                                        "type": "object",
                                                        "properties": {
                                                            "args": {
                                                                "type": "array",
                                                                "items": {
                                                                    "type": "string"
                                                                }
                                                            },
                                                            "type": {
                                                                "type": "string"
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        },
                                        "description": "An object that defines a set of rules for a specific urlPattern-api combination"
                                    },
                                    "api": {
                                        "type": "string",
                                        "description": "The specific API being targeted.  Always precede the API name with \"/api/\" as in `\"/api/article\"` except for \"all\")"
                                    },
                                    "urlPattern": {
                                        "type": "string",
                                        "description": "A regex that defines the URLs for which the ruleset will be applied"
                                    },
                                    "testUrl": {
                                        "type": "string",
                                        "description": "A URL that can be used to check that the rule still works as intended.  This is the page that will load automatically when editing the ruleset in the Dashboard UI"
                                    },
                                    "renderOptions": {
                                        "type": "string",
                                        "description": "Rendering options"
                                    },
                                    "prefilters": {
                                        "type": "array",
                                        "items": {
                                            "type": "string"
                                        },
                                        "description": "An array of string selectors that should be omitted from the DOM before extraction occurs"
                                    },
                                    "useProxy": {
                                        "type": "string",
                                        "description": "Used to disable proxies (when they have been set globally), by applying the value `\"none\"`"
                                    }
                                }
                            }
                        }
                    }
                },
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "hashes": {
                                            "type": "array",
                                            "items": {
                                                "type": "string"
                                            }
                                        }
                                    }
                                },
                                "example": {
                                    "hashes": [
                                        "abcd1234"
                                    ]
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            },
            "delete": {
                "summary": "Delete a Custom API",
                "tags": [
                    "Custom"
                ],
                "description": "Delete definitions of existing Custom APIs for a given URL pattern and API on your token",
                "operationId": "delete-a-custom-api",
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {
                                "type": "object",
                                "properties": {
                                    "api": {
                                        "type": "string",
                                        "description": "The specific API being targeted.  Always precede the API name with \"/api/\" as in `\"/api/article\"` except for \"all\")"
                                    },
                                    "urlPattern": {
                                        "type": "string",
                                        "description": "A regex that defines the URLs corresponding to which the Custom API definitions will be deleted. Make sure that is json-escaped before url-encoding it."
                                    },
                                    "token": {
                                        "type": "string",
                                        "description": "Your token. The Custom API definitions only for this token will be deleted"
                                    }
                                }
                            }
                        }
                    }
                },
                "responses": {
                    "200": {
                        "description": "Successful API Response",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "token": {
                                            "type": "string"
                                        },
                                        "urlPattern": {
                                            "type": "string"
                                        },
                                        "api": {
                                            "type": "string"
                                        },
                                        "status": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "api": "/api/discussion",
                                    "token": "TOKEN",
                                    "urlPattern": "examplePattern.com",
                                    "status": "success"
                                }
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "statusCode": {
                                            "type": "integer"
                                        },
                                        "error": {
                                            "type": "string"
                                        }
                                    }
                                },
                                "example": {
                                    "statusCode": 500,
                                    "error": "urlPattern and apiPrefix required to delete rule"
                                }
                            }
                        }
                    }
                },
                "security": [
                    {
                        "tokenscheme": []
                    }
                ]
            }
        }
    },
    "components": {
        "securitySchemes": {
            "tokenscheme": {
                "type": "apiKey",
                "name": "token",
                "in": "query"
            }
        }
    },
    "x-readme": {
        "explorer-enabled": true,
        "proxy-enabled": true,
        "samples-enabled": true
    }
}