diff --git a/workflows/Generate rotating user-agents and proxy IPs for scraping APIs-13637/generate_rotating_user-agents_and_proxy_ips_for_scraping_apis.json b/workflows/Generate rotating user-agents and proxy IPs for scraping APIs-13637/generate_rotating_user-agents_and_proxy_ips_for_scraping_apis.json new file mode 100644 index 000000000..9ee8a3475 --- /dev/null +++ b/workflows/Generate rotating user-agents and proxy IPs for scraping APIs-13637/generate_rotating_user-agents_and_proxy_ips_for_scraping_apis.json @@ -0,0 +1 @@ +{"id":"","meta":{"instanceId":""},"name":"Multiple headers in API req - decodo proxy","tags":[],"nodes":[{"id":"b3ad398f-314e-4319-95e1-9a9ebe4286c9","name":"user-agents","type":"n8n-nodes-base.httpRequest","position":[-1296,256],"parameters":{"url":"https://www.useragentstring.com/pages/Browserlist/","options":{}},"typeVersion":4.3},{"id":"6e5f162f-54e8-4e39-b6c8-ccf1e784a657","name":"clean the return to line in user-agent","type":"n8n-nodes-base.set","position":[-688,256],"parameters":{"options":{},"assignments":{"assignments":[{"id":"a7ce2938-67b0-4d66-8f84-facaed5f9483","name":"clean_user-agent","type":"string","value":"={{ $json.userAgents.replace(/\\n/g, ' ').replace(/\\s+/g, ' ').trim() }}"}]}},"typeVersion":3.4},{"id":"22db55f3-cf19-48b4-a204-e5af0af1e1ef","name":"random sort","type":"n8n-nodes-base.sort","position":[-480,256],"parameters":{"type":"random"},"typeVersion":1},{"id":"0db0b1ae-0ee8-4c12-9cc8-3202cc656e62","name":"Extract user-agent values","type":"n8n-nodes-base.html","position":[-1104,256],"parameters":{"options":{},"operation":"extractHtmlContent","extractionValues":{"values":[{"key":"userAgents","cssSelector":"ul li a","returnArray":true}]}},"typeVersion":1.2},{"id":"aefcdf94-b9a7-4de1-ad3d-09e9ecc53ef0","name":"Split Out","type":"n8n-nodes-base.splitOut","position":[-896,256],"parameters":{"options":{},"fieldToSplitOut":"userAgents"},"typeVersion":1},{"id":"d3317611-f80d-42b1-8b45-4d5c215709e7","name":"Check used 
IP/user-agent with cloudflare","type":"n8n-nodes-base.httpRequest","position":[256,-368],"parameters":{"url":"https://cloudflare.com/cdn-cgi/trace","method":"POST","options":{"proxy":"=http://{{ $json.proxy_username }}:{{ $json.proxy_password }}@gate.decodo.com:{{ $json.proxy_port }}","response":{"response":{"fullResponse":true}}},"sendHeaders":true,"headerParameters":{"parameters":[{"name":"user-agent","value":"={{ $json[\"clean_user-agent\"] }}"}]}},"typeVersion":4.4},{"id":"9371e1a4-445b-4156-9a2f-b4f14f2691b3","name":"Manual trigger","type":"n8n-nodes-base.manualTrigger","position":[-1520,96],"parameters":{},"typeVersion":1},{"id":"0e8c6c40-68c0-421b-956d-d6fe19420955","name":"SET your proxy connection details here","type":"n8n-nodes-base.set","position":[-1280,-112],"parameters":{"options":{},"assignments":{"assignments":[{"id":"25938c49-3531-40e5-8164-8d294ae67b50","name":"proxy_username","type":"string","value":"xxxxx"},{"id":"d4ac7a84-2e74-4fa4-8770-58d0bd1bbcd2","name":"proxy_password","type":"string","value":"xxxxx"},{"id":"b94ab04f-96aa-4cff-b79f-2d58c2684c56","name":"proxy_port","type":"string","value":"xxxxx"}]}},"typeVersion":3.4},{"id":"37312114-a111-4abc-8a19-62d7e7fc4940","name":"Merge","type":"n8n-nodes-base.merge","position":[-32,-96],"parameters":{"mode":"combine","options":{},"combineBy":"combineAll"},"typeVersion":3.2},{"id":"32f504be-251e-4975-acfe-a31778d69966","name":"Sticky Note","type":"n8n-nodes-base.stickyNote","position":[-2432,-224],"parameters":{"width":864,"height":1024,"content":"# Generate multiple dynamic user-agent & IP address for scraping APIs using proxy \n\n## ✨What it does\n\nUseful for scraping only/API data.\n\nThis workflow will give you the ability to bypass the IP address limitation control of **some** publicly available APIs by using a different pair of “user-agent” and “IP address” for each call to the targeted API.\n\nYou can therefore place this workflow before any HTTP request node.\n\n## 🛠️How to set up?\n\nIf 
you're using a Decodo proxy server (or any other proxy service provider), you can use the Residential proxy with \"session type\" set to \"rotating\" if you want to have `one IP address per API call`\n\n### 1) The proxy connection details\n\nCredentials usually work like this with any proxy service provider:\n\n`http://username:password@gate.decodo.com:PORT`\n\nYou just need to configure these credentials in the node `SET your proxy connection details here`: \n* `proxy_username`\n* `proxy_password`\n* `proxy_port`\n\n\n### 2) Number of user-agents needed\n\nYou can configure the number of different user-agents you want to use in the node `Take X random user-agents`\n\n### 3) Call your targeted API\n\nJust configure your HTTP node for the targeted API you want to call with these additional details in the node `Targeted API`: \n\n* add header name `user-agent` with value `{{ $json[\"clean_user-agent\"] }}`\n* add option \"proxy\" with value `http://{{ $json.proxy_username }}:{{ $json.proxy_password }}@gate.decodo.com:{{ $json.proxy_port }}`\n\n\n### ⚠️ Please note that some APIs may reject connections from a proxy server\n\n\n"},"typeVersion":1},{"id":"b93449a3-d468-4bb7-b4e7-2a66077877ef","name":"Sticky Note1","type":"n8n-nodes-base.stickyNote","position":[-1392,-224],"parameters":{"color":4,"width":336,"height":288,"content":"## Add proxy credentials here"},"typeVersion":1},{"id":"8848764a-cba2-4e96-9b77-44c2482fb03a","name":"Sticky Note2","type":"n8n-nodes-base.stickyNote","position":[192,-16],"parameters":{"color":4,"width":336,"height":288,"content":"## Add the HTTP node for the API you want to call"},"typeVersion":1},{"id":"60c848b7-6df4-4db8-88d1-4ce7c05aba10","name":"Sticky Note3","type":"n8n-nodes-base.stickyNote","position":[-320,96],"parameters":{"color":4,"width":336,"height":304,"content":"## Update the number of user-agents you want to 
use"},"typeVersion":1},{"id":"e40f1c90-4c00-45f6-8c0b-f7e987d5eca5","name":"Take X random user-agents","type":"n8n-nodes-base.limit","position":[-240,256],"parameters":{"maxItems":5},"typeVersion":1},{"id":"95670562-97b7-4a2e-a978-0bf64f58d8a6","name":"Targeted API","type":"n8n-nodes-base.httpRequest","position":[288,96],"parameters":{"url":"API_URL","options":{"proxy":"=http://{{ $json.proxy_username }}:{{ $json.proxy_password }}@gate.decodo.com:{{ $json.proxy_port }}"},"sendHeaders":true,"headerParameters":{"parameters":[{"name":"user-agent","value":"={{ $json[\"clean_user-agent\"] }}"}]}},"typeVersion":4.4},{"id":"d199c0a2-00a7-4e2d-b334-f21cd056c4e7","name":"IP address and user-agent used","type":"n8n-nodes-base.set","position":[608,-368],"parameters":{"options":{},"assignments":{"assignments":[{"id":"c7a0ce79-6322-4f6c-b148-a2b7d0929861","name":"Ip_address","type":"string","value":"={{ $json.data.match(/ip=([^\\n]+)/)[1] }}\n"},{"id":"ec5ba2f4-a3af-4158-8a67-2594b99f61e1","name":"user-agent","type":"string","value":"={{ $json.data.match(/uag=([^\\n]+)/)[1] }}\n"}]}},"typeVersion":3.4},{"id":"76052c7e-93d2-4894-a963-2cee06495e9f","name":"Sticky Note4","type":"n8n-nodes-base.stickyNote","position":[416,-512],"parameters":{"width":480,"height":336,"content":"## Informative node showing which IP address/user-agent pair is being used"},"typeVersion":1}],"active":false,"pinData":{},"settings":{"executionOrder":"v1"},"versionId":"","connections":{"Merge":{"main":[[{"node":"Check used IP/user-agent with cloudflare","type":"main","index":0},{"node":"Targeted API","type":"main","index":0}]]},"Split Out":{"main":[[{"node":"clean the return to line in user-agent","type":"main","index":0}]]},"random sort":{"main":[[{"node":"Take X random user-agents","type":"main","index":0}]]},"user-agents":{"main":[[{"node":"Extract user-agent values","type":"main","index":0}]]},"Manual trigger":{"main":[[{"node":"user-agents","type":"main","index":0},{"node":"SET your proxy connection 
details here","type":"main","index":0}]]},"Extract user-agent values":{"main":[[{"node":"Split Out","type":"main","index":0}]]},"Take X random user-agents":{"main":[[{"node":"Merge","type":"main","index":1}]]},"SET your proxy connection details here":{"main":[[{"node":"Merge","type":"main","index":0}]]},"clean the return to line in user-agent":{"main":[[{"node":"random sort","type":"main","index":0}]]},"Check used IP/user-agent with cloudflare":{"main":[[{"node":"IP address and user-agent used","type":"main","index":0}]]}}} \ No newline at end of file