This topic describes how to address DNS resolution failures that occur when numerous concurrent requests attempt to access a peer service's domain name.
Error message
The error message displayed in Logview includes "name or service not known".
Problem description
DNS resolution failures can occur when UDF or Spark jobs generate a high volume of concurrent requests to a peer service's domain name.
Solution
To prevent this issue, resolve the domain name to an IP address once during the job's initialization phase, and then use that IP address for all subsequent requests during the job's execution phase. The following sample code provides an example:
# -*- coding:UTF-8 -*-
from odps.udf import annotate
@annotate("string->string")
class test_udf(object):
    """UDF that calls a peer service through an IP address resolved once.

    The peer domain is resolved when the UDF instance is constructed, and
    every ``evaluate`` call reuses the cached address, so per-row requests
    do not trigger DNS lookups.

    Class attributes (override before instantiation or in a subclass):
        peer_host: Domain name of the peer service to resolve.
        max_attempts: Total tries for the DNS lookup and for each request.
        request_timeout: Seconds passed to ``requests.get`` as ``timeout``
            so a stalled peer cannot block a worker forever.
    """

    peer_host = "xxx-vpc.cn-shanghai.aliyuncs.com"
    max_attempts = 3
    request_timeout = 10
    __ip_address = ''

    def __init__(self):
        """Resolve ``peer_host`` to an IP address, retrying on failure.

        Raises:
            socket.gaierror: If resolution fails on every attempt.
        """
        import socket

        for attempt in range(1, self.max_attempts + 1):
            try:
                self.__ip_address = socket.gethostbyname(self.peer_host)
            except socket.gaierror as e:
                # str(e) is always a string; e.strerror may be None.
                print('Failed to resolve domain: ' + str(e))
                if attempt == self.max_attempts:
                    # Out of attempts: fail loudly instead of continuing
                    # with an empty IP address.
                    raise
                print('resolve domain retry: ' + str(attempt))
            else:
                print(self.__ip_address)
                break

    def evaluate(self, inputPath):
        """Request ``inputPath`` from the peer service via the cached IP.

        Args:
            inputPath: Path appended to ``http://<resolved ip>/``.

        Returns:
            ``'true'`` when the peer answers with HTTP 200.

        Raises:
            RuntimeError: If the last attempt returns a non-200 status.
            Exception: Whatever ``requests.get`` raised on the last attempt.
        """
        # Imported lazily, as in the original sample, so the module loads
        # even where ``requests`` is only available in the UDF runtime.
        import requests

        print(self.__ip_address)
        url = f"http://{self.__ip_address}/{inputPath}"
        print(url)
        for attempt in range(1, self.max_attempts + 1):
            try:
                response = requests.get(url, timeout=self.request_timeout)
                if response.status_code != 200:
                    # Treat a non-200 answer as a retryable failure.
                    raise RuntimeError(
                        'unexpected HTTP status: ' + str(response.status_code)
                    )
                return 'true'
            except Exception as e:
                if attempt == self.max_attempts:
                    raise
                print('connect retry: ' + str(attempt))
                # Python 3 exceptions have no ``.message`` attribute.
                print('error: ' + str(e))
        # Only reachable when max_attempts is configured below 1.
        return 'false'