array(
        'method' => 'GET',
        'header' => array(
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: ja-JP,ja;q=0.9,en-US;q=0.8,en;q=0.7',
            // Request an uncompressed body: the http(s):// stream wrapper does not
            // decode gzip/deflate/br, so advertising them could hand compressed
            // bytes straight to the HTML parser below.
            'Accept-Encoding: identity',
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1'
        )
    )
);

// Build the stream context that carries the HTTP options above.
$context = stream_context_create($options);

// Fetch the page. $url is expected to be defined earlier in the file.
$response = file_get_contents($url, false, $context);

if ($response === false) {
    die("请求失败!\n");
}

// Keep the raw body under its own name for the parsing steps below.
$html_content = $response;

// Collect libxml parse diagnostics internally instead of emitting PHP warnings;
// real-world HTML is rarely well-formed.
libxml_use_internal_errors(true);
$dom = new DOMDocument();

// Without an encoding hint loadHTML() may decode the bytes as ISO-8859-1, which
// garbles Japanese text and makes every label comparison below fail. The XML
// declaration prefix tells libxml the input is UTF-8. It also guarantees the
// argument is never an empty string.
$dom->loadHTML(
    '<?xml encoding="UTF-8">' . $html_content,
    LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
);
// The diagnostics are not inspected, so release the buffered errors.
libxml_clear_errors();

$xpath = new DOMXPath($dom);

// Labels of the two definition-list entries to extract:
// the trade name and the address of the head office / principal office.
$name_label = '商号又は名称';
$address_label = '本店又は主たる事務所の所在地';

$company_name = '';
$address = '';

$dt_elements = $xpath->query('//dt');

// Pass 1: exact label match (so e.g. a furigana variant of the name label is
// not picked up). Every <dt> is visited, so when several match the last one
// that has a <dd> wins.
foreach ($dt_elements as $dt) {
    // Convert full-width spaces to ASCII spaces first and trim afterwards, so
    // labels padded with full-width spaces still compare equal.
    $dt_text = trim(mb_convert_kana($dt->textContent, 's', 'UTF-8'));

    if ($dt_text !== $name_label && $dt_text !== $address_label) {
        continue;
    }

    // First following sibling <dd>, skipping text nodes and other elements.
    $dd = $xpath->query('following-sibling::dd[1]', $dt)->item(0);
    if ($dd === null) {
        continue;
    }

    if ($dt_text === $name_label) {
        $company_name = trim($dd->textContent);
    } else {
        $address = trim($dd->textContent);
    }
}

// Pass 2 (address only): if the exact match produced nothing, accept any <dt>
// whose text merely contains the address label.
if ($address === '') {
    foreach ($dt_elements as $dt) {
        $dt_text = mb_convert_kana($dt->textContent, 's', 'UTF-8');

        if (strpos($dt_text, $address_label) === false) {
            continue;
        }

        $dd = $xpath->query('following-sibling::dd[1]', $dt)->item(0);
        if ($dd !== null) {
            $address = trim($dd->textContent);
        }
    }
}

// Build the payload, substituting placeholder messages for missing values.
$result = array(
    'name' => $company_name !== '' ? $company_name : '未找到公示名称',
    'address' => $address !== '' ? $address : '未找到地址',
);

// Emit the result as JSON, keeping non-ASCII characters unescaped.
header('Content-Type: application/json');
echo json_encode($result, JSON_UNESCAPED_UNICODE);