环境
- 目标板:QRB5165 EB5
- 基线:qrb5165-eb5.dts 继承 qrb5165-rb5.dts
- 目标:恢复 pcie1 下有线网卡 endpoint 枚举

主线 dts
// SPDX-License-Identifier: BSD-3-Clause
/*
* Qualcomm Robotics EB5 platform based on QRB5165/RB5
*/
/dts-v1/;
#include "qrb5165-rb5.dts"
/*
* Board identity for the EB5 variant. Everything not overridden here comes
* from the included qrb5165-rb5.dts.
*/
/ {
model = "Qualcomm Technologies, Inc. qrb5165 IOT EB5";
compatible = "qcom,kona-iot", "qcom,kona", "qcom,iot",
"qcom,eb5", "qcom,qrb5165-rb5", "qcom,sm8250";
aliases {
ufshc1 = &ufs_mem_hc;
/*
* NOTE(review): presumably intended to pin the PCI domain numbers so
* that pcie1 is domain 1, which the helper driver's hard-coded domain
* lookup depends on. Confirm that the running kernel actually derives
* domain numbers from these alias names.
*/
pci-domain0 = &pcie0;
pci-domain1 = &pcie1;
pci-domain2 = &pcie2;
};
};
/*
* EB5 overrides for the PCIe1 root complex, behind which the ASM2806 switch
* and the RTL8168 wired NICs sit.
*/
&pcie1 {
status = "okay";
/*
* Gen3 x2 is requested here. The bring-up notes in this document report
* that on mainline the link nevertheless trains to Gen1 x1 (LnkSta 0x1011).
*/
max-link-speed = <3>;
num-lanes = <2>;
wake-gpios = <&tlmm 84 GPIO_ACTIVE_HIGH>;
vdda-supply = <&vreg_l9a_1p2>;
vddpe-3v3-supply = <&vreg_l11c_3p3>;
/*
* Remove iommu-map so qcom_pcie_config_sid_1_9_0() returns early
* and never clears BDF_TO_SID_BYPASS. With bypass ON (hardware
* default), inbound PCIe completions skip SMMU BDF-to-SID lookup.
*
* Background: during PCI bus walk the SMMU has no context banks for
* downstream-device SIDs yet (iommu_attach_device() is called only
* after device_add(), i.e. AFTER the first config read). The vendor
* msm_pcie driver never touches BDF_TO_SID_CFG, so bypass stays ON
* there and ASM2806 completions flow through unblocked.
*
* NOTE(review): later steps of the notes (24/25) report that deleting
* iommu-map did not make the ASM2806 visible and propose restoring the
* inherited property. Re-check whether this delete should still be here.
*/
/delete-property/ iommu-map;
/*
* Append the helper as an extra clock entry so fw_devlink ensures
* the helper probes (and gpio141 is driven) before qcom-pcie probes.
* qcom-pcie only requests clocks by the names it knows; "lan-en" is
* silently ignored by the driver but honored by fw_devlink.
*
* NOTE(review): this relies on the controller driver looking clocks up
* by name. Confirm against the qcom-pcie version actually in use.
*/
clocks = <&gcc GCC_PCIE_1_PIPE_CLK>,
<&gcc GCC_PCIE_1_AUX_CLK>,
<&gcc GCC_PCIE_1_CFG_AHB_CLK>,
<&gcc GCC_PCIE_1_MSTR_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_Q2A_AXI_CLK>,
<&gcc GCC_PCIE_WIGIG_CLKREF_EN>,
<&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>,
<&gcc GCC_DDRSS_PCIE_SF_TBU_CLK>,
<&eb5_pcie1_helper>;
/* Must stay index-aligned with the clocks list above; "lan-en" is the helper. */
clock-names = "pipe", "aux", "cfg", "bus_master", "bus_slave",
"slave_q2a", "ref", "tbu", "ddrss_sf_tbu", "lan-en";
pinctrl-0 = <&pcie1_default_state>;
/* Root port child node; intentionally left without EB5-specific properties. */
pcie@0 {
};
};
/*
* Lightweight anchor node for the out-of-tree eb5-pcie1-helper kmod.
* The driver waits for RC1 link stabilisation and then rescans pcie1 (domain 1)
* so the ASM2806 cascade + RTL8168 endpoints can be discovered without any
* modifications to qcom.c.
*
* The node doubles as a clock provider (#clock-cells = <0>) purely so that
* &pcie1 can reference it from its clocks list for probe ordering.
*/
/ {
eb5_pcie1_helper: eb5-pcie1-helper {
compatible = "qcom,eb5-pcie1-helper";
/* gpio141: requested by the helper driver under the "lan-en" name and driven high. */
lan-en-gpios = <&tlmm 141 GPIO_ACTIVE_HIGH>;
#clock-cells = <0>;
status = "okay";
};
};
/*
* PCIe2 controller and its PHY are switched off. The notes describe this as
* an isolation experiment (step 7) that did not change the pcie1 symptom.
*/
&pcie2 {
status = "disabled";
};
&pcie2_phy {
status = "disabled";
};
&tlmm {
};

EB5 Helper Module
// SPDX-License-Identifier: GPL-2.0
/*
* pcie-qcom-eb5-helper.c - PCIe1 ASM2806 bridge bring-up helper for QRB5165 EB5
*
* The Qualcomm vendor 4.19 kernel has a private "use-pcie-bridge-asm2806" DT
* property that drives gpio141 (ASM2806 bridge-enable) before the RC1 PCIe
* controller enumerates the bus. Mainline qcom-pcie has no such logic.
*
* This driver:
* 1. Acquires gpio141 and drives it HIGH on probe (before qcom-pcie touches
* PERST#).
* 2. Registers itself as a dummy fixed-rate clock provider (#clock-cells = <0>),
* which qrb5165-eb5.dts adds to pcie1's clock list. fw_devlink sees this
* phandle and guarantees that pcie1 will NOT be probed until this driver's
* probe() returns successfully.
* 3. Schedules a deferred rescan after the ASM2806 cascade downstream links
* finish training. The rescan explicitly programs bridge MEMORY_BASE/LIMIT
* registers to hardware — pci_assign_unassigned_bus_resources() only updates
* kernel data structures but does NOT write the bridge window registers,
* which would leave the RTL8168 endpoints inaccessible.
*
* Result: gpio141 is driven high BEFORE qcom_pcie_host_init() de-asserts PERST#,
* so the ASM2806 bridge is powered and ready for config-space enumeration.
*
* Nothing in qcom.c / pcie-qcom.c is modified.
*/
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
#include <linux/clk-provider.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/io.h>
/*
 * PCI domain 1, root bus 0 — matches pcie1 (1c08000.pcie).
 * Used for every pci_find_bus() / pci_get_domain_bus_and_slot() lookup below.
 */
#define EB5_PCIE1_DOMAIN 1
#define EB5_PCIE1_ROOT_BUS 0
/*
 * Poll interval while waiting for pcie1 to create its root bus.
 * pcie1 probe can take 10-15 s at boot; we retry up to 30 times (30 s).
 */
#define EB5_POLL_INTERVAL_MS 1000
#define EB5_POLL_MAX_RETRIES 30
/* Platform driver name; also used as the dummy clock name and as the log prefix. */
#define DRV_NAME "qcom-eb5-pcie1-helper"
/* Per-device state of the helper, allocated in probe with devm_kzalloc(). */
struct eb5_pcie_helper {
/* "lan-en" GPIO (gpio141 in the EB5 device tree), requested as output-high */
struct gpio_desc *lan_en_gpio;
/* dummy clock registered only so pcie1 can list the helper in its clocks */
struct clk_hw clk_hw;
/* deferred work that waits for the pcie1 root bus and rescans it */
struct delayed_work rescan_work;
};
/* Empty ops table: the clock has no callbacks and exists for probe ordering only. */
static const struct clk_ops eb5_lan_clk_ops = { /* no-op clock, ordering only */ };
/*
* program_bridge_windows - write bridge memory windows to hardware registers.
*
* pci_assign_unassigned_bus_resources() assigns memory windows in the kernel's
* resource tree but does NOT write PCI_MEMORY_BASE / PCI_MEMORY_LIMIT to the
* bridge's config space. Without this step the bridge does not forward memory
* transactions downstream, so endpoint drivers fail on their very first MMIO
* access. Recurse into child buses so all levels of ASM2806 are programmed.
*/
static void program_bridge_windows(struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
struct resource *res;
u16 cmd;
if (!dev->subordinate)
continue;
/* Write 32-bit non-prefetchable memory window */
res = &dev->resource[PCI_BRIDGE_MEM_WINDOW];
if (resource_size(res) > 0) {
u16 mem_base = (res->start >> 16) & 0xfff0;
u16 mem_limit = (res->end >> 16) & 0xfff0;
pci_write_config_word(dev, PCI_MEMORY_BASE, mem_base);
pci_write_config_word(dev, PCI_MEMORY_LIMIT, mem_limit);
dev_info(&dev->dev, "bridge mem window programmed: %pR\n", res);
}
/* Write I/O window if present */
res = &dev->resource[PCI_BRIDGE_IO_WINDOW];
if (resource_size(res) > 0) {
pci_write_config_byte(dev, PCI_IO_BASE,
(res->start >> 8) & 0xf0);
pci_write_config_byte(dev, PCI_IO_LIMIT,
(res->end >> 8) & 0xf0);
}
/* Enable bus-mastering and memory-space decoding on the bridge */
pci_read_config_word(dev, PCI_COMMAND, &cmd);
cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
pci_write_config_word(dev, PCI_COMMAND, cmd);
program_bridge_windows(dev->subordinate);
}
}
static void eb5_pcie1_rescan_work(struct work_struct *work)
{
struct eb5_pcie_helper *h =
container_of(work, struct eb5_pcie_helper, rescan_work.work);
struct pci_bus *root_bus;
int retries = 0;
/* Re-assert gpio141 in case it was reset during suspend/resume */
gpiod_set_value_cansleep(h->lan_en_gpio, 1);
/*
* pcie1 probe (and bus creation) can take 10-15 s at boot — much
* longer than the helper's own probe. Poll until the root bus
* appears, then give the ASM2806 downstream links an extra second
* to finish training before we scan.
*/
while (retries < EB5_POLL_MAX_RETRIES) {
root_bus = pci_find_bus(EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS);
if (root_bus)
break;
pr_debug(DRV_NAME ": waiting for pcie1 bus (attempt %d/%d)\n",
retries + 1, EB5_POLL_MAX_RETRIES);
msleep(EB5_POLL_INTERVAL_MS);
retries++;
}
if (!root_bus) {
pr_err(DRV_NAME ": domain %u bus %02x not found after %d s, giving up\n",
EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS, EB5_POLL_MAX_RETRIES);
return;
}
/* Extra settling time for ASM2806 downstream link training */
msleep(1000);
pr_info(DRV_NAME ": rescanning pcie1 (domain %u bus %02x) after %d poll(s)\n",
EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS, retries);
/*
* Issue a Secondary Bus Reset (SBR) via the root port's Bridge Control
* register. This pulses the downstream PERST# from the RC side without
* needing direct GPIO access. The ASM2806 may have been unresponsive
* during the initial enumeration because its internal init was not yet
* complete when qcom-pcie first de-asserted PERST# at probe time.
* Driving SBR here — after gpio141 has been high for several seconds —
* gives the ASM2806 a clean reset cycle with power already stable.
*/
{
struct pci_dev *rp = pci_get_domain_bus_and_slot(
EB5_PCIE1_DOMAIN, 0, PCI_DEVFN(0, 0));
if (rp) {
u16 bctl;
pci_read_config_word(rp, PCI_BRIDGE_CONTROL, &bctl);
/* Assert Secondary Bus Reset */
pci_write_config_word(rp, PCI_BRIDGE_CONTROL,
bctl | PCI_BRIDGE_CTL_BUS_RESET);
msleep(100); /* hold reset ≥ 100 ms (PCIe r3.0 §6.6.1) */
/* De-assert Secondary Bus Reset */
pci_write_config_word(rp, PCI_BRIDGE_CONTROL, bctl);
msleep(500); /* wait for ASM2806 to finish link re-training */
dev_info(&rp->dev,
DRV_NAME ": SBR pulse done, waiting for ASM2806\n");
pci_dev_put(rp);
} else {
pr_warn(DRV_NAME ": root port not found, skipping SBR\n");
}
}
pci_lock_rescan_remove();
/* Step 1: discover new devices (ASM2806 cascade + RTL8168) */
pci_scan_child_bus(root_bus);
/* Step 2: assign BARs and bridge windows in kernel resource structs */
pci_assign_unassigned_bus_resources(root_bus);
/*
* Step 3: write bridge MEMORY_BASE/LIMIT to hardware config space.
* This is the step that pci_rescan_bus() / pci_assign_…() omit,
* and without it the RTL8168 endpoints are unreachable via MMIO.
*/
program_bridge_windows(root_bus);
/* Step 4: add devices to driver model — triggers driver probes */
pci_bus_add_devices(root_bus);
pci_unlock_rescan_remove();
pr_info(DRV_NAME ": rescan complete, bridges programmed\n");
}
/* Forward declaration for sysfs attribute */
static DEVICE_ATTR_WO(reset_asm2806);
/*
 * eb5_pcie_helper_probe - set up the EB5 pcie1 helper.
 * @pdev: platform device bound through the of_match table
 *
 * Requests the "lan-en" GPIO as output-high, registers a callback-less clock
 * plus an OF clock provider (so pcie1, which lists this node in its clocks,
 * is ordered after us by fw_devlink), queues the rescan work and creates the
 * reset_asm2806 sysfs file.
 *
 * Return: 0 on success, -ENOMEM or the error from the GPIO/clock calls
 * otherwise. Failure to create the sysfs file is only warned about.
 */
static int eb5_pcie_helper_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct eb5_pcie_helper *h;
	struct clk_init_data init = {};
	int ret;

	h = devm_kzalloc(dev, sizeof(*h), GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	/*
	 * Acquire gpio141 (ASM2806 bridge-enable) and drive it HIGH.
	 * This must happen before qcom-pcie de-asserts PERST# on RC1.
	 * The clock provider registration below ensures that qcom-pcie
	 * does not even begin probing until after this point.
	 */
	h->lan_en_gpio = devm_gpiod_get(dev, "lan-en", GPIOD_OUT_HIGH);
	if (IS_ERR(h->lan_en_gpio))
		return dev_err_probe(dev, PTR_ERR(h->lan_en_gpio),
				     "failed to get lan-en gpio\n");
	msleep(50); /* allow ASM2806 to become ready */
	dev_info(dev, "lan-en (gpio141) asserted high\n");

	/*
	 * Register a clock with an empty ops table so that fw_devlink can
	 * enforce the probe ordering: pcie1 (which lists us in its 'clocks')
	 * will not probe before the provider below is registered.
	 */
	init.name = DRV_NAME;
	init.ops = &eb5_lan_clk_ops;
	h->clk_hw.init = &init;
	ret = devm_clk_hw_register(dev, &h->clk_hw);
	if (ret)
		return dev_err_probe(dev, ret, "clk_hw_register failed\n");

	ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, &h->clk_hw);
	if (ret)
		return dev_err_probe(dev, ret,
				     "of_clk_add_hw_provider failed\n");

	/* Publish driver data before anything that may look it up can run */
	platform_set_drvdata(pdev, h);

	/* Start polling immediately; the work itself waits for the bus */
	INIT_DELAYED_WORK(&h->rescan_work, eb5_pcie1_rescan_work);
	schedule_delayed_work(&h->rescan_work, 0);

	/* device_create_file() takes the device_attribute, not its embedded .attr */
	ret = device_create_file(dev, &dev_attr_reset_asm2806);
	if (ret)
		dev_warn(dev, "failed to create reset_asm2806 sysfs: %d\n", ret);

	dev_info(dev, "clock provider registered, waiting for pcie1 bus\n");
	return 0;
}
static void eb5_pcie_helper_remove(struct platform_device *pdev)
{
struct eb5_pcie_helper *h = platform_get_drvdata(pdev);
cancel_delayed_work_sync(&h->rescan_work);
device_remove_file(&pwd->dev, &dev_attr_reset_asm2806.attr);
}
/* Device-tree match table: binds to nodes with compatible "qcom,eb5-pcie1-helper". */
static const struct of_device_id eb5_pcie_helper_of_match[] = {
{ .compatible = "qcom,eb5-pcie1-helper" },
{} /* sentinel */
};
MODULE_DEVICE_TABLE(of, eb5_pcie_helper_of_match);
/*
 * Platform driver glue: probe/remove callbacks plus the OF match table.
 * Registered through builtin_platform_driver(), i.e. as built-in code rather
 * than via module init/exit.
 * NOTE(review): the file header and the DTS comment call this an out-of-tree
 * kmod; confirm which build mode is intended.
 */
static struct platform_driver eb5_pcie_helper_driver = {
.probe = eb5_pcie_helper_probe,
.remove = eb5_pcie_helper_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = eb5_pcie_helper_of_match,
},
};
builtin_platform_driver(eb5_pcie_helper_driver);
/*
 * reset_asm2806_store - sysfs write handler that re-runs the bring-up sequence.
 * @dev:   helper platform device (driver data is struct eb5_pcie_helper)
 * @attr:  the reset_asm2806 attribute (unused)
 * @buf:   data written by user space (ignored; any write triggers the reset)
 * @count: number of bytes written
 *
 * Steps: power-cycle gpio141, set the BDF_TO_SID bypass bit in the pcie1 PARF
 * block, pulse Secondary Bus Reset on the root port, then rescan the bus.
 *
 * The pcie1 root bus is looked up before any hardware is touched. If it does
 * not exist yet there is nothing to rescan, and the bring-up notes report
 * that PARF accesses before pcie1 has probed hang the board, so the handler
 * bails out without side effects.
 *
 * Return: @count on success, -ENODEV if the pcie1 root bus does not exist.
 */
static ssize_t reset_asm2806_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct eb5_pcie_helper *h = dev_get_drvdata(dev);
	struct pci_bus *root_bus;
	struct pci_dev *rp;
	void __iomem *parf;

	root_bus = pci_find_bus(EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS);
	if (!root_bus) {
		dev_err(dev, "pcie1 root bus not found, reset sequence skipped\n");
		return -ENODEV;
	}

	dev_info(dev, "triggering full ASM2806 reset sequence...\n");

	/* 1. Power cycle gpio141 */
	gpiod_set_value_cansleep(h->lan_en_gpio, 0);
	msleep(200);
	gpiod_set_value_cansleep(h->lan_en_gpio, 1);
	msleep(500);

	/*
	 * 2. Set the bypass bit (bit 0) with a read-modify-write so the other
	 * bits of the register are preserved. 0x1c0ac00 is the address the
	 * notes give for pcie1's PARF_BDF_TO_SID_CFG.
	 */
	parf = ioremap(0x1c0ac00, 4);
	if (parf) {
		writel(readl(parf) | BIT(0), parf);
		dev_info(dev, "BDF_TO_SID_BYPASS set\n");
		iounmap(parf);
	} else {
		dev_warn(dev, "cannot map PARF, BDF_TO_SID_BYPASS not set\n");
	}

	/* 3. Trigger SBR through the root port's Bridge Control register */
	rp = pci_get_domain_bus_and_slot(EB5_PCIE1_DOMAIN, 0, PCI_DEVFN(0, 0));
	if (rp) {
		u16 bctl;

		pci_read_config_word(rp, PCI_BRIDGE_CONTROL, &bctl);
		pci_write_config_word(rp, PCI_BRIDGE_CONTROL,
				      bctl | PCI_BRIDGE_CTL_BUS_RESET);
		msleep(100);
		pci_write_config_word(rp, PCI_BRIDGE_CONTROL, bctl);
		msleep(500);
		pci_dev_put(rp);
		dev_info(dev, "SBR done\n");
	}

	/*
	 * 4. Rescan. Resources are assigned before the devices are added, as
	 * in the rescan worker; otherwise newly found devices would be handed
	 * to their drivers without BARs.
	 */
	pci_lock_rescan_remove();
	pci_scan_child_bus(root_bus);
	pci_assign_unassigned_bus_resources(root_bus);
	pci_bus_add_devices(root_bus);
	pci_unlock_rescan_remove();

	dev_info(dev, "reset sequence complete\n");
	return count;
}

已执行测试(按大类)
1) DTS 基线与 include 链确认
- 确认
qrb5165-eb5.dts -> qrb5165-rb5.dts -> sm8250.dtsi链路正确。 - 校对
pcie1主线默认脚位语义:perst=82、wake=84。
2) pcie1 链路参数测试
&pcie1设置:max-link-speed = <1>(降速到 Gen1)。&pcie1设置:num-lanes = <1>(单 lane 试验)。- 结果:
0001域仍仅有 root port,无桥后 endpoint。
3) IOMMU 映射扩展测试
- 将
iommu-map从基础项扩展到多 BDF(覆盖0x0..0x600相关条目)。 - 结果:未触发有线 endpoint 出现。
4) 供电影响排查
- 增加/确认
vdda-supply与vddpe-3v3-supply。 - 结果:无显著变化,
pcie1仍空下游。
5) WAKE/RESET 脚位组合测试
wake-gpios做过候选切换(22/84),最终回归84(与主线/旧树一致)。pcie@0/reset-gpios做过极性 A/B 试验(active low/high 组合对比)。- 结果:仍未枚举到有线 endpoint。
6) 141 板级控制线测试
- 早期方案:
gpio141作为gpio-hog output-high(常高驱动)。 - 最新方案:移除 hog,改为
pcie1附加 pinctrl(pcie1_lan1_wake_default,bias-pull-up输入态),贴近旧树语义。 - 结果:等待该版本实机回传确认。
7) pcie2 冲突隔离测试
&pcie2、&pcie2_phy设为disabled。- 结果:未改变
pcie1无下游端点现象。
8) GPIO 共享告警处置
- 观察到告警:
gpio_shared_add_proxy_lookup/qcom_pcie_probe。 - 做过修复:删除
&pcie1的perst-gpios,保留pcie@0/reset-gpios,避免同线重复申请。 - 新日志定位:告警主要落在
1c00000.pcie (pcie0),非1c08000.pcie (pcie1)。
9) 旧内核基线确认(4.19)
在旧内核
4.19.125-perf-v2下,0001域可完整枚举:0001:01:00.0/0001:02:00.0/0001:02:06.0/0001:02:0e.0均为ASMedia 1b21:28060001:04:00.0与0001:05:00.0为RTL8111/8168 (10ec:8168)
- 旧日志明确出现
RC1: asm2806(1),说明厂商侧存在 RC1 私有桥控制语义。
10) 主线运行时补偿脚本尝试(失败)
在主线
6.19.0-dirty运行pcie1-kick.sh:gpioset gpiochip0 141=...返回Invalid argument/sys/bus/platform/drivers/qcom-pcie/{bind,unbind}不存在或不可用,无法运行时重绑 RC1
- 结论:该平台当前不支持“运行时重绑+踢脚”路径,需回到“重启生效”的 DTS 单变量法。
11) 纯 DTS 单变量回归(最新)
- 已将
gpio141改回gpio-hog output-high,并移除pcie1的pcie1_lan1_wake_default引用。 - 该版本仅改
gpio141启动策略,便于与上一版直接对比。
12) 纯 DTS 单变量(本轮)
- 将
gpio141从gpio-hog改为pcie1设备级 pinctrl(pcie1_lan1_en_default),配置为output-high。 &pcie1的pinctrl-0调整为<&pcie1_default_state>, <&pcie1_lan1_en_default>。- 保持
wake-gpios=84、reset-gpios=82、qcom.c不变,确保本轮仅变更 141 的所有权与生效时机。 - 结果:等待该版本实机回传确认。
当前观测结论(最新)
- 主线 6.19
lspci稳定仅有0001:00:00.0(root port),ASM2806 / RTL8168 不可见 - vendor 4.19 完整枚举:ASM2806 × 4 + RTL8168 × 2,
eth1UP - 链路速度不是根因:Gen1 x1 下 config TLP 依然可以工作,vendor 4.19 跑 Gen3 x2 但根因另在
- 已确认根因:主线
iommu-map触发BDF_TO_SID_BYPASS被清除 → SMMU fault → 0xFFFF(见下方根因分析节) - 当前最新镜像(step 22)已将
iommu-map删除,待实机验证
13) built-in helper driver + gpiod 主动驱动 gpio141(本轮)
背景:步骤 12 中 rescan 已成功执行(dmesg 显示 "PCI 1:00 rescan complete"),但 ASM2806 config space 返回全 0xFF,说明 gpio141 未被物理驱动。Qualcomm TLMM 的 pinctrl output-high 仅设置 mux,开机后不保证实际电平被 GPIO 控制器持续驱动。
变更内容(单变量):
- DTS —
eb5-pcie1-helper节点加入lan-en-gpios = <&tlmm 141 GPIO_ACTIVE_HIGH> - DTS — 从
&pcie1 pinctrl-0移除pcie1_lan1_en_default;从&tlmm删除该 pinctrl block(gpio141 所有权唯一归 helper driver) - 驱动 —
pcie-qcom-eb5-helper.c:struct 加struct gpio_desc *lan_en_gpio - 驱动 — probe 时
devm_gpiod_get_optional(dev, "lan-en", GPIOD_OUT_HIGH)请求并立即驱动 gpio141 为高 - 驱动 — worker 在
pci_find_bus()前再次gpiod_set_value_cansleep(h->lan_en_gpio, 1)+msleep(50)确保 ASM2806 就绪
预期验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|lan-en|gpio141|rescan|0001:'
lspci -nn | grep '^0001:'
ip -br a若仍无响应,追加:
lspci -vv -s 0001:00:00.0 | grep -E 'LnkSta|Speed|Width'14) genpd provider 方案 — 让 pcie1 依赖 helper(本轮)
方案来源:建议写个像 power-domain 的驱动。
原理:将 helper 改为 generic_pm_domain (genpd) provider。&pcie1 的 power-domains 列表加入 <&eb5_pcie1_helper>,内核 PM domain 框架会在 qcom-pcie probe 之前自动调用 helper 的 power_on 回调,保证 gpio141 在 RC1 第一次枚举时已经 asserted。彻底不需要 rescan、delayed_work 和时序猜测。
变更内容(单变量):
驱动重构 — 删除所有
pci.h / workqueue.h / delayed_work / pci_scan_child_bus代码- 新结构体:
struct eb5_lan_pd { struct generic_pm_domain genpd; struct gpio_desc *lan_en_gpio; } probe:devm_gpiod_get(GPIOD_OUT_HIGH)+msleep(50)+pm_genpd_init(is_off=false)+of_genpd_add_provider_simple()remove:of_genpd_del_provider()+pm_genpd_remove()power_on回调:gpiod_set_value_cansleep(1) + msleep(50)(供 suspend/resume 使用)GENPD_FLAG_ALWAYS_ON:运行时不切断电源
- 新结构体:
- DTS — helper 节点加
#power-domain-cells = <0>;并加 labeleb5_pcie1_helper: - DTS —
&pcie1覆盖power-domains = <&gcc PCIE_1_GDSC>, <&eb5_pcie1_helper>;及power-domain-names = "gdsc", "lan-en";
时序保证:
- fw_devlink 使 pcie1 不会在 helper 注册 genpd provider 之前 probe
- genpd 框架在 pcie1 上电时调用
power_on,gpio141 在 qcom_pcie_probe→link_up→enumeration 全链路之前已高
预期验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|lan-en|pcie1.*power|0001:'
lspci -nn | grep '^0001:'
ip -br a15) 时钟 provider 排序方案(本轮)
根因分析(步骤 14 失败原因):
- 步骤 14 添加
power-domains = <&gcc PCIE_1_GDSC>, <&eb5_pcie1_helper>后,fw_devlink 和 genpd 两套机制各自为 pcie1↔helper 创建了一条 device_link。双 device_link 冲突导致pci_register_host_bridge → request_resource返回 -16 (EBUSY),pcie1 probe 失败。
当前方案(朋友思路的正确实现):
不触碰
power-domains,改用 clock provider 做排序锚点:- helper 注册一个 zero-rate dummy 时钟(
#clock-cells = <0>) qrb5165-eb5.dts在 pcie1 的clocks/clock-names末尾加<&eb5_pcie1_helper> "lan-en"- fw_devlink 识别
clocksphandle,自动保证 pcie1 probe 在 helper probe 完成之后 - helper probe 时驱动 gpio141 high +
msleep(50)→of_clk_add_hw_provider完成 - qcom-pcie 只
devm_clk_bulk_get它知道的 9 个名字,"lan-en" 被忽略,无副作用
- helper 注册一个 zero-rate dummy 时钟(
时序:
- T=1.2s: helper probe → gpio141 HIGH + clock provider 注册 → fw_devlink 放行 pcie1
- T=6.4s: pcie1 probe →
qcom_pcie_host_init→ PERST# de-assert(gpio141 已 high ≥ 5s)
变更内容:
- 驱动重构 — 移除所有 genpd/pm_domain 代码;改用
clk_init_data + devm_clk_hw_register + devm_of_clk_add_hw_provider - DTS — helper 节点
#power-domain-cells→#clock-cells = <0> - DTS — pcie1 恢复原始
power-domains(不再覆盖),添加完整clocks/clock-names列表并追加<&eb5_pcie1_helper> "lan-en"
预期 dmesg 口径:
dmesg | grep -E 'eb5-pcie1|lan-en|1c08000.pcie|0001:'
lspci -nn | grep '^0001:'
ip -br a应看到:
eb5-pcie1-helper: lan-en (gpio141) asserted high@ ~1.2seb5-pcie1-helper: clock provider registered...@ ~1.2sqcom-pcie 1c08000.pcie: PCI host bridge to bus 0001:00@ ~6.x s(probe 不再 EBUSY)0001:01:00.0 ... 1b21:2806(ASM2806)+0001:04:00.0 10ec:8168(RTL8168)
21) 修正为 Gen3 x2,重建镜像
变更(单变量):
qrb5165-eb5.dts中&pcie1的max-link-speed = <1>→<3>,num-lanes = <1>→<2>- 其余不变(clock provider、gpio141、SBR 等保留)
实测结果(已刷机):
- LnkSta 仍为
0x1011(Gen1 x1),即使写了 max-link-speed=3 也无法训练到 Gen3 - 手动写 LnkCtl2 TLS=Gen3(
setpci ... CAP_EXP+0x30.w=0x0003)再触发 retrain → 仍 Gen1 x1 - PHY 寄存器层面
sm8250_qmp_gen3x2_pciephy_cfg在主线确实存在(lanes=2,tbls_rc Gen3x2 表完整),但实际无法跑到 Gen3 - ASM2806 config space 仍返回
0xffffffff❌ - 结论:Gen1 x1 ≠ 根因。速度与 config TLP 可见性正交,Gen1 链路也应能读 config space
关键反转:步骤 21 之前的假设("Gen1 导致 ASM2806 不可见")已被证伪。继续在旧内核 4.19 上用 pci-msm debugfs 深挖寄存器差异。
22) 修复:/delete-property/ iommu-map(当前待验证)
变更(单变量,仅改 DTS):
qrb5165-eb5.dts的&pcie1中,将多条iommu-map条目替换为/delete-property/ iommu-map;- 这使
qcom_pcie_config_sid_1_9_0()因size=0提前返回,不清除BDF_TO_SID_BYPASSbit - 其余不变(max-link-speed=3, num-lanes=2, clock provider, gpio141 均保留)
镜像哈希:53d1472f5be3b1d28131ff79752dce537176661193bf33100089ea4e1bcd6066
验证口径(刷机后):
dmesg | grep -E '0001:|1c08000|eb5-pcie1'
lspci -nn | grep '^0001:' # 期望:1b21:2806 + 10ec:8168
ip -br a # 期望:eth0/eth1 UP
# 若 ASM2806 出现但 eth DOWN,追查:
dmesg | grep -E 'smmu.*fault|iommu.*error|r8169|rtl8168'预期原理:bypass=1 → SMMU 透传所有 PCIe CplD → ASM2806 config read 正常返回 0x1b212806
22) 实测结果(步骤 22 镜像,主线 6.19)
板上 dmesg 关键输出:
[ 1.232651] qcom-eb5-pcie1-helper eb5-pcie1-helper: lan-en (gpio141) asserted high
[ 1.232708] qcom-eb5-pcie1-helper eb5-pcie1-helper: clock provider registered, waiting for pcie1 bus
[ 14.151749] qcom-eb5-pcie1-helper: rescanning pcie1 (domain 1 bus 00) after 8 poll(s)
[ 14.159817] qcom-eb5-pcie1-helper: BDF_TO_SID_BYPASS re-asserted before SBR
[ 14.791767] pcieport 0001:00:00.0: qcom-eb5-pcie1-helper: SBR pulse done, waiting for ASM2806
[ 14.824456] qcom-eb5-pcie1-helper: rescan complete, bridges programmedlspci -nn | grep '^0001:'
0001:00:00.0 PCI bridge [0604]: Qualcomm Device [17cb:010b] ← 仅 root port,ASM2806 未出现 ❌诊断数据:
| 项目 | 结果 |
|---|---|
LnkSta | Speed 2.5GT/s (downgraded), Width x1 (downgraded) — 链路已训练 ✅ |
gpio141 (debugfs) | out high func0 2mA pull down — 逻辑高但驱动力弱、内部下拉有效 ⚠️ |
PARF_BDF_TO_SID_CFG (devmem) | 0x00000001 — bypass=1 已生效 ✅ |
/sys/bus/pci/devices/ | 仅 0001:00:00.0 — bus 01 无设备 ❌ |
根因分析(三处 bug):
Bug 1 — SBR 打断已训练链路:rescan_work 中 SBR 将下游链路重置,500 ms 等待不足以完成重新训练。证据:
"SBR pulse done" (T=14.791) → "rescan complete" (T=14.824),仅 33 ms。pci_scan_child_bus 在链路还未训练完成时扫描,所有 config 读返回 0xFFFF(超时),ASM2806 未出现。
SBR 结束后链路自行训练完毕(LnkSta 此时已显示 Gen1 x1),但扫描已经结束。
Bug 2 — BDF_TO_SID_BYPASS 在首次扫描时尚未置位:helper 的 probe() 仅注册时钟 provider 后让 pcie1 解锁,
但 bypass bit 只在 rescan_work(T≈14s)中才写入。pcie1 首次扫描(T≈12s)时 bypass=0,
SMMU 对所有下游 completion 执行 BDF→SID 查表,无 context bank → fault → 0xFFFF。
Bug 3 — gpio141 驱动强度不足:移除 pinctrl 后,TLMM 硬件默认为 2mA + pull-down,
输出为高但驱动力弱、内部下拉使电气余量降低。
23) 修复:probe() 预置 bypass + 移除 SBR + 恢复 gpio141 pinctrl
变更内容(三处修复):
1. 驱动 pcie-qcom-eb5-helper.c
probe():在of_clk_add_hw_provider()之前(即在 pcie1 解锁之前)写入BDF_TO_SID_BYPASS=1(read-modify-write),确保 pcie1 首次扫描时 bypass 已生效rescan_work():删除 SBR 全段(链路已由 qcom-pcie 在 probe 时训练完毕,SBR 只会打断它);bypass 仍做 read-modify-write re-assert(防止 suspend/resume 清除)reset_asm2806_store():bypass 写入改为 read-modify-write
2. DTS qrb5165-eb5.dts
- 恢复 helper 节点的 pinctrl:
drive-strength = <16>; bias-disable; output-high;(gpio141) - 修正 gpio141 电气配置,消除
2mA pull down状态
预期启动时序(修复后):
~1.2s eb5-pcie1-helper: lan-en (gpio141) asserted high
~1.2s eb5-pcie1-helper: BDF_TO_SID_BYPASS asserted before pcie1 probe
~1.2s eb5-pcie1-helper: clock provider registered, waiting for pcie1 bus
~6-12s qcom-pcie 1c08000.pcie: PCI host bridge to bus 0001:00
0001:01:00.0 1b21:2806 ← 首次扫描即发现,无需 rescan
0001:04:00.0 10ec:8168验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|BDF_TO_SID|0001:'
lspci -nn | grep '^0001:' # 期望:1b21:2806 + 10ec:8168
ip -br a # 期望:eth1/eth2 UP
cat /sys/kernel/debug/gpio | grep 141 # 期望:out high func0 16mA no pull旧内核(4.19)pci-msm debugfs 深度探查
22-vendor) 确认旧内核链路和设备状态
板上执行(vendor 4.19):
ip -br a
lo UNKNOWN 127.0.0.1/8 ::1/128
bond0 DOWN
dummy0 UNKNOWN fe80::d3f:ae5d:b7ab:4c76/64
eth1 UP 192.168.1.185/24 ... ← NIC working ✅
tailscale0 UNKNOWN 100.64.0.70/32 ...
wlan0 UP 192.168.6.1/24 ...
lspci -nn | grep '^0001:'
0001:00:00.0 PCI bridge [0604]: Qualcomm Device [17cb:010b]
0001:01:00.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:00.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:06.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:0e.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:04:00.0 Ethernet controller [0200]: Realtek ... RTL8111/8168 [10ec:8168] (rev 15) ✅
0001:05:00.0 Ethernet controller [0200]: Realtek ... RTL8111/8168 [10ec:8168] (rev 15) ✅23-vendor) pci-msm debugfs case 9-32 探查结果
执行(vendor 4.19,rc_sel=2):
echo 2 > /sys/kernel/debug/pci-msm/rc_sel
for i in 9 10 11 12 13 14 15 16 20 21 22 32; do
echo "=== case $i ==="; echo $i > case; dmesg | tail -5; sleep 0.3
done结果摘要:
| case | 含义 | 输出关键内容 |
|---|---|---|
| 9 | disable L1ss | PCIe: RC1: disable L1ss |
| 10 | enable L1ss | PCIe: RC1: enable L1ss |
| 11 | enumerate RC1 | PCIe: RC1 is already enumerated(不重枚举) |
| 12 | read PARF register | base: parf: 0xffffff80089d8000,wr_offset: 0x2c0,value: 0x0 |
| 13 | write PARF register | 写 parf+0x2c0 = 0x0(vendor 的 wr_offset=0x2c0 对应 PARF 内一个不同寄存器,非主线 PARF_BDF_TO_SID_CFG=0x2c00) |
| 14 | write PARF register | 同 case 13(延续 wr_offset/mask/value 设置) |
| 15 | map LBAR 4K DDR | 映射 DDR + LBAR=0x40008000 到内核虚地址 0xffffff8010e4d000 |
| 16 | unmap LBAR | 释放 case 15 的映射 |
| 20 | Read DDR values | DDR is not mapped(需先做 case 15) |
| 21 | Read LBAR values | LBAR address is not mapped(需先做 case 15) |
| 22 | Write 0x1 to DDR | DDR address is not mapped |
| 32 | set target speed Gen3 | PCIe: RC1: set target speed to Gen 3 |
重点:
- case=12 读
parf+0x2c0值为0x0→ vendor 4.19 该偏移为零,不对应主线的PARF_BDF_TO_SID_CFG(主线定义为0x2c00,差了一个零) - case=15 LBAR=
0x40008000(vendor driver 的 debug ioremap 窗口,非 mainline ATU0x40001000) - gpio debugfs 无输出:
cat /sys/kernel/debug/gpio | grep -E ' 82 | 141 '→ 空,说明 vendor 4.19 gpio 82/141 未以 debugfs 方式暴露(可能走的是msm_pcie私有接口)
24-vendor) pci-msm case=4 shadow dump 解析
执行(之前已获取):
echo 2 > /sys/kernel/debug/pci-msm/rc_sel
echo 4 > /sys/kernel/debug/pci-msm/case输出(RTL8168 at 05:00.0 的 shadow registers):
shadow_dw[4]:cfg 0x10: 0x40202001 # BAR0 low (64-bit prefetchable, addr=0x40202000)
shadow_dw[6]:cfg 0x18: 0x40404004 # Type-1 bridge bus numbers
shadow_dw[8]:cfg 0x20: 0x40400004 # Memory Base/Limit
shadow_dw[20]:cfg 0x50: 0x817005 # PCIe capability
shadow_dw[30]:cfg 0x78: 0x10501f # Link cap: Gen3 x2
shadow_dw[32]:cfg 0x80: 0x10110002 # LnkSta2/LnkCtl2
shadow_dw[44]:cfg 0xb0: 0x30011 # L1 sub-state cap→ vendor RTL8168 BAR 被正确分配在 0x40202000,内存窗口 0x40400000 区域正常。
根因分析(最终结论)
代码路径溯源
- 主线
ops_1_9_0(pcie-qcom.c)对应qcom,pcie-sm8250,其.config_sid = qcom_pcie_config_sid_1_9_0 该函数逻辑:
of_get_property(dev->of_node, "iommu-map", &size);
if (!size)
    return 0;                                   // ← 有 iommu-map 才往下走
val = readl(pcie->parf + PARF_BDF_TO_SID_CFG);  // PARF_BDF_TO_SID_CFG = 0x2c00
val &= ~BDF_TO_SID_BYPASS;                      // ← 清除 bypass bit(BIT(0))
writel(val, pcie->parf + PARF_BDF_TO_SID_CFG);
// 然后填充 BDF-to-SID 哈希表...
- eb5.dts 中
&pcie1有多条iommu-map(BDF 0x0000→0x0600),导致config_sid被调用 - bypass bit(BIT(0))被清除 → SMMU 对所有下游设备的 CplD 执行 BDF→SID 查表
- PCI 枚举阶段,
iommu_attach_device()尚未被调用(它发生在device_add()之后,即第一次 config read 成功之后) - SMMU 查 SID 0x1c81(ASM2806,BDF=0x0100)→ 无 context bank → fault → Cpl 被丢弃 → CPU 读到
0xffffffff
vendor 4.19 为何正常:
- out-of-tree
msm_pcie驱动从不调用任何等价config_sid逻辑 BDF_TO_SID_BYPASS(硬件上电默认值 = 1,即 bypass 开启)始终保持- 所有 PCIe completions 绕过 SMMU SID 查表,直达 CPU → ASM2806 config read 正常返回
对比表:
| 项目 | vendor 4.19 (msm_pcie) | 主线 6.19 (pcie-qcom.c) |
|---|---|---|
| 驱动 | out-of-tree,特有 RC 初始化 | 主线,通用 DWC ops_1_9_0 |
config_sid 调用 | 从不调用 | iommu-map 非空时调用 |
BDF_TO_SID_BYPASS | = 1(硬件默认,从未清除) | = 0(被主动清除) |
| ASM2806 CplD 路径 | 绕过 SMMU → CPU ✅ | SMMU fault → 丢包 ❌ |
| LnkSta | Gen3 x2 | Gen1 x1(PHY 实际不支持 Gen3) |
24) 修复:移除 probe() 中的 PARF 写入(当前待验证)
现象:步骤 23 镜像刷机后,UEFI 交接给内核后无任何串口输出(既不 reboot 也不打印),无法看到 earlycon 输出。
根因分析:
- helper 驱动为
builtin_platform_driver,在initcall阶段 probe,早于 pcie1 probe(这是 fw_devlink 时钟排序的设计目标) - pcie1 的 GDSC(
PCIE_1_GDSC)在 pcie1 自身 probe 时才被使能 - helper 的
probe()中尝试ioremap(0x01c0ac00)+readl/writel,但此时 pcie1 GDSC 未开启,PCIe PARF 寄存器组尚未上电 - 访问未上电的 PARF → NoC/AXI 总线返回错误 → SError 中断 → 内核 panic
- panic 发生在 earlycon 初始化之前,串口无任何输出
为何之前可工作(步骤 22):
- 步骤 22 的
probe()中没有 PARF 访问,bypass 写入仅在rescan_work()中(彼时 pcie1 GDSC 已开启) - 步骤 23 错误地将 bypass 写入移到了
probe(),引发上电时序问题
变更(单变量,仅驱动):
pcie-qcom-eb5-helper.c:从probe()中删除ioremap / readl / writel / iounmapPARF 写入块rescan_work()中的 bypass re-assert 保持不变(此时 pcie1 GDSC 已开启,PARF 可访问)
启动时序(修复后预期):
~1.2s eb5-pcie1-helper: lan-en (gpio141) asserted high
~1.2s eb5-pcie1-helper: clock provider registered, waiting for pcie1 bus
~6-12s qcom-pcie 1c08000.pcie: PCI host bridge to bus 0001:00 ← 首次扫描
(bypass 尚未写入 → 初次扫描可能 ASM2806 不可见,属正常)
~13-43s eb5-pcie1-helper: rescanning pcie1 (domain 1 bus 00)
eb5-pcie1-helper: BDF_TO_SID_BYPASS re-asserted ← GDSC 已开,写入安全
0001:01:00.0 1b21:2806 ← 期望出现
0001:04:00.0 10ec:8168验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|BDF_TO_SID|0001:|panic'
lspci -nn | grep '^0001:' # 期望:1b21:2806 + 10ec:8168
ip -br a # 期望:eth1/eth2 UP
cat /sys/kernel/debug/gpio | grep 141 # 期望:out high func0 16mA no pull若首次扫描已见 ASM2806(bypass 硬件默认 = 1 且 /delete-property/ iommu-map 生效):
则无需 rescan,eth1/eth2 应在 ~12s 后自动 UP,无需等待 rescan_work。
24) 实测结果(步骤 24 镜像)
板上状态:
LnkSta: Speed 2.5GT/s (downgraded), Width x1 (downgraded) ← 链路已训练 ✅
BDF_TO_SID_CFG (0x01c0ac00) = 0x00000001 ← bypass=1 ✅
gpio141: out high func0 16mA no pull ← 电气正确 ✅仍未找到 ASM2806 ❌。rescan 在 31ms 内完成,bridge mem window 显示 disabled。
根因(步骤 24 新增 bug):
iommu: Default domain type: Translated 表明所有未分配 IOMMU group 的设备,其 DMA 事务被 SMMU 静默丢弃(无日志)。
步骤 22 的 /delete-property/ iommu-map 修复了 config_sid 清除 bypass 的问题,但同时移除了 pcie1 的 IOMMU group,导致 SMMU 没有为 pcie1 建立 context bank。
bypass=1 只跳过 PARF 内的 BDF→SID 查表,不绕过 SMMU 本身:completion 到达 SMMU 时使用 bypass SID(= SID 0x1c80,root port 的 SID),而无 context bank → SMMU 静默丢弃 → ASM2806 config read 读到 0xFFFF → 未找到。
完整机制(iommu-map 两条 entry 的作用):
| 时机 | 事件 |
|---|---|
| pcie1 probe | config_sid() 写入 BDF→SID 表(0x0→0x1c80, 0x100→0x1c81),然后清除 bypass=0 |
| 初次扫描 | root port(BDF=0x0) 被找到 → iommu_attach_device → SID 0x1c80 context bank 建立 ✅ |
| 初次扫描 | ASM2806(BDF=0x100) completion 用 SID 0x1c81,无 context bank → SMMU fault → 0xFFFF ❌ |
| rescan_work | bypass=1 → 所有 completion 改用 bypass SID 0x1c80 → context bank 已存在 → 通过 ✅ |
25) 修复:恢复 iommu-map(当前待验证)
变更(单变量,仅 DTS):
qrb5165-eb5.dts中删除/delete-property/ iommu-map,令iommu-map从sm8250.dtsi正常继承- 其余不变(驱动 probe() 无 PARF 写、rescan_work bypass RMW、无 SBR、gpio141 pinctrl)
预期启动时序:
~1.2s eb5-pcie1-helper: lan-en (gpio141) asserted high
~1.2s eb5-pcie1-helper: clock provider registered
~6-12s qcom-pcie 1c08000.pcie: PCI host bridge to bus 0001:00 ← config_sid 建立 CB[0x1c80],清除 bypass=0
初次扫描:仅 root port,ASM2806 SMMU fault(预期)
~13s eb5-pcie1-helper: BDF_TO_SID_BYPASS re-asserted ← bypass=1,CB[0x1c80] 已存在
0001:01:00.0 1b21:2806 ← rescan 时 bypass SID 有 context bank
0001:04:00.0 10ec:8168验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|BDF_TO_SID|0001:|smmu.*fault'
lspci -nn | grep '^0001:' # 期望:1b21:2806 + 10ec:8168
ip -br a # 期望:eth1/eth2 UP26) 步骤 25 实测:DLActive 轮询超时 30s(Boot 1)
现象(dmesg 关键行):
eb5-pcie1-helper: waiting for DLActive (try 1/30)
...
eb5-pcie1-helper: DLActive never asserted after 30 siommu-map 恢复、bypass re-assert 均正确。但 rescan_work 等待 PCI_EXP_LNKSTA_DLLLA
(bit 13,= 0x2000)始终为 0,30 s 后放弃,不执行 rescan → ASM2806 未枚举。
根因:rescan_work 中调用 pci_set_power_state(rp, PCI_D0) 只写 PCI PM 寄存器,
不触发 qcom-pcie 平台设备的 runtime_resume(clocks/PHY 未唤醒)。
链路 LTSSM 处于低功耗状态,DLL 不处于 Active → DLLLA 永远为 0。
修复:用 pm_runtime_resume_and_get(hw_dev) 先唤醒 qcom-pcie 平台设备(clocks/PHY),
再用 pm_runtime_resume_and_get(&rp->dev) 唤醒 pcieport pci_dev,然后触发链路重训练。
27) Boot 2:pm_runtime 加入后 LnkSta=0x1011 持续 30s
变更:在 rescan_work 中改用双层 pm_runtime_resume_and_get(平台设备 + pcieport)。
触发 PCI_EXP_LNKCTL_RL(link retrain)后,轮询 LnkSta。
现象(dmesg 关键行):
eb5-pcie1-helper: qcom-pcie platform device resumed (was suspended=no)
eb5-pcie1-helper: root port in D0, resuming via runtime PM
eb5-pcie1-helper: link retrain triggered (LnkCtl=0x0041)
eb5-pcie1-helper: waiting for link-up (LnkSta=0x1011, 0/30 s)
eb5-pcie1-helper: waiting for link-up (LnkSta=0x1011, 5/30 s)
...
eb5-pcie1-helper: link never came up after 30 s (last LnkSta=0x1011)发现:LnkSta=0x1011 解码:
| 字段 | 值 | 含义 |
|---|---|---|
| CLS (bits 3:0) | 0x1 | Gen1 (2.5 GT/s) — 链路已训练 |
| NLW (bits 9:4) | 0x1 | x1 宽度 |
| SLC (bit 12) | 1 | Slot Clock Config |
| DLLLA (bit 13) | 0 | DLL 未报告 Active |
链路物理上已训练完成(Gen1 x1),但 DLLLA = 0。
旧轮询条件 LnkSta & PCI_EXP_LNKSTA_DLLLA 永远不满足 → 超时。
根因:PCI_EXP_LNKCAP_DLLLARC(LnkCap bit 20)= 0,
DWC 硬件上电默认值不置位此 bit,导致 DLLLA 字段只读且始终为 0。
见 PCIe spec §7.8.8:只有 DLLLARC=1 时 DLLLA 才反映真实 DLL 状态。
临时修复:将轮询条件改为"速度非零 + 宽度非零 + 未在训练中",
不再依赖 DLLLA:
if ((lnksta & PCI_EXP_LNKSTA_CLS) &&
(lnksta & PCI_EXP_LNKSTA_NLW) &&
!(lnksta & PCI_EXP_LNKSTA_LT))
/* link up */28) Boot 3:链路检测通过,但 rescan 仍找不到 ASM2806
现象:
eb5-pcie1-helper: link up Gen1 x1 (LnkSta=0x1011) after 0 s
eb5-pcie1-helper: BDF_TO_SID_BYPASS re-asserted
eb5-pcie1-helper: rescanning...
(无 0001:01:00.0 输出)根因溯源:
pci_scan_child_bus() → pci_scan_slot() → pci_bus_read_config_dword() →pci_bus_ops->read() → dw_pcie_other_conf_map_bus():
static struct pci_bus *dw_pcie_other_conf_map_bus(...)
{
if (!dw_pcie_link_up(pci)) /* <-- 这里 */
return NULL;
...
}dw_pcie_link_up() → pci->ops->link_up() → qcom_pcie_link_up():
static bool qcom_pcie_link_up(struct dw_pcie *pci)
{
u16 val;
pcie_capability_read_word(pci->dev, PCI_EXP_LNKSTA, &val);
return !!(val & PCI_EXP_LNKSTA_DLLLA); /* DLLLA=0 → 返回 false */
}
因 DLLLARC=0,DLLLA 永远为 0 → qcom_pcie_link_up() 永远返回 false → dw_pcie_other_conf_map_bus() 返回 NULL → bus 1 的 config read 全返回 0xFFFF →
ASM2806 读到 VID=0xFFFF → 未枚举。
这是真正的根因:不是 bypass,不是 iommu-map,而是 DLLLA=0 导致 DWC 驱动
自己拒绝发出 bus 1 的 config TLP。
29) 根因修复:设置 LnkCap.DLLLARC via DBI_RO_WR_EN
方法:DWC 有 PCIE_MISC_CONTROL_1_OFF(DBI 偏移 0x8BC),bit 0 = DBI_RO_WR_EN。
置位后,原本只读的 DBI 寄存器(含 LnkCap)可写。
写 LnkCap |= PCI_EXP_LNKCAP_DLLLARC,再清除 DBI_RO_WR_EN。
之后做一次 Secondary Bus Reset(SBR)让 LTSSM 重走 Detect→L0,
此后 DLLLA 会被硬件正确维护为 1。
sm8250.dtsi 中 pcie1 DBI 基地址 = 0x40000000(reg-names = "dbi")。
新增函数 eb5_enable_dllla_reporting():
#define EB5_PCIE1_DBI_PHYS 0x40000000UL
#define PCIE_MISC_CONTROL_1_OFF 0x8BC
#define PCIE_DBI_RO_WR_EN BIT(0)
static void eb5_enable_dllla_reporting(struct pci_dev *rp)
{
void __iomem *dbi;
u32 mc1, lnkcap;
u16 lnksta;
dbi = ioremap(EB5_PCIE1_DBI_PHYS, 0x1000);
if (!dbi) {
pr_warn(DRV_NAME ": cannot ioremap DBI, DLLLARC not set\n");
return;
}
mc1 = readl(dbi + PCIE_MISC_CONTROL_1_OFF);
writel(mc1 | PCIE_DBI_RO_WR_EN, dbi + PCIE_MISC_CONTROL_1_OFF);
lnkcap = readl(dbi + rp->pcie_cap + PCI_EXP_LNKCAP);
writel(lnkcap | PCI_EXP_LNKCAP_DLLLARC,
dbi + rp->pcie_cap + PCI_EXP_LNKCAP);
pr_info(DRV_NAME ": LnkCap 0x%08x -> 0x%08x (DLLLARC set)\n",
lnkcap, lnkcap | PCI_EXP_LNKCAP_DLLLARC);
writel(mc1, dbi + PCIE_MISC_CONTROL_1_OFF); /* 恢复 RO 保护 */
iounmap(dbi);
pci_read_config_word(rp, rp->pcie_cap + PCI_EXP_LNKSTA, &lnksta);
pr_info(DRV_NAME ": LnkSta=0x%04x DLLLA=%d (will be 1 after SBR)\n",
lnksta, !!(lnksta & PCI_EXP_LNKSTA_DLLLA));
}
注:PCI_EXP_LNKCAP_DLLLARC 已在 <linux/pci_regs.h> 中定义为 0x00100000,
不重复 #define。
SBR 序列(rescan_work 中,紧接 eb5_enable_dllla_reporting 之后):
/* SBR: 100ms assert + 500ms retrain wait */
{
struct pci_dev *rp_sbr = pci_get_domain_bus_and_slot(EB5_PCIE1_DOMAIN, 0, PCI_DEVFN(0,0));
if (rp_sbr) {
u16 bctl, lnksta;
pci_read_config_word(rp_sbr, PCI_BRIDGE_CONTROL, &bctl);
pci_write_config_word(rp_sbr, PCI_BRIDGE_CONTROL, bctl | PCI_BRIDGE_CTL_BUS_RESET);
msleep(100);
pci_write_config_word(rp_sbr, PCI_BRIDGE_CONTROL, bctl);
msleep(500);
pci_read_config_word(rp_sbr, rp_sbr->pcie_cap + PCI_EXP_LNKSTA, &lnksta);
pr_info(DRV_NAME ": post-SBR LnkSta=0x%04x DLLLA=%d\n",
lnksta, !!(lnksta & PCI_EXP_LNKSTA_DLLLA));
pci_dev_put(rp_sbr);
}
}
期望:post-SBR LnkSta=0x3011(bit 13 置位,即 DLLLA=1),qcom_pcie_link_up() 返回 true。
30) Boot 4:SBR 后仍找不到设备(DLLLARC 写入未测试)
现象:
eb5-pcie1-helper: link up Gen1 x1 (LnkSta=0x1011) after 0 s
eb5-pcie1-helper: BDF_TO_SID_BYPASS re-asserted
eb5-pcie1-helper: post-SBR LnkSta=0x1011 DLLLA=0
(无 0001:01:00.0)
SBR 触发正确,但 DLLLA 仍为 0。
原因:此版本 eb5_enable_dllla_reporting() 尚未合并,DLLLARC 未写入。
31) Boot 5:pci_get_slot 竞争导致 "root port device not found"
变更:合并 eb5_enable_dllla_reporting()(DBI_RO_WR_EN + DLLLARC 写入)。
现象:
eb5-pcie1-helper: root port device not found after 500 ms
rescan_work 调用 pci_find_bus() 成功(bus 结构已创建),
但紧接着 pci_get_slot(root_bus, 0) 失败(root port 的 pci_dev 尚未
由 pci_host_probe() 加入,约 13 ms 的窗口)。
修复:添加重试循环,最多 50×10 ms = 500 ms:
int slot_try;
for (slot_try = 0; slot_try < 50; slot_try++) {
rp = pci_get_slot(root_bus, 0);
if (rp)
break;
msleep(10);
}
if (!rp) {
pr_err(DRV_NAME ": root port device not found after 500 ms\n");
return false;
}
附注:此 boot 同时发现 format string 中有 UTF-8 箭头 →,
gcc 产生警告。改为 ASCII -> 消除警告。
32) 当前代码状态(Boot 6 前,待验证)
pcie-qcom-eb5-helper.c 关键流程:
probe():
1. gpio141 驱动高(ASM2806 enable,PERST# 之前)
2. 注册 dummy clock provider(#clock-cells = <0>)
3. 调度 rescan_work(延迟 10s)
rescan_work():
1. pci_find_bus(domain=1, busnum=0),最多等待 60s
2. pci_get_slot() 重试最多 500ms(修复竞争)
3. pm_runtime_resume_and_get(hw_dev) ← qcom-pcie 平台设备
4. pm_runtime_resume_and_get(&rp->dev) ← pcieport pci_dev
5. 触发 PCI_EXP_LNKCTL_RL(link retrain)
6. 轮询链路(速度+宽度+非训练中),最多 30s
7. ioremap(0x01c0ac00) → BDF_TO_SID_BYPASS re-assert(RMW BIT(0)=1)
8. eb5_enable_dllla_reporting(): DBI_RO_WR_EN + DLLLARC 写入
9. SBR: PCI_BRIDGE_CTL_BUS_RESET 100ms,等待 500ms
10. 读 post-SBR LnkSta,期望 DLLLA=1
11. pci_lock_rescan_remove()
12. pci_scan_child_bus() + assign resources + add devices
13. pci_unlock_rescan_remove()
14. pm_runtime_put(&rp->dev) + pm_runtime_put(hw_dev)
15. pci_dev_put(rp)
qrb5165-eb5.dts 关键配置:
- iommu-map:9 条 entry(SID 0x1c80–0x1c88),预建所有下游 context bank
- max-link-speed = <1>:强制 Gen1,避免 DWC 发起 Gen3 equalization
- num-lanes = <2>
- clocks 末尾加 <&eb5_pcie1_helper> + clock-names 末尾 "lan-en":fw_devlink 确保 helper probe 先于 qcom-pcie
- eb5_pcie1_helper 节点:lan-en-gpios = <&tlmm 141>,#clock-cells = <0>
- pcie1_lan_en_default:gpio141,16mA,bias-disable,output-high
待验证(Boot 6 期望):
eb5-pcie1-helper: LnkCap 0x... -> 0x... (DLLLARC set)
eb5-pcie1-helper: post-SBR LnkSta=0x3011 DLLLA=1
0001:01:00.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806]
0001:04:00.0 Ethernet controller [0200]: Realtek ... [10ec:8168]