环境
- 目标板:QRB5165 EB5
- 基线:`qrb5165-eb5.dts` 继承 `qrb5165-rb5.dts`
- 目标:恢复 `pcie1` 下有线网卡 endpoint 枚举

主线 dts
// SPDX-License-Identifier: BSD-3-Clause
/*
* Qualcomm Robotics EB5 platform based on QRB5165/RB5
*/
/dts-v1/;
#include "qrb5165-rb5.dts"
/*
 * Board-level root node: EB5 model/compatible strings plus aliases for the
 * UFS host controller and the three PCIe controllers.
 */
/ {
model = "Qualcomm Technologies, Inc. qrb5165 IOT EB5";
/* The list ends with the RB5 board and SM8250 SoC strings this file builds on. */
compatible = "qcom,kona-iot", "qcom,kona", "qcom,iot",
"qcom,eb5", "qcom,qrb5165-rb5", "qcom,sm8250";
aliases {
ufshc1 = &ufs_mem_hc;
/*
 * NOTE(review): presumably meant to pin PCI domain numbering (the helper
 * driver hard-codes pcie1 as domain 1) — confirm these aliases are
 * actually consumed by the kernel in use.
 */
pci-domain0 = &pcie0;
pci-domain1 = &pcie1;
pci-domain2 = &pcie2;
};
};
/*
 * pcie1 overrides for EB5: enable the controller, request Gen3 x2, declare
 * the wake GPIO and supplies, drop iommu-map, and append the helper node to
 * the clock list purely as a probe-ordering dependency.
 */
&pcie1 {
status = "okay";
/* Gen3, two lanes */
max-link-speed = <3>;
num-lanes = <2>;
wake-gpios = <&tlmm 84 GPIO_ACTIVE_HIGH>;
vdda-supply = <&vreg_l9a_1p2>;
vddpe-3v3-supply = <&vreg_l11c_3p3>;
/*
 * Remove iommu-map so qcom_pcie_config_sid_1_9_0() returns early
 * and never clears BDF_TO_SID_BYPASS. With bypass ON (hardware
 * default), inbound PCIe completions skip SMMU BDF-to-SID lookup.
 *
 * Background: during PCI bus walk the SMMU has no context banks for
 * downstream-device SIDs yet (iommu_attach_device() is called only
 * after device_add(), i.e. AFTER the first config read). The vendor
 * msm_pcie driver never touches BDF_TO_SID_CFG, so bypass stays ON
 * there and ASM2806 completions flow through unblocked.
 */
/delete-property/ iommu-map;
/*
 * Append the helper as an extra clock entry so fw_devlink ensures
 * the helper probes (and gpio141 is driven) before qcom-pcie probes.
 * qcom-pcie only requests clocks by the names it knows; "lan-en" is
 * silently ignored by the driver but honored by fw_devlink.
 *
 * The first nine entries must stay paired one-to-one with the first
 * nine clock-names; the helper phandle pairs with "lan-en".
 */
clocks = <&gcc GCC_PCIE_1_PIPE_CLK>,
<&gcc GCC_PCIE_1_AUX_CLK>,
<&gcc GCC_PCIE_1_CFG_AHB_CLK>,
<&gcc GCC_PCIE_1_MSTR_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_Q2A_AXI_CLK>,
<&gcc GCC_PCIE_WIGIG_CLKREF_EN>,
<&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>,
<&gcc GCC_DDRSS_PCIE_SF_TBU_CLK>,
<&eb5_pcie1_helper>;
clock-names = "pipe", "aux", "cfg", "bus_master", "bus_slave",
"slave_q2a", "ref", "tbu", "ddrss_sf_tbu", "lan-en";
/* Only the default pcie1 pin state; gpio141 is requested by the helper node instead. */
pinctrl-0 = <&pcie1_default_state>;
/* Root-port child node; no properties are overridden here. */
pcie@0 {
};
};
/*
 * Lightweight anchor node for the eb5-pcie1-helper driver.
 * The driver waits for RC1 link stabilisation and then rescans pcie1 (domain 1)
 * so the ASM2806 cascade + RTL8168 endpoints can be discovered without any
 * modifications to qcom.c.
 */
/ {
eb5_pcie1_helper: eb5-pcie1-helper {
compatible = "qcom,eb5-pcie1-helper";
/* gpio141: requested by the driver as "lan-en" and driven high at probe. */
lan-en-gpios = <&tlmm 141 GPIO_ACTIVE_HIGH>;
/* Zero-cell clock provider so &pcie1 can reference this node in 'clocks'. */
#clock-cells = <0>;
status = "okay";
};
};
/* pcie2 and its PHY are switched off on this board to isolate pcie1 from possible conflicts. */
&pcie2 {
status = "disabled";
};
&pcie2_phy {
status = "disabled";
};
&tlmm {
};

EB5 Helper Module
// SPDX-License-Identifier: GPL-2.0
/*
* pcie-qcom-eb5-helper.c - PCIe1 ASM2806 bridge bring-up helper for QRB5165 EB5
*
* The Qualcomm vendor 4.19 kernel has a private "use-pcie-bridge-asm2806" DT
* property that drives gpio141 (ASM2806 bridge-enable) before the RC1 PCIe
* controller enumerates the bus. Mainline qcom-pcie has no such logic.
*
* This driver:
* 1. Acquires gpio141 and drives it HIGH on probe (before qcom-pcie touches
* PERST#).
* 2. Registers itself as a dummy fixed-rate clock provider (#clock-cells = <0>),
* which qrb5165-eb5.dts adds to pcie1's clock list. fw_devlink sees this
* phandle and guarantees that pcie1 will NOT be probed until this driver's
* probe() returns successfully.
* 3. Schedules a deferred rescan after the ASM2806 cascade downstream links
* finish training. The rescan explicitly programs bridge MEMORY_BASE/LIMIT
* registers to hardware — pci_assign_unassigned_bus_resources() only updates
* kernel data structures but does NOT write the bridge window registers,
* which would leave the RTL8168 endpoints inaccessible.
*
* Result: gpio141 is driven high BEFORE qcom_pcie_host_init() de-asserts PERST#,
* so the ASM2806 bridge is powered and ready for config-space enumeration.
*
* Nothing in qcom.c / pcie-qcom.c is modified.
*/
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
#include <linux/clk-provider.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/io.h>
/* PCI domain 1, root bus 0 — matches pcie1 (1c08000.pcie) */
#define EB5_PCIE1_DOMAIN 1
#define EB5_PCIE1_ROOT_BUS 0
/*
 * Poll interval while waiting for pcie1 to create its root bus.
 * pcie1 probe can take 10-15 s at boot; we retry up to 30 times (30 s).
 */
#define EB5_POLL_INTERVAL_MS 1000
#define EB5_POLL_MAX_RETRIES 30
/* Platform driver name; also used as the prefix of pr_*() messages and as the dummy clock name. */
#define DRV_NAME "qcom-eb5-pcie1-helper"
/**
 * struct eb5_pcie_helper - per-device state of the EB5 pcie1 helper
 * @lan_en_gpio: descriptor of the "lan-en" line requested at probe
 * @clk_hw:      ops-less clock registered only so pcie1 can list the helper
 *               in its 'clocks' property
 * @rescan_work: delayed work that polls for the pcie1 root bus and rescans it
 */
struct eb5_pcie_helper {
	struct gpio_desc	*lan_en_gpio;
	struct clk_hw		clk_hw;
	struct delayed_work	rescan_work;
};
/*
 * Empty clk_ops: the clock has no callbacks and exists only as a
 * probe-ordering anchor.
 */
static const struct clk_ops eb5_lan_clk_ops = {
};
/*
* program_bridge_windows - write bridge memory windows to hardware registers.
*
* pci_assign_unassigned_bus_resources() assigns memory windows in the kernel's
* resource tree but does NOT write PCI_MEMORY_BASE / PCI_MEMORY_LIMIT to the
* bridge's config space. Without this step the bridge does not forward memory
* transactions downstream, so endpoint drivers fail on their very first MMIO
* access. Recurse into child buses so all levels of ASM2806 are programmed.
*/
static void program_bridge_windows(struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
struct resource *res;
u16 cmd;
if (!dev->subordinate)
continue;
/* Write 32-bit non-prefetchable memory window */
res = &dev->resource[PCI_BRIDGE_MEM_WINDOW];
if (resource_size(res) > 0) {
u16 mem_base = (res->start >> 16) & 0xfff0;
u16 mem_limit = (res->end >> 16) & 0xfff0;
pci_write_config_word(dev, PCI_MEMORY_BASE, mem_base);
pci_write_config_word(dev, PCI_MEMORY_LIMIT, mem_limit);
dev_info(&dev->dev, "bridge mem window programmed: %pR\n", res);
}
/* Write I/O window if present */
res = &dev->resource[PCI_BRIDGE_IO_WINDOW];
if (resource_size(res) > 0) {
pci_write_config_byte(dev, PCI_IO_BASE,
(res->start >> 8) & 0xf0);
pci_write_config_byte(dev, PCI_IO_LIMIT,
(res->end >> 8) & 0xf0);
}
/* Enable bus-mastering and memory-space decoding on the bridge */
pci_read_config_word(dev, PCI_COMMAND, &cmd);
cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
pci_write_config_word(dev, PCI_COMMAND, cmd);
program_bridge_windows(dev->subordinate);
}
}
static void eb5_pcie1_rescan_work(struct work_struct *work)
{
struct eb5_pcie_helper *h =
container_of(work, struct eb5_pcie_helper, rescan_work.work);
struct pci_bus *root_bus;
int retries = 0;
/* Re-assert gpio141 in case it was reset during suspend/resume */
gpiod_set_value_cansleep(h->lan_en_gpio, 1);
/*
* pcie1 probe (and bus creation) can take 10-15 s at boot — much
* longer than the helper's own probe. Poll until the root bus
* appears, then give the ASM2806 downstream links an extra second
* to finish training before we scan.
*/
while (retries < EB5_POLL_MAX_RETRIES) {
root_bus = pci_find_bus(EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS);
if (root_bus)
break;
pr_debug(DRV_NAME ": waiting for pcie1 bus (attempt %d/%d)\n",
retries + 1, EB5_POLL_MAX_RETRIES);
msleep(EB5_POLL_INTERVAL_MS);
retries++;
}
if (!root_bus) {
pr_err(DRV_NAME ": domain %u bus %02x not found after %d s, giving up\n",
EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS, EB5_POLL_MAX_RETRIES);
return;
}
/* Extra settling time for ASM2806 downstream link training */
msleep(1000);
pr_info(DRV_NAME ": rescanning pcie1 (domain %u bus %02x) after %d poll(s)\n",
EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS, retries);
/*
* Issue a Secondary Bus Reset (SBR) via the root port's Bridge Control
* register. This pulses the downstream PERST# from the RC side without
* needing direct GPIO access. The ASM2806 may have been unresponsive
* during the initial enumeration because its internal init was not yet
* complete when qcom-pcie first de-asserted PERST# at probe time.
* Driving SBR here — after gpio141 has been high for several seconds —
* gives the ASM2806 a clean reset cycle with power already stable.
*/
{
struct pci_dev *rp = pci_get_domain_bus_and_slot(
EB5_PCIE1_DOMAIN, 0, PCI_DEVFN(0, 0));
if (rp) {
u16 bctl;
pci_read_config_word(rp, PCI_BRIDGE_CONTROL, &bctl);
/* Assert Secondary Bus Reset */
pci_write_config_word(rp, PCI_BRIDGE_CONTROL,
bctl | PCI_BRIDGE_CTL_BUS_RESET);
msleep(100); /* hold reset ≥ 100 ms (PCIe r3.0 §6.6.1) */
/* De-assert Secondary Bus Reset */
pci_write_config_word(rp, PCI_BRIDGE_CONTROL, bctl);
msleep(500); /* wait for ASM2806 to finish link re-training */
dev_info(&rp->dev,
DRV_NAME ": SBR pulse done, waiting for ASM2806\n");
pci_dev_put(rp);
} else {
pr_warn(DRV_NAME ": root port not found, skipping SBR\n");
}
}
pci_lock_rescan_remove();
/* Step 1: discover new devices (ASM2806 cascade + RTL8168) */
pci_scan_child_bus(root_bus);
/* Step 2: assign BARs and bridge windows in kernel resource structs */
pci_assign_unassigned_bus_resources(root_bus);
/*
* Step 3: write bridge MEMORY_BASE/LIMIT to hardware config space.
* This is the step that pci_rescan_bus() / pci_assign_…() omit,
* and without it the RTL8168 endpoints are unreachable via MMIO.
*/
program_bridge_windows(root_bus);
/* Step 4: add devices to driver model — triggers driver probes */
pci_bus_add_devices(root_bus);
pci_unlock_rescan_remove();
pr_info(DRV_NAME ": rescan complete, bridges programmed\n");
}
/* Forward declaration for sysfs attribute */
static DEVICE_ATTR_WO(reset_asm2806);
/*
 * eb5_pcie_helper_probe - drive the ASM2806 enable line and anchor pcie1.
 * @pwd: platform device matched against "qcom,eb5-pcie1-helper"
 *
 * Sequence:
 *   1. request the "lan-en" GPIO as output-high and wait 50 ms;
 *   2. register an ops-less clk_hw and an OF clock provider, so that pcie1
 *      (which lists this node in 'clocks') is ordered after this probe;
 *   3. queue the rescan worker, which itself waits for the pcie1 root bus;
 *   4. create the write-only "reset_asm2806" sysfs attribute (best effort:
 *      a failure is logged but does not fail the probe).
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error reported
 * by the GPIO / clock registration calls.
 */
static int eb5_pcie_helper_probe(struct platform_device *pwd)
{
	struct device *dev = &pwd->dev;
	struct eb5_pcie_helper *h;
	struct clk_init_data init = {};
	int ret;

	h = devm_kzalloc(dev, sizeof(*h), GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	/*
	 * Acquire gpio141 (ASM2806 bridge-enable) and drive it HIGH.
	 * This must happen before qcom-pcie de-asserts PERST# on RC1.
	 * The clock provider registration below ensures that qcom-pcie
	 * does not even begin probing until after this point.
	 */
	h->lan_en_gpio = devm_gpiod_get(dev, "lan-en", GPIOD_OUT_HIGH);
	if (IS_ERR(h->lan_en_gpio))
		return dev_err_probe(dev, PTR_ERR(h->lan_en_gpio),
				     "failed to get lan-en gpio\n");

	msleep(50); /* allow ASM2806 to become ready */
	dev_info(dev, "lan-en (gpio141) asserted high\n");

	/*
	 * Register a dummy clock so that fw_devlink can enforce the probe
	 * ordering: pcie1 (which lists us in its 'clocks') will not probe
	 * before the provider below has been added.
	 */
	init.name = DRV_NAME;
	init.ops = &eb5_lan_clk_ops;
	h->clk_hw.init = &init;

	ret = devm_clk_hw_register(dev, &h->clk_hw);
	if (ret)
		return dev_err_probe(dev, ret, "clk_hw_register failed\n");

	ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, &h->clk_hw);
	if (ret)
		return dev_err_probe(dev, ret,
				     "of_clk_add_hw_provider failed\n");

	/* The sysfs store handler reads drvdata, so publish it first. */
	platform_set_drvdata(pwd, h);

	/* Start polling immediately; the work itself waits for the bus */
	INIT_DELAYED_WORK(&h->rescan_work, eb5_pcie1_rescan_work);
	schedule_delayed_work(&h->rescan_work, 0);

	/* device_create_file() takes the device_attribute, not its .attr member */
	ret = device_create_file(dev, &dev_attr_reset_asm2806);
	if (ret)
		dev_warn(dev, "failed to create reset_asm2806 sysfs: %d\n", ret);

	dev_info(dev, "clock provider registered, waiting for pcie1 bus\n");
	return 0;
}
static void eb5_pcie_helper_remove(struct platform_device *pdev)
{
struct eb5_pcie_helper *h = platform_get_drvdata(pdev);
cancel_delayed_work_sync(&h->rescan_work);
device_remove_file(&pwd->dev, &dev_attr_reset_asm2806.attr);
}
/* Devicetree match table: binds to the helper anchor node. */
static const struct of_device_id eb5_pcie_helper_of_match[] = {
	{ .compatible = "qcom,eb5-pcie1-helper", },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, eb5_pcie_helper_of_match);
/* Platform driver glue: name, OF match table and the probe/remove callbacks. */
static struct platform_driver eb5_pcie_helper_driver = {
	.driver	= {
		.name		= DRV_NAME,
		.of_match_table	= eb5_pcie_helper_of_match,
	},
	.probe	= eb5_pcie_helper_probe,
	.remove	= eb5_pcie_helper_remove,
};
/* Register the platform driver through the built-in (non-modular) helper macro. */
builtin_platform_driver(eb5_pcie_helper_driver);
/*
 * reset_asm2806_store - sysfs trigger for a full ASM2806 reset sequence.
 * @dev:   helper platform device
 * @attr:  the reset_asm2806 attribute (unused)
 * @buf:   data written by userspace (content is ignored)
 * @count: number of bytes written
 *
 * Any write performs, in order:
 *   1. power-cycle of the lan-en GPIO (200 ms low, then 500 ms settle);
 *   2. setting bit 0 (BDF_TO_SID_BYPASS) of the register at 0x1c0ac00 with
 *      a read-modify-write, so the other bits of the register are kept;
 *   3. a Secondary Bus Reset pulse on the pcie1 root port, if present;
 *   4. scan, resource assignment, bridge-window programming and device
 *      registration on the pcie1 root bus (same steps as the boot worker).
 *
 * Steps 3 and 4 run under the PCI rescan/remove lock so the root bus cannot
 * go away while it is in use.
 *
 * NOTE(review): 0x1c0ac00 is presumably the pcie1 base 0x1c08000 plus
 * PARF_BDF_TO_SID_CFG (0x2c00) — confirm against sm8250.dtsi.
 *
 * Return: @count on success, -ENODEV if the pcie1 root bus does not exist
 * (steps 1 and 2 have already been carried out in that case).
 */
static ssize_t reset_asm2806_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct eb5_pcie_helper *h = dev_get_drvdata(dev);
	void __iomem *parf;
	struct pci_bus *root_bus;
	struct pci_dev *rp;
	ssize_t ret = count;

	dev_info(dev, "triggering full ASM2806 reset sequence...\n");

	/* 1. Power cycle gpio141 */
	gpiod_set_value_cansleep(h->lan_en_gpio, 0);
	msleep(200);
	gpiod_set_value_cansleep(h->lan_en_gpio, 1);
	msleep(500);

	/* 2. Set the bypass bit without disturbing the rest of the register */
	parf = ioremap(0x1c0ac00, 4);
	if (parf) {
		writel(readl(parf) | BIT(0), parf);
		dev_info(dev, "BDF_TO_SID_BYPASS set\n");
		iounmap(parf);
	} else {
		dev_warn(dev, "failed to map BDF_TO_SID_CFG, bypass not set\n");
	}

	pci_lock_rescan_remove();

	root_bus = pci_find_bus(EB5_PCIE1_DOMAIN, EB5_PCIE1_ROOT_BUS);
	if (!root_bus) {
		/* Nothing to reset or rescan; never hand NULL to the PCI core. */
		dev_err(dev, "pcie1 root bus not found, skipping SBR and rescan\n");
		ret = -ENODEV;
		goto unlock;
	}

	/* 3. Trigger SBR on the root port */
	rp = pci_get_domain_bus_and_slot(EB5_PCIE1_DOMAIN, 0, PCI_DEVFN(0, 0));
	if (rp) {
		u16 bctl;

		pci_read_config_word(rp, PCI_BRIDGE_CONTROL, &bctl);
		pci_write_config_word(rp, PCI_BRIDGE_CONTROL,
				      bctl | PCI_BRIDGE_CTL_BUS_RESET);
		msleep(100);
		pci_write_config_word(rp, PCI_BRIDGE_CONTROL, bctl);
		msleep(500);
		pci_dev_put(rp);
		dev_info(dev, "SBR done\n");
	}

	/* 4. Rescan, mirroring the sequence used by the boot-time worker */
	pci_scan_child_bus(root_bus);
	pci_assign_unassigned_bus_resources(root_bus);
	program_bridge_windows(root_bus);
	pci_bus_add_devices(root_bus);

	dev_info(dev, "reset sequence complete\n");

unlock:
	pci_unlock_rescan_remove();
	return ret;
}

已执行测试(按大类)
1) DTS 基线与 include 链确认
- 确认 `qrb5165-eb5.dts -> qrb5165-rb5.dts -> sm8250.dtsi` 链路正确。
- 校对 `pcie1` 主线默认脚位语义:perst=82、wake=84。
2) pcie1 链路参数测试
- `&pcie1` 设置:`max-link-speed = <1>`(降速到 Gen1)。
- `&pcie1` 设置:`num-lanes = <1>`(单 lane 试验)。
- 结果:`0001` 域仍仅有 root port,无桥后 endpoint。
3) IOMMU 映射扩展测试
- 将 `iommu-map` 从基础项扩展到多 BDF(覆盖 `0x0..0x600` 相关条目)。
- 结果:未触发有线 endpoint 出现。
4) 供电影响排查
- 增加/确认 `vdda-supply` 与 `vddpe-3v3-supply`。
- 结果:无显著变化,`pcie1` 仍空下游。
5) WAKE/RESET 脚位组合测试
- `wake-gpios` 做过候选切换(22/84),最终回归 84(与主线/旧树一致)。
- `pcie@0/reset-gpios` 做过极性 A/B 试验(active low/high 组合对比)。
- 结果:仍未枚举到有线 endpoint。
6) 141 板级控制线测试
- 早期方案:`gpio141` 作为 `gpio-hog output-high`(常高驱动)。
- 最新方案:移除 hog,改为 `pcie1` 附加 pinctrl(`pcie1_lan1_wake_default`,`bias-pull-up` 输入态),贴近旧树语义。
- 结果:等待该版本实机回传确认。
7) pcie2 冲突隔离测试
- `&pcie2`、`&pcie2_phy` 设为 `disabled`。
- 结果:未改变 `pcie1` 无下游端点现象。
8) GPIO 共享告警处置
- 观察到告警:`gpio_shared_add_proxy_lookup` / `qcom_pcie_probe`。
- 做过修复:删除 `&pcie1` 的 `perst-gpios`,保留 `pcie@0/reset-gpios`,避免同线重复申请。
- 新日志定位:告警主要落在 `1c00000.pcie (pcie0)`,非 `1c08000.pcie (pcie1)`。
9) 旧内核基线确认(4.19)
在旧内核 `4.19.125-perf-v2` 下,`0001` 域可完整枚举:
- `0001:01:00.0` / `0001:02:00.0` / `0001:02:06.0` / `0001:02:0e.0` 均为 ASMedia `1b21:2806`
- `0001:04:00.0` 与 `0001:05:00.0` 为 RTL8111/8168 (`10ec:8168`)
- 旧日志明确出现 `RC1: asm2806(1)`,说明厂商侧存在 RC1 私有桥控制语义。
10) 主线运行时补偿脚本尝试(失败)
在主线 `6.19.0-dirty` 运行 `pcie1-kick.sh`:
- `gpioset gpiochip0 141=...` 返回 `Invalid argument`
- `/sys/bus/platform/drivers/qcom-pcie/{bind,unbind}` 不存在或不可用,无法运行时重绑 RC1
- 结论:该平台当前不支持“运行时重绑+踢脚”路径,需回到“重启生效”的 DTS 单变量法。
11) 纯 DTS 单变量回归(最新)
- 已将 `gpio141` 改回 `gpio-hog output-high`,并移除 `pcie1` 的 `pcie1_lan1_wake_default` 引用。
- 该版本仅改 `gpio141` 启动策略,便于与上一版直接对比。
12) 纯 DTS 单变量(本轮)
- 将 `gpio141` 从 `gpio-hog` 改为 `pcie1` 设备级 pinctrl(`pcie1_lan1_en_default`),配置为 `output-high`。
- `&pcie1` 的 `pinctrl-0` 调整为 `<&pcie1_default_state>, <&pcie1_lan1_en_default>`。
- 保持 `wake-gpios=84`、`reset-gpios=82`、`qcom.c` 不变,确保本轮仅变更 141 的所有权与生效时机。
- 结果:等待该版本实机回传确认。
当前观测结论(最新)
- 主线 6.19 `lspci` 稳定仅有 `0001:00:00.0`(root port),ASM2806 / RTL8168 不可见
- vendor 4.19 完整枚举:ASM2806 × 4 + RTL8168 × 2,`eth1` UP
- 链路速度不是根因:Gen1 x1 下 config TLP 依然可以工作;vendor 4.19 虽跑在 Gen3 x2,但根因另在别处
- 已确认根因:主线 `iommu-map` 触发 `BDF_TO_SID_BYPASS` 被清除 → SMMU fault → 0xFFFF(见下方根因分析节)
- 当前最新镜像(step 22)已将 `iommu-map` 删除,待实机验证
13) built-in helper driver + gpiod 主动驱动 gpio141(本轮)
背景:步骤 12 中 rescan 已成功执行(dmesg 显示 "PCI 1:00 rescan complete"),但 ASM2806 config space 返回全 0xFF,说明 gpio141 未被物理驱动。Qualcomm TLMM 的 pinctrl output-high 仅设置 mux,开机后不保证实际电平被 GPIO 控制器持续驱动。
变更内容(单变量):
- DTS — `eb5-pcie1-helper` 节点加入 `lan-en-gpios = <&tlmm 141 GPIO_ACTIVE_HIGH>`
- DTS — 从 `&pcie1 pinctrl-0` 移除 `pcie1_lan1_en_default`;从 `&tlmm` 删除该 pinctrl block(gpio141 所有权唯一归 helper driver)
- 驱动 — `pcie-qcom-eb5-helper.c`:struct 加 `struct gpio_desc *lan_en_gpio`
- 驱动 — probe 时 `devm_gpiod_get_optional(dev, "lan-en", GPIOD_OUT_HIGH)` 请求并立即驱动 gpio141 为高
- 驱动 — worker 在 `pci_find_bus()` 前再次 `gpiod_set_value_cansleep(h->lan_en_gpio, 1)` + `msleep(50)` 确保 ASM2806 就绪
预期验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|lan-en|gpio141|rescan|0001:'
lspci -nn | grep '^0001:'
ip -br a

若仍无响应,追加:
lspci -vv -s 0001:00:00.0 | grep -E 'LnkSta|Speed|Width'

14) genpd provider 方案 — 让 pcie1 依赖 helper(本轮)
方案来源:建议写个像 power-domain 的驱动。
原理:将 helper 改为 generic_pm_domain (genpd) provider。&pcie1 的 power-domains 列表加入 <&eb5_pcie1_helper>,内核 PM domain 框架会在 qcom-pcie probe 之前自动调用 helper 的 power_on 回调,保证 gpio141 在 RC1 第一次枚举时已经 asserted。彻底不需要 rescan、delayed_work 和时序猜测。
变更内容(单变量):
- 驱动重构 — 删除所有 `pci.h` / `workqueue.h` / `delayed_work` / `pci_scan_child_bus` 代码
  - 新结构体:`struct eb5_lan_pd { struct generic_pm_domain genpd; struct gpio_desc *lan_en_gpio; }`
  - `probe`:`devm_gpiod_get(GPIOD_OUT_HIGH)` + `msleep(50)` + `pm_genpd_init(is_off=false)` + `of_genpd_add_provider_simple()`
  - `remove`:`of_genpd_del_provider()` + `pm_genpd_remove()`
  - `power_on` 回调:`gpiod_set_value_cansleep(1) + msleep(50)`(供 suspend/resume 使用)
  - `GENPD_FLAG_ALWAYS_ON`:运行时不切断电源
- DTS — helper 节点加 `#power-domain-cells = <0>;` 并加 label `eb5_pcie1_helper:`
- DTS — `&pcie1` 覆盖 `power-domains = <&gcc PCIE_1_GDSC>, <&eb5_pcie1_helper>;` 及 `power-domain-names = "gdsc", "lan-en";`
时序保证:
- fw_devlink 使 pcie1 不会在 helper 注册 genpd provider 之前 probe
- genpd 框架在 pcie1 上电时调用 `power_on`,gpio141 在 qcom_pcie_probe→link_up→enumeration 全链路之前已高
预期验证口径(刷机后):
dmesg | grep -E 'eb5-pcie1|lan-en|pcie1.*power|0001:'
lspci -nn | grep '^0001:'
ip -br a

15) 时钟 provider 排序方案(本轮)
根因分析(步骤 14 失败原因):
- 步骤 14 添加 `power-domains = <&gcc PCIE_1_GDSC>, <&eb5_pcie1_helper>` 后,fw_devlink 和 genpd 两套机制各自为 pcie1↔helper 创建了一条 device_link。双 device_link 冲突导致 `pci_register_host_bridge → request_resource` 返回 -16 (EBUSY),pcie1 probe 失败。
当前方案(朋友思路的正确实现):
不触碰 `power-domains`,改用 clock provider 做排序锚点:
- helper 注册一个 zero-rate dummy 时钟(`#clock-cells = <0>`)
- `qrb5165-eb5.dts` 在 pcie1 的 `clocks` / `clock-names` 末尾加 `<&eb5_pcie1_helper>` / `"lan-en"`
- fw_devlink 识别 `clocks` phandle,自动保证 pcie1 probe 在 helper probe 完成之后
- helper probe 时驱动 gpio141 high + `msleep(50)` → `of_clk_add_hw_provider` 完成
- qcom-pcie 只 `devm_clk_bulk_get` 它知道的 9 个名字,"lan-en" 被忽略,无副作用
时序:
- T=1.2s: helper probe → gpio141 HIGH + clock provider 注册 → fw_devlink 放行 pcie1
- T=6.4s: pcie1 probe → `qcom_pcie_host_init` → PERST# de-assert(gpio141 已 high ≥ 5s)
变更内容:
- 驱动重构 — 移除所有 genpd/pm_domain 代码;改用 `clk_init_data + devm_clk_hw_register + devm_of_clk_add_hw_provider`
- DTS — helper 节点 `#power-domain-cells` → `#clock-cells = <0>`
- DTS — pcie1 恢复原始 `power-domains`(不再覆盖),添加完整 `clocks` / `clock-names` 列表并追加 `<&eb5_pcie1_helper>` / `"lan-en"`
预期 dmesg 口径:
dmesg | grep -E 'eb5-pcie1|lan-en|1c08000.pcie|0001:'
lspci -nn | grep '^0001:'
ip -br a

应看到:
- `eb5-pcie1-helper: lan-en (gpio141) asserted high` @ ~1.2s
- `eb5-pcie1-helper: clock provider registered...` @ ~1.2s
- `qcom-pcie 1c08000.pcie: PCI host bridge to bus 0001:00` @ ~6.x s(probe 不再 EBUSY)
- `0001:01:00.0 ... 1b21:2806`(ASM2806)+ `0001:04:00.0 10ec:8168`(RTL8168)
21) 修正为 Gen3 x2,重建镜像
变更(单变量):
- `qrb5165-eb5.dts` 中 `&pcie1` 的 `max-link-speed = <1>` → `<3>`,`num-lanes = <1>` → `<2>`
- 其余不变(clock provider、gpio141、SBR 等保留)
实测结果(已刷机):
- LnkSta 仍为 `0x1011`(Gen1 x1),即使写了 max-link-speed=3 也无法训练到 Gen3
- 手动写 LnkCtl2 TLS=Gen3(`setpci ... CAP_EXP+0x30.w=0x0003`)再触发 retrain → 仍 Gen1 x1
- PHY 寄存器层面 `sm8250_qmp_gen3x2_pciephy_cfg` 在主线确实存在(lanes=2,tbls_rc Gen3x2 表完整),但实际无法跑到 Gen3
- ASM2806 config space 仍返回 `0xffffffff` ❌
- 结论:Gen1 x1 ≠ 根因。速度与 config TLP 可见性正交,Gen1 链路也应能读 config space
关键反转:步骤 21 之前的假设("Gen1 导致 ASM2806 不可见")已被证伪。继续在旧内核 4.19 上用 pci-msm debugfs 深挖寄存器差异。
22) 修复:/delete-property/ iommu-map(当前待验证)
变更(单变量,仅改 DTS):
- `qrb5165-eb5.dts` 的 `&pcie1` 中,将多条 `iommu-map` 条目替换为 `/delete-property/ iommu-map;`
- 这使 `qcom_pcie_config_sid_1_9_0()` 因 `size=0` 提前返回,不清除 `BDF_TO_SID_BYPASS` bit
- 其余不变(max-link-speed=3, num-lanes=2, clock provider, gpio141 均保留)
镜像哈希:53d1472f5be3b1d28131ff79752dce537176661193bf33100089ea4e1bcd6066
验证口径(刷机后):
dmesg | grep -E '0001:|1c08000|eb5-pcie1'
lspci -nn | grep '^0001:' # 期望:1b21:2806 + 10ec:8168
ip -br a # 期望:eth0/eth1 UP
# 若 ASM2806 出现但 eth DOWN,追查:
dmesg | grep -E 'smmu.*fault|iommu.*error|r8169|rtl8168'

预期原理:bypass=1 → SMMU 透传所有 PCIe CplD → ASM2806 config read 正常返回 `1b21:2806`(偏移 0 处的 dword 为 `0x28061b21`:高 16 位为 Device ID,低 16 位为 Vendor ID)
旧内核(4.19)pci-msm debugfs 深度探查
22-vendor) 确认旧内核链路和设备状态
板上执行(vendor 4.19):
ip -br a
lo UNKNOWN 127.0.0.1/8 ::1/128
bond0 DOWN
dummy0 UNKNOWN fe80::d3f:ae5d:b7ab:4c76/64
eth1 UP 192.168.1.185/24 ... ← NIC working ✅
tailscale0 UNKNOWN 100.64.0.70/32 ...
wlan0 UP 192.168.6.1/24 ...
lspci -nn | grep '^0001:'
0001:00:00.0 PCI bridge [0604]: Qualcomm Device [17cb:010b]
0001:01:00.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:00.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:06.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:02:0e.0 PCI bridge [0604]: ASMedia Technology Inc. Device [1b21:2806] (rev 01) ✅
0001:04:00.0 Ethernet controller [0200]: Realtek ... RTL8111/8168 [10ec:8168] (rev 15) ✅
0001:05:00.0 Ethernet controller [0200]: Realtek ... RTL8111/8168 [10ec:8168] (rev 15) ✅

23-vendor) pci-msm debugfs case 9-32 探查结果
执行(vendor 4.19,rc_sel=2):
echo 2 > /sys/kernel/debug/pci-msm/rc_sel
for i in 9 10 11 12 13 14 15 16 20 21 22 32; do
echo "=== case $i ==="; echo $i > case; dmesg | tail -5; sleep 0.3
done

结果摘要:
| case | 含义 | 输出关键内容 |
|---|---|---|
| 9 | disable L1ss | PCIe: RC1: disable L1ss |
| 10 | enable L1ss | PCIe: RC1: enable L1ss |
| 11 | enumerate RC1 | PCIe: RC1 is already enumerated(不重枚举) |
| 12 | read PARF register | base: parf: 0xffffff80089d8000,wr_offset: 0x2c0,value: 0x0 |
| 13 | write PARF register | 写 parf+0x2c0 = 0x0(vendor 的 wr_offset=0x2c0 对应 PARF 内一个不同寄存器,非主线 PARF_BDF_TO_SID_CFG=0x2c00) |
| 14 | write PARF register | 同 case 13(延续 wr_offset/mask/value 设置) |
| 15 | map LBAR 4K DDR | 映射 DDR + LBAR=0x40008000 到内核虚地址 0xffffff8010e4d000 |
| 16 | unmap LBAR | 释放 case 15 的映射 |
| 20 | Read DDR values | DDR is not mapped(需先做 case 15) |
| 21 | Read LBAR values | LBAR address is not mapped(需先做 case 15) |
| 22 | Write 0x1 to DDR | DDR address is not mapped |
| 32 | set target speed Gen3 | PCIe: RC1: set target speed to Gen 3 |
重点:
- case=12 读 `parf+0x2c0` 值为 `0x0` → vendor 4.19 该偏移为零,不对应主线的 `PARF_BDF_TO_SID_CFG`(主线定义为 `0x2c00`,差了一个零)
- case=15 LBAR=`0x40008000`(vendor driver 的 debug ioremap 窗口,非 mainline ATU `0x40001000`)
- gpio debugfs 无输出:`cat /sys/kernel/debug/gpio | grep -E ' 82 | 141 '` → 空,说明 vendor 4.19 gpio 82/141 未以 debugfs 方式暴露(可能走的是 `msm_pcie` 私有接口)
24-vendor) pci-msm case=4 shadow dump 解析
执行(之前已获取):
echo 2 > /sys/kernel/debug/pci-msm/rc_sel
echo 4 > /sys/kernel/debug/pci-msm/case

输出(RTL8168 at 05:00.0 的 shadow registers):
shadow_dw[4]:cfg 0x10: 0x40202001 # BAR0 low (64-bit prefetchable, addr=0x40202000)
shadow_dw[6]:cfg 0x18: 0x40404004 # Type-1 bridge bus numbers
shadow_dw[8]:cfg 0x20: 0x40400004 # Memory Base/Limit
shadow_dw[20]:cfg 0x50: 0x817005 # PCIe capability
shadow_dw[30]:cfg 0x78: 0x10501f # Link cap: Gen3 x2
shadow_dw[32]:cfg 0x80: 0x10110002 # LnkSta2/LnkCtl2
shadow_dw[44]:cfg 0xb0: 0x30011 # L1 sub-state cap

→ vendor RTL8168 BAR 被正确分配在 0x40202000,内存窗口 0x40400000 区域正常。
根因分析(最终结论)
代码路径溯源
- 主线 `ops_1_9_0`(pcie-qcom.c)对应 `qcom,pcie-sm8250`,其 `.config_sid = qcom_pcie_config_sid_1_9_0`
- 该函数逻辑:

```c
of_get_property(dev->of_node, "iommu-map", &size);
if (!size)
        return 0;                                   // ← 有 iommu-map 才往下走
val = readl(pcie->parf + PARF_BDF_TO_SID_CFG);      // PARF_BDF_TO_SID_CFG = 0x2c00
val &= ~BDF_TO_SID_BYPASS;                          // ← 清除 bypass bit(BIT(0))
writel(val, pcie->parf + PARF_BDF_TO_SID_CFG);
// 然后填充 BDF-to-SID 哈希表...
```

- eb5.dts 中 `&pcie1` 有多条 `iommu-map`(BDF 0x0000→0x0600),导致 `config_sid` 被调用
- bypass bit(BIT(0))被清除 → SMMU 对所有下游设备的 CplD 执行 BDF→SID 查表
- PCI 枚举阶段,`iommu_attach_device()` 尚未被调用(它发生在 `device_add()` 之后,即第一次 config read 成功之后)
- SMMU 查 SID 0x1c81(ASM2806,BDF=0x0100)→ 无 context bank → fault → Cpl 被丢弃 → CPU 读到 `0xffffffff`
vendor 4.19 为何正常:
- out-of-tree `msm_pcie` 驱动从不调用任何等价 `config_sid` 逻辑
- `BDF_TO_SID_BYPASS`(硬件上电默认值 = 1,即 bypass 开启)始终保持
- 所有 PCIe completions 绕过 SMMU SID 查表,直达 CPU → ASM2806 config read 正常返回
对比表:
| 项目 | vendor 4.19 (msm_pcie) | 主线 6.19 (pcie-qcom.c) |
|---|---|---|
| 驱动 | out-of-tree,特有 RC 初始化 | 主线,通用 DWC ops_1_9_0 |
| `config_sid` 调用 | 从不调用 | `iommu-map` 非空时调用 |
| `BDF_TO_SID_BYPASS` | = 1(硬件默认,从未清除) | = 0(被主动清除) |
| ASM2806 CplD 路径 | 绕过 SMMU → CPU ✅ | SMMU fault → 丢包 ❌ |
| LnkSta | Gen3 x2 | Gen1 x1(PHY 实际不支持 Gen3) |