【环境信息】
硬件信息:Taishan 2280服务器直通机型
软件信息:
[root@localhost ~]# uname -r
4.19.90-2003.4.0.0036.oe1.aarch64
[root@localhost ~]#
[root@localhost ~]#
[root@localhost ~]# cat /etc/os-release
NAME="openEuler"
VERSION="20.03 (LTS)"
ID="openEuler"
VERSION_ID="20.03"
PRETTY_NAME="openEuler 20.03 (LTS)"
ANSI_COLOR="0;31"
【问题复现步骤】
- 操作系统安装在直通背板连接的硬盘里面
- 使用CPU下线接口将大量CPU核下线(如:在128核环境中下线100个CPU)
- 执行dmesg命令,此时dmesg命令卡住,无法返回
- 串口日志出现hisi_sas_v3_hw驱动异常打印
[root@localhost RAS_0410]# [ 279.332776] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 285.476780] hisi_sas_v3_hw 0000:b4:04.0: internal task abort: timeout and not done.
[ 285.494212] hisi_sas_v3_hw 0000:b4:04.0: abort task: internal abort (-5)
[ 305.956823] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 306.025296] hisi_sas_v3_hw 0000:b4:04.0: phydown: phy5 phy_state=0x0
[ 306.037148] hisi_sas_v3_hw 0000:b4:04.0: ignore flutter phy5 down
[ 306.193041] hisi_sas_v3_hw 0000:b4:04.0: phy5 invalid dword cnt: 661
[ 306.205001] hisi_sas_v3_hw 0000:b4:04.0: phy5 code violation cnt: 749
[ 306.216909] hisi_sas_v3_hw 0000:b4:04.0: phy5 disparity error cnt: 703
[ 306.231927] hisi_sas_v3_hw 0000:b4:04.0: phyup: phy5 link_rate=11
[ 307.193040] hisi_sas_v3_hw 0000:b4:04.0: phy5 disparity error cnt: 300005
[ 357.156801] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 363.300775] hisi_sas_v3_hw 0000:b4:04.0: internal task abort: timeout and not done.
[ 363.319663] hisi_sas_v3_hw 0000:b4:04.0: abort task: internal abort (-5)
出现概率(是否必现,概率性错误):必现
【预期结果】
dmesg命令可以正常输出结果;
【实际结果】
dmesg命令卡住;
【附件信息】
串口日志(包含hisi_sas_v3_hw驱动异常打印及hung task调用栈)如下:
[root@localhost RAS_0410]#
[root@localhost RAS_0410]#
[root@localhost RAS_0410]# [ 279.332776] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 285.476780] hisi_sas_v3_hw 0000:b4:04.0: internal task abort: timeout and not done.
[ 285.494212] hisi_sas_v3_hw 0000:b4:04.0: abort task: internal abort (-5)
[ 305.956823] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 306.025296] hisi_sas_v3_hw 0000:b4:04.0: phydown: phy5 phy_state=0x0
[ 306.037148] hisi_sas_v3_hw 0000:b4:04.0: ignore flutter phy5 down
[ 306.193041] hisi_sas_v3_hw 0000:b4:04.0: phy5 invalid dword cnt: 661
[ 306.205001] hisi_sas_v3_hw 0000:b4:04.0: phy5 code violation cnt: 749
[ 306.216909] hisi_sas_v3_hw 0000:b4:04.0: phy5 disparity error cnt: 703
[ 306.231927] hisi_sas_v3_hw 0000:b4:04.0: phyup: phy5 link_rate=11
[ 307.193040] hisi_sas_v3_hw 0000:b4:04.0: phy5 disparity error cnt: 300005
[ 357.156801] hisi_sas_v3_hw 0000:b4:04.0: abort tmf: TMF task timeout and not done
[ 363.300775] hisi_sas_v3_hw 0000:b4:04.0: internal task abort: timeout and not done.
[ 363.319663] hisi_sas_v3_hw 0000:b4:04.0: abort task: internal abort (-5)
[ 367.396882] INFO: task jbd2/dm-0-8:1259 blocked for more than 120 seconds.
[ 367.409820] Not tainted 4.19.90-2012.4.0.0053.oe1.aarch64 #1
[ 367.422152] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 367.442480] jbd2/dm-0-8 D 0 1259 2 0x00000228
[ 367.454363] Call trace:
[ 367.463132] __switch_to+0xf0/0x158
[ 367.472922] __schedule+0x2ac/0x970
[ 367.482755] schedule+0x2c/0x88
[ 367.492076] io_schedule+0x20/0x118
[ 367.501715] bit_wait_io+0x1c/0x70
[ 367.511110] __wait_on_bit+0x78/0xf0
[ 367.520525] out_of_line_wait_on_bit+0xac/0xd8
[ 367.530832] __wait_on_buffer+0x40/0x50
[ 367.540232] jbd2_journal_commit_transaction+0x1010/0x1c28 [jbd2]
[ 367.551903] kjournald2+0xe4/0x2e8 [jbd2]
[ 367.561475] kthread+0x134/0x138
[ 367.570076] ret_from_fork+0x10/0x18
[ 367.578919] INFO: task rs:main Q:Reg:2374 blocked for more than 120 seconds.
[ 367.591464] Not tainted 4.19.90-2012.4.0.0053.oe1.aarch64 #1
[ 367.603227] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 367.622637] rs:main Q:Reg D 0 2374 1 0x00000200
[ 367.634236] Call trace:
[ 367.642522] __switch_to+0xf0/0x158
[ 367.651943] __schedule+0x2ac/0x970
[ 367.661064] schedule+0x2c/0x88
[ 367.669640] io_schedule+0x20/0x118
[ 367.678401] bit_wait_io+0x1c/0x70
[ 367.686985] __wait_on_bit+0x78/0xf0
[ 367.695643] out_of_line_wait_on_bit+0xac/0xd8
[ 367.705095] do_get_write_access+0x240/0x480 [jbd2]
[ 367.714765] jbd2_journal_get_write_access+0x64/0xc0 [jbd2]
[ 367.725187] __ext4_journal_get_write_access+0xdc/0x160 [ext4]
[ 367.735796] ext4_reserve_inode_write+0xb0/0x100 [ext4]
[ 367.745810] ext4_mark_inode_dirty+0x5c/0x218 [ext4]
[ 367.755612] ext4_dirty_inode+0x60/0x88 [ext4]
[ 367.764916] __mark_inode_dirty+0x50/0x3f8
[ 367.773896] generic_update_time+0xa4/0xe8
[ 367.782885] file_update_time+0xf8/0x148
[ 367.791862] __generic_file_write_iter+0x90/0x1c8
[ 367.801583] ext4_file_write_iter+0x120/0x400 [ext4]
[ 367.811715] __vfs_write+0x11c/0x190
[ 367.820294] vfs_write+0xac/0x1c0
[ 367.828657] ksys_write+0x74/0xf0
[ 367.836869] __arm64_sys_write+0x24/0x30
[ 367.845729] el0_svc_common+0x78/0x130